From adcece792768751df3cb443eba8fe4e12bea5b13 Mon Sep 17 00:00:00 2001 From: yunfeibai Date: Wed, 27 Sep 2017 14:14:57 -0700 Subject: [PATCH 01/11] Adjust the IK setup to address the inverse kinematics issues mentioned in #1249. --- examples/SharedMemory/IKTrajectoryHelper.cpp | 4 +++- examples/ThirdPartyLibs/BussIK/Jacobian.cpp | 6 ++++++ examples/pybullet/examples/inverse_kinematics.py | 7 ++++--- 3 files changed, 13 insertions(+), 4 deletions(-) diff --git a/examples/SharedMemory/IKTrajectoryHelper.cpp b/examples/SharedMemory/IKTrajectoryHelper.cpp index 0a6cbbf80..0145251bb 100644 --- a/examples/SharedMemory/IKTrajectoryHelper.cpp +++ b/examples/SharedMemory/IKTrajectoryHelper.cpp @@ -185,7 +185,9 @@ bool IKTrajectoryHelper::computeNullspaceVel(int numQ, const double* q_current, { m_data->m_nullSpaceVelocity.SetLength(numQ); m_data->m_nullSpaceVelocity.SetZero(); - double stayCloseToZeroGain = 0.1; + // TODO: Expose the coefficents of the null space term so that the user can choose to balance the null space task and the IK target task. + // Can also adaptively adjust the coefficients based on the residual of the null space velocity in the IK target task space. + double stayCloseToZeroGain = 0.001; double stayAwayFromLimitsGain = 10.0; // Stay close to zero diff --git a/examples/ThirdPartyLibs/BussIK/Jacobian.cpp b/examples/ThirdPartyLibs/BussIK/Jacobian.cpp index 301910a9c..c082bef31 100644 --- a/examples/ThirdPartyLibs/BussIK/Jacobian.cpp +++ b/examples/ThirdPartyLibs/BussIK/Jacobian.cpp @@ -354,6 +354,12 @@ void Jacobian::CalcDeltaThetasDLSwithNullspace(const VectorRn& desiredV) // Compute null space velocity VectorRn nullV(J.GetNumColumns()); P.Multiply(desiredV, nullV); + + // Compute residual + VectorRn residual(J.GetNumRows()); + J.Multiply(nullV, residual); + // TODO: Use residual to set the null space term coefficient adaptively. + //printf("residual: %f\n", residual.Norm()); // Add null space velocity dTheta += nullV; diff --git a/examples/pybullet/examples/inverse_kinematics.py b/examples/pybullet/examples/inverse_kinematics.py index 0dba11bf9..9aa32afed 100644 --- a/examples/pybullet/examples/inverse_kinematics.py +++ b/examples/pybullet/examples/inverse_kinematics.py @@ -37,20 +37,21 @@ hasPrevPose = 0 useNullSpace = 0 useOrientation = 1 -useSimulation = 1 +#If we set useSimulation=0, it sets the arm pose to be the IK result directly without using dynamic control. +#This can be used to test the IK result accuracy. +useSimulation = 0 useRealTimeSimulation = 1 p.setRealTimeSimulation(useRealTimeSimulation) #trailDuration is duration (in seconds) after debug lines will be removed automatically #use 0 for no-removal trailDuration = 15 - while 1: if (useRealTimeSimulation): dt = datetime.now() t = (dt.second/60.)*2.*math.pi else: - t=t+0.1 + t=t+0.001 if (useSimulation and useRealTimeSimulation==0): p.stepSimulation() From 113e103bc234d25fdee11b0a8fe88cc1a8f755bf Mon Sep 17 00:00:00 2001 From: yunfeibai Date: Wed, 27 Sep 2017 15:18:08 -0700 Subject: [PATCH 02/11] Modify the joint damping input check, and output error message when it doesn't pass the size check. Modify the kuka grasping example accordingly. Setting joint damping in this example was a no-op before. --- .../pybullet/gym/pybullet_envs/bullet/kuka.py | 2 +- examples/pybullet/pybullet.c | 30 ++++++++++++++----- 2 files changed, 24 insertions(+), 8 deletions(-) diff --git a/examples/pybullet/gym/pybullet_envs/bullet/kuka.py b/examples/pybullet/gym/pybullet_envs/bullet/kuka.py index 5a35bf9a7..3e7ee6c49 100644 --- a/examples/pybullet/gym/pybullet_envs/bullet/kuka.py +++ b/examples/pybullet/gym/pybullet_envs/bullet/kuka.py @@ -34,7 +34,7 @@ class Kuka: #restposes for null space self.rp=[0,0,0,0.5*math.pi,0,-math.pi*0.5*0.66,0] #joint damping coefficents - self.jd=[0.1,0.1,0.1,0.1,0.1,0.1,0.1] + self.jd=[0.00001,0.00001,0.00001,0.00001,0.00001,0.00001,0.00001,0.00001,0.00001,0.00001,0.00001,0.00001,0.00001,0.00001] self.reset() def reset(self): diff --git a/examples/pybullet/pybullet.c b/examples/pybullet/pybullet.c index c1f7e2eb2..40e234421 100644 --- a/examples/pybullet/pybullet.c +++ b/examples/pybullet/pybullet.c @@ -6735,16 +6735,32 @@ static PyObject* pybullet_calculateInverseKinematics(PyObject* self, hasNullSpace = 1; } - if (numJoints && (szJointDamping == numJoints)) + if (szJointDamping > 0) { - int szInBytes = sizeof(double) * numJoints; - int i; - jointDamping = (double*)malloc(szInBytes); - for (i = 0; i < numJoints; i++) + // We allow the number of joint damping values to be larger than + // the number of degrees of freedom (DOFs). On the server side, it does + // the check and only sets joint damping for all DOFs. + // We can use the number of DOFs here when that is exposed. Since the + // number of joints is larger than the number of DOFs (we assume the + // joint is either fixed joint or one DOF joint), it is safe to use + // number of joints here. + if (szJointDamping < numJoints) { - jointDamping[i] = pybullet_internalGetFloatFromSequence(jointDampingObj, i); + PyErr_SetString(SpamError, + "calculateInverseKinematics the size of input joint damping values is smaller than the number of joints."); + return NULL; + } + else + { + int szInBytes = sizeof(double) * szJointDamping; + int i; + jointDamping = (double*)malloc(szInBytes); + for (i = 0; i < szJointDamping; i++) + { + jointDamping[i] = pybullet_internalGetFloatFromSequence(jointDampingObj, i); + } + hasJointDamping = 1; } - hasJointDamping = 1; } if (hasPos) From 54eada7579ba1b485ada22375780269e977ee46d Mon Sep 17 00:00:00 2001 From: yunfeibai Date: Wed, 27 Sep 2017 16:42:29 -0700 Subject: [PATCH 03/11] Fix memory leak. --- examples/pybullet/pybullet.c | 1 + 1 file changed, 1 insertion(+) diff --git a/examples/pybullet/pybullet.c b/examples/pybullet/pybullet.c index c52ff4630..a6a51d23b 100644 --- a/examples/pybullet/pybullet.c +++ b/examples/pybullet/pybullet.c @@ -6945,6 +6945,7 @@ static PyObject* pybullet_calculateInverseKinematics(PyObject* self, { b3CalculateInverseKinematicsSetJointDamping(command, numJoints, jointDamping); } + free(jointDamping); statusHandle = b3SubmitClientCommandAndWaitStatus(sm, command); From 0b239e8bc0f42cde603db2022252edb32e34486c Mon Sep 17 00:00:00 2001 From: Jeffrey Bingham Date: Mon, 25 Sep 2017 07:57:06 -0700 Subject: [PATCH 04/11] [pybullet] Add an example for jacobian. --- examples/pybullet/examples/jacobian.py | 56 ++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) create mode 100644 examples/pybullet/examples/jacobian.py diff --git a/examples/pybullet/examples/jacobian.py b/examples/pybullet/examples/jacobian.py new file mode 100644 index 000000000..3a0dfd6e9 --- /dev/null +++ b/examples/pybullet/examples/jacobian.py @@ -0,0 +1,56 @@ +import sys +sys.path.append('/Users/bingjeff/Documents/projects/walkman/bullet3/bin/') +import pybullet as p +import numpy as np + +def getJointStates(robot): + joint_states = p.getJointStates(robot, range(p.getNumJoints(robot))) + joint_positions = [state[0] for state in joint_states] + joint_velocities = [state[1] for state in joint_states] + joint_torques = [state[3] for state in joint_states] + return joint_positions, joint_velocities, joint_torques + +def setJointPosition(robot, position, kp=1.0, kv=0.3): + num_joints = p.getNumJoints(robot) + zero_vec = [0.0] * num_joints + if len(position) == num_joints: + p.setJointMotorControlArray(robot, range(num_joints), + p.POSITION_CONTROL, + targetPositions=position, + targetVelocities=zero_vec, + positionGains=[kp] * num_joints, + velocityGains=[kv] * num_joints) + else: + print("Not setting torque. " + "Expected torque vector of " + "length {}, got {}".format(num_joints, len(torque))) + + +clid = p.connect(p.SHARED_MEMORY) +if (clid<0): + p.connect(p.GUI) +time_step = 0.001 +gravity_constant = -9.81 +p.resetSimulation() +p.setTimeStep(time_step) +p.setGravity(0.0, 0.0, gravity_constant) +p.loadURDF("plane.urdf",[0,0,-0.3]) +kukaId = p.loadURDF("kuka_iiwa/model.urdf",[0,0,0]) +p.resetBasePositionAndOrientation(kukaId,[0,0,0],[0,0,0,1]) +kukaEndEffectorIndex = 6 +numJoints = p.getNumJoints(kukaId) +if (numJoints!=7): + exit() + +setJointPosition(kukaId, [0.1] * p.getNumJoints(kukaId)) +p.stepSimulation() + +result = p.getLinkState(kukaId, kukaEndEffectorIndex, computeLinkVelocity=1) +link_trn, link_rot, com_trn, com_rot, frame_pos, frame_rot, link_vt, link_vr = result +pos, vel, torq = getJointStates(kukaId) +jac_t, jac_r = p.calculateJacobian(kukaId, kukaEndEffectorIndex, com_trn, pos, vel, vel) + +print "Link velocity of CoM from getLinkState:" +print link_vt +print "Link velocity of CoM from linearJacobian * q_dot:" +print np.dot(np.array(jac_t), np.array(vel)) From 6da931d0bd9f3028cef2f274d75629c0fddf5fe6 Mon Sep 17 00:00:00 2001 From: Jeffrey Bingham Date: Mon, 25 Sep 2017 23:11:35 -0700 Subject: [PATCH 05/11] Adjust jacobian.py to show mismatch between joint state and link state. --- examples/pybullet/examples/jacobian.py | 40 ++++++++++++++++++++++---- examples/pybullet/pybullet.c | 5 ++-- 2 files changed, 38 insertions(+), 7 deletions(-) diff --git a/examples/pybullet/examples/jacobian.py b/examples/pybullet/examples/jacobian.py index 3a0dfd6e9..d0b4f68ce 100644 --- a/examples/pybullet/examples/jacobian.py +++ b/examples/pybullet/examples/jacobian.py @@ -25,6 +25,18 @@ def setJointPosition(robot, position, kp=1.0, kv=0.3): "Expected torque vector of " "length {}, got {}".format(num_joints, len(torque))) +def setJointState(robot, position, velocity=None): + num_joints = pb.getNumJoints(robot) + if velocity is None: + velocity = [0.0] * num_joints + if len(position) == num_joints and len(velocity) == num_joints: + for c in range(num_joints): + pb.resetJointState(robot, c, position[c], velocity[c]) + else: + print("Not setting state. " + "Expected vectors of length {}, " + "got pos={}, vel={}".format(num_joints, len(position), len(velocity))) + clid = p.connect(p.SHARED_MEMORY) if (clid<0): @@ -45,12 +57,30 @@ if (numJoints!=7): setJointPosition(kukaId, [0.1] * p.getNumJoints(kukaId)) p.stepSimulation() -result = p.getLinkState(kukaId, kukaEndEffectorIndex, computeLinkVelocity=1) -link_trn, link_rot, com_trn, com_rot, frame_pos, frame_rot, link_vt, link_vr = result +# Get the value of the link and joint state after a simulation step. +result0 = p.getLinkState(kukaId, kukaEndEffectorIndex, computeLinkVelocity=1) pos, vel, torq = getJointStates(kukaId) -jac_t, jac_r = p.calculateJacobian(kukaId, kukaEndEffectorIndex, com_trn, pos, vel, vel) -print "Link velocity of CoM from getLinkState:" +# Now "reset" the simulation to have the same joint state and read the link state. +setJointState(kukaId, pos, vel) +result1 = p.getLinkState(kukaId, kukaEndEffectorIndex, computeLinkVelocity=1) +link_trn, link_rot, com_trn, com_rot, frame_pos, frame_rot, link_vt, link_vr = result1 + +# Show that the link origin is different for supposedly the same joint state. +print "Link origin - stepSimulation:" +print result0[0] +print "Link origin - resetJointState:" +print result1[0] + +# Using the "reset" system the body velocities (should) match. +zero_vec = [0.0] * numJoints +jac_t, jac_r = p.calculateJacobian(kukaId, kukaEndEffectorIndex, com_trn, pos, zero_vec, zero_vec) + +print "Link linear velocity of CoM from getLinkState:" print link_vt -print "Link velocity of CoM from linearJacobian * q_dot:" +print "Link linear velocity of CoM from linearJacobian * q_dot:" print np.dot(np.array(jac_t), np.array(vel)) +print "Link angular velocity of CoM from getLinkState:" +print link_vr +print "Link angular velocity of CoM from angularJacobian * q_dot:" +print np.dot(np.array(jac_r), np.array(vel)) diff --git a/examples/pybullet/pybullet.c b/examples/pybullet/pybullet.c index a6a51d23b..ea4917a88 100644 --- a/examples/pybullet/pybullet.c +++ b/examples/pybullet/pybullet.c @@ -7416,8 +7416,9 @@ static PyMethodDef SpamMethods[] = { " frame."}, {"resetJointState", (PyCFunction)pybullet_resetJointState, METH_VARARGS | METH_KEYWORDS, - "Reset the state (position, velocity etc) for a joint on a body " - "instantaneously, not through physics simulation."}, + "resetJointState(objectUniqueId, jointIndex, targetValue, targetVelocity=0, physicsClientId=0)\n" + "Reset the state (position, velocity etc) for a joint on a body " + "instantaneously, not through physics simulation."}, {"changeDynamics", (PyCFunction)pybullet_changeDynamicsInfo, METH_VARARGS | METH_KEYWORDS, "change dynamics information such as mass, lateral friction coefficient."}, From d8b80bce40612d5ba64fdd1b65aeac673f52537c Mon Sep 17 00:00:00 2001 From: Jeffrey Bingham Date: Wed, 27 Sep 2017 22:23:22 -0700 Subject: [PATCH 06/11] Fix the translation jacobian. --- .../PhysicsServerCommandProcessor.cpp | 5 ++ examples/pybullet/examples/jacobian.py | 61 +++++++------------ examples/pybullet/pybullet.c | 8 ++- 3 files changed, 35 insertions(+), 39 deletions(-) diff --git a/examples/SharedMemory/PhysicsServerCommandProcessor.cpp b/examples/SharedMemory/PhysicsServerCommandProcessor.cpp index 7f3223b59..790991d37 100644 --- a/examples/SharedMemory/PhysicsServerCommandProcessor.cpp +++ b/examples/SharedMemory/PhysicsServerCommandProcessor.cpp @@ -6667,6 +6667,11 @@ bool PhysicsServerCommandProcessor::processCommand(const struct SharedMemoryComm } // Only calculate if the localPosition is non-zero. if (btInverseDynamics::maxAbs(localPosition) > 0.0) { + // Write the localPosition into world coordinates. + btInverseDynamics::mat33 world_rotation_body; + tree->getBodyTransform(clientCmd.m_calculateJacobianArguments.m_linkIndex + 1, &world_rotation_body); + localPosition = world_rotation_body * localPosition; + // Correct the translational jacobian. btInverseDynamics::mat33 skewCrossProduct; btInverseDynamics::skew(localPosition, &skewCrossProduct); btInverseDynamics::mat3x jac_l(3, numDofs + baseDofs); diff --git a/examples/pybullet/examples/jacobian.py b/examples/pybullet/examples/jacobian.py index d0b4f68ce..e52caccdf 100644 --- a/examples/pybullet/examples/jacobian.py +++ b/examples/pybullet/examples/jacobian.py @@ -1,7 +1,7 @@ import sys sys.path.append('/Users/bingjeff/Documents/projects/walkman/bullet3/bin/') import pybullet as p -import numpy as np + def getJointStates(robot): joint_states = p.getJointStates(robot, range(p.getNumJoints(robot))) @@ -25,22 +25,17 @@ def setJointPosition(robot, position, kp=1.0, kv=0.3): "Expected torque vector of " "length {}, got {}".format(num_joints, len(torque))) -def setJointState(robot, position, velocity=None): - num_joints = pb.getNumJoints(robot) - if velocity is None: - velocity = [0.0] * num_joints - if len(position) == num_joints and len(velocity) == num_joints: - for c in range(num_joints): - pb.resetJointState(robot, c, position[c], velocity[c]) - else: - print("Not setting state. " - "Expected vectors of length {}, " - "got pos={}, vel={}".format(num_joints, len(position), len(velocity))) +def multiplyJacobian(jacobian, vector): + result = [0.0, 0.0, 0.0] + for c in range(len(vector)): + for r in range(3): + result[r] += jacobian[r][c] * vector[c] + return result clid = p.connect(p.SHARED_MEMORY) if (clid<0): - p.connect(p.GUI) + p.connect(p.DIRECT) time_step = 0.001 gravity_constant = -9.81 p.resetSimulation() @@ -53,34 +48,24 @@ kukaEndEffectorIndex = 6 numJoints = p.getNumJoints(kukaId) if (numJoints!=7): exit() - +# Set a joint target for the position control and step the sim. setJointPosition(kukaId, [0.1] * p.getNumJoints(kukaId)) p.stepSimulation() - -# Get the value of the link and joint state after a simulation step. -result0 = p.getLinkState(kukaId, kukaEndEffectorIndex, computeLinkVelocity=1) +# Get the joint and link state directly from Bullet. pos, vel, torq = getJointStates(kukaId) - -# Now "reset" the simulation to have the same joint state and read the link state. -setJointState(kukaId, pos, vel) -result1 = p.getLinkState(kukaId, kukaEndEffectorIndex, computeLinkVelocity=1) -link_trn, link_rot, com_trn, com_rot, frame_pos, frame_rot, link_vt, link_vr = result1 - -# Show that the link origin is different for supposedly the same joint state. -print "Link origin - stepSimulation:" -print result0[0] -print "Link origin - resetJointState:" -print result1[0] - -# Using the "reset" system the body velocities (should) match. +result = p.getLinkState(kukaId, kukaEndEffectorIndex, computeLinkVelocity=1, computeForwardKinematics=1) +link_trn, link_rot, com_trn, com_rot, frame_pos, frame_rot, link_vt, link_vr = result +# Get the Jacobians for the CoM of the end-effector link. +# Note that in this example com_rot = identity, and we would need to use com_rot.T * com_trn. +# The localPosition is always defined in terms of the link frame coordinates. zero_vec = [0.0] * numJoints jac_t, jac_r = p.calculateJacobian(kukaId, kukaEndEffectorIndex, com_trn, pos, zero_vec, zero_vec) -print "Link linear velocity of CoM from getLinkState:" -print link_vt -print "Link linear velocity of CoM from linearJacobian * q_dot:" -print np.dot(np.array(jac_t), np.array(vel)) -print "Link angular velocity of CoM from getLinkState:" -print link_vr -print "Link angular velocity of CoM from angularJacobian * q_dot:" -print np.dot(np.array(jac_r), np.array(vel)) +print ("Link linear velocity of CoM from getLinkState:") +print (link_vt) +print ("Link linear velocity of CoM from linearJacobian * q_dot:") +print (multiplyJacobian(jac_t, vel)) +print ("Link angular velocity of CoM from getLinkState:") +print (link_vr) +print ("Link angular velocity of CoM from angularJacobian * q_dot:") +print (multiplyJacobian(jac_r, vel)) diff --git a/examples/pybullet/pybullet.c b/examples/pybullet/pybullet.c index ea4917a88..f21d294c3 100644 --- a/examples/pybullet/pybullet.c +++ b/examples/pybullet/pybullet.c @@ -7411,6 +7411,12 @@ static PyMethodDef SpamMethods[] = { "Get the state (position, velocity etc) for multiple joints on a body."}, {"getLinkState", (PyCFunction)pybullet_getLinkState, METH_VARARGS | METH_KEYWORDS, + "position_linkcom_world, world_rotation_linkcom, " + "position_linkcom_frame, frame_rotation_linkcom, " + "position_frame_world, world_rotation_frame, " + "linearVelocity_linkcom_world, angularVelocity_linkcom_world = " + "getLinkState(objectUniqueId, linkIndex, computeLinkVelocity=0, " + "computeForwardKinematics=0, physicsClientId=0)\n" "Provides extra information such as the Cartesian world coordinates" " center of mass (COM) of the link, relative to the world reference" " frame."}, @@ -7564,7 +7570,7 @@ static PyMethodDef SpamMethods[] = { "Args:\n" " bodyIndex - a scalar defining the unique object id.\n" " linkIndex - a scalar identifying the link containing the local point.\n" - " localPosition - a list of [x, y, z] of the coordinates of the local point.\n" + " localPosition - a list of [x, y, z] of the coordinates defined in the link frame.\n" " objPositions - a list of the joint positions.\n" " objVelocities - a list of the joint velocities.\n" " objAccelerations - a list of the joint accelerations.\n" From d49fade7042a54454e5b8442cf742518a2c5f6da Mon Sep 17 00:00:00 2001 From: Jeffrey Bingham Date: Wed, 27 Sep 2017 22:30:32 -0700 Subject: [PATCH 07/11] fix some formatting. --- examples/SharedMemory/PhysicsServerCommandProcessor.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/SharedMemory/PhysicsServerCommandProcessor.cpp b/examples/SharedMemory/PhysicsServerCommandProcessor.cpp index 790991d37..f9e0cd6a9 100644 --- a/examples/SharedMemory/PhysicsServerCommandProcessor.cpp +++ b/examples/SharedMemory/PhysicsServerCommandProcessor.cpp @@ -6671,7 +6671,7 @@ bool PhysicsServerCommandProcessor::processCommand(const struct SharedMemoryComm btInverseDynamics::mat33 world_rotation_body; tree->getBodyTransform(clientCmd.m_calculateJacobianArguments.m_linkIndex + 1, &world_rotation_body); localPosition = world_rotation_body * localPosition; - // Correct the translational jacobian. + // Correct the translational jacobian. btInverseDynamics::mat33 skewCrossProduct; btInverseDynamics::skew(localPosition, &skewCrossProduct); btInverseDynamics::mat3x jac_l(3, numDofs + baseDofs); From cd231c030e267389b66b45e544864fe7c924a1f0 Mon Sep 17 00:00:00 2001 From: Jeffrey Bingham Date: Wed, 27 Sep 2017 22:35:02 -0700 Subject: [PATCH 08/11] more formatting. --- examples/pybullet/examples/jacobian.py | 11 +++-------- examples/pybullet/pybullet.c | 12 ++++++------ 2 files changed, 9 insertions(+), 14 deletions(-) diff --git a/examples/pybullet/examples/jacobian.py b/examples/pybullet/examples/jacobian.py index e52caccdf..17638bfb3 100644 --- a/examples/pybullet/examples/jacobian.py +++ b/examples/pybullet/examples/jacobian.py @@ -1,5 +1,3 @@ -import sys -sys.path.append('/Users/bingjeff/Documents/projects/walkman/bullet3/bin/') import pybullet as p @@ -14,12 +12,9 @@ def setJointPosition(robot, position, kp=1.0, kv=0.3): num_joints = p.getNumJoints(robot) zero_vec = [0.0] * num_joints if len(position) == num_joints: - p.setJointMotorControlArray(robot, range(num_joints), - p.POSITION_CONTROL, - targetPositions=position, - targetVelocities=zero_vec, - positionGains=[kp] * num_joints, - velocityGains=[kv] * num_joints) + p.setJointMotorControlArray(robot, range(num_joints), p.POSITION_CONTROL, + targetPositions=position, targetVelocities=zero_vec, + positionGains=[kp] * num_joints, velocityGains=[kv] * num_joints) else: print("Not setting torque. " "Expected torque vector of " diff --git a/examples/pybullet/pybullet.c b/examples/pybullet/pybullet.c index f21d294c3..b332ad650 100644 --- a/examples/pybullet/pybullet.c +++ b/examples/pybullet/pybullet.c @@ -7411,12 +7411,12 @@ static PyMethodDef SpamMethods[] = { "Get the state (position, velocity etc) for multiple joints on a body."}, {"getLinkState", (PyCFunction)pybullet_getLinkState, METH_VARARGS | METH_KEYWORDS, - "position_linkcom_world, world_rotation_linkcom, " - "position_linkcom_frame, frame_rotation_linkcom, " - "position_frame_world, world_rotation_frame, " - "linearVelocity_linkcom_world, angularVelocity_linkcom_world = " - "getLinkState(objectUniqueId, linkIndex, computeLinkVelocity=0, " - "computeForwardKinematics=0, physicsClientId=0)\n" + "position_linkcom_world, world_rotation_linkcom,\n" + "position_linkcom_frame, frame_rotation_linkcom,\n" + "position_frame_world, world_rotation_frame,\n" + "linearVelocity_linkcom_world, angularVelocity_linkcom_world\n" + " = getLinkState(objectUniqueId, linkIndex, computeLinkVelocity=0,\n" + " computeForwardKinematics=0, physicsClientId=0)\n" "Provides extra information such as the Cartesian world coordinates" " center of mass (COM) of the link, relative to the world reference" " frame."}, From 345836d670a001be8b0ee1738eb483f7a9332a7b Mon Sep 17 00:00:00 2001 From: Erwin Coumans Date: Thu, 28 Sep 2017 10:19:41 -0700 Subject: [PATCH 09/11] revert to original agents train/visualize scripts, but using pybullet envs in configs.py (agents lacks a convenient way to extend environments) --- .../agents/{config_ppo.py => configs.py} | 42 +++-- .../gym/pybullet_envs/agents/train_ppo.py | 163 +++++++++++++++--- .../gym/pybullet_envs/agents/visualize_ppo.py | 161 ++++++++++++++--- 3 files changed, 307 insertions(+), 59 deletions(-) rename examples/pybullet/gym/pybullet_envs/agents/{config_ppo.py => configs.py} (73%) diff --git a/examples/pybullet/gym/pybullet_envs/agents/config_ppo.py b/examples/pybullet/gym/pybullet_envs/agents/configs.py similarity index 73% rename from examples/pybullet/gym/pybullet_envs/agents/config_ppo.py rename to examples/pybullet/gym/pybullet_envs/agents/configs.py index 2ccd38adc..3def13e70 100644 --- a/examples/pybullet/gym/pybullet_envs/agents/config_ppo.py +++ b/examples/pybullet/gym/pybullet_envs/agents/configs.py @@ -1,8 +1,25 @@ -"""The PPO training configuration file for minitaur environments.""" +# Copyright 2017 The TensorFlow Agents Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Example configurations using the PPO algorithm.""" + from __future__ import absolute_import from __future__ import division from __future__ import print_function -import functools + +# pylint: disable=unused-variable + from agents import ppo from agents.scripts import networks from pybullet_envs.bullet import minitaur_gym_env @@ -11,33 +28,33 @@ import pybullet_envs.bullet.minitaur_gym_env as minitaur_gym_env import pybullet_envs -# pylint: disable=unused-variable def default(): - """The default configurations.""" + """Default configuration for PPO.""" # General algorithm = ppo.PPOAlgorithm - num_agents = 25 + num_agents = 10 eval_episodes = 25 use_gpu = False # Network network = networks.ForwardGaussianPolicy weight_summaries = dict( - all=r'.*', policy=r'.*/policy/.*', value=r'.*/value/.*') + all=r'.*', + policy=r'.*/policy/.*', + value=r'.*/value/.*') policy_layers = 200, 100 value_layers = 200, 100 - init_mean_factor = 0.2 + init_mean_factor = 0.05 init_logstd = -1 - network_config = dict() # Optimization update_every = 25 policy_optimizer = 'AdamOptimizer' value_optimizer = 'AdamOptimizer' - update_epochs_policy = 25 - update_epochs_value = 25 - value_lr = 1e-3 + update_epochs_policy = 50 + update_epochs_value = 50 policy_lr = 1e-4 + value_lr = 3e-4 # Losses - discount = 0.99 + discount = 0.985 kl_target = 1e-2 kl_cutoff_factor = 2 kl_cutoff_coef = 1000 @@ -107,4 +124,3 @@ def pybullet_minitaur(): steps = 3e7 # 30M return locals() - diff --git a/examples/pybullet/gym/pybullet_envs/agents/train_ppo.py b/examples/pybullet/gym/pybullet_envs/agents/train_ppo.py index 195df7df8..6b6a49766 100644 --- a/examples/pybullet/gym/pybullet_envs/agents/train_ppo.py +++ b/examples/pybullet/gym/pybullet_envs/agents/train_ppo.py @@ -1,8 +1,22 @@ -r"""Script to use Proximal Policy Gradient for the minitaur environments. +# Copyright 2017 The TensorFlow Agents Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. -Run: - python train_ppo.py --logdif=/tmp/train --config=minitaur_pybullet +r"""Script to train a batch reinforcement learning algorithm. +Command line: + + python3 -m agents.scripts.train --logdir=/path/to/logdir --config=pendulum """ from __future__ import absolute_import @@ -10,39 +24,142 @@ from __future__ import division from __future__ import print_function import datetime +import functools import os + +import gym import tensorflow as tf from agents import tools -from agents.scripts import train +from . import configs from agents.scripts import utility -from . import config_ppo -flags = tf.app.flags -FLAGS = tf.app.flags.FLAGS +def _create_environment(config): + """Constructor for an instance of the environment. -flags.DEFINE_string( - 'logdir', None, - 'Base directory to store logs.') -flags.DEFINE_string( - 'config', None, - 'Configuration to execute.') -flags.DEFINE_string( - 'timestamp', datetime.datetime.now().strftime('%Y%m%dT%H%M%S'), - 'Sub directory to store logs.') + Args: + config: Object providing configurations via attributes. + + Returns: + Wrapped OpenAI Gym environment. + """ + if isinstance(config.env, str): + env = gym.make(config.env) + else: + env = config.env() + if config.max_length: + env = tools.wrappers.LimitDuration(env, config.max_length) + env = tools.wrappers.RangeNormalize(env) + env = tools.wrappers.ClipAction(env) + env = tools.wrappers.ConvertTo32Bit(env) + return env + + +def _define_loop(graph, logdir, train_steps, eval_steps): + """Create and configure a training loop with training and evaluation phases. + + Args: + graph: Object providing graph elements via attributes. + logdir: Log directory for storing checkpoints and summaries. + train_steps: Number of training steps per epoch. + eval_steps: Number of evaluation steps per epoch. + + Returns: + Loop object. + """ + loop = tools.Loop( + logdir, graph.step, graph.should_log, graph.do_report, + graph.force_reset) + loop.add_phase( + 'train', graph.done, graph.score, graph.summary, train_steps, + report_every=None, + log_every=train_steps // 2, + checkpoint_every=None, + feed={graph.is_training: True}) + loop.add_phase( + 'eval', graph.done, graph.score, graph.summary, eval_steps, + report_every=eval_steps, + log_every=eval_steps // 2, + checkpoint_every=10 * eval_steps, + feed={graph.is_training: False}) + return loop + + +def train(config, env_processes): + """Training and evaluation entry point yielding scores. + + Resolves some configuration attributes, creates environments, graph, and + training loop. By default, assigns all operations to the CPU. + + Args: + config: Object providing configurations via attributes. + env_processes: Whether to step environments in separate processes. + + Yields: + Evaluation scores. + """ + tf.reset_default_graph() + with config.unlocked: + config.network = functools.partial( + utility.define_network, config.network, config) + config.policy_optimizer = getattr(tf.train, config.policy_optimizer) + config.value_optimizer = getattr(tf.train, config.value_optimizer) + if config.update_every % config.num_agents: + tf.logging.warn('Number of agents should divide episodes per update.') + with tf.device('/cpu:0'): + batch_env = utility.define_batch_env( + lambda: _create_environment(config), + config.num_agents, env_processes) + graph = utility.define_simulation_graph( + batch_env, config.algorithm, config) + loop = _define_loop( + graph, config.logdir, + config.update_every * config.max_length, + config.eval_episodes * config.max_length) + total_steps = int( + config.steps / config.update_every * + (config.update_every + config.eval_episodes)) + # Exclude episode related variables since the Python state of environments is + # not checkpointed and thus new episodes start after resuming. + saver = utility.define_saver(exclude=(r'.*_temporary/.*',)) + sess_config = tf.ConfigProto(allow_soft_placement=True) + sess_config.gpu_options.allow_growth = True + with tf.Session(config=sess_config) as sess: + utility.initialize_variables(sess, saver, config.logdir) + for score in loop.run(sess, saver, total_steps): + yield score + batch_env.close() def main(_): """Create or load configuration and launch the trainer.""" - config = tools.AttrDict(getattr(config_ppo, FLAGS.config)()) - logdir = FLAGS.logdir and os.path.join( - FLAGS.logdir, '{}-{}'.format(FLAGS.timestamp, FLAGS.config)) - utility.save_config(config, logdir) - for score in train.train(config, env_processes=True): - tf.logging.info(str(score)) + utility.set_up_logging() + if not FLAGS.config: + raise KeyError('You must specify a configuration.') + logdir = FLAGS.logdir and os.path.expanduser(os.path.join( + FLAGS.logdir, '{}-{}'.format(FLAGS.timestamp, FLAGS.config))) + try: + config = utility.load_config(logdir) + except IOError: + config = tools.AttrDict(getattr(configs, FLAGS.config)()) + config = utility.save_config(config, logdir) + for score in train(config, FLAGS.env_processes): + tf.logging.info('Score {}.'.format(score)) if __name__ == '__main__': + FLAGS = tf.app.flags.FLAGS + tf.app.flags.DEFINE_string( + 'logdir', None, + 'Base directory to store logs.') + tf.app.flags.DEFINE_string( + 'timestamp', datetime.datetime.now().strftime('%Y%m%dT%H%M%S'), + 'Sub directory to store logs.') + tf.app.flags.DEFINE_string( + 'config', None, + 'Configuration to execute.') + tf.app.flags.DEFINE_boolean( + 'env_processes', True, + 'Step environments in separate processes to circumvent the GIL.') tf.app.run() - diff --git a/examples/pybullet/gym/pybullet_envs/agents/visualize_ppo.py b/examples/pybullet/gym/pybullet_envs/agents/visualize_ppo.py index fa5f7b21c..f6673dc1d 100644 --- a/examples/pybullet/gym/pybullet_envs/agents/visualize_ppo.py +++ b/examples/pybullet/gym/pybullet_envs/agents/visualize_ppo.py @@ -1,42 +1,157 @@ +# Copyright 2017 The TensorFlow Agents Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. -r"""Script to visualize the trained PPO agent. +r"""Script to render videos of the Proximal Policy Gradient algorithm. -python -m pybullet_envs.agents.visualize \ ---logdir=ppo ---outdir=/tmp/video/ +Command line: + python3 -m agents.scripts.visualize \ + --logdir=/path/to/logdir/