diff --git a/examples/SharedMemory/PhysicsClientC_API.cpp b/examples/SharedMemory/PhysicsClientC_API.cpp index 01371d357..094683da0 100644 --- a/examples/SharedMemory/PhysicsClientC_API.cpp +++ b/examples/SharedMemory/PhysicsClientC_API.cpp @@ -2831,6 +2831,18 @@ B3_SHARED_API int b3ChangeDynamicsInfoSetAngularDamping(b3SharedMemoryCommandHan return 0; } +B3_SHARED_API int b3ChangeDynamicsInfoSetJointDamping(b3SharedMemoryCommandHandle commandHandle, int bodyUniqueId, int linkIndex, double jointDamping) +{ + struct SharedMemoryCommand* command = (struct SharedMemoryCommand*)commandHandle; + b3Assert(command->m_type == CMD_CHANGE_DYNAMICS_INFO); + command->m_changeDynamicsInfoArgs.m_bodyUniqueId = bodyUniqueId; + command->m_changeDynamicsInfoArgs.m_linkIndex = linkIndex; + command->m_changeDynamicsInfoArgs.m_jointDamping = jointDamping; + command->m_updateFlags |= CHANGE_DYNAMICS_INFO_SET_JOINT_DAMPING; + return 0; +} + + B3_SHARED_API int b3ChangeDynamicsInfoSetContactStiffnessAndDamping(b3SharedMemoryCommandHandle commandHandle, int bodyUniqueId, int linkIndex, double contactStiffness, double contactDamping) { struct SharedMemoryCommand* command = (struct SharedMemoryCommand*)commandHandle; diff --git a/examples/SharedMemory/PhysicsClientC_API.h b/examples/SharedMemory/PhysicsClientC_API.h index 0545c899d..2c695e0e9 100644 --- a/examples/SharedMemory/PhysicsClientC_API.h +++ b/examples/SharedMemory/PhysicsClientC_API.h @@ -155,6 +155,8 @@ extern "C" B3_SHARED_API int b3ChangeDynamicsInfoSetRestitution(b3SharedMemoryCommandHandle commandHandle, int bodyUniqueId, int linkIndex, double restitution); B3_SHARED_API int b3ChangeDynamicsInfoSetLinearDamping(b3SharedMemoryCommandHandle commandHandle, int bodyUniqueId, double linearDamping); B3_SHARED_API int b3ChangeDynamicsInfoSetAngularDamping(b3SharedMemoryCommandHandle commandHandle, int bodyUniqueId, double angularDamping); + B3_SHARED_API int b3ChangeDynamicsInfoSetJointDamping(b3SharedMemoryCommandHandle commandHandle, int bodyUniqueId, int linkIndex, double jointDamping); + B3_SHARED_API int b3ChangeDynamicsInfoSetContactStiffnessAndDamping(b3SharedMemoryCommandHandle commandHandle, int bodyUniqueId, int linkIndex, double contactStiffness, double contactDamping); B3_SHARED_API int b3ChangeDynamicsInfoSetFrictionAnchor(b3SharedMemoryCommandHandle commandHandle, int bodyUniqueId, int linkIndex, int frictionAnchor); B3_SHARED_API int b3ChangeDynamicsInfoSetCcdSweptSphereRadius(b3SharedMemoryCommandHandle commandHandle, int bodyUniqueId, int linkIndex, double ccdSweptSphereRadius); diff --git a/examples/SharedMemory/PhysicsServerCommandProcessor.cpp b/examples/SharedMemory/PhysicsServerCommandProcessor.cpp index bf912342a..94072b1a3 100644 --- a/examples/SharedMemory/PhysicsServerCommandProcessor.cpp +++ b/examples/SharedMemory/PhysicsServerCommandProcessor.cpp @@ -7737,6 +7737,12 @@ bool PhysicsServerCommandProcessor::processChangeDynamicsInfoCommand(const struc mb->getLinkCollider(linkIndex)->setContactStiffnessAndDamping(clientCmd.m_changeDynamicsInfoArgs.m_contactStiffness, clientCmd.m_changeDynamicsInfoArgs.m_contactDamping); } } + + if (clientCmd.m_updateFlags &CHANGE_DYNAMICS_INFO_SET_JOINT_DAMPING) + { + mb->getLink(linkIndex).m_jointDamping = clientCmd.m_changeDynamicsInfoArgs.m_jointDamping; + } + if (clientCmd.m_updateFlags & CHANGE_DYNAMICS_INFO_SET_MASS) { mb->getLink(linkIndex).m_mass = mass; @@ -8057,6 +8063,7 @@ bool PhysicsServerCommandProcessor::processSendPhysicsParametersCommand(const st 
BT_PROFILE("CMD_SEND_PHYSICS_SIMULATION_PARAMETERS"); + if (clientCmd.m_updateFlags & SIM_PARAM_ENABLE_CONE_FRICTION) { if (clientCmd.m_physSimParamArgs.m_enableConeFriction) diff --git a/examples/SharedMemory/SharedMemoryCommands.h b/examples/SharedMemory/SharedMemoryCommands.h index adab74ee6..dce9cdd1a 100644 --- a/examples/SharedMemory/SharedMemoryCommands.h +++ b/examples/SharedMemory/SharedMemoryCommands.h @@ -164,6 +164,7 @@ enum EnumChangeDynamicsInfoFlags CHANGE_DYNAMICS_INFO_SET_CCD_SWEPT_SPHERE_RADIUS = 2048, CHANGE_DYNAMICS_INFO_SET_CONTACT_PROCESSING_THRESHOLD = 4096, CHANGE_DYNAMICS_INFO_SET_ACTIVATION_STATE = 8192, + CHANGE_DYNAMICS_INFO_SET_JOINT_DAMPING = 16384, }; struct ChangeDynamicsInfoArgs @@ -185,6 +186,7 @@ struct ChangeDynamicsInfoArgs double m_ccdSweptSphereRadius; double m_contactProcessingThreshold; int m_activationState; + double m_jointDamping; }; struct GetDynamicsInfoArgs diff --git a/examples/SharedMemory/physx/PhysXServerCommandProcessor.cpp b/examples/SharedMemory/physx/PhysXServerCommandProcessor.cpp index 821f17216..1d18b3143 100644 --- a/examples/SharedMemory/physx/PhysXServerCommandProcessor.cpp +++ b/examples/SharedMemory/physx/PhysXServerCommandProcessor.cpp @@ -19,6 +19,7 @@ #include "PxDefaultSimulationFilterShader.h" #include "URDF2PhysX.h" #include "../b3PluginManager.h" +#include "PxRigidActorExt.h" #define STATIC_EGLRENDERER_PLUGIN #ifdef STATIC_EGLRENDERER_PLUGIN @@ -50,10 +51,22 @@ public: struct InternalPhysXBodyData { physx::PxArticulationReducedCoordinate* mArticulation; + physx::PxRigidDynamic* m_rigidDynamic; + physx::PxRigidStatic* m_rigidStatic; + std::string m_bodyName; + + InternalPhysXBodyData() + { + clear(); + } //physx::PxArticulationJointReducedCoordinate* gDriveJoint; void clear() { + mArticulation = 0; + m_rigidDynamic = 0; + m_rigidStatic = 0; + m_bodyName = ""; } }; @@ -298,216 +311,741 @@ bool PhysXServerCommandProcessor::processStateLoggingCommand(const struct Shared } } -#if 0 - if (clientCmd.m_updateFlags & STATE_LOGGING_START_LOG) + + return hasStatus; +} + + +bool PhysXServerCommandProcessor::processInitPoseCommand(const struct SharedMemoryCommand& clientCmd, struct SharedMemoryStatus& serverStatusOut, char* bufferServerToClient, int bufferSizeInBytes) +{ + bool hasStatus = true; + + BT_PROFILE("CMD_INIT_POSE"); + + if (m_data->m_verboseOutput) { - if (clientCmd.m_stateLoggingArguments.m_logType == STATE_LOGGING_ALL_COMMANDS) + b3Printf("Server Init Pose not implemented yet"); + } + int bodyUniqueId = clientCmd.m_initPoseArgs.m_bodyUniqueId; + InternalPhysXBodyData* body = m_data->m_bodyHandles.getHandle(bodyUniqueId); + + btVector3 baseLinVel(0, 0, 0); + btVector3 baseAngVel(0, 0, 0); + + if (clientCmd.m_updateFlags & INIT_POSE_HAS_BASE_LINEAR_VELOCITY) + { + baseLinVel.setValue(clientCmd.m_initPoseArgs.m_initialStateQdot[0], + clientCmd.m_initPoseArgs.m_initialStateQdot[1], + clientCmd.m_initPoseArgs.m_initialStateQdot[2]); + } + if (clientCmd.m_updateFlags & INIT_POSE_HAS_BASE_ANGULAR_VELOCITY) + { + baseAngVel.setValue(clientCmd.m_initPoseArgs.m_initialStateQdot[3], + clientCmd.m_initPoseArgs.m_initialStateQdot[4], + clientCmd.m_initPoseArgs.m_initialStateQdot[5]); + } + btVector3 basePos(0, 0, 0); + if (clientCmd.m_updateFlags & INIT_POSE_HAS_INITIAL_POSITION) + { + basePos = btVector3( + clientCmd.m_initPoseArgs.m_initialStateQ[0], + clientCmd.m_initPoseArgs.m_initialStateQ[1], + clientCmd.m_initPoseArgs.m_initialStateQ[2]); + } + btQuaternion baseOrn(0, 0, 0, 1); + if (clientCmd.m_updateFlags & 
INIT_POSE_HAS_INITIAL_ORIENTATION) + { + baseOrn.setValue(clientCmd.m_initPoseArgs.m_initialStateQ[3], + clientCmd.m_initPoseArgs.m_initialStateQ[4], + clientCmd.m_initPoseArgs.m_initialStateQ[5], + clientCmd.m_initPoseArgs.m_initialStateQ[6]); + } + if (body && body->mArticulation) + { + physx::PxArticulationCache* c = body->mArticulation->createCache(); + body->mArticulation->copyInternalStateToCache(*c, physx::PxArticulationCache::ePOSITION | physx::PxArticulationCache::eVELOCITY);// physx::PxArticulationCache::eALL); + physx::PxArticulationLink* physxLinks[64]; + physx::PxU32 bufferSize = 64; + physx::PxU32 startIndex = 0; + int numLinks2 = body->mArticulation->getLinks(physxLinks, bufferSize, startIndex); + + btAlignedObjectArray<int> dofStarts; + dofStarts.resize(numLinks2); + dofStarts[0] = 0; //We know that the root link does not have a joint + //cache positions in PhysX may be reshuffled, see + //http://gameworksdocs.nvidia.com/PhysX/4.0/documentation/PhysXGuide/Manual/Articulations.html + + for (int i = 1; i < numLinks2; ++i) { - if (m_data->m_commandLogger == 0) - { - enableCommandLogging(true, clientCmd.m_stateLoggingArguments.m_fileName); - serverStatusOut.m_type = CMD_STATE_LOGGING_START_COMPLETED; - int loggerUid = m_data->m_stateLoggersUniqueId++; - m_data->m_commandLoggingUid = loggerUid; - serverStatusOut.m_stateLoggingResultArgs.m_loggingUniqueId = loggerUid; - } + int llIndex = physxLinks[i]->getLinkIndex(); + int dofs = physxLinks[i]->getInboundJointDof(); + + dofStarts[llIndex] = dofs; } - if (clientCmd.m_stateLoggingArguments.m_logType == STATE_REPLAY_ALL_COMMANDS) + int count = 0; + for (int i = 1; i < numLinks2; ++i) { - if (m_data->m_logPlayback == 0) - { - replayFromLogFile(clientCmd.m_stateLoggingArguments.m_fileName); - serverStatusOut.m_type = CMD_STATE_LOGGING_START_COMPLETED; - int loggerUid = m_data->m_stateLoggersUniqueId++; - m_data->m_logPlaybackUid = loggerUid; - serverStatusOut.m_stateLoggingResultArgs.m_loggingUniqueId = loggerUid; - } + int dofs = dofStarts[i]; + dofStarts[i] = count; + count += dofs; } - - if (clientCmd.m_stateLoggingArguments.m_logType == STATE_LOGGING_VIDEO_MP4) + if (numLinks2 > 0) { - //if (clientCmd.m_stateLoggingArguments.m_fileName) + int dofs = physxLinks[0]->getInboundJointDof(); + physx::PxTransform pt = physxLinks[0]->getGlobalPose(); + physx::PxVec3 linVel = physxLinks[0]->getLinearVelocity(); + physx::PxVec3 angVel = physxLinks[0]->getAngularVelocity(); + + + if (clientCmd.m_updateFlags & INIT_POSE_HAS_BASE_LINEAR_VELOCITY) { - int loggerUid = m_data->m_stateLoggersUniqueId++; - VideoMP4Loggger* logger = new VideoMP4Loggger(loggerUid, clientCmd.m_stateLoggingArguments.m_fileName, this->m_data->m_guiHelper); - m_data->m_stateLoggers.push_back(logger); - serverStatusOut.m_type = CMD_STATE_LOGGING_START_COMPLETED; - serverStatusOut.m_stateLoggingResultArgs.m_loggingUniqueId = loggerUid; + physxLinks[0]->setLinearVelocity(physx::PxVec3(baseLinVel[0], baseLinVel[1], baseLinVel[2])); + } + + if (clientCmd.m_updateFlags & INIT_POSE_HAS_BASE_ANGULAR_VELOCITY) + { + physxLinks[0]->setAngularVelocity(physx::PxVec3(baseAngVel[0], baseAngVel[1], baseAngVel[2])); + } + + if (clientCmd.m_updateFlags & INIT_POSE_HAS_INITIAL_POSITION) + { + pt.p.x = basePos[0]; + pt.p.y = basePos[1]; + pt.p.z = basePos[2]; + physxLinks[0]->setGlobalPose(pt); + } + if (clientCmd.m_updateFlags & INIT_POSE_HAS_INITIAL_ORIENTATION) + { + btAssert(clientCmd.m_initPoseArgs.m_hasInitialStateQ[3] && + clientCmd.m_initPoseArgs.m_hasInitialStateQ[4] && +
clientCmd.m_initPoseArgs.m_hasInitialStateQ[5] && + clientCmd.m_initPoseArgs.m_hasInitialStateQ[6]); + pt.q.x = baseOrn[0]; + pt.q.y = baseOrn[1]; + pt.q.z = baseOrn[2]; + pt.q.w = baseOrn[3]; + physxLinks[0]->setGlobalPose(pt); } } - - if (clientCmd.m_stateLoggingArguments.m_logType == STATE_LOGGING_MINITAUR) + if (clientCmd.m_updateFlags & INIT_POSE_HAS_JOINT_STATE) { - std::string fileName = clientCmd.m_stateLoggingArguments.m_fileName; - //either provide the minitaur by object unique Id, or search for first multibody with 8 motors... - - if ((clientCmd.m_updateFlags & STATE_LOGGING_FILTER_OBJECT_UNIQUE_ID) && (clientCmd.m_stateLoggingArguments.m_numBodyUniqueIds > 0)) + int uDofIndex = 6; + int posVarCountIndex = 7; + + //skip 'root' link + for (int i = 1; i < numLinks2; i++) { - int bodyUniqueId = clientCmd.m_stateLoggingArguments.m_bodyUniqueIds[0]; - InternalBodyData* body = m_data->m_bodyHandles.getHandle(bodyUniqueId); - if (body) + int physxCacheLinkIndex = physxLinks[i]->getLinkIndex(); + int dofs = physxLinks[i]->getInboundJointDof(); + int posVarCount = dofs;//?? + bool hasPosVar = posVarCount > 0; + + for (int j = 0; j < posVarCount; j++) { - if (body->m_multiBody) + if (clientCmd.m_initPoseArgs.m_hasInitialStateQ[posVarCountIndex + j] == 0) { - btAlignedObjectArray motorNames; - motorNames.push_back("motor_front_leftR_joint"); - motorNames.push_back("motor_front_leftL_joint"); - motorNames.push_back("motor_back_leftR_joint"); - motorNames.push_back("motor_back_leftL_joint"); - motorNames.push_back("motor_front_rightL_joint"); - motorNames.push_back("motor_front_rightR_joint"); - motorNames.push_back("motor_back_rightL_joint"); - motorNames.push_back("motor_back_rightR_joint"); + hasPosVar = false; + break; + } + } + if (hasPosVar) + { + if (posVarCount == 1) + { + c->jointPosition[dofStarts[physxCacheLinkIndex]] = clientCmd.m_initPoseArgs.m_initialStateQ[posVarCountIndex]; + } + if (posVarCount == 3) + { + btQuaternion q( + clientCmd.m_initPoseArgs.m_initialStateQ[posVarCountIndex], + clientCmd.m_initPoseArgs.m_initialStateQ[posVarCountIndex + 1], + clientCmd.m_initPoseArgs.m_initialStateQ[posVarCountIndex + 2], + clientCmd.m_initPoseArgs.m_initialStateQ[posVarCountIndex + 3]); + q.normalize(); + //mb->setJointPosMultiDof(i, &q[0]); + //double vel[6] = { 0, 0, 0, 0, 0, 0 }; + //mb->setJointVelMultiDof(i, vel); + } + } - btAlignedObjectArray motorIdList; - for (int m = 0; m < motorNames.size(); m++) + bool hasVel = true; + for (int j = 0; j < posVarCount; j++) + { + if (clientCmd.m_initPoseArgs.m_hasInitialStateQdot[uDofIndex + j] == 0) + { + hasVel = false; + break; + } + } + + if (hasVel) + { + if (posVarCount == 1) + { + btScalar vel = clientCmd.m_initPoseArgs.m_initialStateQdot[uDofIndex]; + c->jointVelocity[dofStarts[physxCacheLinkIndex]] = vel; + } + if (posVarCount == 3) + { + //mb->setJointVelMultiDof(i, &clientCmd.m_initPoseArgs.m_initialStateQdot[uDofIndex]); + } + } + + posVarCountIndex += dofs; + uDofIndex += dofs;// mb->getLink(i).m_dofCount; + } + } + + body->mArticulation->applyCache(*c, physx::PxArticulationCache::ePOSITION| physx::PxArticulationCache::eVELOCITY); + body->mArticulation->releaseCache(*c); + + } + + SharedMemoryStatus& serverCmd = serverStatusOut; + serverCmd.m_type = CMD_CLIENT_COMMAND_COMPLETED; + return hasStatus; +} + +bool PhysXServerCommandProcessor::processSendDesiredStateCommand(const struct SharedMemoryCommand& clientCmd, struct SharedMemoryStatus& serverStatusOut, char* bufferServerToClient, int bufferSizeInBytes) +{ + bool hasStatus = 
true; + BT_PROFILE("CMD_SEND_DESIRED_STATE"); + + int bodyUniqueId = clientCmd.m_sendDesiredStateCommandArgument.m_bodyUniqueId; + InternalPhysXBodyData* body = m_data->m_bodyHandles.getHandle(bodyUniqueId); + if (body && body->mArticulation) + { + physx::PxArticulationLink* physxLinks[64]; + physx::PxU32 bufferSize = 64; + physx::PxU32 startIndex = 0; + int numLinks2 = body->mArticulation->getLinks(physxLinks, bufferSize, startIndex); + //http://gameworksdocs.nvidia.com/PhysX/4.0/documentation/PhysXGuide/Manual/Articulations.html + + + switch (clientCmd.m_sendDesiredStateCommandArgument.m_controlMode) + { + case CONTROL_MODE_VELOCITY: + { + if (m_data->m_verboseOutput) + { + b3Printf("Using CONTROL_MODE_VELOCITY"); + } + + int numMotors = 0; + //find the joint motors and apply the desired velocity and maximum force/torque + { + int dofIndex = 6; //skip the 3 linear + 3 angular degree of freedom entries of the base + for (int link = 1; link < numLinks2; link++) + { + int dofs = physxLinks[link]->getInboundJointDof(); + physx::PxReal stiffness = 10.f; + physx::PxReal damping = 0.1f; + physx::PxReal forceLimit = PX_MAX_F32; + + if (dofs == 1) + { + physx::PxArticulationJointReducedCoordinate* joint = static_cast(physxLinks[link]->getInboundJoint()); + btScalar desiredVelocity = 0.f; + bool hasDesiredVelocity = false; + physx::PxReal stiffness = 10.f; + + if ((clientCmd.m_sendDesiredStateCommandArgument.m_hasDesiredStateFlags[dofIndex] & SIM_DESIRED_STATE_HAS_QDOT) != 0) { - for (int i = 0; i < body->m_multiBody->getNumLinks(); i++) + desiredVelocity = clientCmd.m_sendDesiredStateCommandArgument.m_desiredStateQdot[dofIndex]; + btScalar kd = 0.1f; + if ((clientCmd.m_sendDesiredStateCommandArgument.m_hasDesiredStateFlags[dofIndex] & SIM_DESIRED_STATE_HAS_KD) != 0) { - std::string jointName; - if (body->m_multiBody->getLink(i).m_jointName) - { - jointName = body->m_multiBody->getLink(i).m_jointName; - } - if (motorNames[m] == jointName) - { - motorIdList.push_back(i); - } + kd = clientCmd.m_sendDesiredStateCommandArgument.m_Kd[dofIndex]; } + joint->setDriveVelocity(physx::PxArticulationAxis::eTWIST, desiredVelocity); + physx::PxReal damping = kd; + stiffness = 0; + joint->setDriveTarget(physx::PxArticulationAxis::eTWIST, 0.f); + physx::PxReal forceLimit = 1000000.f; + if ((clientCmd.m_sendDesiredStateCommandArgument.m_hasDesiredStateFlags[dofIndex] & SIM_DESIRED_STATE_HAS_MAX_FORCE) != 0) + { + forceLimit = clientCmd.m_sendDesiredStateCommandArgument.m_desiredStateForceTorque[dofIndex]; + } + joint->setDrive(physx::PxArticulationAxis::eTWIST, stiffness, damping, forceLimit); + } + } + dofIndex += dofs; + } + } + break; + } + + case CONTROL_MODE_POSITION_VELOCITY_PD: + { + if (m_data->m_verboseOutput) + { + b3Printf("Using CONTROL_MODE_VELOCITY"); + } + + int numMotors = 0; + //find the joint motors and apply the desired velocity and maximum force/torque + { + int dofIndex = 6; //skip the 3 linear + 3 angular degree of freedom entries of the base + for (int link = 1; link < numLinks2; link++) + { + int dofs = physxLinks[link]->getInboundJointDof(); + physx::PxReal stiffness = 10.f; + physx::PxReal damping = 0.1f; + physx::PxReal forceLimit = PX_MAX_F32; + int posIndex = 7; //skip 3 positional and 4 orientation (quaternion) positional degrees of freedom of the base + + if (dofs == 1) + { + physx::PxArticulationJointReducedCoordinate* joint = static_cast(physxLinks[link]->getInboundJoint()); + btScalar desiredVelocity = 0.f; + bool hasDesiredVelocity = false; + physx::PxReal stiffness = 10.f; + 
btScalar kd = 0.1f; + btScalar kp = 0.f; + + if ((clientCmd.m_sendDesiredStateCommandArgument.m_hasDesiredStateFlags[dofIndex] & SIM_DESIRED_STATE_HAS_QDOT) != 0) + { + desiredVelocity = clientCmd.m_sendDesiredStateCommandArgument.m_desiredStateQdot[dofIndex]; } - if (motorIdList.size() == 8) + btScalar desiredPosition = 0.f; + if ((clientCmd.m_sendDesiredStateCommandArgument.m_hasDesiredStateFlags[posIndex] & SIM_DESIRED_STATE_HAS_Q) != 0) { - int loggerUid = m_data->m_stateLoggersUniqueId++; - MinitaurStateLogger* logger = new MinitaurStateLogger(loggerUid, fileName, body->m_multiBody, motorIdList); - m_data->m_stateLoggers.push_back(logger); - serverStatusOut.m_type = CMD_STATE_LOGGING_START_COMPLETED; - serverStatusOut.m_stateLoggingResultArgs.m_loggingUniqueId = loggerUid; + desiredPosition = clientCmd.m_sendDesiredStateCommandArgument.m_desiredStateQ[posIndex]; } + + if ((clientCmd.m_sendDesiredStateCommandArgument.m_hasDesiredStateFlags[dofIndex] & SIM_DESIRED_STATE_HAS_KD) != 0) + { + kd = clientCmd.m_sendDesiredStateCommandArgument.m_Kd[dofIndex]; + } + physx::PxReal damping = kd; + + if ((clientCmd.m_sendDesiredStateCommandArgument.m_hasDesiredStateFlags[dofIndex] & SIM_DESIRED_STATE_HAS_KP) != 0) + { + kp = clientCmd.m_sendDesiredStateCommandArgument.m_Kp[dofIndex]; + stiffness = kp; + } + + joint->setDriveVelocity(physx::PxArticulationAxis::eTWIST, desiredVelocity); + + + + physx::PxReal forceLimit = 1000000.f; + if ((clientCmd.m_sendDesiredStateCommandArgument.m_hasDesiredStateFlags[dofIndex] & SIM_DESIRED_STATE_HAS_MAX_FORCE) != 0) + { + forceLimit = clientCmd.m_sendDesiredStateCommandArgument.m_desiredStateForceTorque[dofIndex]; + } + bool isAcceleration = false; + joint->setDrive(physx::PxArticulationAxis::eTWIST, stiffness, damping, forceLimit, isAcceleration); + + joint->setDriveTarget(physx::PxArticulationAxis::eTWIST, desiredPosition); + } + + dofIndex += dofs; + posIndex += dofs; + } + } + break; + } + default: + { + } + } + + } + serverStatusOut.m_type = CMD_DESIRED_STATE_RECEIVED_COMPLETED; + return hasStatus; +} + + + +bool PhysXServerCommandProcessor::processChangeDynamicsInfoCommand(const struct SharedMemoryCommand& clientCmd, struct SharedMemoryStatus& serverStatusOut, char* bufferServerToClient, int bufferSizeInBytes) +{ + bool hasStatus = true; + BT_PROFILE("CMD_CHANGE_DYNAMICS_INFO"); + + int bodyUniqueId = clientCmd.m_changeDynamicsInfoArgs.m_bodyUniqueId; + int linkIndex = clientCmd.m_changeDynamicsInfoArgs.m_linkIndex; + double mass = clientCmd.m_changeDynamicsInfoArgs.m_mass; + double lateralFriction = clientCmd.m_changeDynamicsInfoArgs.m_lateralFriction; + double spinningFriction = clientCmd.m_changeDynamicsInfoArgs.m_spinningFriction; + double rollingFriction = clientCmd.m_changeDynamicsInfoArgs.m_rollingFriction; + double restitution = clientCmd.m_changeDynamicsInfoArgs.m_restitution; + btVector3 newLocalInertiaDiagonal(clientCmd.m_changeDynamicsInfoArgs.m_localInertiaDiagonal[0], + clientCmd.m_changeDynamicsInfoArgs.m_localInertiaDiagonal[1], + clientCmd.m_changeDynamicsInfoArgs.m_localInertiaDiagonal[2]); + + btAssert(bodyUniqueId >= 0); + + InternalPhysXBodyData* body = m_data->m_bodyHandles.getHandle(bodyUniqueId); + + if (body->mArticulation) + { + + physx::PxArticulationLink* physxLinks[64]; + physx::PxU32 bufferSize = 64; + physx::PxU32 startIndex = 0; + int physxLinkIndex = linkIndex + 1; + + int numLinks2 = body->mArticulation->getLinks(physxLinks, bufferSize, startIndex); + + if (physxLinkIndex >= 0 && physxLinkIndex < numLinks2) + { + if 
(clientCmd.m_updateFlags & CHANGE_DYNAMICS_INFO_SET_MASS) + { + physx::PxArticulationLink* childLink = physxLinks[physxLinkIndex]; + physx::PxRigidBodyExt::updateMassAndInertia(*childLink, mass); + } + } + } + if (body->m_rigidDynamic) + { + //body->m_rigidDynamic->setMass(mass); + //also update inertia + if (clientCmd.m_updateFlags & CHANGE_DYNAMICS_INFO_SET_MASS) + { + physx::PxRigidBodyExt::updateMassAndInertia(*body->m_rigidDynamic, mass); + } + } + if (body->m_rigidStatic) + { + + } +#if 0 + if (body && body->m_multiBody) + { + btMultiBody* mb = body->m_multiBody; + + if (clientCmd.m_updateFlags & CHANGE_DYNAMICS_INFO_SET_ACTIVATION_STATE) + { + if (clientCmd.m_changeDynamicsInfoArgs.m_activationState & eActivationStateWakeUp) + { + mb->wakeUp(); + } + if (clientCmd.m_changeDynamicsInfoArgs.m_activationState & eActivationStateSleep) + { + mb->goToSleep(); + } + if (clientCmd.m_changeDynamicsInfoArgs.m_activationState & eActivationStateEnableSleeping) + { + mb->setCanSleep(true); + } + if (clientCmd.m_changeDynamicsInfoArgs.m_activationState & eActivationStateDisableSleeping) + { + mb->setCanSleep(false); + } + } + + if (clientCmd.m_updateFlags & CHANGE_DYNAMICS_INFO_SET_LINEAR_DAMPING) + { + mb->setLinearDamping(clientCmd.m_changeDynamicsInfoArgs.m_linearDamping); + } + if (clientCmd.m_updateFlags & CHANGE_DYNAMICS_INFO_SET_ANGULAR_DAMPING) + { + mb->setAngularDamping(clientCmd.m_changeDynamicsInfoArgs.m_angularDamping); + } + + if (linkIndex == -1) + { + if (mb->getBaseCollider()) + { + if (clientCmd.m_updateFlags & CHANGE_DYNAMICS_INFO_SET_RESTITUTION) + { + mb->getBaseCollider()->setRestitution(restitution); + } + + if (clientCmd.m_updateFlags & CHANGE_DYNAMICS_INFO_SET_CONTACT_STIFFNESS_AND_DAMPING) + { + mb->getBaseCollider()->setContactStiffnessAndDamping(clientCmd.m_changeDynamicsInfoArgs.m_contactStiffness, clientCmd.m_changeDynamicsInfoArgs.m_contactDamping); + } + if (clientCmd.m_updateFlags & CHANGE_DYNAMICS_INFO_SET_LATERAL_FRICTION) + { + mb->getBaseCollider()->setFriction(lateralFriction); + } + if (clientCmd.m_updateFlags & CHANGE_DYNAMICS_INFO_SET_SPINNING_FRICTION) + { + mb->getBaseCollider()->setSpinningFriction(spinningFriction); + } + if (clientCmd.m_updateFlags & CHANGE_DYNAMICS_INFO_SET_ROLLING_FRICTION) + { + mb->getBaseCollider()->setRollingFriction(rollingFriction); + } + + if (clientCmd.m_updateFlags & CHANGE_DYNAMICS_INFO_SET_FRICTION_ANCHOR) + { + if (clientCmd.m_changeDynamicsInfoArgs.m_frictionAnchor) + { + mb->getBaseCollider()->setCollisionFlags(mb->getBaseCollider()->getCollisionFlags() | btCollisionObject::CF_HAS_FRICTION_ANCHOR); + } + else + { + mb->getBaseCollider()->setCollisionFlags(mb->getBaseCollider()->getCollisionFlags() & ~btCollisionObject::CF_HAS_FRICTION_ANCHOR); } } } - } - - if (clientCmd.m_stateLoggingArguments.m_logType == STATE_LOGGING_GENERIC_ROBOT) - { - std::string fileName = clientCmd.m_stateLoggingArguments.m_fileName; - - int loggerUid = m_data->m_stateLoggersUniqueId++; - int maxLogDof = 12; - if ((clientCmd.m_updateFlags & STATE_LOGGING_MAX_LOG_DOF)) + if (clientCmd.m_updateFlags & CHANGE_DYNAMICS_INFO_SET_MASS) { - maxLogDof = clientCmd.m_stateLoggingArguments.m_maxLogDof; - } - - int logFlags = 0; - if (clientCmd.m_updateFlags & STATE_LOGGING_LOG_FLAGS) - { - logFlags = clientCmd.m_stateLoggingArguments.m_logFlags; - } - GenericRobotStateLogger* logger = new GenericRobotStateLogger(loggerUid, fileName, m_data->m_dynamicsWorld, maxLogDof, logFlags); - - if ((clientCmd.m_updateFlags & STATE_LOGGING_FILTER_OBJECT_UNIQUE_ID) && 
(clientCmd.m_stateLoggingArguments.m_numBodyUniqueIds > 0)) - { - logger->m_filterObjectUniqueId = true; - for (int i = 0; i < clientCmd.m_stateLoggingArguments.m_numBodyUniqueIds; ++i) + mb->setBaseMass(mass); + if (mb->getBaseCollider() && mb->getBaseCollider()->getCollisionShape()) { - int objectUniqueId = clientCmd.m_stateLoggingArguments.m_bodyUniqueIds[i]; - logger->m_bodyIdList.push_back(objectUniqueId); + btVector3 localInertia; + mb->getBaseCollider()->getCollisionShape()->calculateLocalInertia(mass, localInertia); + mb->setBaseInertia(localInertia); } - } - m_data->m_stateLoggers.push_back(logger); - serverStatusOut.m_type = CMD_STATE_LOGGING_START_COMPLETED; - serverStatusOut.m_stateLoggingResultArgs.m_loggingUniqueId = loggerUid; - } - if (clientCmd.m_stateLoggingArguments.m_logType == STATE_LOGGING_CONTACT_POINTS) - { - std::string fileName = clientCmd.m_stateLoggingArguments.m_fileName; - int loggerUid = m_data->m_stateLoggersUniqueId++; - ContactPointsStateLogger* logger = new ContactPointsStateLogger(loggerUid, fileName, m_data->m_dynamicsWorld); - if ((clientCmd.m_updateFlags & STATE_LOGGING_FILTER_LINK_INDEX_A) && clientCmd.m_stateLoggingArguments.m_linkIndexA >= -1) - { - logger->m_filterLinkA = true; - logger->m_linkIndexA = clientCmd.m_stateLoggingArguments.m_linkIndexA; - } - if ((clientCmd.m_updateFlags & STATE_LOGGING_FILTER_LINK_INDEX_B) && clientCmd.m_stateLoggingArguments.m_linkIndexB >= -1) - { - logger->m_filterLinkB = true; - logger->m_linkIndexB = clientCmd.m_stateLoggingArguments.m_linkIndexB; - } - if ((clientCmd.m_updateFlags & STATE_LOGGING_FILTER_BODY_UNIQUE_ID_A) && clientCmd.m_stateLoggingArguments.m_bodyUniqueIdA > -1) - { - logger->m_bodyUniqueIdA = clientCmd.m_stateLoggingArguments.m_bodyUniqueIdA; - } - if ((clientCmd.m_updateFlags & STATE_LOGGING_FILTER_BODY_UNIQUE_ID_B) && clientCmd.m_stateLoggingArguments.m_bodyUniqueIdB > -1) - { - logger->m_bodyUniqueIdB = clientCmd.m_stateLoggingArguments.m_bodyUniqueIdB; - } - m_data->m_stateLoggers.push_back(logger); - serverStatusOut.m_type = CMD_STATE_LOGGING_START_COMPLETED; - serverStatusOut.m_stateLoggingResultArgs.m_loggingUniqueId = loggerUid; - } - if (clientCmd.m_stateLoggingArguments.m_logType == STATE_LOGGING_VR_CONTROLLERS) - { - std::string fileName = clientCmd.m_stateLoggingArguments.m_fileName; - int loggerUid = m_data->m_stateLoggersUniqueId++; - int deviceFilterType = VR_DEVICE_CONTROLLER; - if (clientCmd.m_updateFlags & STATE_LOGGING_FILTER_DEVICE_TYPE) - { - deviceFilterType = clientCmd.m_stateLoggingArguments.m_deviceFilterType; - } - VRControllerStateLogger* logger = new VRControllerStateLogger(loggerUid, deviceFilterType, fileName); - m_data->m_stateLoggers.push_back(logger); - serverStatusOut.m_type = CMD_STATE_LOGGING_START_COMPLETED; - serverStatusOut.m_stateLoggingResultArgs.m_loggingUniqueId = loggerUid; - } - } - if ((clientCmd.m_updateFlags & STATE_LOGGING_STOP_LOG) && clientCmd.m_stateLoggingArguments.m_loggingUniqueId >= 0) - { - if (clientCmd.m_stateLoggingArguments.m_loggingUniqueId == m_data->m_logPlaybackUid) - { - if (m_data->m_logPlayback) - { - delete m_data->m_logPlayback; - m_data->m_logPlayback = 0; - m_data->m_logPlaybackUid = -1; - } - } + //handle switch from static/fixedBase to dynamic and vise-versa + if (mass > 0) + { + bool isDynamic = true; + if (mb->hasFixedBase()) + { + int collisionFilterGroup = isDynamic ? int(btBroadphaseProxy::DefaultFilter) : int(btBroadphaseProxy::StaticFilter); + int collisionFilterMask = isDynamic ? 
int(btBroadphaseProxy::AllFilter) : int(btBroadphaseProxy::AllFilter ^ btBroadphaseProxy::StaticFilter); - if (clientCmd.m_stateLoggingArguments.m_loggingUniqueId == m_data->m_commandLoggingUid) - { - if (m_data->m_commandLogger) - { - enableCommandLogging(false, 0); - serverStatusOut.m_type = CMD_STATE_LOGGING_COMPLETED; - m_data->m_commandLoggingUid = -1; - } - } + m_data->m_dynamicsWorld->removeCollisionObject(mb->getBaseCollider()); + int oldFlags = mb->getBaseCollider()->getCollisionFlags(); + mb->getBaseCollider()->setCollisionFlags(oldFlags & ~btCollisionObject::CF_STATIC_OBJECT); + mb->setFixedBase(false); + m_data->m_dynamicsWorld->addCollisionObject(mb->getBaseCollider(), collisionFilterGroup, collisionFilterMask); - if (clientCmd.m_stateLoggingArguments.m_loggingUniqueId == m_data->m_profileTimingLoggingUid) - { - serverStatusOut.m_type = CMD_STATE_LOGGING_COMPLETED; - b3ChromeUtilsStopTimingsAndWriteJsonFile(m_data->m_profileTimingFileName.c_str()); - m_data->m_profileTimingLoggingUid = -1; + } + } + else + { + if (!mb->hasFixedBase()) + { + bool isDynamic = false; + int collisionFilterGroup = isDynamic ? int(btBroadphaseProxy::DefaultFilter) : int(btBroadphaseProxy::StaticFilter); + int collisionFilterMask = isDynamic ? int(btBroadphaseProxy::AllFilter) : int(btBroadphaseProxy::AllFilter ^ btBroadphaseProxy::StaticFilter); + int oldFlags = mb->getBaseCollider()->getCollisionFlags(); + mb->getBaseCollider()->setCollisionFlags(oldFlags | btCollisionObject::CF_STATIC_OBJECT); + m_data->m_dynamicsWorld->removeCollisionObject(mb->getBaseCollider()); + mb->setFixedBase(true); + m_data->m_dynamicsWorld->addCollisionObject(mb->getBaseCollider(), collisionFilterGroup, collisionFilterMask); + } + } + + } + if (clientCmd.m_updateFlags & CHANGE_DYNAMICS_INFO_SET_LOCAL_INERTIA_DIAGONAL) + { + mb->setBaseInertia(newLocalInertiaDiagonal); + } } else { - serverStatusOut.m_type = CMD_STATE_LOGGING_COMPLETED; - for (int i = 0; i < m_data->m_stateLoggers.size(); i++) + if (linkIndex >= 0 && linkIndex < mb->getNumLinks()) { - if (m_data->m_stateLoggers[i]->m_loggingUniqueId == clientCmd.m_stateLoggingArguments.m_loggingUniqueId) + if (mb->getLinkCollider(linkIndex)) { - m_data->m_stateLoggers[i]->stop(); - delete m_data->m_stateLoggers[i]; - m_data->m_stateLoggers.removeAtIndex(i); + if (clientCmd.m_updateFlags & CHANGE_DYNAMICS_INFO_SET_RESTITUTION) + { + mb->getLinkCollider(linkIndex)->setRestitution(restitution); + } + if (clientCmd.m_updateFlags & CHANGE_DYNAMICS_INFO_SET_SPINNING_FRICTION) + { + mb->getLinkCollider(linkIndex)->setSpinningFriction(spinningFriction); + } + if (clientCmd.m_updateFlags & CHANGE_DYNAMICS_INFO_SET_ROLLING_FRICTION) + { + mb->getLinkCollider(linkIndex)->setRollingFriction(rollingFriction); + } + + if (clientCmd.m_updateFlags & CHANGE_DYNAMICS_INFO_SET_FRICTION_ANCHOR) + { + if (clientCmd.m_changeDynamicsInfoArgs.m_frictionAnchor) + { + mb->getLinkCollider(linkIndex)->setCollisionFlags(mb->getLinkCollider(linkIndex)->getCollisionFlags() | btCollisionObject::CF_HAS_FRICTION_ANCHOR); + } + else + { + mb->getLinkCollider(linkIndex)->setCollisionFlags(mb->getLinkCollider(linkIndex)->getCollisionFlags() & ~btCollisionObject::CF_HAS_FRICTION_ANCHOR); + } + } + + if (clientCmd.m_updateFlags & CHANGE_DYNAMICS_INFO_SET_LATERAL_FRICTION) + { + mb->getLinkCollider(linkIndex)->setFriction(lateralFriction); + } + + if (clientCmd.m_updateFlags & CHANGE_DYNAMICS_INFO_SET_CONTACT_STIFFNESS_AND_DAMPING) + { + 
mb->getLinkCollider(linkIndex)->setContactStiffnessAndDamping(clientCmd.m_changeDynamicsInfoArgs.m_contactStiffness, clientCmd.m_changeDynamicsInfoArgs.m_contactDamping); + } + } + if (clientCmd.m_updateFlags & CHANGE_DYNAMICS_INFO_SET_MASS) + { + mb->getLink(linkIndex).m_mass = mass; + if (mb->getLinkCollider(linkIndex) && mb->getLinkCollider(linkIndex)->getCollisionShape()) + { + btVector3 localInertia; + mb->getLinkCollider(linkIndex)->getCollisionShape()->calculateLocalInertia(mass, localInertia); + mb->getLink(linkIndex).m_inertiaLocal = localInertia; + } + } + if (clientCmd.m_updateFlags & CHANGE_DYNAMICS_INFO_SET_LOCAL_INERTIA_DIAGONAL) + { + mb->getLink(linkIndex).m_inertiaLocal = newLocalInertiaDiagonal; } } } } + else + { + if (body && body->m_rigidBody) + { + if (clientCmd.m_updateFlags & CHANGE_DYNAMICS_INFO_SET_ACTIVATION_STATE) + { + if (clientCmd.m_changeDynamicsInfoArgs.m_activationState & eActivationStateEnableSleeping) + { + body->m_rigidBody->forceActivationState(ACTIVE_TAG); + } + if (clientCmd.m_changeDynamicsInfoArgs.m_activationState & eActivationStateDisableSleeping) + { + body->m_rigidBody->forceActivationState(DISABLE_DEACTIVATION); + } + if (clientCmd.m_changeDynamicsInfoArgs.m_activationState & eActivationStateWakeUp) + { + body->m_rigidBody->forceActivationState(ACTIVE_TAG); + } + if (clientCmd.m_changeDynamicsInfoArgs.m_activationState & eActivationStateSleep) + { + body->m_rigidBody->forceActivationState(ISLAND_SLEEPING); + } + } + + if (clientCmd.m_updateFlags & CHANGE_DYNAMICS_INFO_SET_LINEAR_DAMPING) + { + btScalar angDamping = body->m_rigidBody->getAngularDamping(); + body->m_rigidBody->setDamping(clientCmd.m_changeDynamicsInfoArgs.m_linearDamping, angDamping); + } + if (clientCmd.m_updateFlags & CHANGE_DYNAMICS_INFO_SET_ANGULAR_DAMPING) + { + btScalar linDamping = body->m_rigidBody->getLinearDamping(); + body->m_rigidBody->setDamping(linDamping, clientCmd.m_changeDynamicsInfoArgs.m_angularDamping); + } + + if (clientCmd.m_updateFlags & CHANGE_DYNAMICS_INFO_SET_CONTACT_STIFFNESS_AND_DAMPING) + { + body->m_rigidBody->setContactStiffnessAndDamping(clientCmd.m_changeDynamicsInfoArgs.m_contactStiffness, clientCmd.m_changeDynamicsInfoArgs.m_contactDamping); + } + if (clientCmd.m_updateFlags & CHANGE_DYNAMICS_INFO_SET_RESTITUTION) + { + body->m_rigidBody->setRestitution(restitution); + } + if (clientCmd.m_updateFlags & CHANGE_DYNAMICS_INFO_SET_LATERAL_FRICTION) + { + body->m_rigidBody->setFriction(lateralFriction); + } + if (clientCmd.m_updateFlags & CHANGE_DYNAMICS_INFO_SET_SPINNING_FRICTION) + { + body->m_rigidBody->setSpinningFriction(spinningFriction); + } + if (clientCmd.m_updateFlags & CHANGE_DYNAMICS_INFO_SET_ROLLING_FRICTION) + { + body->m_rigidBody->setRollingFriction(rollingFriction); + } + + if (clientCmd.m_updateFlags & CHANGE_DYNAMICS_INFO_SET_FRICTION_ANCHOR) + { + if (clientCmd.m_changeDynamicsInfoArgs.m_frictionAnchor) + { + body->m_rigidBody->setCollisionFlags(body->m_rigidBody->getCollisionFlags() | btCollisionObject::CF_HAS_FRICTION_ANCHOR); + } + else + { + body->m_rigidBody->setCollisionFlags(body->m_rigidBody->getCollisionFlags() & ~btCollisionObject::CF_HAS_FRICTION_ANCHOR); + } + } + + if (clientCmd.m_updateFlags & CHANGE_DYNAMICS_INFO_SET_MASS) + { + btVector3 localInertia; + if (body->m_rigidBody->getCollisionShape()) + { + body->m_rigidBody->getCollisionShape()->calculateLocalInertia(mass, localInertia); + } + body->m_rigidBody->setMassProps(mass, localInertia); + } + if (clientCmd.m_updateFlags & 
CHANGE_DYNAMICS_INFO_SET_LOCAL_INERTIA_DIAGONAL) + { + btScalar orgMass = body->m_rigidBody->getInvMass(); + if (orgMass > 0) + { + body->m_rigidBody->setMassProps(mass, newLocalInertiaDiagonal); + } + } + + if (clientCmd.m_updateFlags & CHANGE_DYNAMICS_INFO_SET_CONTACT_PROCESSING_THRESHOLD) + { + body->m_rigidBody->setContactProcessingThreshold(clientCmd.m_changeDynamicsInfoArgs.m_contactProcessingThreshold); + } + + if (clientCmd.m_updateFlags & CHANGE_DYNAMICS_INFO_SET_CCD_SWEPT_SPHERE_RADIUS) + { + body->m_rigidBody->setCcdSweptSphereRadius(clientCmd.m_changeDynamicsInfoArgs.m_ccdSweptSphereRadius); + //for a given sphere radius, use a motion threshold of half the radius, before the ccd algorithm is enabled + body->m_rigidBody->setCcdMotionThreshold(clientCmd.m_changeDynamicsInfoArgs.m_ccdSweptSphereRadius / 2.); + } + } + } + + b3Notification notification; + notification.m_notificationType = LINK_DYNAMICS_CHANGED; + notification.m_linkArgs.m_bodyUniqueId = bodyUniqueId; + notification.m_linkArgs.m_linkIndex = linkIndex; + m_data->m_pluginManager.addNotification(notification); + + #endif + SharedMemoryStatus& serverCmd = serverStatusOut; + serverCmd.m_type = CMD_CLIENT_COMMAND_COMPLETED; + + + return hasStatus; +} + +bool PhysXServerCommandProcessor::processRequestPhysicsSimulationParametersCommand(const struct SharedMemoryCommand& clientCmd, struct SharedMemoryStatus& serverStatusOut, char* bufferServerToClient, int bufferSizeInBytes) +{ + bool hasStatus = true; + SharedMemoryStatus& serverCmd = serverStatusOut; + serverCmd.m_type = CMD_REQUEST_PHYSICS_SIMULATION_PARAMETERS_COMPLETED; + + serverCmd.m_simulationParameterResultArgs.m_allowedCcdPenetration = 0;// m_data->m_dynamicsWorld->getDispatchInfo().m_allowedCcdPenetration; + serverCmd.m_simulationParameterResultArgs.m_collisionFilterMode = 0;// m_data->m_broadphaseCollisionFilterCallback->m_filterMode; + serverCmd.m_simulationParameterResultArgs.m_deltaTime = m_data->m_physicsDeltaTime; + serverCmd.m_simulationParameterResultArgs.m_contactBreakingThreshold = 0;// gContactBreakingThreshold; + serverCmd.m_simulationParameterResultArgs.m_contactSlop = 0;// m_data->m_dynamicsWorld->getSolverInfo().m_linearSlop; + serverCmd.m_simulationParameterResultArgs.m_enableSAT = 0;// m_data->m_dynamicsWorld->getDispatchInfo().m_enableSatConvex; + + serverCmd.m_simulationParameterResultArgs.m_defaultGlobalCFM = 0;// m_data->m_dynamicsWorld->getSolverInfo().m_globalCfm; + serverCmd.m_simulationParameterResultArgs.m_defaultContactERP = 0;// m_data->m_dynamicsWorld->getSolverInfo().m_erp2; + serverCmd.m_simulationParameterResultArgs.m_defaultNonContactERP = 0;// m_data->m_dynamicsWorld->getSolverInfo().m_erp; + + serverCmd.m_simulationParameterResultArgs.m_deterministicOverlappingPairs = 0;// m_data->m_dynamicsWorld->getDispatchInfo().m_deterministicOverlappingPairs; + serverCmd.m_simulationParameterResultArgs.m_enableConeFriction = 0;// (m_data->m_dynamicsWorld->getSolverInfo().m_solverMode & SOLVER_DISABLE_IMPLICIT_CONE_FRICTION) ? 
0 : 1; + serverCmd.m_simulationParameterResultArgs.m_enableFileCaching = 0;// b3IsFileCachingEnabled(); + serverCmd.m_simulationParameterResultArgs.m_frictionCFM = 0;// m_data->m_dynamicsWorld->getSolverInfo().m_frictionCFM; + serverCmd.m_simulationParameterResultArgs.m_frictionERP = 0;// m_data->m_dynamicsWorld->getSolverInfo().m_frictionERP; + + physx::PxVec3 grav = m_data->m_scene->getGravity(); + + serverCmd.m_simulationParameterResultArgs.m_gravityAcceleration[0] = grav.x; + serverCmd.m_simulationParameterResultArgs.m_gravityAcceleration[1] = grav.y; + serverCmd.m_simulationParameterResultArgs.m_gravityAcceleration[2] = grav.z; + serverCmd.m_simulationParameterResultArgs.m_internalSimFlags = 0; + serverCmd.m_simulationParameterResultArgs.m_jointFeedbackMode = 0; + + serverCmd.m_simulationParameterResultArgs.m_numSimulationSubSteps = 0;// m_data->m_numSimulationSubSteps; + serverCmd.m_simulationParameterResultArgs.m_numSolverIterations = 0;// m_data->m_dynamicsWorld->getSolverInfo().m_numIterations; + serverCmd.m_simulationParameterResultArgs.m_restitutionVelocityThreshold = 0;// m_data->m_dynamicsWorld->getSolverInfo().m_restitutionVelocityThreshold; + + serverCmd.m_simulationParameterResultArgs.m_solverResidualThreshold = 0;// m_data->m_dynamicsWorld->getSolverInfo().m_leastSquaresResidualThreshold; + serverCmd.m_simulationParameterResultArgs.m_splitImpulsePenetrationThreshold = 0;// m_data->m_dynamicsWorld->getSolverInfo().m_splitImpulsePenetrationThreshold; + serverCmd.m_simulationParameterResultArgs.m_useRealTimeSimulation = 0;// m_data->m_useRealTimeSimulation; + serverCmd.m_simulationParameterResultArgs.m_useSplitImpulse = 0;// m_data->m_dynamicsWorld->getSolverInfo().m_splitImpulse; + return hasStatus; } @@ -598,6 +1136,30 @@ bool PhysXServerCommandProcessor::processCommand(const struct SharedMemoryComman break; } + case CMD_INIT_POSE: + { + hasStatus = processInitPoseCommand(clientCmd, serverStatusOut, bufferServerToClient, bufferSizeInBytes); + break; + } + + case CMD_SEND_DESIRED_STATE: + { + hasStatus = processSendDesiredStateCommand(clientCmd, serverStatusOut, bufferServerToClient, bufferSizeInBytes); + break; + } + + case CMD_CHANGE_DYNAMICS_INFO: + { + hasStatus = processChangeDynamicsInfoCommand(clientCmd, serverStatusOut, bufferServerToClient, bufferSizeInBytes); + break; + }; + + case CMD_REQUEST_PHYSICS_SIMULATION_PARAMETERS: + { + hasStatus = processRequestPhysicsSimulationParametersCommand(clientCmd, serverStatusOut, bufferServerToClient, bufferSizeInBytes); + break; + } + #if 0 case CMD_SET_VR_CAMERA_STATE: @@ -697,11 +1259,7 @@ bool PhysXServerCommandProcessor::processCommand(const struct SharedMemoryComman break; } - case CMD_SEND_DESIRED_STATE: - { - hasStatus = processSendDesiredStateCommand(clientCmd,serverStatusOut,bufferServerToClient, bufferSizeInBytes); - break; - } + case CMD_REQUEST_COLLISION_INFO: { hasStatus = processRequestCollisionInfoCommand(clientCmd,serverStatusOut,bufferServerToClient, bufferSizeInBytes); @@ -710,29 +1268,15 @@ bool PhysXServerCommandProcessor::processCommand(const struct SharedMemoryComman - case CMD_CHANGE_DYNAMICS_INFO: - { - hasStatus = processChangeDynamicsInfoCommand(clientCmd,serverStatusOut,bufferServerToClient, bufferSizeInBytes); - break; - }; + case CMD_GET_DYNAMICS_INFO: { hasStatus = processGetDynamicsInfoCommand(clientCmd,serverStatusOut,bufferServerToClient, bufferSizeInBytes); break; } - case CMD_REQUEST_PHYSICS_SIMULATION_PARAMETERS: - { - hasStatus = 
processRequestPhysicsSimulationParametersCommand(clientCmd,serverStatusOut,bufferServerToClient, bufferSizeInBytes); - break; - } - case CMD_INIT_POSE: - { - hasStatus = processInitPoseCommand(clientCmd,serverStatusOut,bufferServerToClient, bufferSizeInBytes); - break; - } case CMD_CREATE_RIGID_BODY: { @@ -1040,15 +1584,41 @@ bool PhysXServerCommandProcessor::processLoadURDFCommand(const struct SharedMemo //u2p.getMassAndInertia2((urdfLinkIndex, mass, localInertiaDiagonal, bodyHandle->m_rootLocalInertialFrame, flags); } - physx::PxArticulationReducedCoordinate* articulation = URDF2PhysX(m_data->m_foundation,m_data->m_physics, m_data->m_cooking, m_data->m_scene, u2p, urdfArgs.m_urdfFlags, u2p.getPathPrefix(), rootTrans, &fileIO); + physx::PxBase* baseObj = URDF2PhysX(m_data->m_foundation,m_data->m_physics, m_data->m_cooking, m_data->m_scene, u2p, urdfArgs.m_urdfFlags, u2p.getPathPrefix(), rootTrans, &fileIO, useMultiBody); + + physx::PxRigidDynamic* c = baseObj->is(); + physx::PxRigidStatic* rigidStatic = baseObj->is(); + physx::PxRigidDynamic* rigidDynamic = baseObj->is(); + physx::PxArticulationReducedCoordinate* articulation = 0; + if (rigidDynamic) + { + bodyHandle->m_rigidDynamic = rigidDynamic; + } + if (rigidStatic) + { + bodyHandle->m_rigidStatic = rigidStatic; + serverStatusOut.m_numDataStreamBytes = 0; + serverStatusOut.m_type = CMD_URDF_LOADING_COMPLETED; + serverStatusOut.m_dataStreamArguments.m_bodyUniqueId = bodyUniqueId; + } + if ((rigidDynamic == 0) && (rigidStatic == 0)) + { + articulation = (physx::PxArticulationReducedCoordinate*)baseObj; + serverStatusOut.m_type = CMD_URDF_LOADING_COMPLETED; + serverStatusOut.m_dataStreamArguments.m_bodyUniqueId = bodyUniqueId; + } + if (rigidStatic || rigidDynamic || articulation) + { + serverStatusOut.m_type = CMD_URDF_LOADING_COMPLETED; + serverStatusOut.m_dataStreamArguments.m_bodyUniqueId = bodyUniqueId; + sprintf(serverStatusOut.m_dataStreamArguments.m_bodyName, "%s", bodyHandle->m_bodyName.c_str()); + } if (articulation) { bodyHandle->mArticulation = articulation; - serverStatusOut.m_type = CMD_URDF_LOADING_COMPLETED; - serverStatusOut.m_dataStreamArguments.m_bodyUniqueId = bodyUniqueId; - sprintf(serverStatusOut.m_dataStreamArguments.m_bodyName, "%s", bodyHandle->m_bodyName.c_str()); + btDefaultSerializer ser(bufferSizeInBytes, (unsigned char*)bufferServerToClient); @@ -1215,23 +1785,47 @@ bool PhysXServerCommandProcessor::processForwardDynamicsCommand(const struct Sha for (int i = 0; i < usedHandles.size(); i++) { InternalPhysXBodyHandle* bodyHandle = m_data->m_bodyHandles.getHandle(usedHandles[i]); - physx::PxArticulationLink* physxLinks[64]; - physx::PxU32 bufferSize = 64; - physx::PxU32 startIndex = 0; - int numLinks2 = bodyHandle->mArticulation->getLinks(physxLinks, bufferSize, startIndex); + - for (int l = 0; l < numLinks2; l++) + physx::PxRigidActor* actor = 0; + if (bodyHandle->m_rigidDynamic) + actor = bodyHandle->m_rigidDynamic; + if (bodyHandle->m_rigidStatic) + actor = bodyHandle->m_rigidStatic; + + if (actor) { - MyPhysXUserData* ud = (MyPhysXUserData*)physxLinks[l]->userData; - if (ud) + btTransform tr; + tr.setIdentity(); + physx::PxTransform pt = actor->getGlobalPose(); + tr.setOrigin(btVector3(pt.p[0], pt.p[1], pt.p[2])); + tr.setRotation(btQuaternion(pt.q.x, pt.q.y, pt.q.z, pt.q.w)); + btVector3 localScaling(1, 1, 1);//?? 
+ MyPhysXUserData* ud = (MyPhysXUserData*)actor->userData; + m_data->m_pluginManager.getRenderInterface()->syncTransform(ud->m_graphicsUniqueId, tr, localScaling); + } + + if (bodyHandle->mArticulation) + { + + physx::PxArticulationLink* physxLinks[64]; + physx::PxU32 bufferSize = 64; + physx::PxU32 startIndex = 0; + int numLinks2 = bodyHandle->mArticulation->getLinks(physxLinks, bufferSize, startIndex); + + for (int l = 0; l < numLinks2; l++) { - btTransform tr; - tr.setIdentity(); - physx::PxTransform pt = physxLinks[l]->getGlobalPose(); - tr.setOrigin(btVector3(pt.p[0], pt.p[1], pt.p[2])); - tr.setRotation(btQuaternion(pt.q.x, pt.q.y, pt.q.z, pt.q.w)); - btVector3 localScaling(1, 1, 1);//?? - m_data->m_pluginManager.getRenderInterface()->syncTransform(ud->m_graphicsUniqueId, tr, localScaling); + MyPhysXUserData* ud = (MyPhysXUserData*)physxLinks[l]->userData; + if (ud) + { + btTransform tr; + tr.setIdentity(); + physx::PxTransform pt = physxLinks[l]->getGlobalPose(); + tr.setOrigin(btVector3(pt.p[0], pt.p[1], pt.p[2])); + tr.setRotation(btQuaternion(pt.q.x, pt.q.y, pt.q.z, pt.q.w)); + btVector3 localScaling(1, 1, 1);//?? + m_data->m_pluginManager.getRenderInterface()->syncTransform(ud->m_graphicsUniqueId, tr, localScaling); + } } } } @@ -1284,10 +1878,15 @@ bool PhysXServerCommandProcessor::processSendPhysicsParametersCommand(const stru { b3Printf("Updated Gravity: %f,%f,%f", grav[0], grav[1], grav[2]); } - + } +#if 0 + if (clientCmd.m_updateFlags&SIM_PARAM_UPDATE_NUM_SOLVER_ITERATIONS) + { + + m_data->m_dynamicsWorld->getSolverInfo().m_numIterations = clientCmd.m_physSimParamArgs.m_numSolverIterations; } -#if 0 + if (clientCmd.m_updateFlags & SIM_PARAM_ENABLE_CONE_FRICTION) { if (clientCmd.m_physSimParamArgs.m_enableConeFriction) @@ -1328,10 +1927,6 @@ bool PhysXServerCommandProcessor::processSendPhysicsParametersCommand(const stru } - if (clientCmd.m_updateFlags&SIM_PARAM_UPDATE_NUM_SOLVER_ITERATIONS) - { - m_data->m_dynamicsWorld->getSolverInfo().m_numIterations = clientCmd.m_physSimParamArgs.m_numSolverIterations; - } if (clientCmd.m_updateFlags&SIM_PARAM_UPDATE_SOLVER_RESIDULAL_THRESHOLD) { @@ -1465,13 +2060,7 @@ bool PhysXServerCommandProcessor::processRequestActualStateCommand(const struct physx::PxU32 startIndex = 0; int numLinks2 = bodyHandle->mArticulation->getLinks(physxLinks, bufferSize, startIndex); - - //always add the base, even for static (non-moving objects) - //so that we can easily move the 'fixed' base when needed - //do we don't use this conditional "if (!mb->hasFixedBase())" { - int rootLink = 0; //todo check - serverCmd.m_sendActualStateArgs.m_rootLocalInertialFrame[0] = 0; serverCmd.m_sendActualStateArgs.m_rootLocalInertialFrame[1] = 0; serverCmd.m_sendActualStateArgs.m_rootLocalInertialFrame[2] = 0; @@ -1496,116 +2085,57 @@ bool PhysXServerCommandProcessor::processRequestActualStateCommand(const struct serverCmd.m_sendActualStateArgs.m_actualStateQ[6] = orn.w; totalDegreeOfFreedomQ += 7; //pos + quaternion + physx::PxVec3 linVel = l->getLinearVelocity(); + physx::PxVec3 angVel = l->getAngularVelocity(); + //base linear velocity (in world space, carthesian) - serverCmd.m_sendActualStateArgs.m_actualStateQdot[0] = 0;//cvel[3]; //mb->getBaseVel()[0]; - serverCmd.m_sendActualStateArgs.m_actualStateQdot[1] = 0;//cvel[4]; //mb->getBaseVel()[1]; - serverCmd.m_sendActualStateArgs.m_actualStateQdot[2] = 0;//cvel[5]; //mb->getBaseVel()[2]; + serverCmd.m_sendActualStateArgs.m_actualStateQdot[0] = linVel.x; + serverCmd.m_sendActualStateArgs.m_actualStateQdot[1] = linVel.y; 
+ serverCmd.m_sendActualStateArgs.m_actualStateQdot[2] = linVel.z; //base angular velocity (in world space, carthesian) - serverCmd.m_sendActualStateArgs.m_actualStateQdot[3] = 0;//cvel[0]; //mb->getBaseOmega()[0]; - serverCmd.m_sendActualStateArgs.m_actualStateQdot[4] = 0;//cvel[1]; //mb->getBaseOmega()[1]; - serverCmd.m_sendActualStateArgs.m_actualStateQdot[5] = 0;//cvel[2]; //mb->getBaseOmega()[2]; + serverCmd.m_sendActualStateArgs.m_actualStateQdot[3] = angVel.x; + serverCmd.m_sendActualStateArgs.m_actualStateQdot[4] = angVel.y; + serverCmd.m_sendActualStateArgs.m_actualStateQdot[5] = angVel.z; totalDegreeOfFreedomU += 6; //3 linear and 3 angular DOF } - //btAlignedObjectArray omega; - //btAlignedObjectArray linVel; + physx::PxArticulationCache* c = bodyHandle->mArticulation->createCache(); + bodyHandle->mArticulation->copyInternalStateToCache(*c, physx::PxArticulationCache::ePOSITION | physx::PxArticulationCache::eVELOCITY);// physx::PxArticulationCache::eALL); - int numLinks = 0;// m_data->m_mujocoModel->body_jntnum[bodyUniqueId]; - for (int l = 0; l < numLinks; l++) + btAlignedObjectArray dofStarts; + dofStarts.resize(numLinks2); + dofStarts[0] = 0; //We know that the root link does not have a joint + //cache positions in PhysX may be reshuffled, see + //http://gameworksdocs.nvidia.com/PhysX/4.0/documentation/PhysXGuide/Manual/Articulations.html + + for (int i = 1; i < numLinks2; ++i) { - //int type = (m_data->m_mujocoModel->jnt_type + m_data->m_mujocoModel->body_jntnum[bodyUniqueId])[l]; - //int type=(m_data->m_mujocoModel->jnt_type+m_data->m_mujocoModel->body_jntnum[bodyUniqueId])[l]; + int llIndex = physxLinks[i]->getLinkIndex(); + int dofs = physxLinks[i]->getInboundJointDof(); -#if 0 - physx::PxArticulationCache* c = bodyHandle->mArticulation->createCache(); - if (c) + dofStarts[llIndex] = dofs; + } + + int count = 0; + for (int i = 1; i < numLinks2; ++i) + { + int dofs = dofStarts[i]; + dofStarts[i] = count; + count += dofs; + } + + //start at 1, 0 is the base/root, handled above + for (int l = 1; l < numLinks2; l++) + { + int dofs = physxLinks[l]->getInboundJointDof(); + int llIndex = physxLinks[l]->getLinkIndex(); + + for (int d = 0; d < dofs; d++) { - c->jointVelocity[0] = 1; - bodyHandle->mArticulation->applyCache(*c, physx::PxArticulationCache::eVELOCITY); - bodyHandle->mArticulation->releaseCache(*c); + serverCmd.m_sendActualStateArgs.m_actualStateQ[totalDegreeOfFreedomQ++] = c->jointPosition[dofStarts[llIndex + d]]; + serverCmd.m_sendActualStateArgs.m_actualStateQ[totalDegreeOfFreedomU++] = c->jointVelocity[dofStarts[llIndex + d]]; } -#endif -#if 0 - mjtNum* xpos = - for (int d=0;dgetLink(l).m_posVarCount;d++) - { - serverCmd.m_sendActualStateArgs.m_actualStateQ[totalDegreeOfFreedomQ++] = 0; - } - for (int d=0;dgetLink(l).m_dofCount;d++) - { - serverCmd.m_sendActualStateArgs.m_actualStateQdot[totalDegreeOfFreedomU++] = 0; - } - - if (0 == mb->getLink(l).m_jointFeedback) - { - for (int d=0;d<6;d++) - { - serverCmd.m_sendActualStateArgs.m_jointReactionForces[l*6+d]=0; - } - } else - { - - serverCmd.m_sendActualStateArgs.m_jointReactionForces[l*6+0] = 0; - serverCmd.m_sendActualStateArgs.m_jointReactionForces[l*6+1] = 0; - serverCmd.m_sendActualStateArgs.m_jointReactionForces[l*6+2] = 0; - - serverCmd.m_sendActualStateArgs.m_jointReactionForces[l*6+3] = 0; - serverCmd.m_sendActualStateArgs.m_jointReactionForces[l*6+4] = 0; - serverCmd.m_sendActualStateArgs.m_jointReactionForces[l*6+5] = 0; - } - - serverCmd.m_sendActualStateArgs.m_jointMotorForce[l] = 0; -#if 0 - if 
(supportsJointMotor(mb,l)) - { - if (motor && m_data->m_physicsDeltaTime>btScalar(0)) - { - serverCmd.m_sendActualStateArgs.m_jointMotorForce[l] = 0; - } - } -#endif - //btVector3 linkLocalInertialOrigin = body->m_linkLocalInertialFrames[l].getOrigin(); - //btQuaternion linkLocalInertialRotation = body->m_linkLocalInertialFrames[l].getRotation(); - - //btVector3 linkCOMOrigin = mb->getLink(l).m_cachedWorldTransform.getOrigin(); - //btQuaternion linkCOMRotation = mb->getLink(l).m_cachedWorldTransform.getRotation(); - - serverCmd.m_sendActualStateArgs.m_linkState[l*7+0] = 0;//linkCOMOrigin.getX(); - serverCmd.m_sendActualStateArgs.m_linkState[l*7+1] = 0;//linkCOMOrigin.getY(); - serverCmd.m_sendActualStateArgs.m_linkState[l*7+2] = 0;//linkCOMOrigin.getZ(); - serverCmd.m_sendActualStateArgs.m_linkState[l*7+3] = 0;//linkCOMRotation.x(); - serverCmd.m_sendActualStateArgs.m_linkState[l*7+4] = 0;//linkCOMRotation.y(); - serverCmd.m_sendActualStateArgs.m_linkState[l*7+5] = 0;//linkCOMRotation.z(); - serverCmd.m_sendActualStateArgs.m_linkState[l*7+6] = 1;//linkCOMRotation.w(); - -#if 0 - btVector3 worldLinVel(0,0,0); - btVector3 worldAngVel(0,0,0); - - if (computeLinkVelocities) - { - const btMatrix3x3& linkRotMat = mb->getLink(l).m_cachedWorldTransform.getBasis(); - worldLinVel = linkRotMat * linVel[l+1]; - worldAngVel = linkRotMat * omega[l+1]; - } -#endif - serverCmd.m_sendActualStateArgs.m_linkWorldVelocities[l*6+0] = 0;//worldLinVel[0]; - serverCmd.m_sendActualStateArgs.m_linkWorldVelocities[l*6+1] = 0;//worldLinVel[1]; - serverCmd.m_sendActualStateArgs.m_linkWorldVelocities[l*6+2] = 0;//worldLinVel[2]; - serverCmd.m_sendActualStateArgs.m_linkWorldVelocities[l*6+3] = 0;//worldAngVel[0]; - serverCmd.m_sendActualStateArgs.m_linkWorldVelocities[l*6+4] = 0;//worldAngVel[1]; - serverCmd.m_sendActualStateArgs.m_linkWorldVelocities[l*6+5] = 0;//worldAngVel[2]; - - serverCmd.m_sendActualStateArgs.m_linkLocalInertialFrames[l*7+0] = 0;//linkLocalInertialOrigin.getX(); - serverCmd.m_sendActualStateArgs.m_linkLocalInertialFrames[l*7+1] = 0;//linkLocalInertialOrigin.getY(); - serverCmd.m_sendActualStateArgs.m_linkLocalInertialFrames[l*7+2] = 0;//linkLocalInertialOrigin.getZ(); - - serverCmd.m_sendActualStateArgs.m_linkLocalInertialFrames[l*7+3] = 0;//linkLocalInertialRotation.x(); - serverCmd.m_sendActualStateArgs.m_linkLocalInertialFrames[l*7+4] = 0;//linkLocalInertialRotation.y(); - serverCmd.m_sendActualStateArgs.m_linkLocalInertialFrames[l*7+5] = 0;//linkLocalInertialRotation.z(); - serverCmd.m_sendActualStateArgs.m_linkLocalInertialFrames[l*7+6] = 1;//linkLocalInertialRotation.w(); -#endif } serverCmd.m_sendActualStateArgs.m_numDegreeOfFreedomQ = totalDegreeOfFreedomQ; diff --git a/examples/SharedMemory/physx/PhysXServerCommandProcessor.h b/examples/SharedMemory/physx/PhysXServerCommandProcessor.h index 36fc62116..5cf8fa058 100644 --- a/examples/SharedMemory/physx/PhysXServerCommandProcessor.h +++ b/examples/SharedMemory/physx/PhysXServerCommandProcessor.h @@ -16,6 +16,11 @@ class PhysXServerCommandProcessor : public PhysicsCommandProcessorInterface bool processSendPhysicsParametersCommand(const struct SharedMemoryCommand& clientCmd, struct SharedMemoryStatus& serverStatusOut, char* bufferServerToClient, int bufferSizeInBytes); bool processRequestActualStateCommand(const struct SharedMemoryCommand& clientCmd, struct SharedMemoryStatus& serverStatusOut, char* bufferServerToClient, int bufferSizeInBytes); bool processResetSimulationCommand(const struct SharedMemoryCommand& clientCmd, struct 
SharedMemoryStatus& serverStatusOut, char* bufferServerToClient, int bufferSizeInBytes); + bool processInitPoseCommand(const struct SharedMemoryCommand& clientCmd, struct SharedMemoryStatus& serverStatusOut, char* bufferServerToClient, int bufferSizeInBytes); + bool processSendDesiredStateCommand(const struct SharedMemoryCommand& clientCmd, struct SharedMemoryStatus& serverStatusOut, char* bufferServerToClient, int bufferSizeInBytes); + bool processChangeDynamicsInfoCommand(const struct SharedMemoryCommand& clientCmd, struct SharedMemoryStatus& serverStatusOut, char* bufferServerToClient, int bufferSizeInBytes); + bool processRequestPhysicsSimulationParametersCommand(const struct SharedMemoryCommand& clientCmd, struct SharedMemoryStatus& serverStatusOut, char* bufferServerToClient, int bufferSizeInBytes); + bool processCustomCommand(const struct SharedMemoryCommand& clientCmd, struct SharedMemoryStatus& serverStatusOut, char* bufferServerToClient, int bufferSizeInBytes); diff --git a/examples/SharedMemory/physx/URDF2PhysX.cpp b/examples/SharedMemory/physx/URDF2PhysX.cpp index 51a115605..02713f029 100644 --- a/examples/SharedMemory/physx/URDF2PhysX.cpp +++ b/examples/SharedMemory/physx/URDF2PhysX.cpp @@ -16,11 +16,15 @@ #include "PxMaterial.h" #include "PxCooking.h" #include "PxScene.h" +#include "PxRigidStatic.h" +#include "PxRigidDynamic.h" +#include "PxActor.h" #include "PxAggregate.h" - +#include "Bullet3Common/b3FileUtils.h" #include "PhysXUserData.h" #include "../../CommonInterfaces/CommonFileIOInterface.h" - +#include "../../Importers/ImportObjDemo/LoadMeshFromObj.h" +#include "../../Importers/ImportSTLDemo/LoadMeshFromSTL.h" #include "Importers/ImportURDFDemo/UrdfParser.h" @@ -31,6 +35,8 @@ struct URDF2PhysXCachedData URDF2PhysXCachedData() : m_currentMultiBodyLinkIndex(-1), m_articulation(0), + m_rigidStatic(0), + m_rigidDynamic(0), m_totalNumJoints1(0) { } @@ -44,6 +50,8 @@ struct URDF2PhysXCachedData int m_currentMultiBodyLinkIndex; physx::PxArticulationReducedCoordinate* m_articulation; + physx::PxRigidStatic* m_rigidStatic; + physx::PxRigidDynamic* m_rigidDynamic; btAlignedObjectArray m_linkTransWorldSpace; btAlignedObjectArray m_urdfLinkIndex; @@ -237,7 +245,7 @@ static physx::PxConvexMesh* createPhysXConvex(physx::PxU32 numVerts, const physx int convertLinkPhysXShapes(const URDFImporterInterface& u2b, URDF2PhysXCachedData& cache, ArticulationCreationInterface& creation, int urdfLinkIndex, const char* pathPrefix, const btTransform& localInertiaFrame, - physx::PxArticulationReducedCoordinate* articulation, int mbLinkIndex, physx::PxArticulationLink* linkPtr) + physx::PxArticulationReducedCoordinate* articulation, int mbLinkIndex, physx::PxRigidActor* linkPtr) { int numShapes = 0; @@ -362,9 +370,61 @@ int convertLinkPhysXShapes(const URDFImporterInterface& u2b, URDF2PhysXCachedDat numShapes++; break; } + case URDF_GEOM_MESH: + { + btAlignedObjectArray vertices; + GLInstanceGraphicsShape* glmesh = 0; + switch (collision->m_geometry.m_meshFileType) + { + case UrdfGeometry::FILE_OBJ: + { + char relativeFileName[1024]; + char pathPrefix[1024]; + pathPrefix[0] = 0; + if (creation.m_fileIO->findResourcePath(collision->m_geometry.m_meshFileName.c_str(), relativeFileName, 1024)) + { + b3FileUtils::extractPath(relativeFileName, pathPrefix, 1024); + } + glmesh = LoadMeshFromObj(collision->m_geometry.m_meshFileName.c_str(), pathPrefix, creation.m_fileIO); + + break; + } + case UrdfGeometry::FILE_STL: + { + glmesh = LoadMeshFromSTL(collision->m_geometry.m_meshFileName.c_str(), 
creation.m_fileIO); + break; + } + default: + { + } + } + if (glmesh && glmesh->m_numvertices) + { + for (int i = 0; i < glmesh->m_numvertices; i++) + { + physx::PxVec3 vert( + glmesh->m_vertices->at(i).xyzw[0] * collision->m_geometry.m_meshScale[0], + glmesh->m_vertices->at(i).xyzw[1] * collision->m_geometry.m_meshScale[1], + glmesh->m_vertices->at(i).xyzw[2] * collision->m_geometry.m_meshScale[2]); + vertices.push_back(vert); + } + + physx::PxConvexMesh* convexMesh = createPhysXConvex(vertices.size(), &vertices[0], creation); + + shape = physx::PxRigidActorExt::createExclusiveShape(*linkPtr, + physx::PxConvexMeshGeometry(convexMesh), *material); + + shape->setLocalPose(tr); + cache.m_geomLocalPoses.push_back(tr); + numShapes++; + } + break; + + } default: { + printf("unknown physx shape\n"); } } @@ -390,7 +450,7 @@ btTransform ConvertURDF2PhysXInternal( ArticulationCreationInterface& creation, URDF2PhysXCachedData& cache, int urdfLinkIndex, const btTransform& parentTransformInWorldSpace, - bool createMultiBody, const char* pathPrefix, + bool createActiculation, const char* pathPrefix, int flags, UrdfVisualShapeCache2* cachedLinkGraphicsShapesIn, UrdfVisualShapeCache2* cachedLinkGraphicsShapesOut, @@ -562,18 +622,45 @@ btTransform ConvertURDF2PhysXInternal( btTransform inertialFrameInWorldSpace = linkTransformInWorldSpace * localInertialFrame; bool canSleep = (flags & CUF_ENABLE_SLEEPING) != 0; - physx::PxArticulationLink* linkPtr = 0; + physx::PxRigidActor* linkPtr = 0; + + physx::PxRigidBody* rbLinkPtr = 0; + - if (!createMultiBody) + physx::PxTransform tr; + tr.p = physx::PxVec3(linkTransformInWorldSpace.getOrigin().x(), linkTransformInWorldSpace.getOrigin().y(), linkTransformInWorldSpace.getOrigin().z()); + tr.q = physx::PxQuat(linkTransformInWorldSpace.getRotation().x(), linkTransformInWorldSpace.getRotation().y(), linkTransformInWorldSpace.getRotation().z(), linkTransformInWorldSpace.getRotation().w()); + bool isFixedBase = (mass == 0); //todo: figure out when base is fixed + + + if (!createActiculation) { + if (isFixedBase) + { + physx::PxRigidStatic* s = creation.m_physics->createRigidStatic(tr); + if ((cache.m_rigidStatic == 0) && (cache.m_rigidDynamic == 0)) + { + cache.m_rigidStatic = s; + } + linkPtr = s; + } + else + { + + physx::PxRigidDynamic* d = creation.m_physics->createRigidDynamic(tr); + linkPtr = d; + if ((cache.m_rigidStatic == 0) && (cache.m_rigidDynamic == 0)) + { + cache.m_rigidDynamic = d; + } + rbLinkPtr = d; + + } } else { - physx::PxTransform tr; - tr.p = physx::PxVec3(linkTransformInWorldSpace.getOrigin().x(), linkTransformInWorldSpace.getOrigin().y(), linkTransformInWorldSpace.getOrigin().z()); - tr.q = physx::PxQuat(linkTransformInWorldSpace.getRotation().x(), linkTransformInWorldSpace.getRotation().y(), linkTransformInWorldSpace.getRotation().z(), linkTransformInWorldSpace.getRotation().w()); - + cache.m_linkTransWorldSpace.push_back(tr); cache.m_urdfLinkIndex.push_back(urdfLinkIndex); cache.m_parentUrdfLinkIndex.push_back(urdfParentIndex); @@ -582,7 +669,7 @@ btTransform ConvertURDF2PhysXInternal( if (cache.m_articulation == 0) { - bool isFixedBase = (mass == 0); //todo: figure out when base is fixed + cache.m_articulation = creation.m_physics->createArticulationReducedCoordinate(); @@ -596,6 +683,7 @@ btTransform ConvertURDF2PhysXInternal( physx::PxArticulationLink* base = cache.m_articulation->createLink(NULL,tr); linkPtr = base; + rbLinkPtr = base; //physx::PxRigidActorExt::createExclusiveShape(*base, PxBoxGeometry(0.5f, 0.25f, 1.5f), *gMaterial); @@ 
-628,14 +716,19 @@ btTransform ConvertURDF2PhysXInternal( #endif + + cache.registerMultiBody(urdfLinkIndex, base, inertialFrameInWorldSpace, mass, localInertiaDiagonal, localInertialFrame); } else { physx::PxArticulationLink* parentLink = cache.getPhysxLinkFromLink(urdfParentIndex); - linkPtr = cache.m_articulation->createLink(parentLink, tr); - physx::PxArticulationJointReducedCoordinate* joint = static_cast(linkPtr->getInboundJoint()); + physx::PxArticulationLink* childLink = cache.m_articulation->createLink(parentLink, tr); + linkPtr = childLink; + rbLinkPtr = childLink; + + physx::PxArticulationJointReducedCoordinate* joint = static_cast(childLink->getInboundJoint()); switch (jointType) { @@ -690,26 +783,29 @@ btTransform ConvertURDF2PhysXInternal( joint->setParentPose(parentPose); joint->setChildPose(childPose); - - } - cache.registerMultiBody(urdfLinkIndex, linkPtr, inertialFrameInWorldSpace, mass, localInertiaDiagonal, localInertialFrame); - if (linkPtr) - { - //todo: mem leaks - MyPhysXUserData* userData = new MyPhysXUserData(); - userData->m_graphicsUniqueId = graphicsIndex; - linkPtr->userData = userData; + + cache.registerMultiBody(urdfLinkIndex, childLink, inertialFrameInWorldSpace, mass, localInertiaDiagonal, localInertialFrame); } + + } + if (linkPtr) + { + //todo: mem leaks + MyPhysXUserData* userData = new MyPhysXUserData(); + userData->m_graphicsUniqueId = graphicsIndex; + linkPtr->userData = userData; + } + //create collision shapes //physx::PxRigidActorExt::createExclusiveShape convertLinkPhysXShapes(u2b, cache, creation, urdfLinkIndex, pathPrefix, localInertialFrame, cache.m_articulation, mbLinkIndex, linkPtr); - physx::PxRigidBodyExt::updateMassAndInertia(*linkPtr, mass); + physx::PxRigidBodyExt::updateMassAndInertia(*rbLinkPtr, mass); //base->setMass(massOut); @@ -727,7 +823,7 @@ btTransform ConvertURDF2PhysXInternal( bool disableParentCollision = true; - if (createMultiBody && cache.m_articulation) + if (createActiculation && cache.m_articulation) { #if 0 cache.m_bulletMultiBody->getLink(mbLinkIndex).m_jointDamping = jointDamping; @@ -741,7 +837,7 @@ btTransform ConvertURDF2PhysXInternal( } - if (createMultiBody) + if (createActiculation) { } @@ -762,7 +858,7 @@ btTransform ConvertURDF2PhysXInternal( { int urdfChildLinkIndex = urdfChildIndices[i]; - ConvertURDF2PhysXInternal(u2b, creation, cache, urdfChildLinkIndex, linkTransformInWorldSpace, createMultiBody, pathPrefix, flags, cachedLinkGraphicsShapesIn, cachedLinkGraphicsShapesOut, recursive); + ConvertURDF2PhysXInternal(u2b, creation, cache, urdfChildLinkIndex, linkTransformInWorldSpace, createActiculation, pathPrefix, flags, cachedLinkGraphicsShapesIn, cachedLinkGraphicsShapesOut, recursive); } } return linkTransformInWorldSpace; @@ -774,7 +870,7 @@ btTransform ConvertURDF2PhysXInternal( -physx::PxArticulationReducedCoordinate* URDF2PhysX(physx::PxFoundation* foundation, physx::PxPhysics* physics, physx::PxCooking* cooking, physx::PxScene* scene, class PhysXURDFImporter& u2p, int flags, const char* pathPrefix, const btTransform& rootTransformInWorldSpace, struct CommonFileIOInterface* fileIO) +physx::PxBase* URDF2PhysX(physx::PxFoundation* foundation, physx::PxPhysics* physics, physx::PxCooking* cooking, physx::PxScene* scene, class PhysXURDFImporter& u2p, int flags, const char* pathPrefix, const btTransform& rootTransformInWorldSpace, struct CommonFileIOInterface* fileIO, bool createActiculation) { URDF2PhysXCachedData cache; InitURDF2BulletCache(u2p, cache, flags); @@ -792,13 +888,13 @@ 
physx::PxArticulationReducedCoordinate* URDF2PhysX(physx::PxFoundation* foundati creation.m_scene = scene; creation.m_fileIO = fileIO; - bool createMultiBody = true; + bool recursive = (flags & CUF_MAINTAIN_LINK_ORDER) == 0; if (recursive) { - ConvertURDF2PhysXInternal(u2p, creation, cache, urdfLinkIndex, rootTransformInWorldSpace, createMultiBody, pathPrefix, flags, &cachedLinkGraphicsShapes, &cachedLinkGraphicsShapesOut, recursive); + ConvertURDF2PhysXInternal(u2p, creation, cache, urdfLinkIndex, rootTransformInWorldSpace, createActiculation, pathPrefix, flags, &cachedLinkGraphicsShapes, &cachedLinkGraphicsShapesOut, recursive); } else @@ -820,7 +916,7 @@ physx::PxArticulationReducedCoordinate* URDF2PhysX(physx::PxFoundation* foundati int urdfLinkIndex = allIndices[i].m_index; int parentIndex = allIndices[i].m_parentIndex; btTransform parentTr = parentIndex >= 0 ? parentTransforms[parentIndex] : rootTransformInWorldSpace; - btTransform tr = ConvertURDF2BulletInternal(u2b, creation, cache, urdfLinkIndex, parentTr, world1, createMultiBody, pathPrefix, flags, cachedLinkGraphicsShapes, &cachedLinkGraphicsShapesOut, recursive); + btTransform tr = ConvertURDF2BulletInternal(u2b, creation, cache, urdfLinkIndex, parentTr, world1, createActiculation, pathPrefix, flags, cachedLinkGraphicsShapes, &cachedLinkGraphicsShapesOut, recursive); if ((urdfLinkIndex + 1) >= parentTransforms.size()) { parentTransforms.resize(urdfLinkIndex + 1); @@ -838,7 +934,7 @@ physx::PxArticulationReducedCoordinate* URDF2PhysX(physx::PxFoundation* foundati #endif - if (scene && cache.m_articulation) + if (cache.m_articulation) { #ifdef DEBUG_ARTICULATIONS printf("\n-----------------\n"); @@ -902,7 +998,19 @@ physx::PxArticulationReducedCoordinate* URDF2PhysX(physx::PxFoundation* foundati scene->addArticulation(*cache.m_articulation); } + return cache.m_articulation; } - return cache.m_articulation; + if (cache.m_rigidStatic) + { + scene->addActor(*cache.m_rigidStatic); + return cache.m_rigidStatic; + } + if (cache.m_rigidDynamic) + { + scene->addActor(*cache.m_rigidDynamic); + return cache.m_rigidDynamic; + } + return NULL; + } diff --git a/examples/SharedMemory/physx/URDF2PhysX.h b/examples/SharedMemory/physx/URDF2PhysX.h index f00d18edb..9c0c653e8 100644 --- a/examples/SharedMemory/physx/URDF2PhysX.h +++ b/examples/SharedMemory/physx/URDF2PhysX.h @@ -6,6 +6,7 @@ namespace physx { + class PxBase; class PxFoundation; class PxPhysics; class PxDefaultCpuDispatcher; @@ -20,6 +21,6 @@ struct UrdfVisualShapeCache2 b3AlignedObjectArray m_cachedUrdfLinkVisualShapeIndices; }; -physx::PxArticulationReducedCoordinate* URDF2PhysX(physx::PxFoundation* foundation, physx::PxPhysics* physics, physx::PxCooking* cooking, physx::PxScene* scene, class PhysXURDFImporter& u2p, int flags, const char* pathPrefix, const class btTransform& rootTransformInWorldSpace,struct CommonFileIOInterface* fileIO); +physx::PxBase* URDF2PhysX(physx::PxFoundation* foundation, physx::PxPhysics* physics, physx::PxCooking* cooking, physx::PxScene* scene, class PhysXURDFImporter& u2p, int flags, const char* pathPrefix, const class btTransform& rootTransformInWorldSpace,struct CommonFileIOInterface* fileIO, bool createActiculation); #endif //URDF2PHYSX_H \ No newline at end of file diff --git a/examples/pybullet/examples/otherPhysicsEngine.py b/examples/pybullet/examples/otherPhysicsEngine.py index dfd04352d..3399cff4f 100644 --- a/examples/pybullet/examples/otherPhysicsEngine.py +++ b/examples/pybullet/examples/otherPhysicsEngine.py @@ -1,14 +1,49 @@ import 
pybullet as p import time +import math -p.connect(p.PhysX) -p.loadPlugin("eglRendererPlugin") +usePhysX = True +if usePhysX: + p.connect(p.PhysX) + p.loadPlugin("eglRendererPlugin") +else: + p.connect(p.GUI) p.loadURDF("plane.urdf") -for i in range (50): - p.loadURDF("r2d2.urdf",[0,0,1+i*2]) + +for i in range (10): + sphere = p.loadURDF("marble_cube.urdf",[0,-1,1+i*1], useMaximalCoordinates=False) +p.changeDynamics(sphere ,-1, mass=1000) + +door = p.loadURDF("door.urdf",[0,0,1]) +p.changeDynamics(door ,1, linearDamping=0, angularDamping=0, jointDamping=0, mass=1) +print("numJoints = ", p.getNumJoints(door)) + + p.setGravity(0,0,-10) +position_control = True + +angle = math.pi*0.25 +p.resetJointState(door,1,angle) +angleread = p.getJointState(door,1) +print("angleread = ",angleread) +prevTime = time.time() + +angle = math.pi*0.5 while (1): - p.stepSimulation() - time.sleep(1./240.) \ No newline at end of file + + curTime = time.time() + + diff = curTime - prevTime + #every second, swap target angle + if (diff>1): + angle = - angle + prevTime = curTime + + if position_control: + p.setJointMotorControl2(door,1,p.POSITION_CONTROL, targetPosition = angle, positionGain=10.1, velocityGain=1, force=11.001) + else: + p.setJointMotorControl2(door,1,p.VELOCITY_CONTROL, targetVelocity=1, force=1011) + p.stepSimulation() + time.sleep(1./240.) diff --git a/examples/pybullet/gym/pybullet_data/data/agents/ct_agent_humanoid_ppo.txt b/examples/pybullet/gym/pybullet_data/data/agents/ct_agent_humanoid_ppo.txt new file mode 100644 index 000000000..ebc90c946 --- /dev/null +++ b/examples/pybullet/gym/pybullet_data/data/agents/ct_agent_humanoid_ppo.txt @@ -0,0 +1,52 @@ +{ + "AgentType": "PPO", + + "ActorNet": "fc_2layers_1024units", + "ActorStepsize": 0.0000025, + "ActorMomentum": 0.9, + "ActorWeightDecay": 0.0005, + "ActorInitOutputScale": 0.01, + + "CriticNet": "fc_2layers_1024units", + "CriticStepsize": 0.01, + "CriticMomentum": 0.9, + "CriticWeightDecay": 0, + + "UpdatePeriod": 1, + "ItersPerUpdate": 1, + "Discount": 0.95, + "BatchSize": 4096, + "MiniBatchSize": 256, + "Epochs": 1, + "ReplayBufferSize": 500000, + "InitSamples": 1, + "NormalizerSamples": 1000000, + + "RatioClip": 0.2, + "NormAdvClip": 4, + "TDLambda": 0.95, + + "OutputIters": 10, + "IntOutputIters": 400, + "TestEpisodes": 32, + + "ExpAnnealSamples": 64000000, + + "ExpParamsBeg": + { + "Rate": 1, + "InitActionRate": 1, + "Noise": 0.05, + "NoiseInternal": 0, + "Temp": 0.1 + }, + + "ExpParamsEnd": + { + "Rate": 0.2, + "InitActionRate": 0.01, + "Noise": 0.05, + "NoiseInternal": 0, + "Temp": 0.001 + } +} \ No newline at end of file diff --git a/examples/pybullet/gym/pybullet_data/data/policies/humanoid3d/humanoid3d_walk.ckpt.data-00000-of-00001 b/examples/pybullet/gym/pybullet_data/data/policies/humanoid3d/humanoid3d_walk.ckpt.data-00000-of-00001 new file mode 100644 index 000000000..ee5684538 Binary files /dev/null and b/examples/pybullet/gym/pybullet_data/data/policies/humanoid3d/humanoid3d_walk.ckpt.data-00000-of-00001 differ diff --git a/examples/pybullet/gym/pybullet_data/data/policies/humanoid3d/humanoid3d_walk.ckpt.index b/examples/pybullet/gym/pybullet_data/data/policies/humanoid3d/humanoid3d_walk.ckpt.index new file mode 100644 index 000000000..d47e20ee8 Binary files /dev/null and b/examples/pybullet/gym/pybullet_data/data/policies/humanoid3d/humanoid3d_walk.ckpt.index differ diff --git a/examples/pybullet/gym/pybullet_envs/deep_mimic/DeepMimic_Optimizer.py b/examples/pybullet/gym/pybullet_envs/deep_mimic/DeepMimic_Optimizer.py new file 
mode 100644 index 000000000..ff4eaa79f --- /dev/null +++ b/examples/pybullet/gym/pybullet_envs/deep_mimic/DeepMimic_Optimizer.py @@ -0,0 +1,52 @@ +import numpy as np +import sys +import os +import inspect +currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe()))) +parentdir = os.path.dirname(os.path.dirname(currentdir)) +os.sys.path.insert(0,parentdir) +print("parentdir=",parentdir) + + +from pybullet_envs.deep_mimic.env.pybullet_deep_mimic_env import PyBulletDeepMimicEnv +from pybullet_envs.deep_mimic.learning.rl_world import RLWorld +from pybullet_utils.logger import Logger +from testrl import update_world, update_timestep, build_world +import pybullet_utils.mpi_util as MPIUtil + +args = [] +world = None + +def run(): + global update_timestep + global world + + done = False + while not (done): + update_world(world, update_timestep) + + return + +def shutdown(): + global world + + Logger.print2('Shutting down...') + world.shutdown() + return + +def main(): + global args + global world + + # Command line arguments + args = sys.argv[1:] + enable_draw = False + world = build_world(args, enable_draw) + + run() + shutdown() + + return + +if __name__ == '__main__': + main() diff --git a/examples/pybullet/gym/pybullet_envs/deep_mimic/env/action_space.py b/examples/pybullet/gym/pybullet_envs/deep_mimic/env/action_space.py new file mode 100644 index 000000000..1014f7497 --- /dev/null +++ b/examples/pybullet/gym/pybullet_envs/deep_mimic/env/action_space.py @@ -0,0 +1,6 @@ +from enum import Enum + +class ActionSpace(Enum): + Null = 0 + Continuous = 1 + Discrete = 2 \ No newline at end of file diff --git a/examples/pybullet/gym/pybullet_envs/deep_mimic/env/env.py b/examples/pybullet/gym/pybullet_envs/deep_mimic/env/env.py new file mode 100644 index 000000000..4c8973674 --- /dev/null +++ b/examples/pybullet/gym/pybullet_envs/deep_mimic/env/env.py @@ -0,0 +1,13 @@ +from abc import ABC, abstractmethod +import numpy as np +from enum import Enum + +class Env(ABC): + class Terminate(Enum): + Null = 0 + Fail = 1 + Succ = 2 + + def __init__(self, args, enable_draw): + self.enable_draw = enable_draw + return \ No newline at end of file diff --git a/examples/pybullet/gym/pybullet_envs/deep_mimic/env/humanoid_pose_interpolator.py b/examples/pybullet/gym/pybullet_envs/deep_mimic/env/humanoid_pose_interpolator.py new file mode 100644 index 000000000..0a6581c62 --- /dev/null +++ b/examples/pybullet/gym/pybullet_envs/deep_mimic/env/humanoid_pose_interpolator.py @@ -0,0 +1,249 @@ +from pybullet_envs.minitaur.envs import bullet_client +import math + +class HumanoidPoseInterpolator(object): + def __init__(self): + pass + + def Reset(self): + + self._basePos = [0,0,0] + self._baseLinVel = [0,0,0] + self._baseOrn = [0,0,0,1] + self._baseAngVel = [0,0,0] + + self._chestRot = [0,0,0,1] + self._chestVel = [0,0,0] + self._neckRot = [0,0,0,1] + self._neckVel = [0,0,0] + + self._rightHipRot = [0,0,0,1] + self._rightHipVel = [0,0,0] + self._rightKneeRot = [0] + self._rightKneeVel = [0] + self._rightAnkleRot = [0,0,0,1] + self._rightAnkleVel = [0,0,0] + + self._rightShoulderRot = [0,0,0,1] + self._rightShoulderVel = [0,0,0] + self._rightElbowRot = [0] + self._rightElbowVel = [0] + + self._leftHipRot = [0,0,0,1] + self._leftHipVel = [0,0,0] + self._leftKneeRot = [0] + self._leftKneeVel = [0] + self._leftAnkleRot = [0,0,0,1] + self._leftAnkleVel = [0,0,0] + + self._leftShoulderRot = [0,0,0,1] + self._leftShoulderVel = [0,0,0] + self._leftElbowRot = [0] + self._leftElbowVel = [0] + + def 
ComputeLinVel(self,posStart, posEnd, deltaTime): + vel = [(posEnd[0]-posStart[0])/deltaTime,(posEnd[1]-posStart[1])/deltaTime,(posEnd[2]-posStart[2])/deltaTime] + return vel + + def ComputeAngVel(self,ornStart, ornEnd, deltaTime, bullet_client): + dorn = bullet_client.getDifferenceQuaternion(ornStart,ornEnd) + axis,angle = bullet_client.getAxisAngleFromQuaternion(dorn) + angVel = [(axis[0]*angle)/deltaTime,(axis[1]*angle)/deltaTime,(axis[2]*angle)/deltaTime] + return angVel + + def NormalizeVector(self, vec): + length2 = orn[0]*orn[0]+orn[1]*orn[1]+orn[2]*orn[2] + if (length2>0): + length = math.sqrt(length2) + + def NormalizeQuaternion(self, orn): + length2 = orn[0]*orn[0]+orn[1]*orn[1]+orn[2]*orn[2]+orn[3]*orn[3] + if (length2>0): + length = math.sqrt(length2) + orn[0]/=length + orn[1]/=length + orn[2]/=length + orn[3]/=length + return orn + + #print("Normalize? length=",length) + + + def PostProcessMotionData(self, frameData): + baseOrn1Start = [frameData[5],frameData[6], frameData[7],frameData[4]] + + chestRotStart = [frameData[9],frameData[10],frameData[11],frameData[8]] + + neckRotStart = [frameData[13],frameData[14],frameData[15],frameData[12]] + rightHipRotStart = [frameData[17],frameData[18],frameData[19],frameData[16]] + rightAnkleRotStart = [frameData[22],frameData[23],frameData[24],frameData[21]] + rightShoulderRotStart = [frameData[26],frameData[27],frameData[28],frameData[25]] + leftHipRotStart = [frameData[31],frameData[32],frameData[33],frameData[30]] + leftAnkleRotStart = [frameData[36],frameData[37],frameData[38],frameData[35]] + leftShoulderRotStart = [frameData[40],frameData[41],frameData[42],frameData[39]] + + + def GetPose(self): + pose = [ self._basePos[0],self._basePos[1],self._basePos[2], + self._baseOrn[0],self._baseOrn[1],self._baseOrn[2],self._baseOrn[3], + self._chestRot[0],self._chestRot[1],self._chestRot[2],self._chestRot[3], + self._neckRot[0],self._neckRot[1],self._neckRot[2],self._neckRot[3], + self._rightHipRot[0],self._rightHipRot[1],self._rightHipRot[2],self._rightHipRot[3], + self._rightKneeRot[0], + self._rightAnkleRot[0],self._rightAnkleRot[1],self._rightAnkleRot[2],self._rightAnkleRot[3], + self._rightShoulderRot[0],self._rightShoulderRot[1],self._rightShoulderRot[2],self._rightShoulderRot[3], + self._rightElbowRot[0], + self._leftHipRot[0],self._leftHipRot[1],self._leftHipRot[2],self._leftHipRot[3], + self._leftKneeRot[0], + self._leftAnkleRot[0],self._leftAnkleRot[1],self._leftAnkleRot[2],self._leftAnkleRot[3], + self._leftShoulderRot[0],self._leftShoulderRot[1],self._leftShoulderRot[2],self._leftShoulderRot[3], + self._leftElbowRot[0] ] + return pose + + def Slerp(self, frameFraction, frameData, frameDataNext,bullet_client ): + keyFrameDuration = frameData[0] + basePos1Start = [frameData[1],frameData[2],frameData[3]] + basePos1End = [frameDataNext[1],frameDataNext[2],frameDataNext[3]] + self._basePos = [basePos1Start[0]+frameFraction*(basePos1End[0]-basePos1Start[0]), + basePos1Start[1]+frameFraction*(basePos1End[1]-basePos1Start[1]), + basePos1Start[2]+frameFraction*(basePos1End[2]-basePos1Start[2])] + self._baseLinVel = self.ComputeLinVel(basePos1Start,basePos1End, keyFrameDuration) + baseOrn1Start = [frameData[5],frameData[6], frameData[7],frameData[4]] + baseOrn1Next = [frameDataNext[5],frameDataNext[6], frameDataNext[7],frameDataNext[4]] + self._baseOrn = bullet_client.getQuaternionSlerp(baseOrn1Start,baseOrn1Next,frameFraction) + self._baseAngVel = self.ComputeAngVel(baseOrn1Start,baseOrn1Next, keyFrameDuration, bullet_client) + + 
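# --- editor's note: the sketch below is an illustration, not part of the patch ---
# DeepMimic motion frames store each rotation as [w, x, y, z] while pybullet wants
# [x, y, z, w]; that is why every quaternion above is rebuilt by index, e.g.
# baseOrn1Start = [frameData[5], frameData[6], frameData[7], frameData[4]].
# ComputeAngVel then approximates angular velocity as the axis-angle of the
# difference quaternion divided by the key frame duration. Minimal, self-contained
# sketch of that convention (all numeric values here are made up for the example):
import math
import pybullet as pb_check
pb_check.connect(pb_check.DIRECT)
q_start = [0, 0, 0, 1]                                    # identity, pybullet order [x, y, z, w]
q_end = pb_check.getQuaternionFromAxisAngle([0, 1, 0], math.pi * 0.25)
dorn = pb_check.getDifferenceQuaternion(q_start, q_end)   # rotation taking q_start to q_end
axis, angle = pb_check.getAxisAngleFromQuaternion(dorn)
dt = 0.0625                                               # example key frame duration in seconds
ang_vel = [axis[0] * angle / dt, axis[1] * angle / dt, axis[2] * angle / dt]
# ang_vel is roughly [0, 12.57, 0] rad/s: a quarter-pi turn about +y spread over dt.
pb_check.disconnect()
# ----------------------------------------------------------------------------------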
##pre-rotate to make z-up + #y2zPos=[0,0,0.0] + #y2zOrn = p.getQuaternionFromEuler([1.57,0,0]) + #basePos,baseOrn = p.multiplyTransforms(y2zPos, y2zOrn,basePos1,baseOrn1) + + chestRotStart = [frameData[9],frameData[10],frameData[11],frameData[8]] + chestRotEnd = [frameDataNext[9],frameDataNext[10],frameDataNext[11],frameDataNext[8]] + self._chestRot = bullet_client.getQuaternionSlerp(chestRotStart,chestRotEnd,frameFraction) + self._chestVel = self.ComputeAngVel(chestRotStart,chestRotEnd,keyFrameDuration,bullet_client) + + neckRotStart = [frameData[13],frameData[14],frameData[15],frameData[12]] + neckRotEnd= [frameDataNext[13],frameDataNext[14],frameDataNext[15],frameDataNext[12]] + self._neckRot = bullet_client.getQuaternionSlerp(neckRotStart,neckRotEnd,frameFraction) + self._neckVel = self.ComputeAngVel(neckRotStart,neckRotEnd,keyFrameDuration,bullet_client) + + rightHipRotStart = [frameData[17],frameData[18],frameData[19],frameData[16]] + rightHipRotEnd = [frameDataNext[17],frameDataNext[18],frameDataNext[19],frameDataNext[16]] + self._rightHipRot = bullet_client.getQuaternionSlerp(rightHipRotStart,rightHipRotEnd,frameFraction) + self._rightHipVel = self.ComputeAngVel(rightHipRotStart,rightHipRotEnd,keyFrameDuration,bullet_client) + + rightKneeRotStart = [frameData[20]] + rightKneeRotEnd = [frameDataNext[20]] + self._rightKneeRot = [rightKneeRotStart[0]+frameFraction*(rightKneeRotEnd[0]-rightKneeRotStart[0])] + self._rightKneeVel = [(rightKneeRotEnd[0]-rightKneeRotStart[0])/keyFrameDuration] + + rightAnkleRotStart = [frameData[22],frameData[23],frameData[24],frameData[21]] + rightAnkleRotEnd = [frameDataNext[22],frameDataNext[23],frameDataNext[24],frameDataNext[21]] + self._rightAnkleRot = bullet_client.getQuaternionSlerp(rightAnkleRotStart,rightAnkleRotEnd,frameFraction) + self._rightAnkleVel = self.ComputeAngVel(rightAnkleRotStart,rightAnkleRotEnd,keyFrameDuration,bullet_client) + + rightShoulderRotStart = [frameData[26],frameData[27],frameData[28],frameData[25]] + rightShoulderRotEnd = [frameDataNext[26],frameDataNext[27],frameDataNext[28],frameDataNext[25]] + self._rightShoulderRot = bullet_client.getQuaternionSlerp(rightShoulderRotStart,rightShoulderRotEnd,frameFraction) + self._rightShoulderVel = self.ComputeAngVel(rightShoulderRotStart,rightShoulderRotEnd, keyFrameDuration,bullet_client) + + rightElbowRotStart = [frameData[29]] + rightElbowRotEnd = [frameDataNext[29]] + self._rightElbowRot = [rightElbowRotStart[0]+frameFraction*(rightElbowRotEnd[0]-rightElbowRotStart[0])] + self._rightElbowVel = [(rightElbowRotEnd[0]-rightElbowRotStart[0])/keyFrameDuration] + + leftHipRotStart = [frameData[31],frameData[32],frameData[33],frameData[30]] + leftHipRotEnd = [frameDataNext[31],frameDataNext[32],frameDataNext[33],frameDataNext[30]] + self._leftHipRot = bullet_client.getQuaternionSlerp(leftHipRotStart,leftHipRotEnd,frameFraction) + self._leftHipVel = self.ComputeAngVel(leftHipRotStart, leftHipRotEnd,keyFrameDuration,bullet_client) + + leftKneeRotStart = [frameData[34]] + leftKneeRotEnd = [frameDataNext[34]] + self._leftKneeRot = [leftKneeRotStart[0] +frameFraction*(leftKneeRotEnd[0]-leftKneeRotStart[0]) ] + self._leftKneeVel = [(leftKneeRotEnd[0]-leftKneeRotStart[0])/keyFrameDuration] + + leftAnkleRotStart = [frameData[36],frameData[37],frameData[38],frameData[35]] + leftAnkleRotEnd = [frameDataNext[36],frameDataNext[37],frameDataNext[38],frameDataNext[35]] + self._leftAnkleRot = bullet_client.getQuaternionSlerp(leftAnkleRotStart,leftAnkleRotEnd,frameFraction) + self._leftAnkleVel = 
self.ComputeAngVel(leftAnkleRotStart,leftAnkleRotEnd,keyFrameDuration,bullet_client) + + leftShoulderRotStart = [frameData[40],frameData[41],frameData[42],frameData[39]] + leftShoulderRotEnd = [frameDataNext[40],frameDataNext[41],frameDataNext[42],frameDataNext[39]] + self._leftShoulderRot = bullet_client.getQuaternionSlerp(leftShoulderRotStart,leftShoulderRotEnd,frameFraction) + self._leftShoulderVel = self.ComputeAngVel(leftShoulderRotStart,leftShoulderRotEnd,keyFrameDuration,bullet_client) + + leftElbowRotStart = [frameData[43]] + leftElbowRotEnd = [frameDataNext[43]] + self._leftElbowRot = [leftElbowRotStart[0]+frameFraction*(leftElbowRotEnd[0]-leftElbowRotStart[0])] + self._leftElbowVel = [(leftElbowRotEnd[0]-leftElbowRotStart[0])/keyFrameDuration] + + pose = self.GetPose() + return pose + + def ConvertFromAction(self, pybullet_client, action): + #turn action into pose + + self.Reset()#?? needed? + index=0 + angle = action[index] + axis = [action[index+1],action[index+2],action[index+3]] + index+=4 + self._chestRot = pybullet_client.getQuaternionFromAxisAngle(axis,angle) + #print("pose._chestRot=",pose._chestRot) + + angle = action[index] + axis = [action[index+1],action[index+2],action[index+3]] + index+=4 + self._neckRot = pybullet_client.getQuaternionFromAxisAngle(axis,angle) + + angle = action[index] + axis = [action[index+1],action[index+2],action[index+3]] + index+=4 + self._rightHipRot = pybullet_client.getQuaternionFromAxisAngle(axis,angle) + + angle = action[index] + index+=1 + self._rightKneeRot = [angle] + + angle = action[index] + axis = [action[index+1],action[index+2],action[index+3]] + index+=4 + self._rightAnkleRot = pybullet_client.getQuaternionFromAxisAngle(axis,angle) + + angle = action[index] + axis = [action[index+1],action[index+2],action[index+3]] + index+=4 + self._rightShoulderRot = pybullet_client.getQuaternionFromAxisAngle(axis,angle) + + angle = action[index] + index+=1 + self._rightElbowRot = [angle] + + angle = action[index] + axis = [action[index+1],action[index+2],action[index+3]] + index+=4 + self._leftHipRot = pybullet_client.getQuaternionFromAxisAngle(axis,angle) + + angle = action[index] + index+=1 + self._leftKneeRot = [angle] + + + angle = action[index] + axis = [action[index+1],action[index+2],action[index+3]] + index+=4 + self._leftAnkleRot = pybullet_client.getQuaternionFromAxisAngle(axis,angle) + + angle = action[index] + axis = [action[index+1],action[index+2],action[index+3]] + index+=4 + self._leftShoulderRot = pybullet_client.getQuaternionFromAxisAngle(axis,angle) + + angle = action[index] + index+=1 + self._leftElbowRot = [angle] + + pose = self.GetPose() + return pose + diff --git a/examples/pybullet/gym/pybullet_envs/deep_mimic/env/humanoid_stable_pd.py b/examples/pybullet/gym/pybullet_envs/deep_mimic/env/humanoid_stable_pd.py new file mode 100644 index 000000000..3239a7e6c --- /dev/null +++ b/examples/pybullet/gym/pybullet_envs/deep_mimic/env/humanoid_stable_pd.py @@ -0,0 +1,453 @@ + +from pybullet_envs.deep_mimic.env import pd_controller_stable +from pybullet_envs.deep_mimic.env import humanoid_pose_interpolator +import math + +chest=1 +neck=2 +rightHip=3 +rightKnee=4 +rightAnkle=5 +rightShoulder=6 +rightElbow=7 +leftHip=9 +leftKnee=10 +leftAnkle=11 +leftShoulder=12 +leftElbow=13 +jointFrictionForce = 0 + +class HumanoidStablePD(object): + def __init__(self, pybullet_client, mocap_data, timeStep, useFixedBase=True): + self._pybullet_client = pybullet_client + self._mocap_data = mocap_data + print("LOADING humanoid!") + + 
self._sim_model = self._pybullet_client.loadURDF( + "humanoid/humanoid.urdf", [0,0.85,0],globalScaling=0.25, useFixedBase=useFixedBase, flags=self._pybullet_client.URDF_MAINTAIN_LINK_ORDER) + + self._kin_model = self._pybullet_client.loadURDF( + "humanoid/humanoid.urdf", [0,2.85,0],globalScaling=0.25, useFixedBase=True, flags=self._pybullet_client.URDF_MAINTAIN_LINK_ORDER) + + self._pybullet_client.changeDynamics(self._sim_model, -1, lateralFriction=0.9) + for j in range (self._pybullet_client.getNumJoints(self._sim_model)): + self._pybullet_client.changeDynamics(self._sim_model, j, lateralFriction=0.9) + + self._pybullet_client.changeDynamics(self._sim_model, -1, linearDamping=0, angularDamping=0) + self._pybullet_client.changeDynamics(self._kin_model, -1, linearDamping=0, angularDamping=0) + self._poseInterpolator = humanoid_pose_interpolator.HumanoidPoseInterpolator() + + for i in range (self._mocap_data.NumFrames()-1): + frameData = self._mocap_data._motion_data['Frames'][i] + self._poseInterpolator.PostProcessMotionData(frameData) + + self._stablePD = pd_controller_stable.PDControllerStableMultiDof(self._pybullet_client) + self._timeStep = timeStep + self._kpOrg = [0,0,0,0,0,0,0,1000,1000,1000,1000,100,100,100,100,500,500,500,500,500,400,400,400,400,400,400,400,400,300,500,500,500,500,500,400,400,400,400,400,400,400,400,300] + self._kdOrg = [0,0,0,0,0,0,0,100,100,100,100,10,10,10,10,50,50,50,50,50,40,40,40,40,40,40,40,40,30,50,50,50,50,50,40,40,40,40,40,40,40,40,30] + + self._jointIndicesAll = [chest,neck, rightHip,rightKnee,rightAnkle,rightShoulder,rightElbow,leftHip,leftKnee,leftAnkle,leftShoulder,leftElbow] + for j in self._jointIndicesAll: + #self._pybullet_client.setJointMotorControlMultiDof(self._sim_model, j, self._pybullet_client.POSITION_CONTROL, force=[1,1,1]) + self._pybullet_client.setJointMotorControl2(self._sim_model, j, self._pybullet_client.POSITION_CONTROL, targetPosition=0, positionGain=0, targetVelocity=0,force=jointFrictionForce) + self._pybullet_client.setJointMotorControlMultiDof(self._sim_model, j, self._pybullet_client.POSITION_CONTROL,targetPosition=[0,0,0,1], targetVelocity=[0,0,0], positionGain=0,velocityGain=1,force=[jointFrictionForce,jointFrictionForce,jointFrictionForce]) + self._pybullet_client.setJointMotorControl2(self._kin_model, j, self._pybullet_client.POSITION_CONTROL, targetPosition=0, positionGain=0, targetVelocity=0,force=0) + self._pybullet_client.setJointMotorControlMultiDof(self._kin_model, j, self._pybullet_client.POSITION_CONTROL,targetPosition=[0,0,0,1], targetVelocity=[0,0,0], positionGain=0,velocityGain=1,force=[jointFrictionForce,jointFrictionForce,0]) + + self._jointDofCounts=[4,4,4,1,4,4,1,4,1,4,4,1] + + #only those body parts/links are allowed to touch the ground, otherwise the episode terminates + self._allowed_body_parts=[5,11] + + #[x,y,z] base position and [x,y,z,w] base orientation! 
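# --- editor's note: illustration only, not part of the patch ----------------------
# The pose vector packs 7 base coordinates ([x, y, z] position plus an [x, y, z, w]
# orientation) followed by one entry per joint DOF: spherical joints contribute 4
# (a quaternion), revolute joints 1. With the DOF counts used below this gives
# 7 + 36 = 43 entries, the pose size the stable PD controller consumes; the policy
# action drops the 7 base coordinates, matching get_action_size() further down in
# pybullet_deep_mimic_env.py (43 - 7 = 36).
joint_dof_counts = [4, 4, 4, 1, 4, 4, 1, 4, 1, 4, 4, 1]   # same order as _jointIndicesAll
total_dofs = 7 + sum(joint_dof_counts)                    # base pose + per-joint DOFs
assert total_dofs == 43
assert total_dofs - 7 == 36                               # size of the policy action
# ----------------------------------------------------------------------------------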
+ self._totalDofs = 7 + for dof in self._jointDofCounts: + self._totalDofs += dof + self.setSimTime(0) + self._maxForce = 6000 + self.resetPose() + + def resetPose(self): + print("resetPose with self._frame=", self._frame, " and self._frameFraction=",self._frameFraction) + pose = self.computePose(self._frameFraction) + self.initializePose(self._poseInterpolator, self._sim_model, initBase=True) + self.initializePose(self._poseInterpolator, self._kin_model, initBase=False) + + def initializePose(self, pose, phys_model,initBase, initializeVelocity = True): + + if initializeVelocity: + if initBase: + self._pybullet_client.resetBasePositionAndOrientation(phys_model, pose._basePos, pose._baseOrn) + self._pybullet_client.resetBaseVelocity(phys_model, pose._baseLinVel, pose._baseAngVel) + self._pybullet_client.resetJointStateMultiDof(phys_model,chest,pose._chestRot, pose._chestVel) + self._pybullet_client.resetJointStateMultiDof(phys_model,neck,pose._neckRot, pose._neckVel) + self._pybullet_client.resetJointStateMultiDof(phys_model,rightHip,pose._rightHipRot, pose._rightHipVel) + self._pybullet_client.resetJointStateMultiDof(phys_model,rightKnee,pose._rightKneeRot, pose._rightKneeVel) + self._pybullet_client.resetJointStateMultiDof(phys_model,rightAnkle,pose._rightAnkleRot, pose._rightAnkleVel) + self._pybullet_client.resetJointStateMultiDof(phys_model,rightShoulder,pose._rightShoulderRot, pose._rightShoulderVel) + self._pybullet_client.resetJointStateMultiDof(phys_model,rightElbow, pose._rightElbowRot, pose._rightElbowVel) + self._pybullet_client.resetJointStateMultiDof(phys_model,leftHip, pose._leftHipRot, pose._leftHipVel) + self._pybullet_client.resetJointStateMultiDof(phys_model,leftKnee, pose._leftKneeRot, pose._leftKneeVel) + self._pybullet_client.resetJointStateMultiDof(phys_model,leftAnkle, pose._leftAnkleRot, pose._leftAnkleVel) + self._pybullet_client.resetJointStateMultiDof(phys_model,leftShoulder, pose._leftShoulderRot, pose._leftShoulderVel) + self._pybullet_client.resetJointStateMultiDof(phys_model,leftElbow, pose._leftElbowRot, pose._leftElbowVel) + else: + if initBase: + self._pybullet_client.resetBasePositionAndOrientation(phys_model, pose._basePos, pose._baseOrn) + self._pybullet_client.resetJointStateMultiDof(phys_model,chest,pose._chestRot, [0,0,0]) + self._pybullet_client.resetJointStateMultiDof(phys_model,neck,pose._neckRot, [0,0,0]) + self._pybullet_client.resetJointStateMultiDof(phys_model,rightHip,pose._rightHipRot, [0,0,0]) + self._pybullet_client.resetJointStateMultiDof(phys_model,rightKnee,pose._rightKneeRot, [0]) + self._pybullet_client.resetJointStateMultiDof(phys_model,rightAnkle,pose._rightAnkleRot, [0,0,0]) + self._pybullet_client.resetJointStateMultiDof(phys_model,rightShoulder,pose._rightShoulderRot, [0,0,0]) + self._pybullet_client.resetJointStateMultiDof(phys_model,rightElbow, pose._rightElbowRot, [0]) + self._pybullet_client.resetJointStateMultiDof(phys_model,leftHip, pose._leftHipRot, [0,0,0]) + self._pybullet_client.resetJointStateMultiDof(phys_model,leftKnee, pose._leftKneeRot, [0]) + self._pybullet_client.resetJointStateMultiDof(phys_model,leftAnkle, pose._leftAnkleRot, [0,0,0]) + self._pybullet_client.resetJointStateMultiDof(phys_model,leftShoulder, pose._leftShoulderRot, [0,0,0]) + self._pybullet_client.resetJointStateMultiDof(phys_model,leftElbow, pose._leftElbowRot, [0]) + + def calcCycleCount(self, simTime, cycleTime): + phases = simTime / cycleTime; + count = math.floor(phases) + loop = True + #count = (loop) ? 
count : cMathUtil::Clamp(count, 0, 1); + return count + + + def getCycleTime(self): + keyFrameDuration = self._mocap_data.KeyFrameDuraction() + cycleTime = keyFrameDuration*(self._mocap_data.NumFrames()-1) + return cycleTime + + def setSimTime(self, t): + self._simTime = t + #print("SetTimeTime time =",t) + keyFrameDuration = self._mocap_data.KeyFrameDuraction() + cycleTime = self.getCycleTime() + #print("self._motion_data.NumFrames()=",self._mocap_data.NumFrames()) + cycles = self.calcCycleCount(t, cycleTime) + #print("cycles=",cycles) + frameTime = t - cycles*cycleTime + if (frameTime<0): + frameTime += cycleTime + + #print("keyFrameDuration=",keyFrameDuration) + #print("frameTime=",frameTime) + self._frame = int(frameTime/keyFrameDuration) + #print("self._frame=",self._frame) + + self._frameNext = self._frame+1 + if (self._frameNext >= self._mocap_data.NumFrames()): + self._frameNext = self._frame + + self._frameFraction = (frameTime - self._frame*keyFrameDuration)/(keyFrameDuration) + + def computePose(self, frameFraction): + frameData = self._mocap_data._motion_data['Frames'][self._frame] + frameDataNext = self._mocap_data._motion_data['Frames'][self._frameNext] + pose = self._poseInterpolator.Slerp(frameFraction, frameData, frameDataNext, self._pybullet_client) + return pose + + + def convertActionToPose(self, action): + pose = self._poseInterpolator.ConvertFromAction(self._pybullet_client, action) + return pose + + def computePDForces(self, desiredPositions, desiredVelocities = None): + if desiredVelocities==None: + desiredVelocities = [0]*self._totalDofs + taus = self._stablePD.computePD(bodyUniqueId=self._sim_model, + jointIndices = self._jointIndicesAll, + desiredPositions = desiredPositions, + desiredVelocities = desiredVelocities, + kps = self._kpOrg, + kds = self._kdOrg, + maxForces = [self._maxForce]*self._totalDofs, + timeStep=self._timeStep) + return taus + + def applyPDForces(self, taus): + dofIndex=7 + for index in range (len(self._jointIndicesAll)): + jointIndex = self._jointIndicesAll[index] + if self._jointDofCounts[index]==4: + self._pybullet_client.setJointMotorControlMultiDof(self._sim_model,jointIndex,self._pybullet_client.TORQUE_CONTROL,force=[taus[dofIndex+0],taus[dofIndex+1],taus[dofIndex+2]]) + if self._jointDofCounts[index]==1: + self._pybullet_client.setJointMotorControlMultiDof(self._sim_model, jointIndex, controlMode=self._pybullet_client.TORQUE_CONTROL, force=[taus[dofIndex]]) + dofIndex+=self._jointDofCounts[index] + + + + def getPhase(self): + keyFrameDuration = self._mocap_data.KeyFrameDuraction() + cycleTime = keyFrameDuration*(self._mocap_data.NumFrames()-1) + phase = self._simTime / cycleTime + phase = math.fmod(phase,1.0) + if (phase<0): + phase += 1 + return phase + + def buildHeadingTrans(self, rootOrn): + #align root transform 'forward' with world-space x axis + eul = self._pybullet_client.getEulerFromQuaternion(rootOrn) + refDir = [1,0,0] + rotVec = self._pybullet_client.rotateVector(rootOrn, refDir) + heading = math.atan2(-rotVec[2], rotVec[0]) + heading2=eul[1] + #print("heading=",heading) + headingOrn = self._pybullet_client.getQuaternionFromAxisAngle([0,1,0],-heading) + return headingOrn + + + def buildOriginTrans(self): + rootPos,rootOrn = self._pybullet_client.getBasePositionAndOrientation(self._sim_model) + + #print("rootPos=",rootPos, " rootOrn=",rootOrn) + invRootPos=[-rootPos[0], 0, -rootPos[2]] + #invOrigTransPos, invOrigTransOrn = self._pybullet_client.invertTransform(rootPos,rootOrn) + headingOrn = self.buildHeadingTrans(rootOrn) + 
#print("headingOrn=",headingOrn) + headingMat = self._pybullet_client.getMatrixFromQuaternion(headingOrn) + #print("headingMat=",headingMat) + #dummy, rootOrnWithoutHeading = self._pybullet_client.multiplyTransforms([0,0,0],headingOrn, [0,0,0], rootOrn) + #dummy, invOrigTransOrn = self._pybullet_client.multiplyTransforms([0,0,0],rootOrnWithoutHeading, invOrigTransPos, invOrigTransOrn) + + invOrigTransPos, invOrigTransOrn = self._pybullet_client.multiplyTransforms( [0,0,0],headingOrn, invRootPos,[0,0,0,1]) + #print("invOrigTransPos=",invOrigTransPos) + #print("invOrigTransOrn=",invOrigTransOrn) + invOrigTransMat = self._pybullet_client.getMatrixFromQuaternion(invOrigTransOrn) + #print("invOrigTransMat =",invOrigTransMat ) + return invOrigTransPos, invOrigTransOrn + + def getState(self): + + stateVector = [] + phase = self.getPhase() + #print("phase=",phase) + stateVector.append(phase) + + rootTransPos, rootTransOrn=self.buildOriginTrans() + basePos,baseOrn = self._pybullet_client.getBasePositionAndOrientation(self._sim_model) + + rootPosRel, dummy = self._pybullet_client.multiplyTransforms(rootTransPos, rootTransOrn, basePos,[0,0,0,1]) + #print("!!!rootPosRel =",rootPosRel ) + #print("rootTransPos=",rootTransPos) + #print("basePos=",basePos) + localPos,localOrn = self._pybullet_client.multiplyTransforms( rootTransPos, rootTransOrn , basePos,baseOrn ) + + localPos=[localPos[0]-rootPosRel[0],localPos[1]-rootPosRel[1],localPos[2]-rootPosRel[2]] + #print("localPos=",localPos) + + stateVector.append(rootPosRel[1]) + + #self.pb2dmJoints=[0,1,2,9,10,11,3,4,5,12,13,14,6,7,8] + self.pb2dmJoints=[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14] + + for pbJoint in range (self._pybullet_client.getNumJoints(self._sim_model)): + j = self.pb2dmJoints[pbJoint] + #print("joint order:",j) + ls = self._pybullet_client.getLinkState(self._sim_model, j, computeForwardKinematics=True) + linkPos = ls[0] + linkOrn = ls[1] + linkPosLocal, linkOrnLocal = self._pybullet_client.multiplyTransforms(rootTransPos, rootTransOrn, linkPos,linkOrn) + if (linkOrnLocal[3]<0): + linkOrnLocal=[-linkOrnLocal[0],-linkOrnLocal[1],-linkOrnLocal[2],-linkOrnLocal[3]] + linkPosLocal=[linkPosLocal[0]-rootPosRel[0],linkPosLocal[1]-rootPosRel[1],linkPosLocal[2]-rootPosRel[2]] + for l in linkPosLocal: + stateVector.append(l) + #re-order the quaternion, DeepMimic uses w,x,y,z + stateVector.append(linkOrnLocal[3]) + stateVector.append(linkOrnLocal[0]) + stateVector.append(linkOrnLocal[1]) + stateVector.append(linkOrnLocal[2]) + + + for pbJoint in range (self._pybullet_client.getNumJoints(self._sim_model)): + j = self.pb2dmJoints[pbJoint] + ls = self._pybullet_client.getLinkState(self._sim_model, j, computeLinkVelocity=True) + linkLinVel = ls[6] + linkAngVel = ls[7] + for l in linkLinVel: + stateVector.append(l) + for l in linkAngVel: + stateVector.append(l) + + #print("stateVector len=",len(stateVector)) + #for st in range (len(stateVector)): + # print("state[",st,"]=",stateVector[st]) + return stateVector + + def terminates(self): + #check if any non-allowed body part hits the ground + terminates=False + pts = self._pybullet_client.getContactPoints() + for p in pts: + part = -1 + if (p[1]==self._sim_model): + part=p[3] + if (p[2]==self._sim_model): + part=p[4] + if (part >=0 and part not in self._allowed_body_parts): + #print("terminating part:", part) + terminates=True + + return terminates + + def getReward(self, pose): + #from DeepMimic double cSceneImitate::CalcRewardImitate + pose_w = 0.5 + vel_w = 0.05 + end_eff_w = 0 #0.15 + root_w = 0#0.2 + com_w 
= 0#0.1 + + total_w = pose_w + vel_w + end_eff_w + root_w + com_w + pose_w /= total_w + vel_w /= total_w + end_eff_w /= total_w + root_w /= total_w + com_w /= total_w + + pose_scale = 2 + vel_scale = 0.1 + end_eff_scale = 40 + root_scale = 5 + com_scale = 10 + err_scale = 1 + + reward = 0 + + pose_err = 0 + vel_err = 0 + end_eff_err = 0 + root_err = 0 + com_err = 0 + heading_err = 0 + + #create a mimic reward, comparing the dynamics humanoid with a kinematic one + + #pose = self.InitializePoseFromMotionData() + #print("self._kin_model=",self._kin_model) + #print("kinematicHumanoid #joints=",self._pybullet_client.getNumJoints(self._kin_model)) + #self.ApplyPose(pose, True, True, self._kin_model, self._pybullet_client) + + #const Eigen::VectorXd& pose0 = sim_char.GetPose(); + #const Eigen::VectorXd& vel0 = sim_char.GetVel(); + #const Eigen::VectorXd& pose1 = kin_char.GetPose(); + #const Eigen::VectorXd& vel1 = kin_char.GetVel(); + #tMatrix origin_trans = sim_char.BuildOriginTrans(); + #tMatrix kin_origin_trans = kin_char.BuildOriginTrans(); + # + #tVector com0_world = sim_char.CalcCOM(); + #tVector com_vel0_world = sim_char.CalcCOMVel(); + #tVector com1_world; + #tVector com_vel1_world; + #cRBDUtil::CalcCoM(joint_mat, body_defs, pose1, vel1, com1_world, com_vel1_world); + # + root_id = 0 + #tVector root_pos0 = cKinTree::GetRootPos(joint_mat, pose0); + #tVector root_pos1 = cKinTree::GetRootPos(joint_mat, pose1); + #tQuaternion root_rot0 = cKinTree::GetRootRot(joint_mat, pose0); + #tQuaternion root_rot1 = cKinTree::GetRootRot(joint_mat, pose1); + #tVector root_vel0 = cKinTree::GetRootVel(joint_mat, vel0); + #tVector root_vel1 = cKinTree::GetRootVel(joint_mat, vel1); + #tVector root_ang_vel0 = cKinTree::GetRootAngVel(joint_mat, vel0); + #tVector root_ang_vel1 = cKinTree::GetRootAngVel(joint_mat, vel1); + + mJointWeights = [0.20833,0.10416, 0.0625, 0.10416, + 0.0625, 0.041666666666666671, 0.0625, 0.0416, + 0.00, 0.10416, 0.0625, 0.0416, 0.0625, 0.0416, 0.0000] + + num_end_effs = 0 + num_joints = 15 + + root_rot_w = mJointWeights[root_id] + #pose_err += root_rot_w * cKinTree::CalcRootRotErr(joint_mat, pose0, pose1) + #vel_err += root_rot_w * cKinTree::CalcRootAngVelErr(joint_mat, vel0, vel1) + + for j in range (num_joints): + curr_pose_err = 0 + curr_vel_err = 0 + w = mJointWeights[j]; + + simJointInfo = self._pybullet_client.getJointStateMultiDof(self._sim_model, j) + + #print("simJointInfo.pos=",simJointInfo[0]) + #print("simJointInfo.vel=",simJointInfo[1]) + kinJointInfo = self._pybullet_client.getJointStateMultiDof(self._kin_model,j) + #print("kinJointInfo.pos=",kinJointInfo[0]) + #print("kinJointInfo.vel=",kinJointInfo[1]) + if (len(simJointInfo[0])==1): + angle = simJointInfo[0][0]-kinJointInfo[0][0] + curr_pose_err = angle*angle + velDiff = simJointInfo[1][0]-kinJointInfo[1][0] + curr_vel_err = velDiff*velDiff + if (len(simJointInfo[0])==4): + #print("quaternion diff") + diffQuat = self._pybullet_client.getDifferenceQuaternion(simJointInfo[0],kinJointInfo[0]) + axis,angle = self._pybullet_client.getAxisAngleFromQuaternion(diffQuat) + curr_pose_err = angle*angle + diffVel = [simJointInfo[1][0]-kinJointInfo[1][0],simJointInfo[1][1]-kinJointInfo[1][1],simJointInfo[1][2]-kinJointInfo[1][2]] + curr_vel_err = diffVel[0]*diffVel[0]+diffVel[1]*diffVel[1]+diffVel[2]*diffVel[2] + + + pose_err += w * curr_pose_err + vel_err += w * curr_vel_err + + # bool is_end_eff = sim_char.IsEndEffector(j) + # if (is_end_eff) + # { + # tVector pos0 = sim_char.CalcJointPos(j) + # tVector pos1 = 
cKinTree::CalcJointWorldPos(joint_mat, pose1, j) + # double ground_h0 = mGround->SampleHeight(pos0) + # double ground_h1 = kin_char.GetOriginPos()[1] + # + # tVector pos_rel0 = pos0 - root_pos0 + # tVector pos_rel1 = pos1 - root_pos1 + # pos_rel0[1] = pos0[1] - ground_h0 + # pos_rel1[1] = pos1[1] - ground_h1 + # + # pos_rel0 = origin_trans * pos_rel0 + # pos_rel1 = kin_origin_trans * pos_rel1 + # + # curr_end_err = (pos_rel1 - pos_rel0).squaredNorm() + # end_eff_err += curr_end_err; + # ++num_end_effs; + # } + #} + #if (num_end_effs > 0): + # end_eff_err /= num_end_effs + # + #double root_ground_h0 = mGround->SampleHeight(sim_char.GetRootPos()) + #double root_ground_h1 = kin_char.GetOriginPos()[1] + #root_pos0[1] -= root_ground_h0 + #root_pos1[1] -= root_ground_h1 + #root_pos_err = (root_pos0 - root_pos1).squaredNorm() + # + #root_rot_err = cMathUtil::QuatDiffTheta(root_rot0, root_rot1) + #root_rot_err *= root_rot_err + + #root_vel_err = (root_vel1 - root_vel0).squaredNorm() + #root_ang_vel_err = (root_ang_vel1 - root_ang_vel0).squaredNorm() + + #root_err = root_pos_err + # + 0.1 * root_rot_err + # + 0.01 * root_vel_err + # + 0.001 * root_ang_vel_err + #com_err = 0.1 * (com_vel1_world - com_vel0_world).squaredNorm() + + #print("pose_err=",pose_err) + #print("vel_err=",vel_err) + pose_reward = math.exp(-err_scale * pose_scale * pose_err) + vel_reward = math.exp(-err_scale * vel_scale * vel_err) + end_eff_reward = math.exp(-err_scale * end_eff_scale * end_eff_err) + root_reward = math.exp(-err_scale * root_scale * root_err) + com_reward = math.exp(-err_scale * com_scale * com_err) + + reward = pose_w * pose_reward + vel_w * vel_reward + end_eff_w * end_eff_reward + root_w * root_reward + com_w * com_reward + + #print("reward = %f (pose_reward=%f, vel_reward=%f, end_eff_reward=%f, root_reward=%f, com_reward=%f)\n", reward, + # pose_reward,vel_reward,end_eff_reward, root_reward, com_reward); + + return reward diff --git a/examples/pybullet/gym/pybullet_envs/deep_mimic/env/motion_capture_data.py b/examples/pybullet/gym/pybullet_envs/deep_mimic/env/motion_capture_data.py new file mode 100644 index 000000000..7a9c86b76 --- /dev/null +++ b/examples/pybullet/gym/pybullet_envs/deep_mimic/env/motion_capture_data.py @@ -0,0 +1,19 @@ +import json + +class MotionCaptureData(object): + def __init__(self): + self.Reset() + + def Reset(self): + self._motion_data = [] + + def Load(self, path): + with open(path, 'r') as f: + self._motion_data = json.load(f) + + def NumFrames(self): + return len(self._motion_data['Frames']) + + def KeyFrameDuraction(self): + return self._motion_data['Frames'][0][0] + diff --git a/examples/pybullet/gym/pybullet_envs/deep_mimic/env/pd_controller_stable.py b/examples/pybullet/gym/pybullet_envs/deep_mimic/env/pd_controller_stable.py new file mode 100644 index 000000000..bb0f5f243 --- /dev/null +++ b/examples/pybullet/gym/pybullet_envs/deep_mimic/env/pd_controller_stable.py @@ -0,0 +1,144 @@ +import numpy as np + + + +class PDControllerStableMultiDof(object): + def __init__(self, pb): + self._pb = pb + + def computePD(self, bodyUniqueId, jointIndices, desiredPositions, desiredVelocities, kps, kds, maxForces, timeStep): + + numJoints = len(jointIndices)#self._pb.getNumJoints(bodyUniqueId) + curPos,curOrn = self._pb.getBasePositionAndOrientation(bodyUniqueId) + #q1 = [desiredPositions[0],desiredPositions[1],desiredPositions[2],desiredPositions[3],desiredPositions[4],desiredPositions[5],desiredPositions[6]] + q1 = 
[curPos[0],curPos[1],curPos[2],curOrn[0],curOrn[1],curOrn[2],curOrn[3]] + + #qdot1 = [0,0,0, 0,0,0,0] + baseLinVel, baseAngVel = self._pb.getBaseVelocity(bodyUniqueId) + + qdot1 = [baseLinVel[0],baseLinVel[1],baseLinVel[2],baseAngVel[0],baseAngVel[1],baseAngVel[2],0] + qError = [0,0,0, 0,0,0,0] + + qIndex=7 + qdotIndex=7 + zeroAccelerations=[0,0,0, 0,0,0,0] + for i in range (numJoints): + js = self._pb.getJointStateMultiDof(bodyUniqueId, jointIndices[i]) + + jointPos=js[0] + jointVel = js[1] + q1+=jointPos + + if len(js[0])==1: + desiredPos=desiredPositions[qIndex] + + qdiff=desiredPos - jointPos[0] + qError.append(qdiff) + zeroAccelerations.append(0.) + qdot1+=jointVel + qIndex+=1 + qdotIndex+=1 + if len(js[0])==4: + desiredPos=[desiredPositions[qIndex],desiredPositions[qIndex+1],desiredPositions[qIndex+2],desiredPositions[qIndex+3]] + axis = self._pb.getAxisDifferenceQuaternion(desiredPos,jointPos) + jointVelNew = [jointVel[0],jointVel[1],jointVel[2],0] + qdot1+=jointVelNew + qError.append(axis[0]) + qError.append(axis[1]) + qError.append(axis[2]) + qError.append(0) + desiredVel=[desiredVelocities[qdotIndex],desiredVelocities[qdotIndex+1],desiredVelocities[qdotIndex+2]] + zeroAccelerations+=[0.,0.,0.,0.] + qIndex+=4 + qdotIndex+=4 + + q = np.array(q1) + qdot=np.array(qdot1) + + qdotdesired = np.array(desiredVelocities) + qdoterr = qdotdesired-qdot + + + Kp = np.diagflat(kps) + Kd = np.diagflat(kds) + + p = Kp.dot(qError) + + #np.savetxt("pb_qError.csv", qError, delimiter=",") + #np.savetxt("pb_p.csv", p, delimiter=",") + + d = Kd.dot(qdoterr) + + #np.savetxt("pb_d.csv", d, delimiter=",") + #np.savetxt("pbqdoterr.csv", qdoterr, delimiter=",") + + + M1 = self._pb.calculateMassMatrix(bodyUniqueId,q1, flags=1) + + + M2 = np.array(M1) + #np.savetxt("M2.csv", M2, delimiter=",") + + M = (M2 + Kd * timeStep) + + #np.savetxt("pbM_tKd.csv",M, delimiter=",") + + + + c1 = self._pb.calculateInverseDynamics(bodyUniqueId, q1, qdot1, zeroAccelerations, flags=1) + + + c = np.array(c1) + #np.savetxt("pbC.csv",c, delimiter=",") + A = M + #p = [0]*43 + b = p + d -c + #np.savetxt("pb_acc.csv",b, delimiter=",") + qddot = np.linalg.solve(A, b) + tau = p + d - Kd.dot(qddot) * timeStep + #print("len(tau)=",len(tau)) + maxF = np.array(maxForces) + forces = np.clip(tau, -maxF , maxF ) + return forces + + + +class PDControllerStable(object): + def __init__(self, pb): + self._pb = pb + + def computePD(self, bodyUniqueId, jointIndices, desiredPositions, desiredVelocities, kps, kds, maxForces, timeStep): + numJoints = self._pb.getNumJoints(bodyUniqueId) + jointStates = self._pb.getJointStates(bodyUniqueId, jointIndices) + q1 = [] + qdot1 = [] + zeroAccelerations = [] + for i in range (numJoints): + q1.append(jointStates[i][0]) + qdot1.append(jointStates[i][1]) + zeroAccelerations.append(0) + q = np.array(q1) + qdot=np.array(qdot1) + qdes = np.array(desiredPositions) + qdotdes = np.array(desiredVelocities) + qError = qdes - q + qdotError = qdotdes - qdot + Kp = np.diagflat(kps) + Kd = np.diagflat(kds) + p = Kp.dot(qError) + d = Kd.dot(qdotError) + forces = p + d + + M1 = self._pb.calculateMassMatrix(bodyUniqueId,q1) + M2 = np.array(M1) + M = (M2 + Kd * timeStep) + c1 = self._pb.calculateInverseDynamics(bodyUniqueId, q1, qdot1, zeroAccelerations) + c = np.array(c1) + A = M + b = -c + p + d + qddot = np.linalg.solve(A, b) + tau = p + d - Kd.dot(qddot) * timeStep + maxF = np.array(maxForces) + forces = np.clip(tau, -maxF , maxF ) + #print("c=",c) + return tau diff --git 
a/examples/pybullet/gym/pybullet_envs/deep_mimic/env/pybullet_deep_mimic_env.py b/examples/pybullet/gym/pybullet_envs/deep_mimic/env/pybullet_deep_mimic_env.py new file mode 100644 index 000000000..272c9df13 --- /dev/null +++ b/examples/pybullet/gym/pybullet_envs/deep_mimic/env/pybullet_deep_mimic_env.py @@ -0,0 +1,257 @@ +import numpy as np +import math +from pybullet_envs.deep_mimic.env.env import Env +from pybullet_envs.deep_mimic.env.action_space import ActionSpace +from pybullet_envs.minitaur.envs import bullet_client +import time +import motion_capture_data +from pybullet_envs.deep_mimic.env import humanoid_stable_pd +import pybullet_data +import pybullet as p1 +import random + + +class PyBulletDeepMimicEnv(Env): + def __init__(self, args=None, enable_draw=False, pybullet_client=None): + super().__init__(args, enable_draw) + self._num_agents = 1 + self._pybullet_client = pybullet_client + self._isInitialized = False + self.reset() + + def reset(self): + self.t = 0 + if not self._isInitialized: + if self.enable_draw: + self._pybullet_client = bullet_client.BulletClient(connection_mode=p1.GUI) + else: + self._pybullet_client = bullet_client.BulletClient() + + self._pybullet_client.setAdditionalSearchPath(pybullet_data.getDataPath()) + z2y = self._pybullet_client.getQuaternionFromEuler([-math.pi*0.5,0,0]) + self._planeId = self._pybullet_client.loadURDF("plane.urdf",[0,0,0],z2y) + #print("planeId=",self._planeId) + self._pybullet_client.configureDebugVisualizer(self._pybullet_client.COV_ENABLE_Y_AXIS_UP,1) + self._pybullet_client.setGravity(0,-9.8,0) + + self._pybullet_client.setPhysicsEngineParameter(numSolverIterations=10) + self._pybullet_client.changeDynamics(self._planeId, linkIndex=-1, lateralFriction=0.9) + + self._mocapData = motion_capture_data.MotionCaptureData() + motionPath = pybullet_data.getDataPath()+"/motions/humanoid3d_walk.txt" + #motionPath = pybullet_data.getDataPath()+"/motions/humanoid3d_backflip.txt" + self._mocapData.Load(motionPath) + timeStep = 1./600 + useFixedBase=False + self._humanoid = humanoid_stable_pd.HumanoidStablePD(self._pybullet_client, self._mocapData, timeStep, useFixedBase) + self._isInitialized = True + + self._pybullet_client.setTimeStep(timeStep) + self._pybullet_client.setPhysicsEngineParameter(numSubSteps=2) + + + selfCheck = False + if (selfCheck): + curTime = 0 + while self._pybullet_client.isConnected(): + self._humanoid.setSimTime(curTime) + state = self._humanoid.getState() + #print("state=",state) + pose = self._humanoid.computePose(self._humanoid._frameFraction) + for i in range (10): + curTime+=timeStep + #taus = self._humanoid.computePDForces(pose) + #self._humanoid.applyPDForces(taus) + #self._pybullet_client.stepSimulation() + time.sleep(timeStep) + #print("numframes = ", self._humanoid._mocap_data.NumFrames()) + startTime = random.randint(0,self._humanoid._mocap_data.NumFrames()-2) + rnrange = 1000 + rn = random.randint(0,rnrange) + startTime = float(rn)/rnrange * self._humanoid.getCycleTime() + + self._humanoid.setSimTime(startTime) + self._humanoid.resetPose() + #this clears the contact points. Todo: add API to explicitly clear all contact points? 
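# --- editor's note: sketch only, not part of the patch ----------------------------
# Each reset() samples a random phase of the reference clip and initializes the
# character from that mocap frame (DeepMimic-style reference state initialization).
# The sampled time wraps into one motion cycle of length
# keyFrameDuration * (NumFrames() - 1), as in getCycleTime() above. Standalone
# sketch of the sampling, with a made-up frame duration and frame count:
import random
key_frame_duration = 0.0625             # hypothetical; normally mocap Frames[0][0]
num_frames = 33                         # hypothetical clip length
cycle_time = key_frame_duration * (num_frames - 1)
rn_range = 1000
start_time = float(random.randint(0, rn_range)) / rn_range * cycle_time
frame_time = start_time % cycle_time
frame = int(frame_time / key_frame_duration)
frame_fraction = frame_time / key_frame_duration - frame
assert 0 <= frame < num_frames and 0.0 <= frame_fraction < 1.0
# ----------------------------------------------------------------------------------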
+ self._pybullet_client.stepSimulation() + + + def get_num_agents(self): + return self._num_agents + + def get_action_space(self, agent_id): + return ActionSpace(ActionSpace.Continuous) + + def get_reward_min(self, agent_id): + return 0 + + def get_reward_max(self, agent_id): + return 1 + + def get_reward_fail(self, agent_id): + return self.get_reward_min(agent_id) + + def get_reward_succ(self, agent_id): + return self.get_reward_max(agent_id) + + #scene_name == "imitate" -> cDrawSceneImitate + def get_state_size(self, agent_id): + #cCtController::GetStateSize() + #int state_size = cDeepMimicCharController::GetStateSize(); + # state_size += GetStatePoseSize();#106 + # state_size += GetStateVelSize(); #(3+3)*numBodyParts=90 + #state_size += GetStatePhaseSize();#1 + #197 + return 197 + + def build_state_norm_groups(self, agent_id): + #if (mEnablePhaseInput) + #{ + #int phase_group = gNormGroupNone; + #int phase_offset = GetStatePhaseOffset(); + #int phase_size = GetStatePhaseSize(); + #out_groups.segment(phase_offset, phase_size) = phase_group * Eigen::VectorXi::Ones(phase_size); + groups = [0]*self.get_state_size(agent_id) + groups[0] = -1 + return groups + + def build_state_offset(self, agent_id): + out_offset = [0]*self.get_state_size(agent_id) + phase_offset = -0.5 + out_offset[0] = phase_offset + return np.array(out_offset) + + def build_state_scale(self, agent_id): + out_scale = [1]*self.get_state_size(agent_id) + phase_scale = 2 + out_scale[0] = phase_scale + return np.array(out_scale) + + def get_goal_size(self, agent_id): + return 0 + + def get_action_size(self, agent_id): + ctrl_size = 43 #numDof + root_size = 7 + return ctrl_size - root_size + + def build_goal_norm_groups(self, agent_id): + return np.array([]) + + def build_goal_offset(self, agent_id): + return np.array([]) + + def build_goal_scale(self, agent_id): + return np.array([]) + + def build_action_offset(self, agent_id): + out_offset = [0] * self.get_action_size(agent_id) + out_offset = [0.0000000000,0.0000000000,0.0000000000,-0.200000000,0.0000000000,0.0000000000,0.0000000000, + -0.200000000,0.0000000000,0.0000000000, 0.00000000, -0.2000000, 1.57000000, 0.00000000, 0.00000000, + 0.00000000, -0.2000000, 0.00000000, 0.00000000, 0.00000000, -0.2000000, -1.5700000, 0.00000000, 0.00000000, + 0.00000000, -0.2000000, 1.57000000, 0.00000000, 0.00000000, 0.00000000, -0.2000000, 0.00000000, 0.00000000, + 0.00000000, -0.2000000, -1.5700000] + #see cCtCtrlUtil::BuildOffsetScalePDPrismatic and + #see cCtCtrlUtil::BuildOffsetScalePDSpherical + return np.array(out_offset) + + def build_action_scale(self, agent_id): + out_scale = [1] * self.get_action_size(agent_id) + #see cCtCtrlUtil::BuildOffsetScalePDPrismatic and + #see cCtCtrlUtil::BuildOffsetScalePDSpherical + out_scale=[ 0.20833333333333,1.00000000000000,1.00000000000000,1.00000000000000,0.25000000000000, + 1.00000000000000,1.00000000000000,1.00000000000000,0.12077294685990,1.00000000000000, + 1.000000000000, 1.000000000000, 0.159235668789, 0.159235668789, 1.000000000000, + 1.000000000000, 1.000000000000, 0.079617834394, 1.000000000000, 1.000000000000, + 1.000000000000, 0.159235668789, 0.120772946859, 1.000000000000, 1.000000000000, + 1.000000000000, 0.159235668789, 0.159235668789, 1.000000000000, 1.000000000000, + 1.000000000000, 0.107758620689, 1.000000000000, 1.000000000000, 1.000000000000, + 0.159235668789] + return np.array(out_scale) + + def build_action_bound_min(self, agent_id): + #see cCtCtrlUtil::BuildBoundsPDSpherical + out_scale = [-1] * 
self.get_action_size(agent_id) + out_scale = [-4.79999999999,-1.00000000000,-1.00000000000,-1.00000000000,-4.00000000000, + -1.00000000000,-1.00000000000,-1.00000000000,-7.77999999999,-1.00000000000, -1.000000000, + -1.000000000, -7.850000000, -6.280000000, -1.000000000, -1.000000000, -1.000000000, + -12.56000000, -1.000000000, -1.000000000, -1.000000000, -4.710000000, + -7.779999999, -1.000000000, -1.000000000, -1.000000000, -7.850000000, + -6.280000000, -1.000000000, -1.000000000, -1.000000000, -8.460000000, + -1.000000000, -1.000000000, -1.000000000, -4.710000000] + + return out_scale + + def build_action_bound_max(self, agent_id): + out_scale = [1] * self.get_action_size(agent_id) + out_scale=[ + 4.799999999,1.000000000,1.000000000,1.000000000,4.000000000,1.000000000, + 1.000000000,1.000000000,8.779999999,1.000000000, 1.0000000, 1.0000000, + 4.7100000, 6.2800000, 1.0000000, 1.0000000, 1.0000000, + 12.560000, 1.0000000, 1.0000000, 1.0000000, 7.8500000, + 8.7799999, 1.0000000, 1.0000000, 1.0000000, 4.7100000, + 6.2800000, 1.0000000, 1.0000000, 1.0000000, 10.100000, + 1.0000000, 1.0000000, 1.0000000, 7.8500000] + return out_scale + + def set_mode(self, mode): + self._mode = mode + + def need_new_action(self, agent_id): + return True + + def record_state(self, agent_id): + state = self._humanoid.getState() + state[1]=state[1]+0.008 + #print("record_state=",state) + return np.array(state) + + + def record_goal(self, agent_id): + return np.array([]) + + def calc_reward(self, agent_id): + kinPose = self._humanoid.computePose(self._humanoid._frameFraction) + reward = self._humanoid.getReward(kinPose) + return reward + + def set_action(self, agent_id, action): + self.desiredPose = self._humanoid.convertActionToPose(action) + #print("set_action: desiredPose=", self.desiredPose) + + def log_val(self, agent_id, val): + pass + + def update(self, timeStep): + for i in range(10): + self.t += timeStep + self._humanoid.setSimTime(self.t) + + if self.desiredPose: + kinPose = self._humanoid.computePose(self._humanoid._frameFraction) + self._humanoid.initializePose(self._humanoid._poseInterpolator, self._humanoid._kin_model, initBase=False) + pos,orn=self._pybullet_client.getBasePositionAndOrientation(self._humanoid._sim_model) + self._pybullet_client.resetBasePositionAndOrientation(self._humanoid._kin_model, [pos[0],pos[1]+2,pos[2]],orn) + #print("desiredPositions=",self.desiredPose) + taus = self._humanoid.computePDForces(self.desiredPose) + self._humanoid.applyPDForces(taus) + self._pybullet_client.stepSimulation() + + + def set_sample_count(self, count): + return + + def check_terminate(self, agent_id): + return Env.Terminate(self.is_episode_end()) + + def is_episode_end(self): + isEnded = self._humanoid.terminates() + #also check maximum time, 20 seconds (todo get from file) + #print("self.t=",self.t) + if (self.t>20): + isEnded = True + return isEnded + + def check_valid_episode(self): + #could check if limbs exceed velocity threshold + return True diff --git a/examples/pybullet/gym/pybullet_envs/deep_mimic/learning/agent_builder.py b/examples/pybullet/gym/pybullet_envs/deep_mimic/learning/agent_builder.py new file mode 100644 index 000000000..ca54f46d5 --- /dev/null +++ b/examples/pybullet/gym/pybullet_envs/deep_mimic/learning/agent_builder.py @@ -0,0 +1,21 @@ +import json +import numpy as np +from learning.ppo_agent import PPOAgent +import pybullet_data + +AGENT_TYPE_KEY = "AgentType" + +def build_agent(world, id, file): + agent = None + with open(pybullet_data.getDataPath()+"/"+file) as 
data_file: + json_data = json.load(data_file) + + assert AGENT_TYPE_KEY in json_data + agent_type = json_data[AGENT_TYPE_KEY] + + if (agent_type == PPOAgent.NAME): + agent = PPOAgent(world, id, json_data) + else: + assert False, 'Unsupported agent type: ' + agent_type + + return agent diff --git a/examples/pybullet/gym/pybullet_envs/deep_mimic/learning/exp_params.py b/examples/pybullet/gym/pybullet_envs/deep_mimic/learning/exp_params.py new file mode 100644 index 000000000..07c83e0f2 --- /dev/null +++ b/examples/pybullet/gym/pybullet_envs/deep_mimic/learning/exp_params.py @@ -0,0 +1,54 @@ +import json +import numpy as np +import pybullet_utils.math_util as MathUtil + +class ExpParams(object): + RATE_KEY = 'Rate' + INIT_ACTION_RATE_KEY = 'InitActionRate' + NOISE_KEY = 'Noise' + NOISE_INTERNAL_KEY = 'NoiseInternal' + TEMP_KEY = 'Temp' + + def __init__(self): + self.rate = 0.2 + self.init_action_rate = 0 + self.noise = 0.1 + self.noise_internal = 0 + self.temp = 0.1 + return + + def __str__(self): + str = '' + str += '{}: {:.2f}\n'.format(self.RATE_KEY, self.rate) + str += '{}: {:.2f}\n'.format(self.INIT_ACTION_RATE_KEY, self.init_action_rate) + str += '{}: {:.2f}\n'.format(self.NOISE_KEY, self.noise) + str += '{}: {:.2f}\n'.format(self.NOISE_INTERNAL_KEY, self.noise_internal) + str += '{}: {:.2f}\n'.format(self.TEMP_KEY, self.temp) + return str + + def load(self, json_data): + if (self.RATE_KEY in json_data): + self.rate = json_data[self.RATE_KEY] + + if (self.INIT_ACTION_RATE_KEY in json_data): + self.init_action_rate = json_data[self.INIT_ACTION_RATE_KEY] + + if (self.NOISE_KEY in json_data): + self.noise = json_data[self.NOISE_KEY] + + if (self.NOISE_INTERNAL_KEY in json_data): + self.noise_internal = json_data[self.NOISE_INTERNAL_KEY] + + if (self.TEMP_KEY in json_data): + self.temp = json_data[self.TEMP_KEY] + + return + + def lerp(self, other, t): + lerp_params = ExpParams() + lerp_params.rate = MathUtil.lerp(self.rate, other.rate, t) + lerp_params.init_action_rate = MathUtil.lerp(self.init_action_rate, other.init_action_rate, t) + lerp_params.noise = MathUtil.lerp(self.noise, other.noise, t) + lerp_params.noise_internal = MathUtil.lerp(self.noise_internal, other.noise_internal, t) + lerp_params.temp = MathUtil.log_lerp(self.temp, other.temp, t) + return lerp_params \ No newline at end of file diff --git a/examples/pybullet/gym/pybullet_envs/deep_mimic/learning/nets/__init__.py b/examples/pybullet/gym/pybullet_envs/deep_mimic/learning/nets/__init__.py new file mode 100644 index 000000000..b9742821a --- /dev/null +++ b/examples/pybullet/gym/pybullet_envs/deep_mimic/learning/nets/__init__.py @@ -0,0 +1 @@ +from . 
import * \ No newline at end of file diff --git a/examples/pybullet/gym/pybullet_envs/deep_mimic/learning/nets/fc_2layers_1024units.py b/examples/pybullet/gym/pybullet_envs/deep_mimic/learning/nets/fc_2layers_1024units.py new file mode 100644 index 000000000..20f5c0ffa --- /dev/null +++ b/examples/pybullet/gym/pybullet_envs/deep_mimic/learning/nets/fc_2layers_1024units.py @@ -0,0 +1,13 @@ +import tensorflow as tf +import learning.tf_util as TFUtil + +NAME = "fc_2layers_1024units" + +def build_net(input_tfs, reuse=False): + layers = [1024, 512] + activation = tf.nn.relu + + input_tf = tf.concat(axis=-1, values=input_tfs) + h = TFUtil.fc_net(input_tf, layers, activation=activation, reuse=reuse) + h = activation(h) + return h \ No newline at end of file diff --git a/examples/pybullet/gym/pybullet_envs/deep_mimic/learning/nets/net_builder.py b/examples/pybullet/gym/pybullet_envs/deep_mimic/learning/nets/net_builder.py new file mode 100644 index 000000000..1b5983a1b --- /dev/null +++ b/examples/pybullet/gym/pybullet_envs/deep_mimic/learning/nets/net_builder.py @@ -0,0 +1,11 @@ +import learning.nets.fc_2layers_1024units as fc_2layers_1024units + +def build_net(net_name, input_tfs, reuse=False): + net = None + + if (net_name == fc_2layers_1024units.NAME): + net = fc_2layers_1024units.build_net(input_tfs, reuse) + else: + assert False, 'Unsupported net: ' + net_name + + return net \ No newline at end of file diff --git a/examples/pybullet/gym/pybullet_envs/deep_mimic/learning/normalizer.py b/examples/pybullet/gym/pybullet_envs/deep_mimic/learning/normalizer.py new file mode 100644 index 000000000..74bf90569 --- /dev/null +++ b/examples/pybullet/gym/pybullet_envs/deep_mimic/learning/normalizer.py @@ -0,0 +1,149 @@ +import numpy as np +import copy +import pybullet_utils.mpi_util as MPIUtil +from pybullet_utils.logger import Logger + +class Normalizer(object): + CHECK_SYNC_COUNT = 50000 # check synchronization after a certain number of entries + + # these group IDs must be the same as those in CharController.h + NORM_GROUP_SINGLE = 0 + NORM_GROUP_NONE = -1 + + class Group(object): + def __init__(self, id, indices): + self.id = id + self.indices = indices + return + + def __init__(self, size, groups_ids=None, eps=0.02, clip=np.inf): + self.eps = eps + self.clip = clip + self.mean = np.zeros(size) + self.mean_sq = np.zeros(size) + self.std = np.ones(size) + self.count = 0 + self.groups = self._build_groups(groups_ids) + + self.new_count = 0 + self.new_sum = np.zeros_like(self.mean) + self.new_sum_sq = np.zeros_like(self.mean_sq) + return + + def record(self, x): + size = self.get_size() + is_array = isinstance(x, np.ndarray) + if not is_array: + assert(size == 1) + x = np.array([[x]]) + + assert x.shape[-1] == size, \ + Logger.print2('Normalizer shape mismatch, expecting size {:d}, but got {:d}'.format(size, x.shape[-1])) + x = np.reshape(x, [-1, size]) + + self.new_count += x.shape[0] + self.new_sum += np.sum(x, axis=0) + self.new_sum_sq += np.sum(np.square(x), axis=0) + return + + def update(self): + new_count = MPIUtil.reduce_sum(self.new_count) + new_sum = MPIUtil.reduce_sum(self.new_sum) + new_sum_sq = MPIUtil.reduce_sum(self.new_sum_sq) + + new_total = self.count + new_count + if (self.count // self.CHECK_SYNC_COUNT != new_total // self.CHECK_SYNC_COUNT): + assert self.check_synced(), Logger.print2('Normalizer parameters desynchronized') + + if new_count > 0: + new_mean = self._process_group_data(new_sum / new_count, self.mean) + new_mean_sq = self._process_group_data(new_sum_sq / new_count, 
self.mean_sq) + w_old = float(self.count) / new_total + w_new = float(new_count) / new_total + + self.mean = w_old * self.mean + w_new * new_mean + self.mean_sq = w_old * self.mean_sq + w_new * new_mean_sq + self.count = new_total + self.std = self.calc_std(self.mean, self.mean_sq) + + self.new_count = 0 + self.new_sum.fill(0) + self.new_sum_sq.fill(0) + + return + + def get_size(self): + return self.mean.size + + def set_mean_std(self, mean, std): + size = self.get_size() + is_array = isinstance(mean, np.ndarray) and isinstance(std, np.ndarray) + + if not is_array: + assert(size == 1) + mean = np.array([mean]) + std = np.array([std]) + + assert len(mean) == size and len(std) == size, \ + Logger.print2('Normalizer shape mismatch, expecting size {:d}, but got {:d} and {:d}'.format(size, len(mean), len(std))) + + self.mean = mean + self.std = std + self.mean_sq = self.calc_mean_sq(self.mean, self.std) + return + + def normalize(self, x): + norm_x = (x - self.mean) / self.std + norm_x = np.clip(norm_x, -self.clip, self.clip) + return norm_x + + def unnormalize(self, norm_x): + x = norm_x * self.std + self.mean + return x + + def calc_std(self, mean, mean_sq): + var = mean_sq - np.square(mean) + # some time floating point errors can lead to small negative numbers + var = np.maximum(var, 0) + std = np.sqrt(var) + std = np.maximum(std, self.eps) + return std + + def calc_mean_sq(self, mean, std): + return np.square(std) + np.square(self.mean) + + def check_synced(self): + synced = True + if MPIUtil.is_root_proc(): + vars = np.concatenate([self.mean, self.mean_sq]) + MPIUtil.bcast(vars) + else: + vars_local = np.concatenate([self.mean, self.mean_sq]) + vars_root = np.empty_like(vars_local) + MPIUtil.bcast(vars_root) + synced = (vars_local == vars_root).all() + return synced + + def _build_groups(self, groups_ids): + groups = [] + if groups_ids is None: + curr_id = self.NORM_GROUP_SINGLE + curr_list = np.arange(self.get_size()).astype(np.int32) + groups.append(self.Group(curr_id, curr_list)) + else: + ids = np.unique(groups_ids) + for id in ids: + curr_list = np.nonzero(groups_ids == id)[0].astype(np.int32) + groups.append(self.Group(id, curr_list)) + + return groups + + def _process_group_data(self, new_data, old_data): + proc_data = new_data.copy() + for group in self.groups: + if group.id == self.NORM_GROUP_NONE: + proc_data[group.indices] = old_data[group.indices] + elif group.id != self.NORM_GROUP_SINGLE: + avg = np.mean(new_data[group.indices]) + proc_data[group.indices] = avg + return proc_data \ No newline at end of file diff --git a/examples/pybullet/gym/pybullet_envs/deep_mimic/learning/path.py b/examples/pybullet/gym/pybullet_envs/deep_mimic/learning/path.py new file mode 100644 index 000000000..c6855491e --- /dev/null +++ b/examples/pybullet/gym/pybullet_envs/deep_mimic/learning/path.py @@ -0,0 +1,46 @@ +import numpy as np +from pybullet_envs.deep_mimic.env.env import Env + +class Path(object): + def __init__(self): + self.clear() + return + + def pathlength(self): + return len(self.actions) + + def is_valid(self): + valid = True + l = self.pathlength() + valid &= len(self.states) == l + 1 + valid &= len(self.goals) == l + 1 + valid &= len(self.actions) == l + valid &= len(self.logps) == l + valid &= len(self.rewards) == l + valid &= len(self.flags) == l + + return valid + + def check_vals(self): + for vals in [self.states, self.goals, self.actions, self.logps, + self.rewards]: + for v in vals: + if not np.isfinite(v).all(): + return False + return True + + def clear(self): + 
self.states = [] + self.goals = [] + self.actions = [] + self.logps = [] + self.rewards = [] + self.flags = [] + self.terminate = Env.Terminate.Null + return + + def get_pathlen(self): + return len(self.rewards) + + def calc_return(self): + return sum(self.rewards) \ No newline at end of file diff --git a/examples/pybullet/gym/pybullet_envs/deep_mimic/learning/pg_agent.py b/examples/pybullet/gym/pybullet_envs/deep_mimic/learning/pg_agent.py new file mode 100644 index 000000000..1f82b8553 --- /dev/null +++ b/examples/pybullet/gym/pybullet_envs/deep_mimic/learning/pg_agent.py @@ -0,0 +1,353 @@ +import numpy as np +import tensorflow as tf +import copy + +from learning.tf_agent import TFAgent +from learning.solvers.mpi_solver import MPISolver +import learning.tf_util as TFUtil +import learning.nets.net_builder as NetBuilder +from learning.tf_normalizer import TFNormalizer +import learning.rl_util as RLUtil +from pybullet_utils.logger import Logger +import pybullet_utils.mpi_util as MPIUtil +import pybullet_utils.math_util as MathUtil +from pybullet_envs.deep_mimic.env.action_space import ActionSpace +from pybullet_envs.deep_mimic.env.env import Env + +''' +Policy Gradient Agent +''' + +class PGAgent(TFAgent): + NAME = 'PG' + + ACTOR_NET_KEY = 'ActorNet' + ACTOR_STEPSIZE_KEY = 'ActorStepsize' + ACTOR_MOMENTUM_KEY = 'ActorMomentum' + ACTOR_WEIGHT_DECAY_KEY = 'ActorWeightDecay' + ACTOR_INIT_OUTPUT_SCALE_KEY = 'ActorInitOutputScale' + + CRITIC_NET_KEY = 'CriticNet' + CRITIC_STEPSIZE_KEY = 'CriticStepsize' + CRITIC_MOMENTUM_KEY = 'CriticMomentum' + CRITIC_WEIGHT_DECAY_KEY = 'CriticWeightDecay' + + EXP_ACTION_FLAG = 1 << 0 + + def __init__(self, world, id, json_data): + self._exp_action = False + super().__init__(world, id, json_data) + return + + def reset(self): + super().reset() + self._exp_action = False + return + + def _check_action_space(self): + action_space = self.get_action_space() + return action_space == ActionSpace.Continuous + + def _load_params(self, json_data): + super()._load_params(json_data) + self.val_min, self.val_max = self._calc_val_bounds(self.discount) + self.val_fail, self.val_succ = self._calc_term_vals(self.discount) + return + + def _build_nets(self, json_data): + assert self.ACTOR_NET_KEY in json_data + assert self.CRITIC_NET_KEY in json_data + + actor_net_name = json_data[self.ACTOR_NET_KEY] + critic_net_name = json_data[self.CRITIC_NET_KEY] + actor_init_output_scale = 1 if (self.ACTOR_INIT_OUTPUT_SCALE_KEY not in json_data) else json_data[self.ACTOR_INIT_OUTPUT_SCALE_KEY] + + s_size = self.get_state_size() + g_size = self.get_goal_size() + a_size = self.get_action_size() + + # setup input tensors + self.s_tf = tf.placeholder(tf.float32, shape=[None, s_size], name="s") # observations + self.tar_val_tf = tf.placeholder(tf.float32, shape=[None], name="tar_val") # target value s + self.adv_tf = tf.placeholder(tf.float32, shape=[None], name="adv") # advantage + self.a_tf = tf.placeholder(tf.float32, shape=[None, a_size], name="a") # target actions + self.g_tf = tf.placeholder(tf.float32, shape=([None, g_size] if self.has_goal() else None), name="g") # goals + + with tf.variable_scope('main'): + with tf.variable_scope('actor'): + self.actor_tf = self._build_net_actor(actor_net_name, actor_init_output_scale) + with tf.variable_scope('critic'): + self.critic_tf = self._build_net_critic(critic_net_name) + + if (self.actor_tf != None): + Logger.print2('Built actor net: ' + actor_net_name) + + if (self.critic_tf != None): + Logger.print2('Built critic net: ' + critic_net_name) + 
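+    #PGAgent samples exploration noise per-step in _decide_action rather than building a sampling op in the graph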
+ return + + def _build_normalizers(self): + super()._build_normalizers() + with self.sess.as_default(), self.graph.as_default(), tf.variable_scope(self.tf_scope): + with tf.variable_scope(self.RESOURCE_SCOPE): + val_offset, val_scale = self._calc_val_offset_scale(self.discount) + self.val_norm = TFNormalizer(self.sess, 'val_norm', 1) + self.val_norm.set_mean_std(-val_offset, 1.0 / val_scale) + return + + def _init_normalizers(self): + super()._init_normalizers() + with self.sess.as_default(), self.graph.as_default(): + self.val_norm.update() + return + + def _load_normalizers(self): + super()._load_normalizers() + self.val_norm.load() + return + + def _build_losses(self, json_data): + actor_weight_decay = 0 if (self.ACTOR_WEIGHT_DECAY_KEY not in json_data) else json_data[self.ACTOR_WEIGHT_DECAY_KEY] + critic_weight_decay = 0 if (self.CRITIC_WEIGHT_DECAY_KEY not in json_data) else json_data[self.CRITIC_WEIGHT_DECAY_KEY] + + norm_val_diff = self.val_norm.normalize_tf(self.tar_val_tf) - self.val_norm.normalize_tf(self.critic_tf) + self.critic_loss_tf = 0.5 * tf.reduce_mean(tf.square(norm_val_diff)) + + if (critic_weight_decay != 0): + self.critic_loss_tf += critic_weight_decay * self._weight_decay_loss('main/critic') + + norm_a_mean_tf = self.a_norm.normalize_tf(self.actor_tf) + norm_a_diff = self.a_norm.normalize_tf(self.a_tf) - norm_a_mean_tf + + self.actor_loss_tf = tf.reduce_sum(tf.square(norm_a_diff), axis=-1) + self.actor_loss_tf *= self.adv_tf + self.actor_loss_tf = 0.5 * tf.reduce_mean(self.actor_loss_tf) + + norm_a_bound_min = self.a_norm.normalize(self.a_bound_min) + norm_a_bound_max = self.a_norm.normalize(self.a_bound_max) + a_bound_loss = TFUtil.calc_bound_loss(norm_a_mean_tf, norm_a_bound_min, norm_a_bound_max) + a_bound_loss /= self.exp_params_curr.noise + self.actor_loss_tf += a_bound_loss + + if (actor_weight_decay != 0): + self.actor_loss_tf += actor_weight_decay * self._weight_decay_loss('main/actor') + + return + + def _build_solvers(self, json_data): + actor_stepsize = 0.001 if (self.ACTOR_STEPSIZE_KEY not in json_data) else json_data[self.ACTOR_STEPSIZE_KEY] + actor_momentum = 0.9 if (self.ACTOR_MOMENTUM_KEY not in json_data) else json_data[self.ACTOR_MOMENTUM_KEY] + critic_stepsize = 0.01 if (self.CRITIC_STEPSIZE_KEY not in json_data) else json_data[self.CRITIC_STEPSIZE_KEY] + critic_momentum = 0.9 if (self.CRITIC_MOMENTUM_KEY not in json_data) else json_data[self.CRITIC_MOMENTUM_KEY] + + critic_vars = self._tf_vars('main/critic') + critic_opt = tf.train.MomentumOptimizer(learning_rate=critic_stepsize, momentum=critic_momentum) + self.critic_grad_tf = tf.gradients(self.critic_loss_tf, critic_vars) + self.critic_solver = MPISolver(self.sess, critic_opt, critic_vars) + + actor_vars = self._tf_vars('main/actor') + actor_opt = tf.train.MomentumOptimizer(learning_rate=actor_stepsize, momentum=actor_momentum) + self.actor_grad_tf = tf.gradients(self.actor_loss_tf, actor_vars) + self.actor_solver = MPISolver(self.sess, actor_opt, actor_vars) + + return + + def _build_net_actor(self, net_name, init_output_scale): + norm_s_tf = self.s_norm.normalize_tf(self.s_tf) + input_tfs = [norm_s_tf] + if (self.has_goal()): + norm_g_tf = self.g_norm.normalize_tf(self.g_tf) + input_tfs += [norm_g_tf] + + h = NetBuilder.build_net(net_name, input_tfs) + norm_a_tf = tf.layers.dense(inputs=h, units=self.get_action_size(), activation=None, + kernel_initializer=tf.random_uniform_initializer(minval=-init_output_scale, maxval=init_output_scale)) + + a_tf = self.a_norm.unnormalize_tf(norm_a_tf) + 
return a_tf + + def _build_net_critic(self, net_name): + norm_s_tf = self.s_norm.normalize_tf(self.s_tf) + input_tfs = [norm_s_tf] + if (self.has_goal()): + norm_g_tf = self.g_norm.normalize_tf(self.g_tf) + input_tfs += [norm_g_tf] + + h = NetBuilder.build_net(net_name, input_tfs) + norm_val_tf = tf.layers.dense(inputs=h, units=1, activation=None, + kernel_initializer=TFUtil.xavier_initializer); + + norm_val_tf = tf.reshape(norm_val_tf, [-1]) + val_tf = self.val_norm.unnormalize_tf(norm_val_tf) + return val_tf + + def _initialize_vars(self): + super()._initialize_vars() + self._sync_solvers() + return + + def _sync_solvers(self): + self.actor_solver.sync() + self.critic_solver.sync() + return + + def _decide_action(self, s, g): + with self.sess.as_default(), self.graph.as_default(): + self._exp_action = False + a = self._eval_actor(s, g)[0] + logp = 0 + + if self._enable_stoch_policy(): + # epsilon-greedy + rand_action = MathUtil.flip_coin(self.exp_params_curr.rate) + if rand_action: + norm_exp_noise = np.random.randn(*a.shape) + norm_exp_noise *= self.exp_params_curr.noise + exp_noise = norm_exp_noise * self.a_norm.std + a += exp_noise + + logp = self._calc_action_logp(norm_exp_noise) + self._exp_action = True + + return a, logp + + def _enable_stoch_policy(self): + return self.enable_training and (self._mode == self.Mode.TRAIN or self._mode == self.Mode.TRAIN_END) + + def _eval_actor(self, s, g): + s = np.reshape(s, [-1, self.get_state_size()]) + g = np.reshape(g, [-1, self.get_goal_size()]) if self.has_goal() else None + + feed = { + self.s_tf : s, + self.g_tf : g + } + + a = self.actor_tf.eval(feed) + return a + + def _eval_critic(self, s, g): + with self.sess.as_default(), self.graph.as_default(): + s = np.reshape(s, [-1, self.get_state_size()]) + g = np.reshape(g, [-1, self.get_goal_size()]) if self.has_goal() else None + + feed = { + self.s_tf : s, + self.g_tf : g + } + + val = self.critic_tf.eval(feed) + return val + + def _record_flags(self): + flags = int(0) + if (self._exp_action): + flags = flags | self.EXP_ACTION_FLAG + return flags + + def _train_step(self): + super()._train_step() + + critic_loss = self._update_critic() + actor_loss = self._update_actor() + critic_loss = MPIUtil.reduce_avg(critic_loss) + actor_loss = MPIUtil.reduce_avg(actor_loss) + + critic_stepsize = self.critic_solver.get_stepsize() + actor_stepsize = self.actor_solver.get_stepsize() + + self.logger.log_tabular('Critic_Loss', critic_loss) + self.logger.log_tabular('Critic_Stepsize', critic_stepsize) + self.logger.log_tabular('Actor_Loss', actor_loss) + self.logger.log_tabular('Actor_Stepsize', actor_stepsize) + + return + + def _update_critic(self): + idx = self.replay_buffer.sample(self._local_mini_batch_size) + s = self.replay_buffer.get('states', idx) + g = self.replay_buffer.get('goals', idx) if self.has_goal() else None + + tar_V = self._calc_updated_vals(idx) + tar_V = np.clip(tar_V, self.val_min, self.val_max) + + feed = { + self.s_tf: s, + self.g_tf: g, + self.tar_val_tf: tar_V + } + + loss, grads = self.sess.run([self.critic_loss_tf, self.critic_grad_tf], feed) + self.critic_solver.update(grads) + return loss + + def _update_actor(self): + key = self.EXP_ACTION_FLAG + idx = self.replay_buffer.sample_filtered(self._local_mini_batch_size, key) + has_goal = self.has_goal() + + s = self.replay_buffer.get('states', idx) + g = self.replay_buffer.get('goals', idx) if has_goal else None + a = self.replay_buffer.get('actions', idx) + + V_new = self._calc_updated_vals(idx) + V_old = self._eval_critic(s, g) 
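+      #advantage = bootstrapped target value minus the critic's baseline prediction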
+ adv = V_new - V_old + + feed = { + self.s_tf: s, + self.g_tf: g, + self.a_tf: a, + self.adv_tf: adv + } + + loss, grads = self.sess.run([self.actor_loss_tf, self.actor_grad_tf], feed) + self.actor_solver.update(grads) + + return loss + + def _calc_updated_vals(self, idx): + r = self.replay_buffer.get('rewards', idx) + + if self.discount == 0: + new_V = r + else: + next_idx = self.replay_buffer.get_next_idx(idx) + s_next = self.replay_buffer.get('states', next_idx) + g_next = self.replay_buffer.get('goals', next_idx) if self.has_goal() else None + + is_end = self.replay_buffer.is_path_end(idx) + is_fail = self.replay_buffer.check_terminal_flag(idx, Env.Terminate.Fail) + is_succ = self.replay_buffer.check_terminal_flag(idx, Env.Terminate.Succ) + is_fail = np.logical_and(is_end, is_fail) + is_succ = np.logical_and(is_end, is_succ) + + V_next = self._eval_critic(s_next, g_next) + V_next[is_fail] = self.val_fail + V_next[is_succ] = self.val_succ + + new_V = r + self.discount * V_next + return new_V + + def _calc_action_logp(self, norm_action_deltas): + # norm action delta are for the normalized actions (scaled by self.a_norm.std) + stdev = self.exp_params_curr.noise + assert stdev > 0 + + a_size = self.get_action_size() + logp = -0.5 / (stdev * stdev) * np.sum(np.square(norm_action_deltas), axis=-1) + logp += -0.5 * a_size * np.log(2 * np.pi) + logp += -a_size * np.log(stdev) + return logp + + def _log_val(self, s, g): + val = self._eval_critic(s, g) + norm_val = self.val_norm.normalize(val) + self.world.env.log_val(self.id, norm_val[0]) + return + + def _build_replay_buffer(self, buffer_size): + super()._build_replay_buffer(buffer_size) + self.replay_buffer.add_filter_key(self.EXP_ACTION_FLAG) + return \ No newline at end of file diff --git a/examples/pybullet/gym/pybullet_envs/deep_mimic/learning/ppo_agent.py b/examples/pybullet/gym/pybullet_envs/deep_mimic/learning/ppo_agent.py new file mode 100644 index 000000000..1b9489489 --- /dev/null +++ b/examples/pybullet/gym/pybullet_envs/deep_mimic/learning/ppo_agent.py @@ -0,0 +1,367 @@ +import numpy as np +import copy as copy +import tensorflow as tf + +from learning.pg_agent import PGAgent +from learning.solvers.mpi_solver import MPISolver +import learning.tf_util as TFUtil +import learning.rl_util as RLUtil +from pybullet_utils.logger import Logger +import pybullet_utils.mpi_util as MPIUtil +import pybullet_utils.math_util as MathUtil +from pybullet_envs.deep_mimic.env.env import Env + +''' +Proximal Policy Optimization Agent +''' + +class PPOAgent(PGAgent): + NAME = "PPO" + EPOCHS_KEY = "Epochs" + BATCH_SIZE_KEY = "BatchSize" + RATIO_CLIP_KEY = "RatioClip" + NORM_ADV_CLIP_KEY = "NormAdvClip" + TD_LAMBDA_KEY = "TDLambda" + TAR_CLIP_FRAC = "TarClipFrac" + ACTOR_STEPSIZE_DECAY = "ActorStepsizeDecay" + + def __init__(self, world, id, json_data): + super().__init__(world, id, json_data) + return + + def _load_params(self, json_data): + super()._load_params(json_data) + + self.epochs = 1 if (self.EPOCHS_KEY not in json_data) else json_data[self.EPOCHS_KEY] + self.batch_size = 1024 if (self.BATCH_SIZE_KEY not in json_data) else json_data[self.BATCH_SIZE_KEY] + self.ratio_clip = 0.2 if (self.RATIO_CLIP_KEY not in json_data) else json_data[self.RATIO_CLIP_KEY] + self.norm_adv_clip = 5 if (self.NORM_ADV_CLIP_KEY not in json_data) else json_data[self.NORM_ADV_CLIP_KEY] + self.td_lambda = 0.95 if (self.TD_LAMBDA_KEY not in json_data) else json_data[self.TD_LAMBDA_KEY] + self.tar_clip_frac = -1 if (self.TAR_CLIP_FRAC not in json_data) else 
json_data[self.TAR_CLIP_FRAC] + self.actor_stepsize_decay = 0.5 if (self.ACTOR_STEPSIZE_DECAY not in json_data) else json_data[self.ACTOR_STEPSIZE_DECAY] + + num_procs = MPIUtil.get_num_procs() + local_batch_size = int(self.batch_size / num_procs) + min_replay_size = 2 * local_batch_size # needed to prevent buffer overflow + assert(self.replay_buffer_size > min_replay_size) + + self.replay_buffer_size = np.maximum(min_replay_size, self.replay_buffer_size) + + return + + def _build_nets(self, json_data): + assert self.ACTOR_NET_KEY in json_data + assert self.CRITIC_NET_KEY in json_data + + actor_net_name = json_data[self.ACTOR_NET_KEY] + critic_net_name = json_data[self.CRITIC_NET_KEY] + actor_init_output_scale = 1 if (self.ACTOR_INIT_OUTPUT_SCALE_KEY not in json_data) else json_data[self.ACTOR_INIT_OUTPUT_SCALE_KEY] + + s_size = self.get_state_size() + g_size = self.get_goal_size() + a_size = self.get_action_size() + + # setup input tensors + self.s_tf = tf.placeholder(tf.float32, shape=[None, s_size], name="s") + self.a_tf = tf.placeholder(tf.float32, shape=[None, a_size], name="a") + self.tar_val_tf = tf.placeholder(tf.float32, shape=[None], name="tar_val") + self.adv_tf = tf.placeholder(tf.float32, shape=[None], name="adv") + self.g_tf = tf.placeholder(tf.float32, shape=([None, g_size] if self.has_goal() else None), name="g") + self.old_logp_tf = tf.placeholder(tf.float32, shape=[None], name="old_logp") + self.exp_mask_tf = tf.placeholder(tf.float32, shape=[None], name="exp_mask") + + with tf.variable_scope('main'): + with tf.variable_scope('actor'): + self.a_mean_tf = self._build_net_actor(actor_net_name, actor_init_output_scale) + with tf.variable_scope('critic'): + self.critic_tf = self._build_net_critic(critic_net_name) + + if (self.a_mean_tf != None): + Logger.print2('Built actor net: ' + actor_net_name) + + if (self.critic_tf != None): + Logger.print2('Built critic net: ' + critic_net_name) + + self.norm_a_std_tf = self.exp_params_curr.noise * tf.ones(a_size) + norm_a_noise_tf = self.norm_a_std_tf * tf.random_normal(shape=tf.shape(self.a_mean_tf)) + norm_a_noise_tf *= tf.expand_dims(self.exp_mask_tf, axis=-1) + self.sample_a_tf = self.a_mean_tf + norm_a_noise_tf * self.a_norm.std_tf + self.sample_a_logp_tf = TFUtil.calc_logp_gaussian(x_tf=norm_a_noise_tf, mean_tf=None, std_tf=self.norm_a_std_tf) + + return + + def _build_losses(self, json_data): + actor_weight_decay = 0 if (self.ACTOR_WEIGHT_DECAY_KEY not in json_data) else json_data[self.ACTOR_WEIGHT_DECAY_KEY] + critic_weight_decay = 0 if (self.CRITIC_WEIGHT_DECAY_KEY not in json_data) else json_data[self.CRITIC_WEIGHT_DECAY_KEY] + + norm_val_diff = self.val_norm.normalize_tf(self.tar_val_tf) - self.val_norm.normalize_tf(self.critic_tf) + self.critic_loss_tf = 0.5 * tf.reduce_mean(tf.square(norm_val_diff)) + + if (critic_weight_decay != 0): + self.critic_loss_tf += critic_weight_decay * self._weight_decay_loss('main/critic') + + norm_tar_a_tf = self.a_norm.normalize_tf(self.a_tf) + self._norm_a_mean_tf = self.a_norm.normalize_tf(self.a_mean_tf) + + self.logp_tf = TFUtil.calc_logp_gaussian(norm_tar_a_tf, self._norm_a_mean_tf, self.norm_a_std_tf) + ratio_tf = tf.exp(self.logp_tf - self.old_logp_tf) + actor_loss0 = self.adv_tf * ratio_tf + actor_loss1 = self.adv_tf * tf.clip_by_value(ratio_tf, 1.0 - self.ratio_clip, 1 + self.ratio_clip) + self.actor_loss_tf = -tf.reduce_mean(tf.minimum(actor_loss0, actor_loss1)) + + norm_a_bound_min = self.a_norm.normalize(self.a_bound_min) + norm_a_bound_max = 
self.a_norm.normalize(self.a_bound_max) + a_bound_loss = TFUtil.calc_bound_loss(self._norm_a_mean_tf, norm_a_bound_min, norm_a_bound_max) + self.actor_loss_tf += a_bound_loss + + if (actor_weight_decay != 0): + self.actor_loss_tf += actor_weight_decay * self._weight_decay_loss('main/actor') + + # for debugging + self.clip_frac_tf = tf.reduce_mean(tf.to_float(tf.greater(tf.abs(ratio_tf - 1.0), self.ratio_clip))) + + return + + def _build_solvers(self, json_data): + actor_stepsize = 0.001 if (self.ACTOR_STEPSIZE_KEY not in json_data) else json_data[self.ACTOR_STEPSIZE_KEY] + actor_momentum = 0.9 if (self.ACTOR_MOMENTUM_KEY not in json_data) else json_data[self.ACTOR_MOMENTUM_KEY] + critic_stepsize = 0.01 if (self.CRITIC_STEPSIZE_KEY not in json_data) else json_data[self.CRITIC_STEPSIZE_KEY] + critic_momentum = 0.9 if (self.CRITIC_MOMENTUM_KEY not in json_data) else json_data[self.CRITIC_MOMENTUM_KEY] + + critic_vars = self._tf_vars('main/critic') + critic_opt = tf.train.MomentumOptimizer(learning_rate=critic_stepsize, momentum=critic_momentum) + self.critic_grad_tf = tf.gradients(self.critic_loss_tf, critic_vars) + self.critic_solver = MPISolver(self.sess, critic_opt, critic_vars) + + self._actor_stepsize_tf = tf.get_variable(dtype=tf.float32, name='actor_stepsize', initializer=actor_stepsize, trainable=False) + self._actor_stepsize_ph = tf.get_variable(dtype=tf.float32, name='actor_stepsize_ph', shape=[]) + self._actor_stepsize_update_op = self._actor_stepsize_tf.assign(self._actor_stepsize_ph) + + actor_vars = self._tf_vars('main/actor') + actor_opt = tf.train.MomentumOptimizer(learning_rate=self._actor_stepsize_tf, momentum=actor_momentum) + self.actor_grad_tf = tf.gradients(self.actor_loss_tf, actor_vars) + self.actor_solver = MPISolver(self.sess, actor_opt, actor_vars) + + return + + def _decide_action(self, s, g): + with self.sess.as_default(), self.graph.as_default(): + self._exp_action = self._enable_stoch_policy() and MathUtil.flip_coin(self.exp_params_curr.rate) + a, logp = self._eval_actor(s, g, self._exp_action) + return a[0], logp[0] + + def _eval_actor(self, s, g, enable_exp): + s = np.reshape(s, [-1, self.get_state_size()]) + g = np.reshape(g, [-1, self.get_goal_size()]) if self.has_goal() else None + + feed = { + self.s_tf : s, + self.g_tf : g, + self.exp_mask_tf: np.array([1 if enable_exp else 0]) + } + + a, logp = self.sess.run([self.sample_a_tf, self.sample_a_logp_tf], feed_dict=feed) + return a, logp + + def _train_step(self): + adv_eps = 1e-5 + + start_idx = self.replay_buffer.buffer_tail + end_idx = self.replay_buffer.buffer_head + assert(start_idx == 0) + assert(self.replay_buffer.get_current_size() <= self.replay_buffer.buffer_size) # must avoid overflow + assert(start_idx < end_idx) + + idx = np.array(list(range(start_idx, end_idx))) + end_mask = self.replay_buffer.is_path_end(idx) + end_mask = np.logical_not(end_mask) + + vals = self._compute_batch_vals(start_idx, end_idx) + new_vals = self._compute_batch_new_vals(start_idx, end_idx, vals) + + valid_idx = idx[end_mask] + exp_idx = self.replay_buffer.get_idx_filtered(self.EXP_ACTION_FLAG).copy() + num_valid_idx = valid_idx.shape[0] + num_exp_idx = exp_idx.shape[0] + exp_idx = np.column_stack([exp_idx, np.array(list(range(0, num_exp_idx)), dtype=np.int32)]) + + local_sample_count = valid_idx.size + global_sample_count = int(MPIUtil.reduce_sum(local_sample_count)) + mini_batches = int(np.ceil(global_sample_count / self.mini_batch_size)) + + adv = new_vals[exp_idx[:,0]] - vals[exp_idx[:,0]] + new_vals = 
np.clip(new_vals, self.val_min, self.val_max) + + adv_mean = np.mean(adv) + adv_std = np.std(adv) + adv = (adv - adv_mean) / (adv_std + adv_eps) + adv = np.clip(adv, -self.norm_adv_clip, self.norm_adv_clip) + + critic_loss = 0 + actor_loss = 0 + actor_clip_frac = 0 + + for e in range(self.epochs): + np.random.shuffle(valid_idx) + np.random.shuffle(exp_idx) + + for b in range(mini_batches): + batch_idx_beg = b * self._local_mini_batch_size + batch_idx_end = batch_idx_beg + self._local_mini_batch_size + + critic_batch = np.array(range(batch_idx_beg, batch_idx_end), dtype=np.int32) + actor_batch = critic_batch.copy() + critic_batch = np.mod(critic_batch, num_valid_idx) + actor_batch = np.mod(actor_batch, num_exp_idx) + shuffle_actor = (actor_batch[-1] < actor_batch[0]) or (actor_batch[-1] == num_exp_idx - 1) + + critic_batch = valid_idx[critic_batch] + actor_batch = exp_idx[actor_batch] + critic_batch_vals = new_vals[critic_batch] + actor_batch_adv = adv[actor_batch[:,1]] + + critic_s = self.replay_buffer.get('states', critic_batch) + critic_g = self.replay_buffer.get('goals', critic_batch) if self.has_goal() else None + curr_critic_loss = self._update_critic(critic_s, critic_g, critic_batch_vals) + + actor_s = self.replay_buffer.get("states", actor_batch[:,0]) + actor_g = self.replay_buffer.get("goals", actor_batch[:,0]) if self.has_goal() else None + actor_a = self.replay_buffer.get("actions", actor_batch[:,0]) + actor_logp = self.replay_buffer.get("logps", actor_batch[:,0]) + curr_actor_loss, curr_actor_clip_frac = self._update_actor(actor_s, actor_g, actor_a, actor_logp, actor_batch_adv) + + critic_loss += curr_critic_loss + actor_loss += np.abs(curr_actor_loss) + actor_clip_frac += curr_actor_clip_frac + + if (shuffle_actor): + np.random.shuffle(exp_idx) + + total_batches = mini_batches * self.epochs + critic_loss /= total_batches + actor_loss /= total_batches + actor_clip_frac /= total_batches + + critic_loss = MPIUtil.reduce_avg(critic_loss) + actor_loss = MPIUtil.reduce_avg(actor_loss) + actor_clip_frac = MPIUtil.reduce_avg(actor_clip_frac) + + critic_stepsize = self.critic_solver.get_stepsize() + actor_stepsize = self.update_actor_stepsize(actor_clip_frac) + + self.logger.log_tabular('Critic_Loss', critic_loss) + self.logger.log_tabular('Critic_Stepsize', critic_stepsize) + self.logger.log_tabular('Actor_Loss', actor_loss) + self.logger.log_tabular('Actor_Stepsize', actor_stepsize) + self.logger.log_tabular('Clip_Frac', actor_clip_frac) + self.logger.log_tabular('Adv_Mean', adv_mean) + self.logger.log_tabular('Adv_Std', adv_std) + + self.replay_buffer.clear() + + return + + def _get_iters_per_update(self): + return 1 + + def _valid_train_step(self): + samples = self.replay_buffer.get_current_size() + exp_samples = self.replay_buffer.count_filtered(self.EXP_ACTION_FLAG) + global_sample_count = int(MPIUtil.reduce_sum(samples)) + global_exp_min = int(MPIUtil.reduce_min(exp_samples)) + return (global_sample_count > self.batch_size) and (global_exp_min > 0) + + def _compute_batch_vals(self, start_idx, end_idx): + states = self.replay_buffer.get_all("states")[start_idx:end_idx] + goals = self.replay_buffer.get_all("goals")[start_idx:end_idx] if self.has_goal() else None + + idx = np.array(list(range(start_idx, end_idx))) + is_end = self.replay_buffer.is_path_end(idx) + is_fail = self.replay_buffer.check_terminal_flag(idx, Env.Terminate.Fail) + is_succ = self.replay_buffer.check_terminal_flag(idx, Env.Terminate.Succ) + is_fail = np.logical_and(is_end, is_fail) + is_succ = 
np.logical_and(is_end, is_succ) + + vals = self._eval_critic(states, goals) + vals[is_fail] = self.val_fail + vals[is_succ] = self.val_succ + + return vals + + def _compute_batch_new_vals(self, start_idx, end_idx, val_buffer): + rewards = self.replay_buffer.get_all("rewards")[start_idx:end_idx] + + if self.discount == 0: + new_vals = rewards.copy() + else: + new_vals = np.zeros_like(val_buffer) + + curr_idx = start_idx + while curr_idx < end_idx: + idx0 = curr_idx - start_idx + idx1 = self.replay_buffer.get_path_end(curr_idx) - start_idx + r = rewards[idx0:idx1] + v = val_buffer[idx0:(idx1 + 1)] + + new_vals[idx0:idx1] = RLUtil.compute_return(r, self.discount, self.td_lambda, v) + curr_idx = idx1 + start_idx + 1 + + return new_vals + + def _update_critic(self, s, g, tar_vals): + feed = { + self.s_tf: s, + self.g_tf: g, + self.tar_val_tf: tar_vals + } + + loss, grads = self.sess.run([self.critic_loss_tf, self.critic_grad_tf], feed) + self.critic_solver.update(grads) + return loss + + def _update_actor(self, s, g, a, logp, adv): + feed = { + self.s_tf: s, + self.g_tf: g, + self.a_tf: a, + self.adv_tf: adv, + self.old_logp_tf: logp + } + + loss, grads, clip_frac = self.sess.run([self.actor_loss_tf, self.actor_grad_tf, + self.clip_frac_tf], feed) + self.actor_solver.update(grads) + + return loss, clip_frac + + def update_actor_stepsize(self, clip_frac): + clip_tol = 1.5 + step_scale = 2 + max_stepsize = 1e-2 + min_stepsize = 1e-8 + warmup_iters = 5 + + actor_stepsize = self.actor_solver.get_stepsize() + if (self.tar_clip_frac >= 0 and self.iter > warmup_iters): + min_clip = self.tar_clip_frac / clip_tol + max_clip = self.tar_clip_frac * clip_tol + under_tol = clip_frac < min_clip + over_tol = clip_frac > max_clip + + if (over_tol or under_tol): + if (over_tol): + actor_stepsize *= self.actor_stepsize_decay + else: + actor_stepsize /= self.actor_stepsize_decay + + actor_stepsize = np.clip(actor_stepsize, min_stepsize, max_stepsize) + self.set_actor_stepsize(actor_stepsize) + + return actor_stepsize + + def set_actor_stepsize(self, stepsize): + feed = { + self._actor_stepsize_ph: stepsize, + } + self.sess.run(self._actor_stepsize_update_op, feed) + return \ No newline at end of file diff --git a/examples/pybullet/gym/pybullet_envs/deep_mimic/learning/replay_buffer.py b/examples/pybullet/gym/pybullet_envs/deep_mimic/learning/replay_buffer.py new file mode 100644 index 000000000..284515851 --- /dev/null +++ b/examples/pybullet/gym/pybullet_envs/deep_mimic/learning/replay_buffer.py @@ -0,0 +1,351 @@ +import numpy as np +import copy +from pybullet_utils.logger import Logger +import inspect as inspect +from pybullet_envs.deep_mimic.env.env import Env +import pybullet_utils.math_util as MathUtil + +class ReplayBuffer(object): + TERMINATE_KEY = 'terminate' + PATH_START_KEY = 'path_start' + PATH_END_KEY = 'path_end' + + def __init__(self, buffer_size): + assert buffer_size > 0 + + self.buffer_size = buffer_size + self.total_count = 0 + self.buffer_head = 0 + self.buffer_tail = MathUtil.INVALID_IDX + self.num_paths = 0 + self._sample_buffers = dict() + self.buffers = None + + self.clear() + return + + def sample(self, n): + curr_size = self.get_current_size() + assert curr_size > 0 + + idx = np.empty(n, dtype=int) + # makes sure that the end states are not sampled + for i in range(n): + while True: + curr_idx = np.random.randint(0, curr_size, size=1)[0] + curr_idx += self.buffer_tail + curr_idx = np.mod(curr_idx, self.buffer_size) + + if not self.is_path_end(curr_idx): + break + idx[i] = curr_idx + + 
return idx + + def sample_filtered(self, n, key): + assert key in self._sample_buffers + curr_buffer = self._sample_buffers[key] + idx = curr_buffer.sample(n) + return idx + + def count_filtered(self, key): + curr_buffer = self._sample_buffers[key] + return curr_buffer.count + + def get(self, key, idx): + return self.buffers[key][idx] + + def get_all(self, key): + return self.buffers[key] + + def get_idx_filtered(self, key): + assert key in self._sample_buffers + curr_buffer = self._sample_buffers[key] + idx = curr_buffer.slot_to_idx[:curr_buffer.count] + return idx + + def get_path_start(self, idx): + return self.buffers[self.PATH_START_KEY][idx] + + def get_path_end(self, idx): + return self.buffers[self.PATH_END_KEY][idx] + + def get_pathlen(self, idx): + is_array = isinstance(idx, np.ndarray) or isinstance(idx, list) + if not is_array: + idx = [idx] + + n = len(idx) + start_idx = self.get_path_start(idx) + end_idx = self.get_path_end(idx) + pathlen = np.empty(n, dtype=int) + + for i in range(n): + curr_start = start_idx[i] + curr_end = end_idx[i] + if curr_start < curr_end: + curr_len = curr_end - curr_start + else: + curr_len = self.buffer_size - curr_start + curr_end + pathlen[i] = curr_len + + if not is_array: + pathlen = pathlen[0] + + return pathlen + + def is_valid_path(self, idx): + start_idx = self.get_path_start(idx) + valid = start_idx != MathUtil.INVALID_IDX + return valid + + def store(self, path): + start_idx = MathUtil.INVALID_IDX + n = path.pathlength() + + if (n > 0): + assert path.is_valid() + + if path.check_vals(): + if self.buffers is None: + self._init_buffers(path) + + idx = self._request_idx(n + 1) + self._store_path(path, idx) + self._add_sample_buffers(idx) + + self.num_paths += 1 + self.total_count += n + 1 + start_idx = idx[0] + else: + Logger.print2('Invalid path data value detected') + + return start_idx + + def clear(self): + self.buffer_head = 0 + self.buffer_tail = MathUtil.INVALID_IDX + self.num_paths = 0 + + for key in self._sample_buffers: + self._sample_buffers[key].clear() + return + + def get_next_idx(self, idx): + next_idx = np.mod(idx + 1, self.buffer_size) + return next_idx + + def is_terminal_state(self, idx): + terminate_flags = self.buffers[self.TERMINATE_KEY][idx] + terminate = terminate_flags != Env.Terminate.Null.value + is_end = self.is_path_end(idx) + terminal_state = np.logical_and(terminate, is_end) + return terminal_state + + def check_terminal_flag(self, idx, flag): + terminate_flags = self.buffers[self.TERMINATE_KEY][idx] + terminate = terminate_flags == flag.value + return terminate + + def is_path_end(self, idx): + is_end = self.buffers[self.PATH_END_KEY][idx] == idx + return is_end + + def add_filter_key(self, key): + assert self.get_current_size() == 0 + if key not in self._sample_buffers: + self._sample_buffers[key] = SampleBuffer(self.buffer_size) + return + + def get_current_size(self): + if self.buffer_tail == MathUtil.INVALID_IDX: + return 0 + elif self.buffer_tail < self.buffer_head: + return self.buffer_head - self.buffer_tail + else: + return self.buffer_size - self.buffer_tail + self.buffer_head + + def _check_flags(self, key, flags): + return (flags & key) == key + + def _add_sample_buffers(self, idx): + flags = self.buffers['flags'] + for key in self._sample_buffers: + curr_buffer = self._sample_buffers[key] + filter_idx = [i for i in idx if (self._check_flags(key, flags[i]) and not self.is_path_end(i))] + curr_buffer.add(filter_idx) + return + + def _free_sample_buffers(self, idx): + for key in self._sample_buffers: 
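+      #drop the freed indices from every filtered sample buffer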
+ curr_buffer = self._sample_buffers[key] + curr_buffer.free(idx) + return + + def _init_buffers(self, path): + self.buffers = dict() + self.buffers[self.PATH_START_KEY] = MathUtil.INVALID_IDX * np.ones(self.buffer_size, dtype=int); + self.buffers[self.PATH_END_KEY] = MathUtil.INVALID_IDX * np.ones(self.buffer_size, dtype=int); + + for key in dir(path): + val = getattr(path, key) + if not key.startswith('__') and not inspect.ismethod(val): + if key == self.TERMINATE_KEY: + self.buffers[self.TERMINATE_KEY] = np.zeros(shape=[self.buffer_size], dtype=int) + else: + val_type = type(val[0]) + is_array = val_type == np.ndarray + if is_array: + shape = [self.buffer_size, val[0].shape[0]] + dtype = val[0].dtype + else: + shape = [self.buffer_size] + dtype = val_type + + self.buffers[key] = np.zeros(shape, dtype=dtype) + return + + def _request_idx(self, n): + assert n + 1 < self.buffer_size # bad things can happen if path is too long + + remainder = n + idx = [] + + start_idx = self.buffer_head + while remainder > 0: + end_idx = np.minimum(start_idx + remainder, self.buffer_size) + remainder -= (end_idx - start_idx) + + free_idx = list(range(start_idx, end_idx)) + self._free_idx(free_idx) + idx += free_idx + start_idx = 0 + + self.buffer_head = (self.buffer_head + n) % self.buffer_size + return idx + + def _free_idx(self, idx): + assert(idx[0] <= idx[-1]) + n = len(idx) + if self.buffer_tail != MathUtil.INVALID_IDX: + update_tail = idx[0] <= idx[-1] and idx[0] <= self.buffer_tail and idx[-1] >= self.buffer_tail + update_tail |= idx[0] > idx[-1] and (idx[0] <= self.buffer_tail or idx[-1] >= self.buffer_tail) + + if update_tail: + i = 0 + while i < n: + curr_idx = idx[i] + if self.is_valid_path(curr_idx): + start_idx = self.get_path_start(curr_idx) + end_idx = self.get_path_end(curr_idx) + pathlen = self.get_pathlen(curr_idx) + + if start_idx < end_idx: + self.buffers[self.PATH_START_KEY][start_idx:end_idx + 1] = MathUtil.INVALID_IDX + self._free_sample_buffers(list(range(start_idx, end_idx + 1))) + else: + self.buffers[self.PATH_START_KEY][start_idx:self.buffer_size] = MathUtil.INVALID_IDX + self.buffers[self.PATH_START_KEY][0:end_idx + 1] = MathUtil.INVALID_IDX + self._free_sample_buffers(list(range(start_idx, self.buffer_size))) + self._free_sample_buffers(list(range(0, end_idx + 1))) + + self.num_paths -= 1 + i += pathlen + 1 + self.buffer_tail = (end_idx + 1) % self.buffer_size; + else: + i += 1 + else: + self.buffer_tail = idx[0] + return + + def _store_path(self, path, idx): + n = path.pathlength() + for key, data in self.buffers.items(): + if key != self.PATH_START_KEY and key != self.PATH_END_KEY and key != self.TERMINATE_KEY: + val = getattr(path, key) + val_len = len(val) + assert val_len == n or val_len == n + 1 + data[idx[:val_len]] = val + + self.buffers[self.TERMINATE_KEY][idx] = path.terminate.value + self.buffers[self.PATH_START_KEY][idx] = idx[0] + self.buffers[self.PATH_END_KEY][idx] = idx[-1] + return + +class SampleBuffer(object): + def __init__(self, size): + self.idx_to_slot = np.empty(shape=[size], dtype=int) + self.slot_to_idx = np.empty(shape=[size], dtype=int) + self.count = 0 + self.clear() + return + + def clear(self): + self.idx_to_slot.fill(MathUtil.INVALID_IDX) + self.slot_to_idx.fill(MathUtil.INVALID_IDX) + self.count = 0 + return + + def is_valid(self, idx): + return self.idx_to_slot[idx] != MathUtil.INVALID_IDX + + def get_size(self): + return self.idx_to_slot.shape[0] + + def add(self, idx): + for i in idx: + if not self.is_valid(i): + new_slot = self.count + 
assert new_slot >= 0 + + self.idx_to_slot[i] = new_slot + self.slot_to_idx[new_slot] = i + self.count += 1 + return + + def free(self, idx): + for i in idx: + if self.is_valid(i): + slot = self.idx_to_slot[i] + last_slot = self.count - 1 + last_idx = self.slot_to_idx[last_slot] + + self.idx_to_slot[last_idx] = slot + self.slot_to_idx[slot] = last_idx + self.idx_to_slot[i] = MathUtil.INVALID_IDX + self.slot_to_idx[last_slot] = MathUtil.INVALID_IDX + self.count -= 1 + return + + def sample(self, n): + if self.count > 0: + slots = np.random.randint(0, self.count, size=n) + idx = self.slot_to_idx[slots] + else: + idx = np.empty(shape=[0], dtype=int) + return idx + + def check_consistency(self): + valid = True + if self.count < 0: + valid = False + + if valid: + for i in range(self.get_size()): + if self.is_valid(i): + s = self.idx_to_slot[i] + if self.slot_to_idx[s] != i: + valid = False + break + + s2i = self.slot_to_idx[i] + if s2i != MathUtil.INVALID_IDX: + i2s = self.idx_to_slot[s2i] + if i2s != i: + valid = False + break + + count0 = np.sum(self.idx_to_slot == MathUtil.INVALID_IDX) + count1 = np.sum(self.slot_to_idx == MathUtil.INVALID_IDX) + valid &= count0 == count1 + return valid diff --git a/examples/pybullet/gym/pybullet_envs/deep_mimic/learning/rl_agent.py b/examples/pybullet/gym/pybullet_envs/deep_mimic/learning/rl_agent.py new file mode 100644 index 000000000..25886c659 --- /dev/null +++ b/examples/pybullet/gym/pybullet_envs/deep_mimic/learning/rl_agent.py @@ -0,0 +1,595 @@ +import numpy as np +import copy +import os +import time + +from abc import ABC, abstractmethod +from enum import Enum + +from learning.path import * +from learning.exp_params import ExpParams +from learning.normalizer import Normalizer +from learning.replay_buffer import ReplayBuffer +from pybullet_utils.logger import Logger +import pybullet_utils.mpi_util as MPIUtil +import pybullet_utils.math_util as MathUtil + +class RLAgent(ABC): + class Mode(Enum): + TRAIN = 0 + TEST = 1 + TRAIN_END = 2 + + NAME = "None" + + UPDATE_PERIOD_KEY = "UpdatePeriod" + ITERS_PER_UPDATE = "ItersPerUpdate" + DISCOUNT_KEY = "Discount" + MINI_BATCH_SIZE_KEY = "MiniBatchSize" + REPLAY_BUFFER_SIZE_KEY = "ReplayBufferSize" + INIT_SAMPLES_KEY = "InitSamples" + NORMALIZER_SAMPLES_KEY = "NormalizerSamples" + + OUTPUT_ITERS_KEY = "OutputIters" + INT_OUTPUT_ITERS_KEY = "IntOutputIters" + TEST_EPISODES_KEY = "TestEpisodes" + + EXP_ANNEAL_SAMPLES_KEY = "ExpAnnealSamples" + EXP_PARAM_BEG_KEY = "ExpParamsBeg" + EXP_PARAM_END_KEY = "ExpParamsEnd" + + def __init__(self, world, id, json_data): + self.world = world + self.id = id + self.logger = Logger() + self._mode = self.Mode.TRAIN + + assert self._check_action_space(), \ + Logger.print2("Invalid action space, got {:s}".format(str(self.get_action_space()))) + + self._enable_training = True + self.path = Path() + self.iter = int(0) + self.start_time = time.time() + self._update_counter = 0 + + self.update_period = 1.0 # simulated time (seconds) before each training update + self.iters_per_update = int(1) + self.discount = 0.95 + self.mini_batch_size = int(32) + self.replay_buffer_size = int(50000) + self.init_samples = int(1000) + self.normalizer_samples = np.inf + self._local_mini_batch_size = self.mini_batch_size # batch size for each work for multiprocessing + self._need_normalizer_update = True + self._total_sample_count = 0 + + self._output_dir = "" + self._int_output_dir = "" + self.output_iters = 100 + self.int_output_iters = 100 + + self.train_return = 0.0 + self.test_episodes = int(0) + 
self.test_episode_count = int(0) + self.test_return = 0.0 + self.avg_test_return = 0.0 + + self.exp_anneal_samples = 320000 + self.exp_params_beg = ExpParams() + self.exp_params_end = ExpParams() + self.exp_params_curr = ExpParams() + + self._load_params(json_data) + self._build_replay_buffer(self.replay_buffer_size) + self._build_normalizers() + self._build_bounds() + self.reset() + + return + + def __str__(self): + action_space_str = str(self.get_action_space()) + info_str = "" + info_str += '"ID": {:d},\n "Type": "{:s}",\n "ActionSpace": "{:s}",\n "StateDim": {:d},\n "GoalDim": {:d},\n "ActionDim": {:d}'.format( + self.id, self.NAME, action_space_str[action_space_str.rfind('.') + 1:], self.get_state_size(), self.get_goal_size(), self.get_action_size()) + return "{\n" + info_str + "\n}" + + def get_output_dir(self): + return self._output_dir + + def set_output_dir(self, out_dir): + self._output_dir = out_dir + if (self._output_dir != ""): + self.logger.configure_output_file(out_dir + "/agent" + str(self.id) + "_log.txt") + return + + output_dir = property(get_output_dir, set_output_dir) + + def get_int_output_dir(self): + return self._int_output_dir + + def set_int_output_dir(self, out_dir): + self._int_output_dir = out_dir + return + + int_output_dir = property(get_int_output_dir, set_int_output_dir) + + def reset(self): + self.path.clear() + return + + def update(self, timestep): + if self.need_new_action(): + #print("update_new_action!!!") + self._update_new_action() + else: + print("no action???") + + if (self._mode == self.Mode.TRAIN and self.enable_training): + self._update_counter += timestep + + while self._update_counter >= self.update_period: + self._train() + self._update_exp_params() + self.world.env.set_sample_count(self._total_sample_count) + self._update_counter -= self.update_period + + return + + def end_episode(self): + if (self.path.pathlength() > 0): + self._end_path() + + if (self._mode == self.Mode.TRAIN or self._mode == self.Mode.TRAIN_END): + if (self.enable_training and self.path.pathlength() > 0): + self._store_path(self.path) + elif (self._mode == self.Mode.TEST): + self._update_test_return(self.path) + else: + assert False, Logger.print2("Unsupported RL agent mode" + str(self._mode)) + + self._update_mode() + return + + def has_goal(self): + return self.get_goal_size() > 0 + + def predict_val(self): + return 0 + + def get_enable_training(self): + return self._enable_training + + def set_enable_training(self, enable): + print("set_enable_training!=", enable) + self._enable_training = enable + if (self._enable_training): + self.reset() + return + + enable_training = property(get_enable_training, set_enable_training) + + def enable_testing(self): + return self.test_episodes > 0 + + def get_name(self): + return self.NAME + + @abstractmethod + def save_model(self, out_path): + pass + + @abstractmethod + def load_model(self, in_path): + pass + + @abstractmethod + def _decide_action(self, s, g): + pass + + @abstractmethod + def _get_output_path(self): + pass + + @abstractmethod + def _get_int_output_path(self): + pass + + @abstractmethod + def _train_step(self): + pass + + @abstractmethod + def _check_action_space(self): + pass + + def get_action_space(self): + return self.world.env.get_action_space(self.id) + + def get_state_size(self): + return self.world.env.get_state_size(self.id) + + def get_goal_size(self): + return self.world.env.get_goal_size(self.id) + + def get_action_size(self): + return self.world.env.get_action_size(self.id) + + def 
get_num_actions(self): + return self.world.env.get_num_actions(self.id) + + def need_new_action(self): + return self.world.env.need_new_action(self.id) + + def _build_normalizers(self): + self.s_norm = Normalizer(self.get_state_size(), self.world.env.build_state_norm_groups(self.id)) + self.s_norm.set_mean_std(-self.world.env.build_state_offset(self.id), + 1 / self.world.env.build_state_scale(self.id)) + + self.g_norm = Normalizer(self.get_goal_size(), self.world.env.build_goal_norm_groups(self.id)) + self.g_norm.set_mean_std(-self.world.env.build_goal_offset(self.id), + 1 / self.world.env.build_goal_scale(self.id)) + + self.a_norm = Normalizer(self.world.env.get_action_size()) + self.a_norm.set_mean_std(-self.world.env.build_action_offset(self.id), + 1 / self.world.env.build_action_scale(self.id)) + return + + def _build_bounds(self): + self.a_bound_min = self.world.env.build_action_bound_min(self.id) + self.a_bound_max = self.world.env.build_action_bound_max(self.id) + return + + def _load_params(self, json_data): + if (self.UPDATE_PERIOD_KEY in json_data): + self.update_period = int(json_data[self.UPDATE_PERIOD_KEY]) + + if (self.ITERS_PER_UPDATE in json_data): + self.iters_per_update = int(json_data[self.ITERS_PER_UPDATE]) + + if (self.DISCOUNT_KEY in json_data): + self.discount = json_data[self.DISCOUNT_KEY] + + if (self.MINI_BATCH_SIZE_KEY in json_data): + self.mini_batch_size = int(json_data[self.MINI_BATCH_SIZE_KEY]) + + if (self.REPLAY_BUFFER_SIZE_KEY in json_data): + self.replay_buffer_size = int(json_data[self.REPLAY_BUFFER_SIZE_KEY]) + + if (self.INIT_SAMPLES_KEY in json_data): + self.init_samples = int(json_data[self.INIT_SAMPLES_KEY]) + + if (self.NORMALIZER_SAMPLES_KEY in json_data): + self.normalizer_samples = int(json_data[self.NORMALIZER_SAMPLES_KEY]) + + if (self.OUTPUT_ITERS_KEY in json_data): + self.output_iters = json_data[self.OUTPUT_ITERS_KEY] + + if (self.INT_OUTPUT_ITERS_KEY in json_data): + self.int_output_iters = json_data[self.INT_OUTPUT_ITERS_KEY] + + if (self.TEST_EPISODES_KEY in json_data): + self.test_episodes = int(json_data[self.TEST_EPISODES_KEY]) + + if (self.EXP_ANNEAL_SAMPLES_KEY in json_data): + self.exp_anneal_samples = json_data[self.EXP_ANNEAL_SAMPLES_KEY] + + if (self.EXP_PARAM_BEG_KEY in json_data): + self.exp_params_beg.load(json_data[self.EXP_PARAM_BEG_KEY]) + + if (self.EXP_PARAM_END_KEY in json_data): + self.exp_params_end.load(json_data[self.EXP_PARAM_END_KEY]) + + num_procs = MPIUtil.get_num_procs() + self._local_mini_batch_size = int(np.ceil(self.mini_batch_size / num_procs)) + self._local_mini_batch_size = np.maximum(self._local_mini_batch_size, 1) + self.mini_batch_size = self._local_mini_batch_size * num_procs + + assert(self.exp_params_beg.noise == self.exp_params_end.noise) # noise std should not change + self.exp_params_curr = copy.deepcopy(self.exp_params_beg) + self.exp_params_end.noise = self.exp_params_beg.noise + + self._need_normalizer_update = self.normalizer_samples > 0 + + return + + def _record_state(self): + s = self.world.env.record_state(self.id) + return s + + def _record_goal(self): + g = self.world.env.record_goal(self.id) + return g + + def _record_reward(self): + r = self.world.env.calc_reward(self.id) + return r + + def _apply_action(self, a): + self.world.env.set_action(self.id, a) + return + + def _record_flags(self): + return int(0) + + def _is_first_step(self): + return len(self.path.states) == 0 + + def _end_path(self): + s = self._record_state() + g = self._record_goal() + r = self._record_reward() + + 
self.path.rewards.append(r) + self.path.states.append(s) + self.path.goals.append(g) + self.path.terminate = self.world.env.check_terminate(self.id) + + return + + def _update_new_action(self): + s = self._record_state() + g = self._record_goal() + + if not (self._is_first_step()): + r = self._record_reward() + self.path.rewards.append(r) + + a, logp = self._decide_action(s=s, g=g) + assert len(np.shape(a)) == 1 + assert len(np.shape(logp)) <= 1 + + flags = self._record_flags() + self._apply_action(a) + + self.path.states.append(s) + self.path.goals.append(g) + self.path.actions.append(a) + self.path.logps.append(logp) + self.path.flags.append(flags) + + if self._enable_draw(): + self._log_val(s, g) + + return + + def _update_exp_params(self): + lerp = float(self._total_sample_count) / self.exp_anneal_samples + lerp = np.clip(lerp, 0.0, 1.0) + self.exp_params_curr = self.exp_params_beg.lerp(self.exp_params_end, lerp) + return + + def _update_test_return(self, path): + path_reward = path.calc_return() + self.test_return += path_reward + self.test_episode_count += 1 + return + + def _update_mode(self): + if (self._mode == self.Mode.TRAIN): + self._update_mode_train() + elif (self._mode == self.Mode.TRAIN_END): + self._update_mode_train_end() + elif (self._mode == self.Mode.TEST): + self._update_mode_test() + else: + assert False, Logger.print2("Unsupported RL agent mode" + str(self._mode)) + return + + def _update_mode_train(self): + return + + def _update_mode_train_end(self): + self._init_mode_test() + return + + def _update_mode_test(self): + if (self.test_episode_count * MPIUtil.get_num_procs() >= self.test_episodes): + global_return = MPIUtil.reduce_sum(self.test_return) + global_count = MPIUtil.reduce_sum(self.test_episode_count) + avg_return = global_return / global_count + self.avg_test_return = avg_return + + if self.enable_training: + self._init_mode_train() + return + + def _init_mode_train(self): + self._mode = self.Mode.TRAIN + self.world.env.set_mode(self._mode) + return + + def _init_mode_train_end(self): + self._mode = self.Mode.TRAIN_END + return + + def _init_mode_test(self): + self._mode = self.Mode.TEST + self.test_return = 0.0 + self.test_episode_count = 0 + self.world.env.set_mode(self._mode) + return + + def _enable_output(self): + return MPIUtil.is_root_proc() and self.output_dir != "" + + def _enable_int_output(self): + return MPIUtil.is_root_proc() and self.int_output_dir != "" + + def _calc_val_bounds(self, discount): + r_min = self.world.env.get_reward_min(self.id) + r_max = self.world.env.get_reward_max(self.id) + assert(r_min <= r_max) + + val_min = r_min / ( 1.0 - discount) + val_max = r_max / ( 1.0 - discount) + return val_min, val_max + + def _calc_val_offset_scale(self, discount): + val_min, val_max = self._calc_val_bounds(discount) + val_offset = 0 + val_scale = 1 + + if (np.isfinite(val_min) and np.isfinite(val_max)): + val_offset = -0.5 * (val_max + val_min) + val_scale = 2 / (val_max - val_min) + + return val_offset, val_scale + + def _calc_term_vals(self, discount): + r_fail = self.world.env.get_reward_fail(self.id) + r_succ = self.world.env.get_reward_succ(self.id) + + r_min = self.world.env.get_reward_min(self.id) + r_max = self.world.env.get_reward_max(self.id) + assert(r_fail <= r_max and r_fail >= r_min) + assert(r_succ <= r_max and r_succ >= r_min) + assert(not np.isinf(r_fail)) + assert(not np.isinf(r_succ)) + + if (discount == 0): + val_fail = 0 + val_succ = 0 + else: + val_fail = r_fail / (1.0 - discount) + val_succ = r_succ / (1.0 - discount) 
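# Editor's check, not part of the patch: the r / (1.0 - discount) terms above are the closed
# form of an infinite geometric series, i.e. the return earned if the same reward r repeated
# forever: sum over t of gamma**t * r equals r / (1 - gamma) for 0 <= gamma < 1.
gamma, r = 0.95, 0.8
closed_form = r / (1.0 - gamma)                          # 16.0
truncated = sum(r * gamma ** t for t in range(10000))    # a long finite horizon converges to it
assert abs(closed_form - truncated) < 1e-6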
+ + return val_fail, val_succ + + def _update_iter(self, iter): + if (self._enable_output() and self.iter % self.output_iters == 0): + output_path = self._get_output_path() + output_dir = os.path.dirname(output_path) + if not os.path.exists(output_dir): + os.makedirs(output_dir) + self.save_model(output_path) + + if (self._enable_int_output() and self.iter % self.int_output_iters == 0): + int_output_path = self._get_int_output_path() + int_output_dir = os.path.dirname(int_output_path) + if not os.path.exists(int_output_dir): + os.makedirs(int_output_dir) + self.save_model(int_output_path) + + self.iter = iter + return + + def _enable_draw(self): + return self.world.env.enable_draw + + def _log_val(self, s, g): + pass + + def _build_replay_buffer(self, buffer_size): + num_procs = MPIUtil.get_num_procs() + buffer_size = int(buffer_size / num_procs) + self.replay_buffer = ReplayBuffer(buffer_size=buffer_size) + self.replay_buffer_initialized = False + return + + def _store_path(self, path): + path_id = self.replay_buffer.store(path) + valid_path = path_id != MathUtil.INVALID_IDX + + if valid_path: + self.train_return = path.calc_return() + + if self._need_normalizer_update: + self._record_normalizers(path) + + return path_id + + def _record_normalizers(self, path): + states = np.array(path.states) + self.s_norm.record(states) + + if self.has_goal(): + goals = np.array(path.goals) + self.g_norm.record(goals) + + return + + def _update_normalizers(self): + self.s_norm.update() + + if self.has_goal(): + self.g_norm.update() + return + + def _train(self): + samples = self.replay_buffer.total_count + self._total_sample_count = int(MPIUtil.reduce_sum(samples)) + end_training = False + + if (self.replay_buffer_initialized): + if (self._valid_train_step()): + prev_iter = self.iter + iters = self._get_iters_per_update() + avg_train_return = MPIUtil.reduce_avg(self.train_return) + + for i in range(iters): + curr_iter = self.iter + wall_time = time.time() - self.start_time + wall_time /= 60 * 60 # store time in hours + + has_goal = self.has_goal() + s_mean = np.mean(self.s_norm.mean) + s_std = np.mean(self.s_norm.std) + g_mean = np.mean(self.g_norm.mean) if has_goal else 0 + g_std = np.mean(self.g_norm.std) if has_goal else 0 + + self.logger.log_tabular("Iteration", self.iter) + self.logger.log_tabular("Wall_Time", wall_time) + self.logger.log_tabular("Samples", self._total_sample_count) + self.logger.log_tabular("Train_Return", avg_train_return) + self.logger.log_tabular("Test_Return", self.avg_test_return) + self.logger.log_tabular("State_Mean", s_mean) + self.logger.log_tabular("State_Std", s_std) + self.logger.log_tabular("Goal_Mean", g_mean) + self.logger.log_tabular("Goal_Std", g_std) + self._log_exp_params() + + self._update_iter(self.iter + 1) + self._train_step() + + Logger.print2("Agent " + str(self.id)) + self.logger.print_tabular() + Logger.print2("") + + if (self._enable_output() and curr_iter % self.int_output_iters == 0): + self.logger.dump_tabular() + + if (prev_iter // self.int_output_iters != self.iter // self.int_output_iters): + end_training = self.enable_testing() + + else: + + Logger.print2("Agent " + str(self.id)) + Logger.print2("Samples: " + str(self._total_sample_count)) + Logger.print2("") + + if (self._total_sample_count >= self.init_samples): + self.replay_buffer_initialized = True + end_training = self.enable_testing() + + if self._need_normalizer_update: + self._update_normalizers() + self._need_normalizer_update = self.normalizer_samples > self._total_sample_count + + if 
end_training: + self._init_mode_train_end() + + return + + def _get_iters_per_update(self): + return MPIUtil.get_num_procs() * self.iters_per_update + + def _valid_train_step(self): + return True + + def _log_exp_params(self): + self.logger.log_tabular("Exp_Rate", self.exp_params_curr.rate) + self.logger.log_tabular("Exp_Noise", self.exp_params_curr.noise) + self.logger.log_tabular("Exp_Temp", self.exp_params_curr.temp) + return \ No newline at end of file diff --git a/examples/pybullet/gym/pybullet_envs/deep_mimic/learning/rl_util.py b/examples/pybullet/gym/pybullet_envs/deep_mimic/learning/rl_util.py new file mode 100644 index 000000000..12c682a19 --- /dev/null +++ b/examples/pybullet/gym/pybullet_envs/deep_mimic/learning/rl_util.py @@ -0,0 +1,18 @@ +import numpy as np + +def compute_return(rewards, gamma, td_lambda, val_t): + # computes td-lambda return of path + path_len = len(rewards) + assert len(val_t) == path_len + 1 + + return_t = np.zeros(path_len) + last_val = rewards[-1] + gamma * val_t[-1] + return_t[-1] = last_val + + for i in reversed(range(0, path_len - 1)): + curr_r = rewards[i] + next_ret = return_t[i + 1] + curr_val = curr_r + gamma * ((1.0 - td_lambda) * val_t[i + 1] + td_lambda * next_ret) + return_t[i] = curr_val + + return return_t \ No newline at end of file diff --git a/examples/pybullet/gym/pybullet_envs/deep_mimic/learning/rl_world.py b/examples/pybullet/gym/pybullet_envs/deep_mimic/learning/rl_world.py new file mode 100644 index 000000000..3fd7bee76 --- /dev/null +++ b/examples/pybullet/gym/pybullet_envs/deep_mimic/learning/rl_world.py @@ -0,0 +1,143 @@ +import numpy as np +import learning.agent_builder as AgentBuilder +import learning.tf_util as TFUtil +from learning.rl_agent import RLAgent +from pybullet_utils.logger import Logger +import pybullet_data + +class RLWorld(object): + def __init__(self, env, arg_parser): + TFUtil.disable_gpu() + + self.env = env + self.arg_parser = arg_parser + self._enable_training = True + self.train_agents = [] + self.parse_args(arg_parser) + + self.build_agents() + + return + + def get_enable_training(self): + return self._enable_training + + def set_enable_training(self, enable): + self._enable_training = enable + for i in range(len(self.agents)): + curr_agent = self.agents[i] + if curr_agent is not None: + enable_curr_train = self.train_agents[i] if (len(self.train_agents) > 0) else True + curr_agent.enable_training = self.enable_training and enable_curr_train + + if (self._enable_training): + self.env.set_mode(RLAgent.Mode.TRAIN) + else: + self.env.set_mode(RLAgent.Mode.TEST) + + return + + enable_training = property(get_enable_training, set_enable_training) + + def parse_args(self, arg_parser): + self.train_agents = self.arg_parser.parse_bools('train_agents') + num_agents = self.env.get_num_agents() + assert(len(self.train_agents) == num_agents or len(self.train_agents) == 0) + + return + + def shutdown(self): + self.env.shutdown() + return + + def build_agents(self): + num_agents = self.env.get_num_agents() + print("num_agents=",num_agents) + self.agents = [] + + Logger.print2('') + Logger.print2('Num Agents: {:d}'.format(num_agents)) + + agent_files = self.arg_parser.parse_strings('agent_files') + print("len(agent_files)=",len(agent_files)) + assert(len(agent_files) == num_agents or len(agent_files) == 0) + + model_files = self.arg_parser.parse_strings('model_files') + assert(len(model_files) == num_agents or len(model_files) == 0) + + output_path = self.arg_parser.parse_string('output_path') + int_output_path = 
self.arg_parser.parse_string('int_output_path') + + for i in range(num_agents): + curr_file = agent_files[i] + curr_agent = self._build_agent(i, curr_file) + + if curr_agent is not None: + curr_agent.output_dir = output_path + curr_agent.int_output_dir = int_output_path + Logger.print2(str(curr_agent)) + + if (len(model_files) > 0): + curr_model_file = model_files[i] + if curr_model_file != 'none': + curr_agent.load_model(pybullet_data.getDataPath()+"/"+curr_model_file) + + self.agents.append(curr_agent) + Logger.print2('') + + self.set_enable_training(self.enable_training) + + return + + def update(self, timestep): + #print("world update!\n") + self._update_agents(timestep) + self._update_env(timestep) + return + + def reset(self): + self._reset_agents() + self._reset_env() + return + + def end_episode(self): + self._end_episode_agents(); + return + + def _update_env(self, timestep): + self.env.update(timestep) + return + + def _update_agents(self, timestep): + #print("len(agents)=",len(self.agents)) + for agent in self.agents: + if (agent is not None): + agent.update(timestep) + return + + def _reset_env(self): + self.env.reset() + return + + def _reset_agents(self): + for agent in self.agents: + if (agent != None): + agent.reset() + return + + def _end_episode_agents(self): + for agent in self.agents: + if (agent != None): + agent.end_episode() + return + + def _build_agent(self, id, agent_file): + Logger.print2('Agent {:d}: {}'.format(id, agent_file)) + if (agent_file == 'none'): + agent = None + else: + agent = AgentBuilder.build_agent(self, id, agent_file) + assert (agent != None), 'Failed to build agent {:d} from: {}'.format(id, agent_file) + + return agent + diff --git a/examples/pybullet/gym/pybullet_envs/deep_mimic/learning/solvers/mpi_solver.py b/examples/pybullet/gym/pybullet_envs/deep_mimic/learning/solvers/mpi_solver.py new file mode 100644 index 000000000..46c0963fa --- /dev/null +++ b/examples/pybullet/gym/pybullet_envs/deep_mimic/learning/solvers/mpi_solver.py @@ -0,0 +1,103 @@ +from mpi4py import MPI +import tensorflow as tf +import numpy as np +import learning.tf_util as TFUtil +import pybullet_utils.math_util as MathUtil +import pybullet_utils.mpi_util as MPIUtil +from pybullet_utils.logger import Logger + +from learning.solvers.solver import Solver + +class MPISolver(Solver): + CHECK_SYNC_ITERS = 1000 + + def __init__(self, sess, optimizer, vars): + super().__init__(vars) + self.sess = sess + self.optimizer = optimizer + self._build_grad_feed(vars) + self._update = optimizer.apply_gradients(zip(self._grad_tf_list, self.vars)) + self._set_flat_vars = TFUtil.SetFromFlat(sess, self.vars) + self._get_flat_vars = TFUtil.GetFlat(sess, self.vars) + + self.iter = 0 + grad_dim = self._calc_grad_dim() + self._flat_grad = np.zeros(grad_dim, dtype=np.float32) + self._global_flat_grad = np.zeros(grad_dim, dtype=np.float32) + + return + + def get_stepsize(self): + return self.optimizer._learning_rate_tensor.eval() + + def update(self, grads=None, grad_scale=1.0): + if grads is not None: + self._flat_grad = MathUtil.flatten(grads) + else: + self._flat_grad.fill(0) + return self.update_flatgrad(self._flat_grad, grad_scale) + + def update_flatgrad(self, flat_grad, grad_scale=1.0): + if self.iter % self.CHECK_SYNC_ITERS == 0: + assert self.check_synced(), Logger.print2('Network parameters desynchronized') + + if grad_scale != 1.0: + flat_grad *= grad_scale + + MPI.COMM_WORLD.Allreduce(flat_grad, self._global_flat_grad, op=MPI.SUM) + self._global_flat_grad /= MPIUtil.get_num_procs() + + 
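# Editor's sketch, not part of the patch: Allreduce with op=MPI.SUM followed by division by the
# number of ranks gives every worker the mean gradient, so all processes apply the same averaged
# update. Run under e.g. `mpiexec -n 4 python this_sketch.py`.
from mpi4py import MPI
import numpy as np

comm = MPI.COMM_WORLD
local_grad = np.full(3, float(comm.Get_rank()))   # stand-in for one worker's flattened gradient
global_grad = np.zeros_like(local_grad)

comm.Allreduce(local_grad, global_grad, op=MPI.SUM)
global_grad /= comm.Get_size()                    # same averaging step as update_flatgrad above
print(comm.Get_rank(), global_grad)               # every rank now holds the identical mean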
self._load_flat_grad(self._global_flat_grad) + self.sess.run([self._update], self._grad_feed) + self.iter += 1 + + return + + def sync(self): + vars = self._get_flat_vars() + MPIUtil.bcast(vars) + self._set_flat_vars(vars) + return + + def check_synced(self): + synced = True + if self._is_root(): + vars = self._get_flat_vars() + MPIUtil.bcast(vars) + else: + vars_local = self._get_flat_vars() + vars_root = np.empty_like(vars_local) + MPIUtil.bcast(vars_root) + synced = (vars_local == vars_root).all() + return synced + + def _is_root(self): + return MPIUtil.is_root_proc() + + def _build_grad_feed(self, vars): + self._grad_tf_list = [] + self._grad_buffers = [] + for v in self.vars: + shape = v.get_shape() + grad = np.zeros(shape) + grad_tf = tf.placeholder(tf.float32, shape=shape) + self._grad_buffers.append(grad) + self._grad_tf_list.append(grad_tf) + + self._grad_feed = dict({g_tf: g for g_tf, g in zip(self._grad_tf_list, self._grad_buffers)}) + + return + + def _calc_grad_dim(self): + grad_dim = 0 + for grad in self._grad_buffers: + grad_dim += grad.size + return grad_dim + + def _load_flat_grad(self, flat_grad): + start = 0 + for g in self._grad_buffers: + size = g.size + np.copyto(g, np.reshape(flat_grad[start:start + size], g.shape)) + start += size + return \ No newline at end of file diff --git a/examples/pybullet/gym/pybullet_envs/deep_mimic/learning/solvers/solver.py b/examples/pybullet/gym/pybullet_envs/deep_mimic/learning/solvers/solver.py new file mode 100644 index 000000000..8df65c029 --- /dev/null +++ b/examples/pybullet/gym/pybullet_envs/deep_mimic/learning/solvers/solver.py @@ -0,0 +1,10 @@ +from abc import ABC, abstractmethod + +class Solver(ABC): + def __init__(self, vars): + self.vars = vars + return + + @abstractmethod + def update(self, grads): + pass \ No newline at end of file diff --git a/examples/pybullet/gym/pybullet_envs/deep_mimic/learning/tf_agent.py b/examples/pybullet/gym/pybullet_envs/deep_mimic/learning/tf_agent.py new file mode 100644 index 000000000..51dc5e88a --- /dev/null +++ b/examples/pybullet/gym/pybullet_envs/deep_mimic/learning/tf_agent.py @@ -0,0 +1,149 @@ +import numpy as np +import tensorflow as tf +from abc import abstractmethod + +from learning.rl_agent import RLAgent +from pybullet_utils.logger import Logger +from learning.tf_normalizer import TFNormalizer + +class TFAgent(RLAgent): + RESOURCE_SCOPE = 'resource' + SOLVER_SCOPE = 'solvers' + + def __init__(self, world, id, json_data): + self.tf_scope = 'agent' + self.graph = tf.Graph() + self.sess = tf.Session(graph=self.graph) + + super().__init__(world, id, json_data) + self._build_graph(json_data) + self._init_normalizers() + return + + def __del__(self): + self.sess.close() + return + + def save_model(self, out_path): + with self.sess.as_default(), self.graph.as_default(): + try: + save_path = self.saver.save(self.sess, out_path, write_meta_graph=False, write_state=False) + Logger.print2('Model saved to: ' + save_path) + except: + Logger.print2("Failed to save model to: " + save_path) + return + + def load_model(self, in_path): + with self.sess.as_default(), self.graph.as_default(): + self.saver.restore(self.sess, in_path) + self._load_normalizers() + Logger.print2('Model loaded from: ' + in_path) + return + + def _get_output_path(self): + assert(self.output_dir != '') + file_path = self.output_dir + '/agent' + str(self.id) + '_model.ckpt' + return file_path + + def _get_int_output_path(self): + assert(self.int_output_dir != '') + file_path = self.int_output_dir + 
('/agent{:d}_models/agent{:d}_int_model_{:010d}.ckpt').format(self.id, self.id, self.iter) + return file_path + + def _build_graph(self, json_data): + with self.sess.as_default(), self.graph.as_default(): + with tf.variable_scope(self.tf_scope): + self._build_nets(json_data) + + with tf.variable_scope(self.SOLVER_SCOPE): + self._build_losses(json_data) + self._build_solvers(json_data) + + self._initialize_vars() + self._build_saver() + return + + def _init_normalizers(self): + with self.sess.as_default(), self.graph.as_default(): + # update normalizers to sync the tensorflow tensors + self.s_norm.update() + self.g_norm.update() + self.a_norm.update() + return + + @abstractmethod + def _build_nets(self, json_data): + pass + + @abstractmethod + def _build_losses(self, json_data): + pass + + @abstractmethod + def _build_solvers(self, json_data): + pass + + def _tf_vars(self, scope=''): + with self.sess.as_default(), self.graph.as_default(): + res = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=self.tf_scope + '/' + scope) + assert len(res) > 0 + return res + + def _build_normalizers(self): + with self.sess.as_default(), self.graph.as_default(), tf.variable_scope(self.tf_scope): + with tf.variable_scope(self.RESOURCE_SCOPE): + self.s_norm = TFNormalizer(self.sess, 's_norm', self.get_state_size(), self.world.env.build_state_norm_groups(self.id)) + state_offset = -self.world.env.build_state_offset(self.id) + print("state_offset=",state_offset) + state_scale = 1 / self.world.env.build_state_scale(self.id) + print("state_scale=",state_scale) + self.s_norm.set_mean_std(-self.world.env.build_state_offset(self.id), + 1 / self.world.env.build_state_scale(self.id)) + + self.g_norm = TFNormalizer(self.sess, 'g_norm', self.get_goal_size(), self.world.env.build_goal_norm_groups(self.id)) + self.g_norm.set_mean_std(-self.world.env.build_goal_offset(self.id), + 1 / self.world.env.build_goal_scale(self.id)) + + self.a_norm = TFNormalizer(self.sess, 'a_norm', self.get_action_size()) + self.a_norm.set_mean_std(-self.world.env.build_action_offset(self.id), + 1 / self.world.env.build_action_scale(self.id)) + return + + def _load_normalizers(self): + self.s_norm.load() + self.g_norm.load() + self.a_norm.load() + return + + def _update_normalizers(self): + with self.sess.as_default(), self.graph.as_default(): + super()._update_normalizers() + return + + def _initialize_vars(self): + self.sess.run(tf.global_variables_initializer()) + return + + def _build_saver(self): + vars = self._get_saver_vars() + self.saver = tf.train.Saver(vars, max_to_keep=0) + return + + def _get_saver_vars(self): + with self.sess.as_default(), self.graph.as_default(): + vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope=self.tf_scope) + vars = [v for v in vars if '/' + self.SOLVER_SCOPE + '/' not in v.name] + #vars = [v for v in vars if '/target/' not in v.name] + assert len(vars) > 0 + return vars + + def _weight_decay_loss(self, scope): + vars = self._tf_vars(scope) + vars_no_bias = [v for v in vars if 'bias' not in v.name] + loss = tf.add_n([tf.nn.l2_loss(v) for v in vars_no_bias]) + return loss + + def _train(self): + with self.sess.as_default(), self.graph.as_default(): + super()._train() + return \ No newline at end of file diff --git a/examples/pybullet/gym/pybullet_envs/deep_mimic/learning/tf_normalizer.py b/examples/pybullet/gym/pybullet_envs/deep_mimic/learning/tf_normalizer.py new file mode 100644 index 000000000..ecbddc95b --- /dev/null +++ 
b/examples/pybullet/gym/pybullet_envs/deep_mimic/learning/tf_normalizer.py @@ -0,0 +1,67 @@ +import numpy as np +import copy +import tensorflow as tf +from learning.normalizer import Normalizer + +class TFNormalizer(Normalizer): + + def __init__(self, sess, scope, size, groups_ids=None, eps=0.02, clip=np.inf): + self.sess = sess + self.scope = scope + super().__init__(size, groups_ids, eps, clip) + + with tf.variable_scope(self.scope): + self._build_resource_tf() + return + + # initialze count when loading saved values so that things don't change to quickly during updates + def load(self): + self.count = self.count_tf.eval()[0] + self.mean = self.mean_tf.eval() + self.std = self.std_tf.eval() + self.mean_sq = self.calc_mean_sq(self.mean, self.std) + return + + def update(self): + super().update() + self._update_resource_tf() + return + + def set_mean_std(self, mean, std): + super().set_mean_std(mean, std) + self._update_resource_tf() + return + + def normalize_tf(self, x): + norm_x = (x - self.mean_tf) / self.std_tf + norm_x = tf.clip_by_value(norm_x, -self.clip, self.clip) + return norm_x + + def unnormalize_tf(self, norm_x): + x = norm_x * self.std_tf + self.mean_tf + return x + + def _build_resource_tf(self): + self.count_tf = tf.get_variable(dtype=tf.int32, name='count', initializer=np.array([self.count], dtype=np.int32), trainable=False) + self.mean_tf = tf.get_variable(dtype=tf.float32, name='mean', initializer=self.mean.astype(np.float32), trainable=False) + self.std_tf = tf.get_variable(dtype=tf.float32, name='std', initializer=self.std.astype(np.float32), trainable=False) + + self.count_ph = tf.get_variable(dtype=tf.int32, name='count_ph', shape=[1]) + self.mean_ph = tf.get_variable(dtype=tf.float32, name='mean_ph', shape=self.mean.shape) + self.std_ph = tf.get_variable(dtype=tf.float32, name='std_ph', shape=self.std.shape) + + self._update_op = tf.group( + self.count_tf.assign(self.count_ph), + self.mean_tf.assign(self.mean_ph), + self.std_tf.assign(self.std_ph) + ) + return + + def _update_resource_tf(self): + feed = { + self.count_ph: np.array([self.count], dtype=np.int32), + self.mean_ph: self.mean, + self.std_ph: self.std + } + self.sess.run(self._update_op, feed_dict=feed) + return diff --git a/examples/pybullet/gym/pybullet_envs/deep_mimic/learning/tf_util.py b/examples/pybullet/gym/pybullet_envs/deep_mimic/learning/tf_util.py new file mode 100644 index 000000000..b0a315bc3 --- /dev/null +++ b/examples/pybullet/gym/pybullet_envs/deep_mimic/learning/tf_util.py @@ -0,0 +1,104 @@ +import tensorflow as tf +import numpy as np +import os + +xavier_initializer = tf.contrib.layers.xavier_initializer() + +def disable_gpu(): + os.environ["CUDA_VISIBLE_DEVICES"] = '-1' + return + +def var_shape(x): + out = [k.value for k in x.get_shape()] + assert all(isinstance(a, int) for a in out), "shape function assumes that shape is fully known" + return out + +def intprod(x): + return int(np.prod(x)) + +def numel(x): + n = intprod(var_shape(x)) + return n + +def flat_grad(loss, var_list, grad_ys=None): + grads = tf.gradients(loss, var_list, grad_ys) + return tf.concat([tf.reshape(grad, [numel(v)]) for (v, grad) in zip(var_list, grads)], axis=0) + +def fc_net(input, layers_sizes, activation, reuse=None, flatten=False): # build fully connected network + curr_tf = input + for i, size in enumerate(layers_sizes): + with tf.variable_scope(str(i), reuse=reuse): + curr_tf = tf.layers.dense(inputs=curr_tf, + units=size, + kernel_initializer=xavier_initializer, + activation = activation if i < 
len(layers_sizes)-1 else None) + if flatten: + assert layers_sizes[-1] == 1 + curr_tf = tf.reshape(curr_tf, [-1]) + + return curr_tf + +def copy(sess, src, dst): + assert len(src) == len(dst) + sess.run(list(map(lambda v: v[1].assign(v[0]), zip(src, dst)))) + return + +def flat_grad(loss, var_list): + grads = tf.gradients(loss, var_list) + return tf.concat(axis=0, values=[tf.reshape(grad, [numel(v)]) + for (v, grad) in zip(var_list, grads)]) + + +def calc_logp_gaussian(x_tf, mean_tf, std_tf): + dim = tf.to_float(tf.shape(x_tf)[-1]) + + if mean_tf is None: + diff_tf = x_tf + else: + diff_tf = x_tf - mean_tf + + logp_tf = -0.5 * tf.reduce_sum(tf.square(diff_tf / std_tf), axis=-1) + logp_tf += -0.5 * dim * np.log(2 * np.pi) - tf.reduce_sum(tf.log(std_tf), axis=-1) + + return logp_tf + +def calc_bound_loss(x_tf, bound_min, bound_max): + # penalty for violating bounds + violation_min = tf.minimum(x_tf - bound_min, 0) + violation_max = tf.maximum(x_tf - bound_max, 0) + violation = tf.reduce_sum(tf.square(violation_min), axis=-1) + tf.reduce_sum(tf.square(violation_max), axis=-1) + loss = 0.5 * tf.reduce_mean(violation) + return loss + +class SetFromFlat(object): + def __init__(self, sess, var_list, dtype=tf.float32): + assigns = [] + shapes = list(map(var_shape, var_list)) + total_size = np.sum([intprod(shape) for shape in shapes]) + + self.sess = sess + self.theta = tf.placeholder(dtype,[total_size]) + start=0 + assigns = [] + + for (shape,v) in zip(shapes,var_list): + size = intprod(shape) + assigns.append(tf.assign(v, tf.reshape(self.theta[start:start+size],shape))) + start += size + + self.op = tf.group(*assigns) + + return + + def __call__(self, theta): + self.sess.run(self.op, feed_dict={self.theta:theta}) + return + +class GetFlat(object): + def __init__(self, sess, var_list): + self.sess = sess + self.op = tf.concat(axis=0, values=[tf.reshape(v, [numel(v)]) for v in var_list]) + return + + def __call__(self): + return self.sess.run(self.op) \ No newline at end of file diff --git a/examples/pybullet/gym/pybullet_envs/deep_mimic/mpi_run.py b/examples/pybullet/gym/pybullet_envs/deep_mimic/mpi_run.py new file mode 100644 index 000000000..85d70abb1 --- /dev/null +++ b/examples/pybullet/gym/pybullet_envs/deep_mimic/mpi_run.py @@ -0,0 +1,23 @@ +import sys +import subprocess +from pybullet_utils.arg_parser import ArgParser +from pybullet_utils.logger import Logger + +def main(): + # Command line arguments + args = sys.argv[1:] + arg_parser = ArgParser() + arg_parser.load_args(args) + + num_workers = arg_parser.parse_int('num_workers', 1) + assert(num_workers > 0) + + Logger.print2('Running with {:d} workers'.format(num_workers)) + cmd = 'mpiexec -n {:d} python3 DeepMimic_Optimizer.py '.format(num_workers) + cmd += ' '.join(args) + Logger.print2('cmd: ' + cmd) + subprocess.call(cmd, shell=True) + return + +if __name__ == '__main__': + main() diff --git a/examples/pybullet/gym/pybullet_envs/deep_mimic/testrl.py b/examples/pybullet/gym/pybullet_envs/deep_mimic/testrl.py new file mode 100644 index 000000000..0ed1afde8 --- /dev/null +++ b/examples/pybullet/gym/pybullet_envs/deep_mimic/testrl.py @@ -0,0 +1,82 @@ +import os +import inspect +currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe()))) +parentdir = os.path.dirname(os.path.dirname(currentdir)) +os.sys.path.insert(0,parentdir) +print("parentdir=",parentdir) +import json +from pybullet_envs.deep_mimic.learning.rl_world import RLWorld +from learning.ppo_agent import PPOAgent + +import pybullet_data +from 
pybullet_utils.arg_parser import ArgParser +from pybullet_utils.logger import Logger +from pybullet_envs.deep_mimic.env.pybullet_deep_mimic_env import PyBulletDeepMimicEnv +import sys +import random + +update_timestep = 1./600. + +def update_world(world, time_elapsed): + timeStep = 1./600. + world.update(timeStep) + reward = world.env.calc_reward(agent_id=0) + #print("reward=",reward) + end_episode = world.env.is_episode_end() + if (end_episode): + world.end_episode() + world.reset() + return + +def build_arg_parser(args): + arg_parser = ArgParser() + arg_parser.load_args(args) + + arg_file = arg_parser.parse_string('arg_file', '') + if (arg_file != ''): + path = pybullet_data.getDataPath()+"/args/"+arg_file + succ = arg_parser.load_file(path) + Logger.print2(arg_file) + assert succ, Logger.print2('Failed to load args from: ' + arg_file) + return arg_parser + +args = sys.argv[1:] + + + +def build_world(args, enable_draw): + arg_parser = build_arg_parser(args) + print("enable_draw=",enable_draw) + env = PyBulletDeepMimicEnv(args, enable_draw) + world = RLWorld(env, arg_parser) + #world.env.set_playback_speed(playback_speed) + + motion_file = arg_parser.parse_string("motion_file") + print("motion_file=",motion_file) + bodies = arg_parser.parse_ints("fall_contact_bodies") + print("bodies=",bodies) + int_output_path = arg_parser.parse_string("int_output_path") + print("int_output_path=",int_output_path) + agent_files = pybullet_data.getDataPath()+"/"+arg_parser.parse_string("agent_files") + + AGENT_TYPE_KEY = "AgentType" + + print("agent_file=",agent_files) + with open(agent_files) as data_file: + json_data = json.load(data_file) + print("json_data=",json_data) + assert AGENT_TYPE_KEY in json_data + agent_type = json_data[AGENT_TYPE_KEY] + print("agent_type=",agent_type) + agent = PPOAgent(world, id, json_data) + + agent.set_enable_training(True) + world.reset() + return world + +if __name__ == '__main__': + world = build_world(args, True) + while (world.env._pybullet_client.isConnected()): + timeStep = 1./600. 
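# Editor's sketch, not part of the patch: the same fixed 1/600 s step can drive a headless run
# for a fixed number of simulation steps instead of looping until the GUI client disconnects;
# enable_draw=False is assumed here to start the environment without rendering.
def run_headless(args, num_steps=6000):
    world = build_world(args, enable_draw=False)
    for _ in range(num_steps):
        update_world(world, 1. / 600.)
    world.shutdown()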
+ update_world(world, timeStep) + diff --git a/examples/pybullet/premake4.lua b/examples/pybullet/premake4.lua index 19d39558c..f0c26eac5 100644 --- a/examples/pybullet/premake4.lua +++ b/examples/pybullet/premake4.lua @@ -17,7 +17,9 @@ project ("pybullet") includedirs {"../../src", "../../examples", "../../examples/ThirdPartyLibs"} - defines {"PHYSICS_IN_PROCESS_EXAMPLE_BROWSER"} + defines {"PHYSICS_IN_PROCESS_EXAMPLE_BROWSER", "STATIC_LINK_SPD_PLUGIN"} + + hasCL = findOpenCL("clew") links{ "BulletExampleBrowserLib","gwen", "BulletFileLoader","BulletWorldImporter","OpenGL_Window","BulletSoftBody", "BulletInverseDynamicsUtils", "BulletInverseDynamics", "BulletDynamics","BulletCollision","LinearMath","BussIK", "Bullet3Common"} @@ -170,6 +172,20 @@ if not _OPTIONS["no-enet"] then "../../examples/SharedMemory/plugins/collisionFilterPlugin/collisionFilterPlugin.cpp", "../../examples/SharedMemory/plugins/pdControlPlugin/pdControlPlugin.cpp", "../../examples/SharedMemory/plugins/pdControlPlugin/pdControlPlugin.h", + "../../examples/SharedMemory/plugins/stablePDPlugin/SpAlg.cpp", + "../../examples/SharedMemory/plugins/stablePDPlugin/SpAlg.h", + "../../examples/SharedMemory/plugins/stablePDPlugin/Shape.cpp", + "../../examples/SharedMemory/plugins/stablePDPlugin/Shape.h", + "../../examples/SharedMemory/plugins/stablePDPlugin/RBDUtil.cpp", + "../../examples/SharedMemory/plugins/stablePDPlugin/RBDUtil.h", + "../../examples/SharedMemory/plugins/stablePDPlugin/RBDModel.cpp", + "../../examples/SharedMemory/plugins/stablePDPlugin/RBDModel.h", + "../../examples/SharedMemory/plugins/stablePDPlugin/MathUtil.cpp", + "../../examples/SharedMemory/plugins/stablePDPlugin/MathUtil.h", + "../../examples/SharedMemory/plugins/stablePDPlugin/KinTree.cpp", + "../../examples/SharedMemory/plugins/stablePDPlugin/KinTree.h", + "../../examples/SharedMemory/plugins/stablePDPlugin/BulletConversion.cpp", + "../../examples/SharedMemory/plugins/stablePDPlugin/BulletConversion.h", } if _OPTIONS["enable_physx"] then diff --git a/examples/pybullet/pybullet.c b/examples/pybullet/pybullet.c index 7452fec60..e9403626c 100644 --- a/examples/pybullet/pybullet.c +++ b/examples/pybullet/pybullet.c @@ -1233,20 +1233,21 @@ static PyObject* pybullet_changeDynamicsInfo(PyObject* self, PyObject* args, PyO double restitution = -1; double linearDamping = -1; double angularDamping = -1; + double contactStiffness = -1; double contactDamping = -1; double ccdSweptSphereRadius = -1; int frictionAnchor = -1; double contactProcessingThreshold = -1; int activationState = -1; - + double jointDamping = -1; PyObject* localInertiaDiagonalObj = 0; b3PhysicsClientHandle sm = 0; int physicsClientId = 0; - static char* kwlist[] = {"bodyUniqueId", "linkIndex", "mass", "lateralFriction", "spinningFriction", "rollingFriction", "restitution", "linearDamping", "angularDamping", "contactStiffness", "contactDamping", "frictionAnchor", "localInertiaDiagonal", "ccdSweptSphereRadius", "contactProcessingThreshold", "activationState", "physicsClientId", NULL}; - if (!PyArg_ParseTupleAndKeywords(args, keywds, "ii|dddddddddiOddii", kwlist, &bodyUniqueId, &linkIndex, &mass, &lateralFriction, &spinningFriction, &rollingFriction, &restitution, &linearDamping, &angularDamping, &contactStiffness, &contactDamping, &frictionAnchor, &localInertiaDiagonalObj, &ccdSweptSphereRadius, &contactProcessingThreshold, &activationState, &physicsClientId)) + static char* kwlist[] = {"bodyUniqueId", "linkIndex", "mass", "lateralFriction", "spinningFriction", "rollingFriction", "restitution", 
"linearDamping", "angularDamping", "contactStiffness", "contactDamping", "frictionAnchor", "localInertiaDiagonal", "ccdSweptSphereRadius", "contactProcessingThreshold", "activationState", "jointDamping", "physicsClientId", NULL}; + if (!PyArg_ParseTupleAndKeywords(args, keywds, "ii|dddddddddiOddidi", kwlist, &bodyUniqueId, &linkIndex, &mass, &lateralFriction, &spinningFriction, &rollingFriction, &restitution, &linearDamping, &angularDamping, &contactStiffness, &contactDamping, &frictionAnchor, &localInertiaDiagonalObj, &ccdSweptSphereRadius, &contactProcessingThreshold, &activationState, &jointDamping, &physicsClientId)) { return NULL; } @@ -1300,6 +1301,10 @@ static PyObject* pybullet_changeDynamicsInfo(PyObject* self, PyObject* args, PyO b3ChangeDynamicsInfoSetAngularDamping(command, bodyUniqueId, angularDamping); } + if (jointDamping >= 0) + { + b3ChangeDynamicsInfoSetJointDamping(command, bodyUniqueId, linkIndex, jointDamping); + } if (restitution >= 0) { b3ChangeDynamicsInfoSetRestitution(command, bodyUniqueId, linkIndex, restitution); diff --git a/src/Bullet3Common/b3Quaternion.h b/src/Bullet3Common/b3Quaternion.h index 9bd5ff7d9..4fdd72dcc 100644 --- a/src/Bullet3Common/b3Quaternion.h +++ b/src/Bullet3Common/b3Quaternion.h @@ -92,8 +92,11 @@ public: /**@brief Set the rotation using axis angle notation * @param axis The axis around which to rotate * @param angle The magnitude of the rotation in Radians */ - void setRotation(const b3Vector3& axis, const b3Scalar& _angle) + void setRotation(const b3Vector3& axis1, const b3Scalar& _angle) { + b3Vector3 axis = axis1; + axis.safeNormalize(); + b3Scalar d = axis.length(); b3Assert(d != b3Scalar(0.0)); if (d < B3_EPSILON)