Merge pull request #2098 from erwincoumans/master
bit more work on deep_mimic env and physx backend
@@ -2831,6 +2831,18 @@ B3_SHARED_API int b3ChangeDynamicsInfoSetAngularDamping(b3SharedMemoryCommandHan
 	return 0;
 }
 
+B3_SHARED_API int b3ChangeDynamicsInfoSetJointDamping(b3SharedMemoryCommandHandle commandHandle, int bodyUniqueId, int linkIndex, double jointDamping)
+{
+	struct SharedMemoryCommand* command = (struct SharedMemoryCommand*)commandHandle;
+	b3Assert(command->m_type == CMD_CHANGE_DYNAMICS_INFO);
+	command->m_changeDynamicsInfoArgs.m_bodyUniqueId = bodyUniqueId;
+	command->m_changeDynamicsInfoArgs.m_linkIndex = linkIndex;
+	command->m_changeDynamicsInfoArgs.m_jointDamping = jointDamping;
+	command->m_updateFlags |= CHANGE_DYNAMICS_INFO_SET_JOINT_DAMPING;
+	return 0;
+}
+
+
 B3_SHARED_API int b3ChangeDynamicsInfoSetContactStiffnessAndDamping(b3SharedMemoryCommandHandle commandHandle, int bodyUniqueId, int linkIndex, double contactStiffness, double contactDamping)
 {
 	struct SharedMemoryCommand* command = (struct SharedMemoryCommand*)commandHandle;
@@ -155,6 +155,8 @@ extern "C"
 	B3_SHARED_API int b3ChangeDynamicsInfoSetRestitution(b3SharedMemoryCommandHandle commandHandle, int bodyUniqueId, int linkIndex, double restitution);
 	B3_SHARED_API int b3ChangeDynamicsInfoSetLinearDamping(b3SharedMemoryCommandHandle commandHandle, int bodyUniqueId, double linearDamping);
 	B3_SHARED_API int b3ChangeDynamicsInfoSetAngularDamping(b3SharedMemoryCommandHandle commandHandle, int bodyUniqueId, double angularDamping);
+	B3_SHARED_API int b3ChangeDynamicsInfoSetJointDamping(b3SharedMemoryCommandHandle commandHandle, int bodyUniqueId, int linkIndex, double jointDamping);
+
 	B3_SHARED_API int b3ChangeDynamicsInfoSetContactStiffnessAndDamping(b3SharedMemoryCommandHandle commandHandle, int bodyUniqueId, int linkIndex, double contactStiffness, double contactDamping);
 	B3_SHARED_API int b3ChangeDynamicsInfoSetFrictionAnchor(b3SharedMemoryCommandHandle commandHandle, int bodyUniqueId, int linkIndex, int frictionAnchor);
 	B3_SHARED_API int b3ChangeDynamicsInfoSetCcdSweptSphereRadius(b3SharedMemoryCommandHandle commandHandle, int bodyUniqueId, int linkIndex, double ccdSweptSphereRadius);
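These declarations are what pybullet's changeDynamics binding calls into; with this patch a jointDamping argument reaches the new setter. A minimal client-side sketch, assuming a build that includes this commit and that door.urdf is on the pybullet data search path:

    import pybullet as p
    import pybullet_data

    p.connect(p.DIRECT)
    p.setAdditionalSearchPath(pybullet_data.getDataPath())
    door = p.loadURDF("door.urdf", [0, 0, 1])
    # jointDamping is routed through b3ChangeDynamicsInfoSetJointDamping;
    # linkIndex selects the joint whose m_jointDamping is updated.
    p.changeDynamics(door, 1, jointDamping=0.5)
    p.disconnect()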
@@ -7737,6 +7737,12 @@ bool PhysicsServerCommandProcessor::processChangeDynamicsInfoCommand(const struc
 					mb->getLinkCollider(linkIndex)->setContactStiffnessAndDamping(clientCmd.m_changeDynamicsInfoArgs.m_contactStiffness, clientCmd.m_changeDynamicsInfoArgs.m_contactDamping);
 				}
 			}
+
+			if (clientCmd.m_updateFlags & CHANGE_DYNAMICS_INFO_SET_JOINT_DAMPING)
+			{
+				mb->getLink(linkIndex).m_jointDamping = clientCmd.m_changeDynamicsInfoArgs.m_jointDamping;
+			}
+
 			if (clientCmd.m_updateFlags & CHANGE_DYNAMICS_INFO_SET_MASS)
 			{
 				mb->getLink(linkIndex).m_mass = mass;
@@ -8057,6 +8063,7 @@ bool PhysicsServerCommandProcessor::processSendPhysicsParametersCommand(const st
 
 	BT_PROFILE("CMD_SEND_PHYSICS_SIMULATION_PARAMETERS");
 
+
 	if (clientCmd.m_updateFlags & SIM_PARAM_ENABLE_CONE_FRICTION)
 	{
 		if (clientCmd.m_physSimParamArgs.m_enableConeFriction)
@@ -164,6 +164,7 @@ enum EnumChangeDynamicsInfoFlags
 	CHANGE_DYNAMICS_INFO_SET_CCD_SWEPT_SPHERE_RADIUS = 2048,
 	CHANGE_DYNAMICS_INFO_SET_CONTACT_PROCESSING_THRESHOLD = 4096,
 	CHANGE_DYNAMICS_INFO_SET_ACTIVATION_STATE = 8192,
+	CHANGE_DYNAMICS_INFO_SET_JOINT_DAMPING = 16384,
 };
 
 struct ChangeDynamicsInfoArgs

@@ -185,6 +186,7 @@ struct ChangeDynamicsInfoArgs
 	double m_ccdSweptSphereRadius;
 	double m_contactProcessingThreshold;
 	int m_activationState;
+	double m_jointDamping;
 };
 
 struct GetDynamicsInfoArgs
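Each flag above is a distinct power of two, so one changeDynamics command can carry several parameter updates that the server then tests independently; a small illustration using the values from this enum:

    # Values copied from EnumChangeDynamicsInfoFlags above.
    CHANGE_DYNAMICS_INFO_SET_ACTIVATION_STATE = 8192
    CHANGE_DYNAMICS_INFO_SET_JOINT_DAMPING = 16384  # the new bit: 2**14

    update_flags = CHANGE_DYNAMICS_INFO_SET_ACTIVATION_STATE | CHANGE_DYNAMICS_INFO_SET_JOINT_DAMPING
    # The command processor checks each bit on its own:
    if update_flags & CHANGE_DYNAMICS_INFO_SET_JOINT_DAMPING:
        print("apply m_jointDamping")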
File diff suppressed because it is too large.
@@ -16,6 +16,11 @@ class PhysXServerCommandProcessor : public PhysicsCommandProcessorInterface
 	bool processSendPhysicsParametersCommand(const struct SharedMemoryCommand& clientCmd, struct SharedMemoryStatus& serverStatusOut, char* bufferServerToClient, int bufferSizeInBytes);
 	bool processRequestActualStateCommand(const struct SharedMemoryCommand& clientCmd, struct SharedMemoryStatus& serverStatusOut, char* bufferServerToClient, int bufferSizeInBytes);
 	bool processResetSimulationCommand(const struct SharedMemoryCommand& clientCmd, struct SharedMemoryStatus& serverStatusOut, char* bufferServerToClient, int bufferSizeInBytes);
+	bool processInitPoseCommand(const struct SharedMemoryCommand& clientCmd, struct SharedMemoryStatus& serverStatusOut, char* bufferServerToClient, int bufferSizeInBytes);
+	bool processSendDesiredStateCommand(const struct SharedMemoryCommand& clientCmd, struct SharedMemoryStatus& serverStatusOut, char* bufferServerToClient, int bufferSizeInBytes);
+	bool processChangeDynamicsInfoCommand(const struct SharedMemoryCommand& clientCmd, struct SharedMemoryStatus& serverStatusOut, char* bufferServerToClient, int bufferSizeInBytes);
+	bool processRequestPhysicsSimulationParametersCommand(const struct SharedMemoryCommand& clientCmd, struct SharedMemoryStatus& serverStatusOut, char* bufferServerToClient, int bufferSizeInBytes);
+
 
 	bool processCustomCommand(const struct SharedMemoryCommand& clientCmd, struct SharedMemoryStatus& serverStatusOut, char* bufferServerToClient, int bufferSizeInBytes);
 
@@ -16,11 +16,15 @@
 #include "PxMaterial.h"
 #include "PxCooking.h"
 #include "PxScene.h"
+#include "PxRigidStatic.h"
+#include "PxRigidDynamic.h"
+#include "PxActor.h"
 #include "PxAggregate.h"
+#include "Bullet3Common/b3FileUtils.h"
 #include "PhysXUserData.h"
 #include "../../CommonInterfaces/CommonFileIOInterface.h"
+#include "../../Importers/ImportObjDemo/LoadMeshFromObj.h"
+#include "../../Importers/ImportSTLDemo/LoadMeshFromSTL.h"
 #include "Importers/ImportURDFDemo/UrdfParser.h"
 
 
@@ -31,6 +35,8 @@ struct URDF2PhysXCachedData
 	URDF2PhysXCachedData()
 		: m_currentMultiBodyLinkIndex(-1),
 		  m_articulation(0),
+		  m_rigidStatic(0),
+		  m_rigidDynamic(0),
 		  m_totalNumJoints1(0)
 	{
 	}

@@ -44,6 +50,8 @@ struct URDF2PhysXCachedData
 	int m_currentMultiBodyLinkIndex;
 
 	physx::PxArticulationReducedCoordinate* m_articulation;
+	physx::PxRigidStatic* m_rigidStatic;
+	physx::PxRigidDynamic* m_rigidDynamic;
 
 	btAlignedObjectArray<physx::PxTransform> m_linkTransWorldSpace;
 	btAlignedObjectArray<int> m_urdfLinkIndex;
@@ -237,7 +245,7 @@ static physx::PxConvexMesh* createPhysXConvex(physx::PxU32 numVerts, const physx
 
 
 int convertLinkPhysXShapes(const URDFImporterInterface& u2b, URDF2PhysXCachedData& cache, ArticulationCreationInterface& creation, int urdfLinkIndex, const char* pathPrefix, const btTransform& localInertiaFrame,
-	physx::PxArticulationReducedCoordinate* articulation, int mbLinkIndex, physx::PxArticulationLink* linkPtr)
+	physx::PxArticulationReducedCoordinate* articulation, int mbLinkIndex, physx::PxRigidActor* linkPtr)
 {
 	int numShapes = 0;
 

@@ -362,9 +370,61 @@ int convertLinkPhysXShapes(const URDFImporterInterface& u2b, URDF2PhysXCachedDat
 			numShapes++;
 			break;
 		}
+		case URDF_GEOM_MESH:
+		{
+			btAlignedObjectArray<physx::PxVec3> vertices;
+			GLInstanceGraphicsShape* glmesh = 0;
+			switch (collision->m_geometry.m_meshFileType)
+			{
+				case UrdfGeometry::FILE_OBJ:
+				{
+					char relativeFileName[1024];
+					char pathPrefix[1024];
+					pathPrefix[0] = 0;
+					if (creation.m_fileIO->findResourcePath(collision->m_geometry.m_meshFileName.c_str(), relativeFileName, 1024))
+					{
+						b3FileUtils::extractPath(relativeFileName, pathPrefix, 1024);
+					}
+					glmesh = LoadMeshFromObj(collision->m_geometry.m_meshFileName.c_str(), pathPrefix, creation.m_fileIO);
+					break;
+				}
+				case UrdfGeometry::FILE_STL:
+				{
+					glmesh = LoadMeshFromSTL(collision->m_geometry.m_meshFileName.c_str(), creation.m_fileIO);
+					break;
+				}
 				default:
 				{
+				}
+			}
+			if (glmesh && glmesh->m_numvertices)
+			{
+				for (int i = 0; i < glmesh->m_numvertices; i++)
+				{
+					physx::PxVec3 vert(
+						glmesh->m_vertices->at(i).xyzw[0] * collision->m_geometry.m_meshScale[0],
+						glmesh->m_vertices->at(i).xyzw[1] * collision->m_geometry.m_meshScale[1],
+						glmesh->m_vertices->at(i).xyzw[2] * collision->m_geometry.m_meshScale[2]);
+					vertices.push_back(vert);
+				}
+
+				physx::PxConvexMesh* convexMesh = createPhysXConvex(vertices.size(), &vertices[0], creation);
+
+				shape = physx::PxRigidActorExt::createExclusiveShape(*linkPtr,
+					physx::PxConvexMeshGeometry(convexMesh), *material);
+
+				shape->setLocalPose(tr);
+				cache.m_geomLocalPoses.push_back(tr);
+				numShapes++;
+			}
+			break;
+		}
+		default:
+		{
+			printf("unknown physx shape\n");
 		}
 	}
 
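The PhysX path above bakes OBJ/STL vertices, scaled per axis by m_meshScale, into a PxConvexMesh. For comparison, a hedged pybullet-side sketch of the same idea using the existing client API (the mesh file is a placeholder asset from pybullet_data):

    import pybullet as p
    import pybullet_data

    p.connect(p.DIRECT)
    p.setAdditionalSearchPath(pybullet_data.getDataPath())
    # GEOM_MESH collision shapes are convexified, much like the
    # PxConvexMeshGeometry above; meshScale plays the role of
    # collision->m_geometry.m_meshScale.
    col = p.createCollisionShape(p.GEOM_MESH, fileName="duck.obj", meshScale=[0.1, 0.1, 0.1])
    body = p.createMultiBody(baseMass=1, baseCollisionShapeIndex=col, basePosition=[0, 0, 1])
    p.disconnect()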
@@ -390,7 +450,7 @@ btTransform ConvertURDF2PhysXInternal(
 	ArticulationCreationInterface& creation,
 	URDF2PhysXCachedData& cache, int urdfLinkIndex,
 	const btTransform& parentTransformInWorldSpace,
-	bool createMultiBody, const char* pathPrefix,
+	bool createActiculation, const char* pathPrefix,
 	int flags,
 	UrdfVisualShapeCache2* cachedLinkGraphicsShapesIn,
 	UrdfVisualShapeCache2* cachedLinkGraphicsShapesOut,
@@ -562,17 +622,44 @@ btTransform ConvertURDF2PhysXInternal(
 	btTransform inertialFrameInWorldSpace = linkTransformInWorldSpace * localInertialFrame;
 	bool canSleep = (flags & CUF_ENABLE_SLEEPING) != 0;
 
-	physx::PxArticulationLink* linkPtr = 0;
+	physx::PxRigidActor* linkPtr = 0;
+	physx::PxRigidBody* rbLinkPtr = 0;
 
-	if (!createMultiBody)
-	{
-	}
-	else
-	{
 	physx::PxTransform tr;
 	tr.p = physx::PxVec3(linkTransformInWorldSpace.getOrigin().x(), linkTransformInWorldSpace.getOrigin().y(), linkTransformInWorldSpace.getOrigin().z());
 	tr.q = physx::PxQuat(linkTransformInWorldSpace.getRotation().x(), linkTransformInWorldSpace.getRotation().y(), linkTransformInWorldSpace.getRotation().z(), linkTransformInWorldSpace.getRotation().w());
+	bool isFixedBase = (mass == 0); //todo: figure out when base is fixed
+
+	if (!createActiculation)
+	{
+		if (isFixedBase)
+		{
+			physx::PxRigidStatic* s = creation.m_physics->createRigidStatic(tr);
+			if ((cache.m_rigidStatic == 0) && (cache.m_rigidDynamic == 0))
+			{
+				cache.m_rigidStatic = s;
+			}
+			linkPtr = s;
+		}
+		else
+		{
+			physx::PxRigidDynamic* d = creation.m_physics->createRigidDynamic(tr);
+			linkPtr = d;
+			if ((cache.m_rigidStatic == 0) && (cache.m_rigidDynamic == 0))
+			{
+				cache.m_rigidDynamic = d;
+			}
+			rbLinkPtr = d;
+		}
+	}
+	else
+	{
 	cache.m_linkTransWorldSpace.push_back(tr);
 	cache.m_urdfLinkIndex.push_back(urdfLinkIndex);
@@ -582,7 +669,7 @@ btTransform ConvertURDF2PhysXInternal(
 	if (cache.m_articulation == 0)
 	{
 
-		bool isFixedBase = (mass == 0); //todo: figure out when base is fixed
 
 		cache.m_articulation = creation.m_physics->createArticulationReducedCoordinate();
 
@@ -596,6 +683,7 @@ btTransform ConvertURDF2PhysXInternal(
 		physx::PxArticulationLink* base = cache.m_articulation->createLink(NULL, tr);
 
 		linkPtr = base;
+		rbLinkPtr = base;
 
 
 		//physx::PxRigidActorExt::createExclusiveShape(*base, PxBoxGeometry(0.5f, 0.25f, 1.5f), *gMaterial);
@@ -628,14 +716,19 @@ btTransform ConvertURDF2PhysXInternal(
 
 
 #endif
 
+		cache.registerMultiBody(urdfLinkIndex, base, inertialFrameInWorldSpace, mass, localInertiaDiagonal, localInertialFrame);
 	}
 	else
 	{
 
 		physx::PxArticulationLink* parentLink = cache.getPhysxLinkFromLink(urdfParentIndex);
-		linkPtr = cache.m_articulation->createLink(parentLink, tr);
-
-		physx::PxArticulationJointReducedCoordinate* joint = static_cast<physx::PxArticulationJointReducedCoordinate*>(linkPtr->getInboundJoint());
+		physx::PxArticulationLink* childLink = cache.m_articulation->createLink(parentLink, tr);
+		linkPtr = childLink;
+		rbLinkPtr = childLink;
+
+		physx::PxArticulationJointReducedCoordinate* joint = static_cast<physx::PxArticulationJointReducedCoordinate*>(childLink->getInboundJoint());
 
 		switch (jointType)
 		{
@@ -691,8 +784,13 @@ btTransform ConvertURDF2PhysXInternal(
 				joint->setParentPose(parentPose);
 				joint->setChildPose(childPose);
 
+				cache.registerMultiBody(urdfLinkIndex, childLink, inertialFrameInWorldSpace, mass, localInertiaDiagonal, localInertialFrame);
 		}
-		cache.registerMultiBody(urdfLinkIndex, linkPtr, inertialFrameInWorldSpace, mass, localInertiaDiagonal, localInertialFrame);
 
+	}
 
 
 	if (linkPtr)
 	{
 		//todo: mem leaks
@@ -700,8 +798,6 @@ btTransform ConvertURDF2PhysXInternal(
 		userData->m_graphicsUniqueId = graphicsIndex;
 		linkPtr->userData = userData;
 	}
-	}
-
 
 	//create collision shapes
 
@@ -709,7 +805,7 @@ btTransform ConvertURDF2PhysXInternal(
 	convertLinkPhysXShapes(u2b, cache, creation, urdfLinkIndex, pathPrefix, localInertialFrame, cache.m_articulation, mbLinkIndex, linkPtr);
 
 
-	physx::PxRigidBodyExt::updateMassAndInertia(*linkPtr, mass);
+	physx::PxRigidBodyExt::updateMassAndInertia(*rbLinkPtr, mass);
 
 
 	//base->setMass(massOut);
@@ -727,7 +823,7 @@ btTransform ConvertURDF2PhysXInternal(
 
 	bool disableParentCollision = true;
 
-	if (createMultiBody && cache.m_articulation)
+	if (createActiculation && cache.m_articulation)
 	{
 #if 0
 		cache.m_bulletMultiBody->getLink(mbLinkIndex).m_jointDamping = jointDamping;
@@ -741,7 +837,7 @@ btTransform ConvertURDF2PhysXInternal(
 	}
 
 
-	if (createMultiBody)
+	if (createActiculation)
 	{
 
 	}
@@ -762,7 +858,7 @@ btTransform ConvertURDF2PhysXInternal(
 		{
 			int urdfChildLinkIndex = urdfChildIndices[i];
 
-			ConvertURDF2PhysXInternal(u2b, creation, cache, urdfChildLinkIndex, linkTransformInWorldSpace, createMultiBody, pathPrefix, flags, cachedLinkGraphicsShapesIn, cachedLinkGraphicsShapesOut, recursive);
+			ConvertURDF2PhysXInternal(u2b, creation, cache, urdfChildLinkIndex, linkTransformInWorldSpace, createActiculation, pathPrefix, flags, cachedLinkGraphicsShapesIn, cachedLinkGraphicsShapesOut, recursive);
 		}
 	}
 	return linkTransformInWorldSpace;
@@ -774,7 +870,7 @@ btTransform ConvertURDF2PhysXInternal(
 
 
 
-physx::PxArticulationReducedCoordinate* URDF2PhysX(physx::PxFoundation* foundation, physx::PxPhysics* physics, physx::PxCooking* cooking, physx::PxScene* scene, class PhysXURDFImporter& u2p, int flags, const char* pathPrefix, const btTransform& rootTransformInWorldSpace, struct CommonFileIOInterface* fileIO)
+physx::PxBase* URDF2PhysX(physx::PxFoundation* foundation, physx::PxPhysics* physics, physx::PxCooking* cooking, physx::PxScene* scene, class PhysXURDFImporter& u2p, int flags, const char* pathPrefix, const btTransform& rootTransformInWorldSpace, struct CommonFileIOInterface* fileIO, bool createActiculation)
 {
 	URDF2PhysXCachedData cache;
 	InitURDF2BulletCache(u2p, cache, flags);
@@ -792,13 +888,13 @@ physx::PxBase* URDF2PhysX(physx::PxFoundation* foundati
 	creation.m_scene = scene;
 	creation.m_fileIO = fileIO;
 
-	bool createMultiBody = true;
-
 	bool recursive = (flags & CUF_MAINTAIN_LINK_ORDER) == 0;
 
 	if (recursive)
 	{
-		ConvertURDF2PhysXInternal(u2p, creation, cache, urdfLinkIndex, rootTransformInWorldSpace, createMultiBody, pathPrefix, flags, &cachedLinkGraphicsShapes, &cachedLinkGraphicsShapesOut, recursive);
+		ConvertURDF2PhysXInternal(u2p, creation, cache, urdfLinkIndex, rootTransformInWorldSpace, createActiculation, pathPrefix, flags, &cachedLinkGraphicsShapes, &cachedLinkGraphicsShapesOut, recursive);
+
 	}
 	else
@@ -820,7 +916,7 @@ physx::PxBase* URDF2PhysX(physx::PxFoundati
 			int urdfLinkIndex = allIndices[i].m_index;
 			int parentIndex = allIndices[i].m_parentIndex;
 			btTransform parentTr = parentIndex >= 0 ? parentTransforms[parentIndex] : rootTransformInWorldSpace;
-			btTransform tr = ConvertURDF2BulletInternal(u2b, creation, cache, urdfLinkIndex, parentTr, world1, createMultiBody, pathPrefix, flags, cachedLinkGraphicsShapes, &cachedLinkGraphicsShapesOut, recursive);
+			btTransform tr = ConvertURDF2BulletInternal(u2b, creation, cache, urdfLinkIndex, parentTr, world1, createActiculation, pathPrefix, flags, cachedLinkGraphicsShapes, &cachedLinkGraphicsShapesOut, recursive);
 			if ((urdfLinkIndex + 1) >= parentTransforms.size())
 			{
 				parentTransforms.resize(urdfLinkIndex + 1);
@@ -838,7 +934,7 @@ physx::PxBase* URDF2PhysX(physx::PxFoundati
 #endif
 
 
-	if (scene && cache.m_articulation)
+	if (cache.m_articulation)
 	{
 #ifdef DEBUG_ARTICULATIONS
 		printf("\n-----------------\n");
@@ -902,7 +998,19 @@ physx::PxBase* URDF2PhysX(physx::PxFoundati
 			scene->addArticulation(*cache.m_articulation);
 		}
 
-	}
 		return cache.m_articulation;
 	}
 
+	if (cache.m_rigidStatic)
+	{
+		scene->addActor(*cache.m_rigidStatic);
+		return cache.m_rigidStatic;
+	}
+	if (cache.m_rigidDynamic)
+	{
+		scene->addActor(*cache.m_rigidDynamic);
+		return cache.m_rigidDynamic;
+	}
+	return NULL;
+
+}
@@ -6,6 +6,7 @@
 
 namespace physx
 {
+	class PxBase;
 	class PxFoundation;
 	class PxPhysics;
 	class PxDefaultCpuDispatcher;
@@ -20,6 +21,6 @@ struct UrdfVisualShapeCache2
 	b3AlignedObjectArray<int> m_cachedUrdfLinkVisualShapeIndices;
 };
 
-physx::PxArticulationReducedCoordinate* URDF2PhysX(physx::PxFoundation* foundation, physx::PxPhysics* physics, physx::PxCooking* cooking, physx::PxScene* scene, class PhysXURDFImporter& u2p, int flags, const char* pathPrefix, const class btTransform& rootTransformInWorldSpace, struct CommonFileIOInterface* fileIO);
+physx::PxBase* URDF2PhysX(physx::PxFoundation* foundation, physx::PxPhysics* physics, physx::PxCooking* cooking, physx::PxScene* scene, class PhysXURDFImporter& u2p, int flags, const char* pathPrefix, const class btTransform& rootTransformInWorldSpace, struct CommonFileIOInterface* fileIO, bool createActiculation);
 
 #endif //URDF2PHYSX_H
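With URDF2PhysX now returning a PxBase*, the caller can receive either an articulation or a plain rigid actor, selected by the new createActiculation flag (the spelling is the commit's own). It is plausible, though not shown in this diff, that pybullet's useMaximalCoordinates option chooses between the two paths on the PhysX backend; a hedged sketch under that assumption:

    import pybullet as p

    p.connect(p.PhysX)  # requires a pybullet build with the PhysX backend
    # Assumption for illustration only: useMaximalCoordinates=False requests
    # an articulation (createActiculation=true), while True requests a
    # PxRigidStatic/PxRigidDynamic actor.
    door = p.loadURDF("door.urdf", [0, 0, 1], useMaximalCoordinates=False)
    cube = p.loadURDF("cube.urdf", [2, 0, 1], useMaximalCoordinates=True)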
@@ -1,14 +1,49 @@
 import pybullet as p
 import time
+import math
+
+usePhysX = True
+if usePhysX:
 	p.connect(p.PhysX)
 	p.loadPlugin("eglRendererPlugin")
+else:
+	p.connect(p.GUI)
+
 p.loadURDF("plane.urdf")
-for i in range (50):
-	p.loadURDF("r2d2.urdf",[0,0,1+i*2])
+for i in range (10):
+	sphere = p.loadURDF("marble_cube.urdf", [0,-1,1+i*1], useMaximalCoordinates=False)
+	p.changeDynamics(sphere, -1, mass=1000)
+
+door = p.loadURDF("door.urdf", [0,0,1])
+p.changeDynamics(door, 1, linearDamping=0, angularDamping=0, jointDamping=0, mass=1)
+print("numJoints = ", p.getNumJoints(door))
+
+
 p.setGravity(0,0,-10)
+position_control = True
+
+angle = math.pi*0.25
+p.resetJointState(door, 1, angle)
+angleread = p.getJointState(door, 1)
+print("angleread = ", angleread)
+prevTime = time.time()
+
+angle = math.pi*0.5
+
 while (1):
+	curTime = time.time()
+
+	diff = curTime - prevTime
+	#every second, swap target angle
+	if (diff>1):
+		angle = -angle
+		prevTime = curTime
+
+	if position_control:
+		p.setJointMotorControl2(door, 1, p.POSITION_CONTROL, targetPosition=angle, positionGain=10.1, velocityGain=1, force=11.001)
+	else:
+		p.setJointMotorControl2(door, 1, p.VELOCITY_CONTROL, targetVelocity=1, force=1011)
 	p.stepSimulation()
 	time.sleep(1./240.)
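One note on the script above: getJointState returns a (position, velocity, reactionForces, appliedTorque) tuple, so the angleread printout can be unpacked to verify that resetJointState took effect; a short continuation of the script:

    pos, vel, reaction_forces, applied_torque = p.getJointState(door, 1)
    print("hinge angle:", pos, "angular velocity:", vel)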
@@ -0,0 +1,52 @@
+{
+	"AgentType": "PPO",
+
+	"ActorNet": "fc_2layers_1024units",
+	"ActorStepsize": 0.0000025,
+	"ActorMomentum": 0.9,
+	"ActorWeightDecay": 0.0005,
+	"ActorInitOutputScale": 0.01,
+
+	"CriticNet": "fc_2layers_1024units",
+	"CriticStepsize": 0.01,
+	"CriticMomentum": 0.9,
+	"CriticWeightDecay": 0,
+
+	"UpdatePeriod": 1,
+	"ItersPerUpdate": 1,
+	"Discount": 0.95,
+	"BatchSize": 4096,
+	"MiniBatchSize": 256,
+	"Epochs": 1,
+	"ReplayBufferSize": 500000,
+	"InitSamples": 1,
+	"NormalizerSamples": 1000000,
+
+	"RatioClip": 0.2,
+	"NormAdvClip": 4,
+	"TDLambda": 0.95,
+
+	"OutputIters": 10,
+	"IntOutputIters": 400,
+	"TestEpisodes": 32,
+
+	"ExpAnnealSamples": 64000000,
+
+	"ExpParamsBeg":
+	{
+		"Rate": 1,
+		"InitActionRate": 1,
+		"Noise": 0.05,
+		"NoiseInternal": 0,
+		"Temp": 0.1
+	},
+
+	"ExpParamsEnd":
+	{
+		"Rate": 0.2,
+		"InitActionRate": 0.01,
+		"Noise": 0.05,
+		"NoiseInternal": 0,
+		"Temp": 0.001
+	}
+}
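This hyperparameter block is plain JSON, so the learning code can load it directly; a quick inspection sketch (the file name is a placeholder, since the diff header does not show the path):

    import json

    # Placeholder path; the actual file location is not shown in this diff.
    with open("deep_mimic_ppo_args.json") as f:
        cfg = json.load(f)

    print(cfg["AgentType"])              # "PPO"
    print(cfg["ExpParamsBeg"]["Noise"])  # exploration noise at the start of annealing
    print(cfg["ExpAnnealSamples"])       # samples over which exploration anneals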
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,52 @@
+import numpy as np
+import sys
+import os
+import inspect
+currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
+parentdir = os.path.dirname(os.path.dirname(currentdir))
+os.sys.path.insert(0, parentdir)
+print("parentdir=", parentdir)
+
+
+from pybullet_envs.deep_mimic.env.pybullet_deep_mimic_env import PyBulletDeepMimicEnv
+from pybullet_envs.deep_mimic.learning.rl_world import RLWorld
+from pybullet_utils.logger import Logger
+from testrl import update_world, update_timestep, build_world
+import pybullet_utils.mpi_util as MPIUtil
+
+args = []
+world = None
+
+def run():
+  global update_timestep
+  global world
+
+  done = False
+  while not (done):
+    update_world(world, update_timestep)
+
+  return
+
+def shutdown():
+  global world
+
+  Logger.print2('Shutting down...')
+  world.shutdown()
+  return
+
+def main():
+  global args
+  global world
+
+  # Command line arguments
+  args = sys.argv[1:]
+  enable_draw = False
+  world = build_world(args, enable_draw)
+
+  run()
+  shutdown()
+
+  return
+
+if __name__ == '__main__':
+  main()
examples/pybullet/gym/pybullet_envs/deep_mimic/env/action_space.py (new file, vendored, 6 lines)
@@ -0,0 +1,6 @@
+from enum import Enum
+
+class ActionSpace(Enum):
+  Null = 0
+  Continuous = 1
+  Discrete = 2
examples/pybullet/gym/pybullet_envs/deep_mimic/env/env.py (new file, vendored, 13 lines)
@@ -0,0 +1,13 @@
+from abc import ABC, abstractmethod
+import numpy as np
+from enum import Enum
+
+class Env(ABC):
+  class Terminate(Enum):
+    Null = 0
+    Fail = 1
+    Succ = 2
+
+  def __init__(self, args, enable_draw):
+    self.enable_draw = enable_draw
+    return
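Env is an abstract base that concrete environments, such as the PyBulletDeepMimicEnv imported in the launcher above, subclass; a minimal hedged sketch of the pattern (the step method is illustrative, not part of the class shown):

    class ToyEnv(Env):
      def __init__(self, args, enable_draw):
        super().__init__(args, enable_draw)
        self._t = 0

      def step(self):  # hypothetical method, for illustration only
        self._t += 1
        return Env.Terminate.Succ if self._t > 100 else Env.Terminate.Null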
examples/pybullet/gym/pybullet_envs/deep_mimic/env/humanoid_pose_interpolator.py (new file, vendored, 249 lines)
@@ -0,0 +1,249 @@
+from pybullet_envs.minitaur.envs import bullet_client
+import math
+
+class HumanoidPoseInterpolator(object):
+  def __init__(self):
+    pass
+
+  def Reset(self):
+
+    self._basePos = [0,0,0]
+    self._baseLinVel = [0,0,0]
+    self._baseOrn = [0,0,0,1]
+    self._baseAngVel = [0,0,0]
+
+    self._chestRot = [0,0,0,1]
+    self._chestVel = [0,0,0]
+    self._neckRot = [0,0,0,1]
+    self._neckVel = [0,0,0]
+
+    self._rightHipRot = [0,0,0,1]
+    self._rightHipVel = [0,0,0]
+    self._rightKneeRot = [0]
+    self._rightKneeVel = [0]
+    self._rightAnkleRot = [0,0,0,1]
+    self._rightAnkleVel = [0,0,0]
+
+    self._rightShoulderRot = [0,0,0,1]
+    self._rightShoulderVel = [0,0,0]
+    self._rightElbowRot = [0]
+    self._rightElbowVel = [0]
+
+    self._leftHipRot = [0,0,0,1]
+    self._leftHipVel = [0,0,0]
+    self._leftKneeRot = [0]
+    self._leftKneeVel = [0]
+    self._leftAnkleRot = [0,0,0,1]
+    self._leftAnkleVel = [0,0,0]
+
+    self._leftShoulderRot = [0,0,0,1]
+    self._leftShoulderVel = [0,0,0]
+    self._leftElbowRot = [0]
+    self._leftElbowVel = [0]
+
+  def ComputeLinVel(self, posStart, posEnd, deltaTime):
+    vel = [(posEnd[0]-posStart[0])/deltaTime, (posEnd[1]-posStart[1])/deltaTime, (posEnd[2]-posStart[2])/deltaTime]
+    return vel
+
+  def ComputeAngVel(self, ornStart, ornEnd, deltaTime, bullet_client):
+    dorn = bullet_client.getDifferenceQuaternion(ornStart, ornEnd)
+    axis, angle = bullet_client.getAxisAngleFromQuaternion(dorn)
+    angVel = [(axis[0]*angle)/deltaTime, (axis[1]*angle)/deltaTime, (axis[2]*angle)/deltaTime]
+    return angVel
+
+  def NormalizeVector(self, vec):
+    #note: the original body referenced an undefined 'orn' and discarded the result;
+    #normalize 'vec' in place instead
+    length2 = vec[0]*vec[0]+vec[1]*vec[1]+vec[2]*vec[2]
+    if (length2>0):
+      length = math.sqrt(length2)
+      vec[0] /= length
+      vec[1] /= length
+      vec[2] /= length
+    return vec
+
+  def NormalizeQuaternion(self, orn):
+    length2 = orn[0]*orn[0]+orn[1]*orn[1]+orn[2]*orn[2]+orn[3]*orn[3]
+    if (length2>0):
+      length = math.sqrt(length2)
+      orn[0] /= length
+      orn[1] /= length
+      orn[2] /= length
+      orn[3] /= length
+      #print("Normalize? length=",length)
+    return orn
+
+  def PostProcessMotionData(self, frameData):
+    baseOrn1Start = [frameData[5], frameData[6], frameData[7], frameData[4]]
+    chestRotStart = [frameData[9], frameData[10], frameData[11], frameData[8]]
+    neckRotStart = [frameData[13], frameData[14], frameData[15], frameData[12]]
+    rightHipRotStart = [frameData[17], frameData[18], frameData[19], frameData[16]]
+    rightAnkleRotStart = [frameData[22], frameData[23], frameData[24], frameData[21]]
+    rightShoulderRotStart = [frameData[26], frameData[27], frameData[28], frameData[25]]
+    leftHipRotStart = [frameData[31], frameData[32], frameData[33], frameData[30]]
+    leftAnkleRotStart = [frameData[36], frameData[37], frameData[38], frameData[35]]
+    leftShoulderRotStart = [frameData[40], frameData[41], frameData[42], frameData[39]]
+
+  def GetPose(self):
+    pose = [
+      self._basePos[0], self._basePos[1], self._basePos[2],
+      self._baseOrn[0], self._baseOrn[1], self._baseOrn[2], self._baseOrn[3],
+      self._chestRot[0], self._chestRot[1], self._chestRot[2], self._chestRot[3],
+      self._neckRot[0], self._neckRot[1], self._neckRot[2], self._neckRot[3],
+      self._rightHipRot[0], self._rightHipRot[1], self._rightHipRot[2], self._rightHipRot[3],
+      self._rightKneeRot[0],
+      self._rightAnkleRot[0], self._rightAnkleRot[1], self._rightAnkleRot[2], self._rightAnkleRot[3],
+      self._rightShoulderRot[0], self._rightShoulderRot[1], self._rightShoulderRot[2], self._rightShoulderRot[3],
+      self._rightElbowRot[0],
+      self._leftHipRot[0], self._leftHipRot[1], self._leftHipRot[2], self._leftHipRot[3],
+      self._leftKneeRot[0],
+      self._leftAnkleRot[0], self._leftAnkleRot[1], self._leftAnkleRot[2], self._leftAnkleRot[3],
+      self._leftShoulderRot[0], self._leftShoulderRot[1], self._leftShoulderRot[2], self._leftShoulderRot[3],
+      self._leftElbowRot[0]]
+    return pose
+
+  def Slerp(self, frameFraction, frameData, frameDataNext, bullet_client):
+    keyFrameDuration = frameData[0]
+    basePos1Start = [frameData[1], frameData[2], frameData[3]]
+    basePos1End = [frameDataNext[1], frameDataNext[2], frameDataNext[3]]
+    self._basePos = [basePos1Start[0]+frameFraction*(basePos1End[0]-basePos1Start[0]),
+                     basePos1Start[1]+frameFraction*(basePos1End[1]-basePos1Start[1]),
+                     basePos1Start[2]+frameFraction*(basePos1End[2]-basePos1Start[2])]
+    self._baseLinVel = self.ComputeLinVel(basePos1Start, basePos1End, keyFrameDuration)
+    baseOrn1Start = [frameData[5], frameData[6], frameData[7], frameData[4]]
+    baseOrn1Next = [frameDataNext[5], frameDataNext[6], frameDataNext[7], frameDataNext[4]]
+    self._baseOrn = bullet_client.getQuaternionSlerp(baseOrn1Start, baseOrn1Next, frameFraction)
+    self._baseAngVel = self.ComputeAngVel(baseOrn1Start, baseOrn1Next, keyFrameDuration, bullet_client)
+
+    ##pre-rotate to make z-up
+    #y2zPos=[0,0,0.0]
+    #y2zOrn = p.getQuaternionFromEuler([1.57,0,0])
+    #basePos,baseOrn = p.multiplyTransforms(y2zPos, y2zOrn,basePos1,baseOrn1)
+
+    chestRotStart = [frameData[9], frameData[10], frameData[11], frameData[8]]
+    chestRotEnd = [frameDataNext[9], frameDataNext[10], frameDataNext[11], frameDataNext[8]]
+    self._chestRot = bullet_client.getQuaternionSlerp(chestRotStart, chestRotEnd, frameFraction)
+    self._chestVel = self.ComputeAngVel(chestRotStart, chestRotEnd, keyFrameDuration, bullet_client)
+
+    neckRotStart = [frameData[13], frameData[14], frameData[15], frameData[12]]
+    neckRotEnd = [frameDataNext[13], frameDataNext[14], frameDataNext[15], frameDataNext[12]]
+    self._neckRot = bullet_client.getQuaternionSlerp(neckRotStart, neckRotEnd, frameFraction)
+    self._neckVel = self.ComputeAngVel(neckRotStart, neckRotEnd, keyFrameDuration, bullet_client)
+
+    rightHipRotStart = [frameData[17], frameData[18], frameData[19], frameData[16]]
+    rightHipRotEnd = [frameDataNext[17], frameDataNext[18], frameDataNext[19], frameDataNext[16]]
+    self._rightHipRot = bullet_client.getQuaternionSlerp(rightHipRotStart, rightHipRotEnd, frameFraction)
+    self._rightHipVel = self.ComputeAngVel(rightHipRotStart, rightHipRotEnd, keyFrameDuration, bullet_client)
+
+    rightKneeRotStart = [frameData[20]]
+    rightKneeRotEnd = [frameDataNext[20]]
+    self._rightKneeRot = [rightKneeRotStart[0]+frameFraction*(rightKneeRotEnd[0]-rightKneeRotStart[0])]
+    self._rightKneeVel = [(rightKneeRotEnd[0]-rightKneeRotStart[0])/keyFrameDuration]
+
+    rightAnkleRotStart = [frameData[22], frameData[23], frameData[24], frameData[21]]
+    rightAnkleRotEnd = [frameDataNext[22], frameDataNext[23], frameDataNext[24], frameDataNext[21]]
+    self._rightAnkleRot = bullet_client.getQuaternionSlerp(rightAnkleRotStart, rightAnkleRotEnd, frameFraction)
+    self._rightAnkleVel = self.ComputeAngVel(rightAnkleRotStart, rightAnkleRotEnd, keyFrameDuration, bullet_client)
+
+    rightShoulderRotStart = [frameData[26], frameData[27], frameData[28], frameData[25]]
+    rightShoulderRotEnd = [frameDataNext[26], frameDataNext[27], frameDataNext[28], frameDataNext[25]]
+    self._rightShoulderRot = bullet_client.getQuaternionSlerp(rightShoulderRotStart, rightShoulderRotEnd, frameFraction)
+    self._rightShoulderVel = self.ComputeAngVel(rightShoulderRotStart, rightShoulderRotEnd, keyFrameDuration, bullet_client)
+
+    rightElbowRotStart = [frameData[29]]
+    rightElbowRotEnd = [frameDataNext[29]]
+    self._rightElbowRot = [rightElbowRotStart[0]+frameFraction*(rightElbowRotEnd[0]-rightElbowRotStart[0])]
+    self._rightElbowVel = [(rightElbowRotEnd[0]-rightElbowRotStart[0])/keyFrameDuration]
+
+    leftHipRotStart = [frameData[31], frameData[32], frameData[33], frameData[30]]
+    leftHipRotEnd = [frameDataNext[31], frameDataNext[32], frameDataNext[33], frameDataNext[30]]
+    self._leftHipRot = bullet_client.getQuaternionSlerp(leftHipRotStart, leftHipRotEnd, frameFraction)
+    self._leftHipVel = self.ComputeAngVel(leftHipRotStart, leftHipRotEnd, keyFrameDuration, bullet_client)
+
+    leftKneeRotStart = [frameData[34]]
+    leftKneeRotEnd = [frameDataNext[34]]
+    self._leftKneeRot = [leftKneeRotStart[0]+frameFraction*(leftKneeRotEnd[0]-leftKneeRotStart[0])]
+    self._leftKneeVel = [(leftKneeRotEnd[0]-leftKneeRotStart[0])/keyFrameDuration]
+
+    leftAnkleRotStart = [frameData[36], frameData[37], frameData[38], frameData[35]]
+    leftAnkleRotEnd = [frameDataNext[36], frameDataNext[37], frameDataNext[38], frameDataNext[35]]
+    self._leftAnkleRot = bullet_client.getQuaternionSlerp(leftAnkleRotStart, leftAnkleRotEnd, frameFraction)
+    self._leftAnkleVel = self.ComputeAngVel(leftAnkleRotStart, leftAnkleRotEnd, keyFrameDuration, bullet_client)
+
+    leftShoulderRotStart = [frameData[40], frameData[41], frameData[42], frameData[39]]
+    leftShoulderRotEnd = [frameDataNext[40], frameDataNext[41], frameDataNext[42], frameDataNext[39]]
+    self._leftShoulderRot = bullet_client.getQuaternionSlerp(leftShoulderRotStart, leftShoulderRotEnd, frameFraction)
+    self._leftShoulderVel = self.ComputeAngVel(leftShoulderRotStart, leftShoulderRotEnd, keyFrameDuration, bullet_client)
+
+    leftElbowRotStart = [frameData[43]]
+    leftElbowRotEnd = [frameDataNext[43]]
+    self._leftElbowRot = [leftElbowRotStart[0]+frameFraction*(leftElbowRotEnd[0]-leftElbowRotStart[0])]
+    self._leftElbowVel = [(leftElbowRotEnd[0]-leftElbowRotStart[0])/keyFrameDuration]
+
+    pose = self.GetPose()
+    return pose
+
+  def ConvertFromAction(self, pybullet_client, action):
+    #turn action into pose
+
+    self.Reset()  #?? needed?
+    index = 0
+    angle = action[index]
+    axis = [action[index+1], action[index+2], action[index+3]]
+    index += 4
+    self._chestRot = pybullet_client.getQuaternionFromAxisAngle(axis, angle)
+    #print("pose._chestRot=",pose._chestRot)
+
+    angle = action[index]
+    axis = [action[index+1], action[index+2], action[index+3]]
+    index += 4
+    self._neckRot = pybullet_client.getQuaternionFromAxisAngle(axis, angle)
+
+    angle = action[index]
+    axis = [action[index+1], action[index+2], action[index+3]]
+    index += 4
+    self._rightHipRot = pybullet_client.getQuaternionFromAxisAngle(axis, angle)
+
+    angle = action[index]
+    index += 1
+    self._rightKneeRot = [angle]
+
+    angle = action[index]
+    axis = [action[index+1], action[index+2], action[index+3]]
+    index += 4
+    self._rightAnkleRot = pybullet_client.getQuaternionFromAxisAngle(axis, angle)
+
+    angle = action[index]
+    axis = [action[index+1], action[index+2], action[index+3]]
+    index += 4
+    self._rightShoulderRot = pybullet_client.getQuaternionFromAxisAngle(axis, angle)
+
+    angle = action[index]
+    index += 1
+    self._rightElbowRot = [angle]
+
+    angle = action[index]
+    axis = [action[index+1], action[index+2], action[index+3]]
+    index += 4
+    self._leftHipRot = pybullet_client.getQuaternionFromAxisAngle(axis, angle)
+
+    angle = action[index]
+    index += 1
+    self._leftKneeRot = [angle]
+
+    angle = action[index]
+    axis = [action[index+1], action[index+2], action[index+3]]
+    index += 4
+    self._leftAnkleRot = pybullet_client.getQuaternionFromAxisAngle(axis, angle)
+
+    angle = action[index]
+    axis = [action[index+1], action[index+2], action[index+3]]
+    index += 4
+    self._leftShoulderRot = pybullet_client.getQuaternionFromAxisAngle(axis, angle)
+
+    angle = action[index]
+    index += 1
+    self._leftElbowRot = [angle]
+
+    pose = self.GetPose()
+    return pose
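The recurring index shuffle above exists because the mocap frames store quaternions w-first while pybullet expects [x, y, z, w]; ComputeAngVel then turns the difference quaternion into a finite-difference angular velocity. A small standalone sketch of both steps (the frame values are toy data):

    import pybullet as p

    p.connect(p.DIRECT)
    frame = [0.0625, 0, 0.85, 0, 1, 0, 0, 0]  # [dt, x, y, z, qw, qx, qy, qz], toy values
    orn_start = [frame[5], frame[6], frame[7], frame[4]]  # reorder w-first -> [x, y, z, w]

    orn_end = p.getQuaternionFromEuler([0, 0, 0.1])
    dorn = p.getDifferenceQuaternion(orn_start, orn_end)
    axis, angle = p.getAxisAngleFromQuaternion(dorn)
    dt = frame[0]
    ang_vel = [axis[i] * angle / dt for i in range(3)]  # same math as ComputeAngVel
    print(ang_vel)
    p.disconnect()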
examples/pybullet/gym/pybullet_envs/deep_mimic/env/humanoid_stable_pd.py (new file, vendored, 453 lines; truncated below)
@@ -0,0 +1,453 @@
|
|||||||
|
|
||||||
|
from pybullet_envs.deep_mimic.env import pd_controller_stable
|
||||||
|
from pybullet_envs.deep_mimic.env import humanoid_pose_interpolator
|
||||||
|
import math
|
||||||
|
|
||||||
|
chest=1
|
||||||
|
neck=2
|
||||||
|
rightHip=3
|
||||||
|
rightKnee=4
|
||||||
|
rightAnkle=5
|
||||||
|
rightShoulder=6
|
||||||
|
rightElbow=7
|
||||||
|
leftHip=9
|
||||||
|
leftKnee=10
|
||||||
|
leftAnkle=11
|
||||||
|
leftShoulder=12
|
||||||
|
leftElbow=13
|
||||||
|
jointFrictionForce = 0
|
||||||
|
|
||||||
|
class HumanoidStablePD(object):
|
||||||
|
def __init__(self, pybullet_client, mocap_data, timeStep, useFixedBase=True):
|
||||||
|
self._pybullet_client = pybullet_client
|
||||||
|
self._mocap_data = mocap_data
|
||||||
|
print("LOADING humanoid!")
|
||||||
|
|
||||||
|
self._sim_model = self._pybullet_client.loadURDF(
|
||||||
|
"humanoid/humanoid.urdf", [0,0.85,0],globalScaling=0.25, useFixedBase=useFixedBase, flags=self._pybullet_client.URDF_MAINTAIN_LINK_ORDER)
|
||||||
|
|
||||||
|
self._kin_model = self._pybullet_client.loadURDF(
|
||||||
|
"humanoid/humanoid.urdf", [0,2.85,0],globalScaling=0.25, useFixedBase=True, flags=self._pybullet_client.URDF_MAINTAIN_LINK_ORDER)
|
||||||
|
|
||||||
|
self._pybullet_client.changeDynamics(self._sim_model, -1, lateralFriction=0.9)
|
||||||
|
for j in range (self._pybullet_client.getNumJoints(self._sim_model)):
|
||||||
|
self._pybullet_client.changeDynamics(self._sim_model, j, lateralFriction=0.9)
|
||||||
|
|
||||||
|
self._pybullet_client.changeDynamics(self._sim_model, -1, linearDamping=0, angularDamping=0)
|
||||||
|
self._pybullet_client.changeDynamics(self._kin_model, -1, linearDamping=0, angularDamping=0)
|
||||||
|
self._poseInterpolator = humanoid_pose_interpolator.HumanoidPoseInterpolator()
|
||||||
|
|
||||||
|
for i in range (self._mocap_data.NumFrames()-1):
|
||||||
|
frameData = self._mocap_data._motion_data['Frames'][i]
|
||||||
|
self._poseInterpolator.PostProcessMotionData(frameData)
|
||||||
|
|
||||||
|
self._stablePD = pd_controller_stable.PDControllerStableMultiDof(self._pybullet_client)
|
||||||
|
self._timeStep = timeStep
|
||||||
|
self._kpOrg = [0,0,0,0,0,0,0,1000,1000,1000,1000,100,100,100,100,500,500,500,500,500,400,400,400,400,400,400,400,400,300,500,500,500,500,500,400,400,400,400,400,400,400,400,300]
|
||||||
|
self._kdOrg = [0,0,0,0,0,0,0,100,100,100,100,10,10,10,10,50,50,50,50,50,40,40,40,40,40,40,40,40,30,50,50,50,50,50,40,40,40,40,40,40,40,40,30]
|
||||||
|
|
||||||
|
self._jointIndicesAll = [chest,neck, rightHip,rightKnee,rightAnkle,rightShoulder,rightElbow,leftHip,leftKnee,leftAnkle,leftShoulder,leftElbow]
|
||||||
|
for j in self._jointIndicesAll:
|
||||||
|
#self._pybullet_client.setJointMotorControlMultiDof(self._sim_model, j, self._pybullet_client.POSITION_CONTROL, force=[1,1,1])
|
||||||
|
self._pybullet_client.setJointMotorControl2(self._sim_model, j, self._pybullet_client.POSITION_CONTROL, targetPosition=0, positionGain=0, targetVelocity=0,force=jointFrictionForce)
|
||||||
|
self._pybullet_client.setJointMotorControlMultiDof(self._sim_model, j, self._pybullet_client.POSITION_CONTROL,targetPosition=[0,0,0,1], targetVelocity=[0,0,0], positionGain=0,velocityGain=1,force=[jointFrictionForce,jointFrictionForce,jointFrictionForce])
|
||||||
|
self._pybullet_client.setJointMotorControl2(self._kin_model, j, self._pybullet_client.POSITION_CONTROL, targetPosition=0, positionGain=0, targetVelocity=0,force=0)
|
||||||
|
self._pybullet_client.setJointMotorControlMultiDof(self._kin_model, j, self._pybullet_client.POSITION_CONTROL,targetPosition=[0,0,0,1], targetVelocity=[0,0,0], positionGain=0,velocityGain=1,force=[jointFrictionForce,jointFrictionForce,0])
|
||||||
|
|
||||||
|
self._jointDofCounts=[4,4,4,1,4,4,1,4,1,4,4,1]
|
||||||
|
|
||||||
|
#only those body parts/links are allowed to touch the ground, otherwise the episode terminates
|
||||||
|
self._allowed_body_parts=[5,11]
|
||||||
|
|
||||||
|
#[x,y,z] base position and [x,y,z,w] base orientation!
|
||||||
|
self._totalDofs = 7
|
||||||
|
for dof in self._jointDofCounts:
|
||||||
|
self._totalDofs += dof
|
||||||
|
self.setSimTime(0)
|
||||||
|
self._maxForce = 6000
|
||||||
|
self.resetPose()
|
||||||
|
|
||||||
|
def resetPose(self):
|
||||||
|
print("resetPose with self._frame=", self._frame, " and self._frameFraction=",self._frameFraction)
|
||||||
|
pose = self.computePose(self._frameFraction)
|
||||||
|
self.initializePose(self._poseInterpolator, self._sim_model, initBase=True)
|
||||||
|
self.initializePose(self._poseInterpolator, self._kin_model, initBase=False)
|
||||||
|
|
||||||
|
def initializePose(self, pose, phys_model,initBase, initializeVelocity = True):
|
||||||
|
|
||||||
|
if initializeVelocity:
|
||||||
|
if initBase:
|
||||||
|
self._pybullet_client.resetBasePositionAndOrientation(phys_model, pose._basePos, pose._baseOrn)
|
||||||
|
self._pybullet_client.resetBaseVelocity(phys_model, pose._baseLinVel, pose._baseAngVel)
|
||||||
|
self._pybullet_client.resetJointStateMultiDof(phys_model,chest,pose._chestRot, pose._chestVel)
|
||||||
|
self._pybullet_client.resetJointStateMultiDof(phys_model,neck,pose._neckRot, pose._neckVel)
|
||||||
|
self._pybullet_client.resetJointStateMultiDof(phys_model,rightHip,pose._rightHipRot, pose._rightHipVel)
|
||||||
|
self._pybullet_client.resetJointStateMultiDof(phys_model,rightKnee,pose._rightKneeRot, pose._rightKneeVel)
|
||||||
|
self._pybullet_client.resetJointStateMultiDof(phys_model,rightAnkle,pose._rightAnkleRot, pose._rightAnkleVel)
|
||||||
|
self._pybullet_client.resetJointStateMultiDof(phys_model,rightShoulder,pose._rightShoulderRot, pose._rightShoulderVel)
|
||||||
|
self._pybullet_client.resetJointStateMultiDof(phys_model,rightElbow, pose._rightElbowRot, pose._rightElbowVel)
|
||||||
|
self._pybullet_client.resetJointStateMultiDof(phys_model,leftHip, pose._leftHipRot, pose._leftHipVel)
|
||||||
|
self._pybullet_client.resetJointStateMultiDof(phys_model,leftKnee, pose._leftKneeRot, pose._leftKneeVel)
|
||||||
|
self._pybullet_client.resetJointStateMultiDof(phys_model,leftAnkle, pose._leftAnkleRot, pose._leftAnkleVel)
|
||||||
|
self._pybullet_client.resetJointStateMultiDof(phys_model,leftShoulder, pose._leftShoulderRot, pose._leftShoulderVel)
|
||||||
|
self._pybullet_client.resetJointStateMultiDof(phys_model,leftElbow, pose._leftElbowRot, pose._leftElbowVel)
|
||||||
|
else:
|
||||||
|
if initBase:
|
||||||
|
self._pybullet_client.resetBasePositionAndOrientation(phys_model, pose._basePos, pose._baseOrn)
|
||||||
|
self._pybullet_client.resetJointStateMultiDof(phys_model,chest,pose._chestRot, [0,0,0])
|
||||||
|
self._pybullet_client.resetJointStateMultiDof(phys_model,neck,pose._neckRot, [0,0,0])
|
||||||
|
self._pybullet_client.resetJointStateMultiDof(phys_model,rightHip,pose._rightHipRot, [0,0,0])
|
||||||
|
self._pybullet_client.resetJointStateMultiDof(phys_model,rightKnee,pose._rightKneeRot, [0])
|
||||||
|
self._pybullet_client.resetJointStateMultiDof(phys_model,rightAnkle,pose._rightAnkleRot, [0,0,0])
|
||||||
|
self._pybullet_client.resetJointStateMultiDof(phys_model,rightShoulder,pose._rightShoulderRot, [0,0,0])
|
||||||
|
self._pybullet_client.resetJointStateMultiDof(phys_model,rightElbow, pose._rightElbowRot, [0])
|
||||||
|
self._pybullet_client.resetJointStateMultiDof(phys_model,leftHip, pose._leftHipRot, [0,0,0])
|
||||||
|
self._pybullet_client.resetJointStateMultiDof(phys_model,leftKnee, pose._leftKneeRot, [0])
|
||||||
|
self._pybullet_client.resetJointStateMultiDof(phys_model,leftAnkle, pose._leftAnkleRot, [0,0,0])
|
||||||
|
self._pybullet_client.resetJointStateMultiDof(phys_model,leftShoulder, pose._leftShoulderRot, [0,0,0])
|
||||||
|
self._pybullet_client.resetJointStateMultiDof(phys_model,leftElbow, pose._leftElbowRot, [0])
|
||||||
|
|
||||||
|
def calcCycleCount(self, simTime, cycleTime):
|
||||||
|
phases = simTime / cycleTime;
|
||||||
|
count = math.floor(phases)
|
||||||
|
loop = True
|
||||||
|
#count = (loop) ? count : cMathUtil::Clamp(count, 0, 1);
|
||||||
|
return count
|
||||||
|
|
||||||
|
|
||||||
|
def getCycleTime(self):
|
||||||
|
keyFrameDuration = self._mocap_data.KeyFrameDuraction()
|
||||||
|
cycleTime = keyFrameDuration*(self._mocap_data.NumFrames()-1)
|
||||||
|
return cycleTime
|
||||||
|
|
||||||
|
def setSimTime(self, t):
|
||||||
|
self._simTime = t
|
||||||
|
#print("SetTimeTime time =",t)
|
||||||
|
keyFrameDuration = self._mocap_data.KeyFrameDuraction()
|
||||||
|
cycleTime = self.getCycleTime()
|
||||||
|
#print("self._motion_data.NumFrames()=",self._mocap_data.NumFrames())
|
||||||
|
cycles = self.calcCycleCount(t, cycleTime)
|
||||||
|
#print("cycles=",cycles)
|
||||||
|
frameTime = t - cycles*cycleTime
|
||||||
|
if (frameTime<0):
|
||||||
|
frameTime += cycleTime
|
||||||
|
|
||||||
|
#print("keyFrameDuration=",keyFrameDuration)
|
||||||
|
#print("frameTime=",frameTime)
|
||||||
|
self._frame = int(frameTime/keyFrameDuration)
|
||||||
|
#print("self._frame=",self._frame)
|
||||||
|
|
||||||
|
self._frameNext = self._frame+1
|
||||||
|
if (self._frameNext >= self._mocap_data.NumFrames()):
|
||||||
|
self._frameNext = self._frame
|
||||||
|
|
||||||
|
self._frameFraction = (frameTime - self._frame*keyFrameDuration)/(keyFrameDuration)
|
||||||
|
|
||||||
|
def computePose(self, frameFraction):
|
||||||
|
frameData = self._mocap_data._motion_data['Frames'][self._frame]
|
||||||
|
frameDataNext = self._mocap_data._motion_data['Frames'][self._frameNext]
|
||||||
|
pose = self._poseInterpolator.Slerp(frameFraction, frameData, frameDataNext, self._pybullet_client)
|
||||||
|
return pose
|
||||||
|
|
||||||
|
|
||||||
|
def convertActionToPose(self, action):
|
||||||
|
pose = self._poseInterpolator.ConvertFromAction(self._pybullet_client, action)
|
||||||
|
return pose
|
||||||
|
|
||||||
|
  def computePDForces(self, desiredPositions, desiredVelocities=None):
    if desiredVelocities is None:
      desiredVelocities = [0] * self._totalDofs
    taus = self._stablePD.computePD(bodyUniqueId=self._sim_model,
                                    jointIndices=self._jointIndicesAll,
                                    desiredPositions=desiredPositions,
                                    desiredVelocities=desiredVelocities,
                                    kps=self._kpOrg,
                                    kds=self._kdOrg,
                                    maxForces=[self._maxForce] * self._totalDofs,
                                    timeStep=self._timeStep)
    return taus

  def applyPDForces(self, taus):
    #the first 7 entries of taus belong to the floating base (3 position + 4 quaternion dofs)
    dofIndex = 7
    for index in range(len(self._jointIndicesAll)):
      jointIndex = self._jointIndicesAll[index]
      if self._jointDofCounts[index] == 4:
        self._pybullet_client.setJointMotorControlMultiDof(
            self._sim_model, jointIndex, self._pybullet_client.TORQUE_CONTROL,
            force=[taus[dofIndex + 0], taus[dofIndex + 1], taus[dofIndex + 2]])
      if self._jointDofCounts[index] == 1:
        self._pybullet_client.setJointMotorControlMultiDof(
            self._sim_model, jointIndex, controlMode=self._pybullet_client.TORQUE_CONTROL,
            force=[taus[dofIndex]])
      dofIndex += self._jointDofCounts[index]

  def getPhase(self):
    keyFrameDuration = self._mocap_data.KeyFrameDuraction()
    cycleTime = keyFrameDuration * (self._mocap_data.NumFrames() - 1)
    phase = self._simTime / cycleTime
    phase = math.fmod(phase, 1.0)
    if (phase < 0):
      phase += 1
    return phase

  def buildHeadingTrans(self, rootOrn):
    #align root transform 'forward' with world-space x axis
    eul = self._pybullet_client.getEulerFromQuaternion(rootOrn)
    refDir = [1, 0, 0]
    rotVec = self._pybullet_client.rotateVector(rootOrn, refDir)
    heading = math.atan2(-rotVec[2], rotVec[0])
    heading2 = eul[1]
    #print("heading=",heading)
    headingOrn = self._pybullet_client.getQuaternionFromAxisAngle([0, 1, 0], -heading)
    return headingOrn

  def buildOriginTrans(self):
    rootPos, rootOrn = self._pybullet_client.getBasePositionAndOrientation(self._sim_model)

    #print("rootPos=",rootPos, " rootOrn=",rootOrn)
    invRootPos = [-rootPos[0], 0, -rootPos[2]]
    #invOrigTransPos, invOrigTransOrn = self._pybullet_client.invertTransform(rootPos,rootOrn)
    headingOrn = self.buildHeadingTrans(rootOrn)
    #print("headingOrn=",headingOrn)
    headingMat = self._pybullet_client.getMatrixFromQuaternion(headingOrn)
    #print("headingMat=",headingMat)
    #dummy, rootOrnWithoutHeading = self._pybullet_client.multiplyTransforms([0,0,0],headingOrn, [0,0,0], rootOrn)
    #dummy, invOrigTransOrn = self._pybullet_client.multiplyTransforms([0,0,0],rootOrnWithoutHeading, invOrigTransPos, invOrigTransOrn)

    invOrigTransPos, invOrigTransOrn = self._pybullet_client.multiplyTransforms([0, 0, 0], headingOrn, invRootPos, [0, 0, 0, 1])
    #print("invOrigTransPos=",invOrigTransPos)
    #print("invOrigTransOrn=",invOrigTransOrn)
    invOrigTransMat = self._pybullet_client.getMatrixFromQuaternion(invOrigTransOrn)
    #print("invOrigTransMat =",invOrigTransMat )
    return invOrigTransPos, invOrigTransOrn

  def getState(self):
    stateVector = []
    phase = self.getPhase()
    #print("phase=",phase)
    stateVector.append(phase)

    rootTransPos, rootTransOrn = self.buildOriginTrans()
    basePos, baseOrn = self._pybullet_client.getBasePositionAndOrientation(self._sim_model)

    rootPosRel, dummy = self._pybullet_client.multiplyTransforms(rootTransPos, rootTransOrn, basePos, [0, 0, 0, 1])
    #print("!!!rootPosRel =",rootPosRel )
    #print("rootTransPos=",rootTransPos)
    #print("basePos=",basePos)
    localPos, localOrn = self._pybullet_client.multiplyTransforms(rootTransPos, rootTransOrn, basePos, baseOrn)

    localPos = [localPos[0] - rootPosRel[0], localPos[1] - rootPosRel[1], localPos[2] - rootPosRel[2]]
    #print("localPos=",localPos)

    stateVector.append(rootPosRel[1])

    #self.pb2dmJoints=[0,1,2,9,10,11,3,4,5,12,13,14,6,7,8]
    self.pb2dmJoints = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]

    for pbJoint in range(self._pybullet_client.getNumJoints(self._sim_model)):
      j = self.pb2dmJoints[pbJoint]
      #print("joint order:",j)
      ls = self._pybullet_client.getLinkState(self._sim_model, j, computeForwardKinematics=True)
      linkPos = ls[0]
      linkOrn = ls[1]
      linkPosLocal, linkOrnLocal = self._pybullet_client.multiplyTransforms(rootTransPos, rootTransOrn, linkPos, linkOrn)
      if (linkOrnLocal[3] < 0):
        linkOrnLocal = [-linkOrnLocal[0], -linkOrnLocal[1], -linkOrnLocal[2], -linkOrnLocal[3]]
      linkPosLocal = [
          linkPosLocal[0] - rootPosRel[0], linkPosLocal[1] - rootPosRel[1],
          linkPosLocal[2] - rootPosRel[2]
      ]
      for l in linkPosLocal:
        stateVector.append(l)
      #re-order the quaternion, DeepMimic uses w,x,y,z
      stateVector.append(linkOrnLocal[3])
      stateVector.append(linkOrnLocal[0])
      stateVector.append(linkOrnLocal[1])
      stateVector.append(linkOrnLocal[2])

    for pbJoint in range(self._pybullet_client.getNumJoints(self._sim_model)):
      j = self.pb2dmJoints[pbJoint]
      ls = self._pybullet_client.getLinkState(self._sim_model, j, computeLinkVelocity=True)
      linkLinVel = ls[6]
      linkAngVel = ls[7]
      for l in linkLinVel:
        stateVector.append(l)
      for l in linkAngVel:
        stateVector.append(l)

    #print("stateVector len=",len(stateVector))
    #for st in range (len(stateVector)):
    #  print("state[",st,"]=",stateVector[st])
    return stateVector

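  #State layout sanity check for the vector built above (arithmetic only, no new runtime code):
  #  1 (phase) + 1 (root height) + 15 links*(3 pos + 4 quat) + 15 links*(3 lin vel + 3 ang vel)
  #  = 1 + 1 + 105 + 90 = 197, matching get_state_size() in pybullet_deep_mimic_env.py.
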
  def terminates(self):
    #check if any non-allowed body part hits the ground
    terminates = False
    pts = self._pybullet_client.getContactPoints()
    for p in pts:
      part = -1
      if (p[1] == self._sim_model):
        part = p[3]
      if (p[2] == self._sim_model):
        part = p[4]
      if (part >= 0 and part not in self._allowed_body_parts):
        #print("terminating part:", part)
        terminates = True

    return terminates

  def getReward(self, pose):
    #from DeepMimic double cSceneImitate::CalcRewardImitate
    pose_w = 0.5
    vel_w = 0.05
    end_eff_w = 0  #0.15
    root_w = 0  #0.2
    com_w = 0  #0.1

    total_w = pose_w + vel_w + end_eff_w + root_w + com_w
    pose_w /= total_w
    vel_w /= total_w
    end_eff_w /= total_w
    root_w /= total_w
    com_w /= total_w

    pose_scale = 2
    vel_scale = 0.1
    end_eff_scale = 40
    root_scale = 5
    com_scale = 10
    err_scale = 1

    reward = 0

    pose_err = 0
    vel_err = 0
    end_eff_err = 0
    root_err = 0
    com_err = 0
    heading_err = 0

    #create a mimic reward, comparing the dynamic (simulated) humanoid with a kinematic one

    #pose = self.InitializePoseFromMotionData()
    #print("self._kin_model=",self._kin_model)
    #print("kinematicHumanoid #joints=",self._pybullet_client.getNumJoints(self._kin_model))
    #self.ApplyPose(pose, True, True, self._kin_model, self._pybullet_client)

    #const Eigen::VectorXd& pose0 = sim_char.GetPose();
    #const Eigen::VectorXd& vel0 = sim_char.GetVel();
    #const Eigen::VectorXd& pose1 = kin_char.GetPose();
    #const Eigen::VectorXd& vel1 = kin_char.GetVel();
    #tMatrix origin_trans = sim_char.BuildOriginTrans();
    #tMatrix kin_origin_trans = kin_char.BuildOriginTrans();
    #
    #tVector com0_world = sim_char.CalcCOM();
    #tVector com_vel0_world = sim_char.CalcCOMVel();
    #tVector com1_world;
    #tVector com_vel1_world;
    #cRBDUtil::CalcCoM(joint_mat, body_defs, pose1, vel1, com1_world, com_vel1_world);
    #
    root_id = 0
    #tVector root_pos0 = cKinTree::GetRootPos(joint_mat, pose0);
    #tVector root_pos1 = cKinTree::GetRootPos(joint_mat, pose1);
    #tQuaternion root_rot0 = cKinTree::GetRootRot(joint_mat, pose0);
    #tQuaternion root_rot1 = cKinTree::GetRootRot(joint_mat, pose1);
    #tVector root_vel0 = cKinTree::GetRootVel(joint_mat, vel0);
    #tVector root_vel1 = cKinTree::GetRootVel(joint_mat, vel1);
    #tVector root_ang_vel0 = cKinTree::GetRootAngVel(joint_mat, vel0);
    #tVector root_ang_vel1 = cKinTree::GetRootAngVel(joint_mat, vel1);

    mJointWeights = [
        0.20833, 0.10416, 0.0625, 0.10416, 0.0625, 0.041666666666666671, 0.0625, 0.0416, 0.00,
        0.10416, 0.0625, 0.0416, 0.0625, 0.0416, 0.0000
    ]

    num_end_effs = 0
    num_joints = 15

    root_rot_w = mJointWeights[root_id]
    #pose_err += root_rot_w * cKinTree::CalcRootRotErr(joint_mat, pose0, pose1)
    #vel_err += root_rot_w * cKinTree::CalcRootAngVelErr(joint_mat, vel0, vel1)

    for j in range(num_joints):
      curr_pose_err = 0
      curr_vel_err = 0
      w = mJointWeights[j]

      simJointInfo = self._pybullet_client.getJointStateMultiDof(self._sim_model, j)

      #print("simJointInfo.pos=",simJointInfo[0])
      #print("simJointInfo.vel=",simJointInfo[1])
      kinJointInfo = self._pybullet_client.getJointStateMultiDof(self._kin_model, j)
      #print("kinJointInfo.pos=",kinJointInfo[0])
      #print("kinJointInfo.vel=",kinJointInfo[1])
      if (len(simJointInfo[0]) == 1):
        angle = simJointInfo[0][0] - kinJointInfo[0][0]
        curr_pose_err = angle * angle
        velDiff = simJointInfo[1][0] - kinJointInfo[1][0]
        curr_vel_err = velDiff * velDiff
      if (len(simJointInfo[0]) == 4):
        #print("quaternion diff")
        diffQuat = self._pybullet_client.getDifferenceQuaternion(simJointInfo[0], kinJointInfo[0])
        axis, angle = self._pybullet_client.getAxisAngleFromQuaternion(diffQuat)
        curr_pose_err = angle * angle
        diffVel = [
            simJointInfo[1][0] - kinJointInfo[1][0], simJointInfo[1][1] - kinJointInfo[1][1],
            simJointInfo[1][2] - kinJointInfo[1][2]
        ]
        curr_vel_err = diffVel[0] * diffVel[0] + diffVel[1] * diffVel[1] + diffVel[2] * diffVel[2]

      pose_err += w * curr_pose_err
      vel_err += w * curr_vel_err

      # bool is_end_eff = sim_char.IsEndEffector(j)
      # if (is_end_eff)
      # {
      #   tVector pos0 = sim_char.CalcJointPos(j)
      #   tVector pos1 = cKinTree::CalcJointWorldPos(joint_mat, pose1, j)
      #   double ground_h0 = mGround->SampleHeight(pos0)
      #   double ground_h1 = kin_char.GetOriginPos()[1]
      #
      #   tVector pos_rel0 = pos0 - root_pos0
      #   tVector pos_rel1 = pos1 - root_pos1
      #   pos_rel0[1] = pos0[1] - ground_h0
      #   pos_rel1[1] = pos1[1] - ground_h1
      #
      #   pos_rel0 = origin_trans * pos_rel0
      #   pos_rel1 = kin_origin_trans * pos_rel1
      #
      #   curr_end_err = (pos_rel1 - pos_rel0).squaredNorm()
      #   end_eff_err += curr_end_err
      #   ++num_end_effs
      # }
    #}
    #if (num_end_effs > 0):
    #  end_eff_err /= num_end_effs
    #
    #double root_ground_h0 = mGround->SampleHeight(sim_char.GetRootPos())
    #double root_ground_h1 = kin_char.GetOriginPos()[1]
    #root_pos0[1] -= root_ground_h0
    #root_pos1[1] -= root_ground_h1
    #root_pos_err = (root_pos0 - root_pos1).squaredNorm()
    #
    #root_rot_err = cMathUtil::QuatDiffTheta(root_rot0, root_rot1)
    #root_rot_err *= root_rot_err

    #root_vel_err = (root_vel1 - root_vel0).squaredNorm()
    #root_ang_vel_err = (root_ang_vel1 - root_ang_vel0).squaredNorm()

    #root_err = root_pos_err
    #         + 0.1 * root_rot_err
    #         + 0.01 * root_vel_err
    #         + 0.001 * root_ang_vel_err
    #com_err = 0.1 * (com_vel1_world - com_vel0_world).squaredNorm()

    #print("pose_err=",pose_err)
    #print("vel_err=",vel_err)
    pose_reward = math.exp(-err_scale * pose_scale * pose_err)
    vel_reward = math.exp(-err_scale * vel_scale * vel_err)
    end_eff_reward = math.exp(-err_scale * end_eff_scale * end_eff_err)
    root_reward = math.exp(-err_scale * root_scale * root_err)
    com_reward = math.exp(-err_scale * com_scale * com_err)

    reward = pose_w * pose_reward + vel_w * vel_reward + end_eff_w * end_eff_reward + root_w * root_reward + com_w * com_reward

    #print("reward = %f (pose_reward=%f, vel_reward=%f, end_eff_reward=%f, root_reward=%f, com_reward=%f)\n", reward,
    #  pose_reward,vel_reward,end_eff_reward, root_reward, com_reward)

    return reward
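#In closed form, the reward computed above reduces to (a sketch of the same math):
#  r = w_pose * exp(-2 * pose_err) + w_vel * exp(-0.1 * vel_err)
#with w_pose = 0.5/0.55 and w_vel = 0.05/0.55 after normalization, since the
#end-effector, root and COM terms are currently disabled (their weights are 0).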
examples/pybullet/gym/pybullet_envs/deep_mimic/env/motion_capture_data.py (vendored, new file, 19 lines)
@@ -0,0 +1,19 @@
import json


class MotionCaptureData(object):

  def __init__(self):
    self.Reset()

  def Reset(self):
    self._motion_data = []

  def Load(self, path):
    with open(path, 'r') as f:
      self._motion_data = json.load(f)

  def NumFrames(self):
    return len(self._motion_data['Frames'])

  def KeyFrameDuraction(self):
    #the first value of a key frame stores its duration
    return self._motion_data['Frames'][0][0]
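#A minimal usage sketch for MotionCaptureData (it assumes pybullet_data ships the
#walk motion referenced by pybullet_deep_mimic_env.py below):
#  import pybullet_data
#  mocap = MotionCaptureData()
#  mocap.Load(pybullet_data.getDataPath() + "/motions/humanoid3d_walk.txt")
#  print(mocap.NumFrames(), mocap.KeyFrameDuraction())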
examples/pybullet/gym/pybullet_envs/deep_mimic/env/pd_controller_stable.py (vendored, new file, 144 lines)
@@ -0,0 +1,144 @@
import numpy as np


class PDControllerStableMultiDof(object):

  def __init__(self, pb):
    self._pb = pb

  def computePD(self, bodyUniqueId, jointIndices, desiredPositions, desiredVelocities, kps, kds,
                maxForces, timeStep):
    numJoints = len(jointIndices)  #self._pb.getNumJoints(bodyUniqueId)
    curPos, curOrn = self._pb.getBasePositionAndOrientation(bodyUniqueId)
    #q1 = [desiredPositions[0],desiredPositions[1],desiredPositions[2],desiredPositions[3],desiredPositions[4],desiredPositions[5],desiredPositions[6]]
    q1 = [curPos[0], curPos[1], curPos[2], curOrn[0], curOrn[1], curOrn[2], curOrn[3]]

    #qdot1 = [0,0,0, 0,0,0,0]
    baseLinVel, baseAngVel = self._pb.getBaseVelocity(bodyUniqueId)

    qdot1 = [baseLinVel[0], baseLinVel[1], baseLinVel[2], baseAngVel[0], baseAngVel[1], baseAngVel[2], 0]
    qError = [0, 0, 0, 0, 0, 0, 0]

    qIndex = 7
    qdotIndex = 7
    zeroAccelerations = [0, 0, 0, 0, 0, 0, 0]
    for i in range(numJoints):
      js = self._pb.getJointStateMultiDof(bodyUniqueId, jointIndices[i])

      jointPos = js[0]
      jointVel = js[1]
      q1 += jointPos

      if len(js[0]) == 1:
        desiredPos = desiredPositions[qIndex]

        qdiff = desiredPos - jointPos[0]
        qError.append(qdiff)
        zeroAccelerations.append(0.)
        qdot1 += jointVel
        qIndex += 1
        qdotIndex += 1
      if len(js[0]) == 4:
        desiredPos = [
            desiredPositions[qIndex], desiredPositions[qIndex + 1], desiredPositions[qIndex + 2],
            desiredPositions[qIndex + 3]
        ]
        axis = self._pb.getAxisDifferenceQuaternion(desiredPos, jointPos)
        jointVelNew = [jointVel[0], jointVel[1], jointVel[2], 0]
        qdot1 += jointVelNew
        qError.append(axis[0])
        qError.append(axis[1])
        qError.append(axis[2])
        qError.append(0)
        #note: desiredVel is currently unused; the velocity error is taken from qdoterr below
        desiredVel = [
            desiredVelocities[qdotIndex], desiredVelocities[qdotIndex + 1],
            desiredVelocities[qdotIndex + 2]
        ]
        zeroAccelerations += [0., 0., 0., 0.]
        qIndex += 4
        qdotIndex += 4

    q = np.array(q1)
    qdot = np.array(qdot1)

    qdotdesired = np.array(desiredVelocities)
    qdoterr = qdotdesired - qdot

    Kp = np.diagflat(kps)
    Kd = np.diagflat(kds)

    p = Kp.dot(qError)

    #np.savetxt("pb_qError.csv", qError, delimiter=",")
    #np.savetxt("pb_p.csv", p, delimiter=",")

    d = Kd.dot(qdoterr)

    #np.savetxt("pb_d.csv", d, delimiter=",")
    #np.savetxt("pbqdoterr.csv", qdoterr, delimiter=",")

    M1 = self._pb.calculateMassMatrix(bodyUniqueId, q1, flags=1)

    M2 = np.array(M1)
    #np.savetxt("M2.csv", M2, delimiter=",")

    M = (M2 + Kd * timeStep)

    #np.savetxt("pbM_tKd.csv",M, delimiter=",")

    c1 = self._pb.calculateInverseDynamics(bodyUniqueId, q1, qdot1, zeroAccelerations, flags=1)

    c = np.array(c1)
    #np.savetxt("pbC.csv",c, delimiter=",")
    A = M
    #p = [0]*43
    b = p + d - c
    #np.savetxt("pb_acc.csv",b, delimiter=",")
    qddot = np.linalg.solve(A, b)
    tau = p + d - Kd.dot(qddot) * timeStep
    #print("len(tau)=",len(tau))
    maxF = np.array(maxForces)
    forces = np.clip(tau, -maxF, maxF)
    return forces

class PDControllerStable(object):

  def __init__(self, pb):
    self._pb = pb

  def computePD(self, bodyUniqueId, jointIndices, desiredPositions, desiredVelocities, kps, kds,
                maxForces, timeStep):
    numJoints = self._pb.getNumJoints(bodyUniqueId)
    jointStates = self._pb.getJointStates(bodyUniqueId, jointIndices)
    q1 = []
    qdot1 = []
    zeroAccelerations = []
    for i in range(numJoints):
      q1.append(jointStates[i][0])
      qdot1.append(jointStates[i][1])
      zeroAccelerations.append(0)
    q = np.array(q1)
    qdot = np.array(qdot1)
    qdes = np.array(desiredPositions)
    qdotdes = np.array(desiredVelocities)
    qError = qdes - q
    qdotError = qdotdes - qdot
    Kp = np.diagflat(kps)
    Kd = np.diagflat(kds)
    p = Kp.dot(qError)
    d = Kd.dot(qdotError)

    M1 = self._pb.calculateMassMatrix(bodyUniqueId, q1)
    M2 = np.array(M1)
    M = (M2 + Kd * timeStep)
    c1 = self._pb.calculateInverseDynamics(bodyUniqueId, q1, qdot1, zeroAccelerations)
    c = np.array(c1)
    A = M
    b = -c + p + d
    qddot = np.linalg.solve(A, b)
    tau = p + d - Kd.dot(qddot) * timeStep
    maxF = np.array(maxForces)
    #clip the resulting torques to the per-dof force limits
    forces = np.clip(tau, -maxF, maxF)
    #print("c=",c)
    return forces
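#Both controllers above implement "stable PD": instead of the explicit PD torque
#tau = Kp*(q_d - q) + Kd*(qd_d - qd), they solve (M + Kd*dt)*qddot = p + d - c for
#the next-step acceleration and damp the torque by it, tau = p + d - Kd*qddot*dt.
#A 1-DoF sketch of the same update (the mass/gain values are made-up toy numbers):
M, Kp, Kd, dt = 1.0, 400.0, 40.0, 1. / 600  #assumed toy values
q_err, qd_err, c = 0.1, 0.0, 0.0  #position error, velocity error, bias force
p, d = Kp * q_err, Kd * qd_err
qddot = (p + d - c) / (M + Kd * dt)  #scalar version of np.linalg.solve(A, b)
tau = p + d - Kd * qddot * dt  #tau ~= 37.5 here, versus 40.0 for explicit PD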
examples/pybullet/gym/pybullet_envs/deep_mimic/env/pybullet_deep_mimic_env.py (vendored, new file, 257 lines)
@@ -0,0 +1,257 @@
import numpy as np
import math
from pybullet_envs.deep_mimic.env.env import Env
from pybullet_envs.deep_mimic.env.action_space import ActionSpace
from pybullet_envs.minitaur.envs import bullet_client
import time
import motion_capture_data
from pybullet_envs.deep_mimic.env import humanoid_stable_pd
import pybullet_data
import pybullet as p1
import random


class PyBulletDeepMimicEnv(Env):

  def __init__(self, args=None, enable_draw=False, pybullet_client=None):
    super().__init__(args, enable_draw)
    self._num_agents = 1
    self._pybullet_client = pybullet_client
    self._isInitialized = False
    self.reset()

  def reset(self):
    self.t = 0
    if not self._isInitialized:
      if self.enable_draw:
        self._pybullet_client = bullet_client.BulletClient(connection_mode=p1.GUI)
      else:
        self._pybullet_client = bullet_client.BulletClient()

      self._pybullet_client.setAdditionalSearchPath(pybullet_data.getDataPath())
      z2y = self._pybullet_client.getQuaternionFromEuler([-math.pi * 0.5, 0, 0])
      self._planeId = self._pybullet_client.loadURDF("plane.urdf", [0, 0, 0], z2y)
      #print("planeId=",self._planeId)
      self._pybullet_client.configureDebugVisualizer(self._pybullet_client.COV_ENABLE_Y_AXIS_UP, 1)
      self._pybullet_client.setGravity(0, -9.8, 0)

      self._pybullet_client.setPhysicsEngineParameter(numSolverIterations=10)
      self._pybullet_client.changeDynamics(self._planeId, linkIndex=-1, lateralFriction=0.9)

      self._mocapData = motion_capture_data.MotionCaptureData()
      motionPath = pybullet_data.getDataPath() + "/motions/humanoid3d_walk.txt"
      #motionPath = pybullet_data.getDataPath()+"/motions/humanoid3d_backflip.txt"
      self._mocapData.Load(motionPath)
      timeStep = 1. / 600
      useFixedBase = False
      self._humanoid = humanoid_stable_pd.HumanoidStablePD(self._pybullet_client, self._mocapData,
                                                           timeStep, useFixedBase)
      self._isInitialized = True

      self._pybullet_client.setTimeStep(timeStep)
      self._pybullet_client.setPhysicsEngineParameter(numSubSteps=2)

      selfCheck = False
      if (selfCheck):
        curTime = 0
        while self._pybullet_client.isConnected():
          self._humanoid.setSimTime(curTime)
          state = self._humanoid.getState()
          #print("state=",state)
          pose = self._humanoid.computePose(self._humanoid._frameFraction)
          for i in range(10):
            curTime += timeStep
            #taus = self._humanoid.computePDForces(pose)
            #self._humanoid.applyPDForces(taus)
            #self._pybullet_client.stepSimulation()
            time.sleep(timeStep)
    #print("numframes = ", self._humanoid._mocap_data.NumFrames())
    startTime = random.randint(0, self._humanoid._mocap_data.NumFrames() - 2)
    rnrange = 1000
    rn = random.randint(0, rnrange)
    startTime = float(rn) / rnrange * self._humanoid.getCycleTime()

    self._humanoid.setSimTime(startTime)
    self._humanoid.resetPose()
    #this clears the contact points. Todo: add API to explicitly clear all contact points?
    self._pybullet_client.stepSimulation()

  def get_num_agents(self):
    return self._num_agents

  def get_action_space(self, agent_id):
    return ActionSpace(ActionSpace.Continuous)

  def get_reward_min(self, agent_id):
    return 0

  def get_reward_max(self, agent_id):
    return 1

  def get_reward_fail(self, agent_id):
    return self.get_reward_min(agent_id)

  def get_reward_succ(self, agent_id):
    return self.get_reward_max(agent_id)

  #scene_name == "imitate" -> cDrawSceneImitate
  def get_state_size(self, agent_id):
    #cCtController::GetStateSize()
    #int state_size = cDeepMimicCharController::GetStateSize();
    #state_size += GetStatePoseSize()   #106
    #state_size += GetStateVelSize()    #(3+3)*numBodyParts=90
    #state_size += GetStatePhaseSize()  #1
    #total: 197
    return 197

  def build_state_norm_groups(self, agent_id):
    #if (mEnablePhaseInput)
    #{
    #int phase_group = gNormGroupNone;
    #int phase_offset = GetStatePhaseOffset();
    #int phase_size = GetStatePhaseSize();
    #out_groups.segment(phase_offset, phase_size) = phase_group * Eigen::VectorXi::Ones(phase_size);
    groups = [0] * self.get_state_size(agent_id)
    groups[0] = -1
    return groups

  def build_state_offset(self, agent_id):
    out_offset = [0] * self.get_state_size(agent_id)
    phase_offset = -0.5
    out_offset[0] = phase_offset
    return np.array(out_offset)

  def build_state_scale(self, agent_id):
    out_scale = [1] * self.get_state_size(agent_id)
    phase_scale = 2
    out_scale[0] = phase_scale
    return np.array(out_scale)

  def get_goal_size(self, agent_id):
    return 0

  def get_action_size(self, agent_id):
    ctrl_size = 43  #numDof
    root_size = 7
    return ctrl_size - root_size

  def build_goal_norm_groups(self, agent_id):
    return np.array([])

  def build_goal_offset(self, agent_id):
    return np.array([])

  def build_goal_scale(self, agent_id):
    return np.array([])

  def build_action_offset(self, agent_id):
    out_offset = [0] * self.get_action_size(agent_id)
    out_offset = [
        0.0000000000, 0.0000000000, 0.0000000000, -0.200000000, 0.0000000000, 0.0000000000,
        0.0000000000, -0.200000000, 0.0000000000, 0.0000000000, 0.00000000, -0.2000000, 1.57000000,
        0.00000000, 0.00000000, 0.00000000, -0.2000000, 0.00000000, 0.00000000, 0.00000000,
        -0.2000000, -1.5700000, 0.00000000, 0.00000000, 0.00000000, -0.2000000, 1.57000000,
        0.00000000, 0.00000000, 0.00000000, -0.2000000, 0.00000000, 0.00000000, 0.00000000,
        -0.2000000, -1.5700000
    ]
    #see cCtCtrlUtil::BuildOffsetScalePDPrismatic and
    #see cCtCtrlUtil::BuildOffsetScalePDSpherical
    return np.array(out_offset)

  def build_action_scale(self, agent_id):
    out_scale = [1] * self.get_action_size(agent_id)
    #see cCtCtrlUtil::BuildOffsetScalePDPrismatic and
    #see cCtCtrlUtil::BuildOffsetScalePDSpherical
    out_scale = [
        0.20833333333333, 1.00000000000000, 1.00000000000000, 1.00000000000000, 0.25000000000000,
        1.00000000000000, 1.00000000000000, 1.00000000000000, 0.12077294685990, 1.00000000000000,
        1.000000000000, 1.000000000000, 0.159235668789, 0.159235668789, 1.000000000000,
        1.000000000000, 1.000000000000, 0.079617834394, 1.000000000000, 1.000000000000,
        1.000000000000, 0.159235668789, 0.120772946859, 1.000000000000, 1.000000000000,
        1.000000000000, 0.159235668789, 0.159235668789, 1.000000000000, 1.000000000000,
        1.000000000000, 0.107758620689, 1.000000000000, 1.000000000000, 1.000000000000,
        0.159235668789
    ]
    return np.array(out_scale)

  def build_action_bound_min(self, agent_id):
    #see cCtCtrlUtil::BuildBoundsPDSpherical
    out_scale = [-1] * self.get_action_size(agent_id)
    out_scale = [
        -4.79999999999, -1.00000000000, -1.00000000000, -1.00000000000, -4.00000000000,
        -1.00000000000, -1.00000000000, -1.00000000000, -7.77999999999, -1.00000000000,
        -1.000000000, -1.000000000, -7.850000000, -6.280000000, -1.000000000, -1.000000000,
        -1.000000000, -12.56000000, -1.000000000, -1.000000000, -1.000000000, -4.710000000,
        -7.779999999, -1.000000000, -1.000000000, -1.000000000, -7.850000000, -6.280000000,
        -1.000000000, -1.000000000, -1.000000000, -8.460000000, -1.000000000, -1.000000000,
        -1.000000000, -4.710000000
    ]

    return out_scale

  def build_action_bound_max(self, agent_id):
    out_scale = [1] * self.get_action_size(agent_id)
    out_scale = [
        4.799999999, 1.000000000, 1.000000000, 1.000000000, 4.000000000, 1.000000000, 1.000000000,
        1.000000000, 8.779999999, 1.000000000, 1.0000000, 1.0000000, 4.7100000, 6.2800000,
        1.0000000, 1.0000000, 1.0000000, 12.560000, 1.0000000, 1.0000000, 1.0000000, 7.8500000,
        8.7799999, 1.0000000, 1.0000000, 1.0000000, 4.7100000, 6.2800000, 1.0000000, 1.0000000,
        1.0000000, 10.100000, 1.0000000, 1.0000000, 1.0000000, 7.8500000
    ]
    return out_scale

  def set_mode(self, mode):
    self._mode = mode

  def need_new_action(self, agent_id):
    return True

  def record_state(self, agent_id):
    state = self._humanoid.getState()
    state[1] = state[1] + 0.008
    #print("record_state=",state)
    return np.array(state)

  def record_goal(self, agent_id):
    return np.array([])

  def calc_reward(self, agent_id):
    kinPose = self._humanoid.computePose(self._humanoid._frameFraction)
    reward = self._humanoid.getReward(kinPose)
    return reward

  def set_action(self, agent_id, action):
    self.desiredPose = self._humanoid.convertActionToPose(action)
    #print("set_action: desiredPose=", self.desiredPose)

  def log_val(self, agent_id, val):
    pass

  def update(self, timeStep):
    for i in range(10):
      self.t += timeStep
      self._humanoid.setSimTime(self.t)

      if self.desiredPose:
        kinPose = self._humanoid.computePose(self._humanoid._frameFraction)
        self._humanoid.initializePose(self._humanoid._poseInterpolator,
                                      self._humanoid._kin_model, initBase=False)
        pos, orn = self._pybullet_client.getBasePositionAndOrientation(self._humanoid._sim_model)
        self._pybullet_client.resetBasePositionAndOrientation(self._humanoid._kin_model,
                                                              [pos[0], pos[1] + 2, pos[2]], orn)
        #print("desiredPositions=",self.desiredPose)
        taus = self._humanoid.computePDForces(self.desiredPose)
        self._humanoid.applyPDForces(taus)
        self._pybullet_client.stepSimulation()

  def set_sample_count(self, count):
    return

  def check_terminate(self, agent_id):
    return Env.Terminate(self.is_episode_end())

  def is_episode_end(self):
    isEnded = self._humanoid.terminates()
    #also check maximum time, 20 seconds (todo: get this from the arg file)
    #print("self.t=",self.t)
    if (self.t > 20):
      isEnded = True
    return isEnded

  def check_valid_episode(self):
    #could check if limbs exceed velocity threshold
    return True
@@ -0,0 +1,21 @@
import json
import numpy as np
from learning.ppo_agent import PPOAgent
import pybullet_data

AGENT_TYPE_KEY = "AgentType"


def build_agent(world, id, file):
  agent = None
  with open(pybullet_data.getDataPath() + "/" + file) as data_file:
    json_data = json.load(data_file)

  assert AGENT_TYPE_KEY in json_data
  agent_type = json_data[AGENT_TYPE_KEY]

  if (agent_type == PPOAgent.NAME):
    agent = PPOAgent(world, id, json_data)
  else:
    assert False, 'Unsupported agent type: ' + agent_type

  return agent
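#A usage sketch for build_agent (the agent file name here is hypothetical; any
#JSON file under pybullet_data whose "AgentType" entry is "PPO" would work):
#  agent = build_agent(world, 0, "agents/humanoid3d_ppo_agent.txt")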
@@ -0,0 +1,54 @@
import json
import numpy as np
import pybullet_utils.math_util as MathUtil


class ExpParams(object):
  RATE_KEY = 'Rate'
  INIT_ACTION_RATE_KEY = 'InitActionRate'
  NOISE_KEY = 'Noise'
  NOISE_INTERNAL_KEY = 'NoiseInternal'
  TEMP_KEY = 'Temp'

  def __init__(self):
    self.rate = 0.2
    self.init_action_rate = 0
    self.noise = 0.1
    self.noise_internal = 0
    self.temp = 0.1
    return

  def __str__(self):
    out = ''
    out += '{}: {:.2f}\n'.format(self.RATE_KEY, self.rate)
    out += '{}: {:.2f}\n'.format(self.INIT_ACTION_RATE_KEY, self.init_action_rate)
    out += '{}: {:.2f}\n'.format(self.NOISE_KEY, self.noise)
    out += '{}: {:.2f}\n'.format(self.NOISE_INTERNAL_KEY, self.noise_internal)
    out += '{}: {:.2f}\n'.format(self.TEMP_KEY, self.temp)
    return out

  def load(self, json_data):
    if (self.RATE_KEY in json_data):
      self.rate = json_data[self.RATE_KEY]

    if (self.INIT_ACTION_RATE_KEY in json_data):
      self.init_action_rate = json_data[self.INIT_ACTION_RATE_KEY]

    if (self.NOISE_KEY in json_data):
      self.noise = json_data[self.NOISE_KEY]

    if (self.NOISE_INTERNAL_KEY in json_data):
      self.noise_internal = json_data[self.NOISE_INTERNAL_KEY]

    if (self.TEMP_KEY in json_data):
      self.temp = json_data[self.TEMP_KEY]

    return

  def lerp(self, other, t):
    lerp_params = ExpParams()
    lerp_params.rate = MathUtil.lerp(self.rate, other.rate, t)
    lerp_params.init_action_rate = MathUtil.lerp(self.init_action_rate, other.init_action_rate, t)
    lerp_params.noise = MathUtil.lerp(self.noise, other.noise, t)
    lerp_params.noise_internal = MathUtil.lerp(self.noise_internal, other.noise_internal, t)
    lerp_params.temp = MathUtil.log_lerp(self.temp, other.temp, t)
    return lerp_params
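#lerp/log_lerp come from pybullet_utils.math_util; a sketch of the assumed
#semantics (the temperature is blended in log space so it decays geometrically
#rather than linearly):
#  lerp(x, y, t)     = (1 - t) * x + t * y
#  log_lerp(x, y, t) = exp(lerp(log(x), log(y), t))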
@@ -0,0 +1 @@
from . import *
@@ -0,0 +1,13 @@
import tensorflow as tf
import learning.tf_util as TFUtil

NAME = "fc_2layers_1024units"


def build_net(input_tfs, reuse=False):
  layers = [1024, 512]
  activation = tf.nn.relu

  input_tf = tf.concat(axis=-1, values=input_tfs)
  h = TFUtil.fc_net(input_tf, layers, activation=activation, reuse=reuse)
  h = activation(h)
  return h
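#A shape sketch for build_net above (TF1-style; the 197-dim state matches
#get_state_size() in pybullet_deep_mimic_env.py):
#  s_tf = tf.placeholder(tf.float32, shape=[None, 197], name="s")
#  h = build_net([s_tf])  #concat -> fc 1024 -> fc 512 -> relu, shape [None, 512]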
@@ -0,0 +1,11 @@
import learning.nets.fc_2layers_1024units as fc_2layers_1024units


def build_net(net_name, input_tfs, reuse=False):
  net = None

  if (net_name == fc_2layers_1024units.NAME):
    net = fc_2layers_1024units.build_net(input_tfs, reuse)
  else:
    assert False, 'Unsupported net: ' + net_name

  return net
@@ -0,0 +1,149 @@
import numpy as np
import copy
import pybullet_utils.mpi_util as MPIUtil
from pybullet_utils.logger import Logger


class Normalizer(object):
  CHECK_SYNC_COUNT = 50000  # check synchronization after a certain number of entries

  # these group IDs must be the same as those in CharController.h
  NORM_GROUP_SINGLE = 0
  NORM_GROUP_NONE = -1

  class Group(object):

    def __init__(self, id, indices):
      self.id = id
      self.indices = indices
      return

  def __init__(self, size, groups_ids=None, eps=0.02, clip=np.inf):
    self.eps = eps
    self.clip = clip
    self.mean = np.zeros(size)
    self.mean_sq = np.zeros(size)
    self.std = np.ones(size)
    self.count = 0
    self.groups = self._build_groups(groups_ids)

    self.new_count = 0
    self.new_sum = np.zeros_like(self.mean)
    self.new_sum_sq = np.zeros_like(self.mean_sq)
    return

  def record(self, x):
    size = self.get_size()
    is_array = isinstance(x, np.ndarray)
    if not is_array:
      assert (size == 1)
      x = np.array([[x]])

    assert x.shape[-1] == size, \
      Logger.print2('Normalizer shape mismatch, expecting size {:d}, but got {:d}'.format(size, x.shape[-1]))
    x = np.reshape(x, [-1, size])

    self.new_count += x.shape[0]
    self.new_sum += np.sum(x, axis=0)
    self.new_sum_sq += np.sum(np.square(x), axis=0)
    return

  def update(self):
    new_count = MPIUtil.reduce_sum(self.new_count)
    new_sum = MPIUtil.reduce_sum(self.new_sum)
    new_sum_sq = MPIUtil.reduce_sum(self.new_sum_sq)

    new_total = self.count + new_count
    if (self.count // self.CHECK_SYNC_COUNT != new_total // self.CHECK_SYNC_COUNT):
      assert self.check_synced(), Logger.print2('Normalizer parameters desynchronized')

    if new_count > 0:
      new_mean = self._process_group_data(new_sum / new_count, self.mean)
      new_mean_sq = self._process_group_data(new_sum_sq / new_count, self.mean_sq)
      w_old = float(self.count) / new_total
      w_new = float(new_count) / new_total

      self.mean = w_old * self.mean + w_new * new_mean
      self.mean_sq = w_old * self.mean_sq + w_new * new_mean_sq
      self.count = new_total
      self.std = self.calc_std(self.mean, self.mean_sq)

    self.new_count = 0
    self.new_sum.fill(0)
    self.new_sum_sq.fill(0)

    return

  def get_size(self):
    return self.mean.size

  def set_mean_std(self, mean, std):
    size = self.get_size()
    is_array = isinstance(mean, np.ndarray) and isinstance(std, np.ndarray)

    if not is_array:
      assert (size == 1)
      mean = np.array([mean])
      std = np.array([std])

    assert len(mean) == size and len(std) == size, \
      Logger.print2('Normalizer shape mismatch, expecting size {:d}, but got {:d} and {:d}'.format(size, len(mean), len(std)))

    self.mean = mean
    self.std = std
    self.mean_sq = self.calc_mean_sq(self.mean, self.std)
    return

  def normalize(self, x):
    norm_x = (x - self.mean) / self.std
    norm_x = np.clip(norm_x, -self.clip, self.clip)
    return norm_x

  def unnormalize(self, norm_x):
    x = norm_x * self.std + self.mean
    return x

  def calc_std(self, mean, mean_sq):
    var = mean_sq - np.square(mean)
    # sometimes floating point errors can lead to small negative numbers
    var = np.maximum(var, 0)
    std = np.sqrt(var)
    std = np.maximum(std, self.eps)
    return std

  def calc_mean_sq(self, mean, std):
    return np.square(std) + np.square(mean)

  def check_synced(self):
    synced = True
    if MPIUtil.is_root_proc():
      vars = np.concatenate([self.mean, self.mean_sq])
      MPIUtil.bcast(vars)
    else:
      vars_local = np.concatenate([self.mean, self.mean_sq])
      vars_root = np.empty_like(vars_local)
      MPIUtil.bcast(vars_root)
      synced = (vars_local == vars_root).all()
    return synced

  def _build_groups(self, groups_ids):
    groups = []
    if groups_ids is None:
      curr_id = self.NORM_GROUP_SINGLE
      curr_list = np.arange(self.get_size()).astype(np.int32)
      groups.append(self.Group(curr_id, curr_list))
    else:
      ids = np.unique(groups_ids)
      for id in ids:
        curr_list = np.nonzero(groups_ids == id)[0].astype(np.int32)
        groups.append(self.Group(id, curr_list))

    return groups

  def _process_group_data(self, new_data, old_data):
    proc_data = new_data.copy()
    for group in self.groups:
      if group.id == self.NORM_GROUP_NONE:
        proc_data[group.indices] = old_data[group.indices]
      elif group.id != self.NORM_GROUP_SINGLE:
        avg = np.mean(new_data[group.indices])
        proc_data[group.indices] = avg
    return proc_data
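#The update above keeps running estimates of E[x] and E[x^2]; a worked example
#(a sketch): after recording x = [1, 2, 3] on a size-1 normalizer,
#  mean = 2, mean_sq = 14/3, std = max(sqrt(14/3 - 2**2), eps) ~= 0.816,
#so normalize(x) returns (x - 2) / 0.816, clipped to [-clip, clip].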
@@ -0,0 +1,46 @@
import numpy as np
from pybullet_envs.deep_mimic.env.env import Env


class Path(object):

  def __init__(self):
    self.clear()
    return

  def pathlength(self):
    return len(self.actions)

  def is_valid(self):
    valid = True
    l = self.pathlength()
    valid &= len(self.states) == l + 1
    valid &= len(self.goals) == l + 1
    valid &= len(self.actions) == l
    valid &= len(self.logps) == l
    valid &= len(self.rewards) == l
    valid &= len(self.flags) == l

    return valid

  def check_vals(self):
    for vals in [self.states, self.goals, self.actions, self.logps, self.rewards]:
      for v in vals:
        if not np.isfinite(v).all():
          return False
    return True

  def clear(self):
    self.states = []
    self.goals = []
    self.actions = []
    self.logps = []
    self.rewards = []
    self.flags = []
    self.terminate = Env.Terminate.Null
    return

  def get_pathlen(self):
    return len(self.rewards)

  def calc_return(self):
    return sum(self.rewards)
@@ -0,0 +1,353 @@
import numpy as np
|
||||||
|
import tensorflow as tf
|
||||||
|
import copy
|
||||||
|
|
||||||
|
from learning.tf_agent import TFAgent
|
||||||
|
from learning.solvers.mpi_solver import MPISolver
|
||||||
|
import learning.tf_util as TFUtil
|
||||||
|
import learning.nets.net_builder as NetBuilder
|
||||||
|
from learning.tf_normalizer import TFNormalizer
|
||||||
|
import learning.rl_util as RLUtil
|
||||||
|
from pybullet_utils.logger import Logger
|
||||||
|
import pybullet_utils.mpi_util as MPIUtil
|
||||||
|
import pybullet_utils.math_util as MathUtil
|
||||||
|
from pybullet_envs.deep_mimic.env.action_space import ActionSpace
|
||||||
|
from pybullet_envs.deep_mimic.env.env import Env
|
||||||
|
|
||||||
|
'''
|
||||||
|
Policy Gradient Agent
|
||||||
|
'''
|
||||||
|
|
||||||
|
class PGAgent(TFAgent):
|
||||||
|
NAME = 'PG'
|
||||||
|
|
||||||
|
ACTOR_NET_KEY = 'ActorNet'
|
||||||
|
ACTOR_STEPSIZE_KEY = 'ActorStepsize'
|
||||||
|
ACTOR_MOMENTUM_KEY = 'ActorMomentum'
|
||||||
|
ACTOR_WEIGHT_DECAY_KEY = 'ActorWeightDecay'
|
||||||
|
ACTOR_INIT_OUTPUT_SCALE_KEY = 'ActorInitOutputScale'
|
||||||
|
|
||||||
|
CRITIC_NET_KEY = 'CriticNet'
|
||||||
|
CRITIC_STEPSIZE_KEY = 'CriticStepsize'
|
||||||
|
CRITIC_MOMENTUM_KEY = 'CriticMomentum'
|
||||||
|
CRITIC_WEIGHT_DECAY_KEY = 'CriticWeightDecay'
|
||||||
|
|
||||||
|
EXP_ACTION_FLAG = 1 << 0
|
||||||
|
|
||||||
|
def __init__(self, world, id, json_data):
|
||||||
|
self._exp_action = False
|
||||||
|
super().__init__(world, id, json_data)
|
||||||
|
return
|
||||||
|
|
||||||
|
def reset(self):
|
||||||
|
super().reset()
|
||||||
|
self._exp_action = False
|
||||||
|
return
|
||||||
|
|
||||||
|
def _check_action_space(self):
|
||||||
|
action_space = self.get_action_space()
|
||||||
|
return action_space == ActionSpace.Continuous
|
||||||
|
|
||||||
|
def _load_params(self, json_data):
|
||||||
|
super()._load_params(json_data)
|
||||||
|
self.val_min, self.val_max = self._calc_val_bounds(self.discount)
|
||||||
|
self.val_fail, self.val_succ = self._calc_term_vals(self.discount)
|
||||||
|
return
|
||||||
|
|
||||||
|
def _build_nets(self, json_data):
|
||||||
|
assert self.ACTOR_NET_KEY in json_data
|
||||||
|
assert self.CRITIC_NET_KEY in json_data
|
||||||
|
|
||||||
|
actor_net_name = json_data[self.ACTOR_NET_KEY]
|
||||||
|
critic_net_name = json_data[self.CRITIC_NET_KEY]
|
||||||
|
actor_init_output_scale = 1 if (self.ACTOR_INIT_OUTPUT_SCALE_KEY not in json_data) else json_data[self.ACTOR_INIT_OUTPUT_SCALE_KEY]
|
||||||
|
|
||||||
|
s_size = self.get_state_size()
|
||||||
|
g_size = self.get_goal_size()
|
||||||
|
a_size = self.get_action_size()
|
||||||
|
|
||||||
|
# setup input tensors
|
||||||
|
self.s_tf = tf.placeholder(tf.float32, shape=[None, s_size], name="s") # observations
|
||||||
|
self.tar_val_tf = tf.placeholder(tf.float32, shape=[None], name="tar_val") # target value s
|
||||||
|
self.adv_tf = tf.placeholder(tf.float32, shape=[None], name="adv") # advantage
|
||||||
|
self.a_tf = tf.placeholder(tf.float32, shape=[None, a_size], name="a") # target actions
|
||||||
|
self.g_tf = tf.placeholder(tf.float32, shape=([None, g_size] if self.has_goal() else None), name="g") # goals
|
||||||
|
|
||||||
|
with tf.variable_scope('main'):
|
||||||
|
with tf.variable_scope('actor'):
|
||||||
|
self.actor_tf = self._build_net_actor(actor_net_name, actor_init_output_scale)
|
||||||
|
with tf.variable_scope('critic'):
|
||||||
|
self.critic_tf = self._build_net_critic(critic_net_name)
|
||||||
|
|
||||||
|
if (self.actor_tf != None):
|
||||||
|
Logger.print2('Built actor net: ' + actor_net_name)
|
||||||
|
|
||||||
|
if (self.critic_tf != None):
|
||||||
|
Logger.print2('Built critic net: ' + critic_net_name)
|
||||||
|
|
||||||
|
return
|
||||||
|
|
||||||
|
def _build_normalizers(self):
|
||||||
|
super()._build_normalizers()
|
||||||
|
with self.sess.as_default(), self.graph.as_default(), tf.variable_scope(self.tf_scope):
|
||||||
|
with tf.variable_scope(self.RESOURCE_SCOPE):
|
||||||
|
val_offset, val_scale = self._calc_val_offset_scale(self.discount)
|
||||||
|
self.val_norm = TFNormalizer(self.sess, 'val_norm', 1)
|
||||||
|
self.val_norm.set_mean_std(-val_offset, 1.0 / val_scale)
|
||||||
|
return
|
||||||
|
|
||||||
|
def _init_normalizers(self):
|
||||||
|
super()._init_normalizers()
|
||||||
|
with self.sess.as_default(), self.graph.as_default():
|
||||||
|
self.val_norm.update()
|
||||||
|
return
|
||||||
|
|
||||||
|
def _load_normalizers(self):
|
||||||
|
super()._load_normalizers()
|
||||||
|
self.val_norm.load()
|
||||||
|
return
|
||||||
|
|
||||||
|
def _build_losses(self, json_data):
|
||||||
|
actor_weight_decay = 0 if (self.ACTOR_WEIGHT_DECAY_KEY not in json_data) else json_data[self.ACTOR_WEIGHT_DECAY_KEY]
|
||||||
|
critic_weight_decay = 0 if (self.CRITIC_WEIGHT_DECAY_KEY not in json_data) else json_data[self.CRITIC_WEIGHT_DECAY_KEY]
|
||||||
|
|
||||||
|
norm_val_diff = self.val_norm.normalize_tf(self.tar_val_tf) - self.val_norm.normalize_tf(self.critic_tf)
|
||||||
|
self.critic_loss_tf = 0.5 * tf.reduce_mean(tf.square(norm_val_diff))
|
||||||
|
|
||||||
|
if (critic_weight_decay != 0):
|
||||||
|
self.critic_loss_tf += critic_weight_decay * self._weight_decay_loss('main/critic')
|
||||||
|
|
||||||
|
norm_a_mean_tf = self.a_norm.normalize_tf(self.actor_tf)
|
||||||
|
norm_a_diff = self.a_norm.normalize_tf(self.a_tf) - norm_a_mean_tf
|
||||||
|
|
||||||
|
self.actor_loss_tf = tf.reduce_sum(tf.square(norm_a_diff), axis=-1)
|
||||||
|
self.actor_loss_tf *= self.adv_tf
|
||||||
|
self.actor_loss_tf = 0.5 * tf.reduce_mean(self.actor_loss_tf)
|
||||||
|
|
||||||
|
norm_a_bound_min = self.a_norm.normalize(self.a_bound_min)
|
||||||
|
norm_a_bound_max = self.a_norm.normalize(self.a_bound_max)
|
||||||
|
a_bound_loss = TFUtil.calc_bound_loss(norm_a_mean_tf, norm_a_bound_min, norm_a_bound_max)
|
||||||
|
a_bound_loss /= self.exp_params_curr.noise
|
||||||
|
self.actor_loss_tf += a_bound_loss
|
||||||
|
|
||||||
|
if (actor_weight_decay != 0):
|
||||||
|
self.actor_loss_tf += actor_weight_decay * self._weight_decay_loss('main/actor')
|
||||||
|
|
||||||
|
return
|
||||||
|
|
||||||
|
def _build_solvers(self, json_data):
|
||||||
|
actor_stepsize = 0.001 if (self.ACTOR_STEPSIZE_KEY not in json_data) else json_data[self.ACTOR_STEPSIZE_KEY]
|
||||||
|
actor_momentum = 0.9 if (self.ACTOR_MOMENTUM_KEY not in json_data) else json_data[self.ACTOR_MOMENTUM_KEY]
|
||||||
|
critic_stepsize = 0.01 if (self.CRITIC_STEPSIZE_KEY not in json_data) else json_data[self.CRITIC_STEPSIZE_KEY]
|
||||||
|
critic_momentum = 0.9 if (self.CRITIC_MOMENTUM_KEY not in json_data) else json_data[self.CRITIC_MOMENTUM_KEY]
|
||||||
|
|
||||||
|
critic_vars = self._tf_vars('main/critic')
|
||||||
|
critic_opt = tf.train.MomentumOptimizer(learning_rate=critic_stepsize, momentum=critic_momentum)
|
||||||
|
self.critic_grad_tf = tf.gradients(self.critic_loss_tf, critic_vars)
|
||||||
|
self.critic_solver = MPISolver(self.sess, critic_opt, critic_vars)
|
||||||
|
|
||||||
|
actor_vars = self._tf_vars('main/actor')
|
||||||
|
actor_opt = tf.train.MomentumOptimizer(learning_rate=actor_stepsize, momentum=actor_momentum)
|
||||||
|
self.actor_grad_tf = tf.gradients(self.actor_loss_tf, actor_vars)
|
||||||
|
self.actor_solver = MPISolver(self.sess, actor_opt, actor_vars)
|
||||||
|
|
||||||
|
return
|
||||||
|
|
||||||
|
def _build_net_actor(self, net_name, init_output_scale):
|
||||||
|
norm_s_tf = self.s_norm.normalize_tf(self.s_tf)
|
||||||
|
input_tfs = [norm_s_tf]
|
||||||
|
if (self.has_goal()):
|
||||||
|
norm_g_tf = self.g_norm.normalize_tf(self.g_tf)
|
||||||
|
input_tfs += [norm_g_tf]
|
||||||
|
|
||||||
|
h = NetBuilder.build_net(net_name, input_tfs)
|
||||||
|
norm_a_tf = tf.layers.dense(inputs=h, units=self.get_action_size(), activation=None,
|
||||||
|
kernel_initializer=tf.random_uniform_initializer(minval=-init_output_scale, maxval=init_output_scale))
|
||||||
|
|
||||||
|
a_tf = self.a_norm.unnormalize_tf(norm_a_tf)
|
||||||
|
return a_tf
|
||||||
|
|
||||||
|
def _build_net_critic(self, net_name):
|
||||||
|
norm_s_tf = self.s_norm.normalize_tf(self.s_tf)
|
||||||
|
input_tfs = [norm_s_tf]
|
||||||
|
if (self.has_goal()):
|
||||||
|
norm_g_tf = self.g_norm.normalize_tf(self.g_tf)
|
||||||
|
input_tfs += [norm_g_tf]
|
||||||
|
|
||||||
|
h = NetBuilder.build_net(net_name, input_tfs)
|
||||||
|
norm_val_tf = tf.layers.dense(inputs=h, units=1, activation=None,
|
||||||
|
kernel_initializer=TFUtil.xavier_initializer);
|
||||||
|
|
||||||
|
norm_val_tf = tf.reshape(norm_val_tf, [-1])
|
||||||
|
val_tf = self.val_norm.unnormalize_tf(norm_val_tf)
|
||||||
|
return val_tf
|
||||||
|
|
||||||
|
def _initialize_vars(self):
|
||||||
|
super()._initialize_vars()
|
||||||
|
self._sync_solvers()
|
||||||
|
return
|
||||||
|
|
||||||
|
def _sync_solvers(self):
|
||||||
|
self.actor_solver.sync()
|
||||||
|
self.critic_solver.sync()
|
||||||
|
return
|
||||||
|
|
||||||
|
def _decide_action(self, s, g):
|
||||||
|
with self.sess.as_default(), self.graph.as_default():
|
||||||
|
self._exp_action = False
|
||||||
|
a = self._eval_actor(s, g)[0]
|
||||||
|
logp = 0
|
||||||
|
|
||||||
|
if self._enable_stoch_policy():
|
||||||
|
# epsilon-greedy
|
||||||
|
rand_action = MathUtil.flip_coin(self.exp_params_curr.rate)
|
||||||
|
if rand_action:
|
||||||
|
norm_exp_noise = np.random.randn(*a.shape)
|
||||||
|
norm_exp_noise *= self.exp_params_curr.noise
|
||||||
|
exp_noise = norm_exp_noise * self.a_norm.std
|
||||||
|
a += exp_noise
|
||||||
|
|
||||||
|
logp = self._calc_action_logp(norm_exp_noise)
|
||||||
|
self._exp_action = True
|
||||||
|
|
||||||
|
return a, logp
|
||||||
|
|
||||||
|
def _enable_stoch_policy(self):
|
||||||
|
return self.enable_training and (self._mode == self.Mode.TRAIN or self._mode == self.Mode.TRAIN_END)
|
||||||
|
|
||||||
|
def _eval_actor(self, s, g):
|
||||||
|
s = np.reshape(s, [-1, self.get_state_size()])
|
||||||
|
g = np.reshape(g, [-1, self.get_goal_size()]) if self.has_goal() else None
|
||||||
|
|
||||||
|
feed = {
|
||||||
|
self.s_tf : s,
|
||||||
|
self.g_tf : g
|
||||||
|
}
|
||||||
|
|
||||||
|
a = self.actor_tf.eval(feed)
|
||||||
|
return a
|
||||||
|
|
||||||
|
def _eval_critic(self, s, g):
|
||||||
|
with self.sess.as_default(), self.graph.as_default():
|
||||||
|
s = np.reshape(s, [-1, self.get_state_size()])
|
||||||
|
g = np.reshape(g, [-1, self.get_goal_size()]) if self.has_goal() else None
|
||||||
|
|
||||||
        feed = {
            self.s_tf: s,
            self.g_tf: g
        }

        val = self.critic_tf.eval(feed)
        return val

    def _record_flags(self):
        flags = int(0)
        if (self._exp_action):
            flags = flags | self.EXP_ACTION_FLAG
        return flags

    def _train_step(self):
        super()._train_step()

        critic_loss = self._update_critic()
        actor_loss = self._update_actor()
        critic_loss = MPIUtil.reduce_avg(critic_loss)
        actor_loss = MPIUtil.reduce_avg(actor_loss)

        critic_stepsize = self.critic_solver.get_stepsize()
        actor_stepsize = self.actor_solver.get_stepsize()

        self.logger.log_tabular('Critic_Loss', critic_loss)
        self.logger.log_tabular('Critic_Stepsize', critic_stepsize)
        self.logger.log_tabular('Actor_Loss', actor_loss)
        self.logger.log_tabular('Actor_Stepsize', actor_stepsize)

        return

    def _update_critic(self):
        idx = self.replay_buffer.sample(self._local_mini_batch_size)
        s = self.replay_buffer.get('states', idx)
        g = self.replay_buffer.get('goals', idx) if self.has_goal() else None

        tar_V = self._calc_updated_vals(idx)
        tar_V = np.clip(tar_V, self.val_min, self.val_max)

        feed = {
            self.s_tf: s,
            self.g_tf: g,
            self.tar_val_tf: tar_V
        }

        loss, grads = self.sess.run([self.critic_loss_tf, self.critic_grad_tf], feed)
        self.critic_solver.update(grads)
        return loss

    def _update_actor(self):
        key = self.EXP_ACTION_FLAG
        idx = self.replay_buffer.sample_filtered(self._local_mini_batch_size, key)
        has_goal = self.has_goal()

        s = self.replay_buffer.get('states', idx)
        g = self.replay_buffer.get('goals', idx) if has_goal else None
        a = self.replay_buffer.get('actions', idx)

        V_new = self._calc_updated_vals(idx)
        V_old = self._eval_critic(s, g)
        adv = V_new - V_old

        feed = {
            self.s_tf: s,
            self.g_tf: g,
            self.a_tf: a,
            self.adv_tf: adv
        }

        loss, grads = self.sess.run([self.actor_loss_tf, self.actor_grad_tf], feed)
        self.actor_solver.update(grads)

        return loss

    def _calc_updated_vals(self, idx):
        r = self.replay_buffer.get('rewards', idx)

        if self.discount == 0:
            new_V = r
        else:
            next_idx = self.replay_buffer.get_next_idx(idx)
            s_next = self.replay_buffer.get('states', next_idx)
            g_next = self.replay_buffer.get('goals', next_idx) if self.has_goal() else None

            is_end = self.replay_buffer.is_path_end(idx)
            is_fail = self.replay_buffer.check_terminal_flag(idx, Env.Terminate.Fail)
            is_succ = self.replay_buffer.check_terminal_flag(idx, Env.Terminate.Succ)
            is_fail = np.logical_and(is_end, is_fail)
            is_succ = np.logical_and(is_end, is_succ)

            V_next = self._eval_critic(s_next, g_next)
            V_next[is_fail] = self.val_fail
            V_next[is_succ] = self.val_succ

            new_V = r + self.discount * V_next
        return new_V

    def _calc_action_logp(self, norm_action_deltas):
        # norm_action_deltas are deltas of the normalized actions (scaled by self.a_norm.std)
        stdev = self.exp_params_curr.noise
        assert stdev > 0

        a_size = self.get_action_size()
        logp = -0.5 / (stdev * stdev) * np.sum(np.square(norm_action_deltas), axis=-1)
        logp += -0.5 * a_size * np.log(2 * np.pi)
        logp += -a_size * np.log(stdev)
        return logp

    def _log_val(self, s, g):
        val = self._eval_critic(s, g)
        norm_val = self.val_norm.normalize(val)
        self.world.env.log_val(self.id, norm_val[0])
        return

    def _build_replay_buffer(self, buffer_size):
        super()._build_replay_buffer(buffer_size)
        self.replay_buffer.add_filter_key(self.EXP_ACTION_FLAG)
        return
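# ---------------------------------------------------------------------------
# Illustrative standalone sketch: the closed-form density that
# _calc_action_logp above evaluates. For a diagonal Gaussian with a shared
# stdev sigma over k action dimensions,
#   log p(delta) = -||delta||^2 / (2 sigma^2) - (k / 2) log(2 pi) - k log(sigma)
# The helper below mirrors that formula with plain numpy; its names are local
# to this sketch, not part of the diff.
import numpy as np

def gaussian_logp(deltas, stdev):
    # deltas: array of shape [..., k], deviations from the policy mean in
    # normalized action space; stdev: shared exploration noise, must be > 0
    k = deltas.shape[-1]
    logp = -0.5 / (stdev * stdev) * np.sum(np.square(deltas), axis=-1)
    logp += -0.5 * k * np.log(2 * np.pi) - k * np.log(stdev)
    return logp

# e.g. gaussian_logp(np.zeros(4), 0.2) is the log-density at the mean:
# -0.5 * 4 * log(2 pi) - 4 * log(0.2) ~= 2.76
# ---------------------------------------------------------------------------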
@@ -0,0 +1,367 @@
import numpy as np
import copy
import tensorflow as tf

from learning.pg_agent import PGAgent
from learning.solvers.mpi_solver import MPISolver
import learning.tf_util as TFUtil
import learning.rl_util as RLUtil
from pybullet_utils.logger import Logger
import pybullet_utils.mpi_util as MPIUtil
import pybullet_utils.math_util as MathUtil
from pybullet_envs.deep_mimic.env.env import Env

'''
Proximal Policy Optimization Agent
'''

class PPOAgent(PGAgent):
    NAME = "PPO"
    EPOCHS_KEY = "Epochs"
    BATCH_SIZE_KEY = "BatchSize"
    RATIO_CLIP_KEY = "RatioClip"
    NORM_ADV_CLIP_KEY = "NormAdvClip"
    TD_LAMBDA_KEY = "TDLambda"
    TAR_CLIP_FRAC = "TarClipFrac"
    ACTOR_STEPSIZE_DECAY = "ActorStepsizeDecay"

    def __init__(self, world, id, json_data):
        super().__init__(world, id, json_data)
        return

    def _load_params(self, json_data):
        super()._load_params(json_data)

        self.epochs = 1 if (self.EPOCHS_KEY not in json_data) else json_data[self.EPOCHS_KEY]
        self.batch_size = 1024 if (self.BATCH_SIZE_KEY not in json_data) else json_data[self.BATCH_SIZE_KEY]
        self.ratio_clip = 0.2 if (self.RATIO_CLIP_KEY not in json_data) else json_data[self.RATIO_CLIP_KEY]
        self.norm_adv_clip = 5 if (self.NORM_ADV_CLIP_KEY not in json_data) else json_data[self.NORM_ADV_CLIP_KEY]
        self.td_lambda = 0.95 if (self.TD_LAMBDA_KEY not in json_data) else json_data[self.TD_LAMBDA_KEY]
        self.tar_clip_frac = -1 if (self.TAR_CLIP_FRAC not in json_data) else json_data[self.TAR_CLIP_FRAC]
        self.actor_stepsize_decay = 0.5 if (self.ACTOR_STEPSIZE_DECAY not in json_data) else json_data[self.ACTOR_STEPSIZE_DECAY]

        num_procs = MPIUtil.get_num_procs()
        local_batch_size = int(self.batch_size / num_procs)
        min_replay_size = 2 * local_batch_size  # needed to prevent buffer overflow
        assert(self.replay_buffer_size > min_replay_size)

        self.replay_buffer_size = np.maximum(min_replay_size, self.replay_buffer_size)

        return

    def _build_nets(self, json_data):
        assert self.ACTOR_NET_KEY in json_data
        assert self.CRITIC_NET_KEY in json_data

        actor_net_name = json_data[self.ACTOR_NET_KEY]
        critic_net_name = json_data[self.CRITIC_NET_KEY]
        actor_init_output_scale = 1 if (self.ACTOR_INIT_OUTPUT_SCALE_KEY not in json_data) else json_data[self.ACTOR_INIT_OUTPUT_SCALE_KEY]

        s_size = self.get_state_size()
        g_size = self.get_goal_size()
        a_size = self.get_action_size()

        # setup input tensors
        self.s_tf = tf.placeholder(tf.float32, shape=[None, s_size], name="s")
        self.a_tf = tf.placeholder(tf.float32, shape=[None, a_size], name="a")
        self.tar_val_tf = tf.placeholder(tf.float32, shape=[None], name="tar_val")
        self.adv_tf = tf.placeholder(tf.float32, shape=[None], name="adv")
        self.g_tf = tf.placeholder(tf.float32, shape=([None, g_size] if self.has_goal() else None), name="g")
        self.old_logp_tf = tf.placeholder(tf.float32, shape=[None], name="old_logp")
        self.exp_mask_tf = tf.placeholder(tf.float32, shape=[None], name="exp_mask")

        with tf.variable_scope('main'):
            with tf.variable_scope('actor'):
                self.a_mean_tf = self._build_net_actor(actor_net_name, actor_init_output_scale)
            with tf.variable_scope('critic'):
                self.critic_tf = self._build_net_critic(critic_net_name)

        if (self.a_mean_tf is not None):
            Logger.print2('Built actor net: ' + actor_net_name)

        if (self.critic_tf is not None):
            Logger.print2('Built critic net: ' + critic_net_name)

        self.norm_a_std_tf = self.exp_params_curr.noise * tf.ones(a_size)
        norm_a_noise_tf = self.norm_a_std_tf * tf.random_normal(shape=tf.shape(self.a_mean_tf))
        norm_a_noise_tf *= tf.expand_dims(self.exp_mask_tf, axis=-1)
        self.sample_a_tf = self.a_mean_tf + norm_a_noise_tf * self.a_norm.std_tf
        self.sample_a_logp_tf = TFUtil.calc_logp_gaussian(x_tf=norm_a_noise_tf, mean_tf=None, std_tf=self.norm_a_std_tf)

        return

    def _build_losses(self, json_data):
        actor_weight_decay = 0 if (self.ACTOR_WEIGHT_DECAY_KEY not in json_data) else json_data[self.ACTOR_WEIGHT_DECAY_KEY]
        critic_weight_decay = 0 if (self.CRITIC_WEIGHT_DECAY_KEY not in json_data) else json_data[self.CRITIC_WEIGHT_DECAY_KEY]

        norm_val_diff = self.val_norm.normalize_tf(self.tar_val_tf) - self.val_norm.normalize_tf(self.critic_tf)
        self.critic_loss_tf = 0.5 * tf.reduce_mean(tf.square(norm_val_diff))

        if (critic_weight_decay != 0):
            self.critic_loss_tf += critic_weight_decay * self._weight_decay_loss('main/critic')

        norm_tar_a_tf = self.a_norm.normalize_tf(self.a_tf)
        self._norm_a_mean_tf = self.a_norm.normalize_tf(self.a_mean_tf)

        self.logp_tf = TFUtil.calc_logp_gaussian(norm_tar_a_tf, self._norm_a_mean_tf, self.norm_a_std_tf)
        ratio_tf = tf.exp(self.logp_tf - self.old_logp_tf)
        actor_loss0 = self.adv_tf * ratio_tf
        actor_loss1 = self.adv_tf * tf.clip_by_value(ratio_tf, 1.0 - self.ratio_clip, 1 + self.ratio_clip)
        self.actor_loss_tf = -tf.reduce_mean(tf.minimum(actor_loss0, actor_loss1))

        norm_a_bound_min = self.a_norm.normalize(self.a_bound_min)
        norm_a_bound_max = self.a_norm.normalize(self.a_bound_max)
        a_bound_loss = TFUtil.calc_bound_loss(self._norm_a_mean_tf, norm_a_bound_min, norm_a_bound_max)
        self.actor_loss_tf += a_bound_loss

        if (actor_weight_decay != 0):
            self.actor_loss_tf += actor_weight_decay * self._weight_decay_loss('main/actor')

        # for debugging
        self.clip_frac_tf = tf.reduce_mean(tf.to_float(tf.greater(tf.abs(ratio_tf - 1.0), self.ratio_clip)))

        return

    def _build_solvers(self, json_data):
        actor_stepsize = 0.001 if (self.ACTOR_STEPSIZE_KEY not in json_data) else json_data[self.ACTOR_STEPSIZE_KEY]
        actor_momentum = 0.9 if (self.ACTOR_MOMENTUM_KEY not in json_data) else json_data[self.ACTOR_MOMENTUM_KEY]
        critic_stepsize = 0.01 if (self.CRITIC_STEPSIZE_KEY not in json_data) else json_data[self.CRITIC_STEPSIZE_KEY]
        critic_momentum = 0.9 if (self.CRITIC_MOMENTUM_KEY not in json_data) else json_data[self.CRITIC_MOMENTUM_KEY]

        critic_vars = self._tf_vars('main/critic')
        critic_opt = tf.train.MomentumOptimizer(learning_rate=critic_stepsize, momentum=critic_momentum)
        self.critic_grad_tf = tf.gradients(self.critic_loss_tf, critic_vars)
        self.critic_solver = MPISolver(self.sess, critic_opt, critic_vars)

        self._actor_stepsize_tf = tf.get_variable(dtype=tf.float32, name='actor_stepsize', initializer=actor_stepsize, trainable=False)
        self._actor_stepsize_ph = tf.get_variable(dtype=tf.float32, name='actor_stepsize_ph', shape=[])
        self._actor_stepsize_update_op = self._actor_stepsize_tf.assign(self._actor_stepsize_ph)

        actor_vars = self._tf_vars('main/actor')
        actor_opt = tf.train.MomentumOptimizer(learning_rate=self._actor_stepsize_tf, momentum=actor_momentum)
        self.actor_grad_tf = tf.gradients(self.actor_loss_tf, actor_vars)
        self.actor_solver = MPISolver(self.sess, actor_opt, actor_vars)

        return

    def _decide_action(self, s, g):
        with self.sess.as_default(), self.graph.as_default():
            self._exp_action = self._enable_stoch_policy() and MathUtil.flip_coin(self.exp_params_curr.rate)
            a, logp = self._eval_actor(s, g, self._exp_action)
        return a[0], logp[0]

    def _eval_actor(self, s, g, enable_exp):
        s = np.reshape(s, [-1, self.get_state_size()])
        g = np.reshape(g, [-1, self.get_goal_size()]) if self.has_goal() else None

        feed = {
            self.s_tf: s,
            self.g_tf: g,
            self.exp_mask_tf: np.array([1 if enable_exp else 0])
        }

        a, logp = self.sess.run([self.sample_a_tf, self.sample_a_logp_tf], feed_dict=feed)
        return a, logp

    def _train_step(self):
        adv_eps = 1e-5

        start_idx = self.replay_buffer.buffer_tail
        end_idx = self.replay_buffer.buffer_head
        assert(start_idx == 0)
        assert(self.replay_buffer.get_current_size() <= self.replay_buffer.buffer_size)  # must avoid overflow
        assert(start_idx < end_idx)

        idx = np.array(list(range(start_idx, end_idx)))
        end_mask = self.replay_buffer.is_path_end(idx)
        end_mask = np.logical_not(end_mask)

        vals = self._compute_batch_vals(start_idx, end_idx)
        new_vals = self._compute_batch_new_vals(start_idx, end_idx, vals)

        valid_idx = idx[end_mask]
        exp_idx = self.replay_buffer.get_idx_filtered(self.EXP_ACTION_FLAG).copy()
        num_valid_idx = valid_idx.shape[0]
        num_exp_idx = exp_idx.shape[0]
        exp_idx = np.column_stack([exp_idx, np.array(list(range(0, num_exp_idx)), dtype=np.int32)])

        local_sample_count = valid_idx.size
        global_sample_count = int(MPIUtil.reduce_sum(local_sample_count))
        mini_batches = int(np.ceil(global_sample_count / self.mini_batch_size))

        adv = new_vals[exp_idx[:, 0]] - vals[exp_idx[:, 0]]
        new_vals = np.clip(new_vals, self.val_min, self.val_max)

        adv_mean = np.mean(adv)
        adv_std = np.std(adv)
        adv = (adv - adv_mean) / (adv_std + adv_eps)
        adv = np.clip(adv, -self.norm_adv_clip, self.norm_adv_clip)

        critic_loss = 0
        actor_loss = 0
        actor_clip_frac = 0

        for e in range(self.epochs):
            np.random.shuffle(valid_idx)
            np.random.shuffle(exp_idx)

            for b in range(mini_batches):
                batch_idx_beg = b * self._local_mini_batch_size
                batch_idx_end = batch_idx_beg + self._local_mini_batch_size

                critic_batch = np.array(range(batch_idx_beg, batch_idx_end), dtype=np.int32)
                actor_batch = critic_batch.copy()
                critic_batch = np.mod(critic_batch, num_valid_idx)
                actor_batch = np.mod(actor_batch, num_exp_idx)
                shuffle_actor = (actor_batch[-1] < actor_batch[0]) or (actor_batch[-1] == num_exp_idx - 1)

                critic_batch = valid_idx[critic_batch]
                actor_batch = exp_idx[actor_batch]
                critic_batch_vals = new_vals[critic_batch]
                actor_batch_adv = adv[actor_batch[:, 1]]

                critic_s = self.replay_buffer.get('states', critic_batch)
                critic_g = self.replay_buffer.get('goals', critic_batch) if self.has_goal() else None
                curr_critic_loss = self._update_critic(critic_s, critic_g, critic_batch_vals)

                actor_s = self.replay_buffer.get("states", actor_batch[:, 0])
                actor_g = self.replay_buffer.get("goals", actor_batch[:, 0]) if self.has_goal() else None
                actor_a = self.replay_buffer.get("actions", actor_batch[:, 0])
                actor_logp = self.replay_buffer.get("logps", actor_batch[:, 0])
                curr_actor_loss, curr_actor_clip_frac = self._update_actor(actor_s, actor_g, actor_a, actor_logp, actor_batch_adv)

                critic_loss += curr_critic_loss
                actor_loss += np.abs(curr_actor_loss)
                actor_clip_frac += curr_actor_clip_frac

                if (shuffle_actor):
                    np.random.shuffle(exp_idx)

        total_batches = mini_batches * self.epochs
        critic_loss /= total_batches
        actor_loss /= total_batches
        actor_clip_frac /= total_batches

        critic_loss = MPIUtil.reduce_avg(critic_loss)
        actor_loss = MPIUtil.reduce_avg(actor_loss)
        actor_clip_frac = MPIUtil.reduce_avg(actor_clip_frac)

        critic_stepsize = self.critic_solver.get_stepsize()
        actor_stepsize = self.update_actor_stepsize(actor_clip_frac)

        self.logger.log_tabular('Critic_Loss', critic_loss)
        self.logger.log_tabular('Critic_Stepsize', critic_stepsize)
        self.logger.log_tabular('Actor_Loss', actor_loss)
        self.logger.log_tabular('Actor_Stepsize', actor_stepsize)
        self.logger.log_tabular('Clip_Frac', actor_clip_frac)
        self.logger.log_tabular('Adv_Mean', adv_mean)
        self.logger.log_tabular('Adv_Std', adv_std)

        self.replay_buffer.clear()

        return

    def _get_iters_per_update(self):
        return 1

    def _valid_train_step(self):
        samples = self.replay_buffer.get_current_size()
        exp_samples = self.replay_buffer.count_filtered(self.EXP_ACTION_FLAG)
        global_sample_count = int(MPIUtil.reduce_sum(samples))
        global_exp_min = int(MPIUtil.reduce_min(exp_samples))
        return (global_sample_count > self.batch_size) and (global_exp_min > 0)

    def _compute_batch_vals(self, start_idx, end_idx):
        states = self.replay_buffer.get_all("states")[start_idx:end_idx]
        goals = self.replay_buffer.get_all("goals")[start_idx:end_idx] if self.has_goal() else None

        idx = np.array(list(range(start_idx, end_idx)))
        is_end = self.replay_buffer.is_path_end(idx)
        is_fail = self.replay_buffer.check_terminal_flag(idx, Env.Terminate.Fail)
        is_succ = self.replay_buffer.check_terminal_flag(idx, Env.Terminate.Succ)
        is_fail = np.logical_and(is_end, is_fail)
        is_succ = np.logical_and(is_end, is_succ)

        vals = self._eval_critic(states, goals)
        vals[is_fail] = self.val_fail
        vals[is_succ] = self.val_succ

        return vals

    def _compute_batch_new_vals(self, start_idx, end_idx, val_buffer):
        rewards = self.replay_buffer.get_all("rewards")[start_idx:end_idx]

        if self.discount == 0:
            new_vals = rewards.copy()
        else:
            new_vals = np.zeros_like(val_buffer)

            curr_idx = start_idx
            while curr_idx < end_idx:
                idx0 = curr_idx - start_idx
                idx1 = self.replay_buffer.get_path_end(curr_idx) - start_idx
                r = rewards[idx0:idx1]
                v = val_buffer[idx0:(idx1 + 1)]

                new_vals[idx0:idx1] = RLUtil.compute_return(r, self.discount, self.td_lambda, v)
                curr_idx = idx1 + start_idx + 1

        return new_vals

    def _update_critic(self, s, g, tar_vals):
        feed = {
            self.s_tf: s,
            self.g_tf: g,
            self.tar_val_tf: tar_vals
        }

        loss, grads = self.sess.run([self.critic_loss_tf, self.critic_grad_tf], feed)
        self.critic_solver.update(grads)
        return loss

    def _update_actor(self, s, g, a, logp, adv):
        feed = {
            self.s_tf: s,
            self.g_tf: g,
            self.a_tf: a,
            self.adv_tf: adv,
            self.old_logp_tf: logp
        }

        loss, grads, clip_frac = self.sess.run([self.actor_loss_tf, self.actor_grad_tf, self.clip_frac_tf], feed)
        self.actor_solver.update(grads)

        return loss, clip_frac

    def update_actor_stepsize(self, clip_frac):
        clip_tol = 1.5
        step_scale = 2
        max_stepsize = 1e-2
        min_stepsize = 1e-8
        warmup_iters = 5

        actor_stepsize = self.actor_solver.get_stepsize()
        if (self.tar_clip_frac >= 0 and self.iter > warmup_iters):
            min_clip = self.tar_clip_frac / clip_tol
            max_clip = self.tar_clip_frac * clip_tol
            under_tol = clip_frac < min_clip
            over_tol = clip_frac > max_clip

            if (over_tol or under_tol):
                if (over_tol):
                    actor_stepsize *= self.actor_stepsize_decay
                else:
                    actor_stepsize /= self.actor_stepsize_decay

                actor_stepsize = np.clip(actor_stepsize, min_stepsize, max_stepsize)
                self.set_actor_stepsize(actor_stepsize)

        return actor_stepsize

    def set_actor_stepsize(self, stepsize):
        feed = {
            self._actor_stepsize_ph: stepsize,
        }
        self.sess.run(self._actor_stepsize_update_op, feed)
        return
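# ---------------------------------------------------------------------------
# Illustrative standalone sketch of the clipped surrogate objective that
# _build_losses wires into the TF graph above, written in plain numpy so the
# clipping behavior can be inspected without a session. The function and
# argument names here are local to this sketch.
import numpy as np

def ppo_surrogate_loss(adv, logp, old_logp, ratio_clip=0.2):
    ratio = np.exp(logp - old_logp)                  # importance weight pi / pi_old
    loss0 = adv * ratio                              # unclipped surrogate
    loss1 = adv * np.clip(ratio, 1.0 - ratio_clip, 1.0 + ratio_clip)
    return -np.mean(np.minimum(loss0, loss1))        # negated: the agent minimizes

# Once |ratio - 1| exceeds ratio_clip, the clipped term dominates and that
# sample stops contributing gradient; clip_frac_tf above reports how often
# this happens, which update_actor_stepsize then uses to adapt the stepsize.
# ---------------------------------------------------------------------------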
@@ -0,0 +1,351 @@
import numpy as np
import copy
from pybullet_utils.logger import Logger
import inspect
from pybullet_envs.deep_mimic.env.env import Env
import pybullet_utils.math_util as MathUtil

class ReplayBuffer(object):
    TERMINATE_KEY = 'terminate'
    PATH_START_KEY = 'path_start'
    PATH_END_KEY = 'path_end'

    def __init__(self, buffer_size):
        assert buffer_size > 0

        self.buffer_size = buffer_size
        self.total_count = 0
        self.buffer_head = 0
        self.buffer_tail = MathUtil.INVALID_IDX
        self.num_paths = 0
        self._sample_buffers = dict()
        self.buffers = None

        self.clear()
        return

    def sample(self, n):
        curr_size = self.get_current_size()
        assert curr_size > 0

        idx = np.empty(n, dtype=int)
        # makes sure that the end states are not sampled
        for i in range(n):
            while True:
                curr_idx = np.random.randint(0, curr_size, size=1)[0]
                curr_idx += self.buffer_tail
                curr_idx = np.mod(curr_idx, self.buffer_size)

                if not self.is_path_end(curr_idx):
                    break
            idx[i] = curr_idx

        return idx

    def sample_filtered(self, n, key):
        assert key in self._sample_buffers
        curr_buffer = self._sample_buffers[key]
        idx = curr_buffer.sample(n)
        return idx

    def count_filtered(self, key):
        curr_buffer = self._sample_buffers[key]
        return curr_buffer.count

    def get(self, key, idx):
        return self.buffers[key][idx]

    def get_all(self, key):
        return self.buffers[key]

    def get_idx_filtered(self, key):
        assert key in self._sample_buffers
        curr_buffer = self._sample_buffers[key]
        idx = curr_buffer.slot_to_idx[:curr_buffer.count]
        return idx

    def get_path_start(self, idx):
        return self.buffers[self.PATH_START_KEY][idx]

    def get_path_end(self, idx):
        return self.buffers[self.PATH_END_KEY][idx]

    def get_pathlen(self, idx):
        is_array = isinstance(idx, np.ndarray) or isinstance(idx, list)
        if not is_array:
            idx = [idx]

        n = len(idx)
        start_idx = self.get_path_start(idx)
        end_idx = self.get_path_end(idx)
        pathlen = np.empty(n, dtype=int)

        for i in range(n):
            curr_start = start_idx[i]
            curr_end = end_idx[i]
            if curr_start < curr_end:
                curr_len = curr_end - curr_start
            else:
                curr_len = self.buffer_size - curr_start + curr_end
            pathlen[i] = curr_len

        if not is_array:
            pathlen = pathlen[0]

        return pathlen

    def is_valid_path(self, idx):
        start_idx = self.get_path_start(idx)
        valid = start_idx != MathUtil.INVALID_IDX
        return valid

    def store(self, path):
        start_idx = MathUtil.INVALID_IDX
        n = path.pathlength()

        if (n > 0):
            assert path.is_valid()

            if path.check_vals():
                if self.buffers is None:
                    self._init_buffers(path)

                idx = self._request_idx(n + 1)
                self._store_path(path, idx)
                self._add_sample_buffers(idx)

                self.num_paths += 1
                self.total_count += n + 1
                start_idx = idx[0]
            else:
                Logger.print2('Invalid path data value detected')

        return start_idx

    def clear(self):
        self.buffer_head = 0
        self.buffer_tail = MathUtil.INVALID_IDX
        self.num_paths = 0

        for key in self._sample_buffers:
            self._sample_buffers[key].clear()
        return

    def get_next_idx(self, idx):
        next_idx = np.mod(idx + 1, self.buffer_size)
        return next_idx

    def is_terminal_state(self, idx):
        terminate_flags = self.buffers[self.TERMINATE_KEY][idx]
        terminate = terminate_flags != Env.Terminate.Null.value
        is_end = self.is_path_end(idx)
        terminal_state = np.logical_and(terminate, is_end)
        return terminal_state

    def check_terminal_flag(self, idx, flag):
        terminate_flags = self.buffers[self.TERMINATE_KEY][idx]
        terminate = terminate_flags == flag.value
        return terminate

    def is_path_end(self, idx):
        is_end = self.buffers[self.PATH_END_KEY][idx] == idx
        return is_end

    def add_filter_key(self, key):
        assert self.get_current_size() == 0
        if key not in self._sample_buffers:
            self._sample_buffers[key] = SampleBuffer(self.buffer_size)
        return

    def get_current_size(self):
        if self.buffer_tail == MathUtil.INVALID_IDX:
            return 0
        elif self.buffer_tail < self.buffer_head:
            return self.buffer_head - self.buffer_tail
        else:
            return self.buffer_size - self.buffer_tail + self.buffer_head

    def _check_flags(self, key, flags):
        return (flags & key) == key

    def _add_sample_buffers(self, idx):
        flags = self.buffers['flags']
        for key in self._sample_buffers:
            curr_buffer = self._sample_buffers[key]
            filter_idx = [i for i in idx if (self._check_flags(key, flags[i]) and not self.is_path_end(i))]
            curr_buffer.add(filter_idx)
        return

    def _free_sample_buffers(self, idx):
        for key in self._sample_buffers:
            curr_buffer = self._sample_buffers[key]
            curr_buffer.free(idx)
        return

    def _init_buffers(self, path):
        self.buffers = dict()
        self.buffers[self.PATH_START_KEY] = MathUtil.INVALID_IDX * np.ones(self.buffer_size, dtype=int)
        self.buffers[self.PATH_END_KEY] = MathUtil.INVALID_IDX * np.ones(self.buffer_size, dtype=int)

        for key in dir(path):
            val = getattr(path, key)
            if not key.startswith('__') and not inspect.ismethod(val):
                if key == self.TERMINATE_KEY:
                    self.buffers[self.TERMINATE_KEY] = np.zeros(shape=[self.buffer_size], dtype=int)
                else:
                    val_type = type(val[0])
                    is_array = val_type == np.ndarray
                    if is_array:
                        shape = [self.buffer_size, val[0].shape[0]]
                        dtype = val[0].dtype
                    else:
                        shape = [self.buffer_size]
                        dtype = val_type

                    self.buffers[key] = np.zeros(shape, dtype=dtype)
        return

    def _request_idx(self, n):
        assert n + 1 < self.buffer_size  # bad things can happen if path is too long

        remainder = n
        idx = []

        start_idx = self.buffer_head
        while remainder > 0:
            end_idx = np.minimum(start_idx + remainder, self.buffer_size)
            remainder -= (end_idx - start_idx)

            free_idx = list(range(start_idx, end_idx))
            self._free_idx(free_idx)
            idx += free_idx
            start_idx = 0

        self.buffer_head = (self.buffer_head + n) % self.buffer_size
        return idx

    def _free_idx(self, idx):
        assert(idx[0] <= idx[-1])
        n = len(idx)
        if self.buffer_tail != MathUtil.INVALID_IDX:
            update_tail = idx[0] <= idx[-1] and idx[0] <= self.buffer_tail and idx[-1] >= self.buffer_tail
            update_tail |= idx[0] > idx[-1] and (idx[0] <= self.buffer_tail or idx[-1] >= self.buffer_tail)

            if update_tail:
                i = 0
                while i < n:
                    curr_idx = idx[i]
                    if self.is_valid_path(curr_idx):
                        start_idx = self.get_path_start(curr_idx)
                        end_idx = self.get_path_end(curr_idx)
                        pathlen = self.get_pathlen(curr_idx)

                        if start_idx < end_idx:
                            self.buffers[self.PATH_START_KEY][start_idx:end_idx + 1] = MathUtil.INVALID_IDX
                            self._free_sample_buffers(list(range(start_idx, end_idx + 1)))
                        else:
                            self.buffers[self.PATH_START_KEY][start_idx:self.buffer_size] = MathUtil.INVALID_IDX
                            self.buffers[self.PATH_START_KEY][0:end_idx + 1] = MathUtil.INVALID_IDX
                            self._free_sample_buffers(list(range(start_idx, self.buffer_size)))
                            self._free_sample_buffers(list(range(0, end_idx + 1)))

                        self.num_paths -= 1
                        i += pathlen + 1
                        self.buffer_tail = (end_idx + 1) % self.buffer_size
                    else:
                        i += 1
        else:
            self.buffer_tail = idx[0]
        return

    def _store_path(self, path, idx):
        n = path.pathlength()
        for key, data in self.buffers.items():
            if key != self.PATH_START_KEY and key != self.PATH_END_KEY and key != self.TERMINATE_KEY:
                val = getattr(path, key)
                val_len = len(val)
                assert val_len == n or val_len == n + 1
                data[idx[:val_len]] = val

        self.buffers[self.TERMINATE_KEY][idx] = path.terminate.value
        self.buffers[self.PATH_START_KEY][idx] = idx[0]
        self.buffers[self.PATH_END_KEY][idx] = idx[-1]
        return
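# Illustrative standalone sketch: the wrap-around arithmetic that
# get_current_size and get_pathlen implement above collapses to one modular
# expression (assuming INVALID_IDX marks an empty buffer, as in
# pybullet_utils.math_util).
def circular_size(head, tail, capacity, invalid_idx=MathUtil.INVALID_IDX):
    if tail == invalid_idx:
        return 0                       # empty buffer
    if tail == head:
        return capacity                # head wrapped all the way around: full
    return (head - tail) % capacity    # occupied span, possibly wrapping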
class SampleBuffer(object):
    def __init__(self, size):
        self.idx_to_slot = np.empty(shape=[size], dtype=int)
        self.slot_to_idx = np.empty(shape=[size], dtype=int)
        self.count = 0
        self.clear()
        return

    def clear(self):
        self.idx_to_slot.fill(MathUtil.INVALID_IDX)
        self.slot_to_idx.fill(MathUtil.INVALID_IDX)
        self.count = 0
        return

    def is_valid(self, idx):
        return self.idx_to_slot[idx] != MathUtil.INVALID_IDX

    def get_size(self):
        return self.idx_to_slot.shape[0]

    def add(self, idx):
        for i in idx:
            if not self.is_valid(i):
                new_slot = self.count
                assert new_slot >= 0

                self.idx_to_slot[i] = new_slot
                self.slot_to_idx[new_slot] = i
                self.count += 1
        return

    def free(self, idx):
        for i in idx:
            if self.is_valid(i):
                slot = self.idx_to_slot[i]
                last_slot = self.count - 1
                last_idx = self.slot_to_idx[last_slot]

                self.idx_to_slot[last_idx] = slot
                self.slot_to_idx[slot] = last_idx
                self.idx_to_slot[i] = MathUtil.INVALID_IDX
                self.slot_to_idx[last_slot] = MathUtil.INVALID_IDX
                self.count -= 1
        return

    def sample(self, n):
        if self.count > 0:
            slots = np.random.randint(0, self.count, size=n)
            idx = self.slot_to_idx[slots]
        else:
            idx = np.empty(shape=[0], dtype=int)
        return idx

    def check_consistency(self):
        valid = True
        if self.count < 0:
            valid = False

        if valid:
            for i in range(self.get_size()):
                if self.is_valid(i):
                    s = self.idx_to_slot[i]
                    if self.slot_to_idx[s] != i:
                        valid = False
                        break

                s2i = self.slot_to_idx[i]
                if s2i != MathUtil.INVALID_IDX:
                    i2s = self.idx_to_slot[s2i]
                    if i2s != i:
                        valid = False
                        break

        count0 = np.sum(self.idx_to_slot == MathUtil.INVALID_IDX)
        count1 = np.sum(self.slot_to_idx == MathUtil.INVALID_IDX)
        valid &= count0 == count1
        return valid
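# ---------------------------------------------------------------------------
# Illustrative standalone sketch of the swap-with-last removal used by
# SampleBuffer.free above: keeping slot_to_idx dense in [0, count) is what
# lets sample() draw uniformly with a single randint. Plain-list version
# (names local to this sketch):
def swap_remove(slots, slot_of, i):
    # slots: dense list of stored indices; slot_of: dict mapping index -> slot
    s = slot_of.pop(i)        # slot that i occupied
    last = slots[-1]
    slots[s] = last           # move the last element into the freed slot
    if last != i:
        slot_of[last] = s
    slots.pop()

# e.g. slots = [7, 3, 9]; slot_of = {7: 0, 3: 1, 9: 2}
# swap_remove(slots, slot_of, 3) leaves slots == [7, 9], slot_of == {7: 0, 9: 1}
# ---------------------------------------------------------------------------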
@@ -0,0 +1,595 @@
import numpy as np
import copy
import os
import time

from abc import ABC, abstractmethod
from enum import Enum

from learning.path import *
from learning.exp_params import ExpParams
from learning.normalizer import Normalizer
from learning.replay_buffer import ReplayBuffer
from pybullet_utils.logger import Logger
import pybullet_utils.mpi_util as MPIUtil
import pybullet_utils.math_util as MathUtil

class RLAgent(ABC):
    class Mode(Enum):
        TRAIN = 0
        TEST = 1
        TRAIN_END = 2

    NAME = "None"

    UPDATE_PERIOD_KEY = "UpdatePeriod"
    ITERS_PER_UPDATE = "ItersPerUpdate"
    DISCOUNT_KEY = "Discount"
    MINI_BATCH_SIZE_KEY = "MiniBatchSize"
    REPLAY_BUFFER_SIZE_KEY = "ReplayBufferSize"
    INIT_SAMPLES_KEY = "InitSamples"
    NORMALIZER_SAMPLES_KEY = "NormalizerSamples"

    OUTPUT_ITERS_KEY = "OutputIters"
    INT_OUTPUT_ITERS_KEY = "IntOutputIters"
    TEST_EPISODES_KEY = "TestEpisodes"

    EXP_ANNEAL_SAMPLES_KEY = "ExpAnnealSamples"
    EXP_PARAM_BEG_KEY = "ExpParamsBeg"
    EXP_PARAM_END_KEY = "ExpParamsEnd"

    def __init__(self, world, id, json_data):
        self.world = world
        self.id = id
        self.logger = Logger()
        self._mode = self.Mode.TRAIN

        assert self._check_action_space(), \
            Logger.print2("Invalid action space, got {:s}".format(str(self.get_action_space())))

        self._enable_training = True
        self.path = Path()
        self.iter = int(0)
        self.start_time = time.time()
        self._update_counter = 0

        self.update_period = 1.0  # simulated time (seconds) before each training update
        self.iters_per_update = int(1)
        self.discount = 0.95
        self.mini_batch_size = int(32)
        self.replay_buffer_size = int(50000)
        self.init_samples = int(1000)
        self.normalizer_samples = np.inf
        self._local_mini_batch_size = self.mini_batch_size  # batch size for each worker for multiprocessing
        self._need_normalizer_update = True
        self._total_sample_count = 0

        self._output_dir = ""
        self._int_output_dir = ""
        self.output_iters = 100
        self.int_output_iters = 100

        self.train_return = 0.0
        self.test_episodes = int(0)
        self.test_episode_count = int(0)
        self.test_return = 0.0
        self.avg_test_return = 0.0

        self.exp_anneal_samples = 320000
        self.exp_params_beg = ExpParams()
        self.exp_params_end = ExpParams()
        self.exp_params_curr = ExpParams()

        self._load_params(json_data)
        self._build_replay_buffer(self.replay_buffer_size)
        self._build_normalizers()
        self._build_bounds()
        self.reset()

        return

    def __str__(self):
        action_space_str = str(self.get_action_space())
        info_str = ""
        info_str += '"ID": {:d},\n "Type": "{:s}",\n "ActionSpace": "{:s}",\n "StateDim": {:d},\n "GoalDim": {:d},\n "ActionDim": {:d}'.format(
            self.id, self.NAME, action_space_str[action_space_str.rfind('.') + 1:], self.get_state_size(), self.get_goal_size(), self.get_action_size())
        return "{\n" + info_str + "\n}"

    def get_output_dir(self):
        return self._output_dir

    def set_output_dir(self, out_dir):
        self._output_dir = out_dir
        if (self._output_dir != ""):
            self.logger.configure_output_file(out_dir + "/agent" + str(self.id) + "_log.txt")
        return

    output_dir = property(get_output_dir, set_output_dir)

    def get_int_output_dir(self):
        return self._int_output_dir

    def set_int_output_dir(self, out_dir):
        self._int_output_dir = out_dir
        return

    int_output_dir = property(get_int_output_dir, set_int_output_dir)

    def reset(self):
        self.path.clear()
        return

    def update(self, timestep):
        if self.need_new_action():
            #print("update_new_action!!!")
            self._update_new_action()
        else:
            print("no action???")

        if (self._mode == self.Mode.TRAIN and self.enable_training):
            self._update_counter += timestep

            while self._update_counter >= self.update_period:
                self._train()
                self._update_exp_params()
                self.world.env.set_sample_count(self._total_sample_count)
                self._update_counter -= self.update_period

        return

    def end_episode(self):
        if (self.path.pathlength() > 0):
            self._end_path()

            if (self._mode == self.Mode.TRAIN or self._mode == self.Mode.TRAIN_END):
                if (self.enable_training and self.path.pathlength() > 0):
                    self._store_path(self.path)
            elif (self._mode == self.Mode.TEST):
                self._update_test_return(self.path)
            else:
                assert False, Logger.print2("Unsupported RL agent mode " + str(self._mode))

            self._update_mode()
        return

    def has_goal(self):
        return self.get_goal_size() > 0

    def predict_val(self):
        return 0

    def get_enable_training(self):
        return self._enable_training

    def set_enable_training(self, enable):
        print("set_enable_training!=", enable)
        self._enable_training = enable
        if (self._enable_training):
            self.reset()
        return

    enable_training = property(get_enable_training, set_enable_training)

    def enable_testing(self):
        return self.test_episodes > 0

    def get_name(self):
        return self.NAME

    @abstractmethod
    def save_model(self, out_path):
        pass

    @abstractmethod
    def load_model(self, in_path):
        pass

    @abstractmethod
    def _decide_action(self, s, g):
        pass

    @abstractmethod
    def _get_output_path(self):
        pass

    @abstractmethod
    def _get_int_output_path(self):
        pass

    @abstractmethod
    def _train_step(self):
        pass

    @abstractmethod
    def _check_action_space(self):
        pass

    def get_action_space(self):
        return self.world.env.get_action_space(self.id)

    def get_state_size(self):
        return self.world.env.get_state_size(self.id)

    def get_goal_size(self):
        return self.world.env.get_goal_size(self.id)

    def get_action_size(self):
        return self.world.env.get_action_size(self.id)

    def get_num_actions(self):
        return self.world.env.get_num_actions(self.id)

    def need_new_action(self):
        return self.world.env.need_new_action(self.id)

    def _build_normalizers(self):
        self.s_norm = Normalizer(self.get_state_size(), self.world.env.build_state_norm_groups(self.id))
        self.s_norm.set_mean_std(-self.world.env.build_state_offset(self.id),
                                 1 / self.world.env.build_state_scale(self.id))

        self.g_norm = Normalizer(self.get_goal_size(), self.world.env.build_goal_norm_groups(self.id))
        self.g_norm.set_mean_std(-self.world.env.build_goal_offset(self.id),
                                 1 / self.world.env.build_goal_scale(self.id))

        self.a_norm = Normalizer(self.world.env.get_action_size())
        self.a_norm.set_mean_std(-self.world.env.build_action_offset(self.id),
                                 1 / self.world.env.build_action_scale(self.id))
        return

    def _build_bounds(self):
        self.a_bound_min = self.world.env.build_action_bound_min(self.id)
        self.a_bound_max = self.world.env.build_action_bound_max(self.id)
        return

    def _load_params(self, json_data):
        if (self.UPDATE_PERIOD_KEY in json_data):
            self.update_period = int(json_data[self.UPDATE_PERIOD_KEY])

        if (self.ITERS_PER_UPDATE in json_data):
            self.iters_per_update = int(json_data[self.ITERS_PER_UPDATE])

        if (self.DISCOUNT_KEY in json_data):
            self.discount = json_data[self.DISCOUNT_KEY]

        if (self.MINI_BATCH_SIZE_KEY in json_data):
            self.mini_batch_size = int(json_data[self.MINI_BATCH_SIZE_KEY])

        if (self.REPLAY_BUFFER_SIZE_KEY in json_data):
            self.replay_buffer_size = int(json_data[self.REPLAY_BUFFER_SIZE_KEY])

        if (self.INIT_SAMPLES_KEY in json_data):
            self.init_samples = int(json_data[self.INIT_SAMPLES_KEY])

        if (self.NORMALIZER_SAMPLES_KEY in json_data):
            self.normalizer_samples = int(json_data[self.NORMALIZER_SAMPLES_KEY])

        if (self.OUTPUT_ITERS_KEY in json_data):
            self.output_iters = json_data[self.OUTPUT_ITERS_KEY]

        if (self.INT_OUTPUT_ITERS_KEY in json_data):
            self.int_output_iters = json_data[self.INT_OUTPUT_ITERS_KEY]

        if (self.TEST_EPISODES_KEY in json_data):
            self.test_episodes = int(json_data[self.TEST_EPISODES_KEY])

        if (self.EXP_ANNEAL_SAMPLES_KEY in json_data):
            self.exp_anneal_samples = json_data[self.EXP_ANNEAL_SAMPLES_KEY]

        if (self.EXP_PARAM_BEG_KEY in json_data):
            self.exp_params_beg.load(json_data[self.EXP_PARAM_BEG_KEY])

        if (self.EXP_PARAM_END_KEY in json_data):
            self.exp_params_end.load(json_data[self.EXP_PARAM_END_KEY])

        num_procs = MPIUtil.get_num_procs()
        self._local_mini_batch_size = int(np.ceil(self.mini_batch_size / num_procs))
        self._local_mini_batch_size = np.maximum(self._local_mini_batch_size, 1)
        self.mini_batch_size = self._local_mini_batch_size * num_procs

        assert(self.exp_params_beg.noise == self.exp_params_end.noise)  # noise std should not change
        self.exp_params_curr = copy.deepcopy(self.exp_params_beg)
        self.exp_params_end.noise = self.exp_params_beg.noise

        self._need_normalizer_update = self.normalizer_samples > 0

        return

    def _record_state(self):
        s = self.world.env.record_state(self.id)
        return s

    def _record_goal(self):
        g = self.world.env.record_goal(self.id)
        return g

    def _record_reward(self):
        r = self.world.env.calc_reward(self.id)
        return r

    def _apply_action(self, a):
        self.world.env.set_action(self.id, a)
        return

    def _record_flags(self):
        return int(0)

    def _is_first_step(self):
        return len(self.path.states) == 0

    def _end_path(self):
        s = self._record_state()
        g = self._record_goal()
        r = self._record_reward()

        self.path.rewards.append(r)
        self.path.states.append(s)
        self.path.goals.append(g)
        self.path.terminate = self.world.env.check_terminate(self.id)

        return

    def _update_new_action(self):
        s = self._record_state()
        g = self._record_goal()

        if not (self._is_first_step()):
            r = self._record_reward()
            self.path.rewards.append(r)

        a, logp = self._decide_action(s=s, g=g)
        assert len(np.shape(a)) == 1
        assert len(np.shape(logp)) <= 1

        flags = self._record_flags()
        self._apply_action(a)

        self.path.states.append(s)
        self.path.goals.append(g)
        self.path.actions.append(a)
        self.path.logps.append(logp)
        self.path.flags.append(flags)

        if self._enable_draw():
            self._log_val(s, g)

        return

    def _update_exp_params(self):
        lerp = float(self._total_sample_count) / self.exp_anneal_samples
        lerp = np.clip(lerp, 0.0, 1.0)
        self.exp_params_curr = self.exp_params_beg.lerp(self.exp_params_end, lerp)
        return

    def _update_test_return(self, path):
        path_reward = path.calc_return()
        self.test_return += path_reward
        self.test_episode_count += 1
        return

    def _update_mode(self):
        if (self._mode == self.Mode.TRAIN):
            self._update_mode_train()
        elif (self._mode == self.Mode.TRAIN_END):
            self._update_mode_train_end()
        elif (self._mode == self.Mode.TEST):
            self._update_mode_test()
        else:
            assert False, Logger.print2("Unsupported RL agent mode " + str(self._mode))
        return

    def _update_mode_train(self):
        return

    def _update_mode_train_end(self):
        self._init_mode_test()
        return

    def _update_mode_test(self):
        if (self.test_episode_count * MPIUtil.get_num_procs() >= self.test_episodes):
            global_return = MPIUtil.reduce_sum(self.test_return)
            global_count = MPIUtil.reduce_sum(self.test_episode_count)
            avg_return = global_return / global_count
            self.avg_test_return = avg_return

            if self.enable_training:
                self._init_mode_train()
        return

    def _init_mode_train(self):
        self._mode = self.Mode.TRAIN
        self.world.env.set_mode(self._mode)
        return

    def _init_mode_train_end(self):
        self._mode = self.Mode.TRAIN_END
        return

    def _init_mode_test(self):
        self._mode = self.Mode.TEST
        self.test_return = 0.0
        self.test_episode_count = 0
        self.world.env.set_mode(self._mode)
        return

    def _enable_output(self):
        return MPIUtil.is_root_proc() and self.output_dir != ""

    def _enable_int_output(self):
        return MPIUtil.is_root_proc() and self.int_output_dir != ""

    def _calc_val_bounds(self, discount):
        r_min = self.world.env.get_reward_min(self.id)
        r_max = self.world.env.get_reward_max(self.id)
        assert(r_min <= r_max)

        val_min = r_min / (1.0 - discount)
        val_max = r_max / (1.0 - discount)
        return val_min, val_max

    def _calc_val_offset_scale(self, discount):
        val_min, val_max = self._calc_val_bounds(discount)
        val_offset = 0
        val_scale = 1

        if (np.isfinite(val_min) and np.isfinite(val_max)):
            val_offset = -0.5 * (val_max + val_min)
            val_scale = 2 / (val_max - val_min)

        return val_offset, val_scale

    def _calc_term_vals(self, discount):
        r_fail = self.world.env.get_reward_fail(self.id)
        r_succ = self.world.env.get_reward_succ(self.id)

        r_min = self.world.env.get_reward_min(self.id)
        r_max = self.world.env.get_reward_max(self.id)
        assert(r_fail <= r_max and r_fail >= r_min)
        assert(r_succ <= r_max and r_succ >= r_min)
        assert(not np.isinf(r_fail))
        assert(not np.isinf(r_succ))

        if (discount == 0):
            val_fail = 0
            val_succ = 0
        else:
            val_fail = r_fail / (1.0 - discount)
            val_succ = r_succ / (1.0 - discount)

        return val_fail, val_succ

    def _update_iter(self, iter):
        if (self._enable_output() and self.iter % self.output_iters == 0):
            output_path = self._get_output_path()
            output_dir = os.path.dirname(output_path)
            if not os.path.exists(output_dir):
                os.makedirs(output_dir)
            self.save_model(output_path)

        if (self._enable_int_output() and self.iter % self.int_output_iters == 0):
            int_output_path = self._get_int_output_path()
            int_output_dir = os.path.dirname(int_output_path)
            if not os.path.exists(int_output_dir):
                os.makedirs(int_output_dir)
            self.save_model(int_output_path)

        self.iter = iter
        return

    def _enable_draw(self):
        return self.world.env.enable_draw

    def _log_val(self, s, g):
        pass

    def _build_replay_buffer(self, buffer_size):
        num_procs = MPIUtil.get_num_procs()
        buffer_size = int(buffer_size / num_procs)
        self.replay_buffer = ReplayBuffer(buffer_size=buffer_size)
        self.replay_buffer_initialized = False
        return

    def _store_path(self, path):
        path_id = self.replay_buffer.store(path)
        valid_path = path_id != MathUtil.INVALID_IDX

        if valid_path:
            self.train_return = path.calc_return()

            if self._need_normalizer_update:
                self._record_normalizers(path)

        return path_id

    def _record_normalizers(self, path):
        states = np.array(path.states)
        self.s_norm.record(states)

        if self.has_goal():
            goals = np.array(path.goals)
            self.g_norm.record(goals)

        return

    def _update_normalizers(self):
        self.s_norm.update()

        if self.has_goal():
            self.g_norm.update()
        return

    def _train(self):
        samples = self.replay_buffer.total_count
        self._total_sample_count = int(MPIUtil.reduce_sum(samples))
        end_training = False

        if (self.replay_buffer_initialized):
            if (self._valid_train_step()):
                prev_iter = self.iter
                iters = self._get_iters_per_update()
                avg_train_return = MPIUtil.reduce_avg(self.train_return)

                for i in range(iters):
                    curr_iter = self.iter
                    wall_time = time.time() - self.start_time
                    wall_time /= 60 * 60  # store time in hours

                    has_goal = self.has_goal()
                    s_mean = np.mean(self.s_norm.mean)
                    s_std = np.mean(self.s_norm.std)
                    g_mean = np.mean(self.g_norm.mean) if has_goal else 0
                    g_std = np.mean(self.g_norm.std) if has_goal else 0

                    self.logger.log_tabular("Iteration", self.iter)
                    self.logger.log_tabular("Wall_Time", wall_time)
                    self.logger.log_tabular("Samples", self._total_sample_count)
                    self.logger.log_tabular("Train_Return", avg_train_return)
                    self.logger.log_tabular("Test_Return", self.avg_test_return)
                    self.logger.log_tabular("State_Mean", s_mean)
                    self.logger.log_tabular("State_Std", s_std)
                    self.logger.log_tabular("Goal_Mean", g_mean)
                    self.logger.log_tabular("Goal_Std", g_std)
                    self._log_exp_params()

                    self._update_iter(self.iter + 1)
                    self._train_step()

                    Logger.print2("Agent " + str(self.id))
                    self.logger.print_tabular()
                    Logger.print2("")

                    if (self._enable_output() and curr_iter % self.int_output_iters == 0):
                        self.logger.dump_tabular()

                if (prev_iter // self.int_output_iters != self.iter // self.int_output_iters):
                    end_training = self.enable_testing()

        else:

            Logger.print2("Agent " + str(self.id))
            Logger.print2("Samples: " + str(self._total_sample_count))
            Logger.print2("")

            if (self._total_sample_count >= self.init_samples):
                self.replay_buffer_initialized = True
                end_training = self.enable_testing()

        if self._need_normalizer_update:
            self._update_normalizers()
            self._need_normalizer_update = self.normalizer_samples > self._total_sample_count

        if end_training:
            self._init_mode_train_end()

        return

    def _get_iters_per_update(self):
        return MPIUtil.get_num_procs() * self.iters_per_update

    def _valid_train_step(self):
        return True

    def _log_exp_params(self):
        self.logger.log_tabular("Exp_Rate", self.exp_params_curr.rate)
        self.logger.log_tabular("Exp_Noise", self.exp_params_curr.noise)
        self.logger.log_tabular("Exp_Temp", self.exp_params_curr.temp)
        return
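# ---------------------------------------------------------------------------
# Illustrative standalone sketch of the annealing in _update_exp_params above:
# exploration parameters are linearly interpolated from their "ExpParamsBeg"
# to "ExpParamsEnd" values over exp_anneal_samples samples, then held fixed.
# The function name and example numbers below are local to this sketch.
def anneal(beg, end, sample_count, anneal_samples):
    t = min(max(float(sample_count) / anneal_samples, 0.0), 1.0)
    return (1.0 - t) * beg + t * end

# e.g. an exploration rate annealed from 1.0 to 0.2 over 320000 samples is
# anneal(1.0, 0.2, 160000, 320000) == 0.6 at the halfway point.
# ---------------------------------------------------------------------------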
@@ -0,0 +1,18 @@
import numpy as np

def compute_return(rewards, gamma, td_lambda, val_t):
    # computes td-lambda return of path
    path_len = len(rewards)
    assert len(val_t) == path_len + 1

    return_t = np.zeros(path_len)
    last_val = rewards[-1] + gamma * val_t[-1]
    return_t[-1] = last_val

    for i in reversed(range(0, path_len - 1)):
        curr_r = rewards[i]
        next_ret = return_t[i + 1]
        curr_val = curr_r + gamma * ((1.0 - td_lambda) * val_t[i + 1] + td_lambda * next_ret)
        return_t[i] = curr_val

    return return_t
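# Illustrative usage of compute_return above. With td_lambda = 0 every entry
# reduces to the one-step TD target r_t + gamma * V(s_{t+1}); with
# td_lambda = 1 it is the discounted Monte-Carlo return bootstrapped by the
# final value estimate. The numbers below are made up for the check.
if __name__ == '__main__':
    rewards = np.array([1.0, 1.0, 1.0])
    val_t = np.array([0.5, 0.5, 0.5, 0.5])  # len(rewards) + 1 value estimates
    print(compute_return(rewards, 0.9, 0.0, val_t))  # -> [1.45 1.45 1.45]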
@@ -0,0 +1,143 @@
import numpy as np
import learning.agent_builder as AgentBuilder
import learning.tf_util as TFUtil
from learning.rl_agent import RLAgent
from pybullet_utils.logger import Logger
import pybullet_data

class RLWorld(object):
    def __init__(self, env, arg_parser):
        TFUtil.disable_gpu()

        self.env = env
        self.arg_parser = arg_parser
        self._enable_training = True
        self.train_agents = []
        self.parse_args(arg_parser)

        self.build_agents()
        return

    def get_enable_training(self):
        return self._enable_training

    def set_enable_training(self, enable):
        self._enable_training = enable
        for i in range(len(self.agents)):
            curr_agent = self.agents[i]
            if curr_agent is not None:
                enable_curr_train = self.train_agents[i] if (len(self.train_agents) > 0) else True
                curr_agent.enable_training = self.enable_training and enable_curr_train

        if self._enable_training:
            self.env.set_mode(RLAgent.Mode.TRAIN)
        else:
            self.env.set_mode(RLAgent.Mode.TEST)
        return

    enable_training = property(get_enable_training, set_enable_training)

    def parse_args(self, arg_parser):
        self.train_agents = self.arg_parser.parse_bools('train_agents')
        num_agents = self.env.get_num_agents()
        assert len(self.train_agents) == num_agents or len(self.train_agents) == 0
        return

    def shutdown(self):
        self.env.shutdown()
        return

    def build_agents(self):
        num_agents = self.env.get_num_agents()
        print("num_agents=", num_agents)
        self.agents = []

        Logger.print2('')
        Logger.print2('Num Agents: {:d}'.format(num_agents))

        agent_files = self.arg_parser.parse_strings('agent_files')
        print("len(agent_files)=", len(agent_files))
        assert len(agent_files) == num_agents or len(agent_files) == 0

        model_files = self.arg_parser.parse_strings('model_files')
        assert len(model_files) == num_agents or len(model_files) == 0

        output_path = self.arg_parser.parse_string('output_path')
        int_output_path = self.arg_parser.parse_string('int_output_path')

        for i in range(num_agents):
            curr_file = agent_files[i]
            curr_agent = self._build_agent(i, curr_file)

            if curr_agent is not None:
                curr_agent.output_dir = output_path
                curr_agent.int_output_dir = int_output_path
                Logger.print2(str(curr_agent))

                if len(model_files) > 0:
                    curr_model_file = model_files[i]
                    if curr_model_file != 'none':
                        curr_agent.load_model(pybullet_data.getDataPath() + "/" + curr_model_file)

            self.agents.append(curr_agent)
            Logger.print2('')

        self.set_enable_training(self.enable_training)
        return

    def update(self, timestep):
        self._update_agents(timestep)
        self._update_env(timestep)
        return

    def reset(self):
        self._reset_agents()
        self._reset_env()
        return

    def end_episode(self):
        self._end_episode_agents()
        return

    def _update_env(self, timestep):
        self.env.update(timestep)
        return

    def _update_agents(self, timestep):
        for agent in self.agents:
            if agent is not None:
                agent.update(timestep)
        return

    def _reset_env(self):
        self.env.reset()
        return

    def _reset_agents(self):
        for agent in self.agents:
            if agent is not None:
                agent.reset()
        return

    def _end_episode_agents(self):
        for agent in self.agents:
            if agent is not None:
                agent.end_episode()
        return

    def _build_agent(self, id, agent_file):
        Logger.print2('Agent {:d}: {}'.format(id, agent_file))
        if agent_file == 'none':
            agent = None
        else:
            agent = AgentBuilder.build_agent(self, id, agent_file)
            assert agent is not None, 'Failed to build agent {:d} from: {}'.format(id, agent_file)

        return agent
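To make the flow above concrete, here is a rough driver sketch mirroring testrl.py further below. The arg file name is an assumption (run_*.txt files ship under pybullet_data/args), and this is a sketch, not a tested script:

import pybullet_data
from pybullet_utils.arg_parser import ArgParser
from pybullet_envs.deep_mimic.env.pybullet_deep_mimic_env import PyBulletDeepMimicEnv
from pybullet_envs.deep_mimic.learning.rl_world import RLWorld

args = ['--arg_file', 'run_humanoid3d_walk_args.txt']  # assumed arg file
arg_parser = ArgParser()
arg_parser.load_args(args)
arg_parser.load_file(pybullet_data.getDataPath() + '/args/run_humanoid3d_walk_args.txt')

env = PyBulletDeepMimicEnv(args, enable_draw=False)
world = RLWorld(env, arg_parser)

world.reset()
for _ in range(600):  # one simulated second at the 1/600 s step used in testrl.py
    world.update(1. / 600.)
    if world.env.is_episode_end():
        world.end_episode()
        world.reset()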
@@ -0,0 +1,103 @@
from mpi4py import MPI
import tensorflow as tf
import numpy as np
import learning.tf_util as TFUtil
import pybullet_utils.math_util as MathUtil
import pybullet_utils.mpi_util as MPIUtil
from pybullet_utils.logger import Logger

from learning.solvers.solver import Solver

class MPISolver(Solver):
    CHECK_SYNC_ITERS = 1000

    def __init__(self, sess, optimizer, vars):
        super().__init__(vars)
        self.sess = sess
        self.optimizer = optimizer
        self._build_grad_feed(vars)
        self._update = optimizer.apply_gradients(zip(self._grad_tf_list, self.vars))
        self._set_flat_vars = TFUtil.SetFromFlat(sess, self.vars)
        self._get_flat_vars = TFUtil.GetFlat(sess, self.vars)

        self.iter = 0
        grad_dim = self._calc_grad_dim()
        self._flat_grad = np.zeros(grad_dim, dtype=np.float32)
        self._global_flat_grad = np.zeros(grad_dim, dtype=np.float32)

        return

    def get_stepsize(self):
        return self.optimizer._learning_rate_tensor.eval()

    def update(self, grads=None, grad_scale=1.0):
        if grads is not None:
            self._flat_grad = MathUtil.flatten(grads)
        else:
            self._flat_grad.fill(0)
        return self.update_flatgrad(self._flat_grad, grad_scale)

    def update_flatgrad(self, flat_grad, grad_scale=1.0):
        if self.iter % self.CHECK_SYNC_ITERS == 0:
            assert self.check_synced(), Logger.print2('Network parameters desynchronized')

        if grad_scale != 1.0:
            flat_grad *= grad_scale

        MPI.COMM_WORLD.Allreduce(flat_grad, self._global_flat_grad, op=MPI.SUM)
        self._global_flat_grad /= MPIUtil.get_num_procs()

        self._load_flat_grad(self._global_flat_grad)
        self.sess.run([self._update], self._grad_feed)
        self.iter += 1

        return

    def sync(self):
        vars = self._get_flat_vars()
        MPIUtil.bcast(vars)
        self._set_flat_vars(vars)
        return

    def check_synced(self):
        synced = True
        if self._is_root():
            vars = self._get_flat_vars()
            MPIUtil.bcast(vars)
        else:
            vars_local = self._get_flat_vars()
            vars_root = np.empty_like(vars_local)
            MPIUtil.bcast(vars_root)
            synced = (vars_local == vars_root).all()
        return synced

    def _is_root(self):
        return MPIUtil.is_root_proc()

    def _build_grad_feed(self, vars):
        self._grad_tf_list = []
        self._grad_buffers = []
        for v in self.vars:
            shape = v.get_shape()
            grad = np.zeros(shape)
            grad_tf = tf.placeholder(tf.float32, shape=shape)
            self._grad_buffers.append(grad)
            self._grad_tf_list.append(grad_tf)

        self._grad_feed = dict({g_tf: g for g_tf, g in zip(self._grad_tf_list, self._grad_buffers)})

        return

    def _calc_grad_dim(self):
        grad_dim = 0
        for grad in self._grad_buffers:
            grad_dim += grad.size
        return grad_dim

    def _load_flat_grad(self, flat_grad):
        start = 0
        for g in self._grad_buffers:
            size = g.size
            np.copyto(g, np.reshape(flat_grad[start:start + size], g.shape))
            start += size
        return
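A minimal sketch of how this solver is wired up, assuming a TF1-style session and that the module lives at learning.solvers.mpi_solver (consistent with the Solver import above); it should be launched under mpiexec so MPI.COMM_WORLD has one rank per worker:

import tensorflow as tf
from learning.solvers.mpi_solver import MPISolver

sess = tf.Session()
w = tf.get_variable('w', shape=[4], initializer=tf.zeros_initializer())
loss = tf.reduce_sum(tf.square(w - 1.0))
opt = tf.train.MomentumOptimizer(learning_rate=0.01, momentum=0.9)

solver = MPISolver(sess, opt, [w])
sess.run(tf.global_variables_initializer())
solver.sync()  # broadcast the root's parameters so all workers start identical

grads = sess.run(tf.gradients(loss, [w]))
solver.update(grads)  # gradients are summed via Allreduce, then averaged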
@@ -0,0 +1,10 @@
from abc import ABC, abstractmethod

class Solver(ABC):
    def __init__(self, vars):
        self.vars = vars
        return

    @abstractmethod
    def update(self, grads):
        pass
@@ -0,0 +1,149 @@
import numpy as np
import tensorflow as tf
from abc import abstractmethod

from learning.rl_agent import RLAgent
from pybullet_utils.logger import Logger
from learning.tf_normalizer import TFNormalizer

class TFAgent(RLAgent):
    RESOURCE_SCOPE = 'resource'
    SOLVER_SCOPE = 'solvers'

    def __init__(self, world, id, json_data):
        self.tf_scope = 'agent'
        self.graph = tf.Graph()
        self.sess = tf.Session(graph=self.graph)

        super().__init__(world, id, json_data)
        self._build_graph(json_data)
        self._init_normalizers()
        return

    def __del__(self):
        self.sess.close()
        return

    def save_model(self, out_path):
        with self.sess.as_default(), self.graph.as_default():
            try:
                save_path = self.saver.save(self.sess, out_path, write_meta_graph=False, write_state=False)
                Logger.print2('Model saved to: ' + save_path)
            except Exception:
                Logger.print2("Failed to save model to: " + out_path)
        return

    def load_model(self, in_path):
        with self.sess.as_default(), self.graph.as_default():
            self.saver.restore(self.sess, in_path)
            self._load_normalizers()
            Logger.print2('Model loaded from: ' + in_path)
        return

    def _get_output_path(self):
        assert self.output_dir != ''
        file_path = self.output_dir + '/agent' + str(self.id) + '_model.ckpt'
        return file_path

    def _get_int_output_path(self):
        assert self.int_output_dir != ''
        file_path = self.int_output_dir + ('/agent{:d}_models/agent{:d}_int_model_{:010d}.ckpt').format(self.id, self.id, self.iter)
        return file_path

    def _build_graph(self, json_data):
        with self.sess.as_default(), self.graph.as_default():
            with tf.variable_scope(self.tf_scope):
                self._build_nets(json_data)

                with tf.variable_scope(self.SOLVER_SCOPE):
                    self._build_losses(json_data)
                    self._build_solvers(json_data)

                self._initialize_vars()
                self._build_saver()
        return

    def _init_normalizers(self):
        with self.sess.as_default(), self.graph.as_default():
            # update normalizers to sync the tensorflow tensors
            self.s_norm.update()
            self.g_norm.update()
            self.a_norm.update()
        return

    @abstractmethod
    def _build_nets(self, json_data):
        pass

    @abstractmethod
    def _build_losses(self, json_data):
        pass

    @abstractmethod
    def _build_solvers(self, json_data):
        pass

    def _tf_vars(self, scope=''):
        with self.sess.as_default(), self.graph.as_default():
            res = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=self.tf_scope + '/' + scope)
            assert len(res) > 0
        return res

    def _build_normalizers(self):
        with self.sess.as_default(), self.graph.as_default(), tf.variable_scope(self.tf_scope):
            with tf.variable_scope(self.RESOURCE_SCOPE):
                self.s_norm = TFNormalizer(self.sess, 's_norm', self.get_state_size(), self.world.env.build_state_norm_groups(self.id))
                state_offset = -self.world.env.build_state_offset(self.id)
                print("state_offset=", state_offset)
                state_scale = 1 / self.world.env.build_state_scale(self.id)
                print("state_scale=", state_scale)
                self.s_norm.set_mean_std(state_offset, state_scale)

                self.g_norm = TFNormalizer(self.sess, 'g_norm', self.get_goal_size(), self.world.env.build_goal_norm_groups(self.id))
                self.g_norm.set_mean_std(-self.world.env.build_goal_offset(self.id),
                                         1 / self.world.env.build_goal_scale(self.id))

                self.a_norm = TFNormalizer(self.sess, 'a_norm', self.get_action_size())
                self.a_norm.set_mean_std(-self.world.env.build_action_offset(self.id),
                                         1 / self.world.env.build_action_scale(self.id))
        return

    def _load_normalizers(self):
        self.s_norm.load()
        self.g_norm.load()
        self.a_norm.load()
        return

    def _update_normalizers(self):
        with self.sess.as_default(), self.graph.as_default():
            super()._update_normalizers()
        return

    def _initialize_vars(self):
        self.sess.run(tf.global_variables_initializer())
        return

    def _build_saver(self):
        vars = self._get_saver_vars()
        self.saver = tf.train.Saver(vars, max_to_keep=0)
        return

    def _get_saver_vars(self):
        with self.sess.as_default(), self.graph.as_default():
            vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope=self.tf_scope)
            vars = [v for v in vars if '/' + self.SOLVER_SCOPE + '/' not in v.name]
            #vars = [v for v in vars if '/target/' not in v.name]
            assert len(vars) > 0
        return vars

    def _weight_decay_loss(self, scope):
        vars = self._tf_vars(scope)
        vars_no_bias = [v for v in vars if 'bias' not in v.name]
        loss = tf.add_n([tf.nn.l2_loss(v) for v in vars_no_bias])
        return loss

    def _train(self):
        with self.sess.as_default(), self.graph.as_default():
            super()._train()
        return
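TFAgent leaves the network, loss, and solver construction to subclasses. A skeletal, hypothetical subclass showing the three hooks (not the actual PPO implementation, and not instantiable without an RLWorld):

import tensorflow as tf
import learning.tf_util as TFUtil
from learning.tf_agent import TFAgent

class MinimalTFAgent(TFAgent):
    def _build_nets(self, json_data):
        self.s_ph = tf.placeholder(tf.float32, shape=[None, self.get_state_size()], name='s')
        # two hidden layers; fc_net leaves the final layer linear
        self.a_out = TFUtil.fc_net(self.s_ph, [64, 32, self.get_action_size()], activation=tf.nn.relu)

    def _build_losses(self, json_data):
        self.a_tgt_ph = tf.placeholder(tf.float32, shape=[None, self.get_action_size()], name='a_tgt')
        self.loss = tf.reduce_mean(tf.square(self.a_out - self.a_tgt_ph))

    def _build_solvers(self, json_data):
        self.train_op = tf.train.AdamOptimizer(learning_rate=1e-4).minimize(self.loss)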
@@ -0,0 +1,67 @@
import numpy as np
import copy
import tensorflow as tf
from learning.normalizer import Normalizer

class TFNormalizer(Normalizer):

    def __init__(self, sess, scope, size, groups_ids=None, eps=0.02, clip=np.inf):
        self.sess = sess
        self.scope = scope
        super().__init__(size, groups_ids, eps, clip)

        with tf.variable_scope(self.scope):
            self._build_resource_tf()
        return

    # initialize the count when loading saved values so that things don't change too quickly during updates
    def load(self):
        self.count = self.count_tf.eval()[0]
        self.mean = self.mean_tf.eval()
        self.std = self.std_tf.eval()
        self.mean_sq = self.calc_mean_sq(self.mean, self.std)
        return

    def update(self):
        super().update()
        self._update_resource_tf()
        return

    def set_mean_std(self, mean, std):
        super().set_mean_std(mean, std)
        self._update_resource_tf()
        return

    def normalize_tf(self, x):
        norm_x = (x - self.mean_tf) / self.std_tf
        norm_x = tf.clip_by_value(norm_x, -self.clip, self.clip)
        return norm_x

    def unnormalize_tf(self, norm_x):
        x = norm_x * self.std_tf + self.mean_tf
        return x

    def _build_resource_tf(self):
        self.count_tf = tf.get_variable(dtype=tf.int32, name='count', initializer=np.array([self.count], dtype=np.int32), trainable=False)
        self.mean_tf = tf.get_variable(dtype=tf.float32, name='mean', initializer=self.mean.astype(np.float32), trainable=False)
        self.std_tf = tf.get_variable(dtype=tf.float32, name='std', initializer=self.std.astype(np.float32), trainable=False)

        self.count_ph = tf.get_variable(dtype=tf.int32, name='count_ph', shape=[1])
        self.mean_ph = tf.get_variable(dtype=tf.float32, name='mean_ph', shape=self.mean.shape)
        self.std_ph = tf.get_variable(dtype=tf.float32, name='std_ph', shape=self.std.shape)

        self._update_op = tf.group(
            self.count_tf.assign(self.count_ph),
            self.mean_tf.assign(self.mean_ph),
            self.std_tf.assign(self.std_ph)
        )
        return

    def _update_resource_tf(self):
        feed = {
            self.count_ph: np.array([self.count], dtype=np.int32),
            self.mean_ph: self.mean,
            self.std_ph: self.std
        }
        self.sess.run(self._update_op, feed_dict=feed)
        return
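For reference, load() above rebuilds the running second moment from the stored mean and standard deviation via calc_mean_sq; the identities involved are simply

    mean_sq = σ² + μ²,    σ = sqrt(mean_sq − μ²)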
@@ -0,0 +1,104 @@
import tensorflow as tf
import numpy as np
import os

xavier_initializer = tf.contrib.layers.xavier_initializer()

def disable_gpu():
    os.environ["CUDA_VISIBLE_DEVICES"] = '-1'
    return

def var_shape(x):
    out = [k.value for k in x.get_shape()]
    assert all(isinstance(a, int) for a in out), "shape function assumes that shape is fully known"
    return out

def intprod(x):
    return int(np.prod(x))

def numel(x):
    n = intprod(var_shape(x))
    return n

# note: the original file defined flat_grad twice; the second definition (without
# grad_ys) shadowed this one, so the two are merged here
def flat_grad(loss, var_list, grad_ys=None):
    grads = tf.gradients(loss, var_list, grad_ys)
    return tf.concat([tf.reshape(grad, [numel(v)]) for (v, grad) in zip(var_list, grads)], axis=0)

def fc_net(input, layers_sizes, activation, reuse=None, flatten=False):  # build fully connected network
    curr_tf = input
    for i, size in enumerate(layers_sizes):
        with tf.variable_scope(str(i), reuse=reuse):
            curr_tf = tf.layers.dense(inputs=curr_tf,
                                      units=size,
                                      kernel_initializer=xavier_initializer,
                                      activation=activation if i < len(layers_sizes) - 1 else None)
    if flatten:
        assert layers_sizes[-1] == 1
        curr_tf = tf.reshape(curr_tf, [-1])

    return curr_tf

def copy(sess, src, dst):
    assert len(src) == len(dst)
    sess.run(list(map(lambda v: v[1].assign(v[0]), zip(src, dst))))
    return

def calc_logp_gaussian(x_tf, mean_tf, std_tf):
    dim = tf.to_float(tf.shape(x_tf)[-1])

    if mean_tf is None:
        diff_tf = x_tf
    else:
        diff_tf = x_tf - mean_tf

    logp_tf = -0.5 * tf.reduce_sum(tf.square(diff_tf / std_tf), axis=-1)
    logp_tf += -0.5 * dim * np.log(2 * np.pi) - tf.reduce_sum(tf.log(std_tf), axis=-1)

    return logp_tf

def calc_bound_loss(x_tf, bound_min, bound_max):
    # penalty for violating bounds
    violation_min = tf.minimum(x_tf - bound_min, 0)
    violation_max = tf.maximum(x_tf - bound_max, 0)
    violation = tf.reduce_sum(tf.square(violation_min), axis=-1) + tf.reduce_sum(tf.square(violation_max), axis=-1)
    loss = 0.5 * tf.reduce_mean(violation)
    return loss

class SetFromFlat(object):
    def __init__(self, sess, var_list, dtype=tf.float32):
        shapes = list(map(var_shape, var_list))
        total_size = np.sum([intprod(shape) for shape in shapes])

        self.sess = sess
        self.theta = tf.placeholder(dtype, [total_size])
        start = 0
        assigns = []

        for (shape, v) in zip(shapes, var_list):
            size = intprod(shape)
            assigns.append(tf.assign(v, tf.reshape(self.theta[start:start + size], shape)))
            start += size

        self.op = tf.group(*assigns)

        return

    def __call__(self, theta):
        self.sess.run(self.op, feed_dict={self.theta: theta})
        return

class GetFlat(object):
    def __init__(self, sess, var_list):
        self.sess = sess
        self.op = tf.concat(axis=0, values=[tf.reshape(v, [numel(v)]) for v in var_list])
        return

    def __call__(self):
        return self.sess.run(self.op)
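GetFlat and SetFromFlat round-trip a variable list through a single flat vector, which is what MPISolver relies on for its parameter broadcast; a small TF1 sketch:

import numpy as np
import tensorflow as tf
import learning.tf_util as TFUtil

sess = tf.Session()
w = tf.get_variable('w', shape=[2, 3])
b = tf.get_variable('b', shape=[3])
sess.run(tf.global_variables_initializer())

get_flat = TFUtil.GetFlat(sess, [w, b])
set_flat = TFUtil.SetFromFlat(sess, [w, b])

theta = get_flat()              # 9-element flat parameter vector
set_flat(np.zeros_like(theta))  # write a flat vector back into w and b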
23	examples/pybullet/gym/pybullet_envs/deep_mimic/mpi_run.py	Normal file
@@ -0,0 +1,23 @@
import sys
import subprocess
from pybullet_utils.arg_parser import ArgParser
from pybullet_utils.logger import Logger

def main():
    # Command line arguments
    args = sys.argv[1:]
    arg_parser = ArgParser()
    arg_parser.load_args(args)

    num_workers = arg_parser.parse_int('num_workers', 1)
    assert num_workers > 0

    Logger.print2('Running with {:d} workers'.format(num_workers))
    cmd = 'mpiexec -n {:d} python3 DeepMimic_Optimizer.py '.format(num_workers)
    cmd += ' '.join(args)
    Logger.print2('cmd: ' + cmd)
    subprocess.call(cmd, shell=True)
    return

if __name__ == '__main__':
    main()
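The launcher simply prefixes the optimizer script with mpiexec. Invoked from the deep_mimic directory, a typical call would look like this (the arg file name is an example, not something this script fixes):

    python3 mpi_run.py --num_workers 4 --arg_file train_humanoid3d_walk_args.txt

which the script expands into

    mpiexec -n 4 python3 DeepMimic_Optimizer.py --num_workers 4 --arg_file train_humanoid3d_walk_args.txt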
82	examples/pybullet/gym/pybullet_envs/deep_mimic/testrl.py	Normal file
@@ -0,0 +1,82 @@
import os
import inspect

currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
parentdir = os.path.dirname(os.path.dirname(currentdir))
os.sys.path.insert(0, parentdir)
print("parentdir=", parentdir)

import json
from pybullet_envs.deep_mimic.learning.rl_world import RLWorld
from learning.ppo_agent import PPOAgent

import pybullet_data
from pybullet_utils.arg_parser import ArgParser
from pybullet_utils.logger import Logger
from pybullet_envs.deep_mimic.env.pybullet_deep_mimic_env import PyBulletDeepMimicEnv
import sys
import random

update_timestep = 1. / 600.

def update_world(world, time_elapsed):
    world.update(time_elapsed)
    reward = world.env.calc_reward(agent_id=0)
    #print("reward=", reward)
    end_episode = world.env.is_episode_end()
    if end_episode:
        world.end_episode()
        world.reset()
    return

def build_arg_parser(args):
    arg_parser = ArgParser()
    arg_parser.load_args(args)

    arg_file = arg_parser.parse_string('arg_file', '')
    if arg_file != '':
        path = pybullet_data.getDataPath() + "/args/" + arg_file
        succ = arg_parser.load_file(path)
        Logger.print2(arg_file)
        assert succ, Logger.print2('Failed to load args from: ' + arg_file)
    return arg_parser

args = sys.argv[1:]

def build_world(args, enable_draw):
    arg_parser = build_arg_parser(args)
    print("enable_draw=", enable_draw)
    env = PyBulletDeepMimicEnv(args, enable_draw)
    world = RLWorld(env, arg_parser)

    motion_file = arg_parser.parse_string("motion_file")
    print("motion_file=", motion_file)
    bodies = arg_parser.parse_ints("fall_contact_bodies")
    print("bodies=", bodies)
    int_output_path = arg_parser.parse_string("int_output_path")
    print("int_output_path=", int_output_path)
    agent_files = pybullet_data.getDataPath() + "/" + arg_parser.parse_string("agent_files")

    AGENT_TYPE_KEY = "AgentType"

    print("agent_file=", agent_files)
    with open(agent_files) as data_file:
        json_data = json.load(data_file)
        print("json_data=", json_data)
        assert AGENT_TYPE_KEY in json_data
        agent_type = json_data[AGENT_TYPE_KEY]
        print("agent_type=", agent_type)
        agent = PPOAgent(world, 0, json_data)  # single agent with id 0 (the original passed the builtin id here)

        agent.set_enable_training(True)
    world.reset()
    return world

if __name__ == '__main__':
    world = build_world(args, True)
    while world.env._pybullet_client.isConnected():
        update_world(world, update_timestep)
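testrl.py resolves its arg file under pybullet_data/args (see build_arg_parser above), so a typical invocation is, assuming such an arg file ships with pybullet_data:

    python3 testrl.py --arg_file run_humanoid3d_walk_args.txt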
@@ -17,7 +17,9 @@ project ("pybullet")

 	includedirs {"../../src", "../../examples",
 		"../../examples/ThirdPartyLibs"}
-	defines {"PHYSICS_IN_PROCESS_EXAMPLE_BROWSER"}
+	defines {"PHYSICS_IN_PROCESS_EXAMPLE_BROWSER", "STATIC_LINK_SPD_PLUGIN"}

 	hasCL = findOpenCL("clew")

 	links{ "BulletExampleBrowserLib","gwen", "BulletFileLoader","BulletWorldImporter","OpenGL_Window","BulletSoftBody", "BulletInverseDynamicsUtils", "BulletInverseDynamics", "BulletDynamics","BulletCollision","LinearMath","BussIK", "Bullet3Common"}
@@ -170,6 +172,20 @@ if not _OPTIONS["no-enet"] then
 		"../../examples/SharedMemory/plugins/collisionFilterPlugin/collisionFilterPlugin.cpp",
 		"../../examples/SharedMemory/plugins/pdControlPlugin/pdControlPlugin.cpp",
 		"../../examples/SharedMemory/plugins/pdControlPlugin/pdControlPlugin.h",
+		"../../examples/SharedMemory/plugins/stablePDPlugin/SpAlg.cpp",
+		"../../examples/SharedMemory/plugins/stablePDPlugin/SpAlg.h",
+		"../../examples/SharedMemory/plugins/stablePDPlugin/Shape.cpp",
+		"../../examples/SharedMemory/plugins/stablePDPlugin/Shape.h",
+		"../../examples/SharedMemory/plugins/stablePDPlugin/RBDUtil.cpp",
+		"../../examples/SharedMemory/plugins/stablePDPlugin/RBDUtil.h",
+		"../../examples/SharedMemory/plugins/stablePDPlugin/RBDModel.cpp",
+		"../../examples/SharedMemory/plugins/stablePDPlugin/RBDModel.h",
+		"../../examples/SharedMemory/plugins/stablePDPlugin/MathUtil.cpp",
+		"../../examples/SharedMemory/plugins/stablePDPlugin/MathUtil.h",
+		"../../examples/SharedMemory/plugins/stablePDPlugin/KinTree.cpp",
+		"../../examples/SharedMemory/plugins/stablePDPlugin/KinTree.h",
+		"../../examples/SharedMemory/plugins/stablePDPlugin/BulletConversion.cpp",
+		"../../examples/SharedMemory/plugins/stablePDPlugin/BulletConversion.h",
 	}

 	if _OPTIONS["enable_physx"] then
@@ -1233,20 +1233,21 @@ static PyObject* pybullet_changeDynamicsInfo(PyObject* self, PyObject* args, PyO
 	double restitution = -1;
 	double linearDamping = -1;
 	double angularDamping = -1;

 	double contactStiffness = -1;
 	double contactDamping = -1;
 	double ccdSweptSphereRadius = -1;
 	int frictionAnchor = -1;
 	double contactProcessingThreshold = -1;
 	int activationState = -1;
+	double jointDamping = -1;
 	PyObject* localInertiaDiagonalObj = 0;

 	b3PhysicsClientHandle sm = 0;

 	int physicsClientId = 0;
-	static char* kwlist[] = {"bodyUniqueId", "linkIndex", "mass", "lateralFriction", "spinningFriction", "rollingFriction", "restitution", "linearDamping", "angularDamping", "contactStiffness", "contactDamping", "frictionAnchor", "localInertiaDiagonal", "ccdSweptSphereRadius", "contactProcessingThreshold", "activationState", "physicsClientId", NULL};
-	if (!PyArg_ParseTupleAndKeywords(args, keywds, "ii|dddddddddiOddii", kwlist, &bodyUniqueId, &linkIndex, &mass, &lateralFriction, &spinningFriction, &rollingFriction, &restitution, &linearDamping, &angularDamping, &contactStiffness, &contactDamping, &frictionAnchor, &localInertiaDiagonalObj, &ccdSweptSphereRadius, &contactProcessingThreshold, &activationState, &physicsClientId))
+	static char* kwlist[] = {"bodyUniqueId", "linkIndex", "mass", "lateralFriction", "spinningFriction", "rollingFriction", "restitution", "linearDamping", "angularDamping", "contactStiffness", "contactDamping", "frictionAnchor", "localInertiaDiagonal", "ccdSweptSphereRadius", "contactProcessingThreshold", "activationState", "jointDamping", "physicsClientId", NULL};
+	if (!PyArg_ParseTupleAndKeywords(args, keywds, "ii|dddddddddiOddidi", kwlist, &bodyUniqueId, &linkIndex, &mass, &lateralFriction, &spinningFriction, &rollingFriction, &restitution, &linearDamping, &angularDamping, &contactStiffness, &contactDamping, &frictionAnchor, &localInertiaDiagonalObj, &ccdSweptSphereRadius, &contactProcessingThreshold, &activationState, &jointDamping, &physicsClientId))
 	{
 		return NULL;
 	}
@@ -1300,6 +1301,10 @@ static PyObject* pybullet_changeDynamicsInfo(PyObject* self, PyObject* args, PyO
 		b3ChangeDynamicsInfoSetAngularDamping(command, bodyUniqueId, angularDamping);
 	}

+	if (jointDamping >= 0)
+	{
+		b3ChangeDynamicsInfoSetJointDamping(command, bodyUniqueId, linkIndex, jointDamping);
+	}
 	if (restitution >= 0)
 	{
 		b3ChangeDynamicsInfoSetRestitution(command, bodyUniqueId, linkIndex, restitution);
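On the Python side this adds an optional jointDamping keyword to changeDynamics; negative values (the default -1) leave the joint untouched. A minimal sketch, using r2d2.urdf from pybullet_data and an arbitrary joint index:

import pybullet as p
import pybullet_data

p.connect(p.DIRECT)
p.setAdditionalSearchPath(pybullet_data.getDataPath())
robot = p.loadURDF("r2d2.urdf")

# linkIndex doubles as the joint index for changeDynamics; 2 is arbitrary here
p.changeDynamics(robot, 2, jointDamping=0.5)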
@@ -92,8 +92,11 @@ public:
 	/**@brief Set the rotation using axis angle notation
 	 * @param axis The axis around which to rotate
 	 * @param angle The magnitude of the rotation in Radians */
-	void setRotation(const b3Vector3& axis, const b3Scalar& _angle)
+	void setRotation(const b3Vector3& axis1, const b3Scalar& _angle)
 	{
+		b3Vector3 axis = axis1;
+		axis.safeNormalize();

 		b3Scalar d = axis.length();
 		b3Assert(d != b3Scalar(0.0));
 		if (d < B3_EPSILON)
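With the added safeNormalize(), callers may now pass a non-unit axis. The quaternion that setRotation builds is the standard axis-angle form

    q = ( (a / ||a||) · sin(θ/2),  cos(θ/2) )

which is only a valid unit quaternion when the axis is normalized, hence the fix.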