Merge branch 'master' of https://github.com/erwincoumans/bullet3
@@ -206,15 +206,15 @@ void DeformableMultibody::initPhysics()
 psb->getCollisionShape()->setMargin(0.25);
 psb->generateBendingConstraints(2);
-psb->setTotalMass(5);
+psb->setTotalMass(1);
 psb->m_cfg.kKHR = 1; // collision hardness with kinematic objects
 psb->m_cfg.kCHR = 1; // collision hardness with rigid body
-psb->m_cfg.kDF = .1;
+psb->m_cfg.kDF = 2;
 psb->m_cfg.collisions = btSoftBody::fCollision::SDF_RD;
 psb->setCollisionFlags(0);
 getDeformableDynamicsWorld()->addSoftBody(psb);

-btDeformableMassSpringForce* mass_spring = new btDeformableMassSpringForce(2, 0.01, false);
+btDeformableMassSpringForce* mass_spring = new btDeformableMassSpringForce(30, 1, true);
 getDeformableDynamicsWorld()->addForce(psb, mass_spring);
 m_forces.push_back(mass_spring);
@@ -219,7 +219,7 @@ void DeformableRigid::initPhysics()
 psb->setTotalMass(1);
 psb->m_cfg.kKHR = 1; // collision hardness with kinematic objects
 psb->m_cfg.kCHR = 1; // collision hardness with rigid body
-psb->m_cfg.kDF = 2;
+psb->m_cfg.kDF = .4;
 psb->m_cfg.collisions = btSoftBody::fCollision::SDF_RD;
 getDeformableDynamicsWorld()->addSoftBody(psb);
@@ -275,12 +275,12 @@ void GraspDeformable::initPhysics()
 {
 char absolute_path[1024];
 b3BulletDefaultFileIO fileio;
-fileio.findResourcePath("ditto.vtk", absolute_path, 1024);
+fileio.findResourcePath("ditto.vtk", absolute_path, 1024);
 // fileio.findResourcePath("banana.vtk", absolute_path, 1024);
 // fileio.findResourcePath("ball.vtk", absolute_path, 1024);
 // fileio.findResourcePath("deformable_crumpled_napkin_sim.vtk", absolute_path, 1024);
 // fileio.findResourcePath("single_tet.vtk", absolute_path, 1024);
-// fileio.findResourcePath("tube.vtk", absolute_path, 1024);
+// fileio.findResourcePath("tube.vtk", absolute_path, 1024);
 // fileio.findResourcePath("torus.vtk", absolute_path, 1024);
 // fileio.findResourcePath("paper_roll.vtk", absolute_path, 1024);
 // fileio.findResourcePath("bread.vtk", absolute_path, 1024);
@@ -231,7 +231,7 @@ void Pinch::initPhysics()
 btVector3 gravity = btVector3(0, -10, 0);
 m_dynamicsWorld->setGravity(gravity);
 getDeformableDynamicsWorld()->getWorldInfo().m_gravity = gravity;

 getDeformableDynamicsWorld()->getWorldInfo().m_sparsesdf.setDefaultVoxelsz(0.25);
 getDeformableDynamicsWorld()->setSolverCallback(dynamics);
 m_guiHelper->createPhysicsDebugDrawer(m_dynamicsWorld);
@@ -425,12 +425,12 @@ public:
 m_robotSim.setGravity(btVector3(0, 0, -10));
 b3RobotSimulatorLoadDeformableBodyArgs args(2, .01, 0.006);
-args.m_springElasticStiffness = .1;
-args.m_springDampingStiffness = .0004;
-args.m_springBendingStiffness = 1;
-args.m_frictionCoeff = 1;
+args.m_springElasticStiffness = 1;
+args.m_springDampingStiffness = .01;
+args.m_springBendingStiffness = .1;
+args.m_frictionCoeff = 10;
 args.m_useSelfCollision = false;
-// args.m_useFaceContact = true;
+args.m_useFaceContact = true;
 args.m_useBendingSprings = true;
 args.m_startPosition.setValue(0, 0, 0);
 args.m_startOrientation.setValue(0, 0, 1, 1);
@@ -476,7 +476,7 @@ public:
 revoluteJoint2.m_jointType = ePoint2PointType;
 m_robotSim.createConstraint(0, 2, 0, 4, &revoluteJoint1);
 m_robotSim.createConstraint(0, 3, 0, 6, &revoluteJoint2);
-m_robotSim.setNumSimulationSubSteps(8);
+m_robotSim.setNumSimulationSubSteps(2);
 }

 if ((m_options & eSOFTBODY_MULTIBODY_COUPLING) != 0)
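On the Python side, the equivalent of setNumSimulationSubSteps appears to be the numSubSteps physics-engine parameter; a minimal sketch (not taken from this commit):

import pybullet as p

p.connect(p.DIRECT)
p.setPhysicsEngineParameter(numSubSteps=2)  # mirrors m_robotSim.setNumSimulationSubSteps(2) above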
@@ -8153,7 +8153,7 @@ bool PhysicsServerCommandProcessor::processLoadSoftBodyCommand(const struct Shar
 {
 spring_bending_stiffness = clientCmd.m_loadSoftBodyArguments.m_springBendingStiffness;
 }
-btDeformableLagrangianForce* springForce = new btDeformableMassSpringForce(spring_elastic_stiffness, spring_damping_stiffness, false, spring_bending_stiffness);
+btDeformableLagrangianForce* springForce = new btDeformableMassSpringForce(spring_elastic_stiffness, spring_damping_stiffness, true, spring_bending_stiffness);
 deformWorld->addForce(psb, springForce);
 m_data->m_lf.push_back(springForce);
 }
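These spring parameters are the ones a pybullet client supplies through loadSoftBody. A minimal sketch of a client-side call that exercises this command path, assuming a pybullet build with deformable support; the mesh file name and the stiffness values are purely illustrative:

import pybullet as p
import pybullet_data

p.connect(p.DIRECT)
p.resetSimulation(p.RESET_USE_DEFORMABLE_WORLD)  # soft bodies need the deformable world
p.setAdditionalSearchPath(pybullet_data.getDataPath())
p.setGravity(0, 0, -10)

# Keyword names mirror the m_loadSoftBodyArguments fields handled above;
# substitute any tetrahedral/triangle mesh available on your data path.
body = p.loadSoftBody("torus.vtk",
                      basePosition=[0, 0, 1],
                      mass=1,
                      useMassSpring=1,
                      useBendingSprings=1,
                      springElasticStiffness=30,
                      springDampingStiffness=1,
                      springBendingStiffness=0.1,
                      frictionCoeff=0.5,
                      useFaceContact=1,
                      useSelfCollision=0)

for _ in range(240):
    p.stepSimulation()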
@@ -9353,6 +9353,15 @@ bool PhysicsServerCommandProcessor::processSendPhysicsParametersCommand(const st
 if (deformWorld)
 {
 deformWorld->getWorldInfo().m_gravity = grav;
+for (int i = 0; i < m_data->m_lf.size(); ++i)
+{
+    btDeformableLagrangianForce* force = m_data->m_lf[i];
+    if (force->getForceType() == BT_GRAVITY_FORCE)
+    {
+        btDeformableGravityForce* gforce = (btDeformableGravityForce*)force;
+        gforce->m_gravity = grav;
+    }
+}
 }
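With the loop added above, a gravity change sent after soft bodies (and their gravity forces) have been created now reaches those forces as well. A small, hedged pybullet-side illustration:

import pybullet as p

p.connect(p.DIRECT)
p.resetSimulation(p.RESET_USE_DEFORMABLE_WORLD)
p.setGravity(0, 0, -10)   # initial gravity
# ... load rigid and soft bodies here ...
p.setGravity(0, 0, -5)    # now also updates every registered btDeformableGravityForce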
@@ -0,0 +1,60 @@
# Code adapted from https://github.com/araffin/rl-baselines-zoo
# it requires stable-baselines to be installed
# Colab Notebook: https://colab.research.google.com/drive/1nZkHO4QTYfAksm9ZTaZ5vXyC7szZxC3F
# Author: Antonin RAFFIN
# MIT License
import argparse

import pybullet_envs

import gym
import numpy as np
from stable_baselines import SAC, TD3
from stable_baselines.common.noise import NormalActionNoise

from utils import TimeFeatureWrapper, EvalCallback


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--algo', help='RL Algorithm (Soft Actor-Critic by default)', default='sac',
                        type=str, required=False, choices=['sac', 'td3'])
    parser.add_argument('--env', type=str, default='HalfCheetahBulletEnv-v0', help='environment ID')
    parser.add_argument('-n', '--n-timesteps', help='Number of training timesteps', default=int(1e6),
                        type=int)
    args = parser.parse_args()

    env_id = args.env
    n_timesteps = args.n_timesteps
    save_path = '{}_{}'.format(args.algo, env_id)

    # Instantiate and wrap the environment
    env = TimeFeatureWrapper(gym.make(env_id))

    # Create the evaluation environment and callback
    eval_env = TimeFeatureWrapper(gym.make(env_id))
    callback = EvalCallback(eval_env, best_model_save_path=save_path + '_best')

    algo = {
        'sac': SAC,
        'td3': TD3
    }[args.algo]

    n_actions = env.action_space.shape[0]

    # Tuned hyperparameters from https://github.com/araffin/rl-baselines-zoo
    hyperparams = {
        'sac': dict(batch_size=256, gamma=0.98, policy_kwargs=dict(layers=[256, 256]),
                    learning_starts=10000, buffer_size=int(2e5), tau=0.01),

        'td3': dict(batch_size=100, policy_kwargs=dict(layers=[400, 300]),
                    learning_rate=1e-3, learning_starts=10000, buffer_size=int(1e6),
                    train_freq=1000, gradient_steps=1000,
                    action_noise=NormalActionNoise(mean=np.zeros(n_actions), sigma=0.1 * np.ones(n_actions)))
    }[args.algo]

    model = algo('MlpPolicy', env, verbose=1, **hyperparams)
    model.learn(n_timesteps, callback=callback)

    print("Saving to {}.zip".format(save_path))
    model.save(save_path)
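A sketch of how this script would typically be launched, and how the saved agent can be replayed afterwards; paths follow the save_path convention above, and the script is assumed to be run from its own directory so that `from utils import ...` resolves:

#   python train.py --algo sac --env HalfCheetahBulletEnv-v0 -n 1000000

# Replaying the saved policy afterwards (stable-baselines API):
import gym
import pybullet_envs
from stable_baselines import SAC
from utils import TimeFeatureWrapper

env = TimeFeatureWrapper(gym.make('HalfCheetahBulletEnv-v0'))
model = SAC.load('sac_HalfCheetahBulletEnv-v0')  # written by model.save() above
obs = env.reset()
for _ in range(1000):
    action, _ = model.predict(obs, deterministic=True)
    obs, reward, done, info = env.step(action)
    if done:
        obs = env.reset()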
examples/pybullet/gym/pybullet_envs/stable_baselines/utils.py (new file, 114 lines)
@@ -0,0 +1,114 @@
# Code adapted from https://github.com/araffin/rl-baselines-zoo
# it requires stable-baselines to be installed
# Author: Antonin RAFFIN
# MIT License
import gym
import numpy as np
from gym.wrappers import TimeLimit

from stable_baselines.common.evaluation import evaluate_policy


class TimeFeatureWrapper(gym.Wrapper):
    """
    Add remaining time to observation space for fixed length episodes.
    See https://arxiv.org/abs/1712.00378 and https://github.com/aravindr93/mjrl/issues/13.

    :param env: (gym.Env)
    :param max_steps: (int) Max number of steps of an episode
        if it is not wrapped in a TimeLimit object.
    :param test_mode: (bool) In test mode, the time feature is constant,
        equal to zero. This allows checking that the agent did not overfit this feature,
        learning a deterministic pre-defined sequence of actions.
    """
    def __init__(self, env, max_steps=1000, test_mode=False):
        assert isinstance(env.observation_space, gym.spaces.Box)
        # Add a time feature to the observation
        low, high = env.observation_space.low, env.observation_space.high
        low, high = np.concatenate((low, [0])), np.concatenate((high, [1.]))
        env.observation_space = gym.spaces.Box(low=low, high=high, dtype=np.float32)

        super(TimeFeatureWrapper, self).__init__(env)

        if isinstance(env, TimeLimit):
            self._max_steps = env._max_episode_steps
        else:
            self._max_steps = max_steps
        self._current_step = 0
        self._test_mode = test_mode

    def reset(self):
        self._current_step = 0
        return self._get_obs(self.env.reset())

    def step(self, action):
        self._current_step += 1
        obs, reward, done, info = self.env.step(action)
        return self._get_obs(obs), reward, done, info

    def _get_obs(self, obs):
        """
        Concatenate the time feature to the current observation.

        :param obs: (np.ndarray)
        :return: (np.ndarray)
        """
        # Remaining time is more general than the elapsed step count
        time_feature = 1 - (self._current_step / self._max_steps)
        if self._test_mode:
            time_feature = 1.0
        # Optionally: concatenate [time_feature, time_feature ** 2]
        return np.concatenate((obs, [time_feature]))


class EvalCallback(object):
    """
    Callback for evaluating an agent.

    :param eval_env: (gym.Env) The environment used for evaluation
    :param n_eval_episodes: (int) The number of episodes to test the agent
    :param eval_freq: (int) Evaluate the agent every eval_freq calls of the callback.
    :param deterministic: (bool)
    :param best_model_save_path: (str)
    :param verbose: (int)
    """
    def __init__(self, eval_env, n_eval_episodes=5, eval_freq=10000,
                 deterministic=True, best_model_save_path=None, verbose=1):
        super(EvalCallback, self).__init__()
        self.n_eval_episodes = n_eval_episodes
        self.eval_freq = eval_freq
        self.best_mean_reward = -np.inf
        self.deterministic = deterministic
        self.eval_env = eval_env
        self.verbose = verbose
        self.model, self.num_timesteps = None, 0
        self.best_model_save_path = best_model_save_path
        self.n_calls = 0

    def __call__(self, locals_, globals_):
        """
        :param locals_: (dict)
        :param globals_: (dict)
        :return: (bool)
        """
        self.n_calls += 1
        self.model = locals_['self']
        self.num_timesteps = self.model.num_timesteps

        if self.n_calls % self.eval_freq == 0:
            episode_rewards, _ = evaluate_policy(self.model, self.eval_env, n_eval_episodes=self.n_eval_episodes,
                                                 deterministic=self.deterministic, return_episode_rewards=True)

            mean_reward, std_reward = np.mean(episode_rewards), np.std(episode_rewards)
            if self.verbose > 0:
                print("Eval num_timesteps={}, "
                      "episode_reward={:.2f} +/- {:.2f}".format(self.num_timesteps, mean_reward, std_reward))

            if mean_reward > self.best_mean_reward:
                if self.best_model_save_path is not None:
                    print("Saving best model")
                    self.model.save(self.best_model_save_path)
                self.best_mean_reward = mean_reward

        return True
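A quick, hedged check of what TimeFeatureWrapper does to an environment (run from the same directory; the env id is only an example):

import gym
import pybullet_envs
from utils import TimeFeatureWrapper

env = gym.make('HalfCheetahBulletEnv-v0')
orig_dim = env.observation_space.shape[0]
env = TimeFeatureWrapper(env)
print(orig_dim, '->', env.observation_space.shape[0])  # one extra observation dimension
obs = env.reset()
print(obs[-1])  # time feature starts at 1.0 and decays towards 0 over the episode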