Allow pybullet_envs.deep_mimic.testrl --arg_file run_humanoid3d_backflip_args.txt to perform a backflip. The humanoid can only backflip twice, then drops to the ground.

This DeepMimic implementation is still very slow, due to slow mass matrix/inverse dynamics computation. Once spherical motor drive is enabled, it should be fast(er).
- Move pd_controller_stable to pybullet_utils for easier re-use.
- Add plane_transparent.urdf to pybullet_data.
- Allow spacebar in keyboardEvents (Windows for now).
2019-02-10 20:56:31 -08:00
parent 28c9ea3aad
commit 9bddca873c
17 changed files with 551 additions and 224 deletions
--- a/examples/pybullet/gym/pybullet_envs/deep_mimic/learning/ppo_agent.py
+++ b/examples/pybullet/gym/pybullet_envs/deep_mimic/learning/ppo_agent.py
@@ -147,6 +147,7 @@ class PPOAgent(PGAgent):
    def _decide_action(self, s, g):
        with self.sess.as_default(), self.graph.as_default():
            self._exp_action = self._enable_stoch_policy() and MathUtil.flip_coin(self.exp_params_curr.rate)
+            #print("_decide_action._exp_action=",self._exp_action)
            a, logp = self._eval_actor(s, g, self._exp_action)
        return a[0], logp[0]

--- a/examples/pybullet/gym/pybullet_envs/deep_mimic/learning/rl_agent.py
+++ b/examples/pybullet/gym/pybullet_envs/deep_mimic/learning/rl_agent.py
@@ -123,8 +123,7 @@ class RLAgent(ABC):
        if self.need_new_action():
           #print("update_new_action!!!")
           self._update_new_action()
-        else:
-           print("no action???")
+        

        if (self._mode == self.Mode.TRAIN and self.enable_training):
            self._update_counter += timestep
@@ -329,7 +328,9 @@ class RLAgent(ABC):
        return

    def _update_new_action(self):
+        #print("_update_new_action!")
        s = self._record_state()
+        #np.savetxt("pb_record_state_s.csv", s, delimiter=",")
        g = self._record_goal()

        if not (self._is_first_step()):