Isaac to mujoco loading (#29)

* added stompyv2 * stompy_v2 fixes * removing torques values the model doesn't fly away * updated standing position and joint limits * initial params * cleanup * fighting mypy * linting * defeating mypy * fixes * final fixes * more fixes * more fixes * linting * save the walking policy, small issues with knees * linking latest humanoid-gym * setup working for mjcf * saving new modle with inertia, training works * saving model with inertia, training works * latest settings for making legs_only ppo realistic * cleanup / removing comments * add hexmove settings and walking policy * loading isaac gym model to mujoco * Updated third party library Co-authored-by: Allen Wu <[email protected]> Co-authored-by: Wesley Maa <[email protected]> * linting * cleanup * merge --------- Co-authored-by: nathanjzhao <[email protected]> Co-authored-by: Allen Wu <[email protected]> Co-authored-by: Wesley Maa <[email protected]>
kscalelabs · Jul 17, 2024 · 1e2914e · 1e2914e
1 parent e3ff6f5
commit 1e2914e
Show file tree

Hide file tree

Showing 16 changed files with 2,600 additions and 42 deletions.
diff --git a/sim/deploy/config.py b/sim/deploy/config.py
@@ -57,7 +57,8 @@ class RobotConfig:
     kds: np.ndarray
     tau_limit: np.ndarray
     robot_model_path: str
-    dt: float = 0.001
+    # is2ac
+    dt: float = 0.00002  # 0.001
     phase_duration: float = 0.64
     duration: float = 10.0
     decimation: int = 10

diff --git a/sim/deploy/policy.py b/sim/deploy/policy.py
@@ -35,7 +35,12 @@ def __init__(
         self.hist_obs = deque()
         for _ in range(frame_stack):
             self.hist_obs.append(np.zeros([1, obs_size], dtype=np.double))
-        self.model = torch.jit.load(model_path)
+        # Load the model.
+        # Changed to load onto same device as other tensors
+        # TODO: bring back jit when ready
+        # self.model = torch.jit.load(model_path)
+        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+        self.model = torch.load(model_path, map_location=self.device)
         self.action = np.zeros((num_actions), dtype=np.double)
 
     @abstractmethod
@@ -167,7 +172,7 @@ def next_action(
         for i in range(self.cfg.frame_stack):
             policy_input[0, i * self.cfg.num_single_obs : (i + 1) * self.cfg.num_single_obs] = self.hist_obs[i][0, :]
 
-        self.action[:] = self.model(torch.tensor(policy_input))[0].detach().numpy()
+        self.action[:] = self.model(torch.tensor(policy_input).to(self.device))[0].detach().cpu().numpy()
 
         self.action = np.clip(self.action, -self.cfg.normalization.clip_actions, self.cfg.normalization.clip_actions)
         return self.action

diff --git a/sim/deploy/run.py b/sim/deploy/run.py
@@ -1,4 +1,5 @@
 """Basic sim2sim and sim2real deployment script.
+          <joint name="lfemurrx" pos="0 0 0" axis="1 0 0" range="-0.349066 2.79253" class="stiff_medium"/>
 
 Run example:
     mjpython sim/deploy/run.py --load_model sim/deploy/tests/walking_policy.pt --world MUJOCO
@@ -21,7 +22,7 @@
 
 from sim.deploy.config import RobotConfig
 from sim.env import stompy_mjcf_path
-from sim.stompy.joints import StompyFixed
+from sim.new_test.joints import Stompy as StompyFixed
 
 
 class Worlds(Enum):
@@ -62,11 +63,19 @@ class MujocoWorld(World):
     def __init__(self, cfg: RobotConfig):
         self.cfg = cfg
         self.model = mujoco.MjModel.from_xml_path(str(self.cfg.robot_model_path))
+
         self.model.opt.timestep = self.cfg.dt
+
         # First step
         self.data = mujoco.MjData(self.model)
+        self.data.qpos = self.model.keyframe("default").qpos
+
         mujoco.mj_step(self.model, self.data)
 
+        # Set self.data initial state to the default standing position
+        # self.data.qpos = StompyFixed.default_standing()
+        # TODO: This line should produce same results soon
+
     def step(
         self,
         tau: np.ndarray = None,
@@ -109,19 +118,25 @@ def get_observation(self) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarra
         return dof_pos, dof_vel, orientation, ang_vel
 
     def simulate(self, policy: SimPolicy) -> None:
+
         with mujoco.viewer.launch_passive(self.model, self.data) as viewer:
             for step in tqdm(range(int(cfg.duration / cfg.dt)), desc="Simulating..."):
                 # Get the world state
                 dof_pos, dof_vel, orientation, ang_vel = self.get_observation()
 
+                # Zero action
+                target_dof_pos = dof_pos
+
                 # We update the policy at a lower frequency
                 # The simulation runs at 1000hz, but the policy is updated at 100hz
-                if step % cfg.decimation == 0:
-                    action = policy.next_action(dof_pos, dof_vel, orientation, ang_vel, step)
-                    target_dof_pos = action * cfg.action_scale
+                # if step % cfg.decimation == 0:
+                #     action = policy.next_action(dof_pos, dof_vel, orientation, ang_vel, step)
+                #     target_dof_pos = action * cfg.action_scale
 
                 tau = policy.pd_control(target_dof_pos, dof_pos, cfg.kps, dof_vel, cfg.kds)
-
+                # breakpoint()
+                # set tau to zero for now
+                # tau = np.zeros_like(tau)
                 self.step(tau=tau)
                 viewer.sync()
 
@@ -141,7 +156,7 @@ def __init__(self, cfg: RobotConfig):
         delattr(args, "load_model")
 
         # adapt
-        args.task = "legs_ppo"
+        args.task = "only_legs_ppo"
         args.device = "cpu"
         args.physics_engine = gymapi.SIM_PHYSX
         args.num_envs = 1

diff --git a/sim/deploy/tests/model_400_model_only.pt b/sim/deploy/tests/model_400_model_only.pt
diff --git a/sim/humanoid_gym/envs/__init__.py b/sim/humanoid_gym/envs/__init__.py
@@ -13,8 +13,12 @@
 # fmt: on
 from .getup_config import GetupCfg, GetupCfgPPO
 from .getup_env import GetupFreeEnv
+from .hexmove_config import HexmoveCfg, HexmoveCfgPPO
+from .hexmove_env import HexmoveFreeEnv
 from .legs_config import LegsCfg, LegsCfgPPO
 from .legs_env import LegsFreeEnv
+from .only_legs_config import OnlyLegsCfg, OnlyLegsCfgPPO
+from .only_legs_env import OnlyLegsFreeEnv
 from .stompy_config import StompyCfg, StompyCfgPPO
 from .stompy_env import StompyFreeEnv
 
@@ -30,6 +34,7 @@ def register_tasks() -> None:
     task_registry.register("stompy_ppo", StompyFreeEnv, StompyCfg(), StompyCfgPPO())
     task_registry.register("getup_ppo", GetupFreeEnv, GetupCfg(), GetupCfgPPO())
     task_registry.register("legs_ppo", LegsFreeEnv, LegsCfg(), LegsCfgPPO())
+    task_registry.register("only_legs_ppo", OnlyLegsFreeEnv, OnlyLegsCfg(), OnlyLegsCfgPPO())
 
 
 register_tasks()
diff --git a/sim/humanoid_gym/envs/getup_env.py b/sim/humanoid_gym/envs/getup_env.py
@@ -263,7 +263,7 @@ def check_termination(self):
         t = len(self.reset_buf)
         self.height_history[:, t] = self.root_states[:, 2]
         self.window_size = 10
-        breakpoint()
+
         above_threshold = self.episode_length_buf > self.window_size:
 
             self.reset_buf = torch.any(