diff --git a/examples/experiments/standing/robustv0/policy_1.pt b/examples/experiments/standing/robustv0/policy_1.pt
new file mode 100644
index 00000000..3bdb256d
Binary files /dev/null and b/examples/experiments/standing/robustv0/policy_1.pt differ
diff --git a/examples/experiments/standing/robustv1/policy_1.pt b/examples/experiments/standing/robustv1/policy_1.pt
new file mode 100644
index 00000000..68a6194e
Binary files /dev/null and b/examples/experiments/standing/robustv1/policy_1.pt differ
diff --git a/examples/experiments/walking/arms/model_700.pt b/examples/experiments/walking/arms/model_700.pt
new file mode 100644
index 00000000..1f2d8006
Binary files /dev/null and b/examples/experiments/walking/arms/model_700.pt differ
diff --git a/examples/experiments/walking/exp4-large_walk_with_arms/policy_1.pt b/examples/experiments/walking/exp4-large_walk_with_arms/policy_1.pt
new file mode 100644
index 00000000..9d2de367
Binary files /dev/null and b/examples/experiments/walking/exp4-large_walk_with_arms/policy_1.pt differ
diff --git a/examples/experiments/walking/exp5-larger_gait/policy_1.pt b/examples/experiments/walking/exp5-larger_gait/policy_1.pt
new file mode 100644
index 00000000..ca649be8
Binary files /dev/null and b/examples/experiments/walking/exp5-larger_gait/policy_1.pt differ
diff --git a/examples/experiments/walking/exp6-arm_movement++/policy_1.pt b/examples/experiments/walking/exp6-arm_movement++/policy_1.pt
new file mode 100644
index 00000000..a33f7b7f
Binary files /dev/null and b/examples/experiments/walking/exp6-arm_movement++/policy_1.pt differ
diff --git a/examples/experiments/walking/increase_target_height/policy_1.pt b/examples/experiments/walking/increase_target_height/policy_1.pt
new file mode 100644
index 00000000..f884687c
Binary files /dev/null and b/examples/experiments/walking/increase_target_height/policy_1.pt differ
diff --git a/examples/experiments/walking/target_height_and_clearance_rewards/policy_1.pt b/examples/experiments/walking/target_height_and_clearance_rewards/policy_1.pt
new file mode 100644
index 00000000..239a8a80
Binary files /dev/null and b/examples/experiments/walking/target_height_and_clearance_rewards/policy_1.pt differ
diff --git a/examples/new_gait.pt b/examples/new_gait.pt
new file mode 100644
index 00000000..1b7358e2
Binary files /dev/null and b/examples/new_gait.pt differ
diff --git a/examples/new_standing.pt b/examples/new_standing.pt
new file mode 100644
index 00000000..b322dcf7
Binary files /dev/null and b/examples/new_standing.pt differ
diff --git a/examples/original_walking.pt b/examples/original_walking.pt
new file mode 100644
index 00000000..2ba5b97f
Binary files /dev/null and b/examples/original_walking.pt differ
diff --git a/examples/test_policy.pt b/examples/test_policy.pt
new file mode 100644
index 00000000..69b339af
Binary files /dev/null and b/examples/test_policy.pt differ
diff --git a/examples/walking_micro.onnx b/examples/walking_micro.onnx
index 68a35b8e..615fcb8e 100644
Binary files a/examples/walking_micro.onnx and b/examples/walking_micro.onnx differ
diff --git a/examples/walking_micro.pt b/examples/walking_micro.pt
index 58c94e6c..0b9f321a 100644
Binary files a/examples/walking_micro.pt and b/examples/walking_micro.pt differ
diff --git a/examples/walking_pro.onnx b/examples/walking_pro.onnx
index bbdb1b15..432b0e95 100644
Binary files a/examples/walking_pro.onnx and b/examples/walking_pro.onnx differ
diff --git a/examples/walking_pro.pt b/examples/walking_pro.pt
index 6aedefd4..2878ac43 100644
Binary files a/examples/walking_pro.pt and b/examples/walking_pro.pt differ
diff --git a/policy_1.pt b/policy_1.pt
index 500afdda..0b102b3b 100644
Binary files a/policy_1.pt and b/policy_1.pt differ
diff --git a/sim/algo/ppo/on_policy_runner.py b/sim/algo/ppo/on_policy_runner.py
index e3fb6e1c..6b1933ab 100755
--- a/sim/algo/ppo/on_policy_runner.py
+++ b/sim/algo/ppo/on_policy_runner.py
@@ -30,6 +30,7 @@
# Copyright (c) 2024 Beijing RobotEra TECHNOLOGY CO.,LTD. All rights reserved.
# type: ignore
+import json
+from datetime import datetime  # used by write_config_file below
import os
import statistics
import time
@@ -39,6 +40,7 @@
import torch
import wandb
+import yaml
from torch.utils.tensorboard import SummaryWriter
from sim.algo.ppo.actor_critic import ActorCritic
@@ -46,6 +48,54 @@
from sim.algo.vec_env import VecEnv
+def make_serializable(obj):
+ """Recursively convert an object and its components to JSON-serializable types."""
+ if isinstance(obj, dict):
+ return {key: make_serializable(value) for key, value in obj.items()}
+ elif isinstance(obj, (list, tuple)):
+ return [make_serializable(item) for item in obj]
+ try:
+ # Try standard JSON serialization first
+ json.dumps(obj)
+ return obj
+ except (TypeError, OverflowError):
+ try:
+ # Try converting to float (for numpy numbers etc)
+ return float(obj)
+        except Exception:
+ try:
+ # Try converting to int
+ return int(obj)
+            except Exception:
+ # If all else fails, convert to string representation
+ return f"<{type(obj).__name__}:{str(obj)}>"
+
+
+def write_config_file(config: dict, log_dir: str) -> None:
+ """Writes the configuration to a JSON file in the log directory."""
+ if not os.path.exists(log_dir):
+ os.makedirs(log_dir)
+
+ # Add metadata section
+ metadata = {
+ "creation_time": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
+ "experiment_name": config["runner"]["experiment_name"],
+ "run_name": config["runner"]["run_name"],
+ }
+
+ serializable_config = make_serializable(config)
+
+ full_config = {"metadata": metadata, "configuration": serializable_config}
+
+ json_path = os.path.join(log_dir, "experiment_config.json")
+ with open(json_path, "w") as f:
+ json.dump(full_config, f, indent=2, sort_keys=True)
+
+ yaml_path = os.path.join(log_dir, "experiment_config.yaml")
+ with open(yaml_path, "w") as f:
+ yaml.dump(full_config, f, default_flow_style=False, sort_keys=False)
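+
+
+# Editor note: write_config_file is called once from OnPolicyRunner.learn() (when
+# current_learning_iteration == 0), so each log_dir gets a JSON and a YAML snapshot
+# of the full training configuration.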
+
+
class OnPolicyRunner:
def __init__(self, env: VecEnv, train_cfg: dict, log_dir: Optional[str] = None, device: str = "cpu"):
self.cfg = train_cfg["runner"]
@@ -102,6 +152,8 @@ def learn(self, num_learning_iterations: int, init_at_random_ep_len: bool = Fals
config=self.all_cfg,
)
self.writer = SummaryWriter(log_dir=self.log_dir, flush_secs=10)
+ if self.current_learning_iteration == 0:
+ write_config_file(self.all_cfg, self.log_dir)
if init_at_random_ep_len:
self.env.episode_length_buf = torch.randint_like(
self.env.episode_length_buf, high=int(self.env.max_episode_length)
diff --git a/sim/env.py b/sim/env.py
index 8c0dcadc..cee99897 100755
--- a/sim/env.py
+++ b/sim/env.py
@@ -11,32 +11,29 @@
def model_dir(robotname: str) -> Path:
print(os.getcwd())
- return Path(os.environ.get("MODEL_DIR", "sim/resources/" + robotname))
+ model_dir = Path(os.environ.get("MODEL_DIR", "sim/resources/" + robotname))
+ return model_dir
def run_dir() -> Path:
return Path(os.environ.get("RUN_DIR", "runs"))
-def robot_urdf_path(robotname: str, legs_only: bool = False) -> Path:
- if legs_only:
- robot_path = model_dir(robotname) / "robot_fixed.urdf"
- else:
- robot_path = model_dir(robotname) / "robot_fixed.urdf"
-
+def robot_urdf_path(robotname: str, legs_only: bool = True) -> Path:
+ print(f"robot_urdf_path({robotname=}, {legs_only=})")
+ filename = "robot_fixed.urdf" if legs_only else "robot.urdf"
+ robot_path = model_dir(robotname) / filename
if not robot_path.exists():
raise FileNotFoundError(f"URDF file not found: {robot_path}")
-
+ print(f"robot_path={robot_path}")
return robot_path.resolve()
-def robot_mjcf_path(robotname: str, legs_only: bool = False) -> Path:
- if legs_only:
- robot_path = model_dir(robotname) / "robot_fixed.xml"
- else:
- robot_path = model_dir(robotname) / "robot_fixed.xml"
-
+def robot_mjcf_path(robotname: str, legs_only: bool = True) -> Path:
+ print(f"robot_mjcf_path({robotname=}, {legs_only=})")
+ filename = "robot_fixed.xml" if legs_only else "robot.xml"
+ robot_path = model_dir(robotname) / filename
if not robot_path.exists():
raise FileNotFoundError(f"MJCF file not found: {robot_path}")
-
+ print(f"robot_path={robot_path}")
return robot_path.resolve()
diff --git a/sim/env_helpers.py b/sim/env_helpers.py
new file mode 100644
index 00000000..6e32537e
--- /dev/null
+++ b/sim/env_helpers.py
@@ -0,0 +1,51 @@
+import numpy as np
+
+np.set_printoptions(precision=2, suppress=True)
+
+
+def debug_robot_state(robot_name: str, obs_buf: np.ndarray, actions: np.ndarray) -> None:
+ """Debug function for robot state from observation buffer."""
+ # Unpack the obs_buf components
+ cmd = obs_buf[:5] # Command input (sin, cos, vel_x, vel_y, vel_yaw)
+ q = obs_buf[5:21] # Joint positions (16)
+ dq = obs_buf[21:37] # Joint velocities (16)
+ prev_actions = obs_buf[37:53] # Actions (16)
+ ang_vel = obs_buf[53:56] # Base angular velocity (3)
+ euler = obs_buf[56:59] # Base euler angles (3)
+
+ quat = euler_to_quat(euler)
+
+ # Detailed unpack
+ vel_x = cmd[2]
+ vel_y = cmd[3]
+ vel_yaw = cmd[4]
+
+ # Print the robot state
+ print(f"\n=== {robot_name} State ===")
+ # print(f"Command: {vel_x=:.2f} {vel_y=:.2f} {vel_yaw=:.2f}")
+ # print(f"RPY: [{euler[0]:.2f}, {euler[1]:.2f}, {euler[2]:.2f}]")
+ # print(f"Quat: [{quat[0]:.2f}, {quat[1]:.2f}, {quat[2]:.2f}, {quat[3]:.2f}]")
+ # print(f"AngVel: [{ang_vel[0]:.2f}, {ang_vel[1]:.2f}, {ang_vel[2]:.2f}]")
+ # print("Joints:")
+ # print(" Pos:", " ".join(f"{x:5.2f}" for x in q))
+ # print(" Vel:", " ".join(f"{x:5.2f}" for x in dq))
+ # print(" Act:", " ".join(f"{x:5.2f}" for x in prev_actions))
+ # print("Actions:")
+ # print(" ", " ".join(f"{x:5.2f}" for x in actions))
+ print(obs_buf[-59:])
+ print("=================")
+
+
+def euler_to_quat(euler):
+ """Convert euler angles (roll, pitch, yaw) to quaternion (x, y, z, w)."""
+ roll, pitch, yaw = euler
+
+ cr, cp, cy = np.cos(roll / 2), np.cos(pitch / 2), np.cos(yaw / 2)
+ sr, sp, sy = np.sin(roll / 2), np.sin(pitch / 2), np.sin(yaw / 2)
+
+ qw = cr * cp * cy + sr * sp * sy
+ qx = sr * cp * cy - cr * sp * sy
+ qy = cr * sp * cy + sr * cp * sy
+ qz = cr * cp * sy - sr * sp * cy
+
+ return np.array([qx, qy, qz, qw])
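+
+
+if __name__ == "__main__":
+    # Editor sketch: exercise the helpers with a dummy 59-D single-frame observation,
+    # matching the layout documented in debug_robot_state (5 + 16 + 16 + 16 + 3 + 3).
+    dummy_obs = np.zeros(59)
+    dummy_actions = np.zeros(16)
+    debug_robot_state("dummy", dummy_obs, dummy_actions)
+    print(euler_to_quat(np.array([0.0, 0.0, 0.0])))  # identity rotation -> [0. 0. 0. 1.]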
diff --git a/sim/envs/__init__.py b/sim/envs/__init__.py
index d307168f..69f77b05 100755
--- a/sim/envs/__init__.py
+++ b/sim/envs/__init__.py
@@ -14,7 +14,10 @@
from sim.envs.humanoids.g1_env import G1FreeEnv
from sim.envs.humanoids.h1_config import H1Cfg, H1CfgPPO
from sim.envs.humanoids.h1_env import H1FreeEnv
-from sim.envs.humanoids.stompymicro_config import StompyMicroCfg, StompyMicroCfgPPO
+from sim.envs.humanoids.stompymicro_config import (
+ StompyMicroCfg,
+ StompyMicroCfgPPO,
+)
from sim.envs.humanoids.stompymicro_env import StompyMicroEnv
from sim.envs.humanoids.stompymini_config import MiniCfg, MiniCfgPPO
from sim.envs.humanoids.stompymini_env import MiniFreeEnv
diff --git a/sim/envs/base/base_env.py b/sim/envs/base/base_env.py
new file mode 100644
index 00000000..108c3ccd
--- /dev/null
+++ b/sim/envs/base/base_env.py
@@ -0,0 +1,25 @@
+from abc import ABC, abstractmethod
+from typing import Any, Tuple
+
+
+class Env(ABC):
+ """Minimal base environment class for RL."""
+
+ @abstractmethod
+ def step(self, action: Any) -> Tuple[Any, float, bool, dict]:
+ """Run one timestep of the environment.
+ Returns: observation, reward, done, info"""
+ pass
+
+ @abstractmethod
+ def reset(self) -> Any:
+ """Reset the environment to initial state."""
+ pass
+
+ def render(self) -> None:
+ """Optional method to render the environment."""
+ pass
+
+ def close(self) -> None:
+ """Clean up resources."""
+ pass
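+
+
+# Editor note: a concrete environment (e.g. MujocoEnv in sim/envs/base/mujoco_env.py)
+# only needs to implement step() and reset(); render() and close() default to no-ops.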
diff --git a/sim/envs/base/legged_robot.py b/sim/envs/base/legged_robot.py
index db341d8f..6be1d110 100644
--- a/sim/envs/base/legged_robot.py
+++ b/sim/envs/base/legged_robot.py
@@ -102,11 +102,17 @@ def post_physics_step(self):
# TODO(pfb30) - debug this
origin = torch.tensor(self.cfg.init_state.rot, device=self.device).repeat(self.num_envs, 1)
origin = quat_conjugate(origin)
- self.base_quat[:] = quat_mul(origin, self.root_states[:, 3:7])
- self.base_euler_xyz = get_euler_xyz_tensor(self.base_quat)
- self.base_lin_vel[:] = quat_rotate_inverse(self.base_quat, self.root_states[:, 7:10])
- self.base_ang_vel[:] = quat_rotate_inverse(self.base_quat, self.root_states[:, 10:13])
+        if self.imu_indices is not None:
+ self.base_quat = quat_mul(origin, self.rigid_state[:, self.imu_indices, 3:7])
+ self.base_lin_vel[:] = quat_rotate_inverse(self.base_quat, self.rigid_state[:, self.imu_indices, 7:10])
+ self.base_ang_vel[:] = quat_rotate_inverse(self.base_quat, self.rigid_state[:, self.imu_indices, 10:13])
+ else:
+ self.base_quat = self.root_states[:, 3:7]
+ self.base_lin_vel[:] = quat_rotate_inverse(self.base_quat, self.root_states[:, 7:10])
+ self.base_ang_vel[:] = quat_rotate_inverse(self.base_quat, self.root_states[:, 10:13])
+
+ self.base_euler_xyz = get_euler_xyz_tensor(self.base_quat)
self.projected_gravity[:] = quat_rotate_inverse(self.base_quat, self.gravity_vec)
self._post_physics_step_callback()
@@ -190,7 +196,10 @@ def reset_idx(self, env_ids):
# TODO(pfb30) - debug this
origin = torch.tensor(self.cfg.init_state.rot, device=self.device).repeat(self.num_envs, 1)
origin = quat_conjugate(origin)
- self.base_quat[env_ids] = quat_mul(origin[env_ids, :], self.root_states[env_ids, 3:7])
+        if self.imu_indices is not None:
+ self.base_quat[env_ids] = quat_mul(origin[env_ids, :], self.rigid_state[env_ids, self.imu_indices, 3:7])
+ else:
+ self.base_quat[env_ids] = quat_mul(origin[env_ids, :], self.root_states[env_ids, 3:7])
self.base_euler_xyz = get_euler_xyz_tensor(self.base_quat)
self.projected_gravity[env_ids] = quat_rotate_inverse(self.base_quat[env_ids], self.gravity_vec[env_ids])
@@ -234,6 +243,7 @@ def _process_rigid_shape_props(self, props, env_id):
Returns:
[List[gymapi.RigidShapeProperties]]: Modified rigid shape properties
"""
+
if self.cfg.domain_rand.randomize_friction:
if env_id == 0:
# prepare friction randomization
@@ -276,6 +286,10 @@ def _process_dof_props(self, props, env_id):
self.dof_vel_limits[i] = props["velocity"][i].item() * self.cfg.safety.vel_limit
self.torque_limits[i] = props["effort"][i].item() * self.cfg.safety.torque_limit
+ for i, dof_name in enumerate(self.dof_names):
+ props["friction"][i] = self.cfg.asset.friction
+ props["armature"][i] = self.cfg.asset.armature
+
return props
def _process_rigid_body_props(self, props, env_id):
@@ -363,7 +377,6 @@ def _compute_torques(self, actions):
torques = p_gains * (actions_scaled + self.default_dof_pos - self.dof_pos) - d_gains * self.dof_vel
res = torch.clip(torques, -self.torque_limits, self.torque_limits)
-
return res
def _reset_dofs(self, env_ids):
@@ -481,17 +494,24 @@ def _init_buffers(self):
self.dof_state = gymtorch.wrap_tensor(dof_state_tensor)
self.dof_pos = self.dof_state.view(self.num_envs, self.num_dof, 2)[..., 0]
self.dof_vel = self.dof_state.view(self.num_envs, self.num_dof, 2)[..., 1]
- # self.base_quat = self.root_states[:, 3:7]
+
+ self.rigid_state = gymtorch.wrap_tensor(rigid_body_state) # .view(self.num_envs, -1, 13)
+ self.rigid_state = self.rigid_state.view(self.num_envs, -1, 13)
# TODO(pfb30): debug this
+ # self.base_quat = self.root_states[:, 3:7]
origin = torch.tensor(self.cfg.init_state.rot, device=self.device).repeat(self.num_envs, 1)
origin = quat_conjugate(origin)
- self.base_quat = quat_mul(origin, self.root_states[:, 3:7])
+
+        if self.imu_indices is not None:
+ self.base_quat = quat_mul(origin, self.rigid_state[:, self.imu_indices, 3:7])
+ else:
+ self.base_quat = quat_mul(origin, self.root_states[:, 3:7])
+
self.base_euler_xyz = get_euler_xyz_tensor(self.base_quat)
self.contact_forces = gymtorch.wrap_tensor(net_contact_forces).view(
self.num_envs, -1, 3
) # shape: num_envs, num_bodies, xyz axis
- self.rigid_state = gymtorch.wrap_tensor(rigid_body_state).view(self.num_envs, -1, 13)
# initialize some data used later on
self.common_step_counter = 0
@@ -537,8 +557,13 @@ def _init_buffers(self):
self.last_contacts = torch.zeros(
self.num_envs, len(self.feet_indices), dtype=torch.bool, device=self.device, requires_grad=False
)
- self.base_lin_vel = quat_rotate_inverse(self.base_quat, self.root_states[:, 7:10])
- self.base_ang_vel = quat_rotate_inverse(self.base_quat, self.root_states[:, 10:13])
+        if self.imu_indices is not None:
+ self.base_lin_vel = quat_rotate_inverse(self.base_quat, self.rigid_state[:, self.imu_indices, 7:10])
+ self.base_ang_vel = quat_rotate_inverse(self.base_quat, self.rigid_state[:, self.imu_indices, 10:13])
+ else:
+ self.base_lin_vel = quat_rotate_inverse(self.base_quat, self.root_states[:, 7:10])
+ self.base_ang_vel = quat_rotate_inverse(self.base_quat, self.root_states[:, 10:13])
+
self.projected_gravity = quat_rotate_inverse(self.base_quat, self.gravity_vec)
if self.cfg.terrain.measure_heights:
self.height_points = self._init_height_points()
@@ -592,6 +617,7 @@ def _prepare_reward_function(self):
# prepare list of functions
self.reward_functions = []
self.reward_names = []
+
for name, scale in self.reward_scales.items():
if name == "termination":
continue
@@ -696,6 +722,7 @@ def _create_envs(self):
self.num_dofs = len(self.dof_names)
feet_names = [s for s in body_names if s in self.cfg.asset.foot_name]
knee_names = [s for s in body_names if s in self.cfg.asset.knee_name]
+
penalized_contact_names = []
for name in self.cfg.asset.penalize_contacts_on:
penalized_contact_names.extend([s for s in body_names if name in s])
@@ -738,9 +765,9 @@ def _create_envs(self):
)
dof_props = self._process_dof_props(dof_props_asset, i)
self.gym.set_actor_dof_properties(env_handle, actor_handle, dof_props)
+
body_props = self.gym.get_actor_rigid_body_properties(env_handle, actor_handle)
body_props = self._process_rigid_body_props(body_props, i)
-
self.gym.set_actor_rigid_body_properties(env_handle, actor_handle, body_props, recomputeInertia=False)
self.envs.append(env_handle)
self.actor_handles.append(actor_handle)
@@ -753,8 +780,7 @@ def _create_envs(self):
print("feet", self.feet_indices)
print(f"Processed body properties for env {i}:")
for j, prop in enumerate(body_props):
- print(f" Body {j} mass: {prop.mass}")
- # prop.mass = prop.mass * 1e7
+ print(f" Body {j} mass: {prop.mass}")
print(f" Body {j} inertia: {prop.inertia.x}, {prop.inertia.y}, {prop.inertia.z}")
self.knee_indices = torch.zeros(len(knee_names), dtype=torch.long, device=self.device, requires_grad=False)
@@ -763,6 +789,12 @@ def _create_envs(self):
self.envs[0], self.actor_handles[0], knee_names[i]
)
+        # The IMU rigid body is optional; when it is absent, base states fall back to root_states.
+        imu_name = self.cfg.asset.imu_name
+        self.imu_indices = (
+            self.gym.find_actor_rigid_body_handle(self.envs[0], self.actor_handles[0], imu_name) if imu_name else -1
+        )
+        if self.imu_indices == -1:
+            self.imu_indices = None
+
self.penalised_contact_indices = torch.zeros(
len(penalized_contact_names), dtype=torch.long, device=self.device, requires_grad=False
)
diff --git a/sim/envs/base/legged_robot_config.py b/sim/envs/base/legged_robot_config.py
index 1198d62e..cbbb2cf8 100644
--- a/sim/envs/base/legged_robot_config.py
+++ b/sim/envs/base/legged_robot_config.py
@@ -29,8 +29,6 @@
#
# Copyright (c) 2024 Beijing RobotEra TECHNOLOGY CO.,LTD. All rights reserved.
-from enum import Enum
-
from sim.envs.base.base_config import BaseConfig
@@ -124,7 +122,10 @@ class safety:
class asset:
file = ""
name = "legged_robot" # actor name
- foot_name = "None" # name of the feet bodies, used to index body state and contact force tensors
+ foot_name = ["None"] # name of the feet bodies, used to index body state and contact force tensors
+ knee_name = ["None"]
+ imu_name = None
+
penalize_contacts_on = []
terminate_after_contacts_on = []
disable_gravity = False
@@ -143,6 +144,7 @@ class asset:
max_angular_velocity = 1000.0
max_linear_velocity = 1000.0
armature = 0.0
+ friction = 0.0
thickness = 0.01
class domain_rand:
diff --git a/sim/envs/base/mujoco_env.py b/sim/envs/base/mujoco_env.py
new file mode 100644
index 00000000..a7419dbc
--- /dev/null
+++ b/sim/envs/base/mujoco_env.py
@@ -0,0 +1,274 @@
+import mujoco
+import mujoco_viewer
+import numpy as np
+from dataclasses import dataclass
+from typing import Tuple
+from scipy.spatial.transform import Rotation as R
+from collections import deque
+
+from sim.envs.base.base_env import Env
+from sim.envs.base.legged_robot_config import LeggedRobotCfg
+from sim.envs.humanoids.stompymicro_config import StompyMicroCfg
+
+np.set_printoptions(precision=2, suppress=True)
+
+MUJOCO_TO_ISAAC = {
+ 7: 13, # right_shoulder_pitch
+ 8: 14, # right_shoulder_yaw
+ 9: 15, # right_elbow_yaw
+ 10: 5, # left_shoulder_pitch
+ 11: 6, # left_shoulder_yaw
+ 12: 7, # left_elbow_yaw
+ 13: 8, # right_hip_pitch
+ 14: 9, # right_hip_yaw
+ 15: 10, # right_hip_roll
+ 16: 11, # right_knee_pitch
+ 17: 12, # right_ankle_pitch
+ 18: 0, # left_hip_pitch
+ 19: 1, # left_hip_yaw
+ 20: 2, # left_hip_roll
+ 21: 3, # left_knee_pitch
+ 22: 4, # left_ankle_pitch
+}
+ISAAC_TO_MUJOCO = {v: k for k, v in MUJOCO_TO_ISAAC.items()}
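+
+# Editor note: MuJoCo's qpos starts with the 7 floating-base coordinates (3 position +
+# 4 quaternion), so actuated joints begin at index 7 above; the table maps each MuJoCo
+# qpos index to that joint's position in the Isaac/policy ordering (values 0-15).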
+
+
+@dataclass
+class MujocoCfg(StompyMicroCfg): # LeggedRobotCfg):
+ class gains:
+ tau_factor: float = 4
+ kp_scale: float = 1.0
+ kd_scale: float = 1.0
+
+
+class MujocoEnv(Env):
+ def __init__(
+ self,
+ cfg: MujocoCfg,
+ render: bool = False,
+ ):
+ self.cfg = cfg
+ self.robot = self.cfg.asset.robot
+ self.num_joints = len(self.robot.all_joints())
+ self.model = mujoco.MjModel.from_xml_path(self.cfg.asset.file_xml)
+ self.model.opt.timestep = self.cfg.sim.dt
+ self.data = mujoco.MjData(self.model)
+ self.viewer = mujoco_viewer.MujocoViewer(self.model, self.data) if render else None
+
+ self.mujoco_indices = np.array([k for k in MUJOCO_TO_ISAAC.keys()])
+ self.isaac_order = np.array([MUJOCO_TO_ISAAC[k] for k in self.mujoco_indices])
+
+ stiffness = self.robot.stiffness()
+ damping = self.robot.damping()
+ effort = self.robot.effort()
+
+ self.kps = np.array([stiffness[joint.split('_', 1)[1]] for joint in self.robot.all_joints()]) * self.cfg.gains.kp_scale
+ self.kds = np.array([damping[joint.split('_', 1)[1]] for joint in self.robot.all_joints()]) * self.cfg.gains.kd_scale
+ self.tau_limits = np.array([effort[joint.split('_', 1)[1]] for joint in self.robot.all_joints()]) * self.cfg.gains.tau_factor
+
+ self.default_dof_pos = np.array([
+ self.robot.default_standing().get(joint, 0.0)
+ for joint in self.robot.all_joints()
+ ])
+
+ self.num_joints = len(self.robot.all_joints())
+
+        ## Initialize Buffers ##
+ self.obs_history = deque(maxlen=self.cfg.env.frame_stack)
+ for _ in range(self.cfg.env.frame_stack):
+ self.obs_history.append(np.zeros(self.cfg.env.num_single_obs))
+ self.commands = np.zeros(4) # [vel_x, vel_y, ang_vel_yaw, heading]
+ self.last_actions = np.zeros(self.num_joints)
+ self.origin_quat = R.from_quat(self.cfg.init_state.rot)
+ self.origin_quat_inv = self.origin_quat.inv()
+        ## End Initialize Buffers ##
+
+ self.commands_scale = np.array([
+ self.cfg.normalization.obs_scales.lin_vel,
+ self.cfg.normalization.obs_scales.lin_vel,
+ self.cfg.normalization.obs_scales.ang_vel
+ ])
+
+ self.reset()
+ self.compute_observations()
+ print_model_info(self.model, self.data)
+
+ def compute_observations(self) -> None:
+ """Compute observations matching StompyMicroEnv implementation"""
+ phase = self.episode_length_buf * self.cfg.sim.dt / self.cfg.rewards.cycle_time
+ sin_pos = np.sin(2 * np.pi * phase)
+ cos_pos = np.cos(2 * np.pi * phase)
+ command_input = np.array([sin_pos, cos_pos] + list(self.commands[:3] * self.commands_scale))
+
+ q = self.data.qpos[-self.num_joints:][self.isaac_order]
+ dq = self.data.qvel[-self.num_joints:][self.isaac_order]
+ q = (q - self.default_dof_pos) * self.cfg.normalization.obs_scales.dof_pos
+ dq = dq * self.cfg.normalization.obs_scales.dof_vel
+
+ sensor_quat = self.data.sensor("orientation").data[[1, 2, 3, 0]] # wxyz to xyzw
+ sensor_rotation = R.from_quat(sensor_quat)
+
+ base_rotation = self.origin_quat_inv * sensor_rotation
+ base_quat = base_rotation.as_quat() # returns xyzw quaternion
+
+ ang_vel_world = self.data.sensor("angular-velocity").data
+ base_ang_vel = base_rotation.inv().apply(ang_vel_world)
+ base_ang_vel = base_ang_vel * self.cfg.normalization.obs_scales.ang_vel
+
+ base_euler = base_rotation.as_euler('xyz') * self.cfg.normalization.obs_scales.quat
+
+ assert command_input.shape == (5,)
+ assert q.shape == (self.num_joints,)
+ assert dq.shape == (self.num_joints,)
+ assert base_ang_vel.shape == (3,)
+ assert base_euler.shape == (3,)
+
+ # Match Isaac's observation concatenation order
+ obs_buf = np.concatenate([
+ command_input.flatten(), # 5D
+ q.flatten(), # num_joints D
+ dq.flatten(), # num_joints D
+ self.last_actions.flatten(), # num_joints D
+ base_ang_vel.flatten(), # 3D
+ base_euler.flatten() # 3D
+ ])
+
+ # TODO: Add noise
+
+ self.obs_history.append(obs_buf)
+
+ obs_buf_all = np.stack([self.obs_history[i] for i in range(self.obs_history.maxlen)], axis=1) # Shape: (K, T)
+ assert obs_buf_all.shape == (self.cfg.env.num_single_obs, self.cfg.env.frame_stack)
+
+ self.obs_buf = obs_buf_all.reshape(-1)[np.newaxis, :]
+ assert self.obs_buf.shape == (1, self.cfg.env.num_single_obs * self.cfg.env.frame_stack)
+
+ def reset(self) -> Tuple[np.ndarray, ...]:
+ """Reset environment matching Isaac's initialization"""
+ # Reset episode counter
+ self.episode_length_buf = 0
+ self.reset_buf = False
+
+ try:
+ self.data.qpos = self.model.keyframe("default").qpos
+ self.data.qpos[-self.num_joints:] = self.default_dof_pos
+        except KeyError:
+            print("Warning: keyframe 'default' not found in model; using default joint positions on a zeroed base state instead.")
+ self.data.qpos[-self.num_joints:] = self.default_dof_pos.copy()
+
+ self.data.qpos[-self.num_joints:] += np.random.uniform(
+ -self.cfg.domain_rand.start_pos_noise,
+ self.cfg.domain_rand.start_pos_noise,
+ size=self.num_joints
+ )
+ self.data.qvel = np.zeros_like(self.data.qvel)
+ self.data.qacc = np.zeros_like(self.data.qacc)
+ self.data.ctrl = np.zeros_like(self.data.ctrl)
+
+ self.actions = np.zeros(self.num_joints)
+ self.last_actions = np.zeros(self.num_joints)
+
+ mujoco.mj_step(self.model, self.data)
+ obs, _, _, _ = self.step(np.zeros(self.num_joints))
+ return obs
+
+ def step(self, action: np.ndarray):
+ """Execute environment step matching Isaac's implementation more closely"""
+ actions = np.clip(action, -self.cfg.normalization.clip_actions, self.cfg.normalization.clip_actions)
+ delay = np.random.rand(1) * self.cfg.domain_rand.action_delay
+ actions = (1 - delay) * actions + delay * self.actions
+ actions += self.cfg.domain_rand.action_noise * np.random.randn(*actions.shape) * actions
+ self.actions = np.clip(action, -self.cfg.normalization.clip_actions, self.cfg.normalization.clip_actions)
+
+ # self.render()
+
+ # TODO: Understand why below doesn't work!
+ for _ in range(self.cfg.control.decimation):
+ self.render()
+ self.data.ctrl = self._compute_torques(actions)
+ mujoco.mj_step(self.model, self.data)
+
+ ## Post Physics Step ##
+ self.episode_length_buf += 1
+
+ # origin and imu_indices weirdness??
+
+ self.check_termination()
+ self.compute_rewards()
+ if self.reset_buf:
+ self.reset()
+
+ self.compute_observations()
+
+        ## End Post Physics Step ##
+
+ self.obs_buf = np.clip(self.obs_buf, -self.cfg.normalization.clip_observations, self.cfg.normalization.clip_observations)
+
+ self.extras = {
+ 'step': self.episode_length_buf,
+ 'fall': self.reset_buf and self.episode_length_buf < self.cfg.env.episode_length_s / self.cfg.sim.dt
+ }
+
+ return self.obs_buf, self.rew_buf, self.reset_buf, self.extras
+
+ def render(self):
+ if self.viewer:
+ self.viewer.render()
+
+ def close(self):
+ if self.viewer:
+ self.viewer.close()
+
+ ## Helpers ##
+
+ def compute_rewards(self) -> None:
+ # TODO: Implement reward computation
+ self.rew_buf = 0
+
+ def check_termination(self) -> None:
+ """Check termination conditions"""
+ self.reset_buf = False
+ self.reset_buf |= self.episode_length_buf >= self.cfg.env.episode_length_s / self.cfg.sim.dt
+ self.reset_buf |= self.data.qpos[2] < self.cfg.asset.termination_height
+
+ def _compute_torques(self, actions: np.ndarray) -> np.ndarray:
+ """Calculate torques from position commands"""
+ if actions.ndim == 2:
+ actions = actions.squeeze(0)
+
+ actions_mujoco = actions[self.isaac_order]
+
+ actions_scaled = actions_mujoco * self.cfg.control.action_scale
+ q_mujoco = self.data.qpos[-self.num_joints:]
+ dq_mujoco = self.data.qvel[-self.num_joints:]
+
+ torques = self.kps * (actions_scaled + self.default_dof_pos - q_mujoco) - self.kds * dq_mujoco
+ return np.clip(torques, -self.tau_limits, self.tau_limits)
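+
+    # Editor note: the PD law above mirrors LeggedRobot._compute_torques
+    # (kp * (action_scale * action + default_pos - q) - kd * dq, clipped to the torque
+    # limits), keeping the MuJoCo rollout comparable to the Isaac training setup.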
+
+
+def print_model_info(model, data):
+ print(f"\nMujoco DOF info:")
+ print(f"qpos shape: {data.qpos.shape}")
+ print(f"qvel shape: {data.qvel.shape}")
+ print("\nJoint info:")
+ for i in range(model.njnt):
+ print(f"Joint {i}: {model.joint(i).name}, type={model.joint(i).type}, qposadr={model.joint(i).qposadr}, dofadr={model.joint(i).dofadr}")
+
+ print("\nBody Properties:")
+ for i in range(model.nbody):
+ name = model.body(i).name
+ mass = model.body(i).mass[0]
+ inertia = [
+ model.body_inertia[i][0], # ixx
+ model.body_inertia[i][1], # iyy
+ model.body_inertia[i][2] # izz
+ ]
+ quat = model.body(i).quat
+ pos = model.body(i).pos
+ print(f"{name}: mass={mass:.3f} | inertia={inertia} | pos={pos} | quat={quat}")
+
+ # print("\nFriction Properties:")
+ # for i in range(model.ngeom):
+ # name = model.geom(i).name
+ # friction = model.geom(i).friction
+ # print(f"{name}: friction={friction}")
diff --git a/sim/envs/humanoids/dora_env.py b/sim/envs/humanoids/dora_env.py
index 94ffa9be..116aa544 100644
--- a/sim/envs/humanoids/dora_env.py
+++ b/sim/envs/humanoids/dora_env.py
@@ -1,14 +1,12 @@
# mypy: disable-error-code="valid-newtype"
"""Defines the environment for training the humanoid."""
-from isaacgym.torch_utils import * # isort:skip
-
from sim.envs.base.legged_robot import LeggedRobot
from sim.resources.dora.joints import Robot
from sim.utils.terrain import HumanoidTerrain
from isaacgym import gymtorch # isort:skip
-
+from isaacgym.torch_utils import * # isort:skip
import torch # isort:skip
diff --git a/sim/envs/humanoids/stompymicro_config.py b/sim/envs/humanoids/stompymicro_config.py
index d5b83e76..12cbe362 100644
--- a/sim/envs/humanoids/stompymicro_config.py
+++ b/sim/envs/humanoids/stompymicro_config.py
@@ -1,13 +1,15 @@
"""Defines the environment configuration for the Getting up task"""
-from sim.env import robot_urdf_path
+import numpy as np
+
+from sim.env import robot_mjcf_path, robot_urdf_path
from sim.envs.base.legged_robot_config import ( # type: ignore
LeggedRobotCfg,
LeggedRobotCfgPPO,
)
from sim.resources.stompymicro.joints import Robot
-NUM_JOINTS = len(Robot.all_joints()) # 20
+NUM_JOINTS = len(Robot.all_joints()) # 16
class StompyMicroCfg(LeggedRobotCfg):
@@ -23,7 +25,7 @@ class env(LeggedRobotCfg.env):
num_privileged_obs = int(c_frame_stack * single_num_privileged_obs)
num_actions = NUM_JOINTS
num_envs = 4096
- episode_length_s = 24 # episode length in seconds
+ episode_length_s = 60 # episode length in seconds
use_ref_actions = False
class safety:
@@ -35,13 +37,15 @@ class safety:
class asset(LeggedRobotCfg.asset):
name = "stompymicro"
- file = str(robot_urdf_path(name))
+ robot = Robot
+ file = str(robot_urdf_path(name, legs_only=Robot.legs_only))
+ file_xml = str(robot_mjcf_path(name, legs_only=Robot.legs_only))
foot_name = ["foot_left", "foot_right"]
- knee_name = ["ankle_pitch_left", "ankle_pitch_right"]
+ knee_name = ["left_knee_pitch_motor", "right_knee_pitch_motor"]
- termination_height = 0.05
- default_feet_height = 0.01
+ termination_height = 0.25
+ default_feet_height = 0.02
terminate_after_contacts_on = ["torso"]
@@ -51,6 +55,11 @@ class asset(LeggedRobotCfg.asset):
replace_cylinder_with_capsule = False
fix_base_link = False
+ # BAM parameters
+ # TODO update effort to larger one
+ friction = 0.053343597773929877
+ armature = 0.008793405204572328
+
class terrain(LeggedRobotCfg.terrain):
mesh_type = "plane"
# mesh_type = 'trimesh'
@@ -70,14 +79,15 @@ class terrain(LeggedRobotCfg.terrain):
class noise:
add_noise = True
- noise_level = 0.6 # scales other values
+ noise_level = 3.0 # scales other values
class noise_scales:
dof_pos = 0.05
- dof_vel = 0.5
- ang_vel = 0.1
- lin_vel = 0.05
- quat = 0.03
+ dof_vel = 1.5
+ lin_vel = 0.1
+ ang_vel = 0.2
+ quat = 0.1
+ gravity = 0.05
height_measurements = 0.1
class init_state(LeggedRobotCfg.init_state):
@@ -97,7 +107,7 @@ class control(LeggedRobotCfg.control):
# action scale: target angle = actionScale * action + defaultAngle
action_scale = 0.25
# decimation: Number of control action updates @ sim DT per policy DT
- decimation = 10 # 100hz
+ decimation = 20 # 50hz
class sim(LeggedRobotCfg.sim):
dt = 0.001 # 1000 Hz
@@ -108,7 +118,7 @@ class physx(LeggedRobotCfg.sim.physx):
num_threads = 10
solver_type = 1 # 0: pgs, 1: tgs
num_position_iterations = 4
- num_velocity_iterations = 1
+ num_velocity_iterations = 0
contact_offset = 0.01 # [m]
rest_offset = 0.0 # [m]
bounce_threshold_velocity = 0.1 # [m/s]
@@ -119,18 +129,18 @@ class physx(LeggedRobotCfg.sim.physx):
contact_collection = 2
class domain_rand(LeggedRobotCfg.domain_rand):
- start_pos_noise = 0.1
+ start_pos_noise = 0.05
randomize_friction = True
friction_range = [0.1, 2.0]
- randomize_base_mass = True # True
+ randomize_base_mass = False
added_mass_range = [-0.5, 0.5]
- push_robots = True # True
- push_interval_s = 4
- max_push_vel_xy = 0.05
- max_push_ang_vel = 0.1
+ push_robots = True
+ push_interval_s = 3.5
+ max_push_vel_xy = 0.6 # TODO: This was set to make standing significantly harder
+ max_push_ang_vel = 0.8
# dynamic randomization
action_delay = 0.5
- action_noise = 0.02
+ action_noise = 0.1 # TODO: This was set to make standing harder
class commands(LeggedRobotCfg.commands):
# Vers: lin_vel_x, lin_vel_y, ang_vel_yaw, heading (in heading mode ang_vel_yaw is recomputed from heading error)
@@ -139,53 +149,50 @@ class commands(LeggedRobotCfg.commands):
heading_command = True # if true: compute ang vel command from heading error
class ranges:
- lin_vel_x = [-0.3, 0.6] # min max [m/s]
- lin_vel_y = [-0.3, 0.3] # min max [m/s]
- ang_vel_yaw = [-0.3, 0.3] # min max [rad/s]
- heading = [-3.14, 3.14]
+ # for each, min / max
+ lin_vel_x = [-0.4, 0.4] # [m/s]
+ lin_vel_y = [-0.4, 0.4] # [m/s]
+ ang_vel_yaw = [-np.pi / 20, np.pi / 20] # [rad/s]
+ heading = [-np.pi, np.pi] # [rad]
+ # Walking Task
class rewards:
base_height_target = Robot.height
- min_dist = 0.07
- max_dist = 0.14
+ min_dist = 0.14
+ max_dist = 0.20
# put some settings here for LLM parameter tuning
target_joint_pos_scale = 0.17 # rad
- target_feet_height = 0.02 # m
- cycle_time = 0.2 # sec
+ target_feet_height = 0.05 # m
+ cycle_time = 0.5 # sec
# if true negative total rewards are clipped at zero (avoids early termination problems)
only_positive_rewards = True
# tracking reward = exp(error*sigma)
tracking_sigma = 5.0
- max_contact_force = 50 # forces above this value are penalized
+ max_contact_force = 100 # forces above this value are penalized
class scales:
-
- # TODO: add an argument
- walking = False
-
- if walking == True:
- # reference motion tracking
- joint_pos = 1.6
- feet_clearance = 1.2
- feet_contact_number = 1.2
- feet_air_time = 1.2
- foot_slip = -0.05
- feet_distance = 0.2
- knee_distance = 0.2
- # contact
- feet_contact_forces = -0.01
- # vel tracking
- tracking_lin_vel = 1.2
- tracking_ang_vel = 1.1
- vel_mismatch_exp = 0.5 # lin_z; ang x,y
- low_speed = 0.4
- track_vel_hard = 0.5
+ # reference motion tracking
+ joint_pos = 1.0
+ feet_clearance = 1.0
+ feet_contact_number = 1.5
+ feet_air_time = 2.0
+ foot_slip = -0.3
+ feet_distance = 0.2
+ knee_distance = 0.2
+ # contact
+ feet_contact_forces = -0.1
+ # vel tracking
+ tracking_lin_vel = 1.0
+ tracking_ang_vel = 1.0
+ vel_mismatch_exp = 1.0 # lin_z; ang x,y
+ low_speed = 1.0
+ track_vel_hard = 1.0
# base pos
default_joint_pos = 1.0
- orientation = 1
- base_height = 0.2
+ orientation = 2.0
+ base_height = 0.5
base_acc = 0.2
# energy
action_smoothness = -0.002
@@ -212,6 +219,32 @@ class viewer:
lookat = [0, -2, 0]
+# Standing Task
+class rewards:
+ base_height_target = Robot.height
+ min_dist = 0.06
+ max_dist = 0.16
+
+ cycle_time = 0.5 # sec
+ target_joint_pos_scale = 0.17 # rad
+ only_positive_rewards = True
+ tracking_sigma = 5.0
+ max_contact_force = 50 # forces above this value are penalized
+
+ class scales:
+ # base pos
+ default_joint_pos = 1.0
+ base_height = 0.2
+ base_acc = 0.2
+ # energy
+ action_smoothness = -0.002
+ torques = -1e-4
+ dof_vel = -5e-3
+ dof_acc = -1e-6
+
+# StompyMicroCfg.rewards = rewards  # uncomment to use the standing-task rewards above (leave commented for the walking task)
+
+
class StompyMicroCfgPPO(LeggedRobotCfgPPO):
seed = 5
runner_class_name = "OnPolicyRunner" # DWLOnPolicyRunner
@@ -233,14 +266,19 @@ class runner:
policy_class_name = "ActorCritic"
algorithm_class_name = "PPO"
num_steps_per_env = 60 # per iteration
- max_iterations = 3001 # number of policy updates
+ max_iterations = 12001 # number of policy updates
# logging
- save_interval = 300 # check for potential saves every this many iterations
+ save_interval = 100 # check for potential saves every this many iterations
experiment_name = "StompyMicro"
- run_name = ""
+ run_name = "WalkingRobustFinal"
# load and resume
resume = False
load_run = -1 # -1 = last run
checkpoint = -1 # -1 = last saved model
resume_path = None # updated from load_run and chkpt
+
+
+if __name__ == "__main__":
+ cfg = StompyMicroCfg()
+ print(cfg.__dict__)
diff --git a/sim/envs/humanoids/stompymicro_env.py b/sim/envs/humanoids/stompymicro_env.py
index 79669b50..c5a4232c 100644
--- a/sim/envs/humanoids/stompymicro_env.py
+++ b/sim/envs/humanoids/stompymicro_env.py
@@ -2,13 +2,13 @@
"""Defines the environment for training the humanoid."""
from sim.envs.base.legged_robot import LeggedRobot
+from sim.envs.humanoids.stompymicro_config import StompyMicroCfg
from sim.resources.stompymicro.joints import Robot
from sim.utils.terrain import HumanoidTerrain
from isaacgym import gymtorch # isort:skip
from isaacgym.torch_utils import * # isort: skip
-
import torch # isort:skip
@@ -46,7 +46,7 @@ class StompyMicroEnv(LeggedRobot):
reset_idx(env_ids): Resets the environment for the specified environment IDs.
"""
- def __init__(self, cfg, sim_params, physics_engine, sim_device, headless):
+ def __init__(self, cfg: StompyMicroCfg, sim_params, physics_engine, sim_device: str, headless: bool):
super().__init__(cfg, sim_params, physics_engine, sim_device, headless)
self.last_feet_z = self.cfg.asset.default_feet_height
self.feet_height = torch.zeros((self.num_envs, 2), device=self.device)
@@ -57,13 +57,26 @@ def __init__(self, cfg, sim_params, physics_engine, sim_device, headless):
self.legs_joints = {}
for name, joint in Robot.legs.left.joints_motors():
- print(name)
joint_handle = self.gym.find_actor_dof_handle(env_handle, actor_handle, joint)
self.legs_joints["left_" + name] = joint_handle
+ print(f"Robot.legs.left.{name} -> {joint} ({joint_handle})")
for name, joint in Robot.legs.right.joints_motors():
joint_handle = self.gym.find_actor_dof_handle(env_handle, actor_handle, joint)
self.legs_joints["right_" + name] = joint_handle
+ print(f"Robot.legs.right.{name} -> {joint} ({joint_handle})")
+
+ if not Robot.legs_only:
+ self.arms_joints = {}
+ for name, joint in Robot.arms.left.joints_motors():
+ joint_handle = self.gym.find_actor_dof_handle(env_handle, actor_handle, joint)
+ self.arms_joints["left_" + name] = joint_handle
+ print(f"Robot.arms.left.{name} -> {joint} ({joint_handle})")
+
+ for name, joint in Robot.arms.right.joints_motors():
+ joint_handle = self.gym.find_actor_dof_handle(env_handle, actor_handle, joint)
+ self.arms_joints["right_" + name] = joint_handle
+ print(f"Robot.arms.right.{name} -> {joint} ({joint_handle})")
self.compute_observations()
@@ -118,7 +131,6 @@ def compute_ref_state(self):
sin_pos = torch.sin(2 * torch.pi * phase)
sin_pos_l = sin_pos.clone()
sin_pos_r = sin_pos.clone()
- default_clone = self.default_dof_pos.clone()
self.ref_dof_pos = self.default_dof_pos.repeat(self.num_envs, 1)
scale_1 = self.cfg.rewards.target_joint_pos_scale
@@ -206,14 +218,14 @@ def compute_observations(self):
self.privileged_obs_buf = torch.cat(
(
self.command_input, # 2 + 3
- (self.dof_pos - self.default_joint_pd_target) * self.obs_scales.dof_pos, # 12
- self.dof_vel * self.obs_scales.dof_vel, # 12
- self.actions, # 12
- diff, # 12
+ (self.dof_pos - self.default_joint_pd_target) * self.obs_scales.dof_pos, # 16
+ self.dof_vel * self.obs_scales.dof_vel, # 16
+ self.actions, # 16
+ diff, # 16
self.base_lin_vel * self.obs_scales.lin_vel, # 3
self.base_ang_vel * self.obs_scales.ang_vel, # 3
self.base_euler_xyz * self.obs_scales.quat, # 3
- self.rand_push_force[:, :2], # 3
+ self.rand_push_force[:, :2], # 2
self.rand_push_torque, # 3
self.env_frictions, # 1
self.body_mass / 30.0, # 1
@@ -221,19 +233,19 @@ def compute_observations(self):
contact_mask, # 2
),
dim=-1,
- )
+ ) # 89
obs_buf = torch.cat(
(
self.command_input, # 5 = 2D(sin cos) + 3D(vel_x, vel_y, aug_vel_yaw)
- q, # 20D
- dq, # 20D
- self.actions, # 20D
+ q, # 16D
+ dq, # 16D
+ self.actions, # 16D
self.base_ang_vel * self.obs_scales.ang_vel, # 3
self.base_euler_xyz * self.obs_scales.quat, # 3
),
dim=-1,
- )
+ ) # 59D
if self.cfg.terrain.measure_heights:
heights = (
@@ -317,7 +329,7 @@ def _reward_feet_air_time(self):
self.last_contacts = contact
first_contact = (self.feet_air_time > 0.0) * self.contact_filt
self.feet_air_time += self.dt
- air_time = self.feet_air_time.clamp(0, 0.5) * first_contact
+ air_time = self.feet_air_time.clamp(0, 1) * first_contact
self.feet_air_time *= ~self.contact_filt
return air_time.sum(dim=1)
@@ -336,7 +348,8 @@ def _reward_orientation(self):
"""
quat_mismatch = torch.exp(-torch.sum(torch.abs(self.base_euler_xyz[:, :2]), dim=1) * 10)
orientation = torch.exp(-torch.norm(self.projected_gravity[:, :2], dim=1) * 20)
- return (quat_mismatch + orientation) / 2
+ reward = (quat_mismatch + orientation) / 2
+ return reward
def _reward_feet_contact_forces(self):
"""Calculates the reward for keeping contact forces within a specified range. Penalizes
@@ -406,7 +419,9 @@ def _reward_track_vel_hard(self):
linear_error = 0.2 * (lin_vel_error + ang_vel_error)
- return (lin_vel_error_exp + ang_vel_error_exp) / 2.0 - linear_error
+ reward = (lin_vel_error_exp + ang_vel_error_exp) / 2.0 - linear_error
+
+ return reward
def _reward_tracking_lin_vel(self):
"""Tracks linear velocity commands along the xy axes.
diff --git a/sim/envs/humanoids/stompypro_config.py b/sim/envs/humanoids/stompypro_config.py
index c68ab34e..6d1e020e 100644
--- a/sim/envs/humanoids/stompypro_config.py
+++ b/sim/envs/humanoids/stompypro_config.py
@@ -7,7 +7,7 @@
)
from sim.resources.stompypro.joints import Robot
-NUM_JOINTS = len(Robot.all_joints()) # 12
+NUM_JOINTS = len(Robot.all_joints())
class StompyProCfg(LeggedRobotCfg):
@@ -39,6 +39,7 @@ class asset(LeggedRobotCfg.asset):
foot_name = ["L_foot", "R_foot"]
knee_name = ["L_calf", "R_calf"]
+ imu_name = "imu_link"
termination_height = 0.2
default_feet_height = 0.0
@@ -50,8 +51,8 @@ class asset(LeggedRobotCfg.asset):
fix_base_link = False
class terrain(LeggedRobotCfg.terrain):
- mesh_type = "plane"
- # mesh_type = 'trimesh'
+ # mesh_type = "plane"
+ mesh_type = "trimesh"
curriculum = False
# rough terrain only:
measure_heights = False
@@ -161,9 +162,9 @@ class rewards:
class scales:
# reference motion tracking
- joint_pos = 1.6
- feet_clearance = 1.6
- feet_contact_number = 1.5
+ joint_pos = 1.9
+ feet_clearance = 1.7
+ feet_contact_number = 1.7
# gait
feet_air_time = 1.6
foot_slip = -0.05
@@ -172,8 +173,8 @@ class scales:
# contact
feet_contact_forces = -0.01
# vel tracking
- tracking_lin_vel = 1.2
- tracking_ang_vel = 1.3
+ tracking_lin_vel = 1.0
+ tracking_ang_vel = 1.0
vel_mismatch_exp = 0.5 # lin_z; ang x,y
low_speed = 0.2
track_vel_hard = 0.5
diff --git a/sim/model_export.py b/sim/model_export.py
new file mode 100644
index 00000000..6e797e02
--- /dev/null
+++ b/sim/model_export.py
@@ -0,0 +1,297 @@
+"""Script to convert weights to Rust-compatible format."""
+
+import re
+from dataclasses import dataclass, fields
+from io import BytesIO
+from typing import List, Optional, Tuple
+
+import onnx
+import onnxruntime as ort
+
+from sim.scripts.create_mjcf import load_embodiment
+
+import torch # isort: skip
+from torch import Tensor, nn # isort: skip
+from torch.distributions import Normal # isort: skip
+
+
+@dataclass
+class ActorCfg:
+ embodiment: str = "stompypro"
+ cycle_time: float = 0.4 # Cycle time for sinusoidal command input
+ action_scale: float = 0.25 # Scale for actions
+ lin_vel_scale: float = 2.0 # Scale for linear velocity
+ ang_vel_scale: float = 1.0 # Scale for angular velocity
+ quat_scale: float = 1.0 # Scale for quaternion
+ dof_pos_scale: float = 1.0 # Scale for joint positions
+ dof_vel_scale: float = 0.05 # Scale for joint velocities
+ frame_stack: int = 15 # Number of frames to stack for the policy input
+ clip_observations: float = 18.0 # Clip observations to this value
+ clip_actions: float = 18.0 # Clip actions to this value
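+
+
+# Editor note: these scale and clip values feed Actor.forward below and should match
+# the normalization settings the policy was trained with for the chosen embodiment.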
+
+
+class ActorCritic(nn.Module):
+ def __init__(
+ self,
+ num_actor_obs: int,
+ num_critic_obs: int,
+ num_actions: int,
+ actor_hidden_dims: List[int] = [256, 256, 256],
+ critic_hidden_dims: List[int] = [256, 256, 256],
+ init_noise_std: float = 1.0,
+ activation: nn.Module = nn.ELU(),
+ ) -> None:
+ super(ActorCritic, self).__init__()
+
+ mlp_input_dim_a = num_actor_obs
+ mlp_input_dim_c = num_critic_obs
+
+ # Policy function.
+ actor_layers = []
+ actor_layers.append(nn.Linear(mlp_input_dim_a, actor_hidden_dims[0]))
+ actor_layers.append(activation)
+ for dim_i in range(len(actor_hidden_dims)):
+ if dim_i == len(actor_hidden_dims) - 1:
+ actor_layers.append(nn.Linear(actor_hidden_dims[dim_i], num_actions))
+ else:
+ actor_layers.append(nn.Linear(actor_hidden_dims[dim_i], actor_hidden_dims[dim_i + 1]))
+ actor_layers.append(activation)
+ self.actor = nn.Sequential(*actor_layers)
+
+ # Value function.
+ critic_layers = []
+ critic_layers.append(nn.Linear(mlp_input_dim_c, critic_hidden_dims[0]))
+ critic_layers.append(activation)
+ for dim_i in range(len(critic_hidden_dims)):
+ if dim_i == len(critic_hidden_dims) - 1:
+ critic_layers.append(nn.Linear(critic_hidden_dims[dim_i], 1))
+ else:
+ critic_layers.append(nn.Linear(critic_hidden_dims[dim_i], critic_hidden_dims[dim_i + 1]))
+ critic_layers.append(activation)
+ self.critic = nn.Sequential(*critic_layers)
+
+ # Action noise.
+ self.std = nn.Parameter(init_noise_std * torch.ones(num_actions))
+ self.distribution = None
+
+ # Disable args validation for speedup.
+ Normal.set_default_validate_args = False
+
+
+class Actor(nn.Module):
+ """Actor model.
+
+ Parameters:
+ policy: The policy network.
+ cfg: The configuration for the actor.
+ """
+
+ def __init__(self, policy: nn.Module, cfg: ActorCfg) -> None:
+ super().__init__()
+
+ self.robot = load_embodiment(cfg.embodiment)
+
+ # Policy config
+ default_dof_pos_dict = self.robot.default_standing()
+ self.num_actions = len(self.robot.all_joints())
+ self.frame_stack = cfg.frame_stack
+
+ # 11 is the number of single observation features - 6 from IMU, 5 from command input
+ # 3 comes from the number of times num_actions is repeated in the observation (dof_pos, dof_vel, prev_actions)
+ self.num_single_obs = 11 + self.num_actions * 3
+ self.num_observations = int(self.frame_stack * self.num_single_obs)
+
+ self.policy = policy
+
+ # This is the policy reference joint positions and should be the same order as the policy and mjcf file.
+ # CURRENTLY NOT USED IN FORWARD PASS TO MAKE MORE GENERALIZEABLE FOR REAL AND SIM
+ self.default_dof_pos = torch.tensor(list(default_dof_pos_dict.values()))
+
+ self.action_scale = cfg.action_scale
+ self.lin_vel_scale = cfg.lin_vel_scale
+ self.ang_vel_scale = cfg.ang_vel_scale
+ self.quat_scale = cfg.quat_scale
+ self.dof_pos_scale = cfg.dof_pos_scale
+ self.dof_vel_scale = cfg.dof_vel_scale
+
+ self.clip_observations = cfg.clip_observations
+ self.clip_actions = cfg.clip_actions
+
+ self.cycle_time = cfg.cycle_time
+
+ def get_init_buffer(self) -> Tensor:
+ return torch.zeros(self.num_observations)
+
+ def forward(
+ self,
+ x_vel: Tensor, # x-coordinate of the target velocity
+ y_vel: Tensor, # y-coordinate of the target velocity
+ rot: Tensor, # target angular velocity
+ t: Tensor, # current policy time (sec)
+ dof_pos: Tensor, # current angular position of the DoFs relative to default
+ dof_vel: Tensor, # current angular velocity of the DoFs
+ prev_actions: Tensor, # previous actions taken by the model
+ imu_ang_vel: Tensor, # angular velocity of the IMU
+ imu_euler_xyz: Tensor, # euler angles of the IMU
+ buffer: Tensor, # buffer of previous observations
+ ) -> Tuple[Tensor, Tensor, Tensor]:
+ """Runs the actor model forward pass.
+
+ Args:
+ x_vel: The x-coordinate of the target velocity, with shape (1).
+ y_vel: The y-coordinate of the target velocity, with shape (1).
+ rot: The target angular velocity, with shape (1).
+ t: The current policy time step, with shape (1).
+ dof_pos: The current angular position of the DoFs relative to default, with shape (num_actions).
+ dof_vel: The current angular velocity of the DoFs, with shape (num_actions).
+ prev_actions: The previous actions taken by the model, with shape (num_actions).
+ imu_ang_vel: The angular velocity of the IMU, with shape (3),
+ in radians per second. If IMU is not used, can be all zeros.
+ imu_euler_xyz: The euler angles of the IMU, with shape (3),
+ in radians. "XYZ" means (roll, pitch, yaw). If IMU is not used,
+ can be all zeros.
+ buffer: The buffer of previous actions, with shape (frame_stack * num_single_obs). This is
+ the return value of the previous forward pass. On the first
+ pass, it should be all zeros.
+
+ Returns:
+ actions_scaled: The actions to take, with shape (num_actions), scaled by the action_scale.
+ actions: The actions to take, with shape (num_actions).
+ x: The new buffer of observations, with shape (frame_stack * num_single_obs).
+ """
+ sin_pos = torch.sin(2 * torch.pi * t / self.cycle_time)
+ cos_pos = torch.cos(2 * torch.pi * t / self.cycle_time)
+
+ # Construct command input
+ command_input = torch.cat(
+ (
+ sin_pos,
+ cos_pos,
+ x_vel * self.lin_vel_scale,
+ y_vel * self.lin_vel_scale,
+ rot * self.ang_vel_scale,
+ ),
+ dim=0,
+ )
+
+ # Calculate current position and velocity observations
+ q = dof_pos * self.dof_pos_scale
+ dq = dof_vel * self.dof_vel_scale
+
+ # Construct new observation
+ new_x = torch.cat(
+ (
+ command_input,
+ q,
+ dq,
+ prev_actions,
+ imu_ang_vel * self.ang_vel_scale,
+ imu_euler_xyz * self.quat_scale,
+ ),
+ dim=0,
+ )
+
+ # Clip the inputs
+ new_x = torch.clamp(new_x, -self.clip_observations, self.clip_observations)
+
+ # Add the new frame to the buffer
+ x = torch.cat((buffer, new_x), dim=0)
+ # Pop the oldest frame
+ x = x[self.num_single_obs :]
+
+ policy_input = x.unsqueeze(0)
+
+ # Get actions from the policy
+ actions = self.policy(policy_input).squeeze(0)
+ actions_scaled = actions * self.action_scale
+
+ return actions_scaled, actions, x
+
+
+def convert_model_to_onnx(model_path: str, cfg: ActorCfg, save_path: Optional[str] = None) -> ort.InferenceSession:
+ """Converts a PyTorch model to a ONNX format.
+
+ Args:
+ model_path: Path to the PyTorch model.
+ cfg: The configuration for the actor.
+ save_path: Path to save the ONNX model.
+
+ Returns:
+ An ONNX inference session.
+ """
+ all_weights = torch.load(model_path, map_location="cpu", weights_only=True)
+ weights = all_weights["model_state_dict"]
+ num_actor_obs = weights["actor.0.weight"].shape[1]
+ num_critic_obs = weights["critic.0.weight"].shape[1]
+ num_actions = weights["std"].shape[0]
+ actor_hidden_dims = [v.shape[0] for k, v in weights.items() if re.match(r"actor\.\d+\.weight", k)]
+ critic_hidden_dims = [v.shape[0] for k, v in weights.items() if re.match(r"critic\.\d+\.weight", k)]
+ actor_hidden_dims = actor_hidden_dims[:-1]
+ critic_hidden_dims = critic_hidden_dims[:-1]
+
+ ac_model = ActorCritic(num_actor_obs, num_critic_obs, num_actions, actor_hidden_dims, critic_hidden_dims)
+ ac_model.load_state_dict(weights)
+
+ a_model = Actor(ac_model.actor, cfg)
+
+ # Gets the model input tensors.
+ x_vel = torch.randn(1)
+ y_vel = torch.randn(1)
+ rot = torch.randn(1)
+ t = torch.randn(1)
+ dof_pos = torch.randn(a_model.num_actions)
+ dof_vel = torch.randn(a_model.num_actions)
+ prev_actions = torch.randn(a_model.num_actions)
+ imu_ang_vel = torch.randn(3)
+ imu_euler_xyz = torch.randn(3)
+ buffer = a_model.get_init_buffer()
+ input_tensors = (x_vel, y_vel, rot, t, dof_pos, dof_vel, prev_actions, imu_ang_vel, imu_euler_xyz, buffer)
+
+ jit_model = torch.jit.script(a_model)
+
+ # Export the model to a buffer
+ buffer = BytesIO()
+ torch.onnx.export(jit_model, input_tensors, buffer)
+ buffer.seek(0)
+
+ # Load the model as an onnx model
+ model_proto = onnx.load_model(buffer)
+
+ # Add sim2sim metadata
+ robot_effort = list(a_model.robot.effort().values())
+ robot_stiffness = list(a_model.robot.stiffness().values())
+ robot_damping = list(a_model.robot.damping().values())
+ num_actions = a_model.num_actions
+ num_observations = a_model.num_observations
+
+ for field_name, field in [
+ ("robot_effort", robot_effort),
+ ("robot_stiffness", robot_stiffness),
+ ("robot_damping", robot_damping),
+ ("num_actions", num_actions),
+ ("num_observations", num_observations),
+ ]:
+ meta = model_proto.metadata_props.add()
+ meta.key = field_name
+ meta.value = str(field)
+
+ # Add the configuration of the model
+ for field in fields(cfg):
+ value = getattr(cfg, field.name)
+ meta = model_proto.metadata_props.add()
+ meta.key = field.name
+ meta.value = str(value)
+
+ if save_path:
+ onnx.save_model(model_proto, save_path)
+
+ # Convert model to bytes
+ buffer2 = BytesIO()
+ onnx.save_model(model_proto, buffer2)
+ buffer2.seek(0)
+
+ return ort.InferenceSession(buffer2.read())
+
+
+if __name__ == "__main__":
+ convert_model_to_onnx("model_3000.pt", ActorCfg(), "policy.onnx")
diff --git a/sim/play.py b/sim/play.py
index 9f215534..29158f69 100755
--- a/sim/play.py
+++ b/sim/play.py
@@ -1,56 +1,31 @@
+# mypy: ignore-errors
"""Play a trained policy in the environment.
Run:
python sim/play.py --task g1 --log_h5
python sim/play.py --task stompymini --log_h5
+ python sim/play.py --task=stompymicro --sim_device=cpu --num_envs=9 --arrows --command_mode=fixed
"""
-
-# mypy: ignore-errors
-
import argparse
-import copy
import logging
import os
from datetime import datetime
-from typing import Any, Union
import cv2
import h5py
import numpy as np
-from isaacgym import gymapi
from tqdm import tqdm
-logger = logging.getLogger(__name__)
-
from sim.env import run_dir # noqa: E402
from sim.envs import task_registry # noqa: E402
-from sim.utils.helpers import get_args # noqa: E402
+from sim.utils.args_parsing import parse_args_with_extras
+from sim.utils.cmd_manager import CommandManager # noqa: E402
+from sim.utils.helpers import export_policy_as_jit # noqa: E402
from sim.utils.logger import Logger # noqa: E402
-import torch # isort: skip
-
-
-def export_policy_as_jit(actor_critic: Any, path: Union[str, os.PathLike]) -> None:
- os.makedirs(path, exist_ok=True)
- path = os.path.join(path, "policy_1.pt")
- model = copy.deepcopy(actor_critic.actor).to("cpu")
- traced_script_module = torch.jit.script(model)
- traced_script_module.save(path)
-
-
-def export_policy_as_onnx(actor_critic, path):
- os.makedirs(path, exist_ok=True)
- path = os.path.join(path, "policy_1.onnx")
- model = copy.deepcopy(actor_critic.actor).to("cpu")
-
- # Get the input dimension from the first layer of the model
- first_layer = next(model.parameters())
- input_dim = first_layer.shape[1]
-
- # Create a dummy input tensor with the correct dimensions
- dummy_input = torch.randn(1, input_dim)
+from isaacgym import gymapi # isort: skip
- torch.onnx.export(model, dummy_input, path)
+logger = logging.getLogger(__name__)
def play(args: argparse.Namespace) -> None:
@@ -60,12 +35,15 @@ def play(args: argparse.Namespace) -> None:
# override some parameters for testing
env_cfg.env.num_envs = min(env_cfg.env.num_envs, 1)
env_cfg.sim.max_gpu_contact_pairs = 2**10
- env_cfg.terrain.mesh_type = "plane"
+ if args.trimesh:
+ env_cfg.terrain.mesh_type = "trimesh"
+ else:
+ env_cfg.terrain.mesh_type = "plane"
env_cfg.terrain.num_rows = 5
env_cfg.terrain.num_cols = 5
env_cfg.terrain.curriculum = False
env_cfg.terrain.max_init_terrain_level = 5
- env_cfg.noise.add_noise = True
+ env_cfg.noise.add_noise = False
env_cfg.domain_rand.push_robots = False
env_cfg.domain_rand.joint_angle_noise = 0.0
env_cfg.noise.curriculum = False
@@ -86,29 +64,35 @@ def play(args: argparse.Namespace) -> None:
policy = ppo_runner.get_inference_policy(device=env.device)
# Export policy if needed
- EXPORT_POLICY = True
- if EXPORT_POLICY:
+ if args.export_policy:
path = os.path.join(".")
export_policy_as_jit(ppo_runner.alg.actor_critic, path)
print("Exported policy as jit script to: ", path)
# export policy as a onnx module (used to run it on web)
- if EXPORT_ONNX:
- path = os.path.join(".")
- export_policy_as_onnx(ppo_runner.alg.actor_critic, path)
+ if args.export_onnx:
+ from sim.model_export import ActorCfg, convert_model_to_onnx # noqa: E402
+
+        path = "policy.onnx"
+        convert_model_to_onnx(ppo_runner.alg.actor_critic, ActorCfg(), save_path=path)
print("Exported policy as onnx to: ", path)
# Prepare for logging
env_logger = Logger(env.dt)
robot_index = 0
joint_index = 1
- stop_state_log = 1000
now = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
+
+ # Prepare command manager
+ cmd_manager = CommandManager(
+ num_envs=env_cfg.env.num_envs, mode=args.command_mode, device=env.device, env_cfg=env_cfg
+ )
+
if args.log_h5:
h5_file = h5py.File(f"data{now}.h5", "w")
# Create dataset for actions
- max_timesteps = stop_state_log
+ max_timesteps = args.max_iterations
num_dof = env.num_dof
dset_actions = h5_file.create_dataset("actions", (max_timesteps, num_dof), dtype=np.float32)
@@ -136,7 +120,7 @@ def play(args: argparse.Namespace) -> None:
"observations/euler", (max_timesteps, buf_len, 3), dtype=np.float32
) # root orientation
- if RENDER:
+ if args.render:
camera_properties = gymapi.CameraProperties()
camera_properties.width = 1920
camera_properties.height = 1080
@@ -165,26 +149,28 @@ def play(args: argparse.Namespace) -> None:
os.mkdir(experiment_dir)
video = cv2.VideoWriter(dir, fourcc, 50.0, (1920, 1080))
- for t in tqdm(range(stop_state_log)):
+ for t in tqdm(range(train_cfg.runner.max_iterations)):
actions = policy(obs.detach())
if args.log_h5:
dset_actions[t] = actions.detach().numpy()
- if FIX_COMMAND:
- env.commands[:, 0] = 0.5
- env.commands[:, 1] = 0.0
- env.commands[:, 2] = 0.0
- env.commands[:, 3] = 0.0
+ commands = cmd_manager.update(env.dt)
+ env.commands[:] = commands
+
obs, critic_obs, rews, dones, infos = env.step(actions.detach())
- print(f"IMU: {obs[0, (3 * env.num_actions + 5) + 3 : (3 * env.num_actions + 5) + 2 * 3]}")
- if RENDER:
+ if args.render:
env.gym.fetch_results(env.sim, True)
env.gym.step_graphics(env.sim)
env.gym.render_all_camera_sensors(env.sim)
img = env.gym.get_camera_image(env.sim, env.envs[0], h1, gymapi.IMAGE_COLOR)
img = np.reshape(img, (1080, 1920, 4))
img = cv2.cvtColor(img, cv2.COLOR_RGBA2BGR)
+ robot_positions = env.root_states[:, 0:3].cpu().numpy()
+ actual_vels = np.stack([env.base_lin_vel[:, 0].cpu().numpy(), env.base_lin_vel[:, 1].cpu().numpy()], axis=1)
+
+ if args.arrows:
+ cmd_manager.draw(env.gym, env.viewer, env.envs, robot_positions, actual_vels)
video.write(img[..., :3])
@@ -236,8 +222,9 @@ def play(args: argparse.Namespace) -> None:
env_logger.print_rewards()
env_logger.plot_states()
+ cmd_manager.close()
- if RENDER:
+ if args.render:
video.release()
if args.log_h5:
@@ -245,15 +232,35 @@ def play(args: argparse.Namespace) -> None:
h5_file.close()
-if __name__ == "__main__":
- RENDER = True
- FIX_COMMAND = True
+def add_play_arguments(parser):
+ """Add play-specific arguments."""
+ # Visualization
+ parser.add_argument(
+ "--arrows", action="store_true", default=False, help="Draw command and velocity arrows during visualization"
+ )
+ parser.add_argument("--render", action="store_true", default=True, help="Enable rendering")
+
+ # Control
+ parser.add_argument(
+ "--command_mode",
+ type=str,
+ default="fixed",
+ choices=["fixed", "oscillating", "random", "keyboard"],
+ help="Control mode for the robot",
+ )
- EXPORT_ONNX = True
+ # Export options
+ parser.add_argument("--export_policy", action="store_true", default=False, help="Export policy as JIT")
+ parser.add_argument("--export_onnx", action="store_true", default=False, help="Export policy as ONNX")
- base_args = get_args()
- parser = argparse.ArgumentParser(description="Extend base arguments with log_h5")
- parser.add_argument("--log_h5", action="store_true", help="Enable HDF5 logging")
- args, unknown = parser.parse_known_args(namespace=base_args)
+ # Logging
+ parser.add_argument("--log_h5", action="store_true", default=False, help="Enable HDF5 logging")
+ # Trimesh
+ parser.add_argument("--trimesh", action="store_true", default=False, help="Use trimesh terrain")
+
+
+if __name__ == "__main__":
+ args = parse_args_with_extras(add_play_arguments)
+ print("Arguments:", vars(args))
play(args)
diff --git a/sim/produce_sim_data.py b/sim/produce_sim_data.py
new file mode 100644
index 00000000..f4bae38a
--- /dev/null
+++ b/sim/produce_sim_data.py
@@ -0,0 +1,60 @@
+import argparse
+import multiprocessing as mp
+import subprocess
+import time
+
+from tqdm import tqdm
+
+
+def run_simulation(sim_idx: int, args: argparse.Namespace) -> None:
+ """Run a single simulation instance with the given parameters."""
+ cmd = [
+ "python",
+ "sim/sim2sim.py",
+ "--embodiment",
+ args.embodiment,
+ "--load_model",
+ args.load_model,
+ "--log_h5",
+ "--no_render",
+ ]
+
+ try:
+ subprocess.run(cmd, check=True)
+ except subprocess.CalledProcessError as e:
+ print(f"Simulation {sim_idx} failed with error: {e}")
+
+
+def run_parallel_sims(num_threads: int, args: argparse.Namespace) -> None:
+ """Run multiple simulation instances in parallel with a delay between each start."""
+ processes = []
+ delay_between_starts = 0.1 # Adjust the delay (in seconds) as needed
+
+ for idx in range(num_threads):
+ p = mp.Process(target=run_simulation, args=(idx, args))
+ p.start()
+ processes.append(p)
+ time.sleep(delay_between_starts) # Introduce a delay before starting the next process
+
+ # Wait for all processes to finish with a progress bar
+ for p in tqdm(processes, desc="Running parallel simulations"):
+ p.join()
+
+
+if __name__ == "__main__":
+ parser = argparse.ArgumentParser(description="Run parallel simulations.")
+ parser.add_argument("--num_threads", type=int, default=10, help="Number of parallel simulations to run")
+ parser.add_argument("--embodiment", default="stompypro", type=str, help="Embodiment name")
+ parser.add_argument("--load_model", default="examples/walking_pro.onnx", type=str, help="Path to model to load")
+
+ args = parser.parse_args()
+
+    # Run num_examples simulations total, in parallel batches of up to num_threads
+    num_examples = 2000
+ num_batches = (num_examples + args.num_threads - 1) // args.num_threads
+
+ for batch in range(num_batches):
+ examples_remaining = num_examples - (batch * args.num_threads)
+ threads_this_batch = min(args.num_threads, examples_remaining)
+ print(f"\nRunning batch {batch+1}/{num_batches} ({threads_this_batch} simulations)")
+ run_parallel_sims(threads_this_batch, args)
diff --git a/sim/requirements.txt b/sim/requirements.txt
index 9751be9e..8c87ef02 100755
--- a/sim/requirements.txt
+++ b/sim/requirements.txt
@@ -1,13 +1,12 @@
-# requirements.txt
-
-mediapy
-torch
-python-dotenv
-h5py
-gdown
+mediapy==1.2.2
+torch==2.4.1
+python-dotenv==1.0.1
+h5py==3.11.0
+gdown==5.2.0
matplotlib==3.3.4
numpy==1.19.5
-wandb
+wandb==0.18.1
tensorboard==2.14.0
-onnx
-onnxscript
+onnx==1.15.0
+onnxscript==0.1.0.dev20240413
+pygame==2.6.1
\ No newline at end of file
diff --git a/sim/resources/stompymicro/joints.py b/sim/resources/stompymicro/joints.py
index fb1e1551..825e2665 100644
--- a/sim/resources/stompymicro/joints.py
+++ b/sim/resources/stompymicro/joints.py
@@ -5,6 +5,7 @@
tree of the various joint names in the robot.
"""
+import math
import textwrap
from abc import ABC
from typing import Dict, List, Tuple, Union
@@ -52,229 +53,282 @@ def __str__(self) -> str:
return f"[{self.__class__.__name__}]{parts_str}"
-class LeftArm(Node):
- shoulder_yaw = "left_shoulder_yaw"
- shoulder_pitch = "left_shoulder_pitch"
- elbow_pitch = "left_elbow_yaw" # FIXME: yaw vs pitch
-
-
class RightArm(Node):
- shoulder_yaw = "right_shoulder_yaw"
shoulder_pitch = "right_shoulder_pitch"
- elbow_pitch = "right_elbow_yaw" # FIXME: yaw vs pitch
+ shoulder_yaw = "right_shoulder_yaw"
+ elbow_yaw = "right_elbow_yaw"
-class LeftLeg(Node):
- hip_roll = "left_hip_roll"
- hip_yaw = "left_hip_yaw"
- hip_pitch = "left_hip_pitch"
- knee_pitch = "left_knee_pitch"
- ankle_pitch = "left_ankle_pitch"
+class LeftArm(Node):
+ shoulder_pitch = "left_shoulder_pitch"
+ shoulder_yaw = "left_shoulder_yaw"
+ elbow_yaw = "left_elbow_yaw"
class RightLeg(Node):
- hip_roll = "right_hip_roll"
- hip_yaw = "right_hip_yaw"
hip_pitch = "right_hip_pitch"
+ hip_yaw = "right_hip_yaw"
+ hip_roll = "right_hip_roll"
knee_pitch = "right_knee_pitch"
ankle_pitch = "right_ankle_pitch"
+class LeftLeg(Node):
+ hip_pitch = "left_hip_pitch"
+ hip_yaw = "left_hip_yaw"
+ hip_roll = "left_hip_roll"
+ ankle_pitch = "left_ankle_pitch"
+ knee_pitch = "left_knee_pitch"
+
+
class Legs(Node):
- left = LeftLeg()
right = RightLeg()
+ left = LeftLeg()
+
+
+class Arms(Node):
+ right = RightArm()
+ left = LeftArm()
+
+
+def get_facing_direction_quaternion(angle_degrees: float) -> List[float]:
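+    """Return the [x, y, z, w] quaternion for a yaw rotation of angle_degrees about the +z axis."""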
+ theta = angle_degrees * (math.pi / 180)
+ half_theta = theta / 2
+ return [0, 0, math.sin(half_theta), math.cos(half_theta)]
class Robot(Node):
- height = 0.31
- rotation = [0, 0, 0.707, 0.707]
+ height = 0.34
+ angle = 0
+ rotation = get_facing_direction_quaternion(angle)
+ print(f"Rotation: {rotation}")
+
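+    # Set legs_only = True to drop the arm joints from every dictionary returned below.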
+ legs_only = False
- # left_arm = LeftArm()
- # right_arm = RightArm()
legs = Legs()
+ arms = Arms() if not legs_only else None
@classmethod
def default_standing(cls) -> Dict[str, float]:
- return {
- # Legs
- cls.legs.left.hip_pitch: 0.0,
+ legs = { # TODO: Set back to previous values
+ cls.legs.left.hip_pitch: 0,
+ cls.legs.left.knee_pitch: 0,
cls.legs.left.hip_yaw: 0,
cls.legs.left.hip_roll: 0,
- cls.legs.left.knee_pitch: 0,
cls.legs.left.ankle_pitch: 0,
cls.legs.right.hip_pitch: 0,
- cls.legs.right.hip_yaw: 0,
- cls.legs.right.hip_roll: 0,
cls.legs.right.knee_pitch: 0,
cls.legs.right.ankle_pitch: 0,
- # TODO: fixing this for debugging
- # Arms
- # cls.left_arm.shoulder_pitch: 0,
- # cls.left_arm.shoulder_yaw: 0,
- # cls.left_arm.elbow_pitch: 0,
- # cls.right_arm.shoulder_pitch: 0,
- # cls.right_arm.shoulder_yaw: 0,
- # cls.right_arm.elbow_pitch: 0,
+ cls.legs.right.hip_yaw: 0,
+ cls.legs.right.hip_roll: 0,
}
+ if cls.arms is None:
+ return legs
+
+ arms = {
+ cls.arms.left.shoulder_pitch: 0.0,
+ cls.arms.left.shoulder_yaw: 0.0,
+ cls.arms.left.elbow_yaw: 0.0,
+ cls.arms.right.shoulder_pitch: 0.0,
+ cls.arms.right.shoulder_yaw: 0.0,
+ cls.arms.right.elbow_yaw: 0.0,
+ }
+ return {**legs, **arms}
+
@classmethod
def default_limits(cls) -> Dict[str, Dict[str, float]]:
- return {
- # # left arm
- # Robot.left_arm.shoulder_pitch: {
- # "lower": -1.5707963,
- # "upper": 1.5707963,
- # },
- # Robot.left_arm.shoulder_yaw: {
- # "lower": -1.5707963,
- # "upper": 1.5707963,
- # },
- # Robot.left_arm.elbow_pitch: {
- # "lower": -1.2217305,
- # "upper": 1.2217305,
- # },
- # # right arm
- # Robot.right_arm.shoulder_pitch: {
- # "lower": -1.5707963,
- # "upper": 1.5707963,
- # },
- # Robot.right_arm.shoulder_yaw: {
- # "lower": -1.5707963,
- # "upper": 1.5707963,
- # },
- # Robot.right_arm.elbow_pitch: {
- # "lower": -1.2217305,
- # "upper": 1.2217305,
- # },
- # left leg
- Robot.legs.left.hip_pitch: {
+ legs = {
+ # Left Leg
+ cls.legs.left.hip_pitch: {
"lower": -1.5707963,
"upper": 1.5707963,
},
- Robot.legs.left.hip_yaw: {
+ cls.legs.left.hip_yaw: {
"lower": -1.5707963,
"upper": 0.087266463,
},
- Robot.legs.left.hip_roll: {
+ cls.legs.left.hip_roll: {
"lower": -0.78539816,
"upper": 0.78539816,
},
- Robot.legs.left.knee_pitch: {
- "lower": 0,
- "upper": 1.0471976,
- },
- Robot.legs.left.ankle_pitch: {
+ cls.legs.left.knee_pitch: {
"lower": -1.0471976,
- "upper": 1.0471976,
+ "upper": 0,
},
- # right leg
- Robot.legs.right.hip_pitch: {
+ cls.legs.left.ankle_pitch: {
"lower": -1.5707963,
"upper": 1.5707963,
},
- Robot.legs.right.hip_yaw: {
+ # Right Leg
+ cls.legs.right.hip_pitch: {
+ "lower": -1.5707963,
+ "upper": 1.5707963,
+ },
+ cls.legs.right.hip_yaw: {
"lower": -0.087266463,
"upper": 1.5707963,
},
- Robot.legs.right.hip_roll: {
+ cls.legs.right.hip_roll: {
"lower": -0.78539816,
"upper": 0.78539816,
},
- Robot.legs.right.knee_pitch: {
- "lower": -1.0471976,
- "upper": 0,
- },
- Robot.legs.right.ankle_pitch: {
- "lower": -1.0471976,
+ cls.legs.right.knee_pitch: {
+ "lower": 0,
"upper": 1.0471976,
},
+ cls.legs.right.ankle_pitch: {
+ "lower": -1.5707963,
+ "upper": 1.5707963,
+ },
}
+ if cls.arms is None:
+ return legs
+
+ arms = {
+ # Left Arm
+ cls.arms.left.shoulder_pitch: {
+ "lower": -1.7453293,
+ "upper": 1.7453293,
+ },
+ cls.arms.left.shoulder_yaw: {
+ "lower": -0.43633231,
+ "upper": 1.5707963,
+ },
+ cls.arms.left.elbow_yaw: {
+ "lower": -1.5707963,
+ "upper": 1.5707963,
+ },
+ # Right Arm
+ cls.arms.right.shoulder_pitch: {
+ "lower": -1.7453293,
+ "upper": 1.7453293,
+ },
+ cls.arms.right.shoulder_yaw: {
+ "lower": -1.134464,
+ "upper": 0.87266463,
+ },
+ cls.arms.right.elbow_yaw: {
+ "lower": -1.5707963,
+ "upper": 1.5707963,
+ },
+ }
+ return {**legs, **arms}
+
# p_gains
@classmethod
def stiffness(cls) -> Dict[str, float]:
- return {
- "hip_pitch": 5,
- "hip_yaw": 5,
- "hip_roll": 5,
- "knee_pitch": 5,
- "ankle_pitch": 5,
- # "shoulder_pitch": 5,
- # "shoulder_yaw": 5,
- # "shoulder_roll": 5,
- # "elbow_pitch": 5,
- # "elbow_yaw": 5,
+ legs = {
+ "hip_pitch": 17.681462808698132,
+ "hip_yaw": 17.681462808698132,
+ "hip_roll": 17.681462808698132,
+ "knee_pitch": 17.681462808698132,
+ "ankle_pitch": 17.681462808698132,
+ }
+
+ if cls.arms is None:
+ return legs
+
+ arms = {
+ "shoulder_pitch": 17.681462808698132,
+ "shoulder_yaw": 17.681462808698132,
+ "elbow_yaw": 17.681462808698132,
}
+ return {**legs, **arms}
# d_gains
@classmethod
def damping(cls) -> Dict[str, float]:
- return {
- "hip_pitch": 0.3,
- "hip_yaw": 0.3,
- "hip_roll": 0.3,
- "knee_pitch": 0.3,
- "ankle_pitch": 0.3,
- # "shoulder_pitch": 0.3,
- # "shoulder_yaw": 0.3,
- # "shoulder_roll": 0.3,
- # "elbow_pitch": 0.3,
- # "elbow_yaw": 0.3,
+ legs = {
+ "hip_pitch": 0.5354656169048285,
+ "hip_yaw": 0.5354656169048285,
+ "hip_roll": 0.5354656169048285,
+ "knee_pitch": 0.5354656169048285,
+ "ankle_pitch": 0.5354656169048285,
}
- # pos_limits
- @classmethod
- def effort(cls) -> Dict[str, float]:
- return {
- "hip_pitch": 1,
- "hip_yaw": 1,
- "hip_roll": 1,
- "knee_pitch": 1,
- "ankle_pitch": 1,
- # "shoulder_pitch": 1,
- # "shoulder_yaw": 1,
- # "shoulder_roll": 1,
- # "elbow_pitch": 1,
- # "elbow_yaw": 1,
+ if cls.arms is None:
+ return legs
+
+ arms = {
+ "shoulder_pitch": 0.5354656169048285,
+ "shoulder_yaw": 0.5354656169048285,
+ "elbow_yaw": 0.5354656169048285,
}
+ return {**legs, **arms}
- # vel_limits
+ # pos_limits
@classmethod
- def velocity(cls) -> Dict[str, float]:
- return {
+ def effort(cls) -> Dict[str, float]:
+ legs = {
"hip_pitch": 10,
"hip_yaw": 10,
"hip_roll": 10,
"knee_pitch": 10,
"ankle_pitch": 10,
- # "shoulder_pitch": 10,
- # "shoulder_yaw": 10,
- # "shoulder_roll": 10,
- # "elbow_pitch": 10,
- # "elbow_yaw": 10,
}
+ if cls.arms is None:
+ return legs
+
+ arms = {
+ "shoulder_pitch": 10,
+ "shoulder_yaw": 10,
+ "elbow_yaw": 10,
+ }
+ return {**legs, **arms}
+
+ # vel_limits
+ @classmethod
+ def velocity(cls) -> Dict[str, float]:
+ legs = {
+ "hip_pitch": 5,
+ "hip_yaw": 5,
+ "hip_roll": 5,
+ "knee_pitch": 5,
+ "ankle_pitch": 5,
+ }
+
+ if cls.arms is None:
+ return legs
+
+ arms = {
+ "shoulder_pitch": 5,
+ "shoulder_yaw": 5,
+ "elbow_yaw": 5,
+ }
+ return {**legs, **arms}
+
@classmethod
def friction(cls) -> Dict[str, float]:
- return {
- # pfb30 todo
+ legs = {
"hip_pitch": 0.05,
"hip_yaw": 0.05,
"hip_roll": 0.05,
"knee_pitch": 0.05,
"ankle_pitch": 0.05,
- # "ankle_pitch": 0.05,
- # "elbow_yaw": 0.05,
- # "elbow_pitch": 0.05,
}
+ if cls.arms is None:
+ return legs
+
+ arms = {
+ "shoulder_pitch": 0.05,
+ "shoulder_yaw": 0.05,
+ "elbow_yaw": 0.05,
+ }
+ return {**legs, **arms}
+
def print_joints() -> None:
joints = Robot.all_joints()
assert len(joints) == len(set(joints)), "Duplicate joint names found!"
print(Robot())
+ print(f"\nArms are {'disabled' if Robot.legs_only else 'enabled'}")
+ print(f"Number of joints: {len(joints)}")
if __name__ == "__main__":
- # python -m sim.Robot.joints
+ # python -m sim.resources.[robotname].joints
print_joints()
diff --git a/sim/resources/stompymicro/robot.urdf b/sim/resources/stompymicro/robot.urdf
index a75c6b84..0c537b2b 100644
--- a/sim/resources/stompymicro/robot.urdf
+++ b/sim/resources/stompymicro/robot.urdf
@@ -2,500 +2,500 @@
[hunk body not preserved in this extract: URDF link/joint element changes omitted]
diff --git a/sim/resources/stompymicro/robot.xml b/sim/resources/stompymicro/robot.xml
new file mode 100644
index 00000000..4a2985c7
--- /dev/null
+++ b/sim/resources/stompymicro/robot.xml
@@ -0,0 +1,222 @@
[new MJCF file body (222 lines) not preserved in this extract]
diff --git a/sim/resources/stompymicro/robot_calibration.xml b/sim/resources/stompymicro/robot_calibration.xml
index 199afdbf..e4c7a0d7 100644
--- a/sim/resources/stompymicro/robot_calibration.xml
+++ b/sim/resources/stompymicro/robot_calibration.xml
@@ -1,7 +1,8 @@
[hunk bodies not preserved in this extract: MJCF element changes omitted]
diff --git a/sim/resources/stompymicro/robot_fixed.urdf b/sim/resources/stompymicro/robot_fixed.urdf
index b4fc9eb5..c61ed55d 100644
--- a/sim/resources/stompymicro/robot_fixed.urdf
+++ b/sim/resources/stompymicro/robot_fixed.urdf
@@ -3,7 +3,7 @@
[hunk bodies not preserved in this extract: URDF element changes omitted]
diff --git a/sim/resources/stompymicro/robot_fixed.xml b/sim/resources/stompymicro/robot_fixed.xml
index b5436397..826a3aff 100644
--- a/sim/resources/stompymicro/robot_fixed.xml
+++ b/sim/resources/stompymicro/robot_fixed.xml
@@ -1,119 +1,98 @@
[hunk bodies not preserved in this extract: MJCF element changes omitted]
diff --git a/sim/resources/stompypro/joints.py b/sim/resources/stompypro/joints.py
index 75cdd058..27c6c2c6 100755
--- a/sim/resources/stompypro/joints.py
+++ b/sim/resources/stompypro/joints.py
@@ -118,6 +118,9 @@ def isaac_to_mujoco_signs(cls) -> Dict[str, int]:
def default_positions(cls) -> Dict[str, float]:
return {}
+ """This should be ordered according to how the policy is trained. E.g. the first
+ entry should be the angle of the first joint in the policy."""
+
@classmethod
def default_standing(cls) -> Dict[str, float]:
return {
diff --git a/sim/resources/stompypro/robot_fixed.urdf b/sim/resources/stompypro/robot_fixed.urdf
index 5c682f98..00c47bbe 100644
--- a/sim/resources/stompypro/robot_fixed.urdf
+++ b/sim/resources/stompypro/robot_fixed.urdf
@@ -10,7 +10,30 @@
[hunk body not preserved in this extract: URDF element changes omitted]
diff --git a/sim/resources/stompypro/robot_fixed.xml b/sim/resources/stompypro/robot_fixed.xml
index 68b15adf..63063952 100644
--- a/sim/resources/stompypro/robot_fixed.xml
+++ b/sim/resources/stompypro/robot_fixed.xml
@@ -34,7 +34,7 @@
[hunk body not preserved in this extract: MJCF element change omitted]
diff --git a/sim/scripts/calibration_isaac.py b/sim/scripts/calibration_isaac.py
index 3cfc6673..89e23a7f 100755
--- a/sim/scripts/calibration_isaac.py
+++ b/sim/scripts/calibration_isaac.py
@@ -15,8 +15,8 @@
import numpy as np
# Importing torch down here to avoid gymtorch issues.
-import torch # noqa: E402 # type: ignore[import]
from isaacgym import gymapi, gymtorch, gymutil
+import torch # noqa: E402 # type: ignore[import] # isort: skip
from matplotlib import pyplot as plt
from sim.env import robot_urdf_path
diff --git a/sim/scripts/calibration_mujoco.py b/sim/scripts/calibration_mujoco.py
index 7045d44b..8244eca8 100755
--- a/sim/scripts/calibration_mujoco.py
+++ b/sim/scripts/calibration_mujoco.py
@@ -78,7 +78,7 @@ def run_mujoco(cfg: Any, joint_id: int = 0, steps: int = 1000) -> None: # noqa:
Returns:
None
"""
- model_dir = os.environ.get("MODEL_DIR")
+ model_dir = os.environ.get("MODEL_DIR") or "sim/resources"
mujoco_model_path = f"{model_dir}/{args.embodiment}/robot_calibration.xml"
model = mujoco.MjModel.from_xml_path(mujoco_model_path)
model.opt.timestep = cfg.sim_config.dt
diff --git a/sim/scripts/create_mjcf.py b/sim/scripts/create_mjcf.py
index 123dcbbd..a968702f 100755
--- a/sim/scripts/create_mjcf.py
+++ b/sim/scripts/create_mjcf.py
@@ -16,7 +16,7 @@
import xml.dom.minidom
import xml.etree.ElementTree as ET
from pathlib import Path
-from typing import Any, List, OrderedDict, Union
+from typing import Any, List, Union
from sim.scripts import mjcf
@@ -373,20 +373,20 @@ def create_mjcf(filepath: Path) -> None:
"""Create a MJCF file for the Stompy robot."""
path = Path(filepath)
robot_name = path.stem
- path = path.parent
+ parent_path = path.parent
robot = Sim2SimRobot(
robot_name,
- path,
+ parent_path,
mjcf.Compiler(angle="radian", meshdir="meshes", autolimits=True, eulerseq="zyx"),
)
robot.adapt_world()
- robot.save(path / f"{robot_name}_fixed.xml")
+ robot.save(path.with_suffix(".xml"))
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Create a MJCF file for the robot.")
- parser.add_argument("filepath", type=str, help="The path to load and save the MJCF file.")
+ parser.add_argument("--filepath", type=str, help="The path to load and save the MJCF file.")
parser.add_argument("--robot", type=str, help="The robot name to load.")
args = parser.parse_args()
# Robot name is whatever string comes right before ".urdf" extension
diff --git a/sim/sim2sim.py b/sim/sim2sim.py
deleted file mode 100755
index 1a9182be..00000000
--- a/sim/sim2sim.py
+++ /dev/null
@@ -1,322 +0,0 @@
-# SPDX-License-Identifier: BSD-3-Clause
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are met:
-#
-# 1. Redistributions of source code must retain the above copyright notice, this
-# list of conditions and the following disclaimer.
-#
-# 2. Redistributions in binary form must reproduce the above copyright notice,
-# this list of conditions and the following disclaimer in the documentation
-# and/or other materials provided with the distribution.
-#
-# 3. Neither the name of the copyright holder nor the names of its
-# contributors may be used to endorse or promote products derived from
-# this software without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
-# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
-# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
-# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-#
-# Copyright (c) 2024 Beijing RobotEra TECHNOLOGY CO.,LTD. All rights reserved.
-
-"""
-Difference setup
-python sim/play.py --task mini_ppo --sim_device cpu
-python sim/sim2sim.py --load_model examples/standing_pro.pt --embodiment stompypro
-python sim/sim2sim.py --load_model examples/standing_micro.pt --embodiment stompymicro
-"""
-import argparse
-import math
-import os
-from collections import deque
-from copy import deepcopy
-
-import mujoco
-import mujoco_viewer
-import numpy as np
-import pygame
-from scipy.spatial.transform import Rotation as R
-from tqdm import tqdm
-
-from sim.scripts.create_mjcf import load_embodiment
-
-import torch # isort: skip
-
-
-def handle_keyboard_input():
- global x_vel_cmd, y_vel_cmd, yaw_vel_cmd
-
- keys = pygame.key.get_pressed()
-
- # Update movement commands based on arrow keys
- if keys[pygame.K_UP]:
- x_vel_cmd += 0.0005
- if keys[pygame.K_DOWN]:
- x_vel_cmd -= 0.0005
- if keys[pygame.K_LEFT]:
- y_vel_cmd += 0.0005
- if keys[pygame.K_RIGHT]:
- y_vel_cmd -= 0.0005
-
- # Yaw control
- if keys[pygame.K_a]:
- yaw_vel_cmd += 0.001
- if keys[pygame.K_z]:
- yaw_vel_cmd -= 0.001
-
-
-class Sim2simCfg:
- def __init__(
- self,
- embodiment,
- frame_stack=15,
- c_frame_stack=3,
- sim_duration=60.0,
- dt=0.001,
- decimation=10,
- cycle_time=0.4,
- tau_factor=3,
- lin_vel=2.0,
- ang_vel=1.0,
- dof_pos=1.0,
- dof_vel=0.05,
- clip_observations=18.0,
- clip_actions=18.0,
- action_scale=0.25,
- ):
- self.robot = load_embodiment(embodiment)
-
- self.num_actions = len(self.robot.all_joints())
-
- self.frame_stack = frame_stack
- self.c_frame_stack = c_frame_stack
- self.num_single_obs = 11 + self.num_actions * self.c_frame_stack
- self.num_observations = int(self.frame_stack * self.num_single_obs)
-
- self.sim_duration = sim_duration
- self.dt = dt
- self.decimation = decimation
-
- self.cycle_time = cycle_time
-
- self.tau_factor = tau_factor
- self.tau_limit = (
- np.array(list(self.robot.effort().values()) + list(self.robot.effort().values())) * self.tau_factor
- )
- self.kps = np.array(list(self.robot.stiffness().values()) + list(self.robot.stiffness().values()))
- self.kds = np.array(list(self.robot.damping().values()) + list(self.robot.damping().values()))
-
- self.lin_vel = lin_vel
- self.ang_vel = ang_vel
- self.dof_pos = dof_pos
- self.dof_vel = dof_vel
-
- self.clip_observations = clip_observations
- self.clip_actions = clip_actions
-
- self.action_scale = action_scale
-
-
-def quaternion_to_euler_array(quat):
- # Ensure quaternion is in the correct format [x, y, z, w]
- x, y, z, w = quat
-
- # Roll (x-axis rotation)
- t0 = +2.0 * (w * x + y * z)
- t1 = +1.0 - 2.0 * (x * x + y * y)
- roll_x = np.arctan2(t0, t1)
-
- # Pitch (y-axis rotation)
- t2 = +2.0 * (w * y - z * x)
- t2 = np.clip(t2, -1.0, 1.0)
- pitch_y = np.arcsin(t2)
-
- # Yaw (z-axis rotation)
- t3 = +2.0 * (w * z + x * y)
- t4 = +1.0 - 2.0 * (y * y + z * z)
- yaw_z = np.arctan2(t3, t4)
-
- # Returns roll, pitch, yaw in a NumPy array in radians
- return np.array([roll_x, pitch_y, yaw_z])
-
-
-def get_obs(data):
- """Extracts an observation from the mujoco data structure"""
- q = data.qpos.astype(np.double)
- dq = data.qvel.astype(np.double)
- quat = data.sensor("orientation").data[[1, 2, 3, 0]].astype(np.double)
- r = R.from_quat(quat)
- v = r.apply(data.qvel[:3], inverse=True).astype(np.double) # In the base frame
- omega = data.sensor("angular-velocity").data.astype(np.double)
- gvec = r.apply(np.array([0.0, 0.0, -1.0]), inverse=True).astype(np.double)
- return (q, dq, quat, v, omega, gvec)
-
-
-def pd_control(target_q, q, kp, target_dq, dq, kd, default):
- """Calculates torques from position commands"""
- return kp * (target_q + default - q) - kd * dq
-
-
-def run_mujoco(policy, cfg, keyboard_use=False):
- """
- Run the Mujoco simulation using the provided policy and configuration.
-
- Args:
- policy: The policy used for controlling the simulation.
- cfg: The configuration object containing simulation settings.
-
- Returns:
- None
- """
- model_dir = os.environ.get("MODEL_DIR")
- mujoco_model_path = f"{model_dir}/{args.embodiment}/robot_fixed.xml"
-
- model = mujoco.MjModel.from_xml_path(mujoco_model_path)
- model.opt.timestep = cfg.dt
- data = mujoco.MjData(model)
-
- try:
- data.qpos = model.keyframe("default").qpos
- default = deepcopy(model.keyframe("default").qpos)[-cfg.num_actions :]
- print("Default position:", default)
- except:
- print("No default position found, using zero initialization")
- default = np.zeros(cfg.num_actions) # 3 for pos, 4 for quat, cfg.num_actions for joints
-
- mujoco.mj_step(model, data)
- for ii in range(len(data.ctrl) + 1):
- print(data.joint(ii).id, data.joint(ii).name)
-
- data.qvel = np.zeros_like(data.qvel)
- data.qacc = np.zeros_like(data.qacc)
- viewer = mujoco_viewer.MujocoViewer(model, data)
-
- target_q = np.zeros((cfg.num_actions), dtype=np.double)
- action = np.zeros((cfg.num_actions), dtype=np.double)
-
- hist_obs = deque()
- for _ in range(cfg.frame_stack):
- hist_obs.append(np.zeros([1, cfg.num_single_obs], dtype=np.double))
-
- count_lowlevel = 0
-
- for _ in tqdm(range(int(cfg.sim_duration / cfg.dt)), desc="Simulating..."):
- if keyboard_use:
- handle_keyboard_input()
-
- # Obtain an observation
- q, dq, quat, v, omega, gvec = get_obs(data)
- q = q[-cfg.num_actions :]
- dq = dq[-cfg.num_actions :]
-
- # 1000hz -> 50hz
- if count_lowlevel % cfg.decimation == 0:
- obs = np.zeros([1, cfg.num_single_obs], dtype=np.float32)
- eu_ang = quaternion_to_euler_array(quat)
- eu_ang[eu_ang > math.pi] -= 2 * math.pi
-
- cur_pos_obs = (q - default) * cfg.dof_pos
-
- cur_vel_obs = dq * cfg.dof_vel
-
- obs[0, 0] = math.sin(2 * math.pi * count_lowlevel * cfg.dt / cfg.cycle_time)
- obs[0, 1] = math.cos(2 * math.pi * count_lowlevel * cfg.dt / cfg.cycle_time)
- obs[0, 2] = x_vel_cmd * cfg.lin_vel
- obs[0, 3] = y_vel_cmd * cfg.lin_vel
- obs[0, 4] = yaw_vel_cmd * cfg.ang_vel
- obs[0, 5 : (cfg.num_actions + 5)] = cur_pos_obs
- obs[0, (cfg.num_actions + 5) : (2 * cfg.num_actions + 5)] = cur_vel_obs
- obs[0, (2 * cfg.num_actions + 5) : (3 * cfg.num_actions + 5)] = action
- obs[0, (3 * cfg.num_actions + 5) : (3 * cfg.num_actions + 5) + 3] = omega
- obs[0, (3 * cfg.num_actions + 5) + 3 : (3 * cfg.num_actions + 5) + 2 * 3] = eu_ang
-
- obs = np.clip(obs, -cfg.clip_observations, cfg.clip_observations)
-
- hist_obs.append(obs)
- hist_obs.popleft()
-
- policy_input = np.zeros([1, cfg.num_observations], dtype=np.float32)
- for i in range(cfg.frame_stack):
- policy_input[0, i * cfg.num_single_obs : (i + 1) * cfg.num_single_obs] = hist_obs[i][0, :]
-
- action[:] = get_policy_output(policy, policy_input)
- action = np.clip(action, -cfg.clip_actions, cfg.clip_actions)
- target_q = action * cfg.action_scale
-
- target_dq = np.zeros((cfg.num_actions), dtype=np.double)
-
- # Generate PD control
- tau = pd_control(target_q, q, cfg.kps, target_dq, dq, cfg.kds, default) # Calc torques
-
- tau = np.clip(tau, -cfg.tau_limit, cfg.tau_limit) # Clamp torques
- # print(tau)
- # print(eu_ang)
- print(x_vel_cmd, y_vel_cmd, yaw_vel_cmd)
-
- data.ctrl = tau
-
- mujoco.mj_step(model, data)
- viewer.render()
- count_lowlevel += 1
-
- viewer.close()
-
-
-if __name__ == "__main__":
- parser = argparse.ArgumentParser(description="Deployment script.")
- parser.add_argument("--load_model", type=str, required=True, help="Run to load from.")
- parser.add_argument("--embodiment", type=str, required=True, help="embodiment")
- parser.add_argument("--terrain", action="store_true", help="terrain or plane")
- parser.add_argument("--load_actions", action="store_true", help="saved_actions")
- parser.add_argument("--keyboard_use", action="store_true", help="keyboard_use")
- args = parser.parse_args()
-
- if args.keyboard_use:
- x_vel_cmd, y_vel_cmd, yaw_vel_cmd = 0.0, 0.0, 0.0
- pygame.init()
- pygame.display.set_caption("Simulation Control")
- else:
- x_vel_cmd, y_vel_cmd, yaw_vel_cmd = 0.4, 0.0, 0.0
-
- if "pt" in args.load_model:
- policy = torch.jit.load(args.load_model)
- elif "onnx" in args.load_model:
- import onnxruntime as ort
-
- policy = ort.InferenceSession(args.load_model)
-
- def get_policy_output(policy, input_data):
- if isinstance(policy, torch.jit._script.RecursiveScriptModule):
- return policy(torch.tensor(input_data))[0].detach().numpy()
- else:
- ort_inputs = {policy.get_inputs()[0].name: input_data}
- return policy.run(None, ort_inputs)[0][0]
-
- if args.embodiment == "stompypro":
- cfg = Sim2simCfg(
- args.embodiment,
- sim_duration=60.0,
- dt=0.001,
- decimation=10,
- cycle_time=0.4,
- tau_factor=3.0,
- )
- elif args.embodiment == "stompymicro":
- cfg = Sim2simCfg(
- args.embodiment,
- sim_duration=60.0,
- dt=0.001,
- decimation=10,
- cycle_time=0.2,
- tau_factor=2,
- )
-
- run_mujoco(policy, cfg, args.keyboard_use)
diff --git a/sim/sim2sim/__init__.py b/sim/sim2sim/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/sim/sim2sim/calibrate.py b/sim/sim2sim/calibrate.py
new file mode 100644
index 00000000..18bfa1c8
--- /dev/null
+++ b/sim/sim2sim/calibrate.py
@@ -0,0 +1,99 @@
+import multiprocessing as mp
+import time
+
+from tqdm import tqdm
+
+from sim.envs.base.mujoco_env import MujocoCfg, MujocoEnv
+from sim.sim2sim.mujoco.play import run_simulation
+from sim.sim2sim.optimizers.bayesian import BayesianOptimizer, OptimizeParam
+
+# def run_parallel_sims(num_threads: int) -> None:
+# """Run multiple simulation instances in parallel with a delay between each start."""
+# processes = []
+# delay_between_starts = 0.1
+
+# for idx in range(num_threads):
+# p = mp.Process(target=run_simulation, args=(idx, args))
+# p.start()
+# processes.append(p)
+# time.sleep(delay_between_starts) # Introduce a delay before starting the next process
+
+# # Wait for all processes to finish with a progress bar
+# for p in tqdm(processes, desc="Running parallel simulations"):
+# p.join()
+
+
+if __name__ == "__main__":
+ import numpy as np
+ import onnxruntime as ort
+ from kinfer.export.pytorch import export_to_onnx
+ from kinfer.inference.python import ONNXModel
+ from scipy.spatial.transform import Rotation as R
+
+ from sim.model_export import ActorCfg
+ from sim.sim2sim.helpers import get_actor_policy
+ from sim.utils.cmd_manager import CommandManager
+
+ cfg = MujocoCfg()
+ cfg.env.num_envs = 1
+ env = MujocoEnv(cfg, render=True)
+ cmd_manager = CommandManager(num_envs=1, mode="fixed", default_cmd=[0.0, 0.0, 0.0, 0.0]) # cfg.env.num_envs,
+ LOAD_MODEL_PATH = "policy_1.pt"
+
+ policy_cfg = ActorCfg(embodiment=cfg.asset.name)
+
+ actor_model, sim2sim_info, input_tensors = get_actor_policy(LOAD_MODEL_PATH, policy_cfg)
+ export_config = {**vars(policy_cfg), **sim2sim_info}
+ print(export_config)
+
+ export_to_onnx(actor_model, input_tensors=input_tensors, config=export_config, save_path="kinfer_test.onnx")
+ policy = ONNXModel("kinfer_test.onnx")
+
+ # gains.kp_scale: 1.566
+ # gains.kd_scale: 9.312
+ # 940 -> {'gains.kp_scale': 2.7692284239249987, 'gains.kd_scale': 17.73252701327175, 'gains.tau_factor': 3.1723978203087935}
+
+ """
+ Best parameters found:
+ gains.kp_scale: 1.445
+ gains.kd_scale: 19.975
+ gains.tau_factor: 6.350
+ """
+ parameters = [
+ OptimizeParam(
+ name="gains.kp_scale",
+ min_val=0.5,
+ max_val=10.0,
+ ),
+ OptimizeParam(
+ name="gains.kd_scale",
+ min_val=0.2,
+ max_val=10.0,
+ ),
+ # OptimizeParam(
+ # name="gains.tau_factor",
+ # min_val=1.0,
+ # max_val=10.0,
+ # ),
+ ]
+ optimizer = BayesianOptimizer(
+ base_cfg=cfg,
+ policy=policy,
+ parameters=parameters,
+ cmd_manager=cmd_manager,
+ n_initial_points=200,
+ n_iterations=1000,
+ exploration_weight=0.25,
+ )
+
+ best_params, history = optimizer.optimize()
+
+ print("\nOptimization complete!")
+ print("Best parameters found:")
+ for name, value in best_params.items():
+ print(f"{name}: {value:.3f}")
+
+ import json
+
+ with open("optimization_results.json", "w") as f:
+ json.dump({"best_params": best_params, "history": history}, f, indent=2)
diff --git a/sim/sim2sim/helpers.py b/sim/sim2sim/helpers.py
new file mode 100644
index 00000000..20caba5e
--- /dev/null
+++ b/sim/sim2sim/helpers.py
@@ -0,0 +1,66 @@
+from typing import Tuple
+
+import pygame
+
+from sim.model_export import Actor, ActorCfg
+
+import torch # isort:skip
+
+
+def get_actor_policy(model_path: str, cfg: ActorCfg) -> Tuple[torch.nn.Module, dict, Tuple[torch.Tensor, ...]]:
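+    """Load a TorchScript actor checkpoint, wrap it in the exportable Actor, and return (model, sim2sim metadata, dummy input tensors)."""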
+ model = torch.jit.load(model_path, map_location="cpu")
+ a_model = Actor(model, cfg)
+
+ # Gets the model input tensors.
+ x_vel = torch.randn(1)
+ y_vel = torch.randn(1)
+ rot = torch.randn(1)
+ t = torch.randn(1)
+ dof_pos = torch.randn(a_model.num_actions)
+ dof_vel = torch.randn(a_model.num_actions)
+ prev_actions = torch.randn(a_model.num_actions)
+ imu_ang_vel = torch.randn(3)
+ imu_euler_xyz = torch.randn(3)
+ buffer = a_model.get_init_buffer()
+ input_tensors = (x_vel, y_vel, rot, t, dof_pos, dof_vel, prev_actions, imu_ang_vel, imu_euler_xyz, buffer)
+
+ # Add sim2sim metadata
+ robot_effort = list(a_model.robot.effort().values())
+ robot_stiffness = list(a_model.robot.stiffness().values())
+ robot_damping = list(a_model.robot.damping().values())
+ num_actions = a_model.num_actions
+ num_observations = a_model.num_observations
+
+ return (
+ a_model,
+ {
+ "robot_effort": robot_effort,
+ "robot_stiffness": robot_stiffness,
+ "robot_damping": robot_damping,
+ "num_actions": num_actions,
+ "num_observations": num_observations,
+ },
+ input_tensors,
+ )
+
+
+def handle_keyboard_input():
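+    """Nudge the module-level x_vel_cmd / y_vel_cmd / yaw_vel_cmd commands from arrow-key and A/Z presses (expects pygame to be initialized)."""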
+ global x_vel_cmd, y_vel_cmd, yaw_vel_cmd
+
+ keys = pygame.key.get_pressed()
+
+ # Update movement commands based on arrow keys
+ if keys[pygame.K_UP]:
+ x_vel_cmd += 0.0005
+ if keys[pygame.K_DOWN]:
+ x_vel_cmd -= 0.0005
+ if keys[pygame.K_LEFT]:
+ y_vel_cmd += 0.0005
+ if keys[pygame.K_RIGHT]:
+ y_vel_cmd -= 0.0005
+
+ # Yaw control
+ if keys[pygame.K_a]:
+ yaw_vel_cmd += 0.001
+ if keys[pygame.K_z]:
+ yaw_vel_cmd -= 0.001
diff --git a/sim/sim2sim/mujoco/coplay.py b/sim/sim2sim/mujoco/coplay.py
new file mode 100644
index 00000000..c8cc1e93
--- /dev/null
+++ b/sim/sim2sim/mujoco/coplay.py
@@ -0,0 +1,119 @@
+import argparse
+import logging
+
+import numpy as np
+from tqdm import tqdm
+
+from sim.env_helpers import debug_robot_state
+from sim.envs.base.mujoco_env import MujocoCfg, MujocoEnv
+from sim.utils.args_parsing import parse_args_with_extras
+from sim.envs.humanoids.stompymicro_config import StompyMicroCfg, StompyMicroCfgPPO
+from sim.envs.humanoids.stompymicro_env import StompyMicroEnv
+from sim.utils.helpers import class_to_dict, parse_sim_params
+from sim.algo.ppo.on_policy_runner import OnPolicyRunner
+import torch # isort: skip
+
+logger = logging.getLogger(__name__)
+
+np.set_printoptions(precision=3, suppress=True)
+torch.set_printoptions(precision=3)
+
+DEVICE = "cuda:0" # "cpu"
+LOAD_MODEL_PATH = "examples/experiments/standing/robustv1/policy_1.pt"
+
+
+def coplay(args: argparse.Namespace) -> None:
+ """Run both Isaac Gym and Mujoco environments simultaneously."""
+ cfg = MujocoCfg()
+ cfg.env.num_envs = 1
+
+ cfg.gains.kp_scale = 1.
+ cfg.gains.kd_scale = 1.
+ cfg.gains.tau_factor = 4.0
+
+ # Remove randomness
+ cfg.domain_rand.start_pos_noise = 0.0
+ cfg.domain_rand.randomize_friction = False
+ cfg.domain_rand.randomize_base_mass = False
+ cfg.domain_rand.push_robots = False
+ cfg.domain_rand.action_noise = 0.0
+ cfg.domain_rand.action_delay = 0.0
+
+ cfg.sim.physx.max_gpu_contact_pairs = 2**10
+
+ train_cfg = StompyMicroCfgPPO()
+ train_cfg.runner.load_run = -1
+ train_cfg.seed = 0
+
+ # Create and setup environment
+ sim_params = {"sim": class_to_dict(cfg.sim)}
+ sim_params = parse_sim_params(args, sim_params)
+
+ RENDER_MUJOCO = True
+ isaac_env = StompyMicroEnv(
+ cfg=cfg,
+ sim_params=sim_params,
+ physics_engine=args.physics_engine,
+ sim_device=args.sim_device,
+ headless=RENDER_MUJOCO,
+ )
+ mujoco_env = MujocoEnv(
+ cfg,
+ render=RENDER_MUJOCO,
+ )
+
+ # Setup and load policy
+ all_cfg = {**class_to_dict(train_cfg), **class_to_dict(cfg)}
+ ppo_runner = OnPolicyRunner(isaac_env, all_cfg, device=DEVICE)
+ resume_path = "logs/StompyMicro/Dec04_20-29-12_StandingRobust/model_12001.pt"
+ ppo_runner.load(resume_path, load_optimizer=False)
+ ppo_runner.alg.actor_critic.eval()
+ ppo_runner.alg.actor_critic.to(DEVICE)
+ policy = ppo_runner.alg.actor_critic.act_inference
+
+ isaac_obs, _ = isaac_env.reset()
+ mujoco_obs_np = mujoco_env.reset()
+ mujoco_obs = torch.from_numpy(mujoco_obs_np).float().to(DEVICE)
+
+ ## DEBUG ##
+ print(f"Policy device: {next(ppo_runner.alg.actor_critic.parameters()).device}")
+ print(f"Isaac obs device: {isaac_obs.device}")
+ print(f"Mujoco obs device: {mujoco_obs.device}")
+
+ print(f"Isaac obs shape: {isaac_obs.shape}")
+ print(f"Mujoco obs shape: {mujoco_obs.shape}")
+
+ print(isaac_obs[0][-59:].detach().cpu().numpy())
+ print(mujoco_obs[0][-59:].detach().cpu().numpy())
+ ## DEBUG ##
+
+ done1 = done2 = False
+
+ ISAAC = False
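+    # With ISAAC = False only the Mujoco environment is stepped; set it to True to also step Isaac Gym and compare the two.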
+
+ steps = int(cfg.env.episode_length_s / (cfg.sim.dt * cfg.control.decimation))
+ for t in tqdm(range(steps)):
+ if ISAAC:
+ isaac_actions = policy(isaac_obs.detach())
+ isaac_env.commands[:] = torch.zeros(4)
+ isaac_obs, _, _, done1, _ = isaac_env.step(isaac_actions.detach())
+
+ mujoco_actions = policy(mujoco_obs.detach())
+ mujoco_env.commands[:] = torch.zeros(4)
+ mujoco_obs_np, _, done2, _ = mujoco_env.step(mujoco_actions.detach().cpu().numpy())
+ mujoco_obs = torch.from_numpy(mujoco_obs_np).float().to(DEVICE)
+
+ if t % 17 == 0:
+ print(f"Time: {t * cfg.sim.dt:.2f}s")
+ if ISAAC:
+ debug_robot_state("Isaac", isaac_obs[0].detach().cpu().numpy(), isaac_actions[0].detach().cpu().numpy())
+ debug_robot_state("Mujoco", mujoco_obs[0].detach().cpu().numpy(), mujoco_actions[0].detach().cpu().numpy())
+
+ if done1 or done2:
+ break
+
+
+if __name__ == "__main__":
+ args = parse_args_with_extras(lambda x: x)
+ print("Arguments:", vars(args))
+ coplay(args)
diff --git a/sim/sim2sim/mujoco/old.py b/sim/sim2sim/mujoco/old.py
new file mode 100644
index 00000000..21d13c3d
--- /dev/null
+++ b/sim/sim2sim/mujoco/old.py
@@ -0,0 +1,256 @@
+import argparse
+import math
+import os
+from copy import deepcopy
+from dataclasses import dataclass
+from typing import Dict, List, Tuple, Union
+
+import mujoco
+import mujoco_viewer
+import numpy as np
+import onnxruntime as ort
+import pygame
+from kinfer.export.pytorch import export_to_onnx
+from kinfer.inference.python import ONNXModel
+from sim.model_export import ActorCfg
+from scipy.spatial.transform import Rotation as R
+from tqdm import tqdm
+from sim.sim2sim.helpers import get_actor_policy
+
+
+@dataclass
+class Sim2simCfg:
+ sim_duration: float = 60.0
+ dt: float = 0.001
+ decimation: int = 10
+ tau_factor: float = 3
+ cycle_time: float = 0.25
+
+
+def quaternion_to_euler_array(quat: np.ndarray) -> np.ndarray:
+ # Ensure quaternion is in the correct format [x, y, z, w]
+ x, y, z, w = quat
+
+ # Roll (x-axis rotation)
+ t0 = +2.0 * (w * x + y * z)
+ t1 = +1.0 - 2.0 * (x * x + y * y)
+ roll_x = np.arctan2(t0, t1)
+
+ # Pitch (y-axis rotation)
+ t2 = +2.0 * (w * y - z * x)
+ t2 = np.clip(t2, -1.0, 1.0)
+ pitch_y = np.arcsin(t2)
+
+ # Yaw (z-axis rotation)
+ t3 = +2.0 * (w * z + x * y)
+ t4 = +1.0 - 2.0 * (y * y + z * z)
+ yaw_z = np.arctan2(t3, t4)
+
+ # Returns roll, pitch, yaw in a NumPy array in radians
+ return np.array([roll_x, pitch_y, yaw_z])
+
+
+def get_obs(data: mujoco.MjData) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
+ """Extracts an observation from the mujoco data structure"""
+ q = data.qpos.astype(np.double)
+ dq = data.qvel.astype(np.double)
+ quat = data.sensor("orientation").data[[1, 2, 3, 0]].astype(np.double)
+ r = R.from_quat(quat)
+ v = r.apply(data.qvel[:3], inverse=True).astype(np.double) # In the base frame
+ omega = data.sensor("angular-velocity").data.astype(np.double)
+ gvec = r.apply(np.array([0.0, 0.0, -1.0]), inverse=True).astype(np.double)
+ return (q, dq, quat, v, omega, gvec)
+
+
+def pd_control(
+ target_q: np.ndarray,
+ q: np.ndarray,
+ kp: np.ndarray,
+ dq: np.ndarray,
+ kd: np.ndarray,
+ default: np.ndarray,
+) -> np.ndarray:
+ """Calculates torques from position commands"""
+ return kp * (target_q + default - q) - kd * dq
+
+
+def run_mujoco(
+ embodiment: str,
+ policy: ort.InferenceSession,
+ cfg: Sim2simCfg,
+ model_info: Dict[str, Union[float, List[float], str]],
+) -> None:
+ """
+ Run the Mujoco simulation using the provided policy and configuration.
+ """
+ model_dir = os.environ.get("MODEL_DIR", "sim/resources")
+ mujoco_model_path = f"{model_dir}/{embodiment}/robot.xml"
+
+ model = mujoco.MjModel.from_xml_path(mujoco_model_path)
+ model.opt.timestep = cfg.dt
+ data = mujoco.MjData(model)
+
+ assert isinstance(model_info["num_actions"], int)
+ assert isinstance(model_info["num_observations"], int)
+ assert isinstance(model_info["robot_effort"], list)
+ assert isinstance(model_info["robot_stiffness"], list)
+ assert isinstance(model_info["robot_damping"], list)
+
+ tau_limit = np.array(list(model_info["robot_effort"]) + list(model_info["robot_effort"])) * cfg.tau_factor
+ kps = np.array(list(model_info["robot_stiffness"]) + list(model_info["robot_stiffness"]))
+ kds = np.array(list(model_info["robot_damping"]) + list(model_info["robot_damping"]))
+
+ try:
+ data.qpos = model.keyframe("default").qpos
+ default = deepcopy(model.keyframe("default").qpos)[-model_info["num_actions"] :]
+ print("Default position:", default)
+    except Exception:
+ print("No default position found, using zero initialization")
+ default = np.zeros(model_info["num_actions"]) # 3 for pos, 4 for quat, cfg.num_actions for joints
+ default += np.random.uniform(-0.03, 0.03, size=default.shape)
+ print("Default position:", default)
+ mujoco.mj_step(model, data)
+ for ii in range(len(data.ctrl) + 1):
+ print(data.joint(ii).id, data.joint(ii).name)
+
+ data.qvel = np.zeros_like(data.qvel)
+ data.qacc = np.zeros_like(data.qacc)
+
+ viewer = mujoco_viewer.MujocoViewer(model, data)
+
+ target_q = np.zeros((model_info["num_actions"]), dtype=np.double)
+ prev_actions = np.zeros((model_info["num_actions"]), dtype=np.double)
+ hist_obs = np.zeros((model_info["num_observations"]), dtype=np.double)
+
+ count_lowlevel = 0
+
+ input_data = {
+ "x_vel.1": np.zeros(1).astype(np.float32),
+ "y_vel.1": np.zeros(1).astype(np.float32),
+ "rot.1": np.zeros(1).astype(np.float32),
+ "t.1": np.zeros(1).astype(np.float32),
+ "dof_pos.1": np.zeros(model_info["num_actions"]).astype(np.float32),
+ "dof_vel.1": np.zeros(model_info["num_actions"]).astype(np.float32),
+ "prev_actions.1": np.zeros(model_info["num_actions"]).astype(np.float32),
+ "imu_ang_vel.1": np.zeros(3).astype(np.float32),
+ "imu_euler_xyz.1": np.zeros(3).astype(np.float32),
+ "buffer.1": np.zeros(model_info["num_observations"]).astype(np.float32),
+ }
+
+ # Initialize variables for tracking upright steps and average speed
+ upright_steps = 0
+ total_speed = 0.0
+ step_count = 0
+
+ for _ in tqdm(range(int(cfg.sim_duration / cfg.dt)), desc="Simulating..."):
+ # Obtain an observation
+ q, dq, quat, v, omega, gvec = get_obs(data)
+ q = q[-model_info["num_actions"] :]
+ dq = dq[-model_info["num_actions"] :]
+
+ eu_ang = quaternion_to_euler_array(quat)
+ eu_ang[eu_ang > math.pi] -= 2 * math.pi
+
+ # Check if the robot is upright (roll and pitch within ±30 degrees)
+ if abs(eu_ang[0]) > math.radians(30) or abs(eu_ang[1]) > math.radians(30):
+ print("Robot tilted heavily, ending simulation.")
+ break
+ else:
+ upright_steps += 1 # Increment upright steps
+
+ # Calculate speed and accumulate for average speed calculation
+ speed = np.linalg.norm(v[:2]) # Speed in the x-y plane
+ total_speed += speed
+ step_count += 1
+
+ # 1000hz -> 50hz
+ if count_lowlevel % cfg.decimation == 0:
+ # Convert sim coordinates to policy coordinates
+ cur_pos_obs = q - default
+ cur_vel_obs = dq
+
+ input_data["x_vel.1"] = np.array([x_vel_cmd], dtype=np.float32)
+ input_data["y_vel.1"] = np.array([y_vel_cmd], dtype=np.float32)
+ input_data["rot.1"] = np.array([yaw_vel_cmd], dtype=np.float32)
+
+ input_data["t.1"] = np.array([count_lowlevel * cfg.dt], dtype=np.float32)
+
+ input_data["dof_pos.1"] = cur_pos_obs.astype(np.float32)
+ input_data["dof_vel.1"] = cur_vel_obs.astype(np.float32)
+
+ input_data["prev_actions.1"] = prev_actions.astype(np.float32)
+
+ input_data["imu_ang_vel.1"] = omega.astype(np.float32)
+ input_data["imu_euler_xyz.1"] = eu_ang.astype(np.float32)
+
+ input_data["buffer.1"] = hist_obs.astype(np.float32)
+
+ policy_output = policy(input_data)
+ positions = policy_output["actions_scaled"]
+ curr_actions = policy_output["actions"]
+ hist_obs = policy_output["x.3"]
+
+ target_q = positions
+
+ prev_actions = curr_actions
+
+ # Generate PD control
+ tau = pd_control(target_q, q, kps, dq, kds, default) # Calc torques
+ tau = np.clip(tau, -tau_limit, tau_limit) # Clamp torques
+
+ data.ctrl = tau
+ mujoco.mj_step(model, data)
+
+ viewer.render()
+ count_lowlevel += 1
+
+ viewer.close()
+
+ # Calculate average speed
+ if step_count > 0:
+ average_speed = total_speed / step_count
+ else:
+ average_speed = 0.0
+
+ # Save or print the statistics at the end of the episode
+ print(f"Number of upright steps: {upright_steps}")
+ print(f"Average speed: {average_speed:.4f} m/s")
+
+
+if __name__ == "__main__":
+ x_vel_cmd, y_vel_cmd, yaw_vel_cmd = 0.2, 0.0, 0.0
+
+ policy_cfg = ActorCfg(embodiment="stompymicro")
+ policy_cfg.cycle_time = 0.2
+ cfg = Sim2simCfg(
+ sim_duration=10.0,
+ dt=0.001,
+ decimation=10,
+ tau_factor=2,
+ cycle_time=policy_cfg.cycle_time,
+ )
+
+ actor_model, sim2sim_info, input_tensors = get_actor_policy("policy_1.pt", policy_cfg)
+
+ # Merge policy_cfg and sim2sim_info into a single config object
+ export_config = {**vars(policy_cfg), **sim2sim_info}
+ print(export_config)
+ export_to_onnx(actor_model, input_tensors=input_tensors, config=export_config, save_path="kinfer_test.onnx")
+ policy = ONNXModel("kinfer_test.onnx")
+
+ metadata = policy.get_metadata()
+
+ model_info = {
+ "num_actions": metadata["num_actions"],
+ "num_observations": metadata["num_observations"],
+ "robot_effort": metadata["robot_effort"],
+ "robot_stiffness": metadata["robot_stiffness"],
+ "robot_damping": metadata["robot_damping"],
+ }
+
+ run_mujoco(
+ "stompymicro",
+ policy,
+ cfg,
+ model_info,
+ )
\ No newline at end of file
diff --git a/sim/sim2sim/mujoco/play.py b/sim/sim2sim/mujoco/play.py
new file mode 100644
index 00000000..5a6e7a43
--- /dev/null
+++ b/sim/sim2sim/mujoco/play.py
@@ -0,0 +1,185 @@
+import numpy as np
+import onnxruntime as ort
+from scipy.interpolate import griddata
+import matplotlib.pyplot as plt
+
+from sim.env_helpers import debug_robot_state
+from sim.envs.base.mujoco_env import MujocoCfg, MujocoEnv
+from sim.utils.cmd_manager import CommandManager
+
+np.set_printoptions(precision=2, suppress=True)
+
+
+def run_simulation(env: MujocoEnv, policy: ort.InferenceSession, cfg: MujocoCfg, cmd_manager: CommandManager, num_episodes: int = 1) -> list:
+    """Run a policy in the Mujoco environment and return the per-episode rewards."""
+ total_reward = 0
+ rewards = []
+
+ for episode in range(num_episodes):
+ obs = env.reset()
+ count = 0
+ episode_reward = 0
+
+ target_q = np.zeros(env.num_joints)
+ prev_actions = np.zeros(env.num_joints)
+ hist_obs = np.zeros(policy.get_metadata()["num_observations"])
+
+ while count * cfg.sim.dt * cfg.control.decimation < cfg.env.episode_length_s:
+ q, dq, quat, v, omega, euler = obs
+ phase = count * cfg.sim.dt / cfg.rewards.cycle_time
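+            # 5-dim command input: gait-phase sin/cos clock followed by zero x/y/yaw velocity commands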
+ command_input = np.array([np.sin(2 * np.pi * phase), np.cos(2 * np.pi * phase), 0.0, 0.0, 0.0])
+ obs_buf = np.concatenate([command_input, q, dq, prev_actions, omega, euler])
+ policy_output = policy(
+ {
+ "x_vel.1": np.array([command_input[2]], dtype=np.float32),
+ "y_vel.1": np.array([command_input[3]], dtype=np.float32),
+ "rot.1": np.array([command_input[4]], dtype=np.float32),
+ "t.1": np.array([count * cfg.sim.dt], dtype=np.float32),
+ "dof_pos.1": q.astype(np.float32),
+ "dof_vel.1": dq.astype(np.float32),
+ "prev_actions.1": prev_actions.astype(np.float32),
+ "imu_ang_vel.1": omega.astype(np.float32),
+ "imu_euler_xyz.1": euler.astype(np.float32),
+ "buffer.1": hist_obs.astype(np.float32),
+ }
+ )
+
+ target_q = policy_output["actions_scaled"]
+ prev_actions = policy_output["actions"]
+ hist_obs = policy_output["x.3"]
+
+ obs, reward, done, info = env.step(target_q)
+ episode_reward += reward
+ count += 1
+
+ if count % 17 == 0:
+ pass # debug_robot_state("Mujoco", obs_buf, actions=target_q)
+
+ if done:
+ # print(f"Episode {episode + 1} finished with reward: {episode_reward}")
+ total_reward += episode_reward
+ rewards.append(episode_reward)
+ break
+
+ env.close()
+
+ average_reward = total_reward / num_episodes
+ # print(f"Average reward over {num_episodes} episodes: {average_reward}")
+ return rewards
+
+
+def run_experiment(env: MujocoEnv, policy: ort.InferenceSession, cfg: MujocoCfg, cmd_manager: CommandManager):
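+    """Grid-search kp/kd gain scales, plot an interpolated reward heatmap, and report the best-performing combination."""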
+ kp_scale_range = np.linspace(0.1, 5.0, 40)
+ kd_scale_range = np.linspace(0.1, 5.0, 40)
+ kp_mesh, kd_mesh = np.meshgrid(kp_scale_range, kd_scale_range)
+ gains_combinations = np.column_stack((kp_mesh.flatten(), kd_mesh.flatten()))
+
+ results = []
+ for i, (kp, kd) in enumerate(gains_combinations):
+ cfg.gains.kp_scale = kp
+ cfg.gains.kd_scale = kd
+ rewards = run_simulation(env, policy, cfg, cmd_manager, num_episodes=10)
+ mean_reward = np.mean(rewards)
+ results.append(mean_reward)
+ print(f"Experiment {i}/{len(gains_combinations)} -> (kds: {kd}, kps: {kp}, reward: {mean_reward:.2f})")
+
+ results = np.array(results)
+ kp_interp = np.linspace(kp_scale_range.min(), kp_scale_range.max(), 100)
+ kd_interp = np.linspace(kd_scale_range.min(), kd_scale_range.max(), 100)
+ kp_mesh_fine, kd_mesh_fine = np.meshgrid(kp_interp, kd_interp)
+
+ rewards_smooth = griddata(
+ gains_combinations,
+ results,
+ (kp_mesh_fine, kd_mesh_fine),
+ method='cubic'
+ )
+
+ plt.figure(figsize=(10, 8))
+ plt.imshow(
+ rewards_smooth,
+ extent=[kp_scale_range.min(), kp_scale_range.max(),
+ kd_scale_range.min(), kd_scale_range.max()],
+ origin='lower',
+ aspect='auto',
+ cmap='viridis'
+ )
+
+ plt.colorbar(label='Mean Reward')
+ plt.xlabel('KP Scale')
+ plt.ylabel('KD Scale')
+ plt.title('Reward Heatmap for PD Gains')
+
+ plt.contour(
+ kp_mesh_fine, kd_mesh_fine, rewards_smooth,
+ levels=10,
+ colors='white',
+ alpha=0.3,
+ linestyles='solid'
+ )
+
+ best_idx = np.argmax(results)
+ best_kp = gains_combinations[best_idx][0]
+ best_kd = gains_combinations[best_idx][1]
+ plt.plot(best_kp, best_kd, 'r*', markersize=15, label='Best Gains')
+
+ plt.legend()
+ plt.tight_layout()
+ plt.show()
+
+ print(f"\nBest configuration found:")
+ print(f"KP Scale: {best_kp:.2f}")
+ print(f"KD Scale: {best_kd:.2f}")
+ print(f"Mean Reward: {results[best_idx]:.2f}")
+
+
+if __name__ == "__main__":
+ import numpy as np
+ from kinfer.export.pytorch import export_to_onnx
+ from kinfer.inference.python import ONNXModel
+ from scipy.spatial.transform import Rotation as R
+
+ from sim.model_export import ActorCfg
+ from sim.sim2sim.helpers import get_actor_policy
+
+ """
+ Current parameters:
+ gains.kp_scale: 1.445
+ gains.kd_scale: 19.975
+ gains.tau_factor: 6.350
+ """
+
+ cfg = MujocoCfg()
+ cfg.gains.kp_scale = 4.6 # 1.643 # 1.445
+ cfg.gains.kd_scale = 7.5 # 29.027 # 19.975
+ cfg.gains.tau_factor = 4
+ cfg.env.num_envs = 1
+ render = False
+ env = MujocoEnv(cfg, render=render)
+ cmd_manager = CommandManager(num_envs=cfg.env.num_envs, mode="fixed", default_cmd=[0.0, 0.0, 0.0, 0.0])
+ LOAD_MODEL_PATH = "examples/experiments/standing/robustv1/policy_1.pt"
+
+ policy_cfg = ActorCfg(embodiment=cfg.asset.name)
+
+ actor_model, sim2sim_info, input_tensors = get_actor_policy(LOAD_MODEL_PATH, policy_cfg)
+ export_config = {**vars(policy_cfg), **sim2sim_info}
+ print(export_config)
+
+ export_to_onnx(actor_model, input_tensors=input_tensors, config=export_config, save_path="kinfer_test.onnx")
+ policy = ONNXModel("kinfer_test.onnx")
+
+ rewards = run_simulation(MujocoEnv(cfg, render=True), policy, cfg, cmd_manager, num_episodes=1)
+ print(f"Mean reward: {np.mean(rewards)}")
+ run_experiment(env, policy, cfg, cmd_manager)
+
+ # kp_scale_range = np.linspace(0.5, 10.0, 100)
+ # kd_scale_range = np.linspace(0.2, 10.0, 100)
+ # np.random.shuffle(kp_scale_range)
+ # np.random.shuffle(kd_scale_range)
+ # for i in range(100):
+ # cfg.gains.kp_scale = kp_scale_range[i]
+ # cfg.gains.kd_scale = kd_scale_range[i]
+ # rewards = run_simulation(env, policy, cfg, cmd_manager, num_episodes=1)
+ # print(f"{i} -> (kds: {cfg.gains.kd_scale}, kps: {cfg.gains.kp_scale})")
+
+ # 9 -> (kds: 0.8929292929292931, kps: 8.656565656565656)
diff --git a/sim/sim2sim/mujoco/test.py b/sim/sim2sim/mujoco/test.py
new file mode 100644
index 00000000..f7f17112
--- /dev/null
+++ b/sim/sim2sim/mujoco/test.py
@@ -0,0 +1,109 @@
+import numpy as np
+import matplotlib.pyplot as plt
+import os
+from datetime import datetime
+
+from sim import ROOT_DIR
+from sim.utils.args_parsing import parse_args_with_extras
+from sim.model_export import ActorCfg
+from sim.sim2sim.helpers import get_actor_policy
+
+from sim.envs.humanoids.stompymicro_config import StompyMicroCfgPPO, StompyMicroCfg
+from sim.envs.humanoids.stompymicro_env import StompyMicroEnv
+from sim.utils.helpers import (
+ class_to_dict,
+ get_load_path,
+ parse_sim_params,
+)
+from sim.algo.ppo.on_policy_runner import OnPolicyRunner
+from kinfer.export.pytorch import export_to_onnx # isort: skip
+from kinfer.inference.python import ONNXModel # isort: skip
+import torch # isort: skip
+
+np.set_printoptions(precision=3, suppress=True)
+torch.set_printoptions(precision=3)
+
+
+def test_policies(args):
+ LOAD_MODEL_PATH = "examples/experiments/standing/robustv1/policy_1.pt"
+ DEVICE = "cuda:0"
+
+ ## ISAAC ##
+ args.load_model = LOAD_MODEL_PATH
+ env_cfg, train_cfg = StompyMicroCfg(), StompyMicroCfgPPO()
+ env_cfg.env.num_envs = 1
+ env_cfg.sim.physx.max_gpu_contact_pairs = 2**10
+ train_cfg.runner.resume = True
+ train_cfg.runner.load_run = -8
+ train_cfg.seed = 0
+ sim_params = {"sim": class_to_dict(env_cfg.sim)}
+ sim_params = parse_sim_params(args, sim_params)
+ env = StompyMicroEnv(
+ cfg=env_cfg,
+ sim_params=sim_params,
+ physics_engine=args.physics_engine,
+ sim_device=args.sim_device,
+ headless=args.headless,
+ )
+ train_cfg_dict = class_to_dict(train_cfg)
+ env_cfg_dict = class_to_dict(env_cfg)
+ all_cfg = {**train_cfg_dict, **env_cfg_dict}
+ ppo_runner = OnPolicyRunner(env, all_cfg, log_dir=None, device=DEVICE)
+ resume_path = get_load_path(
+ root=os.path.join(ROOT_DIR, "logs", train_cfg.runner.experiment_name), load_run=train_cfg.runner.load_run, checkpoint=train_cfg.runner.checkpoint
+ )
+ ppo_runner.load(resume_path, load_optimizer=False)
+ ppo_runner.alg.actor_critic.eval()
+ ppo_runner.alg.actor_critic.to(DEVICE)
+ policy = ppo_runner.alg.actor_critic.act_inference
+
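+    # Zero observation buffer; 885 is assumed to be the flattened (frame-stacked) input size the Isaac policy expects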
+ obs_buf = torch.zeros((1, 885)).to(DEVICE)
+ with torch.no_grad():
+ isaac_actions = policy(obs_buf)
+ print(isaac_actions)
+ isaac_actions = isaac_actions.cpu().numpy()[0]
+ ## ISAAC ##
+
+ ## MUJOCO ##
+ policy_cfg = ActorCfg(embodiment="stompymicro")
+ actor_model, sim2sim_info, input_tensors = get_actor_policy(LOAD_MODEL_PATH, policy_cfg)
+ export_config = {**vars(policy_cfg), **sim2sim_info}
+ export_to_onnx(actor_model, input_tensors=input_tensors, config=export_config, save_path="kinfer_test.onnx")
+ mujoco_policy = ONNXModel("kinfer_test.onnx")
+ num_actions = sim2sim_info["num_actions"]
+ mujoco_inputs = {
+ "x_vel.1": np.zeros(1, dtype=np.float32),
+ "y_vel.1": np.zeros(1, dtype=np.float32),
+ "rot.1": np.zeros(1, dtype=np.float32),
+ "t.1": np.zeros(1, dtype=np.float32),
+ "dof_pos.1": np.zeros(num_actions, dtype=np.float32),
+ "dof_vel.1": np.zeros(num_actions, dtype=np.float32),
+ "prev_actions.1": np.zeros(num_actions, dtype=np.float32),
+ "imu_ang_vel.1": np.zeros(3, dtype=np.float32),
+ "imu_euler_xyz.1": np.zeros(3, dtype=np.float32),
+ "buffer.1": np.zeros(sim2sim_info["num_observations"], dtype=np.float32),
+ }
+ mujoco_outputs = mujoco_policy(mujoco_inputs)
+ ## MUJOCO ##
+
+ ## COMPARE ##
+ print("\nPolicy Output Comparison:")
+ print("-" * 50)
+ print("\nMujoco actions:")
+ print(mujoco_outputs["actions_scaled"])
+ print("\nIsaac actions:")
+ print(isaac_actions)
+    print("\nMean absolute difference:", np.abs(mujoco_outputs["actions_scaled"] - isaac_actions).mean())
+ plt.plot(mujoco_outputs["actions_scaled"], label="Mujoco")
+ plt.plot(isaac_actions, label="Isaac")
+ plt.legend()
+ plt.show()
+ ## COMPARE ##
+
+ return mujoco_outputs, isaac_actions
+
+
+if __name__ == "__main__":
+ args = parse_args_with_extras(lambda x: x)
+ print("Arguments:", vars(args))
+ test_policies(args)
diff --git a/sim/sim2sim/optimizers/bayesian.py b/sim/sim2sim/optimizers/bayesian.py
new file mode 100644
index 00000000..ceb1bef4
--- /dev/null
+++ b/sim/sim2sim/optimizers/bayesian.py
@@ -0,0 +1,218 @@
+from dataclasses import dataclass
+from typing import Any, Dict, List, Optional, Tuple
+
+import numpy as np
+import onnxruntime as ort
+from scipy.stats import norm
+from sklearn.gaussian_process import GaussianProcessRegressor
+from sklearn.gaussian_process.kernels import RBF, ConstantKernel, Matern
+
+from sim.envs.base.mujoco_env import MujocoCfg, MujocoEnv
+from sim.sim2sim.mujoco.play import run_simulation
+from sim.utils.cmd_manager import CommandManager
+
+
+@dataclass
+class OptimizeParam:
+ """Definition of a parameter to optimize"""
+
+ name: str # Full parameter path (e.g., 'gains.kp_scale')
+ min_val: float
+ max_val: float
+    init_val: Optional[float] = None
+
+
+class BayesianOptimizer:
+ def __init__(
+ self,
+ base_cfg: MujocoCfg,
+ policy: ort.InferenceSession,
+ parameters: List[OptimizeParam],
+ cmd_manager: CommandManager,
+ n_initial_points: int = 10,
+ n_iterations: int = 40,
+ exploration_weight: float = 0.1,
+ ):
+ self.base_cfg = base_cfg
+ self.policy = policy
+ self.parameters = parameters
+ self.cmd_manager = cmd_manager
+ self.n_initial_points = n_initial_points
+ self.n_iterations = n_iterations
+ self.exploration_weight = exploration_weight
+
+ # Initialize storage for observations
+ self.X = [] # Parameter configurations
+ self.y = [] # Observed scores
+
+ # Set up parameter bounds for GP
+ self.bounds = np.array([[p.min_val, p.max_val] for p in parameters])
+
+        # Combined RBF + Matérn (nu=0.5) kernel; the Matérn term is less smooth than RBF,
+        # which suits noisy physical processes
+ kernel = ConstantKernel(1.0) * RBF(length_scale=1.0) + ConstantKernel(1.0) * Matern(length_scale=1.0, nu=0.5)
+
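+        # normalize_y standardizes the targets internally and un-normalizes predictions;
+        # alpha adds a small jitter to the kernel diagonal for numerical stability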
+ self.gp = GaussianProcessRegressor(kernel=kernel, n_restarts_optimizer=10, normalize_y=True, alpha=1e-6)
+
+ def _get_cfg_value(self, cfg: MujocoCfg, param_path: str) -> Any:
+ """Get a value from nested config using dot notation"""
+ value = cfg
+ for part in param_path.split("."):
+ value = getattr(value, part)
+ return value
+
+ def _set_cfg_value(self, cfg: MujocoCfg, param_path: str, value: Any) -> None:
+ """Set a value in nested config using dot notation"""
+ parts = param_path.split(".")
+ obj = cfg
+ for part in parts[:-1]:
+ obj = getattr(obj, part)
+ setattr(obj, parts[-1], value)
+
+ def _params_to_array(self, params: Dict[str, float]) -> np.ndarray:
+ """Convert parameter dictionary to array"""
+ return np.array([params[p.name] for p in self.parameters])
+
+ def _array_to_params(self, x: np.ndarray) -> Dict[str, float]:
+ """Convert array to parameter dictionary"""
+ return {p.name: float(val) for p, val in zip(self.parameters, x)}
+
+ def _sample_random_point(self) -> np.ndarray:
+ """Sample a random point in the parameter space"""
+ return np.array([np.random.uniform(p.min_val, p.max_val) for p in self.parameters])
+
+ def _expected_improvement(self, X: np.ndarray, xi: float = 0.01) -> np.ndarray:
+ """Calculate expected improvement at points X"""
+ # Ensure X is 2D
+ X = X.reshape(-1, len(self.parameters))
+
+ # Get predictive distribution from GP
+ mu, sigma = self.gp.predict(X, return_std=True)
+
+ # Get current best
+ mu_best = np.max(self.y)
+
+ # Calculate expected improvement
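+        #   EI = (mu - mu_best - xi) * Phi(Z) + sigma * phi(Z),  Z = (mu - mu_best - xi) / sigma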
+ with np.errstate(divide="warn"):
+ imp = mu - mu_best - xi
+ Z = imp / sigma
+ ei = imp * norm.cdf(Z) + sigma * norm.pdf(Z)
+ ei[sigma == 0.0] = 0.0
+
+ return ei
+
+ def _evaluate_parameters(self, params: Dict[str, float]) -> float:
+ """Evaluate a parameter set over multiple episodes"""
+ # Create a copy of base config and update with new parameters
+ cfg = MujocoCfg()
+ cfg.__dict__.update(self.base_cfg.__dict__)
+
+ for param_name, value in params.items():
+ self._set_cfg_value(cfg, param_name, value)
+
+ env = MujocoEnv(cfg, render=False)
+ try:
+ rewards = run_simulation(
+ env=env,
+ policy=self.policy,
+ cfg=cfg,
+ cmd_manager=self.cmd_manager,
+ num_episodes=20,
+ )
+ mean_reward = np.mean(rewards)
+ std_reward = np.std(rewards)
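+            # Penalize episode-to-episode variance so consistently stable gains score higher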
+ stability_penalty = 0.1 * std_reward
+ return mean_reward - stability_penalty
+
+ except Exception as e:
+ print(f"Evaluation failed: {e}")
+ return float("-inf")
+ finally:
+ env.close()
+
+ def _next_point(self) -> np.ndarray:
+        """Determine the next point to evaluate by maximizing Expected Improvement over random samples"""
+ best_x = None
+ best_ei = -float("inf")
+
+ # Random search for point with highest expected improvement
+ for _ in range(1000):
+ x = self._sample_random_point()
+            ei = float(self._expected_improvement(x.reshape(1, -1))[0])
+ if ei > best_ei:
+ best_ei = ei
+ best_x = x
+
+ return best_x
+
+ def optimize(self) -> Tuple[Dict[str, float], List[float]]:
+ """Run Bayesian optimization"""
+ print("Starting Bayesian optimization...")
+ print(f"Initial exploration: {self.n_initial_points} points")
+
+ # Initial random exploration
+ for i in range(self.n_initial_points):
+ print(f"\nEvaluating initial point {i+1}/{self.n_initial_points}")
+ x = self._sample_random_point()
+ params = self._array_to_params(x)
+ y = self._evaluate_parameters(params)
+
+ self.X.append(x)
+ self.y.append(y)
+
+ print(f"Parameters: {params}")
+ print(f"Score: {y:.2f}")
+
+ # Convert lists to arrays
+ self.X = np.array(self.X)
+ self.y = np.array(self.y)
+
+ # Main optimization loop
+ best_score = max(self.y)
+ best_params = self._array_to_params(self.X[np.argmax(self.y)])
+ no_improvement_count = 0
+
+ for iteration in range(self.n_iterations):
+ print(f"\nIteration {iteration + 1}/{self.n_iterations}")
+
+            # Fit GP to all data; normalize_y=True standardizes the targets internally and
+            # returns predictions on the original scale, keeping EI consistent with max(self.y)
+            self.gp.fit(self.X, self.y)
+
+ # Find next point to evaluate
+ x = self._next_point()
+ params = self._array_to_params(x)
+ y = self._evaluate_parameters(params)
+
+ # Update data
+ self.X = np.vstack((self.X, x))
+ self.y = np.append(self.y, y)
+
+ # Update best score
+ if y > best_score:
+ best_score = y
+ best_params = params
+ no_improvement_count = 0
+ print("New best configuration found!")
+ else:
+ no_improvement_count += 1
+
+ print("Current parameters:")
+ for name, value in params.items():
+ print(f" {name}: {value:.3f}")
+ print(f"Score: {y:.2f}")
+ print(f"Best score so far: {best_score:.2f}")
+
+ # Early stopping
+ if no_improvement_count >= 200:
+                print("\nStopping early: no improvement for 200 iterations")
+ break
+
+ print("\nOptimization complete!")
+ print("Best parameters found:")
+ for name, value in best_params.items():
+ print(f"{name}: {value:.3f}")
+ print(f"Best score: {best_score:.2f}")
+
+ return best_params, self.y.tolist()
diff --git a/sim/sim2sim/optimizers/evolution.py b/sim/sim2sim/optimizers/evolution.py
new file mode 100644
index 00000000..f2026e43
--- /dev/null
+++ b/sim/sim2sim/optimizers/evolution.py
@@ -0,0 +1,427 @@
+from dataclasses import dataclass
+from typing import Any, Dict, List, Optional, Tuple
+
+import numpy as np
+
+from sim.envs.base.mujoco_env import MujocoCfg, MujocoEnv
+
+
+@dataclass
+class OptimizeParam:
+ """Definition of a parameter to optimize"""
+
+ name: str
+ range: Tuple[float, float] # [min, max]
+    init_val: Optional[float] = None  # None -> random initialization
+ mutation_range: Tuple[float, float] = (0.8, 1.2) # relative mutation bounds
+
+
+class ParameterOptimizer:
+ def __init__(
+ self,
+ base_cfg: MujocoCfg,
+ parameters: List[OptimizeParam],
+ n_iterations: int = 50,
+ population_size: int = 20,
+ elite_frac: float = 0.2,
+ ):
+ self.base_cfg = base_cfg
+ self.parameters = parameters
+ self.n_iterations = n_iterations
+ self.population_size = population_size
+ self.n_elite = max(1, int(population_size * elite_frac))
+
+ def _get_cfg_value(self, cfg: MujocoCfg, param_path: str) -> Any:
+ """Get a value from nested config using dot notation"""
+ value = cfg
+ for part in param_path.split("."):
+ value = getattr(value, part)
+ return value
+
+ def _set_cfg_value(self, cfg: MujocoCfg, param_path: str, value: Any) -> None:
+ """Set a value in nested config using dot notation"""
+ parts = param_path.split(".")
+ obj = cfg
+ for part in parts[:-1]:
+ obj = getattr(obj, part)
+ setattr(obj, parts[-1], value)
+
+ def _create_parameters(self) -> List[Dict[str, float]]:
+ """Create a population of parameter sets"""
+ population = []
+
+ for _ in range(self.population_size):
+ params = {}
+ for param in self.parameters:
+ if param.init_val is not None:
+ # Initialize near the provided value
+ value = param.init_val * np.random.uniform(0.9, 1.1)
+ else:
+ # Random initialization within bounds
+                    value = np.random.uniform(*param.range)
+ params[param.name] = np.clip(value, param.range[0], param.range[1])
+ population.append(params)
+
+ return population
+
+ def _evaluate_parameters(self, params: Dict[str, float], n_episodes: int = 3) -> float:
+ """Evaluate a parameter set over multiple episodes"""
+ # Create a copy of the base config
+ cfg = MujocoCfg()
+ cfg.__dict__.update(self.base_cfg.__dict__)
+
+ # Update config with new parameters
+ for param_name, value in params.items():
+ self._set_cfg_value(cfg, param_name, value)
+
+ env = MujocoEnv(cfg, render=False)
+ total_reward = 0.0
+
+ for _ in range(n_episodes):
+ env.reset()
+ episode_reward = 0
+ done = False
+
+ while not done:
+ # For testing stability, use zero actions
+ action = np.zeros(env.num_joints)
+ _, reward, done, _ = env.step(action)
+ episode_reward += reward
+
+ if done:
+ break
+
+ total_reward += episode_reward
+
+ env.close()
+ return total_reward / n_episodes
+
+ def _create_new_population(self, population: List[Dict[str, float]], scores: np.ndarray) -> List[Dict[str, float]]:
+ """Create new population from elite performers"""
+ # Sort by score
+ elite_idx = np.argsort(scores)[-self.n_elite :]
+ elite_population = [population[i] for i in elite_idx]
+
+ # Create new population through mutation of elite members
+ new_population = []
+ while len(new_population) < self.population_size:
+ # Select random elite member
+ parent = elite_population[np.random.randint(self.n_elite)]
+
+ # Mutate parameters
+ child = {}
+ for param in self.parameters:
+ # Apply random mutation within specified range
+ mutation = np.random.uniform(*param.mutation_range)
+ value = parent[param.name] * mutation
+ child[param.name] = np.clip(value, param.range[0], param.range[1])
+
+ new_population.append(child)
+
+ return new_population
+
+ def optimize(self) -> Tuple[Dict[str, float], List[float]]:
+ """Run parameter optimization"""
+ best_score = float("-inf")
+ best_params = None
+ history = []
+
+ # Initial population
+ population = self._create_parameters()
+
+ for iteration in range(self.n_iterations):
+ # Evaluate population
+ scores = np.array([self._evaluate_parameters(params) for params in population])
+
+ # Track best performer
+ max_score = np.max(scores)
+ if max_score > best_score:
+ best_score = max_score
+ best_params = population[np.argmax(scores)]
+
+ print(f"\nIteration {iteration + 1}/{self.n_iterations}")
+ print(f"Best score: {best_score:.2f}")
+ print("Best parameters:")
+ for name, value in best_params.items():
+ print(f" {name}: {value:.3f}")
+
+ history.append(best_score)
+
+ # Create new population
+ population = self._create_new_population(population, scores)
+
+ return best_params, history
+
+
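+# NOTE: the definitions below redefine OptimizeParam and ParameterOptimizer; because Python
+# binds names at import time, these later definitions are the ones used by the __main__ block.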
+@dataclass
+class OptimizeParam:
+ """Definition of a parameter to optimize"""
+
+ name: str # Full parameter path (e.g., 'gains.kp_scale')
+ min_val: float
+ max_val: float
+    init_val: Optional[float] = None  # If None, will be randomly initialized
+ mutation_strength: float = 0.3 # Controls the strength of mutations
+
+
+class ParameterOptimizer:
+ def __init__(
+ self,
+ base_cfg: MujocoCfg,
+ parameters: List[OptimizeParam],
+ n_iterations: int = 50,
+ population_size: int = 40, # Increased population size
+ elite_frac: float = 0.1, # Reduced elite fraction
+ exploration_prob: float = 0.2, # Probability of random exploration
+ n_episodes: int = 5, # More episodes for better evaluation
+ adaptive_mutation: bool = True,
+ ):
+ self.base_cfg = base_cfg
+ self.parameters = parameters
+ self.n_iterations = n_iterations
+ self.population_size = population_size
+ self.n_elite = max(1, int(population_size * elite_frac))
+ self.exploration_prob = exploration_prob
+ self.n_episodes = n_episodes
+ self.adaptive_mutation = adaptive_mutation
+ self.mutation_strength = 1.0 # Will be adapted during optimization
+
+ # Initialize parameter ranges for normalization
+ self.param_ranges = {param.name: (param.max_val - param.min_val) for param in parameters}
+
+ def _get_cfg_value(self, cfg: MujocoCfg, param_path: str) -> Any:
+ """Get a value from nested config using dot notation"""
+ value = cfg
+ for part in param_path.split("."):
+ value = getattr(value, part)
+ return value
+
+ def _set_cfg_value(self, cfg: MujocoCfg, param_path: str, value: Any) -> None:
+ """Set a value in nested config using dot notation"""
+ parts = param_path.split(".")
+ obj = cfg
+ for part in parts[:-1]:
+ obj = getattr(obj, part)
+ setattr(obj, parts[-1], value)
+
+ def _create_parameters(self) -> List[Dict[str, float]]:
+ """Create a population of parameter sets with improved initialization"""
+ population = []
+
+ # Create structured initial population
+        for i in range(self.population_size):
+ params = {}
+ if np.random.random() < self.exploration_prob:
+ # Random exploration
+ for param in self.parameters:
+ value = np.random.uniform(param.min_val, param.max_val)
+ params[param.name] = value
+ else:
+ # Systematic initialization around init values
+ for param in self.parameters:
+ if param.init_val is not None:
+ # Log-uniform sampling around init value
+ log_range = np.log([0.1, 10.0]) # Sample in range 0.1x to 10x
+ log_multiplier = np.random.uniform(*log_range)
+ value = param.init_val * np.exp(log_multiplier)
+ else:
+ # Latin Hypercube style sampling
+ segment = (param.max_val - param.min_val) / self.population_size
+                        value = param.min_val + segment * (np.random.random() + i)
+
+ params[param.name] = np.clip(value, param.min_val, param.max_val)
+
+ population.append(params)
+
+ return population
+
+ def _evaluate_parameters(self, params: Dict[str, float]) -> float:
+ """Evaluate a parameter set with improved robustness"""
+ cfg = MujocoCfg()
+ cfg.__dict__.update(self.base_cfg.__dict__)
+
+ for param_name, value in params.items():
+ self._set_cfg_value(cfg, param_name, value)
+
+ env = MujocoEnv(cfg, render=False)
+ rewards = []
+
+ try:
+ for _ in range(self.n_episodes):
+ env.reset()
+ episode_reward = 0
+ episode_length = 0
+ done = False
+
+ while not done:
+ # Sinusoidal test actions for better stability evaluation
+ t = episode_length * env.cfg.sim.dt
+ action = 0.5 * np.sin(2 * np.pi * t)
+ _, reward, done, info = env.step(action * np.ones(env.num_joints))
+
+ episode_reward += reward
+ episode_length += 1
+
+ # Early termination for unstable episodes
+ if info.get("fall", False):
+ episode_reward *= 0.5 # Penalty for falling
+ break
+
+ rewards.append(episode_reward)
+
+ # Calculate score with stability consideration
+ mean_reward = np.mean(rewards)
+ std_reward = np.std(rewards)
+ stability_penalty = 0.1 * std_reward # Penalize inconsistent performance
+
+ return mean_reward - stability_penalty
+
+ except Exception as e:
+ print(f"Evaluation failed: {e}")
+ return float("-inf")
+ finally:
+ env.close()
+
+ def _crossover(self, parent1: Dict[str, float], parent2: Dict[str, float]) -> Dict[str, float]:
+ """Perform crossover between two parents"""
+ child = {}
+ for param in self.parameters:
+ # Interpolate between parents with random weight
+ weight = np.random.random()
+ value = weight * parent1[param.name] + (1 - weight) * parent2[param.name]
+ child[param.name] = np.clip(value, param.min_val, param.max_val)
+ return child
+
+ def _mutate(self, params: Dict[str, float], generation: int) -> Dict[str, float]:
+ """Apply mutation with adaptive strength"""
+ mutated = {}
+ for param in self.parameters:
+ value = params[param.name]
+
+ # Combine multiplicative and additive mutation
+ if np.random.random() < 0.5:
+ # Multiplicative mutation (good for parameters with large ranges)
+ mutation = np.random.lognormal(0, param.mutation_strength * self.mutation_strength)
+ value *= mutation
+ else:
+ # Additive mutation (good for fine-tuning)
+ range_size = self.param_ranges[param.name]
+ mutation = np.random.normal(0, param.mutation_strength * self.mutation_strength * range_size)
+ value += mutation
+
+ mutated[param.name] = np.clip(value, param.min_val, param.max_val)
+
+ return mutated
+
+ def _create_new_population(
+ self, population: List[Dict[str, float]], scores: np.ndarray, generation: int
+ ) -> List[Dict[str, float]]:
+ """Create new population with improved diversity"""
+ # Sort by score
+ sorted_indices = np.argsort(scores)
+ elite_idx = sorted_indices[-self.n_elite :]
+ elite_population = [population[i] for i in elite_idx]
+
+ # Adapt mutation strength based on improvement
+ if self.adaptive_mutation and generation > 0:
+            improvement = (scores[elite_idx[-1]] - scores[elite_idx[0]]) / (abs(scores[elite_idx[0]]) + 1e-8)
+ if improvement < 0.01: # Small improvement
+ self.mutation_strength *= 1.2 # Increase exploration
+ elif improvement > 0.05: # Large improvement
+ self.mutation_strength *= 0.8 # Reduce exploration
+ self.mutation_strength = np.clip(self.mutation_strength, 0.5, 2.0)
+
+ new_population = elite_population.copy()
+
+ # Fill rest of population
+ while len(new_population) < self.population_size:
+ if np.random.random() < self.exploration_prob:
+ # Random exploration
+ params = {param.name: np.random.uniform(param.min_val, param.max_val) for param in self.parameters}
+ new_population.append(params)
+ else:
+                # Tournament selection by index (avoids ambiguous dict lookups via population.index)
+                tournament_size = 3
+                idx1 = np.random.choice(len(population), tournament_size, replace=False)
+                idx2 = np.random.choice(len(population), tournament_size, replace=False)
+                parent1 = population[idx1[np.argmax(scores[idx1])]]
+                parent2 = population[idx2[np.argmax(scores[idx2])]]
+
+ # Crossover and mutation
+ child = self._crossover(parent1, parent2)
+ child = self._mutate(child, generation)
+ new_population.append(child)
+
+ return new_population
+
+ def optimize(self) -> Tuple[Dict[str, float], List[float]]:
+ """Run parameter optimization with improved logging"""
+ best_score = float("-inf")
+ best_params = None
+ history = []
+
+ population = self._create_parameters()
+
+ for iteration in range(self.n_iterations):
+ # Evaluate population
+ scores = np.array([self._evaluate_parameters(params) for params in population])
+
+ # Update best performer
+ max_score = np.max(scores)
+ if max_score > best_score:
+ best_score = max_score
+ best_params = population[np.argmax(scores)]
+
+ # Logging
+ print(f"\nIteration {iteration + 1}/{self.n_iterations}")
+ print(f"Best score: {best_score:.2f}")
+ print(f"Current mutation strength: {self.mutation_strength:.2f}")
+ print("Best parameters:")
+ for name, value in best_params.items():
+ print(f" {name}: {value:.3f}")
+
+ history.append(best_score)
+
+ # Create new population
+ population = self._create_new_population(population, scores, iteration)
+
+ return best_params, history
+
+
+if __name__ == "__main__":
+ cfg = MujocoCfg()
+ parameters = [
+ OptimizeParam(
+ name="gains.kp_scale",
+ min_val=0.1,
+ max_val=5.0,
+ init_val=1.0,
+ ),
+ OptimizeParam(
+ name="gains.kd_scale",
+ min_val=0.1,
+ max_val=5.0,
+ init_val=1.0,
+ ),
+ # OptimizeParam(
+ # name='gains.tau_scale',
+ # min_val=1.0,
+ # max_val=20.0,
+ # init_val=4.0,
+ # ),
+ # OptimizeParam(
+ # name='extras.tau_factor',
+ # min_val=1.0,
+ # max_val=20.0,
+ # init_val=1.0,
+ # ),
+ ]
+ optimizer = ParameterOptimizer(
+ cfg,
+ parameters,
+ n_iterations=200,
+ population_size=5,
+ )
+ best_params, history = optimizer.optimize()
+ print("\nOptimization complete!")
+ print("Best parameters found:")
+ for name, value in best_params.items():
+ print(f"{name}: {value:.3f}")
diff --git a/sim/simple_play.py b/sim/simple_play.py
new file mode 100644
index 00000000..7f55beb0
--- /dev/null
+++ b/sim/simple_play.py
@@ -0,0 +1,71 @@
+import argparse
+
+from sim.env_helpers import debug_robot_state
+from sim.utils.args_parsing import parse_args_with_extras
+from sim.envs.humanoids.stompymicro_config import StompyMicroCfg, StompyMicroCfgPPO
+from sim.envs.humanoids.stompymicro_env import StompyMicroEnv
+from sim.utils.helpers import (
+ class_to_dict,
+ parse_sim_params,
+)
+from sim.algo.ppo.on_policy_runner import OnPolicyRunner
+import torch # isort: skip
+
+
+def simple_play(args: argparse.Namespace) -> None:
+ DEVICE = "cpu"
+ RAND = True
+ # Setup environment and training configs
+ env_cfg, train_cfg = StompyMicroCfg(), StompyMicroCfgPPO()
+ env_cfg.env.num_envs = 1
+ env_cfg.domain_rand.randomize_friction = RAND
+ env_cfg.domain_rand.randomize_base_mass = RAND
+ env_cfg.domain_rand.push_robots = RAND
+ if not RAND:
+ env_cfg.domain_rand.action_noise = 0
+ env_cfg.domain_rand.action_delay = 0
+ env_cfg.domain_rand.push_interval_s = 0
+ env_cfg.domain_rand.max_push_vel_xy = 0
+ else:
+        env_cfg.domain_rand.push_interval_s = 0.1  # 1.7
+ env_cfg.domain_rand.max_push_vel_xy = 0.85
+ env_cfg.sim.physx.max_gpu_contact_pairs = 2**10
+ train_cfg.runner.load_run = -10
+ train_cfg.seed = 0
+
+ sim_params = {"sim": class_to_dict(env_cfg.sim)}
+ sim_params = parse_sim_params(args, sim_params)
+ env = StompyMicroEnv(
+ cfg=env_cfg,
+ sim_params=sim_params,
+ physics_engine=args.physics_engine,
+ sim_device=args.sim_device,
+ headless=False,
+ )
+ command = torch.Tensor([0.0, 0.0, 0.0, 1.0]).to(DEVICE)
+ env.commands[:] = command
+
+ # Setup and load policy
+ all_cfg = {**class_to_dict(train_cfg), **class_to_dict(env_cfg)}
+ ppo_runner = OnPolicyRunner(env, all_cfg, device=DEVICE)
+ resume_path = "logs/StompyMicro/Dec04_20-29-12_StandingRobust/model_12001.pt"
+ ppo_runner.load(resume_path, load_optimizer=False)
+ ppo_runner.alg.actor_critic.eval()
+ ppo_runner.alg.actor_critic.to(DEVICE)
+ policy = ppo_runner.alg.actor_critic.act_inference
+
+ # Run simulation
+ obs, _ = env.reset()
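+    # One control step spans sim.dt * decimation seconds, so this is the step count per episode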
+ steps = int(env_cfg.env.episode_length_s / (env_cfg.sim.dt * env_cfg.control.decimation))
+ for t in range(steps):
+ actions = policy(obs.detach())
+ env.commands[:] = command
+ obs, _, _, _, _ = env.step(actions.detach())
+ if t % 17 == 0:
+ debug_robot_state("Isaac", obs[0], actions[0])
+
+
+if __name__ == "__main__":
+ args = parse_args_with_extras(lambda x: x)
+ print("Arguments:", vars(args))
+ simple_play(args)
diff --git a/sim/train.py b/sim/train.py
index aa5c100e..eaa4465c 100755
--- a/sim/train.py
+++ b/sim/train.py
@@ -6,7 +6,7 @@
import argparse
from sim.envs import task_registry # noqa: E402
-from sim.utils.helpers import get_args # noqa: E402
+from sim.utils.args_parsing import parse_args_with_extras # noqa: E402
def train(args: argparse.Namespace) -> None:
@@ -15,8 +15,14 @@ def train(args: argparse.Namespace) -> None:
ppo_runner.learn(num_learning_iterations=train_cfg.runner.max_iterations, init_at_random_ep_len=True)
-# Puts this import down here so that the environments are registered
+def add_train_arguments(parser):
+ """Add training-specific arguments."""
+ # Training
+ parser.add_argument("--horovod", action="store_true", default=False, help="Use horovod for multi-gpu training")
+ parser.add_argument("--trimesh", action="store_true", default=False, help="Use trimesh terrain")
+
if __name__ == "__main__":
- # python -m sim.humanoid_gym.train
- train(get_args())
+ args = parse_args_with_extras(add_train_arguments)
+ print("Arguments:", vars(args))
+ train(args)
diff --git a/sim/utils/args_parsing.py b/sim/utils/args_parsing.py
new file mode 100644
index 00000000..b4c9cc16
--- /dev/null
+++ b/sim/utils/args_parsing.py
@@ -0,0 +1,110 @@
+import argparse
+import sys
+from typing import Any, Dict, List
+
+from isaacgym import gymutil
+
+
+def create_base_parser(add_help=False):
+ """Create base parser with core arguments used across all scripts."""
+ parser = argparse.ArgumentParser(add_help=add_help)
+
+ # General
+ parser.add_argument(
+ "--task",
+ type=str,
+ default="stompymicro",
+        help="Name of the task to run. Overrides config file if provided.",
+ )
+ parser.add_argument(
+ "--rl_device",
+ type=str,
+ default="cuda:0",
+ help="Device used by the RL algorithm, (cpu, gpu, cuda:0, cuda:1 etc..)",
+ )
+ parser.add_argument(
+ "--num_envs", type=int, help="Number of environments to create. Overrides config file if provided."
+ )
+ parser.add_argument("--seed", type=int, help="Random seed. Overrides config file if provided.")
+ parser.add_argument(
+ "--max_iterations",
+ type=int,
+ # default=1001,
+ help="Maximum number of iterations. Refers to training iterations for `train.py` and playing steps for `play.py`.",
+ )
+
+ # Loading model
+ parser.add_argument("--resume", action="store_true", default=False, help="Resume training from a checkpoint")
+ parser.add_argument(
+ "--experiment_name", type=str, help="Name of the experiment to run or load. Overrides config file if provided."
+ )
+ parser.add_argument("--run_name", type=str, help="Name of the run. Overrides config file if provided.")
+ parser.add_argument(
+ "--load_run", type=str, help="Name of the run to load when resume=True. If -1: will load the last run."
+ )
+ parser.add_argument(
+ "--checkpoint", type=int, help="Saved model checkpoint number. If -1: will load the last checkpoint."
+ )
+
+ # Rendering
+ parser.add_argument("--headless", action="store_true", default=False, help="Force display off at all times")
+
+ return parser
+
+
+def convert_to_gymutil_format(parser: argparse.ArgumentParser) -> List[Dict[str, Any]]:
+ """Convert argparse parser arguments to gymutil custom_parameters format."""
+ custom_parameters = []
+ for action in parser._actions:
+ if action.dest != "help": # Skip the help action
+ param = {
+ "name": "--" + action.dest if not action.option_strings else action.option_strings[0],
+ "type": action.type,
+ "default": action.default,
+ "help": action.help,
+ }
+ if isinstance(action, argparse._StoreTrueAction):
+ param.pop("type") # Remove the type for store_true actions
+ param["action"] = "store_true"
+ elif isinstance(action, argparse._StoreAction):
+ if action.choices:
+ param["choices"] = action.choices
+ custom_parameters.append(param)
+ return custom_parameters
+
+
+def parse_args_with_extras(extra_args_fn=None):
+ """Parse arguments using both base parser and any extra arguments provided."""
+ parser = create_base_parser()
+
+ if extra_args_fn is not None:
+ extra_args_fn(parser)
+
+ # Store which arguments were meant to be True by default
+ true_by_default = {action.dest for action in parser._actions if action.default is True}
+
+ custom_parameters = convert_to_gymutil_format(parser)
+ args = gymutil.parse_arguments(description="RL Policy", custom_parameters=custom_parameters)
+
+ # Restore default=True values that weren't explicitly set to False
+ for arg_name in true_by_default:
+ # Check if this argument wasn't explicitly provided in command line
+ if not any(arg.lstrip("-").replace("-", "_") == arg_name for arg in sys.argv[1:]):
+ setattr(args, arg_name, True)
+
+ # Add the sim device arguments
+ args.sim_device_id = args.compute_device_id
+ args.sim_device = args.sim_device_type
+ if args.sim_device == "cuda":
+ args.sim_device += f":{args.sim_device_id}"
+
+ return args
+
+
+def print_args(args):
+ """Pretty print arguments."""
+ print("\nArguments:")
+ print("-" * 30)
+ for arg, value in sorted(vars(args).items()):
+ print(f"{arg:>20}: {value}")
+ print("-" * 30 + "\n")
diff --git a/sim/utils/cmd_manager.py b/sim/utils/cmd_manager.py
new file mode 100644
index 00000000..ad4161ba
--- /dev/null
+++ b/sim/utils/cmd_manager.py
@@ -0,0 +1,192 @@
+from enum import Enum
+from typing import List
+
+import numpy as np
+
+from sim.utils.helpers import draw_vector
+
+import torch # isort: skip
+
+
+class CommandMode(Enum):
+ FIXED = "fixed"
+ OSCILLATING = "oscillating"
+ KEYBOARD = "keyboard"
+ RANDOM = "random"
+
+
+class CommandManager:
+ """Manages robot commands"""
+
+ def __init__(
+ self,
+ num_envs: int = 1,
+ mode: str = "fixed",
+ default_cmd: List[float] = [0.0, -0.4, 0.0, 0 / 2 * np.pi], # default
+ # default_cmd: List[float] = [0.0, -0.3, 0.0, 1/2 * np.pi], # 90
+ # default_cmd: List[float] = [0.0, -0.6, 0.0, 0/2 * np.pi], # 0
+ # default_cmd: List[float] = [0.0, -0.3, 0.0, 0.5/2 * np.pi], # 45
+ # default_cmd: List[float] = [0.0, -0.0, 0.0, 0/2 * np.pi], # test
+ device="cpu",
+ env_cfg=None,
+ ):
+ self.num_envs = num_envs
+ self.mode = CommandMode(mode)
+ self.device = device
+ self.default_cmd = torch.tensor(default_cmd, device=self.device)
+ self.commands = self.default_cmd.repeat(num_envs, 1)
+ self.time = 0
+ self.env_cfg = env_cfg
+
+ # Mode-specific parameters
+ if self.mode == CommandMode.OSCILLATING:
+ self.osc_period = 5.0 # secs
+ self.min_x_vel = env_cfg.commands.ranges.lin_vel_x[0] if env_cfg else 0.0
+ self.max_x_vel = env_cfg.commands.ranges.lin_vel_x[1] if env_cfg else 0.3
+ self.osc_amplitude = (self.max_x_vel - self.min_x_vel) / 2
+ self.osc_offset = (self.max_x_vel + self.min_x_vel) / 2
+ elif self.mode == CommandMode.RANDOM:
+ self.cmd_ranges = (
+ {
+ "lin_vel_x": env_cfg.commands.ranges.lin_vel_x,
+ "lin_vel_y": env_cfg.commands.ranges.lin_vel_y,
+ "ang_vel_yaw": env_cfg.commands.ranges.ang_vel_yaw,
+ "heading": env_cfg.commands.ranges.heading,
+ }
+ if env_cfg
+ else {
+ "lin_vel_x": [-0.05, 0.23],
+ "lin_vel_y": [-0.05, 0.05],
+ "ang_vel_yaw": [-0.5, 0.5],
+ "heading": [-np.pi, np.pi],
+ }
+ )
+ self.resampling_time = env_cfg.commands.resampling_time if env_cfg else 8.0
+ self.last_sample_time = 0.0
+ elif self.mode == CommandMode.KEYBOARD:
+ try:
+ import pygame
+
+ pygame.init()
+ pygame.display.set_mode((100, 100))
+ self.x_vel_cmd = 0.0
+ self.y_vel_cmd = 0.0
+ self.yaw_vel_cmd = 0.0
+ except ImportError:
+ print("WARNING: pygame not found, falling back to fixed commands")
+ self.mode = CommandMode.FIXED
+
+ def close(self):
+ if self.mode == CommandMode.KEYBOARD:
+ import pygame
+
+ pygame.quit()
+
+ def update(self, dt: float) -> torch.Tensor:
+ """Updates and returns commands based on current mode."""
+ self.time += dt
+
+ if self.mode == CommandMode.FIXED:
+ return self.commands
+ elif self.mode == CommandMode.OSCILLATING:
+ # Oscillate x velocity between min and max
+ x_vel = self.osc_offset + self.osc_amplitude * torch.sin(
+ torch.tensor(2 * np.pi * self.time / self.osc_period)
+ )
+ self.commands[:, 0] = x_vel.to(self.device)
+ elif self.mode == CommandMode.RANDOM:
+ if self.time - self.last_sample_time >= self.resampling_time:
+ self.last_sample_time = self.time
+
+ if self.env_cfg and self.env_cfg.commands.heading_command:
+ new_commands = torch.from_numpy(
+ np.array(
+ [
+ np.random.uniform(*self.cmd_ranges["lin_vel_x"], size=self.num_envs),
+ np.random.uniform(*self.cmd_ranges["lin_vel_y"], size=self.num_envs),
+ np.zeros(self.num_envs),
+ np.random.uniform(*self.cmd_ranges["heading"], size=self.num_envs),
+ ]
+ ).T # Transpose to get shape [num_envs, 4]
+ ).to(self.device)
+ else:
+ new_commands = torch.tensor(
+ np.array(
+ [
+ np.random.uniform(*self.cmd_ranges["lin_vel_x"], size=self.num_envs),
+ np.random.uniform(*self.cmd_ranges["lin_vel_y"], size=self.num_envs),
+ np.random.uniform(*self.cmd_ranges["ang_vel_yaw"], size=self.num_envs),
+ np.zeros(self.num_envs),
+ ]
+ ).T # Transpose to get shape [num_envs, 4]
+ ).to(self.device)
+ self.commands = new_commands
+
+ elif self.mode == CommandMode.KEYBOARD:
+ self._handle_keyboard_input()
+ self.commands[:, 0] = torch.tensor(self.x_vel_cmd, device=self.device)
+ self.commands[:, 1] = torch.tensor(self.y_vel_cmd, device=self.device)
+ self.commands[:, 2] = torch.tensor(self.yaw_vel_cmd, device=self.device)
+
+ return self.commands
+
+ def draw(self, gym, viewer, env_handles, robot_positions, actual_vels) -> None:
+ """Draws robot heading and velocity vectors."""
+ if viewer is None:
+ return
+
+ gym.clear_lines(viewer)
+ cmd_vels = self.commands[:, :2].cpu().numpy() # x,y velocities
+ headings = self.commands[:, 3].cpu().numpy() # heading angles
+
+ for env_handle, robot_pos, cmd_vel, actual_vel, heading in zip(
+ env_handles, robot_positions, cmd_vels, actual_vels, headings
+ ):
+ # Draw heading direction (black/gray arrow)
+ heading_dir = (np.cos(heading), np.sin(heading))
+ draw_vector(gym, viewer, env_handle, robot_pos, heading_dir, (0.5, 0.5, 0.5), head_scale=0.08)
+
+ # Draw commanded velocity (green)
+ draw_vector(gym, viewer, env_handle, robot_pos, cmd_vel, (0.0, 1.0, 0.0))
+
+ # Draw actual velocity (red)
+ draw_vector(gym, viewer, env_handle, robot_pos, actual_vel, (1.0, 0.0, 0.0))
+
+ # Draw commanded velocity relative to heading: where the robot is trying to go relative to its current heading
+ # cmd_vel_relative = (
+ # cmd_vel[0] * np.cos(heading) - cmd_vel[1] * np.sin(heading),
+ # cmd_vel[0] * np.sin(heading) + cmd_vel[1] * np.cos(heading)
+ # )
+ # draw_vector(gym, viewer, env_handle, robot_pos, cmd_vel_relative, (0.0, 0.8, 0.2), head_scale=0.08)
+
+ def _handle_keyboard_input(self):
+ """Handles keyboard input for command updates."""
+ import pygame
+
+ for event in pygame.event.get():
+ if event.type == pygame.QUIT:
+ pygame.quit()
+
+ keys = pygame.key.get_pressed()
+
+ # Update movement commands based on arrow keys
+ if keys[pygame.K_UP]:
+ self.x_vel_cmd = min(self.x_vel_cmd + 0.0005, 0.5)
+ if keys[pygame.K_DOWN]:
+ self.x_vel_cmd = max(self.x_vel_cmd - 0.0005, -0.5)
+ if keys[pygame.K_LEFT]:
+ self.y_vel_cmd = min(self.y_vel_cmd + 0.0005, 0.5)
+ if keys[pygame.K_RIGHT]:
+ self.y_vel_cmd = max(self.y_vel_cmd - 0.0005, -0.5)
+
+ # Yaw control
+ if keys[pygame.K_a]:
+ self.yaw_vel_cmd = min(self.yaw_vel_cmd + 0.001, 0.5)
+ if keys[pygame.K_z]:
+ self.yaw_vel_cmd = max(self.yaw_vel_cmd - 0.001, -0.5)
+
+ # Reset commands
+ if keys[pygame.K_SPACE]:
+ self.x_vel_cmd = 0.0
+ self.y_vel_cmd = 0.0
+ self.yaw_vel_cmd = 0.0
diff --git a/sim/utils/helpers.py b/sim/utils/helpers.py
index 675fe070..5e42c644 100755
--- a/sim/utils/helpers.py
+++ b/sim/utils/helpers.py
@@ -34,6 +34,8 @@
import copy
import os
import random
+from datetime import datetime
+from typing import Any, Tuple, Union
import numpy as np
@@ -107,18 +109,40 @@ def parse_sim_params(args, cfg):
return sim_params
-def get_load_path(root, load_run=-1, checkpoint=-1):
+def get_load_path(root, load_run: Union[int, str] = -1, checkpoint: Union[int, str] = -1):
try:
runs = os.listdir(root)
- # TODO sort by date to handle change of month
- runs.sort()
+
+ # Sort by datetime instead of alphabetically
+ def parse_run_time(run_name):
+ try:
+ return datetime.strptime(run_name[:14], "%b%d_%H-%M-%S")
+            except ValueError:
+ return datetime.min
+
+ runs.sort(key=parse_run_time)
+
if "exported" in runs:
runs.remove("exported")
- last_run = os.path.join(root, runs[-1])
- except:
- raise ValueError("No runs in this directory: " + root)
- if load_run == -1:
- load_run = last_run
+
+ # Keep only runs with model files
+ runs = [run for run in runs if any("model" in file for file in os.listdir(os.path.join(root, run)))]
+ if not runs:
+ raise ValueError("No runs with model files in this directory: " + root)
+
+ except Exception as e:
+ raise ValueError("Error accessing directory: " + root) from e
+
+ # Handle load_run selection
+ if isinstance(load_run, str) and load_run.lstrip("-").isdigit():
+ load_run = int(load_run)
+
+ if isinstance(load_run, int):
+ try:
+ run_name = runs[load_run]
+ load_run = os.path.join(root, run_name)
+ except IndexError:
+ raise ValueError(f"Run index {load_run} out of range. Available runs: {len(runs)}")
else:
load_run = os.path.join(root, load_run)
@@ -161,18 +185,48 @@ def update_cfg_from_args(env_cfg, cfg_train, args):
def get_args() -> argparse.Namespace:
custom_parameters = [
+ # General
{
"name": "--task",
"type": str,
"default": "stompymicro",
"help": "Resume training or start testing from a checkpoint. Overrides config file if provided.",
},
+ {
+ "name": "--rl_device",
+ "type": str,
+ "default": "cuda:0",
+ "help": "Device used by the RL algorithm, (cpu, gpu, cuda:0, cuda:1 etc..)",
+ },
+ {
+ "name": "--num_envs",
+ "type": int,
+ "help": "Number of environments to create. Overrides config file if provided.",
+ },
+ {
+ "name": "--seed",
+ "type": int,
+ "help": "Random seed. Overrides config file if provided.",
+ },
+ {
+ "name": "--max_iterations",
+ "type": int,
+ "help": "Maximum number of iterations. Refers to training iterations for `train.py` and playing steps for `play.py`. Overrides config file if provided.",
+ },
+ # Training
{
"name": "--resume",
"action": "store_true",
"default": False,
"help": "Resume training from a checkpoint",
},
+ {
+ "name": "--horovod",
+ "action": "store_true",
+ "default": False,
+ "help": "Use horovod for multi-gpu training",
+ },
+ # Loading model
{
"name": "--experiment_name",
"type": str,
@@ -193,6 +247,7 @@ def get_args() -> argparse.Namespace:
"type": int,
"help": "Saved model checkpoint number. If -1: will load the last checkpoint. Overrides config file if provided.",
},
+ # Rendering
{
"name": "--headless",
"action": "store_true",
@@ -200,37 +255,31 @@ def get_args() -> argparse.Namespace:
"help": "Force display off at all times",
},
{
- "name": "--horovod",
+ "name": "--arrows",
"action": "store_true",
"default": False,
- "help": "Use horovod for multi-gpu training",
- },
- {
- "name": "--rl_device",
- "type": str,
- "default": "cuda:0",
- "help": "Device used by the RL algorithm, (cpu, gpu, cuda:0, cuda:1 etc..)",
- },
- {
- "name": "--num_envs",
- "type": int,
- "help": "Number of environments to create. Overrides config file if provided.",
+ "help": "Draw command and velocity arrows during visualization",
},
+ # Play
{
- "name": "--seed",
- "type": int,
- "help": "Random seed. Overrides config file if provided.",
+ "name": "--log_h5",
+ "action": "store_true",
+ "default": False,
+ "help": "Enable HDF5 logging",
},
{
- "name": "--max_iterations",
- "type": int,
- "help": "Maximum number of training iterations. Overrides config file if provided.",
+ "name": "--command_mode",
+ "type": str,
+ "default": "fixed",
+ "choices": ["fixed", "oscillating", "random", "keyboard"],
+ "help": "Control mode for the robot",
},
+ # Unused?
{
- "name": "--log_h5",
+ "name": "--trimesh",
"action": "store_true",
"default": False,
- "help": "Enable HDF5 logging",
+ "help": "Use trimesh terrain",
},
]
# parse arguments
@@ -244,7 +293,7 @@ def get_args() -> argparse.Namespace:
return args
-def export_policy_as_jit(actor_critic, path) -> None:
+def export_policy_as_jit(actor_critic: Any, path: Union[str, os.PathLike]) -> None:
os.makedirs(path, exist_ok=True)
path = os.path.join(path, "policy_1.pt")
model = copy.deepcopy(actor_critic.actor).to("cpu")
@@ -265,3 +314,136 @@ def export_policy_as_onnx(actor_critic, path):
dummy_input = torch.randn(1, input_dim)
torch.onnx.export(model, dummy_input, path)
+
+
+def draw_vector(
+ gym: gymapi.Gym,
+ viewer: gymapi.Viewer,
+ env_handle: gymapi.Env,
+ start_pos: np.ndarray,
+ direction: Tuple[float, float],
+ color: Tuple[float, float, float],
+ clear_lines: bool = False,
+ head_scale: float = 0.1,
+) -> None:
+ """Draws a single vector with an arrowhead."""
+ if viewer is None:
+ return
+
+ # Unpack direction and create start position
+ vel_x, vel_y = direction
+ start = gymapi.Vec3(start_pos[0], start_pos[1], start_pos[2])
+
+ # Scale arrow length by magnitude
+ vel_magnitude = np.sqrt(vel_x**2 + vel_y**2)
+ if vel_magnitude > 0:
+ arrow_scale = np.clip(vel_magnitude, 0.1, 1.0)
+ normalized_x = vel_x / vel_magnitude
+ normalized_y = vel_y / vel_magnitude
+ else:
+ arrow_scale = 0.1
+ normalized_x = 0
+ normalized_y = 0
+
+ # Calculate end position and arrowhead
+ end = gymapi.Vec3(start.x + normalized_x * arrow_scale, start.y + normalized_y * arrow_scale, start.z)
+
+ # Calculate perpendicular vector for arrowhead
+ perp_x = -normalized_y
+ perp_y = normalized_x
+
+ head_left = gymapi.Vec3(
+ end.x - head_scale * (normalized_x * 0.7 + perp_x * 0.7),
+ end.y - head_scale * (normalized_y * 0.7 + perp_y * 0.7),
+ end.z,
+ )
+
+ head_right = gymapi.Vec3(
+ end.x - head_scale * (normalized_x * 0.7 - perp_x * 0.7),
+ end.y - head_scale * (normalized_y * 0.7 - perp_y * 0.7),
+ end.z,
+ )
+
+ # Create vertices and colors
+ verts = [
+ start.x,
+ start.y,
+ start.z,
+ end.x,
+ end.y,
+ end.z,
+ end.x,
+ end.y,
+ end.z,
+ head_left.x,
+ head_left.y,
+ head_left.z,
+ end.x,
+ end.y,
+ end.z,
+ head_right.x,
+ head_right.y,
+ head_right.z,
+ ]
+ colors = [color[0], color[1], color[2]] * 6
+
+ if clear_lines:
+ gym.clear_lines(viewer)
+ gym.add_lines(viewer, env_handle, 3, verts, colors)
+
+
+def analyze_policy(policy_path: str) -> dict:
+ """Analyzes a policy file to determine its input/output dimensions."""
+ info = {}
+
+ if policy_path.endswith(".pt"):
+ import torch
+
+ # Load TorchScript model
+ model = torch.jit.load(policy_path)
+
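+        # NOTE: assumes an MLP-style policy: the first registered parameter is the input
+        # layer's weight (shape [out, in]) and the last parameter's leading dimension
+        # equals the action dimension.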
+ # Get the first parameter of the first layer to determine input size
+ first_layer = next(model.parameters())
+ info["input_dim"] = first_layer.shape[1]
+
+ # Get the last layer to determine output size
+ last_layer = None
+ for param in model.parameters():
+ last_layer = param
+ info["output_dim"] = last_layer.shape[0]
+
+ # Additional info
+ info["type"] = "TorchScript"
+ info["num_parameters"] = sum(p.numel() for p in model.parameters())
+
+ elif policy_path.endswith(".onnx"):
+ import onnx
+
+ # Load ONNX model
+ model = onnx.load(policy_path)
+
+ # Get input info
+ input_info = model.graph.input[0]
+ input_shape = [d.dim_value for d in input_info.type.tensor_type.shape.dim]
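+        # (dynamic dimensions, e.g. a symbolic batch size, read back as dim_value == 0)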
+ info["input_dim"] = input_shape[1] if len(input_shape) > 1 else input_shape[0]
+
+ # Get output info
+ output_info = model.graph.output[0]
+ output_shape = [d.dim_value for d in output_info.type.tensor_type.shape.dim]
+ info["output_dim"] = output_shape[1] if len(output_shape) > 1 else output_shape[0]
+
+ # Additional info
+ info["type"] = "ONNX"
+ info["input_shape"] = input_shape
+ info["output_shape"] = output_shape
+
+ else:
+ raise ValueError(f"Unsupported model type: {policy_path}")
+
+ return info
+
+
+if __name__ == "__main__":
+ path = "examples/experiments/walking/exp6-arm_movement++/policy_1.pt" # "policy_1.pt"
+ info = analyze_policy(path)
+ print(info)
diff --git a/sim/utils/logger.py b/sim/utils/logger.py
index a37b1473..d18e4990 100755
--- a/sim/utils/logger.py
+++ b/sim/utils/logger.py
@@ -32,7 +32,7 @@
from collections import defaultdict
from multiprocessing import Process, Value
-# import matplotlib.pyplot as plt
+import matplotlib.pyplot as plt
import numpy as np
diff --git a/sim/utils/task_registry.py b/sim/utils/task_registry.py
index 04198fa3..b2a97b6d 100755
--- a/sim/utils/task_registry.py
+++ b/sim/utils/task_registry.py
@@ -33,6 +33,7 @@
import os
from datetime import datetime
+from typing import Dict, Tuple
from sim import ROOT_DIR
from sim.algo.ppo.on_policy_runner import OnPolicyRunner # mypy: ignore
@@ -76,7 +77,7 @@ def get_cfgs(self, name):
return env_cfg, train_cfg
def make_env(self, name, args=None, env_cfg=None):
- """Creates an environment either from a registered namme or from the provided config file.
+ """Creates an environment either from a registered name or from the provided config file.
Args:
name (string): Name of a registered env.
@@ -120,7 +121,9 @@ def make_env(self, name, args=None, env_cfg=None):
self.env_cfg_for_wandb = env_cfg
return env, env_cfg
- def make_alg_runner(self, env, name=None, args=None, train_cfg=None, log_root="default"):
+ def make_alg_runner(
+ self, env, name=None, args=None, train_cfg=None, log_root="default"
+ ) -> Tuple[OnPolicyRunner, Dict]:
"""Creates the training algorithm either from a registered namme or from the provided config file.
Args: