diff --git a/examples/experiments/standing/robustv0/policy_1.pt b/examples/experiments/standing/robustv0/policy_1.pt new file mode 100644 index 00000000..3bdb256d Binary files /dev/null and b/examples/experiments/standing/robustv0/policy_1.pt differ diff --git a/examples/experiments/standing/robustv1/policy_1.pt b/examples/experiments/standing/robustv1/policy_1.pt new file mode 100644 index 00000000..68a6194e Binary files /dev/null and b/examples/experiments/standing/robustv1/policy_1.pt differ diff --git a/examples/experiments/walking/arms/model_700.pt b/examples/experiments/walking/arms/model_700.pt new file mode 100644 index 00000000..1f2d8006 Binary files /dev/null and b/examples/experiments/walking/arms/model_700.pt differ diff --git a/examples/experiments/walking/exp4-large_walk_with_arms/policy_1.pt b/examples/experiments/walking/exp4-large_walk_with_arms/policy_1.pt new file mode 100644 index 00000000..9d2de367 Binary files /dev/null and b/examples/experiments/walking/exp4-large_walk_with_arms/policy_1.pt differ diff --git a/examples/experiments/walking/exp5-larger_gait/policy_1.pt b/examples/experiments/walking/exp5-larger_gait/policy_1.pt new file mode 100644 index 00000000..ca649be8 Binary files /dev/null and b/examples/experiments/walking/exp5-larger_gait/policy_1.pt differ diff --git a/examples/experiments/walking/exp6-arm_movement++/policy_1.pt b/examples/experiments/walking/exp6-arm_movement++/policy_1.pt new file mode 100644 index 00000000..a33f7b7f Binary files /dev/null and b/examples/experiments/walking/exp6-arm_movement++/policy_1.pt differ diff --git a/examples/experiments/walking/increase_target_height/policy_1.pt b/examples/experiments/walking/increase_target_height/policy_1.pt new file mode 100644 index 00000000..f884687c Binary files /dev/null and b/examples/experiments/walking/increase_target_height/policy_1.pt differ diff --git a/examples/experiments/walking/target_height_and_clearance_rewards/policy_1.pt b/examples/experiments/walking/target_height_and_clearance_rewards/policy_1.pt new file mode 100644 index 00000000..239a8a80 Binary files /dev/null and b/examples/experiments/walking/target_height_and_clearance_rewards/policy_1.pt differ diff --git a/examples/new_gait.pt b/examples/new_gait.pt new file mode 100644 index 00000000..1b7358e2 Binary files /dev/null and b/examples/new_gait.pt differ diff --git a/examples/new_standing.pt b/examples/new_standing.pt new file mode 100644 index 00000000..b322dcf7 Binary files /dev/null and b/examples/new_standing.pt differ diff --git a/examples/original_walking.pt b/examples/original_walking.pt new file mode 100644 index 00000000..2ba5b97f Binary files /dev/null and b/examples/original_walking.pt differ diff --git a/examples/test_policy.pt b/examples/test_policy.pt new file mode 100644 index 00000000..69b339af Binary files /dev/null and b/examples/test_policy.pt differ diff --git a/examples/walking_micro.onnx b/examples/walking_micro.onnx index 68a35b8e..615fcb8e 100644 Binary files a/examples/walking_micro.onnx and b/examples/walking_micro.onnx differ diff --git a/examples/walking_micro.pt b/examples/walking_micro.pt index 58c94e6c..0b9f321a 100644 Binary files a/examples/walking_micro.pt and b/examples/walking_micro.pt differ diff --git a/examples/walking_pro.onnx b/examples/walking_pro.onnx index bbdb1b15..432b0e95 100644 Binary files a/examples/walking_pro.onnx and b/examples/walking_pro.onnx differ diff --git a/examples/walking_pro.pt b/examples/walking_pro.pt index 6aedefd4..2878ac43 100644 Binary files 
a/examples/walking_pro.pt and b/examples/walking_pro.pt differ diff --git a/policy_1.pt b/policy_1.pt index 500afdda..0b102b3b 100644 Binary files a/policy_1.pt and b/policy_1.pt differ diff --git a/sim/algo/ppo/on_policy_runner.py b/sim/algo/ppo/on_policy_runner.py index e3fb6e1c..6b1933ab 100755 --- a/sim/algo/ppo/on_policy_runner.py +++ b/sim/algo/ppo/on_policy_runner.py @@ -30,6 +30,7 @@ # Copyright (c) 2024 Beijing RobotEra TECHNOLOGY CO.,LTD. All rights reserved. # type: ignore +import json import os import statistics import time @@ -39,6 +40,7 @@ import torch import wandb +import yaml from torch.utils.tensorboard import SummaryWriter from sim.algo.ppo.actor_critic import ActorCritic @@ -46,6 +48,54 @@ from sim.algo.vec_env import VecEnv +def make_serializable(obj): + """Recursively convert an object and its components to JSON-serializable types.""" + if isinstance(obj, dict): + return {key: make_serializable(value) for key, value in obj.items()} + elif isinstance(obj, (list, tuple)): + return [make_serializable(item) for item in obj] + try: + # Try standard JSON serialization first + json.dumps(obj) + return obj + except (TypeError, OverflowError): + try: + # Try converting to float (for numpy numbers etc) + return float(obj) + except: + try: + # Try converting to int + return int(obj) + except: + # If all else fails, convert to string representation + return f"<{type(obj).__name__}:{str(obj)}>" + + +def write_config_file(config: dict, log_dir: str) -> None: + """Writes the configuration to a JSON file in the log directory.""" + if not os.path.exists(log_dir): + os.makedirs(log_dir) + + # Add metadata section + metadata = { + "creation_time": datetime.now().strftime("%Y-%m-%d %H:%M:%S"), + "experiment_name": config["runner"]["experiment_name"], + "run_name": config["runner"]["run_name"], + } + + serializable_config = make_serializable(config) + + full_config = {"metadata": metadata, "configuration": serializable_config} + + json_path = os.path.join(log_dir, "experiment_config.json") + with open(json_path, "w") as f: + json.dump(full_config, f, indent=2, sort_keys=True) + + yaml_path = os.path.join(log_dir, "experiment_config.yaml") + with open(yaml_path, "w") as f: + yaml.dump(full_config, f, default_flow_style=False, sort_keys=False) + + class OnPolicyRunner: def __init__(self, env: VecEnv, train_cfg: dict, log_dir: Optional[str] = None, device: str = "cpu"): self.cfg = train_cfg["runner"] @@ -102,6 +152,8 @@ def learn(self, num_learning_iterations: int, init_at_random_ep_len: bool = Fals config=self.all_cfg, ) self.writer = SummaryWriter(log_dir=self.log_dir, flush_secs=10) + if self.current_learning_iteration == 0: + write_config_file(self.all_cfg, self.log_dir) if init_at_random_ep_len: self.env.episode_length_buf = torch.randint_like( self.env.episode_length_buf, high=int(self.env.max_episode_length) diff --git a/sim/env.py b/sim/env.py index 8c0dcadc..cee99897 100755 --- a/sim/env.py +++ b/sim/env.py @@ -11,32 +11,29 @@ def model_dir(robotname: str) -> Path: print(os.getcwd()) - return Path(os.environ.get("MODEL_DIR", "sim/resources/" + robotname)) + model_dir = Path(os.environ.get("MODEL_DIR", "sim/resources/" + robotname)) + return model_dir def run_dir() -> Path: return Path(os.environ.get("RUN_DIR", "runs")) -def robot_urdf_path(robotname: str, legs_only: bool = False) -> Path: - if legs_only: - robot_path = model_dir(robotname) / "robot_fixed.urdf" - else: - robot_path = model_dir(robotname) / "robot_fixed.urdf" - +def robot_urdf_path(robotname: str, legs_only: bool = 
True) -> Path: + print(f"robot_urdf_path({robotname=}, {legs_only=})") + filename = "robot_fixed.urdf" if legs_only else "robot.urdf" + robot_path = model_dir(robotname) / filename if not robot_path.exists(): raise FileNotFoundError(f"URDF file not found: {robot_path}") - + print(f"robot_path={robot_path}") return robot_path.resolve() -def robot_mjcf_path(robotname: str, legs_only: bool = False) -> Path: - if legs_only: - robot_path = model_dir(robotname) / "robot_fixed.xml" - else: - robot_path = model_dir(robotname) / "robot_fixed.xml" - +def robot_mjcf_path(robotname: str, legs_only: bool = True) -> Path: + print(f"robot_mjcf_path({robotname=}, {legs_only=})") + filename = "robot_fixed.xml" if legs_only else "robot.xml" + robot_path = model_dir(robotname) / filename if not robot_path.exists(): raise FileNotFoundError(f"MJCF file not found: {robot_path}") - + print(f"robot_path={robot_path}") return robot_path.resolve() diff --git a/sim/env_helpers.py b/sim/env_helpers.py new file mode 100644 index 00000000..6e32537e --- /dev/null +++ b/sim/env_helpers.py @@ -0,0 +1,51 @@ +import numpy as np + +np.set_printoptions(precision=2, suppress=True) + + +def debug_robot_state(robot_name: str, obs_buf: np.ndarray, actions: np.ndarray) -> None: + """Debug function for robot state from observation buffer.""" + # Unpack the obs_buf components + cmd = obs_buf[:5] # Command input (sin, cos, vel_x, vel_y, vel_yaw) + q = obs_buf[5:21] # Joint positions (16) + dq = obs_buf[21:37] # Joint velocities (16) + prev_actions = obs_buf[37:53] # Actions (16) + ang_vel = obs_buf[53:56] # Base angular velocity (3) + euler = obs_buf[56:59] # Base euler angles (3) + + quat = euler_to_quat(euler) + + # Detailed unpack + vel_x = cmd[2] + vel_y = cmd[3] + vel_yaw = cmd[4] + + # Print the robot state + print(f"\n=== {robot_name} State ===") + # print(f"Command: {vel_x=:.2f} {vel_y=:.2f} {vel_yaw=:.2f}") + # print(f"RPY: [{euler[0]:.2f}, {euler[1]:.2f}, {euler[2]:.2f}]") + # print(f"Quat: [{quat[0]:.2f}, {quat[1]:.2f}, {quat[2]:.2f}, {quat[3]:.2f}]") + # print(f"AngVel: [{ang_vel[0]:.2f}, {ang_vel[1]:.2f}, {ang_vel[2]:.2f}]") + # print("Joints:") + # print(" Pos:", " ".join(f"{x:5.2f}" for x in q)) + # print(" Vel:", " ".join(f"{x:5.2f}" for x in dq)) + # print(" Act:", " ".join(f"{x:5.2f}" for x in prev_actions)) + # print("Actions:") + # print(" ", " ".join(f"{x:5.2f}" for x in actions)) + print(obs_buf[-59:]) + print("=================") + + +def euler_to_quat(euler): + """Convert euler angles (roll, pitch, yaw) to quaternion (x, y, z, w).""" + roll, pitch, yaw = euler + + cr, cp, cy = np.cos(roll / 2), np.cos(pitch / 2), np.cos(yaw / 2) + sr, sp, sy = np.sin(roll / 2), np.sin(pitch / 2), np.sin(yaw / 2) + + qw = cr * cp * cy + sr * sp * sy + qx = sr * cp * cy - cr * sp * sy + qy = cr * sp * cy + sr * cp * sy + qz = cr * cp * sy - sr * sp * cy + + return np.array([qx, qy, qz, qw]) diff --git a/sim/envs/__init__.py b/sim/envs/__init__.py index d307168f..69f77b05 100755 --- a/sim/envs/__init__.py +++ b/sim/envs/__init__.py @@ -14,7 +14,10 @@ from sim.envs.humanoids.g1_env import G1FreeEnv from sim.envs.humanoids.h1_config import H1Cfg, H1CfgPPO from sim.envs.humanoids.h1_env import H1FreeEnv -from sim.envs.humanoids.stompymicro_config import StompyMicroCfg, StompyMicroCfgPPO +from sim.envs.humanoids.stompymicro_config import ( + StompyMicroCfg, + StompyMicroCfgPPO, +) from sim.envs.humanoids.stompymicro_env import StompyMicroEnv from sim.envs.humanoids.stompymini_config import MiniCfg, MiniCfgPPO from 
sim.envs.humanoids.stompymini_env import MiniFreeEnv diff --git a/sim/envs/base/base_env.py b/sim/envs/base/base_env.py new file mode 100644 index 00000000..108c3ccd --- /dev/null +++ b/sim/envs/base/base_env.py @@ -0,0 +1,25 @@ +from abc import ABC, abstractmethod +from typing import Any, Tuple + + +class Env(ABC): + """Minimal base environment class for RL.""" + + @abstractmethod + def step(self, action: Any) -> Tuple[Any, float, bool, dict]: + """Run one timestep of the environment. + Returns: observation, reward, done, info""" + pass + + @abstractmethod + def reset(self) -> Any: + """Reset the environment to initial state.""" + pass + + def render(self) -> None: + """Optional method to render the environment.""" + pass + + def close(self) -> None: + """Clean up resources.""" + pass diff --git a/sim/envs/base/legged_robot.py b/sim/envs/base/legged_robot.py index db341d8f..6be1d110 100644 --- a/sim/envs/base/legged_robot.py +++ b/sim/envs/base/legged_robot.py @@ -102,11 +102,17 @@ def post_physics_step(self): # TODO(pfb30) - debug this origin = torch.tensor(self.cfg.init_state.rot, device=self.device).repeat(self.num_envs, 1) origin = quat_conjugate(origin) - self.base_quat[:] = quat_mul(origin, self.root_states[:, 3:7]) - self.base_euler_xyz = get_euler_xyz_tensor(self.base_quat) - self.base_lin_vel[:] = quat_rotate_inverse(self.base_quat, self.root_states[:, 7:10]) - self.base_ang_vel[:] = quat_rotate_inverse(self.base_quat, self.root_states[:, 10:13]) + if self.imu_indices: + self.base_quat = quat_mul(origin, self.rigid_state[:, self.imu_indices, 3:7]) + self.base_lin_vel[:] = quat_rotate_inverse(self.base_quat, self.rigid_state[:, self.imu_indices, 7:10]) + self.base_ang_vel[:] = quat_rotate_inverse(self.base_quat, self.rigid_state[:, self.imu_indices, 10:13]) + else: + self.base_quat = self.root_states[:, 3:7] + self.base_lin_vel[:] = quat_rotate_inverse(self.base_quat, self.root_states[:, 7:10]) + self.base_ang_vel[:] = quat_rotate_inverse(self.base_quat, self.root_states[:, 10:13]) + + self.base_euler_xyz = get_euler_xyz_tensor(self.base_quat) self.projected_gravity[:] = quat_rotate_inverse(self.base_quat, self.gravity_vec) self._post_physics_step_callback() @@ -190,7 +196,10 @@ def reset_idx(self, env_ids): # TODO(pfb30) - debug this origin = torch.tensor(self.cfg.init_state.rot, device=self.device).repeat(self.num_envs, 1) origin = quat_conjugate(origin) - self.base_quat[env_ids] = quat_mul(origin[env_ids, :], self.root_states[env_ids, 3:7]) + if self.imu_indices: + self.base_quat[env_ids] = quat_mul(origin[env_ids, :], self.rigid_state[env_ids, self.imu_indices, 3:7]) + else: + self.base_quat[env_ids] = quat_mul(origin[env_ids, :], self.root_states[env_ids, 3:7]) self.base_euler_xyz = get_euler_xyz_tensor(self.base_quat) self.projected_gravity[env_ids] = quat_rotate_inverse(self.base_quat[env_ids], self.gravity_vec[env_ids]) @@ -234,6 +243,7 @@ def _process_rigid_shape_props(self, props, env_id): Returns: [List[gymapi.RigidShapeProperties]]: Modified rigid shape properties """ + if self.cfg.domain_rand.randomize_friction: if env_id == 0: # prepare friction randomization @@ -276,6 +286,10 @@ def _process_dof_props(self, props, env_id): self.dof_vel_limits[i] = props["velocity"][i].item() * self.cfg.safety.vel_limit self.torque_limits[i] = props["effort"][i].item() * self.cfg.safety.torque_limit + for i, dof_name in enumerate(self.dof_names): + props["friction"][i] = self.cfg.asset.friction + props["armature"][i] = self.cfg.asset.armature + return props def 
_process_rigid_body_props(self, props, env_id): @@ -363,7 +377,6 @@ def _compute_torques(self, actions): torques = p_gains * (actions_scaled + self.default_dof_pos - self.dof_pos) - d_gains * self.dof_vel res = torch.clip(torques, -self.torque_limits, self.torque_limits) - return res def _reset_dofs(self, env_ids): @@ -481,17 +494,24 @@ def _init_buffers(self): self.dof_state = gymtorch.wrap_tensor(dof_state_tensor) self.dof_pos = self.dof_state.view(self.num_envs, self.num_dof, 2)[..., 0] self.dof_vel = self.dof_state.view(self.num_envs, self.num_dof, 2)[..., 1] - # self.base_quat = self.root_states[:, 3:7] + + self.rigid_state = gymtorch.wrap_tensor(rigid_body_state) # .view(self.num_envs, -1, 13) + self.rigid_state = self.rigid_state.view(self.num_envs, -1, 13) # TODO(pfb30): debug this + # self.base_quat = self.root_states[:, 3:7] origin = torch.tensor(self.cfg.init_state.rot, device=self.device).repeat(self.num_envs, 1) origin = quat_conjugate(origin) - self.base_quat = quat_mul(origin, self.root_states[:, 3:7]) + + if self.imu_indices: + self.base_quat = quat_mul(origin, self.rigid_state[:, self.imu_indices, 3:7]) + else: + self.base_quat = quat_mul(origin, self.root_states[:, 3:7]) + self.base_euler_xyz = get_euler_xyz_tensor(self.base_quat) self.contact_forces = gymtorch.wrap_tensor(net_contact_forces).view( self.num_envs, -1, 3 ) # shape: num_envs, num_bodies, xyz axis - self.rigid_state = gymtorch.wrap_tensor(rigid_body_state).view(self.num_envs, -1, 13) # initialize some data used later on self.common_step_counter = 0 @@ -537,8 +557,13 @@ def _init_buffers(self): self.last_contacts = torch.zeros( self.num_envs, len(self.feet_indices), dtype=torch.bool, device=self.device, requires_grad=False ) - self.base_lin_vel = quat_rotate_inverse(self.base_quat, self.root_states[:, 7:10]) - self.base_ang_vel = quat_rotate_inverse(self.base_quat, self.root_states[:, 10:13]) + if self.imu_indices: + self.base_lin_vel = quat_rotate_inverse(self.base_quat, self.rigid_state[:, self.imu_indices, 7:10]) + self.base_ang_vel = quat_rotate_inverse(self.base_quat, self.rigid_state[:, self.imu_indices, 10:13]) + else: + self.base_lin_vel = quat_rotate_inverse(self.base_quat, self.root_states[:, 7:10]) + self.base_ang_vel = quat_rotate_inverse(self.base_quat, self.root_states[:, 10:13]) + self.projected_gravity = quat_rotate_inverse(self.base_quat, self.gravity_vec) if self.cfg.terrain.measure_heights: self.height_points = self._init_height_points() @@ -592,6 +617,7 @@ def _prepare_reward_function(self): # prepare list of functions self.reward_functions = [] self.reward_names = [] + for name, scale in self.reward_scales.items(): if name == "termination": continue @@ -696,6 +722,7 @@ def _create_envs(self): self.num_dofs = len(self.dof_names) feet_names = [s for s in body_names if s in self.cfg.asset.foot_name] knee_names = [s for s in body_names if s in self.cfg.asset.knee_name] + penalized_contact_names = [] for name in self.cfg.asset.penalize_contacts_on: penalized_contact_names.extend([s for s in body_names if name in s]) @@ -738,9 +765,9 @@ def _create_envs(self): ) dof_props = self._process_dof_props(dof_props_asset, i) self.gym.set_actor_dof_properties(env_handle, actor_handle, dof_props) + body_props = self.gym.get_actor_rigid_body_properties(env_handle, actor_handle) body_props = self._process_rigid_body_props(body_props, i) - self.gym.set_actor_rigid_body_properties(env_handle, actor_handle, body_props, recomputeInertia=False) self.envs.append(env_handle) self.actor_handles.append(actor_handle) 
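Both LeggedRobot._compute_torques above and the MujocoEnv._compute_torques added later in this patch apply the same PD rule to scaled position targets. A minimal NumPy sketch of that rule follows; the single-joint gains and limits are illustrative values, not taken from the patch, and action_scale defaults to the 0.25 set in StompyMicroCfg.control.

import numpy as np

def compute_torques(actions, default_dof_pos, dof_pos, dof_vel,
                    p_gains, d_gains, torque_limits, action_scale=0.25):
    """PD position control: drive each DoF toward default_dof_pos + scaled action."""
    actions_scaled = actions * action_scale
    torques = p_gains * (actions_scaled + default_dof_pos - dof_pos) - d_gains * dof_vel
    return np.clip(torques, -torque_limits, torque_limits)

# Single-joint example: a zero action simply holds the default position.
tau = compute_torques(np.zeros(1), np.array([0.3]), np.array([0.25]), np.zeros(1),
                      p_gains=np.array([10.0]), d_gains=np.array([0.5]),
                      torque_limits=np.array([3.0]))
print(tau)  # 10.0 * (0.3 - 0.25) - 0.5 * 0.0 = [0.5]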
@@ -753,8 +780,7 @@ def _create_envs(self): print("feet", self.feet_indices) print(f"Processed body properties for env {i}:") for j, prop in enumerate(body_props): - print(f" Body {j} mass: {prop.mass}") - # prop.mass = prop.mass * 1e7 + print(f" Body {j} mass: {prop.mass}") print(f" Body {j} inertia: {prop.inertia.x}, {prop.inertia.y}, {prop.inertia.z}") self.knee_indices = torch.zeros(len(knee_names), dtype=torch.long, device=self.device, requires_grad=False) @@ -763,6 +789,12 @@ def _create_envs(self): self.envs[0], self.actor_handles[0], knee_names[i] ) + self.imu_indices = self.gym.find_actor_rigid_body_handle( + self.envs[0], self.actor_handles[0], self.cfg.asset.imu_name + ) + if self.imu_indices == -1: + self.imu_indices = None + self.penalised_contact_indices = torch.zeros( len(penalized_contact_names), dtype=torch.long, device=self.device, requires_grad=False ) diff --git a/sim/envs/base/legged_robot_config.py b/sim/envs/base/legged_robot_config.py index 1198d62e..cbbb2cf8 100644 --- a/sim/envs/base/legged_robot_config.py +++ b/sim/envs/base/legged_robot_config.py @@ -29,8 +29,6 @@ # # Copyright (c) 2024 Beijing RobotEra TECHNOLOGY CO.,LTD. All rights reserved. -from enum import Enum - from sim.envs.base.base_config import BaseConfig @@ -124,7 +122,10 @@ class safety: class asset: file = "" name = "legged_robot" # actor name - foot_name = "None" # name of the feet bodies, used to index body state and contact force tensors + foot_name = ["None"] # name of the feet bodies, used to index body state and contact force tensors + knee_name = ["None"] + imu_name = None + penalize_contacts_on = [] terminate_after_contacts_on = [] disable_gravity = False @@ -143,6 +144,7 @@ class asset: max_angular_velocity = 1000.0 max_linear_velocity = 1000.0 armature = 0.0 + friction = 0.0 thickness = 0.01 class domain_rand: diff --git a/sim/envs/base/mujoco_env.py b/sim/envs/base/mujoco_env.py new file mode 100644 index 00000000..a7419dbc --- /dev/null +++ b/sim/envs/base/mujoco_env.py @@ -0,0 +1,274 @@ +import mujoco +import mujoco_viewer +import numpy as np +from dataclasses import dataclass +from typing import Tuple +from scipy.spatial.transform import Rotation as R +from collections import deque + +from sim.envs.base.base_env import Env +from sim.envs.base.legged_robot_config import LeggedRobotCfg +from sim.envs.humanoids.stompymicro_config import StompyMicroCfg + +np.set_printoptions(precision=2, suppress=True) + +MUJOCO_TO_ISAAC = { + 7: 13, # right_shoulder_pitch + 8: 14, # right_shoulder_yaw + 9: 15, # right_elbow_yaw + 10: 5, # left_shoulder_pitch + 11: 6, # left_shoulder_yaw + 12: 7, # left_elbow_yaw + 13: 8, # right_hip_pitch + 14: 9, # right_hip_yaw + 15: 10, # right_hip_roll + 16: 11, # right_knee_pitch + 17: 12, # right_ankle_pitch + 18: 0, # left_hip_pitch + 19: 1, # left_hip_yaw + 20: 2, # left_hip_roll + 21: 3, # left_knee_pitch + 22: 4, # left_ankle_pitch +} +ISAAC_TO_MUJOCO = {v: k for k, v in MUJOCO_TO_ISAAC.items()} + + +@dataclass +class MujocoCfg(StompyMicroCfg): # LeggedRobotCfg): + class gains: + tau_factor: float = 4 + kp_scale: float = 1.0 + kd_scale: float = 1.0 + + +class MujocoEnv(Env): + def __init__( + self, + cfg: MujocoCfg, + render: bool = False, + ): + self.cfg = cfg + self.robot = self.cfg.asset.robot + self.num_joints = len(self.robot.all_joints()) + self.model = mujoco.MjModel.from_xml_path(self.cfg.asset.file_xml) + self.model.opt.timestep = self.cfg.sim.dt + self.data = mujoco.MjData(self.model) + self.viewer = mujoco_viewer.MujocoViewer(self.model, self.data) 
if render else None + + self.mujoco_indices = np.array([k for k in MUJOCO_TO_ISAAC.keys()]) + self.isaac_order = np.array([MUJOCO_TO_ISAAC[k] for k in self.mujoco_indices]) + + stiffness = self.robot.stiffness() + damping = self.robot.damping() + effort = self.robot.effort() + + self.kps = np.array([stiffness[joint.split('_', 1)[1]] for joint in self.robot.all_joints()]) * self.cfg.gains.kp_scale + self.kds = np.array([damping[joint.split('_', 1)[1]] for joint in self.robot.all_joints()]) * self.cfg.gains.kd_scale + self.tau_limits = np.array([effort[joint.split('_', 1)[1]] for joint in self.robot.all_joints()]) * self.cfg.gains.tau_factor + + self.default_dof_pos = np.array([ + self.robot.default_standing().get(joint, 0.0) + for joint in self.robot.all_joints() + ]) + + self.num_joints = len(self.robot.all_joints()) + + ### Initialize Buffers ## + self.obs_history = deque(maxlen=self.cfg.env.frame_stack) + for _ in range(self.cfg.env.frame_stack): + self.obs_history.append(np.zeros(self.cfg.env.num_single_obs)) + self.commands = np.zeros(4) # [vel_x, vel_y, ang_vel_yaw, heading] + self.last_actions = np.zeros(self.num_joints) + self.origin_quat = R.from_quat(self.cfg.init_state.rot) + self.origin_quat_inv = self.origin_quat.inv() + ## Initialize Buffers ## + + self.commands_scale = np.array([ + self.cfg.normalization.obs_scales.lin_vel, + self.cfg.normalization.obs_scales.lin_vel, + self.cfg.normalization.obs_scales.ang_vel + ]) + + self.reset() + self.compute_observations() + print_model_info(self.model, self.data) + + def compute_observations(self) -> None: + """Compute observations matching StompyMicroEnv implementation""" + phase = self.episode_length_buf * self.cfg.sim.dt / self.cfg.rewards.cycle_time + sin_pos = np.sin(2 * np.pi * phase) + cos_pos = np.cos(2 * np.pi * phase) + command_input = np.array([sin_pos, cos_pos] + list(self.commands[:3] * self.commands_scale)) + + q = self.data.qpos[-self.num_joints:][self.isaac_order] + dq = self.data.qvel[-self.num_joints:][self.isaac_order] + q = (q - self.default_dof_pos) * self.cfg.normalization.obs_scales.dof_pos + dq = dq * self.cfg.normalization.obs_scales.dof_vel + + sensor_quat = self.data.sensor("orientation").data[[1, 2, 3, 0]] # wxyz to xyzw + sensor_rotation = R.from_quat(sensor_quat) + + base_rotation = self.origin_quat_inv * sensor_rotation + base_quat = base_rotation.as_quat() # returns xyzw quaternion + + ang_vel_world = self.data.sensor("angular-velocity").data + base_ang_vel = base_rotation.inv().apply(ang_vel_world) + base_ang_vel = base_ang_vel * self.cfg.normalization.obs_scales.ang_vel + + base_euler = base_rotation.as_euler('xyz') * self.cfg.normalization.obs_scales.quat + + assert command_input.shape == (5,) + assert q.shape == (self.num_joints,) + assert dq.shape == (self.num_joints,) + assert base_ang_vel.shape == (3,) + assert base_euler.shape == (3,) + + # Match Isaac's observation concatenation order + obs_buf = np.concatenate([ + command_input.flatten(), # 5D + q.flatten(), # num_joints D + dq.flatten(), # num_joints D + self.last_actions.flatten(), # num_joints D + base_ang_vel.flatten(), # 3D + base_euler.flatten() # 3D + ]) + + # TODO: Add noise + + self.obs_history.append(obs_buf) + + obs_buf_all = np.stack([self.obs_history[i] for i in range(self.obs_history.maxlen)], axis=1) # Shape: (K, T) + assert obs_buf_all.shape == (self.cfg.env.num_single_obs, self.cfg.env.frame_stack) + + self.obs_buf = obs_buf_all.reshape(-1)[np.newaxis, :] + assert self.obs_buf.shape == (1, self.cfg.env.num_single_obs * 
self.cfg.env.frame_stack) + + def reset(self) -> Tuple[np.ndarray, ...]: + """Reset environment matching Isaac's initialization""" + # Reset episode counter + self.episode_length_buf = 0 + self.reset_buf = False + + try: + self.data.qpos = self.model.keyframe("default").qpos + self.data.qpos[-self.num_joints:] = self.default_dof_pos + except KeyError as e: + print(f"Warning: Keyframe 'default' not found in model. Using zero initial state instead.") + self.data.qpos[-self.num_joints:] = self.default_dof_pos.copy() + + self.data.qpos[-self.num_joints:] += np.random.uniform( + -self.cfg.domain_rand.start_pos_noise, + self.cfg.domain_rand.start_pos_noise, + size=self.num_joints + ) + self.data.qvel = np.zeros_like(self.data.qvel) + self.data.qacc = np.zeros_like(self.data.qacc) + self.data.ctrl = np.zeros_like(self.data.ctrl) + + self.actions = np.zeros(self.num_joints) + self.last_actions = np.zeros(self.num_joints) + + mujoco.mj_step(self.model, self.data) + obs, _, _, _ = self.step(np.zeros(self.num_joints)) + return obs + + def step(self, action: np.ndarray): + """Execute environment step matching Isaac's implementation more closely""" + actions = np.clip(action, -self.cfg.normalization.clip_actions, self.cfg.normalization.clip_actions) + delay = np.random.rand(1) * self.cfg.domain_rand.action_delay + actions = (1 - delay) * actions + delay * self.actions + actions += self.cfg.domain_rand.action_noise * np.random.randn(*actions.shape) * actions + self.actions = np.clip(action, -self.cfg.normalization.clip_actions, self.cfg.normalization.clip_actions) + + # self.render() + + # TODO: Understand why below doesn't work! + for _ in range(self.cfg.control.decimation): + self.render() + self.data.ctrl = self._compute_torques(actions) + mujoco.mj_step(self.model, self.data) + + ## Post Physics Step ## + self.episode_length_buf += 1 + + # origin and imu_indices weirdness?? 
+ + self.check_termination() + self.compute_rewards() + if self.reset_buf: + self.reset() + + self.compute_observations() + + ## Post Physics Step ## + + self.obs_buf = np.clip(self.obs_buf, -self.cfg.normalization.clip_observations, self.cfg.normalization.clip_observations) + + self.extras = { + 'step': self.episode_length_buf, + 'fall': self.reset_buf and self.episode_length_buf < self.cfg.env.episode_length_s / self.cfg.sim.dt + } + + return self.obs_buf, self.rew_buf, self.reset_buf, self.extras + + def render(self): + if self.viewer: + self.viewer.render() + + def close(self): + if self.viewer: + self.viewer.close() + + ## Helpers ## + + def compute_rewards(self) -> None: + # TODO: Implement reward computation + self.rew_buf = 0 + + def check_termination(self) -> None: + """Check termination conditions""" + self.reset_buf = False + self.reset_buf |= self.episode_length_buf >= self.cfg.env.episode_length_s / self.cfg.sim.dt + self.reset_buf |= self.data.qpos[2] < self.cfg.asset.termination_height + + def _compute_torques(self, actions: np.ndarray) -> np.ndarray: + """Calculate torques from position commands""" + if actions.ndim == 2: + actions = actions.squeeze(0) + + actions_mujoco = actions[self.isaac_order] + + actions_scaled = actions_mujoco * self.cfg.control.action_scale + q_mujoco = self.data.qpos[-self.num_joints:] + dq_mujoco = self.data.qvel[-self.num_joints:] + + torques = self.kps * (actions_scaled + self.default_dof_pos - q_mujoco) - self.kds * dq_mujoco + return np.clip(torques, -self.tau_limits, self.tau_limits) + + +def print_model_info(model, data): + print(f"\nMujoco DOF info:") + print(f"qpos shape: {data.qpos.shape}") + print(f"qvel shape: {data.qvel.shape}") + print("\nJoint info:") + for i in range(model.njnt): + print(f"Joint {i}: {model.joint(i).name}, type={model.joint(i).type}, qposadr={model.joint(i).qposadr}, dofadr={model.joint(i).dofadr}") + + print("\nBody Properties:") + for i in range(model.nbody): + name = model.body(i).name + mass = model.body(i).mass[0] + inertia = [ + model.body_inertia[i][0], # ixx + model.body_inertia[i][1], # iyy + model.body_inertia[i][2] # izz + ] + quat = model.body(i).quat + pos = model.body(i).pos + print(f"{name}: mass={mass:.3f} | inertia={inertia} | pos={pos} | quat={quat}") + + # print("\nFriction Properties:") + # for i in range(model.ngeom): + # name = model.geom(i).name + # friction = model.geom(i).friction + # print(f"{name}: friction={friction}") diff --git a/sim/envs/humanoids/dora_env.py b/sim/envs/humanoids/dora_env.py index 94ffa9be..116aa544 100644 --- a/sim/envs/humanoids/dora_env.py +++ b/sim/envs/humanoids/dora_env.py @@ -1,14 +1,12 @@ # mypy: disable-error-code="valid-newtype" """Defines the environment for training the humanoid.""" -from isaacgym.torch_utils import * # isort:skip - from sim.envs.base.legged_robot import LeggedRobot from sim.resources.dora.joints import Robot from sim.utils.terrain import HumanoidTerrain from isaacgym import gymtorch # isort:skip - +from isaacgym.torch_utils import * # isort:skip import torch # isort:skip diff --git a/sim/envs/humanoids/stompymicro_config.py b/sim/envs/humanoids/stompymicro_config.py index d5b83e76..12cbe362 100644 --- a/sim/envs/humanoids/stompymicro_config.py +++ b/sim/envs/humanoids/stompymicro_config.py @@ -1,13 +1,15 @@ """Defines the environment configuration for the Getting up task""" -from sim.env import robot_urdf_path +import numpy as np + +from sim.env import robot_mjcf_path, robot_urdf_path from sim.envs.base.legged_robot_config import ( # type: 
ignore LeggedRobotCfg, LeggedRobotCfgPPO, ) from sim.resources.stompymicro.joints import Robot -NUM_JOINTS = len(Robot.all_joints()) # 20 +NUM_JOINTS = len(Robot.all_joints()) # 16 class StompyMicroCfg(LeggedRobotCfg): @@ -23,7 +25,7 @@ class env(LeggedRobotCfg.env): num_privileged_obs = int(c_frame_stack * single_num_privileged_obs) num_actions = NUM_JOINTS num_envs = 4096 - episode_length_s = 24 # episode length in seconds + episode_length_s = 60 # episode length in seconds use_ref_actions = False class safety: @@ -35,13 +37,15 @@ class safety: class asset(LeggedRobotCfg.asset): name = "stompymicro" - file = str(robot_urdf_path(name)) + robot = Robot + file = str(robot_urdf_path(name, legs_only=Robot.legs_only)) + file_xml = str(robot_mjcf_path(name, legs_only=Robot.legs_only)) foot_name = ["foot_left", "foot_right"] - knee_name = ["ankle_pitch_left", "ankle_pitch_right"] + knee_name = ["left_knee_pitch_motor", "right_knee_pitch_motor"] - termination_height = 0.05 - default_feet_height = 0.01 + termination_height = 0.25 + default_feet_height = 0.02 terminate_after_contacts_on = ["torso"] @@ -51,6 +55,11 @@ class asset(LeggedRobotCfg.asset): replace_cylinder_with_capsule = False fix_base_link = False + # BAM parameters + # TODO update effort to larger one + friction = 0.053343597773929877 + armature = 0.008793405204572328 + class terrain(LeggedRobotCfg.terrain): mesh_type = "plane" # mesh_type = 'trimesh' @@ -70,14 +79,15 @@ class terrain(LeggedRobotCfg.terrain): class noise: add_noise = True - noise_level = 0.6 # scales other values + noise_level = 3.0 # scales other values class noise_scales: dof_pos = 0.05 - dof_vel = 0.5 - ang_vel = 0.1 - lin_vel = 0.05 - quat = 0.03 + dof_vel = 1.5 + lin_vel = 0.1 + ang_vel = 0.2 + quat = 0.1 + gravity = 0.05 height_measurements = 0.1 class init_state(LeggedRobotCfg.init_state): @@ -97,7 +107,7 @@ class control(LeggedRobotCfg.control): # action scale: target angle = actionScale * action + defaultAngle action_scale = 0.25 # decimation: Number of control action updates @ sim DT per policy DT - decimation = 10 # 100hz + decimation = 20 # 50hz class sim(LeggedRobotCfg.sim): dt = 0.001 # 1000 Hz @@ -108,7 +118,7 @@ class physx(LeggedRobotCfg.sim.physx): num_threads = 10 solver_type = 1 # 0: pgs, 1: tgs num_position_iterations = 4 - num_velocity_iterations = 1 + num_velocity_iterations = 0 contact_offset = 0.01 # [m] rest_offset = 0.0 # [m] bounce_threshold_velocity = 0.1 # [m/s] @@ -119,18 +129,18 @@ class physx(LeggedRobotCfg.sim.physx): contact_collection = 2 class domain_rand(LeggedRobotCfg.domain_rand): - start_pos_noise = 0.1 + start_pos_noise = 0.05 randomize_friction = True friction_range = [0.1, 2.0] - randomize_base_mass = True # True + randomize_base_mass = False added_mass_range = [-0.5, 0.5] - push_robots = True # True - push_interval_s = 4 - max_push_vel_xy = 0.05 - max_push_ang_vel = 0.1 + push_robots = True + push_interval_s = 3.5 + max_push_vel_xy = 0.6 # TODO: This was set to make standing significantly harder + max_push_ang_vel = 0.8 # dynamic randomization action_delay = 0.5 - action_noise = 0.02 + action_noise = 0.1 # TODO: This was set to make standing harder class commands(LeggedRobotCfg.commands): # Vers: lin_vel_x, lin_vel_y, ang_vel_yaw, heading (in heading mode ang_vel_yaw is recomputed from heading error) @@ -139,53 +149,50 @@ class commands(LeggedRobotCfg.commands): heading_command = True # if true: compute ang vel command from heading error class ranges: - lin_vel_x = [-0.3, 0.6] # min max [m/s] - lin_vel_y = [-0.3, 0.3] # min 
max [m/s] - ang_vel_yaw = [-0.3, 0.3] # min max [rad/s] - heading = [-3.14, 3.14] + # for each, min / max + lin_vel_x = [-0.4, 0.4] # [m/s] + lin_vel_y = [-0.4, 0.4] # [m/s] + ang_vel_yaw = [-np.pi / 20, np.pi / 20] # [rad/s] + heading = [-np.pi, np.pi] # [rad] + # Walking Task class rewards: base_height_target = Robot.height - min_dist = 0.07 - max_dist = 0.14 + min_dist = 0.14 + max_dist = 0.20 # put some settings here for LLM parameter tuning target_joint_pos_scale = 0.17 # rad - target_feet_height = 0.02 # m - cycle_time = 0.2 # sec + target_feet_height = 0.05 # m + cycle_time = 0.5 # sec # if true negative total rewards are clipped at zero (avoids early termination problems) only_positive_rewards = True # tracking reward = exp(error*sigma) tracking_sigma = 5.0 - max_contact_force = 50 # forces above this value are penalized + max_contact_force = 100 # forces above this value are penalized class scales: - - # TODO: add an argument - walking = False - - if walking == True: - # reference motion tracking - joint_pos = 1.6 - feet_clearance = 1.2 - feet_contact_number = 1.2 - feet_air_time = 1.2 - foot_slip = -0.05 - feet_distance = 0.2 - knee_distance = 0.2 - # contact - feet_contact_forces = -0.01 - # vel tracking - tracking_lin_vel = 1.2 - tracking_ang_vel = 1.1 - vel_mismatch_exp = 0.5 # lin_z; ang x,y - low_speed = 0.4 - track_vel_hard = 0.5 + # reference motion tracking + joint_pos = 1.0 + feet_clearance = 1.0 + feet_contact_number = 1.5 + feet_air_time = 2.0 + foot_slip = -0.3 + feet_distance = 0.2 + knee_distance = 0.2 + # contact + feet_contact_forces = -0.1 + # vel tracking + tracking_lin_vel = 1.0 + tracking_ang_vel = 1.0 + vel_mismatch_exp = 1.0 # lin_z; ang x,y + low_speed = 1.0 + track_vel_hard = 1.0 # base pos default_joint_pos = 1.0 - orientation = 1 - base_height = 0.2 + orientation = 2.0 + base_height = 0.5 base_acc = 0.2 # energy action_smoothness = -0.002 @@ -212,6 +219,32 @@ class viewer: lookat = [0, -2, 0] +# Standing Task +class rewards: + base_height_target = Robot.height + min_dist = 0.06 + max_dist = 0.16 + + cycle_time = 0.5 # sec + target_joint_pos_scale = 0.17 # rad + only_positive_rewards = True + tracking_sigma = 5.0 + max_contact_force = 50 # forces above this value are penalized + + class scales: + # base pos + default_joint_pos = 1.0 + base_height = 0.2 + base_acc = 0.2 + # energy + action_smoothness = -0.002 + torques = -1e-4 + dof_vel = -5e-3 + dof_acc = -1e-6 + +# StompyMicroCfg.rewards = rewards # comment out to use the walking task rewards + + class StompyMicroCfgPPO(LeggedRobotCfgPPO): seed = 5 runner_class_name = "OnPolicyRunner" # DWLOnPolicyRunner @@ -233,14 +266,19 @@ class runner: policy_class_name = "ActorCritic" algorithm_class_name = "PPO" num_steps_per_env = 60 # per iteration - max_iterations = 3001 # number of policy updates + max_iterations = 12001 # number of policy updates # logging - save_interval = 300 # check for potential saves every this many iterations + save_interval = 100 # check for potential saves every this many iterations experiment_name = "StompyMicro" - run_name = "" + run_name = "WalkingRobustFinal" # load and resume resume = False load_run = -1 # -1 = last run checkpoint = -1 # -1 = last saved model resume_path = None # updated from load_run and chkpt + + +if __name__ == "__main__": + cfg = StompyMicroCfg() + print(cfg.__dict__) diff --git a/sim/envs/humanoids/stompymicro_env.py b/sim/envs/humanoids/stompymicro_env.py index 79669b50..c5a4232c 100644 --- a/sim/envs/humanoids/stompymicro_env.py +++ 
b/sim/envs/humanoids/stompymicro_env.py @@ -2,13 +2,13 @@ """Defines the environment for training the humanoid.""" from sim.envs.base.legged_robot import LeggedRobot +from sim.envs.humanoids.stompymicro_config import StompyMicroCfg from sim.resources.stompymicro.joints import Robot from sim.utils.terrain import HumanoidTerrain from isaacgym import gymtorch # isort:skip from isaacgym.torch_utils import * # isort: skip - import torch # isort:skip @@ -46,7 +46,7 @@ class StompyMicroEnv(LeggedRobot): reset_idx(env_ids): Resets the environment for the specified environment IDs. """ - def __init__(self, cfg, sim_params, physics_engine, sim_device, headless): + def __init__(self, cfg: StompyMicroCfg, sim_params, physics_engine, sim_device: str, headless: bool): super().__init__(cfg, sim_params, physics_engine, sim_device, headless) self.last_feet_z = self.cfg.asset.default_feet_height self.feet_height = torch.zeros((self.num_envs, 2), device=self.device) @@ -57,13 +57,26 @@ def __init__(self, cfg, sim_params, physics_engine, sim_device, headless): self.legs_joints = {} for name, joint in Robot.legs.left.joints_motors(): - print(name) joint_handle = self.gym.find_actor_dof_handle(env_handle, actor_handle, joint) self.legs_joints["left_" + name] = joint_handle + print(f"Robot.legs.left.{name} -> {joint} ({joint_handle})") for name, joint in Robot.legs.right.joints_motors(): joint_handle = self.gym.find_actor_dof_handle(env_handle, actor_handle, joint) self.legs_joints["right_" + name] = joint_handle + print(f"Robot.legs.right.{name} -> {joint} ({joint_handle})") + + if not Robot.legs_only: + self.arms_joints = {} + for name, joint in Robot.arms.left.joints_motors(): + joint_handle = self.gym.find_actor_dof_handle(env_handle, actor_handle, joint) + self.arms_joints["left_" + name] = joint_handle + print(f"Robot.arms.left.{name} -> {joint} ({joint_handle})") + + for name, joint in Robot.arms.right.joints_motors(): + joint_handle = self.gym.find_actor_dof_handle(env_handle, actor_handle, joint) + self.arms_joints["right_" + name] = joint_handle + print(f"Robot.arms.right.{name} -> {joint} ({joint_handle})") self.compute_observations() @@ -118,7 +131,6 @@ def compute_ref_state(self): sin_pos = torch.sin(2 * torch.pi * phase) sin_pos_l = sin_pos.clone() sin_pos_r = sin_pos.clone() - default_clone = self.default_dof_pos.clone() self.ref_dof_pos = self.default_dof_pos.repeat(self.num_envs, 1) scale_1 = self.cfg.rewards.target_joint_pos_scale @@ -206,14 +218,14 @@ def compute_observations(self): self.privileged_obs_buf = torch.cat( ( self.command_input, # 2 + 3 - (self.dof_pos - self.default_joint_pd_target) * self.obs_scales.dof_pos, # 12 - self.dof_vel * self.obs_scales.dof_vel, # 12 - self.actions, # 12 - diff, # 12 + (self.dof_pos - self.default_joint_pd_target) * self.obs_scales.dof_pos, # 16 + self.dof_vel * self.obs_scales.dof_vel, # 16 + self.actions, # 16 + diff, # 16 self.base_lin_vel * self.obs_scales.lin_vel, # 3 self.base_ang_vel * self.obs_scales.ang_vel, # 3 self.base_euler_xyz * self.obs_scales.quat, # 3 - self.rand_push_force[:, :2], # 3 + self.rand_push_force[:, :2], # 2 self.rand_push_torque, # 3 self.env_frictions, # 1 self.body_mass / 30.0, # 1 @@ -221,19 +233,19 @@ def compute_observations(self): contact_mask, # 2 ), dim=-1, - ) + ) # 89 obs_buf = torch.cat( ( self.command_input, # 5 = 2D(sin cos) + 3D(vel_x, vel_y, aug_vel_yaw) - q, # 20D - dq, # 20D - self.actions, # 20D + q, # 16D + dq, # 16D + self.actions, # 16D self.base_ang_vel * self.obs_scales.ang_vel, # 3 self.base_euler_xyz 
* self.obs_scales.quat, # 3 ), dim=-1, - ) + ) # 59D if self.cfg.terrain.measure_heights: heights = ( @@ -317,7 +329,7 @@ def _reward_feet_air_time(self): self.last_contacts = contact first_contact = (self.feet_air_time > 0.0) * self.contact_filt self.feet_air_time += self.dt - air_time = self.feet_air_time.clamp(0, 0.5) * first_contact + air_time = self.feet_air_time.clamp(0, 1) * first_contact self.feet_air_time *= ~self.contact_filt return air_time.sum(dim=1) @@ -336,7 +348,8 @@ def _reward_orientation(self): """ quat_mismatch = torch.exp(-torch.sum(torch.abs(self.base_euler_xyz[:, :2]), dim=1) * 10) orientation = torch.exp(-torch.norm(self.projected_gravity[:, :2], dim=1) * 20) - return (quat_mismatch + orientation) / 2 + reward = (quat_mismatch + orientation) / 2 + return reward def _reward_feet_contact_forces(self): """Calculates the reward for keeping contact forces within a specified range. Penalizes @@ -406,7 +419,9 @@ def _reward_track_vel_hard(self): linear_error = 0.2 * (lin_vel_error + ang_vel_error) - return (lin_vel_error_exp + ang_vel_error_exp) / 2.0 - linear_error + reward = (lin_vel_error_exp + ang_vel_error_exp) / 2.0 - linear_error + + return reward def _reward_tracking_lin_vel(self): """Tracks linear velocity commands along the xy axes. diff --git a/sim/envs/humanoids/stompypro_config.py b/sim/envs/humanoids/stompypro_config.py index c68ab34e..6d1e020e 100644 --- a/sim/envs/humanoids/stompypro_config.py +++ b/sim/envs/humanoids/stompypro_config.py @@ -7,7 +7,7 @@ ) from sim.resources.stompypro.joints import Robot -NUM_JOINTS = len(Robot.all_joints()) # 12 +NUM_JOINTS = len(Robot.all_joints()) class StompyProCfg(LeggedRobotCfg): @@ -39,6 +39,7 @@ class asset(LeggedRobotCfg.asset): foot_name = ["L_foot", "R_foot"] knee_name = ["L_calf", "R_calf"] + imu_name = "imu_link" termination_height = 0.2 default_feet_height = 0.0 @@ -50,8 +51,8 @@ class asset(LeggedRobotCfg.asset): fix_base_link = False class terrain(LeggedRobotCfg.terrain): - mesh_type = "plane" - # mesh_type = 'trimesh' + # mesh_type = "plane" + mesh_type = "trimesh" curriculum = False # rough terrain only: measure_heights = False @@ -161,9 +162,9 @@ class rewards: class scales: # reference motion tracking - joint_pos = 1.6 - feet_clearance = 1.6 - feet_contact_number = 1.5 + joint_pos = 1.9 + feet_clearance = 1.7 + feet_contact_number = 1.7 # gait feet_air_time = 1.6 foot_slip = -0.05 @@ -172,8 +173,8 @@ class scales: # contact feet_contact_forces = -0.01 # vel tracking - tracking_lin_vel = 1.2 - tracking_ang_vel = 1.3 + tracking_lin_vel = 1.0 + tracking_ang_vel = 1.0 vel_mismatch_exp = 0.5 # lin_z; ang x,y low_speed = 0.2 track_vel_hard = 0.5 diff --git a/sim/model_export.py b/sim/model_export.py new file mode 100644 index 00000000..6e797e02 --- /dev/null +++ b/sim/model_export.py @@ -0,0 +1,297 @@ +"""Script to convert weights to Rust-compatible format.""" + +import re +from dataclasses import dataclass, fields +from io import BytesIO +from typing import List, Optional, Tuple + +import onnx +import onnxruntime as ort + +from sim.scripts.create_mjcf import load_embodiment + +import torch # isort: skip +from torch import Tensor, nn # isort: skip +from torch.distributions import Normal # isort: skip + + +@dataclass +class ActorCfg: + embodiment: str = "stompypro" + cycle_time: float = 0.4 # Cycle time for sinusoidal command input + action_scale: float = 0.25 # Scale for actions + lin_vel_scale: float = 2.0 # Scale for linear velocity + ang_vel_scale: float = 1.0 # Scale for angular velocity + quat_scale: float 
= 1.0 # Scale for quaternion + dof_pos_scale: float = 1.0 # Scale for joint positions + dof_vel_scale: float = 0.05 # Scale for joint velocities + frame_stack: int = 15 # Number of frames to stack for the policy input + clip_observations: float = 18.0 # Clip observations to this value + clip_actions: float = 18.0 # Clip actions to this value + + +class ActorCritic(nn.Module): + def __init__( + self, + num_actor_obs: int, + num_critic_obs: int, + num_actions: int, + actor_hidden_dims: List[int] = [256, 256, 256], + critic_hidden_dims: List[int] = [256, 256, 256], + init_noise_std: float = 1.0, + activation: nn.Module = nn.ELU(), + ) -> None: + super(ActorCritic, self).__init__() + + mlp_input_dim_a = num_actor_obs + mlp_input_dim_c = num_critic_obs + + # Policy function. + actor_layers = [] + actor_layers.append(nn.Linear(mlp_input_dim_a, actor_hidden_dims[0])) + actor_layers.append(activation) + for dim_i in range(len(actor_hidden_dims)): + if dim_i == len(actor_hidden_dims) - 1: + actor_layers.append(nn.Linear(actor_hidden_dims[dim_i], num_actions)) + else: + actor_layers.append(nn.Linear(actor_hidden_dims[dim_i], actor_hidden_dims[dim_i + 1])) + actor_layers.append(activation) + self.actor = nn.Sequential(*actor_layers) + + # Value function. + critic_layers = [] + critic_layers.append(nn.Linear(mlp_input_dim_c, critic_hidden_dims[0])) + critic_layers.append(activation) + for dim_i in range(len(critic_hidden_dims)): + if dim_i == len(critic_hidden_dims) - 1: + critic_layers.append(nn.Linear(critic_hidden_dims[dim_i], 1)) + else: + critic_layers.append(nn.Linear(critic_hidden_dims[dim_i], critic_hidden_dims[dim_i + 1])) + critic_layers.append(activation) + self.critic = nn.Sequential(*critic_layers) + + # Action noise. + self.std = nn.Parameter(init_noise_std * torch.ones(num_actions)) + self.distribution = None + + # Disable args validation for speedup. + Normal.set_default_validate_args = False + + +class Actor(nn.Module): + """Actor model. + + Parameters: + policy: The policy network. + cfg: The configuration for the actor. + """ + + def __init__(self, policy: nn.Module, cfg: ActorCfg) -> None: + super().__init__() + + self.robot = load_embodiment(cfg.embodiment) + + # Policy config + default_dof_pos_dict = self.robot.default_standing() + self.num_actions = len(self.robot.all_joints()) + self.frame_stack = cfg.frame_stack + + # 11 is the number of single observation features - 6 from IMU, 5 from command input + # 3 comes from the number of times num_actions is repeated in the observation (dof_pos, dof_vel, prev_actions) + self.num_single_obs = 11 + self.num_actions * 3 + self.num_observations = int(self.frame_stack * self.num_single_obs) + + self.policy = policy + + # This is the policy reference joint positions and should be the same order as the policy and mjcf file. 
+ # CURRENTLY NOT USED IN FORWARD PASS TO MAKE MORE GENERALIZEABLE FOR REAL AND SIM + self.default_dof_pos = torch.tensor(list(default_dof_pos_dict.values())) + + self.action_scale = cfg.action_scale + self.lin_vel_scale = cfg.lin_vel_scale + self.ang_vel_scale = cfg.ang_vel_scale + self.quat_scale = cfg.quat_scale + self.dof_pos_scale = cfg.dof_pos_scale + self.dof_vel_scale = cfg.dof_vel_scale + + self.clip_observations = cfg.clip_observations + self.clip_actions = cfg.clip_actions + + self.cycle_time = cfg.cycle_time + + def get_init_buffer(self) -> Tensor: + return torch.zeros(self.num_observations) + + def forward( + self, + x_vel: Tensor, # x-coordinate of the target velocity + y_vel: Tensor, # y-coordinate of the target velocity + rot: Tensor, # target angular velocity + t: Tensor, # current policy time (sec) + dof_pos: Tensor, # current angular position of the DoFs relative to default + dof_vel: Tensor, # current angular velocity of the DoFs + prev_actions: Tensor, # previous actions taken by the model + imu_ang_vel: Tensor, # angular velocity of the IMU + imu_euler_xyz: Tensor, # euler angles of the IMU + buffer: Tensor, # buffer of previous observations + ) -> Tuple[Tensor, Tensor, Tensor]: + """Runs the actor model forward pass. + + Args: + x_vel: The x-coordinate of the target velocity, with shape (1). + y_vel: The y-coordinate of the target velocity, with shape (1). + rot: The target angular velocity, with shape (1). + t: The current policy time step, with shape (1). + dof_pos: The current angular position of the DoFs relative to default, with shape (num_actions). + dof_vel: The current angular velocity of the DoFs, with shape (num_actions). + prev_actions: The previous actions taken by the model, with shape (num_actions). + imu_ang_vel: The angular velocity of the IMU, with shape (3), + in radians per second. If IMU is not used, can be all zeros. + imu_euler_xyz: The euler angles of the IMU, with shape (3), + in radians. "XYZ" means (roll, pitch, yaw). If IMU is not used, + can be all zeros. + buffer: The buffer of previous actions, with shape (frame_stack * num_single_obs). This is + the return value of the previous forward pass. On the first + pass, it should be all zeros. + + Returns: + actions_scaled: The actions to take, with shape (num_actions), scaled by the action_scale. + actions: The actions to take, with shape (num_actions). + x: The new buffer of observations, with shape (frame_stack * num_single_obs). 
+ """ + sin_pos = torch.sin(2 * torch.pi * t / self.cycle_time) + cos_pos = torch.cos(2 * torch.pi * t / self.cycle_time) + + # Construct command input + command_input = torch.cat( + ( + sin_pos, + cos_pos, + x_vel * self.lin_vel_scale, + y_vel * self.lin_vel_scale, + rot * self.ang_vel_scale, + ), + dim=0, + ) + + # Calculate current position and velocity observations + q = dof_pos * self.dof_pos_scale + dq = dof_vel * self.dof_vel_scale + + # Construct new observation + new_x = torch.cat( + ( + command_input, + q, + dq, + prev_actions, + imu_ang_vel * self.ang_vel_scale, + imu_euler_xyz * self.quat_scale, + ), + dim=0, + ) + + # Clip the inputs + new_x = torch.clamp(new_x, -self.clip_observations, self.clip_observations) + + # Add the new frame to the buffer + x = torch.cat((buffer, new_x), dim=0) + # Pop the oldest frame + x = x[self.num_single_obs :] + + policy_input = x.unsqueeze(0) + + # Get actions from the policy + actions = self.policy(policy_input).squeeze(0) + actions_scaled = actions * self.action_scale + + return actions_scaled, actions, x + + +def convert_model_to_onnx(model_path: str, cfg: ActorCfg, save_path: Optional[str] = None) -> ort.InferenceSession: + """Converts a PyTorch model to a ONNX format. + + Args: + model_path: Path to the PyTorch model. + cfg: The configuration for the actor. + save_path: Path to save the ONNX model. + + Returns: + An ONNX inference session. + """ + all_weights = torch.load(model_path, map_location="cpu", weights_only=True) + weights = all_weights["model_state_dict"] + num_actor_obs = weights["actor.0.weight"].shape[1] + num_critic_obs = weights["critic.0.weight"].shape[1] + num_actions = weights["std"].shape[0] + actor_hidden_dims = [v.shape[0] for k, v in weights.items() if re.match(r"actor\.\d+\.weight", k)] + critic_hidden_dims = [v.shape[0] for k, v in weights.items() if re.match(r"critic\.\d+\.weight", k)] + actor_hidden_dims = actor_hidden_dims[:-1] + critic_hidden_dims = critic_hidden_dims[:-1] + + ac_model = ActorCritic(num_actor_obs, num_critic_obs, num_actions, actor_hidden_dims, critic_hidden_dims) + ac_model.load_state_dict(weights) + + a_model = Actor(ac_model.actor, cfg) + + # Gets the model input tensors. 
+ x_vel = torch.randn(1) + y_vel = torch.randn(1) + rot = torch.randn(1) + t = torch.randn(1) + dof_pos = torch.randn(a_model.num_actions) + dof_vel = torch.randn(a_model.num_actions) + prev_actions = torch.randn(a_model.num_actions) + imu_ang_vel = torch.randn(3) + imu_euler_xyz = torch.randn(3) + buffer = a_model.get_init_buffer() + input_tensors = (x_vel, y_vel, rot, t, dof_pos, dof_vel, prev_actions, imu_ang_vel, imu_euler_xyz, buffer) + + jit_model = torch.jit.script(a_model) + + # Export the model to a buffer + buffer = BytesIO() + torch.onnx.export(jit_model, input_tensors, buffer) + buffer.seek(0) + + # Load the model as an onnx model + model_proto = onnx.load_model(buffer) + + # Add sim2sim metadata + robot_effort = list(a_model.robot.effort().values()) + robot_stiffness = list(a_model.robot.stiffness().values()) + robot_damping = list(a_model.robot.damping().values()) + num_actions = a_model.num_actions + num_observations = a_model.num_observations + + for field_name, field in [ + ("robot_effort", robot_effort), + ("robot_stiffness", robot_stiffness), + ("robot_damping", robot_damping), + ("num_actions", num_actions), + ("num_observations", num_observations), + ]: + meta = model_proto.metadata_props.add() + meta.key = field_name + meta.value = str(field) + + # Add the configuration of the model + for field in fields(cfg): + value = getattr(cfg, field.name) + meta = model_proto.metadata_props.add() + meta.key = field.name + meta.value = str(value) + + if save_path: + onnx.save_model(model_proto, save_path) + + # Convert model to bytes + buffer2 = BytesIO() + onnx.save_model(model_proto, buffer2) + buffer2.seek(0) + + return ort.InferenceSession(buffer2.read()) + + +if __name__ == "__main__": + convert_model_to_onnx("model_3000.pt", ActorCfg(), "policy.onnx") diff --git a/sim/play.py b/sim/play.py index 9f215534..29158f69 100755 --- a/sim/play.py +++ b/sim/play.py @@ -1,56 +1,31 @@ +# mypy: ignore-errors """Play a trained policy in the environment. 
Run: python sim/play.py --task g1 --log_h5 python sim/play.py --task stompymini --log_h5 + python sim/play.py --task=stompymicro --sim_device=cpu --num_envs=9 --arrows --command_mode=fixed """ - -# mypy: ignore-errors - import argparse -import copy import logging import os from datetime import datetime -from typing import Any, Union import cv2 import h5py import numpy as np -from isaacgym import gymapi from tqdm import tqdm -logger = logging.getLogger(__name__) - from sim.env import run_dir # noqa: E402 from sim.envs import task_registry # noqa: E402 -from sim.utils.helpers import get_args # noqa: E402 +from sim.utils.args_parsing import parse_args_with_extras +from sim.utils.cmd_manager import CommandManager # noqa: E402 +from sim.utils.helpers import export_policy_as_jit # noqa: E402 from sim.utils.logger import Logger # noqa: E402 -import torch # isort: skip - - -def export_policy_as_jit(actor_critic: Any, path: Union[str, os.PathLike]) -> None: - os.makedirs(path, exist_ok=True) - path = os.path.join(path, "policy_1.pt") - model = copy.deepcopy(actor_critic.actor).to("cpu") - traced_script_module = torch.jit.script(model) - traced_script_module.save(path) - - -def export_policy_as_onnx(actor_critic, path): - os.makedirs(path, exist_ok=True) - path = os.path.join(path, "policy_1.onnx") - model = copy.deepcopy(actor_critic.actor).to("cpu") - - # Get the input dimension from the first layer of the model - first_layer = next(model.parameters()) - input_dim = first_layer.shape[1] - - # Create a dummy input tensor with the correct dimensions - dummy_input = torch.randn(1, input_dim) +from isaacgym import gymapi # isort: skip - torch.onnx.export(model, dummy_input, path) +logger = logging.getLogger(__name__) def play(args: argparse.Namespace) -> None: @@ -60,12 +35,15 @@ def play(args: argparse.Namespace) -> None: # override some parameters for testing env_cfg.env.num_envs = min(env_cfg.env.num_envs, 1) env_cfg.sim.max_gpu_contact_pairs = 2**10 - env_cfg.terrain.mesh_type = "plane" + if args.trimesh: + env_cfg.terrain.mesh_type = "trimesh" + else: + env_cfg.terrain.mesh_type = "plane" env_cfg.terrain.num_rows = 5 env_cfg.terrain.num_cols = 5 env_cfg.terrain.curriculum = False env_cfg.terrain.max_init_terrain_level = 5 - env_cfg.noise.add_noise = True + env_cfg.noise.add_noise = False env_cfg.domain_rand.push_robots = False env_cfg.domain_rand.joint_angle_noise = 0.0 env_cfg.noise.curriculum = False @@ -86,29 +64,35 @@ def play(args: argparse.Namespace) -> None: policy = ppo_runner.get_inference_policy(device=env.device) # Export policy if needed - EXPORT_POLICY = True - if EXPORT_POLICY: + if args.export_policy: path = os.path.join(".") export_policy_as_jit(ppo_runner.alg.actor_critic, path) print("Exported policy as jit script to: ", path) # export policy as a onnx module (used to run it on web) - if EXPORT_ONNX: - path = os.path.join(".") - export_policy_as_onnx(ppo_runner.alg.actor_critic, path) + if args.export_onnx: + from sim.model_export import ActorCfg, convert_model_to_onnx # noqa: E402 + + path = ppo_runner.alg.actor_critic + convert_model_to_onnx(path, ActorCfg(), save_path="policy.onnx") print("Exported policy as onnx to: ", path) # Prepare for logging env_logger = Logger(env.dt) robot_index = 0 joint_index = 1 - stop_state_log = 1000 now = datetime.now().strftime("%Y-%m-%d_%H-%M-%S") + + # Prepare command manager + cmd_manager = CommandManager( + num_envs=env_cfg.env.num_envs, mode=args.command_mode, device=env.device, env_cfg=env_cfg + ) + if args.log_h5: h5_file = 
h5py.File(f"data{now}.h5", "w") # Create dataset for actions - max_timesteps = stop_state_log + max_timesteps = args.max_iterations num_dof = env.num_dof dset_actions = h5_file.create_dataset("actions", (max_timesteps, num_dof), dtype=np.float32) @@ -136,7 +120,7 @@ def play(args: argparse.Namespace) -> None: "observations/euler", (max_timesteps, buf_len, 3), dtype=np.float32 ) # root orientation - if RENDER: + if args.render: camera_properties = gymapi.CameraProperties() camera_properties.width = 1920 camera_properties.height = 1080 @@ -165,26 +149,28 @@ def play(args: argparse.Namespace) -> None: os.mkdir(experiment_dir) video = cv2.VideoWriter(dir, fourcc, 50.0, (1920, 1080)) - for t in tqdm(range(stop_state_log)): + for t in tqdm(range(train_cfg.runner.max_iterations)): actions = policy(obs.detach()) if args.log_h5: dset_actions[t] = actions.detach().numpy() - if FIX_COMMAND: - env.commands[:, 0] = 0.5 - env.commands[:, 1] = 0.0 - env.commands[:, 2] = 0.0 - env.commands[:, 3] = 0.0 + commands = cmd_manager.update(env.dt) + env.commands[:] = commands + obs, critic_obs, rews, dones, infos = env.step(actions.detach()) - print(f"IMU: {obs[0, (3 * env.num_actions + 5) + 3 : (3 * env.num_actions + 5) + 2 * 3]}") - if RENDER: + if args.render: env.gym.fetch_results(env.sim, True) env.gym.step_graphics(env.sim) env.gym.render_all_camera_sensors(env.sim) img = env.gym.get_camera_image(env.sim, env.envs[0], h1, gymapi.IMAGE_COLOR) img = np.reshape(img, (1080, 1920, 4)) img = cv2.cvtColor(img, cv2.COLOR_RGBA2BGR) + robot_positions = env.root_states[:, 0:3].cpu().numpy() + actual_vels = np.stack([env.base_lin_vel[:, 0].cpu().numpy(), env.base_lin_vel[:, 1].cpu().numpy()], axis=1) + + if args.arrows: + cmd_manager.draw(env.gym, env.viewer, env.envs, robot_positions, actual_vels) video.write(img[..., :3]) @@ -236,8 +222,9 @@ def play(args: argparse.Namespace) -> None: env_logger.print_rewards() env_logger.plot_states() + cmd_manager.close() - if RENDER: + if args.render: video.release() if args.log_h5: @@ -245,15 +232,35 @@ def play(args: argparse.Namespace) -> None: h5_file.close() -if __name__ == "__main__": - RENDER = True - FIX_COMMAND = True +def add_play_arguments(parser): + """Add play-specific arguments.""" + # Visualization + parser.add_argument( + "--arrows", action="store_true", default=False, help="Draw command and velocity arrows during visualization" + ) + parser.add_argument("--render", action="store_true", default=True, help="Enable rendering") + + # Control + parser.add_argument( + "--command_mode", + type=str, + default="fixed", + choices=["fixed", "oscillating", "random", "keyboard"], + help="Control mode for the robot", + ) - EXPORT_ONNX = True + # Export options + parser.add_argument("--export_policy", action="store_true", default=False, help="Export policy as JIT") + parser.add_argument("--export_onnx", action="store_true", default=False, help="Export policy as ONNX") - base_args = get_args() - parser = argparse.ArgumentParser(description="Extend base arguments with log_h5") - parser.add_argument("--log_h5", action="store_true", help="Enable HDF5 logging") - args, unknown = parser.parse_known_args(namespace=base_args) + # Logging + parser.add_argument("--log_h5", action="store_true", default=False, help="Enable HDF5 logging") + # Trimesh + parser.add_argument("--trimesh", action="store_true", default=False, help="Use trimesh terrain") + + +if __name__ == "__main__": + args = parse_args_with_extras(add_play_arguments) + print("Arguments:", vars(args)) play(args) diff --git 
a/sim/produce_sim_data.py b/sim/produce_sim_data.py new file mode 100644 index 00000000..f4bae38a --- /dev/null +++ b/sim/produce_sim_data.py @@ -0,0 +1,60 @@ +import argparse +import multiprocessing as mp +import subprocess +import time + +from tqdm import tqdm + + +def run_simulation(sim_idx: int, args: argparse.Namespace) -> None: + """Run a single simulation instance with the given parameters.""" + cmd = [ + "python", + "sim/sim2sim.py", + "--embodiment", + args.embodiment, + "--load_model", + args.load_model, + "--log_h5", + "--no_render", + ] + + try: + subprocess.run(cmd, check=True) + except subprocess.CalledProcessError as e: + print(f"Simulation {sim_idx} failed with error: {e}") + + +def run_parallel_sims(num_threads: int, args: argparse.Namespace) -> None: + """Run multiple simulation instances in parallel with a delay between each start.""" + processes = [] + delay_between_starts = 0.1 # Adjust the delay (in seconds) as needed + + for idx in range(num_threads): + p = mp.Process(target=run_simulation, args=(idx, args)) + p.start() + processes.append(p) + time.sleep(delay_between_starts) # Introduce a delay before starting the next process + + # Wait for all processes to finish with a progress bar + for p in tqdm(processes, desc="Running parallel simulations"): + p.join() + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="Run parallel simulations.") + parser.add_argument("--num_threads", type=int, default=10, help="Number of parallel simulations to run") + parser.add_argument("--embodiment", default="stompypro", type=str, help="Embodiment name") + parser.add_argument("--load_model", default="examples/walking_pro.onnx", type=str, help="Path to model to load") + + args = parser.parse_args() + + # Run 100 examples total, in parallel batches + num_examples = 2000 + num_batches = (num_examples + args.num_threads - 1) // args.num_threads + + for batch in range(num_batches): + examples_remaining = num_examples - (batch * args.num_threads) + threads_this_batch = min(args.num_threads, examples_remaining) + print(f"\nRunning batch {batch+1}/{num_batches} ({threads_this_batch} simulations)") + run_parallel_sims(threads_this_batch, args) diff --git a/sim/requirements.txt b/sim/requirements.txt index 9751be9e..8c87ef02 100755 --- a/sim/requirements.txt +++ b/sim/requirements.txt @@ -1,13 +1,12 @@ -# requirements.txt - -mediapy -torch -python-dotenv -h5py -gdown +mediapy==1.2.2 +torch==2.4.1 +python-dotenv==1.0.1 +h5py==3.11.0 +gdown==5.2.0 matplotlib==3.3.4 numpy==1.19.5 -wandb +wandb==0.18.1 tensorboard==2.14.0 -onnx -onnxscript +onnx==1.15.0 +onnxscript==0.1.0.dev20240413 +pygame==2.6.1 \ No newline at end of file diff --git a/sim/resources/stompymicro/joints.py b/sim/resources/stompymicro/joints.py index fb1e1551..825e2665 100644 --- a/sim/resources/stompymicro/joints.py +++ b/sim/resources/stompymicro/joints.py @@ -5,6 +5,7 @@ tree of the various joint names in the robot. 
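The batching in produce_sim_data.py above is plain ceiling division over the requested number of examples. A quick check of that arithmetic with the values used in the script (num_examples = 2000, default --num_threads 10):

num_examples = 2000
num_threads = 10

# Ceiling division without math.ceil: (a + b - 1) // b
num_batches = (num_examples + num_threads - 1) // num_threads
assert num_batches == 200

# threads_this_batch from the loop above, reproduced for every batch index.
sizes = [min(num_threads, num_examples - batch * num_threads) for batch in range(num_batches)]
assert sum(sizes) == num_examples and all(size == num_threads for size in sizes)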
""" +import math import textwrap from abc import ABC from typing import Dict, List, Tuple, Union @@ -52,229 +53,282 @@ def __str__(self) -> str: return f"[{self.__class__.__name__}]{parts_str}" -class LeftArm(Node): - shoulder_yaw = "left_shoulder_yaw" - shoulder_pitch = "left_shoulder_pitch" - elbow_pitch = "left_elbow_yaw" # FIXME: yaw vs pitch - - class RightArm(Node): - shoulder_yaw = "right_shoulder_yaw" shoulder_pitch = "right_shoulder_pitch" - elbow_pitch = "right_elbow_yaw" # FIXME: yaw vs pitch + shoulder_yaw = "right_shoulder_yaw" + elbow_yaw = "right_elbow_yaw" -class LeftLeg(Node): - hip_roll = "left_hip_roll" - hip_yaw = "left_hip_yaw" - hip_pitch = "left_hip_pitch" - knee_pitch = "left_knee_pitch" - ankle_pitch = "left_ankle_pitch" +class LeftArm(Node): + shoulder_pitch = "left_shoulder_pitch" + shoulder_yaw = "left_shoulder_yaw" + elbow_yaw = "left_elbow_yaw" class RightLeg(Node): - hip_roll = "right_hip_roll" - hip_yaw = "right_hip_yaw" hip_pitch = "right_hip_pitch" + hip_yaw = "right_hip_yaw" + hip_roll = "right_hip_roll" knee_pitch = "right_knee_pitch" ankle_pitch = "right_ankle_pitch" +class LeftLeg(Node): + hip_pitch = "left_hip_pitch" + hip_yaw = "left_hip_yaw" + hip_roll = "left_hip_roll" + ankle_pitch = "left_ankle_pitch" + knee_pitch = "left_knee_pitch" + + class Legs(Node): - left = LeftLeg() right = RightLeg() + left = LeftLeg() + + +class Arms(Node): + right = RightArm() + left = LeftArm() + + +def get_facing_direction_quaternion(angle_degrees: float) -> List[float]: + theta = angle_degrees * (math.pi / 180) + half_theta = theta / 2 + return [0, 0, math.sin(half_theta), math.cos(half_theta)] class Robot(Node): - height = 0.31 - rotation = [0, 0, 0.707, 0.707] + height = 0.34 + angle = 0 + rotation = get_facing_direction_quaternion(angle) + print(f"Rotation: {rotation}") + + legs_only = False - # left_arm = LeftArm() - # right_arm = RightArm() legs = Legs() + arms = Arms() if not legs_only else None @classmethod def default_standing(cls) -> Dict[str, float]: - return { - # Legs - cls.legs.left.hip_pitch: 0.0, + legs = { # TODO: Set back to previous values + cls.legs.left.hip_pitch: 0, + cls.legs.left.knee_pitch: 0, cls.legs.left.hip_yaw: 0, cls.legs.left.hip_roll: 0, - cls.legs.left.knee_pitch: 0, cls.legs.left.ankle_pitch: 0, cls.legs.right.hip_pitch: 0, - cls.legs.right.hip_yaw: 0, - cls.legs.right.hip_roll: 0, cls.legs.right.knee_pitch: 0, cls.legs.right.ankle_pitch: 0, - # TODO: fixing this for debugging - # Arms - # cls.left_arm.shoulder_pitch: 0, - # cls.left_arm.shoulder_yaw: 0, - # cls.left_arm.elbow_pitch: 0, - # cls.right_arm.shoulder_pitch: 0, - # cls.right_arm.shoulder_yaw: 0, - # cls.right_arm.elbow_pitch: 0, + cls.legs.right.hip_yaw: 0, + cls.legs.right.hip_roll: 0, } + if cls.arms is None: + return legs + + arms = { + cls.arms.left.shoulder_pitch: 0.0, + cls.arms.left.shoulder_yaw: 0.0, + cls.arms.left.elbow_yaw: 0.0, + cls.arms.right.shoulder_pitch: 0.0, + cls.arms.right.shoulder_yaw: 0.0, + cls.arms.right.elbow_yaw: 0.0, + } + return {**legs, **arms} + @classmethod def default_limits(cls) -> Dict[str, Dict[str, float]]: - return { - # # left arm - # Robot.left_arm.shoulder_pitch: { - # "lower": -1.5707963, - # "upper": 1.5707963, - # }, - # Robot.left_arm.shoulder_yaw: { - # "lower": -1.5707963, - # "upper": 1.5707963, - # }, - # Robot.left_arm.elbow_pitch: { - # "lower": -1.2217305, - # "upper": 1.2217305, - # }, - # # right arm - # Robot.right_arm.shoulder_pitch: { - # "lower": -1.5707963, - # "upper": 1.5707963, - # }, - # 
Robot.right_arm.shoulder_yaw: { - # "lower": -1.5707963, - # "upper": 1.5707963, - # }, - # Robot.right_arm.elbow_pitch: { - # "lower": -1.2217305, - # "upper": 1.2217305, - # }, - # left leg - Robot.legs.left.hip_pitch: { + legs = { + # Left Leg + cls.legs.left.hip_pitch: { "lower": -1.5707963, "upper": 1.5707963, }, - Robot.legs.left.hip_yaw: { + cls.legs.left.hip_yaw: { "lower": -1.5707963, "upper": 0.087266463, }, - Robot.legs.left.hip_roll: { + cls.legs.left.hip_roll: { "lower": -0.78539816, "upper": 0.78539816, }, - Robot.legs.left.knee_pitch: { - "lower": 0, - "upper": 1.0471976, - }, - Robot.legs.left.ankle_pitch: { + cls.legs.left.knee_pitch: { "lower": -1.0471976, - "upper": 1.0471976, + "upper": 0, }, - # right leg - Robot.legs.right.hip_pitch: { + cls.legs.left.ankle_pitch: { "lower": -1.5707963, "upper": 1.5707963, }, - Robot.legs.right.hip_yaw: { + # Right Leg + cls.legs.right.hip_pitch: { + "lower": -1.5707963, + "upper": 1.5707963, + }, + cls.legs.right.hip_yaw: { "lower": -0.087266463, "upper": 1.5707963, }, - Robot.legs.right.hip_roll: { + cls.legs.right.hip_roll: { "lower": -0.78539816, "upper": 0.78539816, }, - Robot.legs.right.knee_pitch: { - "lower": -1.0471976, - "upper": 0, - }, - Robot.legs.right.ankle_pitch: { - "lower": -1.0471976, + cls.legs.right.knee_pitch: { + "lower": 0, "upper": 1.0471976, }, + cls.legs.right.ankle_pitch: { + "lower": -1.5707963, + "upper": 1.5707963, + }, } + if cls.arms is None: + return legs + + arms = { + # Left Arm + cls.arms.left.shoulder_pitch: { + "lower": -1.7453293, + "upper": 1.7453293, + }, + cls.arms.left.shoulder_yaw: { + "lower": -0.43633231, + "upper": 1.5707963, + }, + cls.arms.left.elbow_yaw: { + "lower": -1.5707963, + "upper": 1.5707963, + }, + # Right Arm + cls.arms.right.shoulder_pitch: { + "lower": -1.7453293, + "upper": 1.7453293, + }, + cls.arms.right.shoulder_yaw: { + "lower": -1.134464, + "upper": 0.87266463, + }, + cls.arms.right.elbow_yaw: { + "lower": -1.5707963, + "upper": 1.5707963, + }, + } + return {**legs, **arms} + # p_gains @classmethod def stiffness(cls) -> Dict[str, float]: - return { - "hip_pitch": 5, - "hip_yaw": 5, - "hip_roll": 5, - "knee_pitch": 5, - "ankle_pitch": 5, - # "shoulder_pitch": 5, - # "shoulder_yaw": 5, - # "shoulder_roll": 5, - # "elbow_pitch": 5, - # "elbow_yaw": 5, + legs = { + "hip_pitch": 17.681462808698132, + "hip_yaw": 17.681462808698132, + "hip_roll": 17.681462808698132, + "knee_pitch": 17.681462808698132, + "ankle_pitch": 17.681462808698132, + } + + if cls.arms is None: + return legs + + arms = { + "shoulder_pitch": 17.681462808698132, + "shoulder_yaw": 17.681462808698132, + "elbow_yaw": 17.681462808698132, } + return {**legs, **arms} # d_gains @classmethod def damping(cls) -> Dict[str, float]: - return { - "hip_pitch": 0.3, - "hip_yaw": 0.3, - "hip_roll": 0.3, - "knee_pitch": 0.3, - "ankle_pitch": 0.3, - # "shoulder_pitch": 0.3, - # "shoulder_yaw": 0.3, - # "shoulder_roll": 0.3, - # "elbow_pitch": 0.3, - # "elbow_yaw": 0.3, + legs = { + "hip_pitch": 0.5354656169048285, + "hip_yaw": 0.5354656169048285, + "hip_roll": 0.5354656169048285, + "knee_pitch": 0.5354656169048285, + "ankle_pitch": 0.5354656169048285, } - # pos_limits - @classmethod - def effort(cls) -> Dict[str, float]: - return { - "hip_pitch": 1, - "hip_yaw": 1, - "hip_roll": 1, - "knee_pitch": 1, - "ankle_pitch": 1, - # "shoulder_pitch": 1, - # "shoulder_yaw": 1, - # "shoulder_roll": 1, - # "elbow_pitch": 1, - # "elbow_yaw": 1, + if cls.arms is None: + return legs + + arms = { + "shoulder_pitch": 0.5354656169048285, + 
"shoulder_yaw": 0.5354656169048285, + "elbow_yaw": 0.5354656169048285, } + return {**legs, **arms} - # vel_limits + # pos_limits @classmethod - def velocity(cls) -> Dict[str, float]: - return { + def effort(cls) -> Dict[str, float]: + legs = { "hip_pitch": 10, "hip_yaw": 10, "hip_roll": 10, "knee_pitch": 10, "ankle_pitch": 10, - # "shoulder_pitch": 10, - # "shoulder_yaw": 10, - # "shoulder_roll": 10, - # "elbow_pitch": 10, - # "elbow_yaw": 10, } + if cls.arms is None: + return legs + + arms = { + "shoulder_pitch": 10, + "shoulder_yaw": 10, + "elbow_yaw": 10, + } + return {**legs, **arms} + + # vel_limits + @classmethod + def velocity(cls) -> Dict[str, float]: + legs = { + "hip_pitch": 5, + "hip_yaw": 5, + "hip_roll": 5, + "knee_pitch": 5, + "ankle_pitch": 5, + } + + if cls.arms is None: + return legs + + arms = { + "shoulder_pitch": 5, + "shoulder_yaw": 5, + "elbow_yaw": 5, + } + return {**legs, **arms} + @classmethod def friction(cls) -> Dict[str, float]: - return { - # pfb30 todo + legs = { "hip_pitch": 0.05, "hip_yaw": 0.05, "hip_roll": 0.05, "knee_pitch": 0.05, "ankle_pitch": 0.05, - # "ankle_pitch": 0.05, - # "elbow_yaw": 0.05, - # "elbow_pitch": 0.05, } + if cls.arms is None: + return legs + + arms = { + "shoulder_pitch": 0.05, + "shoulder_yaw": 0.05, + "elbow_yaw": 0.05, + } + return {**legs, **arms} + def print_joints() -> None: joints = Robot.all_joints() assert len(joints) == len(set(joints)), "Duplicate joint names found!" print(Robot()) + print(f"\nArms are {'disabled' if Robot.legs_only else 'enabled'}") + print(f"Number of joints: {len(joints)}") if __name__ == "__main__": - # python -m sim.Robot.joints + # python -m sim.resources.[robotname].joints print_joints() diff --git a/sim/resources/stompymicro/robot.urdf b/sim/resources/stompymicro/robot.urdf index a75c6b84..0c537b2b 100644 --- a/sim/resources/stompymicro/robot.urdf +++ b/sim/resources/stompymicro/robot.urdf @@ -2,500 +2,500 @@ - - - + + + - - - + + + - + - + - + - + - + - - - + + + - - - - - + + + + + - + - + - + - - + + - + - + - - - + + + - - - - - + + + + + - + - + - + - - + + - + - + - - - + + + - - - - - + + + + + - + - + - + - + - + - - - + + + - - - - - + + + + + - + - + - + - + - + - - - + + + - - - - - + + + + + - + - + - + - + - + - - - + + + - - - - - + + + + + - + - + - + - + - + - - - + + + - - - - - + + + + + - + - + - + - + - + - - - + + + - - - - - + + + + + - + - + - + - + - + - - - + + + - - - - - + + + + + - + - + - + - - + + - + - + - - - + + + - - - - - + + + + + - + - + - + - - + + - + - + - - - + + + - - - - - + + + + + - + - + - + - - + + - + - + - - - + + + - - - - - + + + + + - + - + - + - - + + - + - + - - - + + + - - - - - + + + + + - + - + - + - - + + - + - + - - - + + + - - - - - + + + + + - + - + - + - - + + - + - + - - - + + + - - - - - + + + + + - + - + - + - + - + - - - + + + - - - - - + + + + + - + - + - + - + - + - - - + + + \ No newline at end of file diff --git a/sim/resources/stompymicro/robot.xml b/sim/resources/stompymicro/robot.xml new file mode 100644 index 00000000..4a2985c7 --- /dev/null +++ b/sim/resources/stompymicro/robot.xml @@ -0,0 +1,222 @@ + + \ No newline at end of file diff --git a/sim/resources/stompymicro/robot_calibration.xml b/sim/resources/stompymicro/robot_calibration.xml index 199afdbf..e4c7a0d7 100644 --- a/sim/resources/stompymicro/robot_calibration.xml +++ b/sim/resources/stompymicro/robot_calibration.xml @@ -1,7 +1,8 @@ - + + + + + + \ No newline at end of file diff --git a/sim/resources/stompymicro/robot_fixed.urdf 
b/sim/resources/stompymicro/robot_fixed.urdf index b4fc9eb5..c61ed55d 100644 --- a/sim/resources/stompymicro/robot_fixed.urdf +++ b/sim/resources/stompymicro/robot_fixed.urdf @@ -3,7 +3,7 @@ - + @@ -28,75 +28,75 @@ - - - + + + - + - - + + - + - + - + - + - - - + + + - + - - + + - + - + - + - + - - - + + + - - + + - - + + @@ -115,16 +115,16 @@ - - - + + + - + - + @@ -144,16 +144,16 @@ - - - + + + - + - + @@ -173,16 +173,16 @@ - - - + + + - + - + @@ -202,16 +202,16 @@ - - - + + + - + - + @@ -231,16 +231,16 @@ - - - + + + - + - + @@ -260,191 +260,191 @@ - - - + + + - + - - + + - + - + - - + + - + - - - + + + - + - - + + - + - + - - + + - + - - - + + + - + - - - + + + - + - + - + - + - - - + + + - + - - - + + + - + - + - + - + - - - + + + - - - - - + + + + + - + - + - - + + - + - - - + + + - - - - + + + + - + - + - - + + - + - - - + + + - - + + - - + + @@ -463,16 +463,16 @@ - - - + + + - - + + - + @@ -492,9 +492,9 @@ - - - + + + \ No newline at end of file diff --git a/sim/resources/stompymicro/robot_fixed.xml b/sim/resources/stompymicro/robot_fixed.xml index b5436397..826a3aff 100644 --- a/sim/resources/stompymicro/robot_fixed.xml +++ b/sim/resources/stompymicro/robot_fixed.xml @@ -1,119 +1,98 @@ - + \ No newline at end of file diff --git a/sim/resources/stompypro/joints.py b/sim/resources/stompypro/joints.py index 75cdd058..27c6c2c6 100755 --- a/sim/resources/stompypro/joints.py +++ b/sim/resources/stompypro/joints.py @@ -118,6 +118,9 @@ def isaac_to_mujoco_signs(cls) -> Dict[str, int]: def default_positions(cls) -> Dict[str, float]: return {} + """This should be ordered according to how the policy is trained. E.g. the first + entry should be the angle of the first joint in the policy.""" + @classmethod def default_standing(cls) -> Dict[str, float]: return { diff --git a/sim/resources/stompypro/robot_fixed.urdf b/sim/resources/stompypro/robot_fixed.urdf index 5c682f98..00c47bbe 100644 --- a/sim/resources/stompypro/robot_fixed.urdf +++ b/sim/resources/stompypro/robot_fixed.urdf @@ -10,7 +10,30 @@ - + + + + + + + + + + + + + + + + + diff --git a/sim/resources/stompypro/robot_fixed.xml b/sim/resources/stompypro/robot_fixed.xml index 68b15adf..63063952 100644 --- a/sim/resources/stompypro/robot_fixed.xml +++ b/sim/resources/stompypro/robot_fixed.xml @@ -34,7 +34,7 @@ - + diff --git a/sim/scripts/calibration_isaac.py b/sim/scripts/calibration_isaac.py index 3cfc6673..89e23a7f 100755 --- a/sim/scripts/calibration_isaac.py +++ b/sim/scripts/calibration_isaac.py @@ -15,8 +15,8 @@ import numpy as np # Importing torch down here to avoid gymtorch issues. 
-import torch # noqa: E402 # type: ignore[import] from isaacgym import gymapi, gymtorch, gymutil +import torch # noqa: E402 # type: ignore[import] # isort: skip from matplotlib import pyplot as plt from sim.env import robot_urdf_path diff --git a/sim/scripts/calibration_mujoco.py b/sim/scripts/calibration_mujoco.py index 7045d44b..8244eca8 100755 --- a/sim/scripts/calibration_mujoco.py +++ b/sim/scripts/calibration_mujoco.py @@ -78,7 +78,7 @@ def run_mujoco(cfg: Any, joint_id: int = 0, steps: int = 1000) -> None: # noqa: Returns: None """ - model_dir = os.environ.get("MODEL_DIR") + model_dir = os.environ.get("MODEL_DIR") or "sim/resources" mujoco_model_path = f"{model_dir}/{args.embodiment}/robot_calibration.xml" model = mujoco.MjModel.from_xml_path(mujoco_model_path) model.opt.timestep = cfg.sim_config.dt diff --git a/sim/scripts/create_mjcf.py b/sim/scripts/create_mjcf.py index 123dcbbd..a968702f 100755 --- a/sim/scripts/create_mjcf.py +++ b/sim/scripts/create_mjcf.py @@ -16,7 +16,7 @@ import xml.dom.minidom import xml.etree.ElementTree as ET from pathlib import Path -from typing import Any, List, OrderedDict, Union +from typing import Any, List, Union from sim.scripts import mjcf @@ -373,20 +373,20 @@ def create_mjcf(filepath: Path) -> None: """Create a MJCF file for the Stompy robot.""" path = Path(filepath) robot_name = path.stem - path = path.parent + parent_path = path.parent robot = Sim2SimRobot( robot_name, - path, + parent_path, mjcf.Compiler(angle="radian", meshdir="meshes", autolimits=True, eulerseq="zyx"), ) robot.adapt_world() - robot.save(path / f"{robot_name}_fixed.xml") + robot.save(path.with_suffix(".xml")) if __name__ == "__main__": parser = argparse.ArgumentParser(description="Create a MJCF file for the robot.") - parser.add_argument("filepath", type=str, help="The path to load and save the MJCF file.") + parser.add_argument("--filepath", type=str, help="The path to load and save the MJCF file.") parser.add_argument("--robot", type=str, help="The robot name to load.") args = parser.parse_args() # Robot name is whatever string comes right before ".urdf" extension diff --git a/sim/sim2sim.py b/sim/sim2sim.py deleted file mode 100755 index 1a9182be..00000000 --- a/sim/sim2sim.py +++ /dev/null @@ -1,322 +0,0 @@ -# SPDX-License-Identifier: BSD-3-Clause -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# 1. Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# 3. Neither the name of the copyright holder nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. 
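calibration_mujoco.py above replaces os.environ.get("MODEL_DIR") with os.environ.get("MODEL_DIR") or "sim/resources". The distinction from passing a default argument is small but real: the or-fallback also triggers when the variable is set to an empty string, not only when it is unset. A quick illustration:

import os

os.environ["MODEL_DIR"] = ""                      # set, but empty
os.environ.get("MODEL_DIR", "sim/resources")      # -> "" (the default is not used)
os.environ.get("MODEL_DIR") or "sim/resources"    # -> "sim/resources"

del os.environ["MODEL_DIR"]                       # unset entirely
os.environ.get("MODEL_DIR", "sim/resources")      # -> "sim/resources"
os.environ.get("MODEL_DIR") or "sim/resources"    # -> "sim/resources"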
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# -# Copyright (c) 2024 Beijing RobotEra TECHNOLOGY CO.,LTD. All rights reserved. - -""" -Difference setup -python sim/play.py --task mini_ppo --sim_device cpu -python sim/sim2sim.py --load_model examples/standing_pro.pt --embodiment stompypro -python sim/sim2sim.py --load_model examples/standing_micro.pt --embodiment stompymicro -""" -import argparse -import math -import os -from collections import deque -from copy import deepcopy - -import mujoco -import mujoco_viewer -import numpy as np -import pygame -from scipy.spatial.transform import Rotation as R -from tqdm import tqdm - -from sim.scripts.create_mjcf import load_embodiment - -import torch # isort: skip - - -def handle_keyboard_input(): - global x_vel_cmd, y_vel_cmd, yaw_vel_cmd - - keys = pygame.key.get_pressed() - - # Update movement commands based on arrow keys - if keys[pygame.K_UP]: - x_vel_cmd += 0.0005 - if keys[pygame.K_DOWN]: - x_vel_cmd -= 0.0005 - if keys[pygame.K_LEFT]: - y_vel_cmd += 0.0005 - if keys[pygame.K_RIGHT]: - y_vel_cmd -= 0.0005 - - # Yaw control - if keys[pygame.K_a]: - yaw_vel_cmd += 0.001 - if keys[pygame.K_z]: - yaw_vel_cmd -= 0.001 - - -class Sim2simCfg: - def __init__( - self, - embodiment, - frame_stack=15, - c_frame_stack=3, - sim_duration=60.0, - dt=0.001, - decimation=10, - cycle_time=0.4, - tau_factor=3, - lin_vel=2.0, - ang_vel=1.0, - dof_pos=1.0, - dof_vel=0.05, - clip_observations=18.0, - clip_actions=18.0, - action_scale=0.25, - ): - self.robot = load_embodiment(embodiment) - - self.num_actions = len(self.robot.all_joints()) - - self.frame_stack = frame_stack - self.c_frame_stack = c_frame_stack - self.num_single_obs = 11 + self.num_actions * self.c_frame_stack - self.num_observations = int(self.frame_stack * self.num_single_obs) - - self.sim_duration = sim_duration - self.dt = dt - self.decimation = decimation - - self.cycle_time = cycle_time - - self.tau_factor = tau_factor - self.tau_limit = ( - np.array(list(self.robot.effort().values()) + list(self.robot.effort().values())) * self.tau_factor - ) - self.kps = np.array(list(self.robot.stiffness().values()) + list(self.robot.stiffness().values())) - self.kds = np.array(list(self.robot.damping().values()) + list(self.robot.damping().values())) - - self.lin_vel = lin_vel - self.ang_vel = ang_vel - self.dof_pos = dof_pos - self.dof_vel = dof_vel - - self.clip_observations = clip_observations - self.clip_actions = clip_actions - - self.action_scale = action_scale - - -def quaternion_to_euler_array(quat): - # Ensure quaternion is in the correct format [x, y, z, w] - x, y, z, w = quat - - # Roll (x-axis rotation) - t0 = +2.0 * (w * x + y * z) - t1 = +1.0 - 2.0 * (x * x + y * y) - roll_x = np.arctan2(t0, t1) - - # Pitch (y-axis rotation) - t2 = +2.0 * (w * y - z * x) - t2 = np.clip(t2, -1.0, 1.0) - pitch_y = np.arcsin(t2) - - # Yaw (z-axis rotation) - t3 = +2.0 * (w * z + x * y) - t4 = +1.0 - 2.0 * (y * y + z * z) - yaw_z = np.arctan2(t3, t4) - - # Returns roll, pitch, yaw in a NumPy array in radians - return np.array([roll_x, 
pitch_y, yaw_z]) - - -def get_obs(data): - """Extracts an observation from the mujoco data structure""" - q = data.qpos.astype(np.double) - dq = data.qvel.astype(np.double) - quat = data.sensor("orientation").data[[1, 2, 3, 0]].astype(np.double) - r = R.from_quat(quat) - v = r.apply(data.qvel[:3], inverse=True).astype(np.double) # In the base frame - omega = data.sensor("angular-velocity").data.astype(np.double) - gvec = r.apply(np.array([0.0, 0.0, -1.0]), inverse=True).astype(np.double) - return (q, dq, quat, v, omega, gvec) - - -def pd_control(target_q, q, kp, target_dq, dq, kd, default): - """Calculates torques from position commands""" - return kp * (target_q + default - q) - kd * dq - - -def run_mujoco(policy, cfg, keyboard_use=False): - """ - Run the Mujoco simulation using the provided policy and configuration. - - Args: - policy: The policy used for controlling the simulation. - cfg: The configuration object containing simulation settings. - - Returns: - None - """ - model_dir = os.environ.get("MODEL_DIR") - mujoco_model_path = f"{model_dir}/{args.embodiment}/robot_fixed.xml" - - model = mujoco.MjModel.from_xml_path(mujoco_model_path) - model.opt.timestep = cfg.dt - data = mujoco.MjData(model) - - try: - data.qpos = model.keyframe("default").qpos - default = deepcopy(model.keyframe("default").qpos)[-cfg.num_actions :] - print("Default position:", default) - except: - print("No default position found, using zero initialization") - default = np.zeros(cfg.num_actions) # 3 for pos, 4 for quat, cfg.num_actions for joints - - mujoco.mj_step(model, data) - for ii in range(len(data.ctrl) + 1): - print(data.joint(ii).id, data.joint(ii).name) - - data.qvel = np.zeros_like(data.qvel) - data.qacc = np.zeros_like(data.qacc) - viewer = mujoco_viewer.MujocoViewer(model, data) - - target_q = np.zeros((cfg.num_actions), dtype=np.double) - action = np.zeros((cfg.num_actions), dtype=np.double) - - hist_obs = deque() - for _ in range(cfg.frame_stack): - hist_obs.append(np.zeros([1, cfg.num_single_obs], dtype=np.double)) - - count_lowlevel = 0 - - for _ in tqdm(range(int(cfg.sim_duration / cfg.dt)), desc="Simulating..."): - if keyboard_use: - handle_keyboard_input() - - # Obtain an observation - q, dq, quat, v, omega, gvec = get_obs(data) - q = q[-cfg.num_actions :] - dq = dq[-cfg.num_actions :] - - # 1000hz -> 50hz - if count_lowlevel % cfg.decimation == 0: - obs = np.zeros([1, cfg.num_single_obs], dtype=np.float32) - eu_ang = quaternion_to_euler_array(quat) - eu_ang[eu_ang > math.pi] -= 2 * math.pi - - cur_pos_obs = (q - default) * cfg.dof_pos - - cur_vel_obs = dq * cfg.dof_vel - - obs[0, 0] = math.sin(2 * math.pi * count_lowlevel * cfg.dt / cfg.cycle_time) - obs[0, 1] = math.cos(2 * math.pi * count_lowlevel * cfg.dt / cfg.cycle_time) - obs[0, 2] = x_vel_cmd * cfg.lin_vel - obs[0, 3] = y_vel_cmd * cfg.lin_vel - obs[0, 4] = yaw_vel_cmd * cfg.ang_vel - obs[0, 5 : (cfg.num_actions + 5)] = cur_pos_obs - obs[0, (cfg.num_actions + 5) : (2 * cfg.num_actions + 5)] = cur_vel_obs - obs[0, (2 * cfg.num_actions + 5) : (3 * cfg.num_actions + 5)] = action - obs[0, (3 * cfg.num_actions + 5) : (3 * cfg.num_actions + 5) + 3] = omega - obs[0, (3 * cfg.num_actions + 5) + 3 : (3 * cfg.num_actions + 5) + 2 * 3] = eu_ang - - obs = np.clip(obs, -cfg.clip_observations, cfg.clip_observations) - - hist_obs.append(obs) - hist_obs.popleft() - - policy_input = np.zeros([1, cfg.num_observations], dtype=np.float32) - for i in range(cfg.frame_stack): - policy_input[0, i * cfg.num_single_obs : (i + 1) * cfg.num_single_obs] = 
hist_obs[i][0, :] - - action[:] = get_policy_output(policy, policy_input) - action = np.clip(action, -cfg.clip_actions, cfg.clip_actions) - target_q = action * cfg.action_scale - - target_dq = np.zeros((cfg.num_actions), dtype=np.double) - - # Generate PD control - tau = pd_control(target_q, q, cfg.kps, target_dq, dq, cfg.kds, default) # Calc torques - - tau = np.clip(tau, -cfg.tau_limit, cfg.tau_limit) # Clamp torques - # print(tau) - # print(eu_ang) - print(x_vel_cmd, y_vel_cmd, yaw_vel_cmd) - - data.ctrl = tau - - mujoco.mj_step(model, data) - viewer.render() - count_lowlevel += 1 - - viewer.close() - - -if __name__ == "__main__": - parser = argparse.ArgumentParser(description="Deployment script.") - parser.add_argument("--load_model", type=str, required=True, help="Run to load from.") - parser.add_argument("--embodiment", type=str, required=True, help="embodiment") - parser.add_argument("--terrain", action="store_true", help="terrain or plane") - parser.add_argument("--load_actions", action="store_true", help="saved_actions") - parser.add_argument("--keyboard_use", action="store_true", help="keyboard_use") - args = parser.parse_args() - - if args.keyboard_use: - x_vel_cmd, y_vel_cmd, yaw_vel_cmd = 0.0, 0.0, 0.0 - pygame.init() - pygame.display.set_caption("Simulation Control") - else: - x_vel_cmd, y_vel_cmd, yaw_vel_cmd = 0.4, 0.0, 0.0 - - if "pt" in args.load_model: - policy = torch.jit.load(args.load_model) - elif "onnx" in args.load_model: - import onnxruntime as ort - - policy = ort.InferenceSession(args.load_model) - - def get_policy_output(policy, input_data): - if isinstance(policy, torch.jit._script.RecursiveScriptModule): - return policy(torch.tensor(input_data))[0].detach().numpy() - else: - ort_inputs = {policy.get_inputs()[0].name: input_data} - return policy.run(None, ort_inputs)[0][0] - - if args.embodiment == "stompypro": - cfg = Sim2simCfg( - args.embodiment, - sim_duration=60.0, - dt=0.001, - decimation=10, - cycle_time=0.4, - tau_factor=3.0, - ) - elif args.embodiment == "stompymicro": - cfg = Sim2simCfg( - args.embodiment, - sim_duration=60.0, - dt=0.001, - decimation=10, - cycle_time=0.2, - tau_factor=2, - ) - - run_mujoco(policy, cfg, args.keyboard_use) diff --git a/sim/sim2sim/__init__.py b/sim/sim2sim/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/sim/sim2sim/calibrate.py b/sim/sim2sim/calibrate.py new file mode 100644 index 00000000..18bfa1c8 --- /dev/null +++ b/sim/sim2sim/calibrate.py @@ -0,0 +1,99 @@ +import multiprocessing as mp +import time + +from tqdm import tqdm + +from sim.envs.base.mujoco_env import MujocoCfg, MujocoEnv +from sim.sim2sim.mujoco.play import run_simulation +from sim.sim2sim.optimizers.bayesian import BayesianOptimizer, OptimizeParam + +# def run_parallel_sims(num_threads: int) -> None: +# """Run multiple simulation instances in parallel with a delay between each start.""" +# processes = [] +# delay_between_starts = 0.1 + +# for idx in range(num_threads): +# p = mp.Process(target=run_simulation, args=(idx, args)) +# p.start() +# processes.append(p) +# time.sleep(delay_between_starts) # Introduce a delay before starting the next process + +# # Wait for all processes to finish with a progress bar +# for p in tqdm(processes, desc="Running parallel simulations"): +# p.join() + + +if __name__ == "__main__": + import numpy as np + import onnxruntime as ort + from kinfer.export.pytorch import export_to_onnx + from kinfer.inference.python import ONNXModel + from scipy.spatial.transform import Rotation as R + + from 
sim.model_export import ActorCfg + from sim.sim2sim.helpers import get_actor_policy + from sim.utils.cmd_manager import CommandManager + + cfg = MujocoCfg() + cfg.env.num_envs = 1 + env = MujocoEnv(cfg, render=True) + cmd_manager = CommandManager(num_envs=1, mode="fixed", default_cmd=[0.0, 0.0, 0.0, 0.0]) # cfg.env.num_envs, + LOAD_MODEL_PATH = "policy_1.pt" + + policy_cfg = ActorCfg(embodiment=cfg.asset.name) + + actor_model, sim2sim_info, input_tensors = get_actor_policy(LOAD_MODEL_PATH, policy_cfg) + export_config = {**vars(policy_cfg), **sim2sim_info} + print(export_config) + + export_to_onnx(actor_model, input_tensors=input_tensors, config=export_config, save_path="kinfer_test.onnx") + policy = ONNXModel("kinfer_test.onnx") + + # gains.kp_scale: 1.566 + # gains.kd_scale: 9.312 + # 940 -> {'gains.kp_scale': 2.7692284239249987, 'gains.kd_scale': 17.73252701327175, 'gains.tau_factor': 3.1723978203087935} + + """ + Best parameters found: + gains.kp_scale: 1.445 + gains.kd_scale: 19.975 + gains.tau_factor: 6.350 + """ + parameters = [ + OptimizeParam( + name="gains.kp_scale", + min_val=0.5, + max_val=10.0, + ), + OptimizeParam( + name="gains.kd_scale", + min_val=0.2, + max_val=10.0, + ), + # OptimizeParam( + # name="gains.tau_factor", + # min_val=1.0, + # max_val=10.0, + # ), + ] + optimizer = BayesianOptimizer( + base_cfg=cfg, + policy=policy, + parameters=parameters, + cmd_manager=cmd_manager, + n_initial_points=200, + n_iterations=1000, + exploration_weight=0.25, + ) + + best_params, history = optimizer.optimize() + + print("\nOptimization complete!") + print("Best parameters found:") + for name, value in best_params.items(): + print(f"{name}: {value:.3f}") + + import json + + with open("optimization_results.json", "w") as f: + json.dump({"best_params": best_params, "history": history}, f, indent=2) diff --git a/sim/sim2sim/helpers.py b/sim/sim2sim/helpers.py new file mode 100644 index 00000000..20caba5e --- /dev/null +++ b/sim/sim2sim/helpers.py @@ -0,0 +1,66 @@ +from typing import Tuple + +import pygame + +from sim.model_export import Actor, ActorCfg + +import torch # isort:skip + + +def get_actor_policy(model_path: str, cfg: ActorCfg) -> Tuple[torch.nn.Module, dict, Tuple[torch.Tensor, ...]]: + model = torch.jit.load(model_path, map_location="cpu") + a_model = Actor(model, cfg) + + # Gets the model input tensors. 
+ x_vel = torch.randn(1) + y_vel = torch.randn(1) + rot = torch.randn(1) + t = torch.randn(1) + dof_pos = torch.randn(a_model.num_actions) + dof_vel = torch.randn(a_model.num_actions) + prev_actions = torch.randn(a_model.num_actions) + imu_ang_vel = torch.randn(3) + imu_euler_xyz = torch.randn(3) + buffer = a_model.get_init_buffer() + input_tensors = (x_vel, y_vel, rot, t, dof_pos, dof_vel, prev_actions, imu_ang_vel, imu_euler_xyz, buffer) + + # Add sim2sim metadata + robot_effort = list(a_model.robot.effort().values()) + robot_stiffness = list(a_model.robot.stiffness().values()) + robot_damping = list(a_model.robot.damping().values()) + num_actions = a_model.num_actions + num_observations = a_model.num_observations + + return ( + a_model, + { + "robot_effort": robot_effort, + "robot_stiffness": robot_stiffness, + "robot_damping": robot_damping, + "num_actions": num_actions, + "num_observations": num_observations, + }, + input_tensors, + ) + + +def handle_keyboard_input(): + global x_vel_cmd, y_vel_cmd, yaw_vel_cmd + + keys = pygame.key.get_pressed() + + # Update movement commands based on arrow keys + if keys[pygame.K_UP]: + x_vel_cmd += 0.0005 + if keys[pygame.K_DOWN]: + x_vel_cmd -= 0.0005 + if keys[pygame.K_LEFT]: + y_vel_cmd += 0.0005 + if keys[pygame.K_RIGHT]: + y_vel_cmd -= 0.0005 + + # Yaw control + if keys[pygame.K_a]: + yaw_vel_cmd += 0.001 + if keys[pygame.K_z]: + yaw_vel_cmd -= 0.001 diff --git a/sim/sim2sim/mujoco/coplay.py b/sim/sim2sim/mujoco/coplay.py new file mode 100644 index 00000000..c8cc1e93 --- /dev/null +++ b/sim/sim2sim/mujoco/coplay.py @@ -0,0 +1,119 @@ +import argparse +import logging + +import numpy as np +from tqdm import tqdm + +from sim.env_helpers import debug_robot_state +from sim.envs.base.mujoco_env import MujocoCfg, MujocoEnv +from sim.utils.args_parsing import parse_args_with_extras +from sim.envs.humanoids.stompymicro_config import StompyMicroCfg, StompyMicroCfgPPO +from sim.envs.humanoids.stompymicro_env import StompyMicroEnv +from sim.utils.helpers import class_to_dict, parse_sim_params +from sim.algo.ppo.on_policy_runner import OnPolicyRunner +import torch # isort: skip + +logger = logging.getLogger(__name__) + +np.set_printoptions(precision=3, suppress=True) +torch.set_printoptions(precision=3) + +DEVICE = "cuda:0" # "cpu" +LOAD_MODEL_PATH = "examples/experiments/standing/robustv1/policy_1.pt" + + +def coplay(args: argparse.Namespace) -> None: + """Run both Isaac Gym and Mujoco environments simultaneously.""" + cfg = MujocoCfg() + cfg.env.num_envs = 1 + + cfg.gains.kp_scale = 1. + cfg.gains.kd_scale = 1. 
+ cfg.gains.tau_factor = 4.0 + + # Remove randomness + cfg.domain_rand.start_pos_noise = 0.0 + cfg.domain_rand.randomize_friction = False + cfg.domain_rand.randomize_base_mass = False + cfg.domain_rand.push_robots = False + cfg.domain_rand.action_noise = 0.0 + cfg.domain_rand.action_delay = 0.0 + + cfg.sim.physx.max_gpu_contact_pairs = 2**10 + + train_cfg = StompyMicroCfgPPO() + train_cfg.runner.load_run = -1 + train_cfg.seed = 0 + + # Create and setup environment + sim_params = {"sim": class_to_dict(cfg.sim)} + sim_params = parse_sim_params(args, sim_params) + + RENDER_MUJOCO = True + isaac_env = StompyMicroEnv( + cfg=cfg, + sim_params=sim_params, + physics_engine=args.physics_engine, + sim_device=args.sim_device, + headless=RENDER_MUJOCO, + ) + mujoco_env = MujocoEnv( + cfg, + render=RENDER_MUJOCO, + ) + + # Setup and load policy + all_cfg = {**class_to_dict(train_cfg), **class_to_dict(cfg)} + ppo_runner = OnPolicyRunner(isaac_env, all_cfg, device=DEVICE) + resume_path = "logs/StompyMicro/Dec04_20-29-12_StandingRobust/model_12001.pt" + ppo_runner.load(resume_path, load_optimizer=False) + ppo_runner.alg.actor_critic.eval() + ppo_runner.alg.actor_critic.to(DEVICE) + policy = ppo_runner.alg.actor_critic.act_inference + + isaac_obs, _ = isaac_env.reset() + mujoco_obs_np = mujoco_env.reset() + mujoco_obs = torch.from_numpy(mujoco_obs_np).float().to(DEVICE) + + ## DEBUG ## + print(f"Policy device: {next(ppo_runner.alg.actor_critic.parameters()).device}") + print(f"Isaac obs device: {isaac_obs.device}") + print(f"Mujoco obs device: {mujoco_obs.device}") + + print(f"Isaac obs shape: {isaac_obs.shape}") + print(f"Mujoco obs shape: {mujoco_obs.shape}") + + print(isaac_obs[0][-59:].detach().cpu().numpy()) + print(mujoco_obs[0][-59:].detach().cpu().numpy()) + ## DEBUG ## + + done1 = done2 = False + + ISAAC = False + + steps = int(cfg.env.episode_length_s / (cfg.sim.dt * cfg.control.decimation)) + for t in tqdm(range(steps)): + if ISAAC: + isaac_actions = policy(isaac_obs.detach()) + isaac_env.commands[:] = torch.zeros(4) + isaac_obs, _, _, done1, _ = isaac_env.step(isaac_actions.detach()) + + mujoco_actions = policy(mujoco_obs.detach()) + mujoco_env.commands[:] = torch.zeros(4) + mujoco_obs_np, _, done2, _ = mujoco_env.step(mujoco_actions.detach().cpu().numpy()) + mujoco_obs = torch.from_numpy(mujoco_obs_np).float().to(DEVICE) + + if t % 17 == 0: + print(f"Time: {t * cfg.sim.dt:.2f}s") + if ISAAC: + debug_robot_state("Isaac", isaac_obs[0].detach().cpu().numpy(), isaac_actions[0].detach().cpu().numpy()) + debug_robot_state("Mujoco", mujoco_obs[0].detach().cpu().numpy(), mujoco_actions[0].detach().cpu().numpy()) + + if done1 or done2: + break + + +if __name__ == "__main__": + args = parse_args_with_extras(lambda x: x) + print("Arguments:", vars(args)) + coplay(args) diff --git a/sim/sim2sim/mujoco/old.py b/sim/sim2sim/mujoco/old.py new file mode 100644 index 00000000..21d13c3d --- /dev/null +++ b/sim/sim2sim/mujoco/old.py @@ -0,0 +1,256 @@ +import argparse +import math +import os +from copy import deepcopy +from dataclasses import dataclass +from typing import Dict, List, Tuple, Union + +import mujoco +import mujoco_viewer +import numpy as np +import onnxruntime as ort +import pygame +from kinfer.export.pytorch import export_to_onnx +from kinfer.inference.python import ONNXModel +from sim.model_export import ActorCfg +from scipy.spatial.transform import Rotation as R +from tqdm import tqdm +from sim.sim2sim.helpers import get_actor_policy + + +@dataclass +class Sim2simCfg: + sim_duration: float = 60.0 
+ dt: float = 0.001 + decimation: int = 10 + tau_factor: float = 3 + cycle_time: float = 0.25 + + +def quaternion_to_euler_array(quat: np.ndarray) -> np.ndarray: + # Ensure quaternion is in the correct format [x, y, z, w] + x, y, z, w = quat + + # Roll (x-axis rotation) + t0 = +2.0 * (w * x + y * z) + t1 = +1.0 - 2.0 * (x * x + y * y) + roll_x = np.arctan2(t0, t1) + + # Pitch (y-axis rotation) + t2 = +2.0 * (w * y - z * x) + t2 = np.clip(t2, -1.0, 1.0) + pitch_y = np.arcsin(t2) + + # Yaw (z-axis rotation) + t3 = +2.0 * (w * z + x * y) + t4 = +1.0 - 2.0 * (y * y + z * z) + yaw_z = np.arctan2(t3, t4) + + # Returns roll, pitch, yaw in a NumPy array in radians + return np.array([roll_x, pitch_y, yaw_z]) + + +def get_obs(data: mujoco.MjData) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray]: + """Extracts an observation from the mujoco data structure""" + q = data.qpos.astype(np.double) + dq = data.qvel.astype(np.double) + quat = data.sensor("orientation").data[[1, 2, 3, 0]].astype(np.double) + r = R.from_quat(quat) + v = r.apply(data.qvel[:3], inverse=True).astype(np.double) # In the base frame + omega = data.sensor("angular-velocity").data.astype(np.double) + gvec = r.apply(np.array([0.0, 0.0, -1.0]), inverse=True).astype(np.double) + return (q, dq, quat, v, omega, gvec) + + +def pd_control( + target_q: np.ndarray, + q: np.ndarray, + kp: np.ndarray, + dq: np.ndarray, + kd: np.ndarray, + default: np.ndarray, +) -> np.ndarray: + """Calculates torques from position commands""" + return kp * (target_q + default - q) - kd * dq + + +def run_mujoco( + embodiment: str, + policy: ort.InferenceSession, + cfg: Sim2simCfg, + model_info: Dict[str, Union[float, List[float], str]], +) -> None: + """ + Run the Mujoco simulation using the provided policy and configuration. 
+ """ + model_dir = os.environ.get("MODEL_DIR", "sim/resources") + mujoco_model_path = f"{model_dir}/{embodiment}/robot.xml" + + model = mujoco.MjModel.from_xml_path(mujoco_model_path) + model.opt.timestep = cfg.dt + data = mujoco.MjData(model) + + assert isinstance(model_info["num_actions"], int) + assert isinstance(model_info["num_observations"], int) + assert isinstance(model_info["robot_effort"], list) + assert isinstance(model_info["robot_stiffness"], list) + assert isinstance(model_info["robot_damping"], list) + + tau_limit = np.array(list(model_info["robot_effort"]) + list(model_info["robot_effort"])) * cfg.tau_factor + kps = np.array(list(model_info["robot_stiffness"]) + list(model_info["robot_stiffness"])) + kds = np.array(list(model_info["robot_damping"]) + list(model_info["robot_damping"])) + + try: + data.qpos = model.keyframe("default").qpos + default = deepcopy(model.keyframe("default").qpos)[-model_info["num_actions"] :] + print("Default position:", default) + except: + print("No default position found, using zero initialization") + default = np.zeros(model_info["num_actions"]) # 3 for pos, 4 for quat, cfg.num_actions for joints + default += np.random.uniform(-0.03, 0.03, size=default.shape) + print("Default position:", default) + mujoco.mj_step(model, data) + for ii in range(len(data.ctrl) + 1): + print(data.joint(ii).id, data.joint(ii).name) + + data.qvel = np.zeros_like(data.qvel) + data.qacc = np.zeros_like(data.qacc) + + viewer = mujoco_viewer.MujocoViewer(model, data) + + target_q = np.zeros((model_info["num_actions"]), dtype=np.double) + prev_actions = np.zeros((model_info["num_actions"]), dtype=np.double) + hist_obs = np.zeros((model_info["num_observations"]), dtype=np.double) + + count_lowlevel = 0 + + input_data = { + "x_vel.1": np.zeros(1).astype(np.float32), + "y_vel.1": np.zeros(1).astype(np.float32), + "rot.1": np.zeros(1).astype(np.float32), + "t.1": np.zeros(1).astype(np.float32), + "dof_pos.1": np.zeros(model_info["num_actions"]).astype(np.float32), + "dof_vel.1": np.zeros(model_info["num_actions"]).astype(np.float32), + "prev_actions.1": np.zeros(model_info["num_actions"]).astype(np.float32), + "imu_ang_vel.1": np.zeros(3).astype(np.float32), + "imu_euler_xyz.1": np.zeros(3).astype(np.float32), + "buffer.1": np.zeros(model_info["num_observations"]).astype(np.float32), + } + + # Initialize variables for tracking upright steps and average speed + upright_steps = 0 + total_speed = 0.0 + step_count = 0 + + for _ in tqdm(range(int(cfg.sim_duration / cfg.dt)), desc="Simulating..."): + # Obtain an observation + q, dq, quat, v, omega, gvec = get_obs(data) + q = q[-model_info["num_actions"] :] + dq = dq[-model_info["num_actions"] :] + + eu_ang = quaternion_to_euler_array(quat) + eu_ang[eu_ang > math.pi] -= 2 * math.pi + + # Check if the robot is upright (roll and pitch within ±30 degrees) + if abs(eu_ang[0]) > math.radians(30) or abs(eu_ang[1]) > math.radians(30): + print("Robot tilted heavily, ending simulation.") + break + else: + upright_steps += 1 # Increment upright steps + + # Calculate speed and accumulate for average speed calculation + speed = np.linalg.norm(v[:2]) # Speed in the x-y plane + total_speed += speed + step_count += 1 + + # 1000hz -> 50hz + if count_lowlevel % cfg.decimation == 0: + # Convert sim coordinates to policy coordinates + cur_pos_obs = q - default + cur_vel_obs = dq + + input_data["x_vel.1"] = np.array([x_vel_cmd], dtype=np.float32) + input_data["y_vel.1"] = np.array([y_vel_cmd], dtype=np.float32) + input_data["rot.1"] = 
np.array([yaw_vel_cmd], dtype=np.float32) + + input_data["t.1"] = np.array([count_lowlevel * cfg.dt], dtype=np.float32) + + input_data["dof_pos.1"] = cur_pos_obs.astype(np.float32) + input_data["dof_vel.1"] = cur_vel_obs.astype(np.float32) + + input_data["prev_actions.1"] = prev_actions.astype(np.float32) + + input_data["imu_ang_vel.1"] = omega.astype(np.float32) + input_data["imu_euler_xyz.1"] = eu_ang.astype(np.float32) + + input_data["buffer.1"] = hist_obs.astype(np.float32) + + policy_output = policy(input_data) + positions = policy_output["actions_scaled"] + curr_actions = policy_output["actions"] + hist_obs = policy_output["x.3"] + + target_q = positions + + prev_actions = curr_actions + + # Generate PD control + tau = pd_control(target_q, q, kps, dq, kds, default) # Calc torques + tau = np.clip(tau, -tau_limit, tau_limit) # Clamp torques + + data.ctrl = tau + mujoco.mj_step(model, data) + + viewer.render() + count_lowlevel += 1 + + viewer.close() + + # Calculate average speed + if step_count > 0: + average_speed = total_speed / step_count + else: + average_speed = 0.0 + + # Save or print the statistics at the end of the episode + print(f"Number of upright steps: {upright_steps}") + print(f"Average speed: {average_speed:.4f} m/s") + + +if __name__ == "__main__": + x_vel_cmd, y_vel_cmd, yaw_vel_cmd = 0.2, 0.0, 0.0 + + policy_cfg = ActorCfg(embodiment="stompymicro") + policy_cfg.cycle_time = 0.2 + cfg = Sim2simCfg( + sim_duration=10.0, + dt=0.001, + decimation=10, + tau_factor=2, + cycle_time=policy_cfg.cycle_time, + ) + + actor_model, sim2sim_info, input_tensors = get_actor_policy("policy_1.pt", policy_cfg) + + # Merge policy_cfg and sim2sim_info into a single config object + export_config = {**vars(policy_cfg), **sim2sim_info} + print(export_config) + export_to_onnx(actor_model, input_tensors=input_tensors, config=export_config, save_path="kinfer_test.onnx") + policy = ONNXModel("kinfer_test.onnx") + + metadata = policy.get_metadata() + + model_info = { + "num_actions": metadata["num_actions"], + "num_observations": metadata["num_observations"], + "robot_effort": metadata["robot_effort"], + "robot_stiffness": metadata["robot_stiffness"], + "robot_damping": metadata["robot_damping"], + } + + run_mujoco( + "stompymicro", + policy, + cfg, + model_info, + ) \ No newline at end of file diff --git a/sim/sim2sim/mujoco/play.py b/sim/sim2sim/mujoco/play.py new file mode 100644 index 00000000..5a6e7a43 --- /dev/null +++ b/sim/sim2sim/mujoco/play.py @@ -0,0 +1,185 @@ +import numpy as np +import onnxruntime as ort +from scipy.interpolate import griddata +import matplotlib.pyplot as plt + +from sim.env_helpers import debug_robot_state +from sim.envs.base.mujoco_env import MujocoCfg, MujocoEnv +from sim.utils.cmd_manager import CommandManager + +np.set_printoptions(precision=2, suppress=True) + + +def run_simulation(env: MujocoEnv, policy: ort.InferenceSession, cfg: MujocoCfg, cmd_manager: CommandManager, num_episodes: int = 1) -> float: + """Run a policy in the Mujoco environment.""" + total_reward = 0 + rewards = [] + + for episode in range(num_episodes): + obs = env.reset() + count = 0 + episode_reward = 0 + + target_q = np.zeros(env.num_joints) + prev_actions = np.zeros(env.num_joints) + hist_obs = np.zeros(policy.get_metadata()["num_observations"]) + + while count * cfg.sim.dt * cfg.control.decimation < cfg.env.episode_length_s: + q, dq, quat, v, omega, euler = obs + phase = count * cfg.sim.dt / cfg.rewards.cycle_time + command_input = np.array([np.sin(2 * np.pi * phase), np.cos(2 * np.pi * 
phase), 0.0, 0.0, 0.0]) + obs_buf = np.concatenate([command_input, q, dq, prev_actions, omega, euler]) + policy_output = policy( + { + "x_vel.1": np.array([command_input[2]], dtype=np.float32), + "y_vel.1": np.array([command_input[3]], dtype=np.float32), + "rot.1": np.array([command_input[4]], dtype=np.float32), + "t.1": np.array([count * cfg.sim.dt], dtype=np.float32), + "dof_pos.1": q.astype(np.float32), + "dof_vel.1": dq.astype(np.float32), + "prev_actions.1": prev_actions.astype(np.float32), + "imu_ang_vel.1": omega.astype(np.float32), + "imu_euler_xyz.1": euler.astype(np.float32), + "buffer.1": hist_obs.astype(np.float32), + } + ) + + target_q = policy_output["actions_scaled"] + prev_actions = policy_output["actions"] + hist_obs = policy_output["x.3"] + + obs, reward, done, info = env.step(target_q) + episode_reward += reward + count += 1 + + if count % 17 == 0: + pass # debug_robot_state("Mujoco", obs_buf, actions=target_q) + + if done: + # print(f"Episode {episode + 1} finished with reward: {episode_reward}") + total_reward += episode_reward + rewards.append(episode_reward) + break + + env.close() + + average_reward = total_reward / num_episodes + # print(f"Average reward over {num_episodes} episodes: {average_reward}") + return rewards + + +def run_experiment(env: MujocoEnv, policy: ort.InferenceSession, cfg: MujocoCfg, cmd_manager: CommandManager): + kp_scale_range = np.linspace(0.1, 5.0, 40) + kd_scale_range = np.linspace(0.1, 5.0, 40) + kp_mesh, kd_mesh = np.meshgrid(kp_scale_range, kd_scale_range) + gains_combinations = np.column_stack((kp_mesh.flatten(), kd_mesh.flatten())) + + results = [] + for i, (kp, kd) in enumerate(gains_combinations): + cfg.gains.kp_scale = kp + cfg.gains.kd_scale = kd + rewards = run_simulation(env, policy, cfg, cmd_manager, num_episodes=10) + mean_reward = np.mean(rewards) + results.append(mean_reward) + print(f"Experiment {i}/{len(gains_combinations)} -> (kds: {kd}, kps: {kp}, reward: {mean_reward:.2f})") + + results = np.array(results) + kp_interp = np.linspace(kp_scale_range.min(), kp_scale_range.max(), 100) + kd_interp = np.linspace(kd_scale_range.min(), kd_scale_range.max(), 100) + kp_mesh_fine, kd_mesh_fine = np.meshgrid(kp_interp, kd_interp) + + rewards_smooth = griddata( + gains_combinations, + results, + (kp_mesh_fine, kd_mesh_fine), + method='cubic' + ) + + plt.figure(figsize=(10, 8)) + plt.imshow( + rewards_smooth, + extent=[kp_scale_range.min(), kp_scale_range.max(), + kd_scale_range.min(), kd_scale_range.max()], + origin='lower', + aspect='auto', + cmap='viridis' + ) + + plt.colorbar(label='Mean Reward') + plt.xlabel('KP Scale') + plt.ylabel('KD Scale') + plt.title('Reward Heatmap for PD Gains') + + plt.contour( + kp_mesh_fine, kd_mesh_fine, rewards_smooth, + levels=10, + colors='white', + alpha=0.3, + linestyles='solid' + ) + + best_idx = np.argmax(results) + best_kp = gains_combinations[best_idx][0] + best_kd = gains_combinations[best_idx][1] + plt.plot(best_kp, best_kd, 'r*', markersize=15, label='Best Gains') + + plt.legend() + plt.tight_layout() + plt.show() + + print(f"\nBest configuration found:") + print(f"KP Scale: {best_kp:.2f}") + print(f"KD Scale: {best_kd:.2f}") + print(f"Mean Reward: {results[best_idx]:.2f}") + + +if __name__ == "__main__": + import numpy as np + from kinfer.export.pytorch import export_to_onnx + from kinfer.inference.python import ONNXModel + from scipy.spatial.transform import Rotation as R + + from sim.model_export import ActorCfg + from sim.sim2sim.helpers import get_actor_policy + + """ + Current 
parameters: + gains.kp_scale: 1.445 + gains.kd_scale: 19.975 + gains.tau_factor: 6.350 + """ + + cfg = MujocoCfg() + cfg.gains.kp_scale = 4.6 # 1.643 # 1.445 + cfg.gains.kd_scale = 7.5 # 29.027 # 19.975 + cfg.gains.tau_factor = 4 + cfg.env.num_envs = 1 + render = False + env = MujocoEnv(cfg, render=render) + cmd_manager = CommandManager(num_envs=cfg.env.num_envs, mode="fixed", default_cmd=[0.0, 0.0, 0.0, 0.0]) + LOAD_MODEL_PATH = "examples/experiments/standing/robustv1/policy_1.pt" + + policy_cfg = ActorCfg(embodiment=cfg.asset.name) + + actor_model, sim2sim_info, input_tensors = get_actor_policy(LOAD_MODEL_PATH, policy_cfg) + export_config = {**vars(policy_cfg), **sim2sim_info} + print(export_config) + + export_to_onnx(actor_model, input_tensors=input_tensors, config=export_config, save_path="kinfer_test.onnx") + policy = ONNXModel("kinfer_test.onnx") + + rewards = run_simulation(MujocoEnv(cfg, render=True), policy, cfg, cmd_manager, num_episodes=1) + print(f"Mean reward: {np.mean(rewards)}") + run_experiment(env, policy, cfg, cmd_manager) + + # kp_scale_range = np.linspace(0.5, 10.0, 100) + # kd_scale_range = np.linspace(0.2, 10.0, 100) + # np.random.shuffle(kp_scale_range) + # np.random.shuffle(kd_scale_range) + # for i in range(100): + # cfg.gains.kp_scale = kp_scale_range[i] + # cfg.gains.kd_scale = kd_scale_range[i] + # rewards = run_simulation(env, policy, cfg, cmd_manager, num_episodes=1) + # print(f"{i} -> (kds: {cfg.gains.kd_scale}, kps: {cfg.gains.kp_scale})") + + # 9 -> (kds: 0.8929292929292931, kps: 8.656565656565656) diff --git a/sim/sim2sim/mujoco/test.py b/sim/sim2sim/mujoco/test.py new file mode 100644 index 00000000..f7f17112 --- /dev/null +++ b/sim/sim2sim/mujoco/test.py @@ -0,0 +1,109 @@ +import numpy as np +import matplotlib.pyplot as plt +import os +from datetime import datetime + +from sim import ROOT_DIR +from sim.utils.args_parsing import parse_args_with_extras +from sim.model_export import ActorCfg +from sim.sim2sim.helpers import get_actor_policy + +from sim.envs.humanoids.stompymicro_config import StompyMicroCfgPPO, StompyMicroCfg +from sim.envs.humanoids.stompymicro_env import StompyMicroEnv +from sim.utils.helpers import ( + class_to_dict, + get_load_path, + parse_sim_params, +) +from sim.algo.ppo.on_policy_runner import OnPolicyRunner +from kinfer.export.pytorch import export_to_onnx # isort: skip +from kinfer.inference.python import ONNXModel # isort: skip +import torch # isort: skip + +np.set_printoptions(precision=3, suppress=True) +torch.set_printoptions(precision=3) + + +def test_policies(args): + LOAD_MODEL_PATH = "examples/experiments/standing/robustv1/policy_1.pt" + DEVICE = "cuda:0" + + ## ISAAC ## + args.load_model = LOAD_MODEL_PATH + env_cfg, train_cfg = StompyMicroCfg(), StompyMicroCfgPPO() + env_cfg.env.num_envs = 1 + env_cfg.sim.physx.max_gpu_contact_pairs = 2**10 + train_cfg.runner.resume = True + train_cfg.runner.load_run = -8 + train_cfg.seed = 0 + sim_params = {"sim": class_to_dict(env_cfg.sim)} + sim_params = parse_sim_params(args, sim_params) + env = StompyMicroEnv( + cfg=env_cfg, + sim_params=sim_params, + physics_engine=args.physics_engine, + sim_device=args.sim_device, + headless=args.headless, + ) + train_cfg_dict = class_to_dict(train_cfg) + env_cfg_dict = class_to_dict(env_cfg) + all_cfg = {**train_cfg_dict, **env_cfg_dict} + ppo_runner = OnPolicyRunner(env, all_cfg, log_dir=None, device=DEVICE) + resume_path = get_load_path( + root=os.path.join(ROOT_DIR, "logs", train_cfg.runner.experiment_name), load_run=train_cfg.runner.load_run, 
checkpoint=train_cfg.runner.checkpoint + ) + ppo_runner.load(resume_path, load_optimizer=False) + ppo_runner.alg.actor_critic.eval() + ppo_runner.alg.actor_critic.to(DEVICE) + policy = ppo_runner.alg.actor_critic.act_inference + + obs_buf = torch.zeros((1, 885)).to(DEVICE) + with torch.no_grad(): + isaac_actions = policy(obs_buf) + print(isaac_actions) + isaac_actions = isaac_actions.cpu().numpy()[0] + ## ISAAC ## + + ## MUJOCO ## + policy_cfg = ActorCfg(embodiment="stompymicro") + actor_model, sim2sim_info, input_tensors = get_actor_policy(LOAD_MODEL_PATH, policy_cfg) + export_config = {**vars(policy_cfg), **sim2sim_info} + export_to_onnx(actor_model, input_tensors=input_tensors, config=export_config, save_path="kinfer_test.onnx") + mujoco_policy = ONNXModel("kinfer_test.onnx") + num_actions = sim2sim_info["num_actions"] + mujoco_inputs = { + "x_vel.1": np.zeros(1, dtype=np.float32), + "y_vel.1": np.zeros(1, dtype=np.float32), + "rot.1": np.zeros(1, dtype=np.float32), + "t.1": np.zeros(1, dtype=np.float32), + "dof_pos.1": np.zeros(num_actions, dtype=np.float32), + "dof_vel.1": np.zeros(num_actions, dtype=np.float32), + "prev_actions.1": np.zeros(num_actions, dtype=np.float32), + "imu_ang_vel.1": np.zeros(3, dtype=np.float32), + "imu_euler_xyz.1": np.zeros(3, dtype=np.float32), + "buffer.1": np.zeros(sim2sim_info["num_observations"], dtype=np.float32), + } + mujoco_outputs = mujoco_policy(mujoco_inputs) + ## MUJOCO ## + + ## COMPARE ## + print("\nPolicy Output Comparison:") + print("-" * 50) + print("\nMujoco actions:") + print(mujoco_outputs["actions_scaled"]) + print("\nIsaac actions:") + print(isaac_actions) + print("\nDifference metrics:", np.abs(mujoco_outputs["actions_scaled"] - isaac_actions).mean()) + plt.plot(mujoco_outputs["actions_scaled"], label="Mujoco") + plt.plot(isaac_actions, label="Isaac") + plt.legend() + plt.show() + ## COMPARE ## + + return mujoco_outputs, isaac_actions + + +if __name__ == "__main__": + args = parse_args_with_extras(lambda x: x) + print("Arguments:", vars(args)) + test_policies(args) diff --git a/sim/sim2sim/optimizers/bayesian.py b/sim/sim2sim/optimizers/bayesian.py new file mode 100644 index 00000000..ceb1bef4 --- /dev/null +++ b/sim/sim2sim/optimizers/bayesian.py @@ -0,0 +1,218 @@ +from dataclasses import dataclass +from typing import Any, Dict, List, Tuple + +import numpy as np +import onnxruntime as ort +from scipy.stats import norm +from sklearn.gaussian_process import GaussianProcessRegressor +from sklearn.gaussian_process.kernels import RBF, ConstantKernel, Matern + +from sim.envs.base.mujoco_env import MujocoCfg, MujocoEnv +from sim.sim2sim.mujoco.play import run_simulation +from sim.utils.cmd_manager import CommandManager + + +@dataclass +class OptimizeParam: + """Definition of a parameter to optimize""" + + name: str # Full parameter path (e.g., 'gains.kp_scale') + min_val: float + max_val: float + init_val: float = None + + +class BayesianOptimizer: + def __init__( + self, + base_cfg: MujocoCfg, + policy: ort.InferenceSession, + parameters: List[OptimizeParam], + cmd_manager: CommandManager, + n_initial_points: int = 10, + n_iterations: int = 40, + exploration_weight: float = 0.1, + ): + self.base_cfg = base_cfg + self.policy = policy + self.parameters = parameters + self.cmd_manager = cmd_manager + self.n_initial_points = n_initial_points + self.n_iterations = n_iterations + self.exploration_weight = exploration_weight + + # Initialize storage for observations + self.X = [] # Parameter configurations + self.y = [] # Observed scores + + # 
Set up parameter bounds for GP + self.bounds = np.array([[p.min_val, p.max_val] for p in parameters]) + + # Initialize the GP with a sum of RBF and Matérn (nu=0.5) kernels; the rougher Matérn term captures less-smooth reward responses than RBF alone + kernel = ConstantKernel(1.0) * RBF(length_scale=1.0) + ConstantKernel(1.0) * Matern(length_scale=1.0, nu=0.5) + + self.gp = GaussianProcessRegressor(kernel=kernel, n_restarts_optimizer=10, normalize_y=True, alpha=1e-6) + + def _get_cfg_value(self, cfg: MujocoCfg, param_path: str) -> Any: + """Get a value from nested config using dot notation""" + value = cfg + for part in param_path.split("."): + value = getattr(value, part) + return value + + def _set_cfg_value(self, cfg: MujocoCfg, param_path: str, value: Any) -> None: + """Set a value in nested config using dot notation""" + parts = param_path.split(".") + obj = cfg + for part in parts[:-1]: + obj = getattr(obj, part) + setattr(obj, parts[-1], value) + + def _params_to_array(self, params: Dict[str, float]) -> np.ndarray: + """Convert parameter dictionary to array""" + return np.array([params[p.name] for p in self.parameters]) + + def _array_to_params(self, x: np.ndarray) -> Dict[str, float]: + """Convert array to parameter dictionary""" + return {p.name: float(val) for p, val in zip(self.parameters, x)} + + def _sample_random_point(self) -> np.ndarray: + """Sample a random point in the parameter space""" + return np.array([np.random.uniform(p.min_val, p.max_val) for p in self.parameters]) + + def _expected_improvement(self, X: np.ndarray, xi: float = 0.01) -> np.ndarray: + """Calculate expected improvement at points X""" + # Ensure X is 2D + X = X.reshape(-1, len(self.parameters)) + + # Get predictive distribution from GP + mu, sigma = self.gp.predict(X, return_std=True) + + # Get current best + mu_best = np.max(self.y) + + # Closed-form EI: (mu - mu_best - xi) * Phi(Z) + sigma * phi(Z), with Z = (mu - mu_best - xi) / sigma + with np.errstate(divide="warn"): + imp = mu - mu_best - xi + Z = imp / sigma + ei = imp * norm.cdf(Z) + sigma * norm.pdf(Z) + ei[sigma == 0.0] = 0.0 + + return ei + + def _evaluate_parameters(self, params: Dict[str, float]) -> float: + """Evaluate a parameter set over multiple episodes""" + # Create a copy of base config and update with new parameters + cfg = MujocoCfg() + cfg.__dict__.update(self.base_cfg.__dict__) + + for param_name, value in params.items(): + self._set_cfg_value(cfg, param_name, value) + + env = MujocoEnv(cfg, render=False) + try: + rewards = run_simulation( + env=env, + policy=self.policy, + cfg=cfg, + cmd_manager=self.cmd_manager, + num_episodes=20, + ) + mean_reward = np.mean(rewards) + std_reward = np.std(rewards) + stability_penalty = 0.1 * std_reward + return mean_reward - stability_penalty + + except Exception as e: + print(f"Evaluation failed: {e}") + return float("-inf") + finally: + env.close() + + def _next_point(self) -> np.ndarray: + """Determine the next point to evaluate by random search over the expected-improvement acquisition""" + best_x = None + best_ei = -float("inf") + + # Random search for point with highest expected improvement + for _ in range(1000): + x = self._sample_random_point() + ei = self._expected_improvement(x.reshape(1, -1)) + if ei > best_ei: + best_ei = ei + best_x = x + + return best_x + + def optimize(self) -> Tuple[Dict[str, float], List[float]]: + """Run Bayesian optimization""" + print("Starting Bayesian optimization...") + print(f"Initial exploration: {self.n_initial_points} points") + + # Initial random exploration + for i in range(self.n_initial_points): + print(f"\nEvaluating initial point {i+1}/{self.n_initial_points}") + x = self._sample_random_point() + params =
self._array_to_params(x) + y = self._evaluate_parameters(params) + + self.X.append(x) + self.y.append(y) + + print(f"Parameters: {params}") + print(f"Score: {y:.2f}") + + # Convert lists to arrays + self.X = np.array(self.X) + self.y = np.array(self.y) + + # Main optimization loop + best_score = max(self.y) + best_params = self._array_to_params(self.X[np.argmax(self.y)]) + no_improvement_count = 0 + + for iteration in range(self.n_iterations): + print(f"\nIteration {iteration + 1}/{self.n_iterations}") + + # Fit GP to all data; normalize_y=True in the GP handles target scaling, + # so predictions stay in the raw reward scale that _expected_improvement + # compares against via np.max(self.y) + self.gp.fit(self.X, self.y) + + # Find next point to evaluate + x = self._next_point() + params = self._array_to_params(x) + y = self._evaluate_parameters(params) + + # Update data + self.X = np.vstack((self.X, x)) + self.y = np.append(self.y, y) + + # Update best score + if y > best_score: + best_score = y + best_params = params + no_improvement_count = 0 + print("New best configuration found!") + else: + no_improvement_count += 1 + + print("Current parameters:") + for name, value in params.items(): + print(f" {name}: {value:.3f}") + print(f"Score: {y:.2f}") + print(f"Best score so far: {best_score:.2f}") + + # Early stopping + if no_improvement_count >= 200: + print("\nStopping early: no improvement for 200 consecutive iterations") + break + + print("\nOptimization complete!") + print("Best parameters found:") + for name, value in best_params.items(): + print(f"{name}: {value:.3f}") + print(f"Best score: {best_score:.2f}") + + return best_params, self.y.tolist() diff --git a/sim/sim2sim/optimizers/evolution.py b/sim/sim2sim/optimizers/evolution.py new file mode 100644 index 00000000..f2026e43 --- /dev/null +++ b/sim/sim2sim/optimizers/evolution.py @@ -0,0 +1,427 @@ +from dataclasses import dataclass +from typing import Any, Dict, List, Tuple + +import numpy as np + +from sim.envs.base.mujoco_env import MujocoCfg, MujocoEnv + + +@dataclass +class OptimizeParam: + """Definition of a parameter to optimize""" + + name: str + range: Tuple[float, float] # [min, max] + init_val: float = None # None -> random initialization + mutation_range: Tuple[float, float] = (0.8, 1.2) # relative mutation bounds + + +class ParameterOptimizer: + def __init__( + self, + base_cfg: MujocoCfg, + parameters: List[OptimizeParam], + n_iterations: int = 50, + population_size: int = 20, + elite_frac: float = 0.2, + ): + self.base_cfg = base_cfg + self.parameters = parameters + self.n_iterations = n_iterations + self.population_size = population_size + self.n_elite = max(1, int(population_size * elite_frac)) + + def _get_cfg_value(self, cfg: MujocoCfg, param_path: str) -> Any: + """Get a value from nested config using dot notation""" + value = cfg + for part in param_path.split("."): + value = getattr(value, part) + return value + + def _set_cfg_value(self, cfg: MujocoCfg, param_path: str, value: Any) -> None: + """Set a value in nested config using dot notation""" + parts = param_path.split(".") + obj = cfg + for part in parts[:-1]: + obj = getattr(obj, part) + setattr(obj, parts[-1], value) + + def _create_parameters(self) -> List[Dict[str, float]]: + """Create a population of parameter sets""" + population = [] + + for _ in range(self.population_size): + params = {} + for param in self.parameters: + if param.init_val is not None: + # Initialize near the provided value + value = param.init_val * np.random.uniform(0.9, 1.1) + else: + # Random initialization within bounds
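+ # Sample uniformly over the parameter's full [min, max] range so the initial population covers the search box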
+ value = np.random.uniform(param.range[0], param.range[1]) + params[param.name] = np.clip(value, param.range[0], param.range[1]) + population.append(params) + + return population + + def _evaluate_parameters(self, params: Dict[str, float], n_episodes: int = 3) -> float: + """Evaluate a parameter set over multiple episodes""" + # Create a copy of the base config + cfg = MujocoCfg() + cfg.__dict__.update(self.base_cfg.__dict__) + + # Update config with new parameters + for param_name, value in params.items(): + self._set_cfg_value(cfg, param_name, value) + + env = MujocoEnv(cfg, render=False) + total_reward = 0.0 + + for _ in range(n_episodes): + env.reset() + episode_reward = 0 + done = False + + while not done: + # For testing stability, use zero actions + action = np.zeros(env.num_joints) + _, reward, done, _ = env.step(action) + episode_reward += reward + + if done: + break + + total_reward += episode_reward + + env.close() + return total_reward / n_episodes + + def _create_new_population(self, population: List[Dict[str, float]], scores: np.ndarray) -> List[Dict[str, float]]: + """Create new population from elite performers""" + # Sort by score + elite_idx = np.argsort(scores)[-self.n_elite :] + elite_population = [population[i] for i in elite_idx] + + # Create new population through mutation of elite members + new_population = [] + while len(new_population) < self.population_size: + # Select random elite member + parent = elite_population[np.random.randint(self.n_elite)] + + # Mutate parameters + child = {} + for param in self.parameters: + # Apply random mutation within specified range + mutation = np.random.uniform(*param.mutation_range) + value = parent[param.name] * mutation + child[param.name] = np.clip(value, param.range[0], param.range[1]) + + new_population.append(child) + + return new_population + + def optimize(self) -> Tuple[Dict[str, float], List[float]]: + """Run parameter optimization""" + best_score = float("-inf") + best_params = None + history = [] + + # Initial population + population = self._create_parameters() + + for iteration in range(self.n_iterations): + # Evaluate population + scores = np.array([self._evaluate_parameters(params) for params in population]) + + # Track best performer + max_score = np.max(scores) + if max_score > best_score: + best_score = max_score + best_params = population[np.argmax(scores)] + + print(f"\nIteration {iteration + 1}/{self.n_iterations}") + print(f"Best score: {best_score:.2f}") + print("Best parameters:") + for name, value in best_params.items(): + print(f" {name}: {value:.3f}") + + history.append(best_score) + + # Create new population + population = self._create_new_population(population, scores) + + return best_params, history + + +# NOTE: the classes below redefine OptimizeParam and ParameterOptimizer with a different interface; +# because they appear later in the module, they shadow the versions above when this file is imported. +@dataclass +class OptimizeParam: + """Definition of a parameter to optimize""" + + name: str # Full parameter path (e.g., 'gains.kp_scale') + min_val: float + max_val: float + init_val: float = None # If None, will be randomly initialized + mutation_strength: float = 0.3 # Controls the strength of mutations + + +class ParameterOptimizer: + def __init__( + self, + base_cfg: MujocoCfg, + parameters: List[OptimizeParam], + n_iterations: int = 50, + population_size: int = 40, # Increased population size + elite_frac: float = 0.1, # Reduced elite fraction + exploration_prob: float = 0.2, # Probability of random exploration + n_episodes: int = 5, # More episodes for better evaluation + adaptive_mutation: bool = True, + ): + self.base_cfg = base_cfg + self.parameters = parameters + self.n_iterations = n_iterations +
self.population_size = population_size + self.n_elite = max(1, int(population_size * elite_frac)) + self.exploration_prob = exploration_prob + self.n_episodes = n_episodes + self.adaptive_mutation = adaptive_mutation + self.mutation_strength = 1.0 # Will be adapted during optimization + + # Initialize parameter ranges for normalization + self.param_ranges = {param.name: (param.max_val - param.min_val) for param in parameters} + + def _get_cfg_value(self, cfg: MujocoCfg, param_path: str) -> Any: + """Get a value from nested config using dot notation""" + value = cfg + for part in param_path.split("."): + value = getattr(value, part) + return value + + def _set_cfg_value(self, cfg: MujocoCfg, param_path: str, value: Any) -> None: + """Set a value in nested config using dot notation""" + parts = param_path.split(".") + obj = cfg + for part in parts[:-1]: + obj = getattr(obj, part) + setattr(obj, parts[-1], value) + + def _create_parameters(self) -> List[Dict[str, float]]: + """Create a population of parameter sets with improved initialization""" + population = [] + + # Create structured initial population + for _ in range(self.population_size): + params = {} + if np.random.random() < self.exploration_prob: + # Random exploration + for param in self.parameters: + value = np.random.uniform(param.min_val, param.max_val) + params[param.name] = value + else: + # Systematic initialization around init values + for param in self.parameters: + if param.init_val is not None: + # Log-uniform sampling around init value + log_range = np.log([0.1, 10.0]) # Sample in range 0.1x to 10x + log_multiplier = np.random.uniform(*log_range) + value = param.init_val * np.exp(log_multiplier) + else: + # Latin Hypercube style sampling + segment = (param.max_val - param.min_val) / self.population_size + value = param.min_val + segment * (np.random.random() + _) + + params[param.name] = np.clip(value, param.min_val, param.max_val) + + population.append(params) + + return population + + def _evaluate_parameters(self, params: Dict[str, float]) -> float: + """Evaluate a parameter set with improved robustness""" + cfg = MujocoCfg() + cfg.__dict__.update(self.base_cfg.__dict__) + + for param_name, value in params.items(): + self._set_cfg_value(cfg, param_name, value) + + env = MujocoEnv(cfg, render=False) + rewards = [] + + try: + for _ in range(self.n_episodes): + env.reset() + episode_reward = 0 + episode_length = 0 + done = False + + while not done: + # Sinusoidal test actions for better stability evaluation + t = episode_length * env.cfg.sim.dt + action = 0.5 * np.sin(2 * np.pi * t) + _, reward, done, info = env.step(action * np.ones(env.num_joints)) + + episode_reward += reward + episode_length += 1 + + # Early termination for unstable episodes + if info.get("fall", False): + episode_reward *= 0.5 # Penalty for falling + break + + rewards.append(episode_reward) + + # Calculate score with stability consideration + mean_reward = np.mean(rewards) + std_reward = np.std(rewards) + stability_penalty = 0.1 * std_reward # Penalize inconsistent performance + + return mean_reward - stability_penalty + + except Exception as e: + print(f"Evaluation failed: {e}") + return float("-inf") + finally: + env.close() + + def _crossover(self, parent1: Dict[str, float], parent2: Dict[str, float]) -> Dict[str, float]: + """Perform crossover between two parents""" + child = {} + for param in self.parameters: + # Interpolate between parents with random weight + weight = np.random.random() + value = weight * parent1[param.name] + (1 - weight) * 
parent2[param.name] + child[param.name] = np.clip(value, param.min_val, param.max_val) + return child + + def _mutate(self, params: Dict[str, float], generation: int) -> Dict[str, float]: + """Apply mutation with adaptive strength""" + mutated = {} + for param in self.parameters: + value = params[param.name] + + # Combine multiplicative and additive mutation + if np.random.random() < 0.5: + # Multiplicative mutation (good for parameters with large ranges) + mutation = np.random.lognormal(0, param.mutation_strength * self.mutation_strength) + value *= mutation + else: + # Additive mutation (good for fine-tuning) + range_size = self.param_ranges[param.name] + mutation = np.random.normal(0, param.mutation_strength * self.mutation_strength * range_size) + value += mutation + + mutated[param.name] = np.clip(value, param.min_val, param.max_val) + + return mutated + + def _create_new_population( + self, population: List[Dict[str, float]], scores: np.ndarray, generation: int + ) -> List[Dict[str, float]]: + """Create new population with improved diversity""" + # Sort by score + sorted_indices = np.argsort(scores) + elite_idx = sorted_indices[-self.n_elite :] + elite_population = [population[i] for i in elite_idx] + + # Adapt mutation strength based on improvement + if self.adaptive_mutation and generation > 0: + improvement = (scores[elite_idx[-1]] - scores[elite_idx[0]]) / abs(scores[elite_idx[0]]) + if improvement < 0.01: # Small improvement + self.mutation_strength *= 1.2 # Increase exploration + elif improvement > 0.05: # Large improvement + self.mutation_strength *= 0.8 # Reduce exploration + self.mutation_strength = np.clip(self.mutation_strength, 0.5, 2.0) + + new_population = elite_population.copy() + + # Fill rest of population + while len(new_population) < self.population_size: + if np.random.random() < self.exploration_prob: + # Random exploration + params = {param.name: np.random.uniform(param.min_val, param.max_val) for param in self.parameters} + new_population.append(params) + else: + # Tournament selection + tournament_size = 3 + parent1 = max(np.random.choice(population, tournament_size), key=lambda x: scores[population.index(x)]) + parent2 = max(np.random.choice(population, tournament_size), key=lambda x: scores[population.index(x)]) + + # Crossover and mutation + child = self._crossover(parent1, parent2) + child = self._mutate(child, generation) + new_population.append(child) + + return new_population + + def optimize(self) -> Tuple[Dict[str, float], List[float]]: + """Run parameter optimization with improved logging""" + best_score = float("-inf") + best_params = None + history = [] + + population = self._create_parameters() + + for iteration in range(self.n_iterations): + # Evaluate population + scores = np.array([self._evaluate_parameters(params) for params in population]) + + # Update best performer + max_score = np.max(scores) + if max_score > best_score: + best_score = max_score + best_params = population[np.argmax(scores)] + + # Logging + print(f"\nIteration {iteration + 1}/{self.n_iterations}") + print(f"Best score: {best_score:.2f}") + print(f"Current mutation strength: {self.mutation_strength:.2f}") + print("Best parameters:") + for name, value in best_params.items(): + print(f" {name}: {value:.3f}") + + history.append(best_score) + + # Create new population + population = self._create_new_population(population, scores, iteration) + + return best_params, history + + +if __name__ == "__main__": + cfg = MujocoCfg() + parameters = [ + OptimizeParam( + 
name="gains.kp_scale", + min_val=0.1, + max_val=5.0, + init_val=1.0, + ), + OptimizeParam( + name="gains.kd_scale", + min_val=0.1, + max_val=5.0, + init_val=1.0, + ), + # OptimizeParam( + # name='gains.tau_scale', + # min_val=1.0, + # max_val=20.0, + # init_val=4.0, + # ), + # OptimizeParam( + # name='extras.tau_factor', + # min_val=1.0, + # max_val=20.0, + # init_val=1.0, + # ), + ] + optimizer = ParameterOptimizer( + cfg, + parameters, + n_iterations=200, + population_size=5, + ) + best_params, history = optimizer.optimize() + print("\nOptimization complete!") + print("Best parameters found:") + for name, value in best_params.items(): + print(f"{name}: {value:.3f}") diff --git a/sim/simple_play.py b/sim/simple_play.py new file mode 100644 index 00000000..7f55beb0 --- /dev/null +++ b/sim/simple_play.py @@ -0,0 +1,71 @@ +import argparse + +from sim.env_helpers import debug_robot_state +from sim.utils.args_parsing import parse_args_with_extras +from sim.envs.humanoids.stompymicro_config import StompyMicroCfg, StompyMicroCfgPPO +from sim.envs.humanoids.stompymicro_env import StompyMicroEnv +from sim.utils.helpers import ( + class_to_dict, + parse_sim_params, +) +from sim.algo.ppo.on_policy_runner import OnPolicyRunner +import torch # isort: skip + + +def simple_play(args: argparse.Namespace) -> None: + DEVICE = "cpu" + RAND = True + # Setup environment and training configs + env_cfg, train_cfg = StompyMicroCfg(), StompyMicroCfgPPO() + env_cfg.env.num_envs = 1 + env_cfg.domain_rand.randomize_friction = RAND + env_cfg.domain_rand.randomize_base_mass = RAND + env_cfg.domain_rand.push_robots = RAND + if not RAND: + env_cfg.domain_rand.action_noise = 0 + env_cfg.domain_rand.action_delay = 0 + env_cfg.domain_rand.push_interval_s = 0 + env_cfg.domain_rand.max_push_vel_xy = 0 + else: + env_cfg.domain_rand.push_interval_s = 0.1 #1.7 + env_cfg.domain_rand.max_push_vel_xy = 0.85 + env_cfg.sim.physx.max_gpu_contact_pairs = 2**10 + train_cfg.runner.load_run = -10 + train_cfg.seed = 0 + + sim_params = {"sim": class_to_dict(env_cfg.sim)} + sim_params = parse_sim_params(args, sim_params) + env = StompyMicroEnv( + cfg=env_cfg, + sim_params=sim_params, + physics_engine=args.physics_engine, + sim_device=args.sim_device, + headless=False, + ) + command = torch.Tensor([0.0, 0.0, 0.0, 1.0]).to(DEVICE) + env.commands[:] = command + + # Setup and load policy + all_cfg = {**class_to_dict(train_cfg), **class_to_dict(env_cfg)} + ppo_runner = OnPolicyRunner(env, all_cfg, device=DEVICE) + resume_path = "logs/StompyMicro/Dec04_20-29-12_StandingRobust/model_12001.pt" + ppo_runner.load(resume_path, load_optimizer=False) + ppo_runner.alg.actor_critic.eval() + ppo_runner.alg.actor_critic.to(DEVICE) + policy = ppo_runner.alg.actor_critic.act_inference + + # Run simulation + obs, _ = env.reset() + steps = int(env_cfg.env.episode_length_s / (env_cfg.sim.dt * env_cfg.control.decimation)) + for t in range(steps): + actions = policy(obs.detach()) + env.commands[:] = command + obs, _, _, _, _ = env.step(actions.detach()) + if t % 17 == 0: + debug_robot_state("Isaac", obs[0], actions[0]) + + +if __name__ == "__main__": + args = parse_args_with_extras(lambda x: x) + print("Arguments:", vars(args)) + simple_play(args) diff --git a/sim/train.py b/sim/train.py index aa5c100e..eaa4465c 100755 --- a/sim/train.py +++ b/sim/train.py @@ -6,7 +6,7 @@ import argparse from sim.envs import task_registry # noqa: E402 -from sim.utils.helpers import get_args # noqa: E402 +from sim.utils.args_parsing import parse_args_with_extras # noqa: E402 def 
train(args: argparse.Namespace) -> None: @@ -15,8 +15,14 @@ def train(args: argparse.Namespace) -> None: ppo_runner.learn(num_learning_iterations=train_cfg.runner.max_iterations, init_at_random_ep_len=True) -# Puts this import down here so that the environments are registered +def add_train_arguments(parser): + """Add training-specific arguments.""" + # Training + parser.add_argument("--horovod", action="store_true", default=False, help="Use horovod for multi-gpu training") + parser.add_argument("--trimesh", action="store_true", default=False, help="Use trimesh terrain") + if __name__ == "__main__": - # python -m sim.humanoid_gym.train - train(get_args()) + args = parse_args_with_extras(add_train_arguments) + print("Arguments:", vars(args)) + train(args) diff --git a/sim/utils/args_parsing.py b/sim/utils/args_parsing.py new file mode 100644 index 00000000..b4c9cc16 --- /dev/null +++ b/sim/utils/args_parsing.py @@ -0,0 +1,110 @@ +import argparse +import sys +from typing import Any, Dict, List + +from isaacgym import gymutil + + +def create_base_parser(add_help=False): + """Create base parser with core arguments used across all scripts.""" + parser = argparse.ArgumentParser(add_help=add_help) + + # General + parser.add_argument( + "--task", + type=str, + default="stompymicro", + help="Resume training or start testing from a checkpoint. Overrides config file if provided.", + ) + parser.add_argument( + "--rl_device", + type=str, + default="cuda:0", + help="Device used by the RL algorithm, (cpu, gpu, cuda:0, cuda:1 etc..)", + ) + parser.add_argument( + "--num_envs", type=int, help="Number of environments to create. Overrides config file if provided." + ) + parser.add_argument("--seed", type=int, help="Random seed. Overrides config file if provided.") + parser.add_argument( + "--max_iterations", + type=int, + # default=1001, + help="Maximum number of iterations. Refers to training iterations for `train.py` and playing steps for `play.py`.", + ) + + # Loading model + parser.add_argument("--resume", action="store_true", default=False, help="Resume training from a checkpoint") + parser.add_argument( + "--experiment_name", type=str, help="Name of the experiment to run or load. Overrides config file if provided." + ) + parser.add_argument("--run_name", type=str, help="Name of the run. Overrides config file if provided.") + parser.add_argument( + "--load_run", type=str, help="Name of the run to load when resume=True. If -1: will load the last run." + ) + parser.add_argument( + "--checkpoint", type=int, help="Saved model checkpoint number. If -1: will load the last checkpoint." 
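+ # NOTE: --load_run and --checkpoint are resolved against the experiment log directory by get_load_path in sim/utils/helpers.py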
+ ) + + # Rendering + parser.add_argument("--headless", action="store_true", default=False, help="Force display off at all times") + + return parser + + +def convert_to_gymutil_format(parser: argparse.ArgumentParser) -> List[Dict[str, Any]]: + """Convert argparse parser arguments to gymutil custom_parameters format.""" + custom_parameters = [] + for action in parser._actions: + if action.dest != "help": # Skip the help action + param = { + "name": "--" + action.dest if not action.option_strings else action.option_strings[0], + "type": action.type, + "default": action.default, + "help": action.help, + } + if isinstance(action, argparse._StoreTrueAction): + param.pop("type") # Remove the type for store_true actions + param["action"] = "store_true" + elif isinstance(action, argparse._StoreAction): + if action.choices: + param["choices"] = action.choices + custom_parameters.append(param) + return custom_parameters + + +def parse_args_with_extras(extra_args_fn=None): + """Parse arguments using both base parser and any extra arguments provided.""" + parser = create_base_parser() + + if extra_args_fn is not None: + extra_args_fn(parser) + + # Store which arguments were meant to be True by default + true_by_default = {action.dest for action in parser._actions if action.default is True} + + custom_parameters = convert_to_gymutil_format(parser) + args = gymutil.parse_arguments(description="RL Policy", custom_parameters=custom_parameters) + + # Restore default=True values that weren't explicitly set to False + for arg_name in true_by_default: + # Check if this argument wasn't explicitly provided in command line + if not any(arg.lstrip("-").replace("-", "_") == arg_name for arg in sys.argv[1:]): + setattr(args, arg_name, True) + + # Add the sim device arguments + args.sim_device_id = args.compute_device_id + args.sim_device = args.sim_device_type + if args.sim_device == "cuda": + args.sim_device += f":{args.sim_device_id}" + + return args + + +def print_args(args): + """Pretty print arguments.""" + print("\nArguments:") + print("-" * 30) + for arg, value in sorted(vars(args).items()): + print(f"{arg:>20}: {value}") + print("-" * 30 + "\n") diff --git a/sim/utils/cmd_manager.py b/sim/utils/cmd_manager.py new file mode 100644 index 00000000..ad4161ba --- /dev/null +++ b/sim/utils/cmd_manager.py @@ -0,0 +1,192 @@ +from enum import Enum +from typing import List + +import numpy as np + +from sim.utils.helpers import draw_vector + +import torch # isort: skip + + +class CommandMode(Enum): + FIXED = "fixed" + OSCILLATING = "oscillating" + KEYBOARD = "keyboard" + RANDOM = "random" + + +class CommandManager: + """Manages robot commands""" + + def __init__( + self, + num_envs: int = 1, + mode: str = "fixed", + default_cmd: List[float] = [0.0, -0.4, 0.0, 0 / 2 * np.pi], # default + # default_cmd: List[float] = [0.0, -0.3, 0.0, 1/2 * np.pi], # 90 + # default_cmd: List[float] = [0.0, -0.6, 0.0, 0/2 * np.pi], # 0 + # default_cmd: List[float] = [0.0, -0.3, 0.0, 0.5/2 * np.pi], # 45 + # default_cmd: List[float] = [0.0, -0.0, 0.0, 0/2 * np.pi], # test + device="cpu", + env_cfg=None, + ): + self.num_envs = num_envs + self.mode = CommandMode(mode) + self.device = device + self.default_cmd = torch.tensor(default_cmd, device=self.device) + self.commands = self.default_cmd.repeat(num_envs, 1) + self.time = 0 + self.env_cfg = env_cfg + + # Mode-specific parameters + if self.mode == CommandMode.OSCILLATING: + self.osc_period = 5.0 # secs + self.min_x_vel = env_cfg.commands.ranges.lin_vel_x[0] if env_cfg else 0.0 + 
self.max_x_vel = env_cfg.commands.ranges.lin_vel_x[1] if env_cfg else 0.3 + self.osc_amplitude = (self.max_x_vel - self.min_x_vel) / 2 + self.osc_offset = (self.max_x_vel + self.min_x_vel) / 2 + elif self.mode == CommandMode.RANDOM: + self.cmd_ranges = ( + { + "lin_vel_x": env_cfg.commands.ranges.lin_vel_x, + "lin_vel_y": env_cfg.commands.ranges.lin_vel_y, + "ang_vel_yaw": env_cfg.commands.ranges.ang_vel_yaw, + "heading": env_cfg.commands.ranges.heading, + } + if env_cfg + else { + "lin_vel_x": [-0.05, 0.23], + "lin_vel_y": [-0.05, 0.05], + "ang_vel_yaw": [-0.5, 0.5], + "heading": [-np.pi, np.pi], + } + ) + self.resampling_time = env_cfg.commands.resampling_time if env_cfg else 8.0 + self.last_sample_time = 0.0 + elif self.mode == CommandMode.KEYBOARD: + try: + import pygame + + pygame.init() + pygame.display.set_mode((100, 100)) + self.x_vel_cmd = 0.0 + self.y_vel_cmd = 0.0 + self.yaw_vel_cmd = 0.0 + except ImportError: + print("WARNING: pygame not found, falling back to fixed commands") + self.mode = CommandMode.FIXED + + def close(self): + if self.mode == CommandMode.KEYBOARD: + import pygame + + pygame.quit() + + def update(self, dt: float) -> torch.Tensor: + """Updates and returns commands based on current mode.""" + self.time += dt + + if self.mode == CommandMode.FIXED: + return self.commands + elif self.mode == CommandMode.OSCILLATING: + # Oscillate x velocity between min and max + x_vel = self.osc_offset + self.osc_amplitude * torch.sin( + torch.tensor(2 * np.pi * self.time / self.osc_period) + ) + self.commands[:, 0] = x_vel.to(self.device) + elif self.mode == CommandMode.RANDOM: + if self.time - self.last_sample_time >= self.resampling_time: + self.last_sample_time = self.time + + if self.env_cfg and self.env_cfg.commands.heading_command: + new_commands = torch.from_numpy( + np.array( + [ + np.random.uniform(*self.cmd_ranges["lin_vel_x"], size=self.num_envs), + np.random.uniform(*self.cmd_ranges["lin_vel_y"], size=self.num_envs), + np.zeros(self.num_envs), + np.random.uniform(*self.cmd_ranges["heading"], size=self.num_envs), + ] + ).T # Transpose to get shape [num_envs, 4] + ).to(self.device) + else: + new_commands = torch.tensor( + np.array( + [ + np.random.uniform(*self.cmd_ranges["lin_vel_x"], size=self.num_envs), + np.random.uniform(*self.cmd_ranges["lin_vel_y"], size=self.num_envs), + np.random.uniform(*self.cmd_ranges["ang_vel_yaw"], size=self.num_envs), + np.zeros(self.num_envs), + ] + ).T # Transpose to get shape [num_envs, 4] + ).to(self.device) + self.commands = new_commands + + elif self.mode == CommandMode.KEYBOARD: + self._handle_keyboard_input() + self.commands[:, 0] = torch.tensor(self.x_vel_cmd, device=self.device) + self.commands[:, 1] = torch.tensor(self.y_vel_cmd, device=self.device) + self.commands[:, 2] = torch.tensor(self.yaw_vel_cmd, device=self.device) + + return self.commands + + def draw(self, gym, viewer, env_handles, robot_positions, actual_vels) -> None: + """Draws robot heading and velocity vectors.""" + if viewer is None: + return + + gym.clear_lines(viewer) + cmd_vels = self.commands[:, :2].cpu().numpy() # x,y velocities + headings = self.commands[:, 3].cpu().numpy() # heading angles + + for env_handle, robot_pos, cmd_vel, actual_vel, heading in zip( + env_handles, robot_positions, cmd_vels, actual_vels, headings + ): + # Draw heading direction (black/gray arrow) + heading_dir = (np.cos(heading), np.sin(heading)) + draw_vector(gym, viewer, env_handle, robot_pos, heading_dir, (0.5, 0.5, 0.5), head_scale=0.08) + + # Draw commanded velocity (green) + 
draw_vector(gym, viewer, env_handle, robot_pos, cmd_vel, (0.0, 1.0, 0.0)) + + # Draw actual velocity (red) + draw_vector(gym, viewer, env_handle, robot_pos, actual_vel, (1.0, 0.0, 0.0)) + + # Draw commanded velocity relative to heading: where the robot is trying to go relative to its current heading + # cmd_vel_relative = ( + # cmd_vel[0] * np.cos(heading) - cmd_vel[1] * np.sin(heading), + # cmd_vel[0] * np.sin(heading) + cmd_vel[1] * np.cos(heading) + # ) + # draw_vector(gym, viewer, env_handle, robot_pos, cmd_vel_relative, (0.0, 0.8, 0.2), head_scale=0.08) + + def _handle_keyboard_input(self): + """Handles keyboard input for command updates.""" + import pygame + + for event in pygame.event.get(): + if event.type == pygame.QUIT: + pygame.quit() + + keys = pygame.key.get_pressed() + + # Update movement commands based on arrow keys + if keys[pygame.K_UP]: + self.x_vel_cmd = min(self.x_vel_cmd + 0.0005, 0.5) + if keys[pygame.K_DOWN]: + self.x_vel_cmd = max(self.x_vel_cmd - 0.0005, -0.5) + if keys[pygame.K_LEFT]: + self.y_vel_cmd = min(self.y_vel_cmd + 0.0005, 0.5) + if keys[pygame.K_RIGHT]: + self.y_vel_cmd = max(self.y_vel_cmd - 0.0005, -0.5) + + # Yaw control + if keys[pygame.K_a]: + self.yaw_vel_cmd = min(self.yaw_vel_cmd + 0.001, 0.5) + if keys[pygame.K_z]: + self.yaw_vel_cmd = max(self.yaw_vel_cmd - 0.001, -0.5) + + # Reset commands + if keys[pygame.K_SPACE]: + self.x_vel_cmd = 0.0 + self.y_vel_cmd = 0.0 + self.yaw_vel_cmd = 0.0 diff --git a/sim/utils/helpers.py b/sim/utils/helpers.py index 675fe070..5e42c644 100755 --- a/sim/utils/helpers.py +++ b/sim/utils/helpers.py @@ -34,6 +34,8 @@ import copy import os import random +from datetime import datetime +from typing import Any, Tuple, Union import numpy as np @@ -107,18 +109,40 @@ def parse_sim_params(args, cfg): return sim_params -def get_load_path(root, load_run=-1, checkpoint=-1): +def get_load_path(root, load_run: Union[int, str] = -1, checkpoint: Union[int, str] = -1): try: runs = os.listdir(root) - # TODO sort by date to handle change of month - runs.sort() + + # Sort by datetime instead of alphabetically + def parse_run_time(run_name): + try: + return datetime.strptime(run_name[:14], "%b%d_%H-%M-%S") + except: + return datetime.min + + runs.sort(key=parse_run_time) + if "exported" in runs: runs.remove("exported") - last_run = os.path.join(root, runs[-1]) - except: - raise ValueError("No runs in this directory: " + root) - if load_run == -1: - load_run = last_run + + # Keep only runs with model files + runs = [run for run in runs if any("model" in file for file in os.listdir(os.path.join(root, run)))] + if not runs: + raise ValueError("No runs with model files in this directory: " + root) + + except Exception as e: + raise ValueError("Error accessing directory: " + root) from e + + # Handle load_run selection + if isinstance(load_run, str) and load_run.lstrip("-").isdigit(): + load_run = int(load_run) + + if isinstance(load_run, int): + try: + run_name = runs[load_run] + load_run = os.path.join(root, run_name) + except IndexError: + raise ValueError(f"Run index {load_run} out of range. Available runs: {len(runs)}") else: load_run = os.path.join(root, load_run) @@ -161,18 +185,48 @@ def update_cfg_from_args(env_cfg, cfg_train, args): def get_args() -> argparse.Namespace: custom_parameters = [ + # General { "name": "--task", "type": str, "default": "stompymicro", "help": "Resume training or start testing from a checkpoint. 
Overrides config file if provided.", }, + { + "name": "--rl_device", + "type": str, + "default": "cuda:0", + "help": "Device used by the RL algorithm, (cpu, gpu, cuda:0, cuda:1 etc..)", + }, + { + "name": "--num_envs", + "type": int, + "help": "Number of environments to create. Overrides config file if provided.", + }, + { + "name": "--seed", + "type": int, + "help": "Random seed. Overrides config file if provided.", + }, + { + "name": "--max_iterations", + "type": int, + "help": "Maximum number of iterations. Refers to training iterations for `train.py` and playing steps for `play.py`. Overrides config file if provided.", + }, + # Training { "name": "--resume", "action": "store_true", "default": False, "help": "Resume training from a checkpoint", }, + { + "name": "--horovod", + "action": "store_true", + "default": False, + "help": "Use horovod for multi-gpu training", + }, + # Loading model { "name": "--experiment_name", "type": str, @@ -193,6 +247,7 @@ def get_args() -> argparse.Namespace: "type": int, "help": "Saved model checkpoint number. If -1: will load the last checkpoint. Overrides config file if provided.", }, + # Rendering { "name": "--headless", "action": "store_true", @@ -200,37 +255,31 @@ def get_args() -> argparse.Namespace: "help": "Force display off at all times", }, { - "name": "--horovod", + "name": "--arrows", "action": "store_true", "default": False, - "help": "Use horovod for multi-gpu training", - }, - { - "name": "--rl_device", - "type": str, - "default": "cuda:0", - "help": "Device used by the RL algorithm, (cpu, gpu, cuda:0, cuda:1 etc..)", - }, - { - "name": "--num_envs", - "type": int, - "help": "Number of environments to create. Overrides config file if provided.", + "help": "Draw command and velocity arrows during visualization", }, + # Play { - "name": "--seed", - "type": int, - "help": "Random seed. Overrides config file if provided.", + "name": "--log_h5", + "action": "store_true", + "default": False, + "help": "Enable HDF5 logging", }, { - "name": "--max_iterations", - "type": int, - "help": "Maximum number of training iterations. Overrides config file if provided.", + "name": "--command_mode", + "type": str, + "default": "fixed", + "choices": ["fixed", "oscillating", "random", "keyboard"], + "help": "Control mode for the robot", }, + # Unused? 
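+ # --trimesh is also registered by add_train_arguments in sim/train.py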
{ - "name": "--log_h5", + "name": "--trimesh", "action": "store_true", "default": False, - "help": "Enable HDF5 logging", + "help": "Use trimesh terrain", }, ] # parse arguments @@ -244,7 +293,7 @@ def get_args() -> argparse.Namespace: return args -def export_policy_as_jit(actor_critic, path) -> None: +def export_policy_as_jit(actor_critic: Any, path: Union[str, os.PathLike]) -> None: os.makedirs(path, exist_ok=True) path = os.path.join(path, "policy_1.pt") model = copy.deepcopy(actor_critic.actor).to("cpu") @@ -265,3 +314,136 @@ def export_policy_as_onnx(actor_critic, path): dummy_input = torch.randn(1, input_dim) torch.onnx.export(model, dummy_input, path) + + +def draw_vector( + gym: gymapi.Gym, + viewer: gymapi.Viewer, + env_handle: gymapi.Env, + start_pos: np.ndarray, + direction: Tuple[float, float], + color: Tuple[float, float, float], + clear_lines: bool = False, + head_scale: float = 0.1, +) -> None: + """Draws a single vector with an arrowhead.""" + if viewer is None: + return + + # Unpack direction and create start position + vel_x, vel_y = direction + start = gymapi.Vec3(start_pos[0], start_pos[1], start_pos[2]) + + # Scale arrow length by magnitude + vel_magnitude = np.sqrt(vel_x**2 + vel_y**2) + if vel_magnitude > 0: + arrow_scale = np.clip(vel_magnitude, 0.1, 1.0) + normalized_x = vel_x / vel_magnitude + normalized_y = vel_y / vel_magnitude + else: + arrow_scale = 0.1 + normalized_x = 0 + normalized_y = 0 + + # Calculate end position and arrowhead + end = gymapi.Vec3(start.x + normalized_x * arrow_scale, start.y + normalized_y * arrow_scale, start.z) + + # Calculate perpendicular vector for arrowhead + perp_x = -normalized_y + perp_y = normalized_x + + head_left = gymapi.Vec3( + end.x - head_scale * (normalized_x * 0.7 + perp_x * 0.7), + end.y - head_scale * (normalized_y * 0.7 + perp_y * 0.7), + end.z, + ) + + head_right = gymapi.Vec3( + end.x - head_scale * (normalized_x * 0.7 - perp_x * 0.7), + end.y - head_scale * (normalized_y * 0.7 - perp_y * 0.7), + end.z, + ) + + # Create vertices and colors + verts = [ + start.x, + start.y, + start.z, + end.x, + end.y, + end.z, + end.x, + end.y, + end.z, + head_left.x, + head_left.y, + head_left.z, + end.x, + end.y, + end.z, + head_right.x, + head_right.y, + head_right.z, + ] + colors = [color[0], color[1], color[2]] * 6 + + if clear_lines: + gym.clear_lines(viewer) + gym.add_lines(viewer, env_handle, 3, verts, colors) + + +def analyze_policy(policy_path: str) -> dict: + """Analyzes a policy file to determine its input/output dimensions.""" + info = {} + + if policy_path.endswith(".pt"): + import torch + + # Load TorchScript model + model = torch.jit.load(policy_path) + + # Get the first parameter of the first layer to determine input size + first_layer = next(model.parameters()) + info["input_dim"] = first_layer.shape[1] + + # Get the last layer to determine output size + last_layer = None + for param in model.parameters(): + last_layer = param + info["output_dim"] = last_layer.shape[0] + + # Additional info + info["type"] = "TorchScript" + info["num_parameters"] = sum(p.numel() for p in model.parameters()) + + elif policy_path.endswith(".onnx"): + import onnx + + # Load ONNX model + model = onnx.load(policy_path) + + # Get input info + input_info = model.graph.input[0] + input_shape = [d.dim_value for d in input_info.type.tensor_type.shape.dim] + info["input_dim"] = input_shape[1] if len(input_shape) > 1 else input_shape[0] + + # Get output info + output_info = model.graph.output[0] + output_shape = [d.dim_value for d in 
output_info.type.tensor_type.shape.dim] + info["output_dim"] = output_shape[1] if len(output_shape) > 1 else output_shape[0] + + # Additional info + info["type"] = "ONNX" + info["input_shape"] = input_shape + info["output_shape"] = output_shape + + else: + raise ValueError(f"Unsupported model type: {policy_path}") + + return info + + +if __name__ == "__main__": + path = "examples/experiments/walking/exp6-arm_movement++/policy_1.pt" # "policy_1.pt" + info = analyze_policy(path) + print(info) diff --git a/sim/utils/logger.py b/sim/utils/logger.py index a37b1473..d18e4990 100755 --- a/sim/utils/logger.py +++ b/sim/utils/logger.py @@ -32,7 +32,7 @@ from collections import defaultdict from multiprocessing import Process, Value -# import matplotlib.pyplot as plt +import matplotlib.pyplot as plt import numpy as np diff --git a/sim/utils/task_registry.py b/sim/utils/task_registry.py index 04198fa3..b2a97b6d 100755 --- a/sim/utils/task_registry.py +++ b/sim/utils/task_registry.py @@ -33,6 +33,7 @@ import os from datetime import datetime +from typing import Dict, Tuple from sim import ROOT_DIR from sim.algo.ppo.on_policy_runner import OnPolicyRunner # mypy: ignore @@ -76,7 +77,7 @@ def get_cfgs(self, name): return env_cfg, train_cfg def make_env(self, name, args=None, env_cfg=None): - """Creates an environment either from a registered namme or from the provided config file. + """Creates an environment either from a registered name or from the provided config file. Args: name (string): Name of a registered env. @@ -120,7 +121,9 @@ def make_env(self, name, args=None, env_cfg=None): self.env_cfg_for_wandb = env_cfg return env, env_cfg - def make_alg_runner(self, env, name=None, args=None, train_cfg=None, log_root="default"): + def make_alg_runner( + self, env, name=None, args=None, train_cfg=None, log_root="default" + ) -> Tuple[OnPolicyRunner, Dict]: """Creates the training algorithm either from a registered namme or from the provided config file. Args: