diff --git a/examples/walking_pro.onnx b/examples/walking_pro.onnx index 0c633f77..bbdb1b15 100644 Binary files a/examples/walking_pro.onnx and b/examples/walking_pro.onnx differ diff --git a/examples/walking_pro.pt b/examples/walking_pro.pt index 12fba223..6aedefd4 100644 Binary files a/examples/walking_pro.pt and b/examples/walking_pro.pt differ diff --git a/sim/envs/humanoids/stompypro_config.py b/sim/envs/humanoids/stompypro_config.py index 66ee2efc..c68ab34e 100644 --- a/sim/envs/humanoids/stompypro_config.py +++ b/sim/envs/humanoids/stompypro_config.py @@ -26,12 +26,11 @@ class env(LeggedRobotCfg.env): episode_length_s = 24 # episode length in seconds use_ref_actions = False - class safety: + class safety(LeggedRobotCfg.safety): # safety factors pos_limit = 1.0 vel_limit = 1.0 - torque_limit = 0.85#0.85 - terminate_after_contacts_on = ["base", "L_thigh", "R_thigh"] + torque_limit = 0.85 class asset(LeggedRobotCfg.asset): name = "stompypro" @@ -121,10 +120,10 @@ class domain_rand(LeggedRobotCfg.domain_rand): friction_range = [0.1, 2.0] randomize_base_mass = True - added_mass_range = [-1.0, 1.0] + added_mass_range = [-5.0, 5.0] push_robots = True push_interval_s = 4 - max_push_vel_xy = 0.3 # 0.2 + max_push_vel_xy = 0.2 max_push_ang_vel = 0.4 # dynamic randomization action_noise = 0.02 @@ -140,7 +139,7 @@ class ranges: lin_vel_x = [-0.3, 0.6] # min max [m/s] lin_vel_y = [-0.3, 0.3] # min max [m/s] ang_vel_yaw = [-0.3, 0.3] # min max [rad/s] - heading = [-0.14, 0.14] + heading = [-3.14, 3.14] # a - normal # b - negate target_join_pos_scale (-0.14) @@ -152,18 +151,37 @@ class rewards: # put some settings here for LLM parameter tuning target_joint_pos_scale = 0.17 # rad target_feet_height = 0.06 # m + cycle_time = 0.4 # sec # if true negative total rewards are clipped at zero (avoids early termination problems) only_positive_rewards = True # tracking reward = exp(error*sigma) tracking_sigma = 5.0 - max_contact_force = 400 # forces above this value are penalize d + max_contact_force = 400 # forces above this value are penalized class scales: + # reference motion tracking + joint_pos = 1.6 + feet_clearance = 1.6 + feet_contact_number = 1.5 + # gait + feet_air_time = 1.6 + foot_slip = -0.05 + feet_distance = 0.2 + knee_distance = 0.2 + # contact + feet_contact_forces = -0.01 + # vel tracking + tracking_lin_vel = 1.2 + tracking_ang_vel = 1.3 + vel_mismatch_exp = 0.5 # lin_z; ang x,y + low_speed = 0.2 + track_vel_hard = 0.5 + # base pos default_joint_pos = 0.5 - orientation = 1.2 - base_height = 0.4 + orientation = 1 + base_height = 0.2 base_acc = 0.2 # energy action_smoothness = -0.002 @@ -241,6 +259,7 @@ class scales: dof_acc = -1e-7 collision = -1.0 + class StompyProCfgPPO(LeggedRobotCfgPPO): seed = 5 runner_class_name = "OnPolicyRunner" # DWLOnPolicyRunner diff --git a/sim/resources/stompypro/joints.py b/sim/resources/stompypro/joints.py index 62f7c53b..ceb51e96 100755 --- a/sim/resources/stompypro/joints.py +++ b/sim/resources/stompypro/joints.py @@ -117,25 +117,16 @@ def default_positions(cls) -> Dict[str, float]: @classmethod def default_standing(cls) -> Dict[str, float]: return { - Robot.legs.left.hip_pitch: -0.157, - Robot.legs.left.hip_yaw: 0.0394, - Robot.legs.left.hip_roll: 0.0628, + Robot.legs.left.hip_pitch: -0.23, + Robot.legs.left.hip_yaw: 0.0, + Robot.legs.left.hip_roll: 0.0 , Robot.legs.left.knee_pitch: 0.441, Robot.legs.left.ankle_pitch: -0.258, - Robot.legs.right.hip_pitch: -0.22, - Robot.legs.right.hip_yaw: 0.026, - Robot.legs.right.hip_roll: 0.0314, + Robot.legs.right.hip_pitch: -0.23, + Robot.legs.right.hip_yaw: 0.0, + Robot.legs.right.hip_roll: 0.0, Robot.legs.right.knee_pitch: 0.441, - Robot.legs.right.ankle_pitch: -0.223, - - # Robot.arms.left.shoulder_pitch: 0.0, - # Robot.arms.left.shoulder_roll: 0.0, - # Robot.arms.left.shoulder_yaw: 0.0, - # Robot.arms.left.elbow_pitch: 0.0, - # Robot.arms.right.shoulder_pitch: 0.0, - # Robot.arms.right.shoulder_roll: 0.0, - # Robot.arms.right.shoulder_yaw: 0.0, - # Robot.arms.right.elbow_pitch: 0.0, + Robot.legs.right.ankle_pitch: -0.258, } @classmethod @@ -146,17 +137,13 @@ def default_limits(cls) -> Dict[str, Dict[str, float]]: @classmethod def stiffness(cls) -> Dict[str, float]: return { - "hip_y": 150, - "hip_x": 150, - "hip_z": 100, - "knee": 150, - "ankle_y": 25, - # "shoulder_y": 120, - # "shoulder_z": 60, - # "shoulder_x": 60, - # "elbow_x": 120, + "hip_y": 300, + "hip_x": 200, + "hip_z": 200, + "knee": 300, + "ankle_y": 300, } - + # d_gains @classmethod def damping(cls) -> Dict[str, float]: @@ -166,40 +153,26 @@ def damping(cls) -> Dict[str, float]: "hip_z": 10, "knee": 10, "ankle_y": 10, - # "shoulder_y": 10, - # "shoulder_z": 10, - # "shoulder_x": 10, - # "elbow_x": 10, } - # pos_limits + # # pos_limits @classmethod def effort(cls) -> Dict[str, float]: return { - "hip_y": 150, - "hip_x": 150, + "hip_y": 250, + "hip_x": 100, "hip_z": 100, - "knee": 150, - "ankle_y": 25, - # "shoulder_y": 120, - # "shoulder_z": 60, - # "shoulder_x": 60, - # "elbow_x": 120, + "knee": 250, + "ankle_y": 17, } - # vel_limits + # # vel_limits @classmethod def velocity(cls) -> Dict[str, float]: return { - "hip_y": 10, - "hip_x": 10, - "hip_z": 10, + "hip": 10, "knee": 10, - "ankle_y": 10, - # "shoulder_y": 10, - # "shoulder_z": 10, - # "shoulder_x": 10, - # "elbow_x": 10, + "ankle": 10 } @classmethod @@ -207,7 +180,7 @@ def friction(cls) -> Dict[str, float]: return { "hip": 0, "knee": 0, - "ankle": 0, + "ankle": 0.01, } diff --git a/sim/resources/stompypro/robot_fixed.urdf b/sim/resources/stompypro/robot_fixed.urdf index 515d6d8b..5c682f98 100644 --- a/sim/resources/stompypro/robot_fixed.urdf +++ b/sim/resources/stompypro/robot_fixed.urdf @@ -35,7 +35,7 @@ - + @@ -62,7 +62,7 @@ - + @@ -114,7 +114,7 @@ - + @@ -140,8 +140,8 @@ - - + + @@ -166,7 +166,7 @@ - + @@ -193,7 +193,7 @@ - + @@ -245,7 +245,7 @@ - + @@ -271,8 +271,8 @@ - - + + @@ -496,4 +496,4 @@ - + \ No newline at end of file diff --git a/sim/resources/stompypro/robot_fixed.xml b/sim/resources/stompypro/robot_fixed.xml index 8603351d..68b15adf 100644 --- a/sim/resources/stompypro/robot_fixed.xml +++ b/sim/resources/stompypro/robot_fixed.xml @@ -140,16 +140,16 @@ - - - - - - - - - - + + + + + + + + + + @@ -186,6 +186,6 @@ - + diff --git a/sim/sim2sim.py b/sim/sim2sim.py index 187f1a20..fe9927cd 100755 --- a/sim/sim2sim.py +++ b/sim/sim2sim.py @@ -50,7 +50,7 @@ class cmd: - vx = 0.5 + vx = 0.4 vy = 0.0 dyaw = 0.0 @@ -184,7 +184,7 @@ def run_mujoco(policy, cfg): tau = np.clip(tau, -cfg.robot_config.tau_limit, cfg.robot_config.tau_limit) # Clamp torques print(tau) - # breakpoint() + data.ctrl = tau mujoco.mj_step(model, data) @@ -223,7 +223,7 @@ class rewards: cycle_time = 0.4 class robot_config: - tau_factor = 0.85 + tau_factor = 2 tau_limit = np.array(list(robot.effort().values()) + list(robot.effort().values())) * tau_factor kps = np.array(list(robot.stiffness().values()) + list(robot.stiffness().values())) kds = np.array(list(robot.damping().values()) + list(robot.damping().values()))