Files
Gym_GPU/rl_game/get_up/config/t1_env_cfg.py
2026-04-01 04:40:00 -04:00

615 lines
26 KiB
Python

import torch
import isaaclab.envs.mdp as mdp
from isaaclab.envs import ManagerBasedRLEnvCfg, ManagerBasedRLEnv
from isaaclab.managers import ObservationGroupCfg as ObsGroup
from isaaclab.managers import ObservationTermCfg as ObsTerm
from isaaclab.managers import RewardTermCfg as RewTerm
from isaaclab.managers import TerminationTermCfg as DoneTerm
from isaaclab.managers import EventTermCfg as EventTerm
from isaaclab.envs.mdp import JointPositionActionCfg
from isaaclab.managers import SceneEntityCfg
from isaaclab.utils import configclass
from rl_game.get_up.env.t1_env import T1SceneCfg
def _contact_force_z(env: ManagerBasedRLEnv, sensor_cfg: SceneEntityCfg) -> torch.Tensor:
"""Sum positive vertical contact force on selected bodies."""
sensor = env.scene.sensors.get(sensor_cfg.name)
forces_z = sensor.data.net_forces_w[:, :, 2]
body_ids = sensor_cfg.body_ids
if body_ids is None:
selected_z = forces_z
else:
selected_z = forces_z[:, body_ids]
return torch.clamp(torch.sum(selected_z, dim=-1), min=0.0)
def root_height_obs(env: ManagerBasedRLEnv) -> torch.Tensor:
pelvis_idx, _ = env.scene["robot"].find_bodies("Trunk")
return env.scene["robot"].data.body_state_w[:, pelvis_idx[0], 2].unsqueeze(-1)
def head_height_obs(env: ManagerBasedRLEnv) -> torch.Tensor:
head_idx, _ = env.scene["robot"].find_bodies("H2")
return env.scene["robot"].data.body_state_w[:, head_idx[0], 2].unsqueeze(-1)
def foot_support_force_obs(env: ManagerBasedRLEnv, foot_sensor_cfg: SceneEntityCfg, norm_force: float = 120.0) -> torch.Tensor:
foot_force_z = _contact_force_z(env, foot_sensor_cfg)
return torch.tanh(foot_force_z / norm_force).unsqueeze(-1)
def arm_support_force_obs(env: ManagerBasedRLEnv, arm_sensor_cfg: SceneEntityCfg, norm_force: float = 120.0) -> torch.Tensor:
arm_force_z = _contact_force_z(env, arm_sensor_cfg)
return torch.tanh(arm_force_z / norm_force).unsqueeze(-1)
def contact_balance_obs(
env: ManagerBasedRLEnv,
foot_sensor_cfg: SceneEntityCfg,
arm_sensor_cfg: SceneEntityCfg,
) -> torch.Tensor:
foot_force_z = _contact_force_z(env, foot_sensor_cfg)
arm_force_z = _contact_force_z(env, arm_sensor_cfg)
total_support = foot_force_z + arm_force_z + 1e-6
foot_support_ratio = torch.clamp(foot_force_z / total_support, min=0.0, max=1.0)
return foot_support_ratio.unsqueeze(-1)
def reset_root_state_bimodal_lie_pose(
env: ManagerBasedRLEnv,
env_ids: torch.Tensor,
asset_cfg: SceneEntityCfg,
roll_range: tuple[float, float],
pitch_abs_range: tuple[float, float],
yaw_abs_range: tuple[float, float],
x_range: tuple[float, float],
y_range: tuple[float, float],
z_range: tuple[float, float],
):
"""Reset with two lying modes around +pi/2 and -pi/2."""
robot = env.scene[asset_cfg.name]
num_resets = len(env_ids)
default_root_state = robot.data.default_root_state[env_ids].clone()
env_origins = env.scene.env_origins[env_ids]
for i, bounds in enumerate([x_range, y_range, z_range]):
v_min, v_max = bounds
rand_vals = torch.rand(num_resets, device=env.device)
default_root_state[:, i] = env_origins[:, i] + v_min + rand_vals * (v_max - v_min)
euler_angles = torch.zeros((num_resets, 3), device=env.device)
roll_min, roll_max = roll_range
euler_angles[:, 0] = roll_min + torch.rand(num_resets, device=env.device) * (roll_max - roll_min)
pitch_min, pitch_max = pitch_abs_range
pitch_mag = pitch_min + torch.rand(num_resets, device=env.device) * (pitch_max - pitch_min)
pitch_sign = torch.where(
torch.rand(num_resets, device=env.device) > 0.5,
torch.ones(num_resets, device=env.device),
-torch.ones(num_resets, device=env.device),
)
euler_angles[:, 1] = pitch_mag * pitch_sign
yaw_min, yaw_max = yaw_abs_range
yaw_mag = yaw_min + torch.rand(num_resets, device=env.device) * (yaw_max - yaw_min)
yaw_sign = torch.where(
torch.rand(num_resets, device=env.device) > 0.5,
torch.ones(num_resets, device=env.device),
-torch.ones(num_resets, device=env.device),
)
euler_angles[:, 2] = yaw_mag * yaw_sign
roll, pitch, yaw = euler_angles[:, 0], euler_angles[:, 1], euler_angles[:, 2]
cr, sr = torch.cos(roll * 0.5), torch.sin(roll * 0.5)
cp, sp = torch.cos(pitch * 0.5), torch.sin(pitch * 0.5)
cy, sy = torch.cos(yaw * 0.5), torch.sin(yaw * 0.5)
qw = cr * cp * cy + sr * sp * sy
qx = sr * cp * cy - cr * sp * sy
qy = cr * sp * cy + sr * cp * sy
qz = cr * cp * sy - sr * sp * cy
default_root_state[:, 3:7] = torch.stack([qw, qx, qy, qz], dim=-1)
robot.write_root_pose_to_sim(default_root_state[:, :7], env_ids)
robot.write_root_velocity_to_sim(default_root_state[:, 7:], env_ids)
def smooth_additive_getup_reward(
env: ManagerBasedRLEnv,
min_head_height: float,
min_pelvis_height: float,
foot_sensor_cfg: SceneEntityCfg,
arm_sensor_cfg: SceneEntityCfg,
head_track_gain: float = 7.0,
pelvis_track_gain: float = 3.2,
head_progress_gain: float = 3.5,
pelvis_progress_gain: float = 2.0,
head_clearance_gain: float = 2.8,
torso_track_gain: float = 4.2,
upright_track_gain: float = 3.6,
foot_support_gain: float = 2.0,
arm_release_gain: float = 1.2,
arm_push_gain: float = 2.2,
arm_push_threshold: float = 10.0,
arm_push_sharpness: float = 0.12,
head_sigma: float = 0.09,
pelvis_sigma: float = 0.08,
torso_sigma: float = 0.20,
upright_sigma: float = 0.22,
support_sigma: float = 0.30,
tuck_gain: float = 0.6,
no_foot_penalty_gain: float = 1.2,
horizontal_vel_penalty_gain: float = 0.25,
angular_vel_penalty_gain: float = 0.22,
split_penalty_gain: float = 2.5,
split_soft_limit: float = 0.33,
split_hard_limit: float = 0.44,
split_hard_penalty_gain: float = 9.0,
head_delta_gain: float = 18.0,
pelvis_delta_gain: float = 15.0,
internal_reward_scale: float = 0.45,
) -> torch.Tensor:
head_idx, _ = env.scene["robot"].find_bodies("H2")
pelvis_idx, _ = env.scene["robot"].find_bodies("Trunk")
foot_indices, _ = env.scene["robot"].find_bodies(".*_foot_link")
head_pos = env.scene["robot"].data.body_state_w[:, head_idx[0], :3]
pelvis_pos = env.scene["robot"].data.body_state_w[:, pelvis_idx[0], :3]
head_h = head_pos[:, 2]
pelvis_h = pelvis_pos[:, 2]
root_lin_vel_w = env.scene["robot"].data.root_lin_vel_w
root_lin_speed_xy = torch.norm(root_lin_vel_w[:, :2], dim=-1)
root_ang_speed = torch.norm(env.scene["robot"].data.root_ang_vel_w, dim=-1)
prev_head_key = "prev_head_height"
prev_pelvis_key = "prev_pelvis_height"
if prev_head_key not in env.extras:
env.extras[prev_head_key] = head_h.clone()
if prev_pelvis_key not in env.extras:
env.extras[prev_pelvis_key] = pelvis_h.clone()
prev_head_h = env.extras[prev_head_key]
prev_pelvis_h = env.extras[prev_pelvis_key]
# Dense progress reward: positive-only height improvements help break plateaus.
head_delta = torch.clamp(head_h - prev_head_h, min=0.0, max=0.05)
pelvis_delta = torch.clamp(pelvis_h - prev_pelvis_h, min=0.0, max=0.05)
projected_gravity = env.scene["robot"].data.projected_gravity_b
gravity_error = torch.norm(projected_gravity[:, :2], dim=-1)
upright_ratio = torch.clamp(1.0 - gravity_error, min=0.0, max=1.0)
torso_vec = head_pos - pelvis_pos
torso_vec_norm = torso_vec / (torch.norm(torso_vec, dim=-1, keepdim=True) + 1e-5)
torso_alignment = torch.clamp(torso_vec_norm[:, 2], min=0.0, max=1.0)
foot_force_z = _contact_force_z(env, foot_sensor_cfg)
arm_force_z = _contact_force_z(env, arm_sensor_cfg)
total_support = foot_force_z + arm_force_z + 1e-6
foot_support_ratio = torch.clamp(foot_force_z / total_support, min=0.0, max=1.0)
arm_support_ratio = torch.clamp(arm_force_z / total_support, min=0.0, max=1.0)
head_track = torch.exp(-0.5 * torch.square((head_h - min_head_height) / head_sigma))
pelvis_track = torch.exp(-0.5 * torch.square((pelvis_h - min_pelvis_height) / pelvis_sigma))
# Dense height-progress shaping: provide reward signal all the way from lying to standing.
head_progress = torch.clamp(head_h / min_head_height, min=0.0, max=1.0)
pelvis_progress = torch.clamp(pelvis_h / min_pelvis_height, min=0.0, max=1.0)
# Encourage "head up" posture: head should stay clearly above pelvis.
head_clearance = torch.clamp((head_h - pelvis_h) / 0.45, min=0.0, max=1.0)
torso_track = torch.exp(-0.5 * torch.square((1.0 - torso_alignment) / torso_sigma))
upright_track = torch.exp(-0.5 * torch.square((1.0 - upright_ratio) / upright_sigma))
foot_support_track = torch.exp(-0.5 * torch.square((1.0 - foot_support_ratio) / support_sigma))
arm_release_track = torch.exp(-0.5 * torch.square(arm_support_ratio / support_sigma))
# Two-stage arm shaping:
# - early phase: encourage arm push to lift body
# - later phase: encourage releasing arm support for stand-up posture
push_phase = torch.sigmoid((0.5 - pelvis_progress) * 20.0)
release_phase = 1.0 - push_phase
arm_push_signal = torch.sigmoid((arm_force_z - arm_push_threshold) * arm_push_sharpness)
arm_push_reward = arm_push_signal * push_phase
arm_release_reward = arm_release_track * release_phase
feet_center_xy = torch.mean(env.scene["robot"].data.body_state_w[:, foot_indices, :2], dim=1)
pelvis_xy = pelvis_pos[:, :2]
feet_to_pelvis_dist = torch.norm(feet_center_xy - pelvis_xy, dim=-1)
tuck_legs_reward = torch.exp(-2.0 * feet_to_pelvis_dist)
posture_reward = (
head_track_gain * head_track
+ pelvis_track_gain * pelvis_track
+ head_progress_gain * head_progress
+ pelvis_progress_gain * pelvis_progress
+ head_delta_gain * head_delta
+ pelvis_delta_gain * pelvis_delta
+ head_clearance_gain * head_clearance
+ torso_track_gain * torso_track
+ upright_track_gain * upright_track
+ foot_support_gain * foot_support_track
+ arm_release_gain * arm_release_reward
+ arm_push_gain * arm_push_reward
+ tuck_gain * tuck_legs_reward
)
no_foot_penalty = -no_foot_penalty_gain * (1.0 - foot_support_ratio)
horizontal_velocity_penalty = -horizontal_vel_penalty_gain * root_lin_speed_xy
angular_velocity_penalty = -angular_vel_penalty_gain * root_ang_speed
left_foot_idx, _ = env.scene["robot"].find_bodies(".*left.*foot.*")
right_foot_idx, _ = env.scene["robot"].find_bodies(".*right.*foot.*")
if len(left_foot_idx) > 0 and len(right_foot_idx) > 0:
left_foot_pos = env.scene["robot"].data.body_state_w[:, left_foot_idx[0], :3]
right_foot_pos = env.scene["robot"].data.body_state_w[:, right_foot_idx[0], :3]
feet_distance = torch.norm(left_foot_pos[:, :2] - right_foot_pos[:, :2], dim=-1)
# Two-stage anti-split penalty:
# - soft: penalize widening beyond normal stance width
# - hard: strongly suppress large split postures
split_soft_excess = torch.clamp(feet_distance - split_soft_limit, min=0.0)
split_hard_excess = torch.clamp(feet_distance - split_hard_limit, min=0.0)
splits_penalty = (
-split_penalty_gain * split_soft_excess
-split_hard_penalty_gain * torch.square(split_hard_excess)
)
else:
splits_penalty = torch.zeros_like(head_h)
total_reward = (
posture_reward
+ no_foot_penalty
+ horizontal_velocity_penalty
+ angular_velocity_penalty
+ splits_penalty
)
env.extras[prev_head_key] = head_h.detach()
env.extras[prev_pelvis_key] = pelvis_h.detach()
# Down-scale dense shaping to make success bonus relatively more dominant.
return internal_reward_scale * total_reward
def ground_farming_timeout(env: ManagerBasedRLEnv, max_time: float, height_threshold: float) -> torch.Tensor:
pelvis_idx, _ = env.scene["robot"].find_bodies("Trunk")
pelvis_h = env.scene["robot"].data.body_state_w[:, pelvis_idx[0], 2]
episode_time = env.episode_length_buf * env.step_dt
return ((episode_time > max_time) & (pelvis_h < height_threshold)).bool()
def is_supported_standing(
env: ManagerBasedRLEnv,
foot_sensor_cfg: SceneEntityCfg,
arm_sensor_cfg: SceneEntityCfg,
min_head_height: float,
min_pelvis_height: float,
max_angle_error: float,
velocity_threshold: float,
min_foot_support_force: float,
max_arm_support_force: float,
standing_time: float,
timer_name: str = "stable_timer",
) -> torch.Tensor:
head_idx, _ = env.scene["robot"].find_bodies("H2")
pelvis_idx, _ = env.scene["robot"].find_bodies("Trunk")
head_h = env.scene["robot"].data.body_state_w[:, head_idx[0], 2]
pelvis_h = env.scene["robot"].data.body_state_w[:, pelvis_idx[0], 2]
gravity_error = torch.norm(env.scene["robot"].data.projected_gravity_b[:, :2], dim=-1)
root_vel_norm = torch.norm(env.scene["robot"].data.root_lin_vel_w, dim=-1)
foot_force_z = _contact_force_z(env, foot_sensor_cfg)
arm_force_z = _contact_force_z(env, arm_sensor_cfg)
is_stable_now = (
(head_h > min_head_height)
& (pelvis_h > min_pelvis_height)
& (gravity_error < max_angle_error)
& (root_vel_norm < velocity_threshold)
& (foot_force_z > min_foot_support_force)
& (arm_force_z < max_arm_support_force)
)
if timer_name not in env.extras:
env.extras[timer_name] = torch.zeros(env.num_envs, device=env.device)
dt = env.physics_dt * env.cfg.decimation
env.extras[timer_name] = torch.where(
is_stable_now, env.extras[timer_name] + dt, torch.zeros_like(env.extras[timer_name])
)
return (env.extras[timer_name] > standing_time).bool()
def base_ang_vel_penalty(env: ManagerBasedRLEnv) -> torch.Tensor:
return torch.sum(torch.square(env.scene["robot"].data.root_ang_vel_w), dim=-1)
def airborne_flip_penalty(
env: ManagerBasedRLEnv,
foot_sensor_cfg: SceneEntityCfg,
arm_sensor_cfg: SceneEntityCfg,
full_support_sensor_cfg: SceneEntityCfg,
full_support_threshold: float = 12.0,
min_pelvis_height: float = 0.34,
contact_force_threshold: float = 6.0,
flip_ang_vel_threshold: float = 5.4,
inverted_gravity_threshold: float = 0.45,
) -> torch.Tensor:
pelvis_idx, _ = env.scene["robot"].find_bodies("Trunk")
pelvis_h = env.scene["robot"].data.body_state_w[:, pelvis_idx[0], 2]
projected_gravity = env.scene["robot"].data.projected_gravity_b
ang_vel = env.scene["robot"].data.root_ang_vel_w
foot_force_z = _contact_force_z(env, foot_sensor_cfg)
arm_force_z = _contact_force_z(env, arm_sensor_cfg)
support_force = foot_force_z + arm_force_z
no_support_ratio = torch.exp(-support_force / contact_force_threshold)
full_support_force_z = _contact_force_z(env, full_support_sensor_cfg)
is_fully_airborne = full_support_force_z < full_support_threshold
airborne_ratio = torch.sigmoid((pelvis_h - min_pelvis_height) * 10.0) * no_support_ratio * is_fully_airborne.float()
ang_speed = torch.norm(ang_vel, dim=-1)
spin_excess = torch.clamp(ang_speed - flip_ang_vel_threshold, min=0.0)
inverted_ratio = torch.clamp((projected_gravity[:, 2] - inverted_gravity_threshold) / (1.0 - inverted_gravity_threshold), min=0.0, max=1.0)
return airborne_ratio * (torch.square(spin_excess) + 0.1 * inverted_ratio)
def airborne_flip_termination(
env: ManagerBasedRLEnv,
foot_sensor_cfg: SceneEntityCfg,
arm_sensor_cfg: SceneEntityCfg,
full_support_sensor_cfg: SceneEntityCfg,
full_support_threshold: float = 12.0,
min_pelvis_height: float = 0.34,
contact_force_threshold: float = 6.0,
inverted_gravity_threshold: float = 0.45,
flip_ang_vel_threshold: float = 5.6,
persist_time: float = 0.18,
timer_name: str = "airborne_flip_timer",
) -> torch.Tensor:
pelvis_idx, _ = env.scene["robot"].find_bodies("Trunk")
pelvis_h = env.scene["robot"].data.body_state_w[:, pelvis_idx[0], 2]
projected_gravity = env.scene["robot"].data.projected_gravity_b
ang_vel = env.scene["robot"].data.root_ang_vel_w
foot_force_z = _contact_force_z(env, foot_sensor_cfg)
arm_force_z = _contact_force_z(env, arm_sensor_cfg)
has_no_support = (foot_force_z < contact_force_threshold) & (arm_force_z < contact_force_threshold)
full_support_force_z = _contact_force_z(env, full_support_sensor_cfg)
is_fully_airborne = full_support_force_z < full_support_threshold
is_airborne = (pelvis_h > min_pelvis_height) & has_no_support & is_fully_airborne
is_inverted = projected_gravity[:, 2] > inverted_gravity_threshold
is_fast_spin = torch.norm(ang_vel, dim=-1) > flip_ang_vel_threshold
bad_state = is_airborne & (is_inverted | is_fast_spin)
if timer_name not in env.extras:
env.extras[timer_name] = torch.zeros(env.num_envs, device=env.device)
dt = env.physics_dt * env.cfg.decimation
env.extras[timer_name] = torch.where(
bad_state, env.extras[timer_name] + dt, torch.zeros_like(env.extras[timer_name])
)
return (env.extras[timer_name] > persist_time).bool()
T1_JOINT_NAMES = [
"AAHead_yaw", "Head_pitch",
"Left_Shoulder_Pitch", "Left_Shoulder_Roll", "Left_Elbow_Pitch", "Left_Elbow_Yaw",
"Right_Shoulder_Pitch", "Right_Shoulder_Roll", "Right_Elbow_Pitch", "Right_Elbow_Yaw",
"Waist",
"Left_Hip_Pitch", "Right_Hip_Pitch", "Left_Hip_Roll", "Right_Hip_Roll",
"Left_Hip_Yaw", "Right_Hip_Yaw", "Left_Knee_Pitch", "Right_Knee_Pitch",
"Left_Ankle_Pitch", "Right_Ankle_Pitch", "Left_Ankle_Roll", "Right_Ankle_Roll",
]
@configclass
class T1ObservationCfg:
@configclass
class PolicyCfg(ObsGroup):
concatenate_terms = True
root_height = ObsTerm(func=root_height_obs)
head_height = ObsTerm(func=head_height_obs)
foot_support_force = ObsTerm(
func=foot_support_force_obs,
params={"foot_sensor_cfg": SceneEntityCfg("contact_sensor", body_names=[".*_foot_link"]), "norm_force": 120.0},
)
arm_support_force = ObsTerm(
func=arm_support_force_obs,
params={"arm_sensor_cfg": SceneEntityCfg("contact_sensor", body_names=["A[LR][23]", ".*_hand_link"]), "norm_force": 120.0},
)
foot_support_ratio = ObsTerm(
func=contact_balance_obs,
params={
"foot_sensor_cfg": SceneEntityCfg("contact_sensor", body_names=[".*_foot_link"]),
"arm_sensor_cfg": SceneEntityCfg("contact_sensor", body_names=["A[LR][23]", ".*_hand_link"]),
},
)
base_lin_vel = ObsTerm(func=mdp.base_lin_vel)
base_ang_vel = ObsTerm(func=mdp.base_ang_vel)
projected_gravity = ObsTerm(func=mdp.projected_gravity)
joint_pos = ObsTerm(func=mdp.joint_pos_rel, params={"asset_cfg": SceneEntityCfg("robot", joint_names=T1_JOINT_NAMES)})
joint_vel = ObsTerm(func=mdp.joint_vel_rel, params={"asset_cfg": SceneEntityCfg("robot", joint_names=T1_JOINT_NAMES)})
actions = ObsTerm(func=mdp.last_action)
policy = PolicyCfg()
@configclass
class T1EventCfg:
reset_robot_rotation = EventTerm(
func=reset_root_state_bimodal_lie_pose,
params={
"asset_cfg": SceneEntityCfg("robot"),
"roll_range": (-0.15, 0.15),
"pitch_abs_range": (1.40, 1.70),
"yaw_abs_range": (0.0, 3.14),
"x_range": (-0.04, 0.04),
"y_range": (-0.04, 0.04),
"z_range": (0.10, 0.18),
},
mode="reset",
)
@configclass
class T1ActionCfg:
head_action = JointPositionActionCfg(
asset_name="robot",
joint_names=[
"AAHead_yaw", "Head_pitch",
],
scale=0.3,
use_default_offset=True
)
arm_action = JointPositionActionCfg(
asset_name="robot",
joint_names=[
"Left_Shoulder_Pitch", "Left_Shoulder_Roll", "Left_Elbow_Pitch", "Left_Elbow_Yaw",
"Right_Shoulder_Pitch", "Right_Shoulder_Roll", "Right_Elbow_Pitch", "Right_Elbow_Yaw",
],
scale=1.2,
use_default_offset=True,
)
torso_action = JointPositionActionCfg(
asset_name="robot",
joint_names=[
"Waist"
],
scale=0.3,
use_default_offset=True
)
leg_action = JointPositionActionCfg(
asset_name="robot",
joint_names=[
"Left_Hip_Pitch", "Right_Hip_Pitch", "Left_Hip_Roll", "Right_Hip_Roll", "Left_Hip_Yaw",
"Right_Hip_Yaw", "Left_Knee_Pitch", "Right_Knee_Pitch", "Left_Ankle_Pitch", "Right_Ankle_Pitch",
"Left_Ankle_Roll", "Right_Ankle_Roll",
],
scale=1.5,
use_default_offset=True,
)
@configclass
class T1GetUpRewardCfg:
smooth_getup = RewTerm(
func=smooth_additive_getup_reward,
weight=3.0,
params={
"min_head_height": 1.02,
"min_pelvis_height": 0.78,
"foot_sensor_cfg": SceneEntityCfg("contact_sensor", body_names=[".*_foot_link"]),
"arm_sensor_cfg": SceneEntityCfg("contact_sensor", body_names=["A[LR][23]", ".*_hand_link"]),
"head_track_gain": 7.0,
"pelvis_track_gain": 3.2,
"head_progress_gain": 3.5,
"pelvis_progress_gain": 2.0,
"head_clearance_gain": 2.8,
"torso_track_gain": 4.2,
"upright_track_gain": 3.6,
"foot_support_gain": 2.0,
"arm_release_gain": 1.2,
"arm_push_gain": 2.2,
"arm_push_threshold": 10.0,
"arm_push_sharpness": 0.12,
"head_sigma": 0.09,
"pelvis_sigma": 0.08,
"torso_sigma": 0.20,
"upright_sigma": 0.22,
"support_sigma": 0.30,
"tuck_gain": 0.6,
"no_foot_penalty_gain": 1.2,
"horizontal_vel_penalty_gain": 0.18,
"angular_vel_penalty_gain": 0.16,
"split_penalty_gain": 2.8,
"split_soft_limit": 0.33,
"split_hard_limit": 0.44,
"split_hard_penalty_gain": 9.0,
"head_delta_gain": 18.0,
"pelvis_delta_gain": 15.0,
"internal_reward_scale": 0.45,
},
)
anti_airborne_flip = RewTerm(
func=airborne_flip_penalty,
weight=-0.18,
params={
"foot_sensor_cfg": SceneEntityCfg("contact_sensor", body_names=[".*_foot_link"]),
"arm_sensor_cfg": SceneEntityCfg("contact_sensor", body_names=["A[LR][23]", ".*_hand_link"]),
"full_support_sensor_cfg": SceneEntityCfg("contact_sensor", body_names=[".*"]),
"full_support_threshold": 12.0,
"min_pelvis_height": 0.34,
"contact_force_threshold": 6.0,
"flip_ang_vel_threshold": 5.4,
"inverted_gravity_threshold": 0.45,
},
)
base_ang_vel = RewTerm(func=base_ang_vel_penalty, weight=-0.007)
action_rate = RewTerm(func=mdp.action_rate_l2, weight=-0.009)
joint_vel = RewTerm(func=mdp.joint_vel_l2, weight=-0.005)
action_penalty = RewTerm(func=mdp.action_l2, weight=-0.005)
is_success_bonus = RewTerm(
func=is_supported_standing,
weight=100.0,
params={
"foot_sensor_cfg": SceneEntityCfg("contact_sensor", body_names=[".*_foot_link"]),
"arm_sensor_cfg": SceneEntityCfg("contact_sensor", body_names=["A[LR][23]", ".*_hand_link"]),
"min_head_height": 1.05,
"min_pelvis_height": 0.65,
"max_angle_error": 0.25,
"velocity_threshold": 0.15,
"min_foot_support_force": 34.0,
"max_arm_support_force": 20.0,
"standing_time": 0.40,
"timer_name": "reward_stable_timer",
},
)
@configclass
class T1GetUpTerminationsCfg:
time_out = DoneTerm(func=mdp.time_out)
anti_farming = DoneTerm(func=ground_farming_timeout, params={"max_time": 5.5, "height_threshold": 0.24})
illegal_contact = DoneTerm(
func=mdp.illegal_contact,
params={"sensor_cfg": SceneEntityCfg("contact_sensor", body_names=["Trunk"]), "threshold": 200.0},
)
standing_success = DoneTerm(
func=is_supported_standing,
params={
"foot_sensor_cfg": SceneEntityCfg("contact_sensor", body_names=[".*_foot_link"]),
"arm_sensor_cfg": SceneEntityCfg("contact_sensor", body_names=["A[LR][23]", ".*_hand_link"]),
"min_head_height": 1.10,
"min_pelvis_height": 0.83,
"max_angle_error": 0.10,
"velocity_threshold": 0.10,
"min_foot_support_force": 36.0,
"max_arm_support_force": 16.0,
"standing_time": 1.0,
"timer_name": "term_stable_timer",
},
)
joint_velocity_limit = DoneTerm(
func=mdp.joint_vel_out_of_manual_limit,
params={"asset_cfg": SceneEntityCfg("robot"), "max_velocity": 50.0},
)
airborne_flip_abort = DoneTerm(
func=airborne_flip_termination,
params={
"foot_sensor_cfg": SceneEntityCfg("contact_sensor", body_names=[".*_foot_link"]),
"arm_sensor_cfg": SceneEntityCfg("contact_sensor", body_names=["A[LR][23]", ".*_hand_link"]),
"full_support_sensor_cfg": SceneEntityCfg("contact_sensor", body_names=[".*"]),
"full_support_threshold": 12.0,
"min_pelvis_height": 0.34,
"contact_force_threshold": 6.0,
"inverted_gravity_threshold": 0.45,
"flip_ang_vel_threshold": 5.6,
"persist_time": 0.18,
"timer_name": "airborne_flip_timer",
},
)
@configclass
class T1EnvCfg(ManagerBasedRLEnvCfg):
scene = T1SceneCfg(num_envs=8192, env_spacing=5.0)
observations = T1ObservationCfg()
rewards = T1GetUpRewardCfg()
terminations = T1GetUpTerminationsCfg()
events = T1EventCfg()
actions = T1ActionCfg()
episode_length_s = 10.0
decimation = 4