Files
Gym_GPU/rl_game/get_up/config/t1_env_cfg.py

826 lines
34 KiB
Python

import torch
from pathlib import Path
import yaml
import isaaclab.envs.mdp as mdp
from isaaclab.envs import ManagerBasedRLEnvCfg, ManagerBasedRLEnv
from isaaclab.managers import ObservationGroupCfg as ObsGroup
from isaaclab.managers import ObservationTermCfg as ObsTerm
from isaaclab.managers import RewardTermCfg as RewTerm
from isaaclab.managers import TerminationTermCfg as DoneTerm
from isaaclab.managers import EventTermCfg as EventTerm
from isaaclab.envs.mdp import JointPositionActionCfg
from isaaclab.managers import SceneEntityCfg
from isaaclab.utils import configclass
from rl_game.get_up.amp.amp_rewards import amp_style_prior_reward
from rl_game.get_up.env.t1_env import T1SceneCfg
_PROJECT_ROOT = Path(__file__).resolve().parents[3]
_DEFAULT_FRONT_KEYFRAME_YAML = str(_PROJECT_ROOT / "behaviors" / "custom" / "keyframe" / "get_up" / "get_up_front.yaml")
_DEFAULT_BACK_KEYFRAME_YAML = str(_PROJECT_ROOT / "behaviors" / "custom" / "keyframe" / "get_up" / "get_up_back.yaml")
def _contact_force_z(env: ManagerBasedRLEnv, sensor_cfg: SceneEntityCfg) -> torch.Tensor:
"""Sum positive vertical contact force on selected bodies."""
sensor = env.scene.sensors.get(sensor_cfg.name)
forces_z = sensor.data.net_forces_w[:, :, 2]
body_ids = sensor_cfg.body_ids
if body_ids is None:
selected_z = forces_z
else:
selected_z = forces_z[:, body_ids]
return torch.clamp(torch.sum(selected_z, dim=-1), min=0.0)
def _safe_tensor(x: torch.Tensor, nan: float = 0.0, pos: float = 1e3, neg: float = -1e3) -> torch.Tensor:
"""Keep reward pipeline numerically stable."""
return torch.nan_to_num(x, nan=nan, posinf=pos, neginf=neg)
def _resolve_path(path_like: str) -> Path:
path = Path(path_like).expanduser()
if path.is_absolute():
return path
return (_PROJECT_ROOT / path).resolve()
def _interpolate_keyframes(
keyframes: list[dict],
sample_dt: float,
) -> tuple[torch.Tensor, list[str]]:
"""Interpolate sparse keyframes into dense [T, M] motor angle table in radians."""
if len(keyframes) == 0:
raise ValueError("No keyframes in motion yaml")
motor_names: set[str] = set()
for frame in keyframes:
motors = frame.get("motor_positions", {})
if isinstance(motors, dict):
motor_names.update(motors.keys())
names = sorted(motor_names)
if len(names) == 0:
raise ValueError("No motor_positions found in keyframes")
frame_count = len(keyframes)
times = torch.zeros(frame_count, dtype=torch.float32)
values = torch.full((frame_count, len(names)), float("nan"), dtype=torch.float32)
name_to_idx = {name: idx for idx, name in enumerate(names)}
elapsed = 0.0
for i, frame in enumerate(keyframes):
elapsed += max(float(frame.get("delta", 0.1)), 1e-4)
times[i] = elapsed
motors = frame.get("motor_positions", {})
if not isinstance(motors, dict):
continue
for motor_name, motor_deg in motors.items():
if motor_name not in name_to_idx:
continue
values[i, name_to_idx[motor_name]] = float(motor_deg) * (torch.pi / 180.0)
values[0] = torch.nan_to_num(values[0], nan=0.0)
for i in range(1, frame_count):
values[i] = torch.where(torch.isnan(values[i]), values[i - 1], values[i])
sample_dt = max(float(sample_dt), 1e-3)
sample_times = torch.arange(0.0, float(times[-1]) + 1e-6, sample_dt, dtype=torch.float32)
sample_times[0] = max(sample_times[0], 1e-6)
sample_times = torch.clamp(sample_times, min=times[0], max=times[-1])
upper = torch.searchsorted(times, sample_times, right=True)
upper = torch.clamp(upper, min=1, max=frame_count - 1)
lower = upper - 1
t0 = times.index_select(0, lower)
t1 = times.index_select(0, upper)
v0 = values.index_select(0, lower)
v1 = values.index_select(0, upper)
alpha = ((sample_times - t0) / (t1 - t0 + 1e-6)).unsqueeze(-1)
interp = v0 + alpha * (v1 - v0)
return interp, names
def _motion_table_from_yaml(
yaml_path: str,
joint_names: list[str],
sample_dt: float,
) -> tuple[torch.Tensor, float]:
"""
Build [T, J] target joint motion from get-up keyframes.
Unknown joints default to 0.0 (neutral).
"""
path = _resolve_path(yaml_path)
if not path.is_file():
raise FileNotFoundError(f"Motion yaml not found: {path}")
with path.open("r", encoding="utf-8") as f:
payload = yaml.safe_load(f) or {}
keyframes = payload.get("keyframes", [])
if not isinstance(keyframes, list):
raise ValueError(f"Invalid keyframes in yaml: {path}")
motor_table, motor_names = _interpolate_keyframes(keyframes, sample_dt=sample_dt)
motor_to_idx = {name: idx for idx, name in enumerate(motor_names)}
joint_table = torch.zeros((motor_table.shape[0], len(joint_names)), dtype=torch.float32)
sign_flip_bases = {"Shoulder_Roll", "Elbow_Yaw", "Hip_Roll", "Hip_Yaw", "Ankle_Roll"}
for j, joint_name in enumerate(joint_names):
alias = "Head_yaw" if joint_name == "AAHead_yaw" else joint_name
base = alias.replace("Left_", "").replace("Right_", "")
src = None
if alias in motor_to_idx:
src = alias
elif base in motor_to_idx:
src = base
if src is None:
continue
sign = -1.0 if alias.startswith("Right_") and base in sign_flip_bases else 1.0
joint_table[:, j] = sign * motor_table[:, motor_to_idx[src]]
duration_s = max(float(motor_table.shape[0] - 1) * sample_dt, sample_dt)
return joint_table, duration_s
def _get_keyframe_motion_cache(
env: ManagerBasedRLEnv,
front_motion_path: str,
back_motion_path: str,
sample_dt: float,
):
"""Cache interpolated front/back get-up motion priors on env device."""
cache_key = "getup_keyframe_motion_cache"
sig = (str(front_motion_path), str(back_motion_path), float(sample_dt))
cached = env.extras.get(cache_key, None)
if isinstance(cached, dict) and cached.get("sig") == sig:
return cached
front_table, front_duration = _motion_table_from_yaml(front_motion_path, T1_JOINT_NAMES, sample_dt)
back_table, back_duration = _motion_table_from_yaml(back_motion_path, T1_JOINT_NAMES, sample_dt)
cache = {
"sig": sig,
"sample_dt": float(sample_dt),
"front_motion": front_table.to(device=env.device),
"back_motion": back_table.to(device=env.device),
"front_duration": float(front_duration),
"back_duration": float(back_duration),
}
env.extras[cache_key] = cache
return cache
def keyframe_motion_prior_reward(
env: ManagerBasedRLEnv,
front_motion_path: str = _DEFAULT_FRONT_KEYFRAME_YAML,
back_motion_path: str = _DEFAULT_BACK_KEYFRAME_YAML,
sample_dt: float = 0.04,
pose_sigma: float = 0.42,
vel_sigma: float = 1.6,
joint_subset: str = "all",
internal_reward_scale: float = 1.0,
) -> torch.Tensor:
"""
DeepMimic-style dense reward from keyframe get-up motions.
- mode=1 uses front sequence
- mode=0 uses back sequence
"""
motion_cache = _get_keyframe_motion_cache(
env=env,
front_motion_path=front_motion_path,
back_motion_path=back_motion_path,
sample_dt=sample_dt,
)
# env.extras["getup_mode"]: 1=front, 0=back
getup_mode = env.extras.get("getup_mode", None)
if not isinstance(getup_mode, torch.Tensor) or getup_mode.shape[0] != env.num_envs:
getup_mode = torch.zeros(env.num_envs, device=env.device, dtype=torch.long)
env.extras["getup_mode"] = getup_mode
getup_mode = getup_mode.to(dtype=torch.long)
use_front = getup_mode == 1
step_dt = env.step_dt
phase_time = torch.clamp(env.episode_length_buf * step_dt, min=0.0)
front_motion = motion_cache["front_motion"]
back_motion = motion_cache["back_motion"]
front_idx = torch.clamp((phase_time / sample_dt).to(torch.long), min=0, max=front_motion.shape[0] - 1)
back_idx = torch.clamp((phase_time / sample_dt).to(torch.long), min=0, max=back_motion.shape[0] - 1)
target_front = front_motion.index_select(0, front_idx)
target_back = back_motion.index_select(0, back_idx)
target_pos = torch.where(use_front.unsqueeze(-1), target_front, target_back)
robot_data = env.scene["robot"].data
current_pos = _safe_tensor(robot_data.joint_pos)
current_vel = _safe_tensor(robot_data.joint_vel)
if joint_subset == "legs":
legs_idx, _ = env.scene["robot"].find_joints(".*(Hip|Knee|Ankle).*")
if len(legs_idx) > 0:
ids = torch.tensor(legs_idx, device=env.device, dtype=torch.long)
current_pos = current_pos.index_select(1, ids)
current_vel = current_vel.index_select(1, ids)
target_pos = target_pos.index_select(1, ids)
elif joint_subset == "core":
core_idx, _ = env.scene["robot"].find_joints(".*(Waist|Hip|Knee|Ankle).*")
if len(core_idx) > 0:
ids = torch.tensor(core_idx, device=env.device, dtype=torch.long)
current_pos = current_pos.index_select(1, ids)
current_vel = current_vel.index_select(1, ids)
target_pos = target_pos.index_select(1, ids)
target_vel = torch.zeros_like(target_pos)
pos_mse = torch.mean(torch.square(current_pos - target_pos), dim=-1)
vel_mse = torch.mean(torch.square(current_vel - target_vel), dim=-1)
pose_sigma = max(float(pose_sigma), 1e-3)
vel_sigma = max(float(vel_sigma), 1e-3)
pose_reward = torch.exp(-pos_mse / pose_sigma)
vel_reward = torch.exp(-vel_mse / vel_sigma)
prior_reward = 0.75 * pose_reward + 0.25 * vel_reward
prior_reward = _safe_tensor(prior_reward, nan=0.0, pos=1.0, neg=0.0)
log_dict = env.extras.get("log", {})
if isinstance(log_dict, dict):
log_dict["keyframe_prior_mean"] = torch.mean(prior_reward).detach().item()
log_dict["keyframe_front_ratio"] = torch.mean(use_front.float()).detach().item()
env.extras["log"] = log_dict
return internal_reward_scale * prior_reward
def root_height_obs(env: ManagerBasedRLEnv) -> torch.Tensor:
pelvis_idx, _ = env.scene["robot"].find_bodies("Trunk")
return env.scene["robot"].data.body_state_w[:, pelvis_idx[0], 2].unsqueeze(-1)
def head_height_obs(env: ManagerBasedRLEnv) -> torch.Tensor:
head_idx, _ = env.scene["robot"].find_bodies("H2")
return env.scene["robot"].data.body_state_w[:, head_idx[0], 2].unsqueeze(-1)
def foot_support_force_obs(env: ManagerBasedRLEnv, foot_sensor_cfg: SceneEntityCfg, norm_force: float = 120.0) -> torch.Tensor:
foot_force_z = _contact_force_z(env, foot_sensor_cfg)
return torch.tanh(foot_force_z / norm_force).unsqueeze(-1)
def arm_support_force_obs(env: ManagerBasedRLEnv, arm_sensor_cfg: SceneEntityCfg, norm_force: float = 120.0) -> torch.Tensor:
arm_force_z = _contact_force_z(env, arm_sensor_cfg)
return torch.tanh(arm_force_z / norm_force).unsqueeze(-1)
def contact_balance_obs(
env: ManagerBasedRLEnv,
foot_sensor_cfg: SceneEntityCfg,
arm_sensor_cfg: SceneEntityCfg,
) -> torch.Tensor:
foot_force_z = _contact_force_z(env, foot_sensor_cfg)
arm_force_z = _contact_force_z(env, arm_sensor_cfg)
total_support = foot_force_z + arm_force_z + 1e-6
foot_support_ratio = torch.clamp(foot_force_z / total_support, min=0.0, max=1.0)
return foot_support_ratio.unsqueeze(-1)
def reset_root_state_bimodal_lie_pose(
env: ManagerBasedRLEnv,
env_ids: torch.Tensor,
asset_cfg: SceneEntityCfg,
roll_range: tuple[float, float],
pitch_abs_range: tuple[float, float],
yaw_abs_range: tuple[float, float],
x_range: tuple[float, float],
y_range: tuple[float, float],
z_range: tuple[float, float],
):
"""Reset with two lying modes around +pi/2 and -pi/2."""
robot = env.scene[asset_cfg.name]
num_resets = len(env_ids)
default_root_state = robot.data.default_root_state[env_ids].clone()
env_origins = env.scene.env_origins[env_ids]
for i, bounds in enumerate([x_range, y_range, z_range]):
v_min, v_max = bounds
rand_vals = torch.rand(num_resets, device=env.device)
default_root_state[:, i] = env_origins[:, i] + v_min + rand_vals * (v_max - v_min)
euler_angles = torch.zeros((num_resets, 3), device=env.device)
roll_min, roll_max = roll_range
euler_angles[:, 0] = roll_min + torch.rand(num_resets, device=env.device) * (roll_max - roll_min)
pitch_min, pitch_max = pitch_abs_range
pitch_mag = pitch_min + torch.rand(num_resets, device=env.device) * (pitch_max - pitch_min)
pitch_sign = torch.where(
torch.rand(num_resets, device=env.device) > 0.5,
torch.ones(num_resets, device=env.device),
-torch.ones(num_resets, device=env.device),
)
euler_angles[:, 1] = pitch_mag * pitch_sign
# Cache get-up mode for motion priors: pitch<0 => front, pitch>=0 => back.
if "getup_mode" not in env.extras or not isinstance(env.extras.get("getup_mode"), torch.Tensor):
env.extras["getup_mode"] = torch.zeros(env.num_envs, device=env.device, dtype=torch.long)
env.extras["getup_mode"][env_ids] = (pitch_sign < 0.0).to(torch.long)
yaw_min, yaw_max = yaw_abs_range
yaw_mag = yaw_min + torch.rand(num_resets, device=env.device) * (yaw_max - yaw_min)
yaw_sign = torch.where(
torch.rand(num_resets, device=env.device) > 0.5,
torch.ones(num_resets, device=env.device),
-torch.ones(num_resets, device=env.device),
)
euler_angles[:, 2] = yaw_mag * yaw_sign
roll, pitch, yaw = euler_angles[:, 0], euler_angles[:, 1], euler_angles[:, 2]
cr, sr = torch.cos(roll * 0.5), torch.sin(roll * 0.5)
cp, sp = torch.cos(pitch * 0.5), torch.sin(pitch * 0.5)
cy, sy = torch.cos(yaw * 0.5), torch.sin(yaw * 0.5)
qw = cr * cp * cy + sr * sp * sy
qx = sr * cp * cy - cr * sp * sy
qy = cr * sp * cy + sr * cp * sy
qz = cr * cp * sy - sr * sp * cy
default_root_state[:, 3:7] = torch.stack([qw, qx, qy, qz], dim=-1)
robot.write_root_pose_to_sim(default_root_state[:, :7], env_ids)
robot.write_root_velocity_to_sim(default_root_state[:, 7:], env_ids)
def smooth_additive_getup_reward(
env: ManagerBasedRLEnv,
min_head_height: float,
min_pelvis_height: float,
foot_sensor_cfg: SceneEntityCfg,
arm_sensor_cfg: SceneEntityCfg,
upright_gain: float = 2.4,
pelvis_progress_gain: float = 1.8,
head_clearance_gain: float = 1.0,
foot_support_gain: float = 1.2,
arm_release_gain: float = 1.2,
knee_mid_bend_gain: float = 0.8,
knee_target: float = 1.0,
knee_sigma: float = 0.5,
hip_roll_penalty_gain: float = 0.5,
hip_roll_soft_limit: float = 0.42,
symmetry_penalty_gain: float = 0.2,
standing_vel_penalty_gain: float = 0.35,
standing_vel_gate_h: float = 0.65,
stand_core_gain: float = 2.4,
stand_upright_threshold: float = 0.82,
stand_foot_support_threshold: float = 0.65,
stand_arm_support_threshold: float = 0.25,
internal_reward_scale: float = 1.0,
) -> torch.Tensor:
# Cache expensive regex-based index lookups once per run.
idx_cache_key = "getup_idx_cache"
idx_cache = env.extras.get(idx_cache_key, None)
if not isinstance(idx_cache, dict):
idx_cache = {}
env.extras[idx_cache_key] = idx_cache
def _cached_joint_ids(cache_name: str, expr: str) -> torch.Tensor:
ids = idx_cache.get(cache_name, None)
if isinstance(ids, torch.Tensor):
return ids
joint_idx, _ = env.scene["robot"].find_joints(expr)
ids = torch.tensor(joint_idx, device=env.device, dtype=torch.long) if len(joint_idx) > 0 else torch.empty(0, device=env.device, dtype=torch.long)
idx_cache[cache_name] = ids
return ids
def _cached_body_id(cache_name: str, expr: str) -> int | None:
idx = idx_cache.get(cache_name, None)
if isinstance(idx, int):
return idx
body_idx, _ = env.scene["robot"].find_bodies(expr)
idx = int(body_idx[0]) if len(body_idx) > 0 else None
idx_cache[cache_name] = idx
return idx
joint_pos = _safe_tensor(env.scene["robot"].data.joint_pos)
head_id = _cached_body_id("head_id", "H2")
pelvis_id = _cached_body_id("pelvis_id", "Trunk")
head_h = env.scene["robot"].data.body_state_w[:, head_id, 2] if head_id is not None else torch.zeros(env.num_envs, device=env.device)
pelvis_h = env.scene["robot"].data.body_state_w[:, pelvis_id, 2] if pelvis_id is not None else torch.zeros(env.num_envs, device=env.device)
head_h = _safe_tensor(head_h, nan=0.0, pos=2.0, neg=-2.0)
pelvis_h = _safe_tensor(pelvis_h, nan=0.0, pos=2.0, neg=-2.0)
projected_gravity = _safe_tensor(env.scene["robot"].data.projected_gravity_b, nan=0.0, pos=2.0, neg=-2.0)
upright_ratio = torch.clamp(1.0 - torch.norm(projected_gravity[:, :2], dim=-1), min=0.0, max=1.0)
pelvis_progress = torch.clamp((pelvis_h - 0.20) / (min_pelvis_height - 0.20 + 1e-6), min=0.0, max=1.0)
head_clearance = torch.clamp((head_h - pelvis_h) / 0.45, min=0.0, max=1.0)
foot_force_z = _safe_tensor(_contact_force_z(env, foot_sensor_cfg), nan=0.0, pos=1e3, neg=0.0)
arm_force_z = _safe_tensor(_contact_force_z(env, arm_sensor_cfg), nan=0.0, pos=1e3, neg=0.0)
total_support = _safe_tensor(foot_force_z + arm_force_z + 1e-6, nan=1.0, pos=1e4, neg=1e-6)
foot_support_ratio = torch.clamp(foot_force_z / total_support, min=0.0, max=1.0)
arm_support_ratio = torch.clamp(arm_force_z / total_support, min=0.0, max=1.0)
release_phase = torch.sigmoid((pelvis_h - 0.58) * 8.0)
arm_release_reward = (1.0 - arm_support_ratio) * release_phase
knee_ids = _cached_joint_ids("knee_ids", ".*Knee_Pitch")
if knee_ids.numel() > 0:
knee_fold = torch.mean(torch.abs(joint_pos.index_select(1, knee_ids)), dim=-1)
knee_mid_bend = torch.exp(-0.5 * torch.square((knee_fold - knee_target) / knee_sigma))
else:
knee_mid_bend = torch.zeros_like(pelvis_h)
hip_roll_ids = _cached_joint_ids("hip_roll_ids", ".*Hip_Roll")
if hip_roll_ids.numel() > 0:
hip_roll_abs = torch.mean(torch.abs(joint_pos.index_select(1, hip_roll_ids)), dim=-1)
else:
hip_roll_abs = torch.zeros_like(pelvis_h)
hip_roll_excess = torch.clamp(hip_roll_abs - hip_roll_soft_limit, min=0.0, max=0.5)
left_leg_ids = _cached_joint_ids("left_leg_ids", "^Left_(Hip_Pitch|Hip_Roll|Hip_Yaw|Knee_Pitch|Ankle_Pitch|Ankle_Roll)$")
right_leg_ids = _cached_joint_ids("right_leg_ids", "^Right_(Hip_Pitch|Hip_Roll|Hip_Yaw|Knee_Pitch|Ankle_Pitch|Ankle_Roll)$")
if left_leg_ids.numel() > 0 and right_leg_ids.numel() > 0 and left_leg_ids.numel() == right_leg_ids.numel():
left_leg = joint_pos.index_select(1, left_leg_ids)
right_leg = joint_pos.index_select(1, right_leg_ids)
symmetry_penalty = torch.mean(torch.abs(left_leg - right_leg), dim=-1)
else:
symmetry_penalty = torch.zeros_like(pelvis_h)
root_vel = _safe_tensor(torch.norm(env.scene["robot"].data.root_lin_vel_w, dim=-1), nan=10.0, pos=10.0, neg=0.0)
standing_gate = torch.sigmoid((pelvis_h - standing_vel_gate_h) * 10.0)
# Core dense signal for "stand up and stand stable".
stand_head = torch.sigmoid((head_h - min_head_height) * 10.0)
stand_pelvis = torch.sigmoid((pelvis_h - min_pelvis_height) * 10.0)
stand_upright = torch.sigmoid((upright_ratio - stand_upright_threshold) * 12.0)
stand_foot = torch.sigmoid((foot_support_ratio - stand_foot_support_threshold) * 12.0)
stand_arm_release = torch.sigmoid((stand_arm_support_threshold - arm_support_ratio) * 12.0)
stand_still = torch.exp(-3.0 * root_vel)
stand_core = (
0.22 * stand_head
+ 0.22 * stand_pelvis
+ 0.18 * stand_upright
+ 0.18 * stand_foot
+ 0.10 * stand_arm_release
+ 0.10 * stand_still
)
total_reward = (
upright_gain * upright_ratio
+ pelvis_progress_gain * pelvis_progress
+ head_clearance_gain * head_clearance
+ foot_support_gain * foot_support_ratio
+ arm_release_gain * arm_release_reward
+ knee_mid_bend_gain * knee_mid_bend
- hip_roll_penalty_gain * hip_roll_excess
- symmetry_penalty_gain * symmetry_penalty
- standing_vel_penalty_gain * standing_gate * root_vel
+ stand_core_gain * stand_core
)
total_reward = _safe_tensor(total_reward, nan=0.0, pos=100.0, neg=-100.0)
upright_mean = torch.mean(upright_ratio).detach().item()
foot_support_ratio_mean = torch.mean(foot_support_ratio).detach().item()
arm_support_ratio_mean = torch.mean(arm_support_ratio).detach().item()
hip_roll_mean = torch.mean(hip_roll_abs).detach().item()
stand_core_mean = torch.mean(stand_core).detach().item()
log_dict = env.extras.get("log", {})
if isinstance(log_dict, dict):
log_dict["upright_mean"] = upright_mean
log_dict["foot_support_ratio_mean"] = foot_support_ratio_mean
log_dict["arm_support_ratio_mean"] = arm_support_ratio_mean
log_dict["hip_roll_mean"] = hip_roll_mean
log_dict["stand_core_mean"] = stand_core_mean
env.extras["log"] = log_dict
return internal_reward_scale * total_reward
def ground_farming_timeout(env: ManagerBasedRLEnv, max_time: float, height_threshold: float) -> torch.Tensor:
pelvis_idx, _ = env.scene["robot"].find_bodies("Trunk")
pelvis_h = env.scene["robot"].data.body_state_w[:, pelvis_idx[0], 2]
episode_time = env.episode_length_buf * env.step_dt
return ((episode_time > max_time) & (pelvis_h < height_threshold)).bool()
def is_supported_standing(
env: ManagerBasedRLEnv,
foot_sensor_cfg: SceneEntityCfg,
arm_sensor_cfg: SceneEntityCfg,
min_head_height: float,
min_pelvis_height: float,
max_angle_error: float,
velocity_threshold: float,
min_foot_support_force: float,
max_arm_support_force: float,
standing_time: float,
timer_name: str = "stable_timer",
) -> torch.Tensor:
head_idx, _ = env.scene["robot"].find_bodies("H2")
pelvis_idx, _ = env.scene["robot"].find_bodies("Trunk")
head_h = env.scene["robot"].data.body_state_w[:, head_idx[0], 2]
pelvis_h = env.scene["robot"].data.body_state_w[:, pelvis_idx[0], 2]
gravity_error = torch.norm(env.scene["robot"].data.projected_gravity_b[:, :2], dim=-1)
root_vel_norm = torch.norm(env.scene["robot"].data.root_lin_vel_w, dim=-1)
foot_force_z = _contact_force_z(env, foot_sensor_cfg)
arm_force_z = _contact_force_z(env, arm_sensor_cfg)
is_stable_now = (
(head_h > min_head_height)
& (pelvis_h > min_pelvis_height)
& (gravity_error < max_angle_error)
& (root_vel_norm < velocity_threshold)
& (foot_force_z > min_foot_support_force)
& (arm_force_z < max_arm_support_force)
)
if timer_name not in env.extras:
env.extras[timer_name] = torch.zeros(env.num_envs, device=env.device)
dt = env.physics_dt * env.cfg.decimation
env.extras[timer_name] = torch.where(
is_stable_now, env.extras[timer_name] + dt, torch.zeros_like(env.extras[timer_name])
)
return (env.extras[timer_name] > standing_time).bool()
def airborne_flip_termination(
env: ManagerBasedRLEnv,
foot_sensor_cfg: SceneEntityCfg,
arm_sensor_cfg: SceneEntityCfg,
full_support_sensor_cfg: SceneEntityCfg,
full_support_threshold: float = 12.0,
min_pelvis_height: float = 0.34,
contact_force_threshold: float = 6.0,
inverted_gravity_threshold: float = 0.45,
flip_ang_vel_threshold: float = 5.6,
persist_time: float = 0.18,
timer_name: str = "airborne_flip_timer",
) -> torch.Tensor:
pelvis_idx, _ = env.scene["robot"].find_bodies("Trunk")
pelvis_h = env.scene["robot"].data.body_state_w[:, pelvis_idx[0], 2]
projected_gravity = env.scene["robot"].data.projected_gravity_b
ang_vel = env.scene["robot"].data.root_ang_vel_w
foot_force_z = _contact_force_z(env, foot_sensor_cfg)
arm_force_z = _contact_force_z(env, arm_sensor_cfg)
has_no_support = (foot_force_z < contact_force_threshold) & (arm_force_z < contact_force_threshold)
full_support_force_z = _contact_force_z(env, full_support_sensor_cfg)
is_fully_airborne = full_support_force_z < full_support_threshold
is_airborne = (pelvis_h > min_pelvis_height) & has_no_support & is_fully_airborne
is_inverted = projected_gravity[:, 2] > inverted_gravity_threshold
is_fast_spin = torch.norm(ang_vel, dim=-1) > flip_ang_vel_threshold
bad_state = is_airborne & (is_inverted | is_fast_spin)
if timer_name not in env.extras:
env.extras[timer_name] = torch.zeros(env.num_envs, device=env.device)
dt = env.physics_dt * env.cfg.decimation
env.extras[timer_name] = torch.where(
bad_state, env.extras[timer_name] + dt, torch.zeros_like(env.extras[timer_name])
)
return (env.extras[timer_name] > persist_time).bool()
def nonfinite_state_termination(env: ManagerBasedRLEnv) -> torch.Tensor:
"""Terminate envs when sim state becomes NaN/Inf."""
robot_data = env.scene["robot"].data
finite_joint_pos = torch.isfinite(robot_data.joint_pos).all(dim=-1)
finite_joint_vel = torch.isfinite(robot_data.joint_vel).all(dim=-1)
finite_root_lin = torch.isfinite(robot_data.root_lin_vel_w).all(dim=-1)
finite_root_ang = torch.isfinite(robot_data.root_ang_vel_w).all(dim=-1)
finite_gravity = torch.isfinite(robot_data.projected_gravity_b).all(dim=-1)
finite_root_pos = torch.isfinite(robot_data.root_pos_w).all(dim=-1)
is_finite = finite_joint_pos & finite_joint_vel & finite_root_lin & finite_root_ang & finite_gravity & finite_root_pos
return ~is_finite
T1_JOINT_NAMES = [
"AAHead_yaw", "Head_pitch",
"Left_Shoulder_Pitch", "Left_Shoulder_Roll", "Left_Elbow_Pitch", "Left_Elbow_Yaw",
"Right_Shoulder_Pitch", "Right_Shoulder_Roll", "Right_Elbow_Pitch", "Right_Elbow_Yaw",
"Waist",
"Left_Hip_Pitch", "Right_Hip_Pitch", "Left_Hip_Roll", "Right_Hip_Roll",
"Left_Hip_Yaw", "Right_Hip_Yaw", "Left_Knee_Pitch", "Right_Knee_Pitch",
"Left_Ankle_Pitch", "Right_Ankle_Pitch", "Left_Ankle_Roll", "Right_Ankle_Roll",
]
@configclass
class T1ObservationCfg:
@configclass
class PolicyCfg(ObsGroup):
concatenate_terms = True
root_height = ObsTerm(func=root_height_obs)
head_height = ObsTerm(func=head_height_obs)
foot_support_force = ObsTerm(
func=foot_support_force_obs,
params={"foot_sensor_cfg": SceneEntityCfg("contact_sensor", body_names=[".*_foot_link"]), "norm_force": 120.0},
)
arm_support_force = ObsTerm(
func=arm_support_force_obs,
params={"arm_sensor_cfg": SceneEntityCfg("contact_sensor", body_names=["A[LR][23]", ".*_hand_link"]), "norm_force": 120.0},
)
foot_support_ratio = ObsTerm(
func=contact_balance_obs,
params={
"foot_sensor_cfg": SceneEntityCfg("contact_sensor", body_names=[".*_foot_link"]),
"arm_sensor_cfg": SceneEntityCfg("contact_sensor", body_names=["A[LR][23]", ".*_hand_link"]),
},
)
base_lin_vel = ObsTerm(func=mdp.base_lin_vel)
base_ang_vel = ObsTerm(func=mdp.base_ang_vel)
projected_gravity = ObsTerm(func=mdp.projected_gravity)
joint_pos = ObsTerm(func=mdp.joint_pos_rel, params={"asset_cfg": SceneEntityCfg("robot", joint_names=T1_JOINT_NAMES)})
joint_vel = ObsTerm(func=mdp.joint_vel_rel, params={"asset_cfg": SceneEntityCfg("robot", joint_names=T1_JOINT_NAMES)})
actions = ObsTerm(func=mdp.last_action)
policy = PolicyCfg()
@configclass
class T1EventCfg:
reset_robot_rotation = EventTerm(
func=reset_root_state_bimodal_lie_pose,
params={
"asset_cfg": SceneEntityCfg("robot"),
"roll_range": (-0.15, 0.15),
"pitch_abs_range": (1.40, 1.70),
"yaw_abs_range": (0.0, 3.14),
"x_range": (-0.04, 0.04),
"y_range": (-0.04, 0.04),
"z_range": (0.10, 0.18),
},
mode="reset",
)
@configclass
class T1ActionCfg:
head_action = JointPositionActionCfg(
asset_name="robot",
joint_names=[
"AAHead_yaw", "Head_pitch",
],
scale=0.5,
use_default_offset=True
)
arm_action = JointPositionActionCfg(
asset_name="robot",
joint_names=[
"Left_Shoulder_Pitch", "Left_Shoulder_Roll", "Left_Elbow_Pitch", "Left_Elbow_Yaw",
"Right_Shoulder_Pitch", "Right_Shoulder_Roll", "Right_Elbow_Pitch", "Right_Elbow_Yaw",
],
scale=0.82,
use_default_offset=True,
)
torso_action = JointPositionActionCfg(
asset_name="robot",
joint_names=[
"Waist"
],
scale=0.58,
use_default_offset=True
)
leg_action = JointPositionActionCfg(
asset_name="robot",
joint_names=[
"Left_Hip_Pitch", "Right_Hip_Pitch", "Left_Hip_Roll", "Right_Hip_Roll", "Left_Hip_Yaw",
"Right_Hip_Yaw", "Left_Knee_Pitch", "Right_Knee_Pitch", "Left_Ankle_Pitch", "Right_Ankle_Pitch",
"Left_Ankle_Roll", "Right_Ankle_Roll",
],
scale=1.05,
use_default_offset=True,
)
@configclass
class T1GetUpRewardCfg:
smooth_getup = RewTerm(
func=smooth_additive_getup_reward,
weight=5.0,
params={
"min_head_height": 1.02,
"min_pelvis_height": 0.78,
"foot_sensor_cfg": SceneEntityCfg("contact_sensor", body_names=[".*_foot_link"]),
"arm_sensor_cfg": SceneEntityCfg("contact_sensor", body_names=["A[LR][23]", ".*_hand_link"]),
"upright_gain": 2.4,
"pelvis_progress_gain": 1.8,
"head_clearance_gain": 1.0,
"foot_support_gain": 1.2,
"arm_release_gain": 1.2,
"knee_mid_bend_gain": 0.8,
"knee_target": 1.0,
"knee_sigma": 0.5,
"hip_roll_penalty_gain": 0.5,
"hip_roll_soft_limit": 0.42,
"symmetry_penalty_gain": 0.2,
"standing_vel_penalty_gain": 0.35,
"standing_vel_gate_h": 0.65,
"stand_core_gain": 2.4,
"stand_upright_threshold": 0.82,
"stand_foot_support_threshold": 0.65,
"stand_arm_support_threshold": 0.25,
"internal_reward_scale": 1.0,
},
)
is_success_bonus = RewTerm(
func=is_supported_standing,
weight=150.0,
params={
"foot_sensor_cfg": SceneEntityCfg("contact_sensor", body_names=[".*_foot_link"]),
"arm_sensor_cfg": SceneEntityCfg("contact_sensor", body_names=["A[LR][23]", ".*_hand_link"]),
"min_head_height": 1.05,
"min_pelvis_height": 0.65,
"max_angle_error": 0.18,
"velocity_threshold": 0.10,
"min_foot_support_force": 36.0,
"max_arm_support_force": 14.0,
"standing_time": 0.70,
"timer_name": "reward_stable_timer",
},
)
keyframe_motion_prior = RewTerm(
func=keyframe_motion_prior_reward,
weight=0.0,
params={
"front_motion_path": _DEFAULT_FRONT_KEYFRAME_YAML,
"back_motion_path": _DEFAULT_BACK_KEYFRAME_YAML,
"sample_dt": 0.04,
"pose_sigma": 0.42,
"vel_sigma": 1.6,
"joint_subset": "all",
"internal_reward_scale": 1.0,
},
)
# AMP reward is disabled by default until a discriminator model path is provided.
amp_style_prior = RewTerm(
func=amp_style_prior_reward,
weight=0.0,
params={
"amp_enabled": False,
"amp_model_path": "",
"amp_train_enabled": False,
"amp_expert_features_path": "",
"disc_hidden_dim": 256,
"disc_hidden_layers": 2,
"disc_lr": 3e-4,
"disc_weight_decay": 1e-6,
"disc_update_interval": 4,
"disc_batch_size": 1024,
"disc_min_expert_samples": 2048,
"disc_history_steps": 4,
"feature_clip": 8.0,
"logit_scale": 1.0,
"amp_reward_gain": 1.0,
"internal_reward_scale": 1.0,
},
)
@configclass
class T1GetUpTerminationsCfg:
time_out = DoneTerm(func=mdp.time_out)
nonfinite_state_abort = DoneTerm(func=nonfinite_state_termination)
anti_farming = DoneTerm(func=ground_farming_timeout, params={"max_time": 4.5, "height_threshold": 0.48})
illegal_contact = DoneTerm(
func=mdp.illegal_contact,
params={"sensor_cfg": SceneEntityCfg("contact_sensor", body_names=["Trunk"]), "threshold": 200.0},
)
standing_success = DoneTerm(
func=is_supported_standing,
params={
"foot_sensor_cfg": SceneEntityCfg("contact_sensor", body_names=[".*_foot_link"]),
"arm_sensor_cfg": SceneEntityCfg("contact_sensor", body_names=["A[LR][23]", ".*_hand_link"]),
"min_head_height": 1.10,
"min_pelvis_height": 0.83,
"max_angle_error": 0.08,
"velocity_threshold": 0.08,
"min_foot_support_force": 36.0,
"max_arm_support_force": 16.0,
"standing_time": 1.4,
"timer_name": "term_stable_timer",
},
)
joint_velocity_limit = DoneTerm(
func=mdp.joint_vel_out_of_manual_limit,
params={"asset_cfg": SceneEntityCfg("robot"), "max_velocity": 50.0},
)
airborne_flip_abort = DoneTerm(
func=airborne_flip_termination,
params={
"foot_sensor_cfg": SceneEntityCfg("contact_sensor", body_names=[".*_foot_link"]),
"arm_sensor_cfg": SceneEntityCfg("contact_sensor", body_names=["A[LR][23]", ".*_hand_link"]),
"full_support_sensor_cfg": SceneEntityCfg("contact_sensor", body_names=[".*"]),
"full_support_threshold": 12.0,
"min_pelvis_height": 0.34,
"contact_force_threshold": 6.0,
"inverted_gravity_threshold": 0.45,
"flip_ang_vel_threshold": 5.6,
"persist_time": 0.18,
"timer_name": "airborne_flip_timer",
},
)
@configclass
class T1EnvCfg(ManagerBasedRLEnvCfg):
scene = T1SceneCfg(num_envs=8192, env_spacing=5.0)
observations = T1ObservationCfg()
rewards = T1GetUpRewardCfg()
terminations = T1GetUpTerminationsCfg()
events = T1EventCfg()
actions = T1ActionCfg()
episode_length_s = 10.0
decimation = 4