use amp strategy

This commit is contained in:
2026-04-12 08:41:10 -04:00
parent 6c3445f50d
commit 9e6e7e00f8
6 changed files with 835 additions and 227 deletions

Binary file not shown.

View File

@@ -0,0 +1,204 @@
import argparse
import math
from pathlib import Path
import torch
import yaml
# AMP feature order must match `_build_amp_features` in `t1_env_cfg.py`:
# [joint_pos_rel(23), joint_vel(23), root_lin_vel(3), root_ang_vel(3), projected_gravity(3)].
T1_JOINT_NAMES = [
    "AAHead_yaw",
    "Head_pitch",
    "Left_Shoulder_Pitch",
    "Left_Shoulder_Roll",
    "Left_Elbow_Pitch",
    "Left_Elbow_Yaw",
    "Right_Shoulder_Pitch",
    "Right_Shoulder_Roll",
    "Right_Elbow_Pitch",
    "Right_Elbow_Yaw",
    "Waist",
    "Left_Hip_Pitch",
    "Left_Hip_Roll",
    "Left_Hip_Yaw",
    "Left_Knee_Pitch",
    "Left_Ankle_Pitch",
    "Left_Ankle_Roll",
    "Right_Hip_Pitch",
    "Right_Hip_Roll",
    "Right_Hip_Yaw",
    "Right_Knee_Pitch",
    "Right_Ankle_Pitch",
    "Right_Ankle_Roll",
]
# Joint name -> column index in the policy joint ordering above.
JOINT_TO_IDX = {joint_name: column for column, joint_name in enumerate(T1_JOINT_NAMES)}
# Mirror rules aligned with `behaviors/custom/keyframe/keyframe.py`.
# Each entry: readable keyframe name -> (mapped joint names, is_inverse_direction).
MOTOR_SYMMETRY = {
    "Head_yaw": (("Head_yaw",), False),
    "Head_pitch": (("Head_pitch",), False),
    "Shoulder_Pitch": (("Left_Shoulder_Pitch", "Right_Shoulder_Pitch"), False),
    "Shoulder_Roll": (("Left_Shoulder_Roll", "Right_Shoulder_Roll"), True),
    "Elbow_Pitch": (("Left_Elbow_Pitch", "Right_Elbow_Pitch"), False),
    "Elbow_Yaw": (("Left_Elbow_Yaw", "Right_Elbow_Yaw"), True),
    "Waist": (("Waist",), False),
    "Hip_Pitch": (("Left_Hip_Pitch", "Right_Hip_Pitch"), False),
    "Hip_Roll": (("Left_Hip_Roll", "Right_Hip_Roll"), True),
    "Hip_Yaw": (("Left_Hip_Yaw", "Right_Hip_Yaw"), True),
    "Knee_Pitch": (("Left_Knee_Pitch", "Right_Knee_Pitch"), False),
    "Ankle_Pitch": (("Left_Ankle_Pitch", "Right_Ankle_Pitch"), False),
    "Ankle_Roll": (("Left_Ankle_Roll", "Right_Ankle_Roll"), True),
}
# Keyframe-readable names that differ from the policy joint names.
READABLE_TO_POLICY = {"Head_yaw": "AAHead_yaw"}


def decode_keyframe_motor_positions(raw_motor_positions: dict[str, float]) -> dict[str, float]:
    """Decode one keyframe into per-joint radians.

    Sign convention: for a joint covered by a symmetry rule, only the first
    joint of an "inverse" pair keeps the raw sign — every other mapped joint
    is negated. Names without a rule pass through with positive sign.
    NOTE(review): this presumably matches the convention in
    `behaviors/custom/keyframe/keyframe.py` — confirm against that file.
    """
    decoded: dict[str, float] = {}
    for readable_name, position_deg in raw_motor_positions.items():
        rule = MOTOR_SYMMETRY.get(readable_name)
        if rule is None:
            # Unmapped names are passed through unchanged (positive sign).
            target = READABLE_TO_POLICY.get(readable_name, readable_name)
            decoded[target] = math.radians(float(position_deg))
            continue
        motor_names, is_inverse_direction = rule
        for slot, motor_name in enumerate(motor_names):
            keep_sign = is_inverse_direction and slot == 0
            signed_deg = float(position_deg) if keep_sign else -float(position_deg)
            target = READABLE_TO_POLICY.get(motor_name, motor_name)
            decoded[target] = math.radians(signed_deg)
    return decoded
def load_sequence(yaml_path: Path) -> list[tuple[float, torch.Tensor]]:
    """Load yaml keyframes -> list[(delta_seconds, joint_pos_vec)].

    Each keyframe's motor positions are decoded into radians and scattered
    into a 23-dim joint vector following `T1_JOINT_NAMES` ordering; joints
    missing from a keyframe stay at zero.
    """
    with yaml_path.open("r", encoding="utf-8") as f:
        desc = yaml.safe_load(f) or {}
    sequence: list[tuple[float, torch.Tensor]] = []
    for keyframe in desc.get("keyframes", []):
        # Clamp each keyframe duration to at least 1 ms.
        delta_s = max(float(keyframe.get("delta", 0.1)), 1e-3)
        decoded = decode_keyframe_motor_positions(keyframe.get("motor_positions", {}) or {})
        joint_pos = torch.zeros(len(T1_JOINT_NAMES), dtype=torch.float32)
        for joint_name, joint_val in decoded.items():
            column = JOINT_TO_IDX.get(joint_name)
            if column is not None:
                joint_pos[column] = float(joint_val)
        sequence.append((delta_s, joint_pos))
    return sequence
def sequence_to_amp_features(
    sequence: list[tuple[float, torch.Tensor]],
    sample_fps: float,
    projected_gravity: tuple[float, float, float],
) -> torch.Tensor:
    """Convert decoded sequence into AMP features tensor (N, 55).

    Each keyframe is held for round(delta / dt) frames (at least one), joint
    velocities are obtained by finite differences, and root velocities are
    synthesized as zeros since the keyframes carry no root motion.

    Raises:
        ValueError: if `sequence` is empty.
    """
    if not sequence:
        raise ValueError("Empty keyframe sequence.")
    dt = 1.0 / max(sample_fps, 1e-6)
    expanded: list[torch.Tensor] = []
    for delta_s, joint_pos in sequence:
        n_frames = max(int(round(delta_s / dt)), 1)
        expanded.extend(joint_pos.clone() for _ in range(n_frames))
    if len(expanded) < 2:
        # Need at least two frames for the finite-difference velocity below.
        expanded.append(expanded[0].clone())
    pos = torch.stack(expanded, dim=0)
    vel = torch.zeros_like(pos)
    vel[1:] = (pos[1:] - pos[:-1]) / dt
    vel[0] = vel[1]  # backfill the first frame's velocity
    num_frames = pos.shape[0]
    root_lin = pos.new_zeros((num_frames, 3))
    root_ang = pos.new_zeros((num_frames, 3))
    grav = torch.tensor(projected_gravity, dtype=torch.float32).expand(num_frames, 3)
    return torch.cat([pos, vel, root_lin, root_ang, grav], dim=-1)
def main():
    """CLI entry point: synthesize AMP expert features from get-up keyframe YAMLs."""
    parser = argparse.ArgumentParser(description="Build AMP expert features from get_up keyframe YAML files.")
    # Table-driven argument registration keeps flag/default/help in one place.
    argument_table = [
        ("--front_yaml", dict(type=str, default="behaviors/custom/keyframe/get_up/get_up_front.yaml", help="Path to front get-up YAML.")),
        ("--back_yaml", dict(type=str, default="behaviors/custom/keyframe/get_up/get_up_back.yaml", help="Path to back get-up YAML.")),
        ("--sample_fps", dict(type=float, default=50.0, help="Sampling fps when expanding keyframe durations.")),
        ("--repeat_cycles", dict(type=int, default=200, help="How many times to repeat front+back sequences to enlarge dataset.")),
        ("--projected_gravity", dict(type=float, nargs=3, default=(0.0, 0.0, -1.0), help="Projected gravity feature used for synthesized expert data.")),
        ("--output", dict(type=str, default="rl_game/get_up/amp/expert_features.pt", help="Output expert feature file path.")),
    ]
    for flag, options in argument_table:
        parser.add_argument(flag, **options)
    args = parser.parse_args()

    front_path = Path(args.front_yaml).expanduser().resolve()
    back_path = Path(args.back_yaml).expanduser().resolve()
    # Fail fast before touching torch if either YAML is missing.
    for label, yaml_path in (("Front", front_path), ("Back", back_path)):
        if not yaml_path.is_file():
            raise FileNotFoundError(f"{label} YAML not found: {yaml_path}")

    gravity = tuple(args.projected_gravity)
    base_feat = torch.cat(
        [
            sequence_to_amp_features(load_sequence(front_path), args.sample_fps, gravity),
            sequence_to_amp_features(load_sequence(back_path), args.sample_fps, gravity),
        ],
        dim=0,
    )
    # Tile the short demo clips so the discriminator sees a larger dataset.
    repeat_cycles = max(int(args.repeat_cycles), 1)
    expert_features = base_feat.repeat(repeat_cycles, 1).contiguous()

    out_path = Path(args.output).expanduser()
    if not out_path.is_absolute():
        out_path = Path.cwd() / out_path
    out_path.parent.mkdir(parents=True, exist_ok=True)
    # Persist tensor plus provenance metadata for downstream sanity checks.
    torch.save(
        {
            "expert_features": expert_features,
            "feature_dim": int(expert_features.shape[1]),
            "num_samples": int(expert_features.shape[0]),
            "source": "get_up_keyframe_yaml",
            "front_yaml": str(front_path),
            "back_yaml": str(back_path),
            "sample_fps": float(args.sample_fps),
            "repeat_cycles": repeat_cycles,
            "projected_gravity": [float(v) for v in args.projected_gravity],
        },
        str(out_path),
    )
    print(f"[INFO]: saved expert features -> {out_path}")
    print(f"[INFO]: shape={tuple(expert_features.shape)}")


if __name__ == "__main__":
    main()

View File

@@ -17,7 +17,7 @@ params:
name: default name: default
sigma_init: sigma_init:
name: const_initializer name: const_initializer
val: 0.80 val: 0.42
fixed_sigma: False fixed_sigma: False
mlp: mlp:
units: [512, 256, 128] units: [512, 256, 128]
@@ -41,7 +41,7 @@ params:
normalize_advantage: True normalize_advantage: True
gamma: 0.99 gamma: 0.99
tau: 0.95 tau: 0.95
learning_rate: 3e-4 learning_rate: 1e-4
lr_schedule: adaptive lr_schedule: adaptive
kl_threshold: 0.013 kl_threshold: 0.013
score_to_win: 20000 score_to_win: 20000
@@ -49,7 +49,7 @@ params:
save_best_after: 50 save_best_after: 50
save_frequency: 100 save_frequency: 100
grad_norm: 0.8 grad_norm: 0.8
entropy_coef: 0.00008 entropy_coef: 0.00011
truncate_grads: True truncate_grads: True
bounds_loss_coef: 0.01 bounds_loss_coef: 0.01
e_clip: 0.2 e_clip: 0.2

View File

@@ -1,4 +1,6 @@
import torch import torch
import torch.nn as nn
from pathlib import Path
import isaaclab.envs.mdp as mdp import isaaclab.envs.mdp as mdp
from isaaclab.envs import ManagerBasedRLEnvCfg, ManagerBasedRLEnv from isaaclab.envs import ManagerBasedRLEnvCfg, ManagerBasedRLEnv
from isaaclab.managers import ObservationGroupCfg as ObsGroup from isaaclab.managers import ObservationGroupCfg as ObsGroup
@@ -11,7 +13,6 @@ from isaaclab.managers import SceneEntityCfg
from isaaclab.utils import configclass from isaaclab.utils import configclass
from rl_game.get_up.env.t1_env import T1SceneCfg from rl_game.get_up.env.t1_env import T1SceneCfg
def _contact_force_z(env: ManagerBasedRLEnv, sensor_cfg: SceneEntityCfg) -> torch.Tensor: def _contact_force_z(env: ManagerBasedRLEnv, sensor_cfg: SceneEntityCfg) -> torch.Tensor:
"""Sum positive vertical contact force on selected bodies.""" """Sum positive vertical contact force on selected bodies."""
sensor = env.scene.sensors.get(sensor_cfg.name) sensor = env.scene.sensors.get(sensor_cfg.name)
@@ -24,6 +25,279 @@ def _contact_force_z(env: ManagerBasedRLEnv, sensor_cfg: SceneEntityCfg) -> torc
return torch.clamp(torch.sum(selected_z, dim=-1), min=0.0) return torch.clamp(torch.sum(selected_z, dim=-1), min=0.0)
def _safe_tensor(x: torch.Tensor, nan: float = 0.0, pos: float = 1e3, neg: float = -1e3) -> torch.Tensor:
    """Replace NaN/+inf/-inf entries so the reward pipeline stays finite."""
    return x.nan_to_num(nan=nan, posinf=pos, neginf=neg)
class AMPDiscriminator(nn.Module):
    """Lightweight discriminator used by online AMP updates.

    Maps an AMP feature vector to a single real-valued logit through a stack
    of Linear -> LayerNorm -> SiLU blocks followed by a final Linear head.
    """

    def __init__(self, input_dim: int, hidden_dims: tuple[int, ...]):
        super().__init__()
        blocks: list[nn.Module] = []
        prev_dim = input_dim
        for width in hidden_dims:
            blocks += [nn.Linear(prev_dim, width), nn.LayerNorm(width), nn.SiLU()]
            prev_dim = width
        # Single-logit head: positive logits lean "expert", negative "policy".
        blocks.append(nn.Linear(prev_dim, 1))
        self.net = nn.Sequential(*blocks)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return unnormalized logits of shape (..., 1)."""
        return self.net(x)
def _extract_tensor_from_amp_payload(payload) -> torch.Tensor | None:
    """Pull the expert-feature tensor out of a loaded checkpoint payload.

    Accepts either a bare tensor or a dict holding one under a known key;
    anything else yields None.
    """
    if isinstance(payload, torch.Tensor):
        return payload
    if not isinstance(payload, dict):
        return None
    # Key order encodes preference: explicit name first, generic names after.
    for key in ("expert_features", "features", "obs"):
        candidate = payload.get(key)
        if isinstance(candidate, torch.Tensor):
            return candidate
    return None
def _load_amp_expert_features(
    expert_features_path: str,
    device: str,
    feature_dim: int,
    fallback_samples: int,
) -> torch.Tensor | None:
    """Load expert AMP features. Returns None when file is unavailable.

    Any failure (missing path, unreadable file, wrong shape) disables AMP by
    returning None instead of raising; a dataset shorter than
    `fallback_samples` is tiled upward so batch sampling has enough rows.
    """
    if not expert_features_path:
        return None
    path = Path(expert_features_path).expanduser()
    if not path.is_file():
        return None
    try:
        # Best-effort load: corrupt or incompatible files must not crash training.
        payload = torch.load(str(path), map_location="cpu")
    except Exception:
        return None
    expert = _extract_tensor_from_amp_payload(payload)
    if expert is None:
        return None
    expert = expert.float()
    if expert.ndim == 1:
        expert = expert.unsqueeze(0)
    # Accept only a (num_samples, feature_dim) matrix with at least two rows.
    if expert.ndim != 2 or expert.shape[1] != feature_dim or expert.shape[0] < 2:
        return None
    if expert.shape[0] < fallback_samples:
        reps = -(-fallback_samples // expert.shape[0])  # ceil division
        expert = expert.repeat(reps, 1)
    return expert.to(device=device)
def _get_amp_state(
    env: ManagerBasedRLEnv,
    amp_enabled: bool,
    amp_model_path: str,
    amp_train_enabled: bool,
    amp_expert_features_path: str,
    feature_dim: int,
    disc_hidden_dim: int,
    disc_hidden_layers: int,
    disc_lr: float,
    disc_weight_decay: float,
    disc_min_expert_samples: int,
):
    """Get cached AMP state (frozen jit or trainable discriminator).

    The state dict lives in ``env.extras`` and is rebuilt only when the
    configuration signature changes. Resulting ``mode`` values:
      - "trainable": online discriminator + AdamW optimizer + expert features
      - "jit": frozen TorchScript discriminator loaded from disk
      - "disabled": no model available; caller falls back to zero reward
    """
    cache_key = "amp_state_cache"
    # Clamp config knobs to sane minimums before they enter the signature.
    hidden_layers = max(int(disc_hidden_layers), 1)
    hidden_dim = max(int(disc_hidden_dim), 16)
    # Signature of every knob that requires rebuilding the cached state.
    state_sig = (
        bool(amp_enabled),
        str(amp_model_path),
        bool(amp_train_enabled),
        str(amp_expert_features_path),
        int(feature_dim),
        hidden_dim,
        hidden_layers,
        float(disc_lr),
        float(disc_weight_decay),
    )
    cached = env.extras.get(cache_key, None)
    if isinstance(cached, dict) and cached.get("sig") == state_sig:
        # Cache hit: same configuration, reuse model/optimizer/metrics.
        return cached
    # Fresh state; "disabled" unless one of the branches below succeeds.
    state = {
        "sig": state_sig,
        "mode": "disabled",
        "model": None,
        "optimizer": None,
        "expert_features": None,
        "step": 0,
        "last_loss": 0.0,
        "last_acc_policy": 0.0,
        "last_acc_expert": 0.0,
    }
    if amp_train_enabled:
        # Online-training mode takes precedence over a frozen jit model.
        expert_features = _load_amp_expert_features(
            amp_expert_features_path,
            device=env.device,
            feature_dim=feature_dim,
            fallback_samples=max(disc_min_expert_samples, 512),
        )
        if expert_features is not None:
            model = AMPDiscriminator(input_dim=feature_dim, hidden_dims=tuple([hidden_dim] * hidden_layers)).to(env.device)
            optimizer = torch.optim.AdamW(model.parameters(), lr=float(disc_lr), weight_decay=float(disc_weight_decay))
            state["mode"] = "trainable"
            state["model"] = model
            state["optimizer"] = optimizer
            state["expert_features"] = expert_features
    elif amp_enabled and amp_model_path:
        model_path = Path(amp_model_path).expanduser()
        if model_path.is_file():
            try:
                model = torch.jit.load(str(model_path), map_location=env.device)
                model.eval()
                state["mode"] = "jit"
                state["model"] = model
            except Exception:
                # An unloadable scripted model silently leaves AMP disabled.
                pass
    env.extras[cache_key] = state
    return state
def _build_amp_features(env: ManagerBasedRLEnv, feature_clip: float = 8.0) -> torch.Tensor:
    """Build AMP-style discriminator features from robot kinematics.

    Feature layout (must match the expert-feature builder):
    [joint_pos_rel, joint_vel, root_lin_vel_w, root_ang_vel_w, projected_gravity_b].
    """
    data = env.scene["robot"].data
    parts = [
        data.joint_pos - data.default_joint_pos,  # joint positions relative to defaults
        data.joint_vel,
        data.root_lin_vel_w,
        data.root_ang_vel_w,
        data.projected_gravity_b,
    ]
    features = torch.cat(parts, dim=-1)
    # Sanitize, then clip, so a single exploding state cannot destabilize the discriminator.
    features = _safe_tensor(features, nan=0.0, pos=feature_clip, neg=-feature_clip)
    return features.clamp(min=-feature_clip, max=feature_clip)
def amp_style_prior_reward(
    env: ManagerBasedRLEnv,
    amp_enabled: bool = False,
    amp_model_path: str = "",
    amp_train_enabled: bool = False,
    amp_expert_features_path: str = "",
    disc_hidden_dim: int = 256,
    disc_hidden_layers: int = 2,
    disc_lr: float = 3e-4,
    disc_weight_decay: float = 1e-6,
    disc_update_interval: int = 4,
    disc_batch_size: int = 1024,
    disc_min_expert_samples: int = 2048,
    feature_clip: float = 8.0,
    logit_scale: float = 1.0,
    amp_reward_gain: float = 1.0,
    internal_reward_scale: float = 1.0,
) -> torch.Tensor:
    """AMP style prior reward with optional online discriminator training.

    Every call builds AMP features for the current batch of envs, optionally
    performs one discriminator update (every ``disc_update_interval`` calls),
    and scores the features with the discriminator. Returns a per-env reward
    of shape (num_envs,); zeros when no discriminator is available.
    """
    zeros = torch.zeros(env.num_envs, device=env.device)
    # Defaults used when no discriminator is available / no update happened.
    amp_score = zeros
    model_loaded = 0.0
    amp_train_active = 0.0
    disc_loss = 0.0
    disc_acc_policy = 0.0
    disc_acc_expert = 0.0
    amp_features = _build_amp_features(env, feature_clip=feature_clip)
    amp_state = _get_amp_state(
        env=env,
        amp_enabled=amp_enabled,
        amp_model_path=amp_model_path,
        amp_train_enabled=amp_train_enabled,
        amp_expert_features_path=amp_expert_features_path,
        feature_dim=int(amp_features.shape[-1]),
        disc_hidden_dim=disc_hidden_dim,
        disc_hidden_layers=disc_hidden_layers,
        disc_lr=disc_lr,
        disc_weight_decay=disc_weight_decay,
        disc_min_expert_samples=disc_min_expert_samples,
    )
    discriminator = amp_state.get("model", None)
    if discriminator is not None:
        model_loaded = 1.0
    if amp_state.get("mode") == "trainable" and discriminator is not None:
        amp_train_active = 1.0
        optimizer = amp_state.get("optimizer", None)
        expert_features = amp_state.get("expert_features", None)
        # Per-call counter; one discriminator update every `update_interval` calls.
        amp_state["step"] = int(amp_state.get("step", 0)) + 1
        update_interval = max(int(disc_update_interval), 1)
        batch_size = max(int(disc_batch_size), 32)
        if optimizer is not None and isinstance(expert_features, torch.Tensor) and amp_state["step"] % update_interval == 0:
            # Policy batch: subsample current env features when there are too many.
            policy_features = amp_features.detach()
            policy_count = policy_features.shape[0]
            if policy_count > batch_size:
                policy_ids = torch.randint(0, policy_count, (batch_size,), device=env.device)
                policy_batch = policy_features.index_select(0, policy_ids)
            else:
                policy_batch = policy_features
            # Expert batch is sampled to match the policy batch size.
            expert_count = expert_features.shape[0]
            expert_ids = torch.randint(0, expert_count, (policy_batch.shape[0],), device=env.device)
            expert_batch = expert_features.index_select(0, expert_ids)
            # Standard BCE GAN objective: expert -> 1, policy -> 0.
            discriminator.train()
            optimizer.zero_grad(set_to_none=True)
            logits_expert = discriminator(expert_batch).squeeze(-1)
            logits_policy = discriminator(policy_batch).squeeze(-1)
            loss_expert = nn.functional.binary_cross_entropy_with_logits(logits_expert, torch.ones_like(logits_expert))
            loss_policy = nn.functional.binary_cross_entropy_with_logits(logits_policy, torch.zeros_like(logits_policy))
            loss = 0.5 * (loss_expert + loss_policy)
            loss.backward()
            nn.utils.clip_grad_norm_(discriminator.parameters(), max_norm=1.0)
            optimizer.step()
            discriminator.eval()
            with torch.no_grad():
                # Telemetry: loss and classification accuracy on both batches.
                disc_loss = float(loss.detach().item())
                disc_acc_expert = float((torch.sigmoid(logits_expert) > 0.5).float().mean().item())
                disc_acc_policy = float((torch.sigmoid(logits_policy) < 0.5).float().mean().item())
            # Persist metrics so skipped-update calls can still log them.
            amp_state["last_loss"] = disc_loss
            amp_state["last_acc_expert"] = disc_acc_expert
            amp_state["last_acc_policy"] = disc_acc_policy
        else:
            # No update this call: report the most recent training metrics.
            disc_loss = float(amp_state.get("last_loss", 0.0))
            disc_acc_expert = float(amp_state.get("last_acc_expert", 0.0))
            disc_acc_policy = float(amp_state.get("last_acc_policy", 0.0))
    if discriminator is not None:
        # Score the whole batch; works for both trainable and jit modes.
        discriminator.eval()
        with torch.no_grad():
            logits = discriminator(amp_features)
            if isinstance(logits, (tuple, list)):
                # Some scripted models return tuples; take the first output.
                logits = logits[0]
            if logits.ndim > 1:
                logits = logits.squeeze(-1)
            # Clamp logits before sigmoid to keep the score well-defined.
            logits = _safe_tensor(logits, nan=0.0, pos=20.0, neg=-20.0)
            amp_score = torch.sigmoid(logit_scale * logits)
            amp_score = _safe_tensor(amp_score, nan=0.0, pos=1.0, neg=0.0)
    amp_reward = _safe_tensor(amp_reward_gain * amp_score, nan=0.0, pos=10.0, neg=0.0)
    # Mirror telemetry into the shared log dict for the training dashboard.
    log_dict = env.extras.get("log", {})
    if isinstance(log_dict, dict):
        log_dict["amp_score_mean"] = torch.mean(amp_score).detach().item()
        log_dict["amp_reward_mean"] = torch.mean(amp_reward).detach().item()
        log_dict["amp_model_loaded_mean"] = model_loaded
        log_dict["amp_train_active_mean"] = amp_train_active
        log_dict["amp_disc_loss_mean"] = disc_loss
        log_dict["amp_disc_acc_policy_mean"] = disc_acc_policy
        log_dict["amp_disc_acc_expert_mean"] = disc_acc_expert
        env.extras["log"] = log_dict
    return internal_reward_scale * amp_reward
def root_height_obs(env: ManagerBasedRLEnv) -> torch.Tensor: def root_height_obs(env: ManagerBasedRLEnv) -> torch.Tensor:
pelvis_idx, _ = env.scene["robot"].find_bodies("Trunk") pelvis_idx, _ = env.scene["robot"].find_bodies("Trunk")
return env.scene["robot"].data.body_state_w[:, pelvis_idx[0], 2].unsqueeze(-1) return env.scene["robot"].data.body_state_w[:, pelvis_idx[0], 2].unsqueeze(-1)
@@ -122,148 +396,143 @@ def smooth_additive_getup_reward(
min_pelvis_height: float, min_pelvis_height: float,
foot_sensor_cfg: SceneEntityCfg, foot_sensor_cfg: SceneEntityCfg,
arm_sensor_cfg: SceneEntityCfg, arm_sensor_cfg: SceneEntityCfg,
head_track_gain: float = 7.0, upright_gain: float = 2.4,
pelvis_track_gain: float = 3.2, pelvis_progress_gain: float = 1.8,
head_progress_gain: float = 3.5, head_clearance_gain: float = 1.0,
pelvis_progress_gain: float = 2.0, foot_support_gain: float = 1.2,
head_clearance_gain: float = 2.8,
torso_track_gain: float = 4.2,
upright_track_gain: float = 3.6,
foot_support_gain: float = 2.0,
arm_release_gain: float = 1.2, arm_release_gain: float = 1.2,
arm_push_gain: float = 2.2, knee_mid_bend_gain: float = 0.8,
arm_push_threshold: float = 10.0, knee_target: float = 1.0,
arm_push_sharpness: float = 0.12, knee_sigma: float = 0.5,
head_sigma: float = 0.09, hip_roll_penalty_gain: float = 0.5,
pelvis_sigma: float = 0.08, hip_roll_soft_limit: float = 0.42,
torso_sigma: float = 0.20, symmetry_penalty_gain: float = 0.2,
upright_sigma: float = 0.22, standing_vel_penalty_gain: float = 0.35,
support_sigma: float = 0.30, standing_vel_gate_h: float = 0.65,
tuck_gain: float = 0.6, stand_core_gain: float = 2.4,
no_foot_penalty_gain: float = 1.2, stand_upright_threshold: float = 0.82,
horizontal_vel_penalty_gain: float = 0.25, stand_foot_support_threshold: float = 0.65,
angular_vel_penalty_gain: float = 0.22, stand_arm_support_threshold: float = 0.25,
split_penalty_gain: float = 2.5, internal_reward_scale: float = 1.0,
split_soft_limit: float = 0.33,
split_hard_limit: float = 0.44,
split_hard_penalty_gain: float = 9.0,
head_delta_gain: float = 18.0,
pelvis_delta_gain: float = 15.0,
internal_reward_scale: float = 0.45,
) -> torch.Tensor: ) -> torch.Tensor:
head_idx, _ = env.scene["robot"].find_bodies("H2") # Cache expensive regex-based index lookups once per run.
pelvis_idx, _ = env.scene["robot"].find_bodies("Trunk") idx_cache_key = "getup_idx_cache"
foot_indices, _ = env.scene["robot"].find_bodies(".*_foot_link") idx_cache = env.extras.get(idx_cache_key, None)
if not isinstance(idx_cache, dict):
idx_cache = {}
env.extras[idx_cache_key] = idx_cache
head_pos = env.scene["robot"].data.body_state_w[:, head_idx[0], :3] def _cached_joint_ids(cache_name: str, expr: str) -> torch.Tensor:
pelvis_pos = env.scene["robot"].data.body_state_w[:, pelvis_idx[0], :3] ids = idx_cache.get(cache_name, None)
head_h = head_pos[:, 2] if isinstance(ids, torch.Tensor):
pelvis_h = pelvis_pos[:, 2] return ids
root_lin_vel_w = env.scene["robot"].data.root_lin_vel_w joint_idx, _ = env.scene["robot"].find_joints(expr)
root_lin_speed_xy = torch.norm(root_lin_vel_w[:, :2], dim=-1) ids = torch.tensor(joint_idx, device=env.device, dtype=torch.long) if len(joint_idx) > 0 else torch.empty(0, device=env.device, dtype=torch.long)
root_ang_speed = torch.norm(env.scene["robot"].data.root_ang_vel_w, dim=-1) idx_cache[cache_name] = ids
return ids
prev_head_key = "prev_head_height" def _cached_body_id(cache_name: str, expr: str) -> int | None:
prev_pelvis_key = "prev_pelvis_height" idx = idx_cache.get(cache_name, None)
if prev_head_key not in env.extras: if isinstance(idx, int):
env.extras[prev_head_key] = head_h.clone() return idx
if prev_pelvis_key not in env.extras: body_idx, _ = env.scene["robot"].find_bodies(expr)
env.extras[prev_pelvis_key] = pelvis_h.clone() idx = int(body_idx[0]) if len(body_idx) > 0 else None
prev_head_h = env.extras[prev_head_key] idx_cache[cache_name] = idx
prev_pelvis_h = env.extras[prev_pelvis_key] return idx
# Dense progress reward: positive-only height improvements help break plateaus.
head_delta = torch.clamp(head_h - prev_head_h, min=0.0, max=0.05)
pelvis_delta = torch.clamp(pelvis_h - prev_pelvis_h, min=0.0, max=0.05)
projected_gravity = env.scene["robot"].data.projected_gravity_b joint_pos = _safe_tensor(env.scene["robot"].data.joint_pos)
gravity_error = torch.norm(projected_gravity[:, :2], dim=-1) head_id = _cached_body_id("head_id", "H2")
upright_ratio = torch.clamp(1.0 - gravity_error, min=0.0, max=1.0) pelvis_id = _cached_body_id("pelvis_id", "Trunk")
head_h = env.scene["robot"].data.body_state_w[:, head_id, 2] if head_id is not None else torch.zeros(env.num_envs, device=env.device)
pelvis_h = env.scene["robot"].data.body_state_w[:, pelvis_id, 2] if pelvis_id is not None else torch.zeros(env.num_envs, device=env.device)
head_h = _safe_tensor(head_h, nan=0.0, pos=2.0, neg=-2.0)
pelvis_h = _safe_tensor(pelvis_h, nan=0.0, pos=2.0, neg=-2.0)
projected_gravity = _safe_tensor(env.scene["robot"].data.projected_gravity_b, nan=0.0, pos=2.0, neg=-2.0)
torso_vec = head_pos - pelvis_pos upright_ratio = torch.clamp(1.0 - torch.norm(projected_gravity[:, :2], dim=-1), min=0.0, max=1.0)
torso_vec_norm = torso_vec / (torch.norm(torso_vec, dim=-1, keepdim=True) + 1e-5) pelvis_progress = torch.clamp((pelvis_h - 0.20) / (min_pelvis_height - 0.20 + 1e-6), min=0.0, max=1.0)
torso_alignment = torch.clamp(torso_vec_norm[:, 2], min=0.0, max=1.0) head_clearance = torch.clamp((head_h - pelvis_h) / 0.45, min=0.0, max=1.0)
foot_force_z = _contact_force_z(env, foot_sensor_cfg) foot_force_z = _safe_tensor(_contact_force_z(env, foot_sensor_cfg), nan=0.0, pos=1e3, neg=0.0)
arm_force_z = _contact_force_z(env, arm_sensor_cfg) arm_force_z = _safe_tensor(_contact_force_z(env, arm_sensor_cfg), nan=0.0, pos=1e3, neg=0.0)
total_support = foot_force_z + arm_force_z + 1e-6 total_support = _safe_tensor(foot_force_z + arm_force_z + 1e-6, nan=1.0, pos=1e4, neg=1e-6)
foot_support_ratio = torch.clamp(foot_force_z / total_support, min=0.0, max=1.0) foot_support_ratio = torch.clamp(foot_force_z / total_support, min=0.0, max=1.0)
arm_support_ratio = torch.clamp(arm_force_z / total_support, min=0.0, max=1.0) arm_support_ratio = torch.clamp(arm_force_z / total_support, min=0.0, max=1.0)
release_phase = torch.sigmoid((pelvis_h - 0.58) * 8.0)
arm_release_reward = (1.0 - arm_support_ratio) * release_phase
head_track = torch.exp(-0.5 * torch.square((head_h - min_head_height) / head_sigma)) knee_ids = _cached_joint_ids("knee_ids", ".*Knee_Pitch")
pelvis_track = torch.exp(-0.5 * torch.square((pelvis_h - min_pelvis_height) / pelvis_sigma)) if knee_ids.numel() > 0:
# Dense height-progress shaping: provide reward signal all the way from lying to standing. knee_fold = torch.mean(torch.abs(joint_pos.index_select(1, knee_ids)), dim=-1)
head_progress = torch.clamp(head_h / min_head_height, min=0.0, max=1.0) knee_mid_bend = torch.exp(-0.5 * torch.square((knee_fold - knee_target) / knee_sigma))
pelvis_progress = torch.clamp(pelvis_h / min_pelvis_height, min=0.0, max=1.0)
# Encourage "head up" posture: head should stay clearly above pelvis.
head_clearance = torch.clamp((head_h - pelvis_h) / 0.45, min=0.0, max=1.0)
torso_track = torch.exp(-0.5 * torch.square((1.0 - torso_alignment) / torso_sigma))
upright_track = torch.exp(-0.5 * torch.square((1.0 - upright_ratio) / upright_sigma))
foot_support_track = torch.exp(-0.5 * torch.square((1.0 - foot_support_ratio) / support_sigma))
arm_release_track = torch.exp(-0.5 * torch.square(arm_support_ratio / support_sigma))
# Two-stage arm shaping:
# - early phase: encourage arm push to lift body
# - later phase: encourage releasing arm support for stand-up posture
push_phase = torch.sigmoid((0.5 - pelvis_progress) * 20.0)
release_phase = 1.0 - push_phase
arm_push_signal = torch.sigmoid((arm_force_z - arm_push_threshold) * arm_push_sharpness)
arm_push_reward = arm_push_signal * push_phase
arm_release_reward = arm_release_track * release_phase
feet_center_xy = torch.mean(env.scene["robot"].data.body_state_w[:, foot_indices, :2], dim=1)
pelvis_xy = pelvis_pos[:, :2]
feet_to_pelvis_dist = torch.norm(feet_center_xy - pelvis_xy, dim=-1)
tuck_legs_reward = torch.exp(-2.0 * feet_to_pelvis_dist)
posture_reward = (
head_track_gain * head_track
+ pelvis_track_gain * pelvis_track
+ head_progress_gain * head_progress
+ pelvis_progress_gain * pelvis_progress
+ head_delta_gain * head_delta
+ pelvis_delta_gain * pelvis_delta
+ head_clearance_gain * head_clearance
+ torso_track_gain * torso_track
+ upright_track_gain * upright_track
+ foot_support_gain * foot_support_track
+ arm_release_gain * arm_release_reward
+ arm_push_gain * arm_push_reward
+ tuck_gain * tuck_legs_reward
)
no_foot_penalty = -no_foot_penalty_gain * (1.0 - foot_support_ratio)
horizontal_velocity_penalty = -horizontal_vel_penalty_gain * root_lin_speed_xy
angular_velocity_penalty = -angular_vel_penalty_gain * root_ang_speed
left_foot_idx, _ = env.scene["robot"].find_bodies(".*left.*foot.*")
right_foot_idx, _ = env.scene["robot"].find_bodies(".*right.*foot.*")
if len(left_foot_idx) > 0 and len(right_foot_idx) > 0:
left_foot_pos = env.scene["robot"].data.body_state_w[:, left_foot_idx[0], :3]
right_foot_pos = env.scene["robot"].data.body_state_w[:, right_foot_idx[0], :3]
feet_distance = torch.norm(left_foot_pos[:, :2] - right_foot_pos[:, :2], dim=-1)
# Two-stage anti-split penalty:
# - soft: penalize widening beyond normal stance width
# - hard: strongly suppress large split postures
split_soft_excess = torch.clamp(feet_distance - split_soft_limit, min=0.0)
split_hard_excess = torch.clamp(feet_distance - split_hard_limit, min=0.0)
splits_penalty = (
-split_penalty_gain * split_soft_excess
-split_hard_penalty_gain * torch.square(split_hard_excess)
)
else: else:
splits_penalty = torch.zeros_like(head_h) knee_mid_bend = torch.zeros_like(pelvis_h)
hip_roll_ids = _cached_joint_ids("hip_roll_ids", ".*Hip_Roll")
if hip_roll_ids.numel() > 0:
hip_roll_abs = torch.mean(torch.abs(joint_pos.index_select(1, hip_roll_ids)), dim=-1)
else:
hip_roll_abs = torch.zeros_like(pelvis_h)
hip_roll_excess = torch.clamp(hip_roll_abs - hip_roll_soft_limit, min=0.0, max=0.5)
left_leg_ids = _cached_joint_ids("left_leg_ids", "^Left_(Hip_Pitch|Hip_Roll|Hip_Yaw|Knee_Pitch|Ankle_Pitch|Ankle_Roll)$")
right_leg_ids = _cached_joint_ids("right_leg_ids", "^Right_(Hip_Pitch|Hip_Roll|Hip_Yaw|Knee_Pitch|Ankle_Pitch|Ankle_Roll)$")
if left_leg_ids.numel() > 0 and right_leg_ids.numel() > 0 and left_leg_ids.numel() == right_leg_ids.numel():
left_leg = joint_pos.index_select(1, left_leg_ids)
right_leg = joint_pos.index_select(1, right_leg_ids)
symmetry_penalty = torch.mean(torch.abs(left_leg - right_leg), dim=-1)
else:
symmetry_penalty = torch.zeros_like(pelvis_h)
root_vel = _safe_tensor(torch.norm(env.scene["robot"].data.root_lin_vel_w, dim=-1), nan=10.0, pos=10.0, neg=0.0)
standing_gate = torch.sigmoid((pelvis_h - standing_vel_gate_h) * 10.0)
# Core dense signal for "stand up and stand stable".
stand_head = torch.sigmoid((head_h - min_head_height) * 10.0)
stand_pelvis = torch.sigmoid((pelvis_h - min_pelvis_height) * 10.0)
stand_upright = torch.sigmoid((upright_ratio - stand_upright_threshold) * 12.0)
stand_foot = torch.sigmoid((foot_support_ratio - stand_foot_support_threshold) * 12.0)
stand_arm_release = torch.sigmoid((stand_arm_support_threshold - arm_support_ratio) * 12.0)
stand_still = torch.exp(-3.0 * root_vel)
stand_core = (
0.22 * stand_head
+ 0.22 * stand_pelvis
+ 0.18 * stand_upright
+ 0.18 * stand_foot
+ 0.10 * stand_arm_release
+ 0.10 * stand_still
)
total_reward = ( total_reward = (
posture_reward upright_gain * upright_ratio
+ no_foot_penalty + pelvis_progress_gain * pelvis_progress
+ horizontal_velocity_penalty + head_clearance_gain * head_clearance
+ angular_velocity_penalty + foot_support_gain * foot_support_ratio
+ splits_penalty + arm_release_gain * arm_release_reward
+ knee_mid_bend_gain * knee_mid_bend
- hip_roll_penalty_gain * hip_roll_excess
- symmetry_penalty_gain * symmetry_penalty
- standing_vel_penalty_gain * standing_gate * root_vel
+ stand_core_gain * stand_core
) )
env.extras[prev_head_key] = head_h.detach() total_reward = _safe_tensor(total_reward, nan=0.0, pos=100.0, neg=-100.0)
env.extras[prev_pelvis_key] = pelvis_h.detach()
# Down-scale dense shaping to make success bonus relatively more dominant.
return internal_reward_scale * total_reward
upright_mean = torch.mean(upright_ratio).detach().item()
foot_support_ratio_mean = torch.mean(foot_support_ratio).detach().item()
arm_support_ratio_mean = torch.mean(arm_support_ratio).detach().item()
hip_roll_mean = torch.mean(hip_roll_abs).detach().item()
stand_core_mean = torch.mean(stand_core).detach().item()
log_dict = env.extras.get("log", {})
if isinstance(log_dict, dict):
log_dict["upright_mean"] = upright_mean
log_dict["foot_support_ratio_mean"] = foot_support_ratio_mean
log_dict["arm_support_ratio_mean"] = arm_support_ratio_mean
log_dict["hip_roll_mean"] = hip_roll_mean
log_dict["stand_core_mean"] = stand_core_mean
env.extras["log"] = log_dict
return internal_reward_scale * total_reward
def ground_farming_timeout(env: ManagerBasedRLEnv, max_time: float, height_threshold: float) -> torch.Tensor: def ground_farming_timeout(env: ManagerBasedRLEnv, max_time: float, height_threshold: float) -> torch.Tensor:
@@ -315,38 +584,6 @@ def is_supported_standing(
return (env.extras[timer_name] > standing_time).bool() return (env.extras[timer_name] > standing_time).bool()
def base_ang_vel_penalty(env: ManagerBasedRLEnv) -> torch.Tensor:
return torch.sum(torch.square(env.scene["robot"].data.root_ang_vel_w), dim=-1)
def airborne_flip_penalty(
env: ManagerBasedRLEnv,
foot_sensor_cfg: SceneEntityCfg,
arm_sensor_cfg: SceneEntityCfg,
full_support_sensor_cfg: SceneEntityCfg,
full_support_threshold: float = 12.0,
min_pelvis_height: float = 0.34,
contact_force_threshold: float = 6.0,
flip_ang_vel_threshold: float = 5.4,
inverted_gravity_threshold: float = 0.45,
) -> torch.Tensor:
pelvis_idx, _ = env.scene["robot"].find_bodies("Trunk")
pelvis_h = env.scene["robot"].data.body_state_w[:, pelvis_idx[0], 2]
projected_gravity = env.scene["robot"].data.projected_gravity_b
ang_vel = env.scene["robot"].data.root_ang_vel_w
foot_force_z = _contact_force_z(env, foot_sensor_cfg)
arm_force_z = _contact_force_z(env, arm_sensor_cfg)
support_force = foot_force_z + arm_force_z
no_support_ratio = torch.exp(-support_force / contact_force_threshold)
full_support_force_z = _contact_force_z(env, full_support_sensor_cfg)
is_fully_airborne = full_support_force_z < full_support_threshold
airborne_ratio = torch.sigmoid((pelvis_h - min_pelvis_height) * 10.0) * no_support_ratio * is_fully_airborne.float()
ang_speed = torch.norm(ang_vel, dim=-1)
spin_excess = torch.clamp(ang_speed - flip_ang_vel_threshold, min=0.0)
inverted_ratio = torch.clamp((projected_gravity[:, 2] - inverted_gravity_threshold) / (1.0 - inverted_gravity_threshold), min=0.0, max=1.0)
return airborne_ratio * (torch.square(spin_excess) + 0.1 * inverted_ratio)
def airborne_flip_termination( def airborne_flip_termination(
env: ManagerBasedRLEnv, env: ManagerBasedRLEnv,
foot_sensor_cfg: SceneEntityCfg, foot_sensor_cfg: SceneEntityCfg,
@@ -383,6 +620,19 @@ def airborne_flip_termination(
return (env.extras[timer_name] > persist_time).bool() return (env.extras[timer_name] > persist_time).bool()
def nonfinite_state_termination(env: ManagerBasedRLEnv) -> torch.Tensor:
    """Terminate envs whose simulation state contains NaN/Inf values.

    Checks joint positions/velocities, root linear/angular velocities,
    projected gravity, and root position; returns True per env when any of
    those tensors holds a non-finite entry.
    """
    data = env.scene["robot"].data
    state_tensors = (
        data.joint_pos,
        data.joint_vel,
        data.root_lin_vel_w,
        data.root_ang_vel_w,
        data.projected_gravity_b,
        data.root_pos_w,
    )
    finite_masks = [torch.isfinite(t).all(dim=-1) for t in state_tensors]
    all_finite = finite_masks[0]
    for mask in finite_masks[1:]:
        all_finite = all_finite & mask
    return ~all_finite
T1_JOINT_NAMES = [ T1_JOINT_NAMES = [
"AAHead_yaw", "Head_pitch", "AAHead_yaw", "Head_pitch",
"Left_Shoulder_Pitch", "Left_Shoulder_Roll", "Left_Elbow_Pitch", "Left_Elbow_Yaw", "Left_Shoulder_Pitch", "Left_Shoulder_Roll", "Left_Elbow_Pitch", "Left_Elbow_Yaw",
@@ -450,7 +700,7 @@ class T1ActionCfg:
joint_names=[ joint_names=[
"AAHead_yaw", "Head_pitch", "AAHead_yaw", "Head_pitch",
], ],
scale=0.3, scale=0.5,
use_default_offset=True use_default_offset=True
) )
arm_action = JointPositionActionCfg( arm_action = JointPositionActionCfg(
@@ -459,7 +709,7 @@ class T1ActionCfg:
"Left_Shoulder_Pitch", "Left_Shoulder_Roll", "Left_Elbow_Pitch", "Left_Elbow_Yaw", "Left_Shoulder_Pitch", "Left_Shoulder_Roll", "Left_Elbow_Pitch", "Left_Elbow_Yaw",
"Right_Shoulder_Pitch", "Right_Shoulder_Roll", "Right_Elbow_Pitch", "Right_Elbow_Yaw", "Right_Shoulder_Pitch", "Right_Shoulder_Roll", "Right_Elbow_Pitch", "Right_Elbow_Yaw",
], ],
scale=1.2, scale=0.82,
use_default_offset=True, use_default_offset=True,
) )
torso_action = JointPositionActionCfg( torso_action = JointPositionActionCfg(
@@ -467,7 +717,7 @@ class T1ActionCfg:
joint_names=[ joint_names=[
"Waist" "Waist"
], ],
scale=0.3, scale=0.58,
use_default_offset=True use_default_offset=True
) )
leg_action = JointPositionActionCfg( leg_action = JointPositionActionCfg(
@@ -477,7 +727,7 @@ class T1ActionCfg:
"Right_Hip_Yaw", "Left_Knee_Pitch", "Right_Knee_Pitch", "Left_Ankle_Pitch", "Right_Ankle_Pitch", "Right_Hip_Yaw", "Left_Knee_Pitch", "Right_Knee_Pitch", "Left_Ankle_Pitch", "Right_Ankle_Pitch",
"Left_Ankle_Roll", "Right_Ankle_Roll", "Left_Ankle_Roll", "Right_Ankle_Roll",
], ],
scale=1.5, scale=1.05,
use_default_offset=True, use_default_offset=True,
) )
@@ -486,83 +736,77 @@ class T1ActionCfg:
class T1GetUpRewardCfg: class T1GetUpRewardCfg:
smooth_getup = RewTerm( smooth_getup = RewTerm(
func=smooth_additive_getup_reward, func=smooth_additive_getup_reward,
weight=3.0, weight=5.0,
params={ params={
"min_head_height": 1.02, "min_head_height": 1.02,
"min_pelvis_height": 0.78, "min_pelvis_height": 0.78,
"foot_sensor_cfg": SceneEntityCfg("contact_sensor", body_names=[".*_foot_link"]), "foot_sensor_cfg": SceneEntityCfg("contact_sensor", body_names=[".*_foot_link"]),
"arm_sensor_cfg": SceneEntityCfg("contact_sensor", body_names=["A[LR][23]", ".*_hand_link"]), "arm_sensor_cfg": SceneEntityCfg("contact_sensor", body_names=["A[LR][23]", ".*_hand_link"]),
"head_track_gain": 7.0, "upright_gain": 2.4,
"pelvis_track_gain": 3.2, "pelvis_progress_gain": 1.8,
"head_progress_gain": 3.5, "head_clearance_gain": 1.0,
"pelvis_progress_gain": 2.0, "foot_support_gain": 1.2,
"head_clearance_gain": 2.8,
"torso_track_gain": 4.2,
"upright_track_gain": 3.6,
"foot_support_gain": 2.0,
"arm_release_gain": 1.2, "arm_release_gain": 1.2,
"arm_push_gain": 2.2, "knee_mid_bend_gain": 0.8,
"arm_push_threshold": 10.0, "knee_target": 1.0,
"arm_push_sharpness": 0.12, "knee_sigma": 0.5,
"head_sigma": 0.09, "hip_roll_penalty_gain": 0.5,
"pelvis_sigma": 0.08, "hip_roll_soft_limit": 0.42,
"torso_sigma": 0.20, "symmetry_penalty_gain": 0.2,
"upright_sigma": 0.22, "standing_vel_penalty_gain": 0.35,
"support_sigma": 0.30, "standing_vel_gate_h": 0.65,
"tuck_gain": 0.6, "stand_core_gain": 2.4,
"no_foot_penalty_gain": 1.2, "stand_upright_threshold": 0.82,
"horizontal_vel_penalty_gain": 0.18, "stand_foot_support_threshold": 0.65,
"angular_vel_penalty_gain": 0.16, "stand_arm_support_threshold": 0.25,
"split_penalty_gain": 2.8, "internal_reward_scale": 1.0,
"split_soft_limit": 0.33,
"split_hard_limit": 0.44,
"split_hard_penalty_gain": 9.0,
"head_delta_gain": 18.0,
"pelvis_delta_gain": 15.0,
"internal_reward_scale": 0.45,
}, },
) )
anti_airborne_flip = RewTerm(
func=airborne_flip_penalty,
weight=-0.18,
params={
"foot_sensor_cfg": SceneEntityCfg("contact_sensor", body_names=[".*_foot_link"]),
"arm_sensor_cfg": SceneEntityCfg("contact_sensor", body_names=["A[LR][23]", ".*_hand_link"]),
"full_support_sensor_cfg": SceneEntityCfg("contact_sensor", body_names=[".*"]),
"full_support_threshold": 12.0,
"min_pelvis_height": 0.34,
"contact_force_threshold": 6.0,
"flip_ang_vel_threshold": 5.4,
"inverted_gravity_threshold": 0.45,
},
)
base_ang_vel = RewTerm(func=base_ang_vel_penalty, weight=-0.007)
action_rate = RewTerm(func=mdp.action_rate_l2, weight=-0.009)
joint_vel = RewTerm(func=mdp.joint_vel_l2, weight=-0.005)
action_penalty = RewTerm(func=mdp.action_l2, weight=-0.005)
is_success_bonus = RewTerm( is_success_bonus = RewTerm(
func=is_supported_standing, func=is_supported_standing,
weight=100.0, weight=150.0,
params={ params={
"foot_sensor_cfg": SceneEntityCfg("contact_sensor", body_names=[".*_foot_link"]), "foot_sensor_cfg": SceneEntityCfg("contact_sensor", body_names=[".*_foot_link"]),
"arm_sensor_cfg": SceneEntityCfg("contact_sensor", body_names=["A[LR][23]", ".*_hand_link"]), "arm_sensor_cfg": SceneEntityCfg("contact_sensor", body_names=["A[LR][23]", ".*_hand_link"]),
"min_head_height": 1.05, "min_head_height": 1.05,
"min_pelvis_height": 0.65, "min_pelvis_height": 0.65,
"max_angle_error": 0.25, "max_angle_error": 0.18,
"velocity_threshold": 0.15, "velocity_threshold": 0.10,
"min_foot_support_force": 34.0, "min_foot_support_force": 36.0,
"max_arm_support_force": 20.0, "max_arm_support_force": 14.0,
"standing_time": 0.40, "standing_time": 0.70,
"timer_name": "reward_stable_timer", "timer_name": "reward_stable_timer",
}, },
) )
# AMP reward is disabled by default until a discriminator model path is provided.
amp_style_prior = RewTerm(
func=amp_style_prior_reward,
weight=0.0,
params={
"amp_enabled": False,
"amp_model_path": "",
"amp_train_enabled": False,
"amp_expert_features_path": "",
"disc_hidden_dim": 256,
"disc_hidden_layers": 2,
"disc_lr": 3e-4,
"disc_weight_decay": 1e-6,
"disc_update_interval": 4,
"disc_batch_size": 1024,
"disc_min_expert_samples": 2048,
"feature_clip": 8.0,
"logit_scale": 1.0,
"amp_reward_gain": 1.0,
"internal_reward_scale": 1.0,
},
)
@configclass @configclass
class T1GetUpTerminationsCfg: class T1GetUpTerminationsCfg:
time_out = DoneTerm(func=mdp.time_out) time_out = DoneTerm(func=mdp.time_out)
anti_farming = DoneTerm(func=ground_farming_timeout, params={"max_time": 5.5, "height_threshold": 0.24}) nonfinite_state_abort = DoneTerm(func=nonfinite_state_termination)
anti_farming = DoneTerm(func=ground_farming_timeout, params={"max_time": 4.5, "height_threshold": 0.48})
illegal_contact = DoneTerm( illegal_contact = DoneTerm(
func=mdp.illegal_contact, func=mdp.illegal_contact,
params={"sensor_cfg": SceneEntityCfg("contact_sensor", body_names=["Trunk"]), "threshold": 200.0}, params={"sensor_cfg": SceneEntityCfg("contact_sensor", body_names=["Trunk"]), "threshold": 200.0},
@@ -574,11 +818,11 @@ class T1GetUpTerminationsCfg:
"arm_sensor_cfg": SceneEntityCfg("contact_sensor", body_names=["A[LR][23]", ".*_hand_link"]), "arm_sensor_cfg": SceneEntityCfg("contact_sensor", body_names=["A[LR][23]", ".*_hand_link"]),
"min_head_height": 1.10, "min_head_height": 1.10,
"min_pelvis_height": 0.83, "min_pelvis_height": 0.83,
"max_angle_error": 0.10, "max_angle_error": 0.08,
"velocity_threshold": 0.10, "velocity_threshold": 0.08,
"min_foot_support_force": 36.0, "min_foot_support_force": 36.0,
"max_arm_support_force": 16.0, "max_arm_support_force": 16.0,
"standing_time": 1.0, "standing_time": 1.4,
"timer_name": "term_stable_timer", "timer_name": "term_stable_timer",
}, },
) )

Binary file not shown.

View File

@@ -13,6 +13,36 @@ from isaaclab.app import AppLauncher
parser = argparse.ArgumentParser(description="Train T1 get-up policy.") parser = argparse.ArgumentParser(description="Train T1 get-up policy.")
parser.add_argument("--num_envs", type=int, default=8192, help="Number of parallel environments") parser.add_argument("--num_envs", type=int, default=8192, help="Number of parallel environments")
parser.add_argument("--seed", type=int, default=42, help="Random seed") parser.add_argument("--seed", type=int, default=42, help="Random seed")
parser.add_argument(
"--amp_model",
type=str,
default="",
help="TorchScript AMP discriminator path (.pt/.jit). Empty disables AMP reward.",
)
parser.add_argument(
"--amp_reward_weight",
type=float,
default=0.6,
help="Reward term weight for AMP style prior when --amp_model is provided.",
)
parser.add_argument(
"--amp_expert_features",
type=str,
default="",
help="Path to torch file containing expert AMP features (N, D) for online discriminator training.",
)
parser.add_argument(
"--amp_train_discriminator",
action="store_true",
help="Enable online AMP discriminator updates using --amp_expert_features.",
)
parser.add_argument("--amp_disc_hidden_dim", type=int, default=256, help="Hidden width for AMP discriminator.")
parser.add_argument("--amp_disc_hidden_layers", type=int, default=2, help="Hidden layer count for AMP discriminator.")
parser.add_argument("--amp_disc_lr", type=float, default=3e-4, help="Learning rate for AMP discriminator.")
parser.add_argument("--amp_disc_weight_decay", type=float, default=1e-6, help="Weight decay for AMP discriminator.")
parser.add_argument("--amp_disc_update_interval", type=int, default=4, help="Train discriminator every N reward calls.")
parser.add_argument("--amp_disc_batch_size", type=int, default=1024, help="Discriminator train batch size.")
parser.add_argument("--amp_logit_scale", type=float, default=1.0, help="Scale before sigmoid(logits) for AMP score.")
AppLauncher.add_app_launcher_args(parser) AppLauncher.add_app_launcher_args(parser)
args_cli = parser.parse_args() args_cli = parser.parse_args()
@@ -22,12 +52,113 @@ simulation_app = app_launcher.app
import gymnasium as gym import gymnasium as gym
import yaml import yaml
from isaaclab_rl.rl_games import RlGamesVecEnvWrapper from isaaclab_rl.rl_games import RlGamesVecEnvWrapper
from rl_games.common.algo_observer import DefaultAlgoObserver
from rl_games.torch_runner import Runner from rl_games.torch_runner import Runner
from rl_games.common import env_configurations, vecenv from rl_games.common import env_configurations, vecenv
from rl_game.get_up.config.t1_env_cfg import T1EnvCfg from rl_game.get_up.config.t1_env_cfg import T1EnvCfg
class T1MetricObserver(DefaultAlgoObserver):
    """Collect custom env metrics and print to terminal."""

    def __init__(self):
        super().__init__()
        # Metric keys harvested from env info / "log" / "episode" dicts.
        self._tracked = (
            "upright_mean",
            "foot_support_ratio_mean",
            "arm_support_ratio_mean",
            "hip_roll_mean",
            "stand_core_mean",
            "amp_score_mean",
            "amp_reward_mean",
            "amp_model_loaded_mean",
            "amp_train_active_mean",
            "amp_disc_loss_mean",
            "amp_disc_acc_policy_mean",
            "amp_disc_acc_expert_mean",
        )
        # Running sums/counts between stat printouts.
        self._metric_sums: dict[str, float] = {}
        self._metric_counts: dict[str, int] = {}

    @staticmethod
    def _to_float(value):
        """Best-effort conversion for scalars/tensors/arrays."""
        if value is None:
            return None
        # Duck-typed handling for torch tensors / numpy arrays so neither
        # package has to be imported here.
        if hasattr(value, "detach"):
            value = value.detach()
        if hasattr(value, "cpu"):
            value = value.cpu()
        numel = getattr(value, "numel", None)
        if callable(numel):
            count = numel()
            if count == 0:
                return None
            if count == 1:
                if hasattr(value, "item"):
                    return float(value.item())
                return float(value)
            # Multi-element tensor: reduce to the mean.
            to_float = getattr(value, "float", None)
            if callable(to_float):
                return float(to_float().mean().item())
        mean = getattr(value, "mean", None)
        if callable(mean):
            try:
                return float(mean())
            except Exception:
                pass
        try:
            return float(value)
        except Exception:
            return None

    def _collect_from_dict(self, data: dict):
        # Accumulate every tracked metric present in `data`.
        for key in self._tracked:
            converted = self._to_float(data.get(key))
            if converted is None:
                continue
            self._metric_sums[key] = self._metric_sums.get(key, 0.0) + converted
            self._metric_counts[key] = self._metric_counts.get(key, 0) + 1

    def process_infos(self, infos, done_indices):
        # Keep default score handling.
        super().process_infos(infos, done_indices)
        if not infos:
            return
        if isinstance(infos, dict):
            candidates = [infos]
        elif isinstance(infos, (list, tuple)):
            candidates = [item for item in infos if isinstance(item, dict)]
        else:
            return
        for info in candidates:
            self._collect_from_dict(info)
            # Nested metric containers used by the env wrapper.
            for sub_key in ("log", "episode"):
                sub = info.get(sub_key)
                if isinstance(sub, dict):
                    self._collect_from_dict(sub)

    def after_print_stats(self, frame, epoch_num, total_time):
        super().after_print_stats(frame, epoch_num, total_time)
        fragments = []
        for key in self._tracked:
            n = self._metric_counts.get(key, 0)
            if n <= 0:
                continue
            fragments.append(f"{key}={self._metric_sums[key] / n:.4f}")
        if fragments:
            print(f"[CUSTOM][epoch={epoch_num} frame={frame}] " + " ".join(fragments))
        # Reset accumulators so each printout covers one stats window.
        self._metric_sums.clear()
        self._metric_counts.clear()
def _parse_reward_from_last_ckpt(path: str) -> float: def _parse_reward_from_last_ckpt(path: str) -> float:
"""Extract reward value from checkpoint name like '..._rew_123.45.pth'.""" """Extract reward value from checkpoint name like '..._rew_123.45.pth'."""
match = re.search(r"_rew_(-?\d+(?:\.\d+)?)\.pth$", os.path.basename(path)) match = re.search(r"_rew_(-?\d+(?:\.\d+)?)\.pth$", os.path.basename(path))
@@ -69,11 +200,40 @@ def _find_best_resume_checkpoint(log_dir: str, run_name: str) -> str | None:
def main(): def main():
task_id = "Isaac-T1-GetUp-v0" task_id = "Isaac-T1-GetUp-v0"
env_cfg = T1EnvCfg()
amp_cfg = env_cfg.rewards.amp_style_prior
amp_cfg.params["logit_scale"] = float(args_cli.amp_logit_scale)
if args_cli.amp_train_discriminator:
expert_path = os.path.abspath(os.path.expanduser(args_cli.amp_expert_features)) if args_cli.amp_expert_features else ""
amp_cfg.weight = float(args_cli.amp_reward_weight)
amp_cfg.params["amp_train_enabled"] = True
amp_cfg.params["amp_enabled"] = False
amp_cfg.params["amp_expert_features_path"] = expert_path
amp_cfg.params["disc_hidden_dim"] = int(args_cli.amp_disc_hidden_dim)
amp_cfg.params["disc_hidden_layers"] = int(args_cli.amp_disc_hidden_layers)
amp_cfg.params["disc_lr"] = float(args_cli.amp_disc_lr)
amp_cfg.params["disc_weight_decay"] = float(args_cli.amp_disc_weight_decay)
amp_cfg.params["disc_update_interval"] = int(args_cli.amp_disc_update_interval)
amp_cfg.params["disc_batch_size"] = int(args_cli.amp_disc_batch_size)
print(f"[INFO]: AMP online discriminator enabled, expert_features={expert_path or '<missing>'}")
print(f"[INFO]: AMP reward weight={amp_cfg.weight:.3f}")
elif args_cli.amp_model:
amp_model_path = os.path.abspath(os.path.expanduser(args_cli.amp_model))
amp_cfg.weight = float(args_cli.amp_reward_weight)
amp_cfg.params["amp_enabled"] = True
amp_cfg.params["amp_model_path"] = amp_model_path
amp_cfg.params["amp_train_enabled"] = False
print(f"[INFO]: AMP inference enabled, discriminator={amp_model_path}")
print(f"[INFO]: AMP reward weight={amp_cfg.weight:.3f}")
else:
print("[INFO]: AMP disabled (use --amp_model to enable)")
if task_id not in gym.registry: if task_id not in gym.registry:
gym.register( gym.register(
id=task_id, id=task_id,
entry_point="isaaclab.envs:ManagerBasedRLEnv", entry_point="isaaclab.envs:ManagerBasedRLEnv",
kwargs={"cfg": T1EnvCfg()}, kwargs={"cfg": env_cfg},
) )
env = gym.make(task_id, num_envs=args_cli.num_envs, disable_env_checker=True) env = gym.make(task_id, num_envs=args_cli.num_envs, disable_env_checker=True)
@@ -92,14 +252,14 @@ def main():
rl_config["params"]["config"]["name"] = run_name rl_config["params"]["config"]["name"] = run_name
rl_config["params"]["config"]["env_name"] = "rlgym" rl_config["params"]["config"]["env_name"] = "rlgym"
checkpoint_path = None #_find_best_resume_checkpoint(log_dir, run_name) checkpoint_path = _find_best_resume_checkpoint(log_dir, run_name)
if checkpoint_path is not None: if checkpoint_path is not None:
print(f"[INFO]: resume from checkpoint: {checkpoint_path}") print(f"[INFO]: resume from checkpoint: {checkpoint_path}")
rl_config["params"]["config"]["load_path"] = checkpoint_path rl_config["params"]["config"]["load_path"] = checkpoint_path
else: else:
print("[INFO]: no checkpoint found, train from scratch") print("[INFO]: no checkpoint found, train from scratch")
runner = Runner() runner = Runner(algo_observer=T1MetricObserver())
runner.load(rl_config) runner.load(rl_config)
try: try:
runner.run({"train": True, "play": False, "checkpoint": checkpoint_path, "vec_env": wrapped_env}) runner.run({"train": True, "play": False, "checkpoint": checkpoint_path, "vec_env": wrapped_env})