Add AMP get-up pipeline with sequence discriminator and git-sourced expert data

This commit is contained in:
Chen
2026-04-20 15:51:44 +08:00
parent 9e6e7e00f8
commit 995f6522b2
10 changed files with 1226 additions and 443 deletions

View File

@@ -42,7 +42,29 @@ parser.add_argument("--amp_disc_lr", type=float, default=3e-4, help="Learning ra
# --- AMP discriminator optimisation / scoring options ---
parser.add_argument(
    "--amp_disc_weight_decay",
    type=float,
    default=1e-6,
    help="Weight decay for AMP discriminator.",
)
parser.add_argument(
    "--amp_disc_update_interval",
    type=int,
    default=4,
    help="Train discriminator every N reward calls.",
)
parser.add_argument(
    "--amp_disc_batch_size",
    type=int,
    default=1024,
    help="Discriminator train batch size.",
)
parser.add_argument(
    "--amp_disc_history_steps",
    type=int,
    default=4,
    help="Temporal history steps for AMP discriminator.",
)
parser.add_argument(
    "--amp_logit_scale",
    type=float,
    default=1.0,
    help="Scale before sigmoid(logits) for AMP score.",
)

# --- AMP expert data generated from get-up keyframe yaml files ---
parser.add_argument(
    "--amp_from_keyframes",
    action="store_true",
    help="Generate AMP expert features from get-up keyframe yaml files and enable online discriminator training.",
)
# Both keyframe paths default to files under
# <PROJECT_ROOT>/behaviors/custom/keyframe/get_up/.
parser.add_argument(
    "--amp_keyframe_front",
    type=str,
    default=os.path.join(
        PROJECT_ROOT, "behaviors", "custom", "keyframe", "get_up", "get_up_front.yaml"
    ),
    help="Front get-up keyframe yaml path for AMP expert generation.",
)
parser.add_argument(
    "--amp_keyframe_back",
    type=str,
    default=os.path.join(
        PROJECT_ROOT, "behaviors", "custom", "keyframe", "get_up", "get_up_back.yaml"
    ),
    help="Back get-up keyframe yaml path for AMP expert generation.",
)
parser.add_argument(
    "--amp_keyframe_dt",
    type=float,
    default=0.04,
    help="Resampling dt for keyframe AMP expert features.",
)
parser.add_argument(
    "--amp_keyframe_repeat",
    type=int,
    default=16,
    help="Repeat count for each keyframe sequence.",
)

# --- Keyframe motion prior reward ---
parser.add_argument(
    "--keyframe_prior_weight",
    type=float,
    default=1.0,
    help="Weight for keyframe motion prior reward.",
)
parser.add_argument(
    "--disable_keyframe_prior",
    action="store_true",
    help="Disable keyframe motion prior reward.",
)

# Let the app launcher register its own CLI options on the same parser
# before the command line is parsed.
AppLauncher.add_app_launcher_args(parser)
args_cli = parser.parse_args()
@@ -56,7 +78,8 @@ from rl_games.common.algo_observer import DefaultAlgoObserver
from rl_games.torch_runner import Runner
from rl_games.common import env_configurations, vecenv
from rl_game.get_up.config.t1_env_cfg import T1EnvCfg
from rl_game.get_up.amp.amp_motion import build_amp_expert_features_from_getup_keyframes
from rl_game.get_up.config.t1_env_cfg import T1EnvCfg, T1_JOINT_NAMES
class T1MetricObserver(DefaultAlgoObserver):
@@ -77,6 +100,8 @@ class T1MetricObserver(DefaultAlgoObserver):
"amp_disc_loss_mean",
"amp_disc_acc_policy_mean",
"amp_disc_acc_expert_mean",
"keyframe_prior_mean",
"keyframe_front_ratio",
)
self._metric_sums: dict[str, float] = {}
self._metric_counts: dict[str, int] = {}
@@ -202,10 +227,33 @@ def main():
task_id = "Isaac-T1-GetUp-v0"
env_cfg = T1EnvCfg()
if args_cli.disable_keyframe_prior:
env_cfg.rewards.keyframe_motion_prior.weight = 0.0
print("[INFO]: keyframe motion prior disabled")
else:
env_cfg.rewards.keyframe_motion_prior.weight = float(args_cli.keyframe_prior_weight)
print(f"[INFO]: keyframe motion prior weight={env_cfg.rewards.keyframe_motion_prior.weight:.3f}")
if args_cli.amp_from_keyframes:
auto_feature_path = os.path.join(os.path.dirname(__file__), "logs", "amp", "expert_features_from_keyframes.pt")
generated_path, feature_shape = build_amp_expert_features_from_getup_keyframes(
front_yaml_path=args_cli.amp_keyframe_front,
back_yaml_path=args_cli.amp_keyframe_back,
joint_names=T1_JOINT_NAMES,
output_path=auto_feature_path,
sample_dt=float(args_cli.amp_keyframe_dt),
repeat_count=int(args_cli.amp_keyframe_repeat),
)
args_cli.amp_expert_features = generated_path
args_cli.amp_train_discriminator = True
print(f"[INFO]: AMP expert features generated at {generated_path}, shape={feature_shape}")
amp_cfg = env_cfg.rewards.amp_style_prior
amp_cfg.params["logit_scale"] = float(args_cli.amp_logit_scale)
if args_cli.amp_train_discriminator:
expert_path = os.path.abspath(os.path.expanduser(args_cli.amp_expert_features)) if args_cli.amp_expert_features else ""
if not expert_path:
raise ValueError("--amp_train_discriminator requires --amp_expert_features or --amp_from_keyframes.")
amp_cfg.weight = float(args_cli.amp_reward_weight)
amp_cfg.params["amp_train_enabled"] = True
amp_cfg.params["amp_enabled"] = False
@@ -216,8 +264,10 @@ def main():
amp_cfg.params["disc_weight_decay"] = float(args_cli.amp_disc_weight_decay)
amp_cfg.params["disc_update_interval"] = int(args_cli.amp_disc_update_interval)
amp_cfg.params["disc_batch_size"] = int(args_cli.amp_disc_batch_size)
print(f"[INFO]: AMP online discriminator enabled, expert_features={expert_path or '<missing>'}")
amp_cfg.params["disc_history_steps"] = int(args_cli.amp_disc_history_steps)
print(f"[INFO]: AMP online discriminator enabled, expert_features={expert_path}")
print(f"[INFO]: AMP reward weight={amp_cfg.weight:.3f}")
print(f"[INFO]: AMP discriminator history_steps={amp_cfg.params['disc_history_steps']}")
elif args_cli.amp_model:
amp_model_path = os.path.abspath(os.path.expanduser(args_cli.amp_model))
amp_cfg.weight = float(args_cli.amp_reward_weight)