Add AMP get-up pipeline with sequence discriminator and git-sourced expert data

2026-04-20 15:51:44 +08:00
parent 9e6e7e00f8
commit 995f6522b2
10 changed files with 1226 additions and 443 deletions
--- a/rl_game/get_up/train.py
+++ b/rl_game/get_up/train.py
@@ -42,7 +42,29 @@ parser.add_argument("--amp_disc_lr", type=float, default=3e-4, help="Learning ra
 parser.add_argument("--amp_disc_weight_decay", type=float, default=1e-6, help="Weight decay for AMP discriminator.")
 parser.add_argument("--amp_disc_update_interval", type=int, default=4, help="Train discriminator every N reward calls.")
 parser.add_argument("--amp_disc_batch_size", type=int, default=1024, help="Discriminator train batch size.")
+parser.add_argument("--amp_disc_history_steps", type=int, default=4, help="Temporal history steps for AMP discriminator.")
 parser.add_argument("--amp_logit_scale", type=float, default=1.0, help="Scale before sigmoid(logits) for AMP score.")
+parser.add_argument(
+    "--amp_from_keyframes",
+    action="store_true",
+    help="Generate AMP expert features from get-up keyframe yaml files and enable online discriminator training.",
+)
+parser.add_argument(
+    "--amp_keyframe_front",
+    type=str,
+    default=os.path.join(PROJECT_ROOT, "behaviors", "custom", "keyframe", "get_up", "get_up_front.yaml"),
+    help="Front get-up keyframe yaml path for AMP expert generation.",
+)
+parser.add_argument(
+    "--amp_keyframe_back",
+    type=str,
+    default=os.path.join(PROJECT_ROOT, "behaviors", "custom", "keyframe", "get_up", "get_up_back.yaml"),
+    help="Back get-up keyframe yaml path for AMP expert generation.",
+)
+parser.add_argument("--amp_keyframe_dt", type=float, default=0.04, help="Resampling dt for keyframe AMP expert features.")
+parser.add_argument("--amp_keyframe_repeat", type=int, default=16, help="Repeat count for each keyframe sequence.")
+parser.add_argument("--keyframe_prior_weight", type=float, default=1.0, help="Weight for keyframe motion prior reward.")
+parser.add_argument("--disable_keyframe_prior", action="store_true", help="Disable keyframe motion prior reward.")
 AppLauncher.add_app_launcher_args(parser)
 args_cli = parser.parse_args()

@@ -56,7 +78,8 @@ from rl_games.common.algo_observer import DefaultAlgoObserver
 from rl_games.torch_runner import Runner
 from rl_games.common import env_configurations, vecenv

-from rl_game.get_up.config.t1_env_cfg import T1EnvCfg
+from rl_game.get_up.amp.amp_motion import build_amp_expert_features_from_getup_keyframes
+from rl_game.get_up.config.t1_env_cfg import T1EnvCfg, T1_JOINT_NAMES


 class T1MetricObserver(DefaultAlgoObserver):
@@ -77,6 +100,8 @@ class T1MetricObserver(DefaultAlgoObserver):
            "amp_disc_loss_mean",
            "amp_disc_acc_policy_mean",
            "amp_disc_acc_expert_mean",
+            "keyframe_prior_mean",
+            "keyframe_front_ratio",
        )
        self._metric_sums: dict[str, float] = {}
        self._metric_counts: dict[str, int] = {}
@@ -202,10 +227,33 @@ def main():
    task_id = "Isaac-T1-GetUp-v0"
    env_cfg = T1EnvCfg()

+    if args_cli.disable_keyframe_prior:
+        env_cfg.rewards.keyframe_motion_prior.weight = 0.0
+        print("[INFO]: keyframe motion prior disabled")
+    else:
+        env_cfg.rewards.keyframe_motion_prior.weight = float(args_cli.keyframe_prior_weight)
+        print(f"[INFO]: keyframe motion prior weight={env_cfg.rewards.keyframe_motion_prior.weight:.3f}")
+
+    if args_cli.amp_from_keyframes:
+        auto_feature_path = os.path.join(os.path.dirname(__file__), "logs", "amp", "expert_features_from_keyframes.pt")
+        generated_path, feature_shape = build_amp_expert_features_from_getup_keyframes(
+            front_yaml_path=args_cli.amp_keyframe_front,
+            back_yaml_path=args_cli.amp_keyframe_back,
+            joint_names=T1_JOINT_NAMES,
+            output_path=auto_feature_path,
+            sample_dt=float(args_cli.amp_keyframe_dt),
+            repeat_count=int(args_cli.amp_keyframe_repeat),
+        )
+        args_cli.amp_expert_features = generated_path
+        args_cli.amp_train_discriminator = True
+        print(f"[INFO]: AMP expert features generated at {generated_path}, shape={feature_shape}")
+
    amp_cfg = env_cfg.rewards.amp_style_prior
    amp_cfg.params["logit_scale"] = float(args_cli.amp_logit_scale)
    if args_cli.amp_train_discriminator:
        expert_path = os.path.abspath(os.path.expanduser(args_cli.amp_expert_features)) if args_cli.amp_expert_features else ""
+        if not expert_path:
+            raise ValueError("--amp_train_discriminator requires --amp_expert_features or --amp_from_keyframes.")
        amp_cfg.weight = float(args_cli.amp_reward_weight)
        amp_cfg.params["amp_train_enabled"] = True
        amp_cfg.params["amp_enabled"] = False
@@ -216,8 +264,10 @@ def main():
        amp_cfg.params["disc_weight_decay"] = float(args_cli.amp_disc_weight_decay)
        amp_cfg.params["disc_update_interval"] = int(args_cli.amp_disc_update_interval)
        amp_cfg.params["disc_batch_size"] = int(args_cli.amp_disc_batch_size)
-        print(f"[INFO]: AMP online discriminator enabled, expert_features={expert_path or '<missing>'}")
+        amp_cfg.params["disc_history_steps"] = int(args_cli.amp_disc_history_steps)
+        print(f"[INFO]: AMP online discriminator enabled, expert_features={expert_path}")
        print(f"[INFO]: AMP reward weight={amp_cfg.weight:.3f}")
+        print(f"[INFO]: AMP discriminator history_steps={amp_cfg.params['disc_history_steps']}")
    elif args_cli.amp_model:
        amp_model_path = os.path.abspath(os.path.expanduser(args_cli.amp_model))
        amp_cfg.weight = float(args_cli.amp_reward_weight)