Add AMP get-up pipeline with sequence discriminator and git-sourced expert data
This commit is contained in:
@@ -42,7 +42,29 @@ parser.add_argument("--amp_disc_lr", type=float, default=3e-4, help="Learning ra
|
||||
# --- AMP discriminator training options -------------------------------------
parser.add_argument(
    "--amp_disc_weight_decay",
    type=float,
    default=1e-6,
    help="Weight decay for AMP discriminator.",
)
parser.add_argument(
    "--amp_disc_update_interval",
    type=int,
    default=4,
    help="Train discriminator every N reward calls.",
)
parser.add_argument(
    "--amp_disc_batch_size",
    type=int,
    default=1024,
    help="Discriminator train batch size.",
)
parser.add_argument(
    "--amp_disc_history_steps",
    type=int,
    default=4,
    help="Temporal history steps for AMP discriminator.",
)
parser.add_argument(
    "--amp_logit_scale",
    type=float,
    default=1.0,
    help="Scale before sigmoid(logits) for AMP score.",
)

# --- AMP expert data generated from get-up keyframe files --------------------
# Boolean switch: absent -> False, present -> True.
parser.add_argument(
    "--amp_from_keyframes",
    action="store_true",
    help="Generate AMP expert features from get-up keyframe yaml files and enable online discriminator training.",
)
# Both keyframe paths default to yaml files under
# PROJECT_ROOT/behaviors/custom/keyframe/get_up/.
parser.add_argument(
    "--amp_keyframe_front",
    type=str,
    default=os.path.join(PROJECT_ROOT, "behaviors", "custom", "keyframe", "get_up", "get_up_front.yaml"),
    help="Front get-up keyframe yaml path for AMP expert generation.",
)
parser.add_argument(
    "--amp_keyframe_back",
    type=str,
    default=os.path.join(PROJECT_ROOT, "behaviors", "custom", "keyframe", "get_up", "get_up_back.yaml"),
    help="Back get-up keyframe yaml path for AMP expert generation.",
)
parser.add_argument(
    "--amp_keyframe_dt",
    type=float,
    default=0.04,
    help="Resampling dt for keyframe AMP expert features.",
)
parser.add_argument(
    "--amp_keyframe_repeat",
    type=int,
    default=16,
    help="Repeat count for each keyframe sequence.",
)

# --- Keyframe motion prior reward --------------------------------------------
parser.add_argument(
    "--keyframe_prior_weight",
    type=float,
    default=1.0,
    help="Weight for keyframe motion prior reward.",
)
parser.add_argument(
    "--disable_keyframe_prior",
    action="store_true",
    help="Disable keyframe motion prior reward.",
)

# AppLauncher contributes its own options to the same parser, so it must be
# called before the command line is parsed.
AppLauncher.add_app_launcher_args(parser)

# Parsed once at module level; the resulting namespace is stored in args_cli.
args_cli = parser.parse_args()
|
||||
|
||||
@@ -56,7 +78,8 @@ from rl_games.common.algo_observer import DefaultAlgoObserver
|
||||
from rl_games.torch_runner import Runner
|
||||
from rl_games.common import env_configurations, vecenv
|
||||
|
||||
from rl_game.get_up.config.t1_env_cfg import T1EnvCfg
|
||||
from rl_game.get_up.amp.amp_motion import build_amp_expert_features_from_getup_keyframes
|
||||
from rl_game.get_up.config.t1_env_cfg import T1EnvCfg, T1_JOINT_NAMES
|
||||
|
||||
|
||||
class T1MetricObserver(DefaultAlgoObserver):
|
||||
@@ -77,6 +100,8 @@ class T1MetricObserver(DefaultAlgoObserver):
|
||||
"amp_disc_loss_mean",
|
||||
"amp_disc_acc_policy_mean",
|
||||
"amp_disc_acc_expert_mean",
|
||||
"keyframe_prior_mean",
|
||||
"keyframe_front_ratio",
|
||||
)
|
||||
self._metric_sums: dict[str, float] = {}
|
||||
self._metric_counts: dict[str, int] = {}
|
||||
@@ -202,10 +227,33 @@ def main():
|
||||
task_id = "Isaac-T1-GetUp-v0"
|
||||
env_cfg = T1EnvCfg()
|
||||
|
||||
if args_cli.disable_keyframe_prior:
|
||||
env_cfg.rewards.keyframe_motion_prior.weight = 0.0
|
||||
print("[INFO]: keyframe motion prior disabled")
|
||||
else:
|
||||
env_cfg.rewards.keyframe_motion_prior.weight = float(args_cli.keyframe_prior_weight)
|
||||
print(f"[INFO]: keyframe motion prior weight={env_cfg.rewards.keyframe_motion_prior.weight:.3f}")
|
||||
|
||||
if args_cli.amp_from_keyframes:
|
||||
auto_feature_path = os.path.join(os.path.dirname(__file__), "logs", "amp", "expert_features_from_keyframes.pt")
|
||||
generated_path, feature_shape = build_amp_expert_features_from_getup_keyframes(
|
||||
front_yaml_path=args_cli.amp_keyframe_front,
|
||||
back_yaml_path=args_cli.amp_keyframe_back,
|
||||
joint_names=T1_JOINT_NAMES,
|
||||
output_path=auto_feature_path,
|
||||
sample_dt=float(args_cli.amp_keyframe_dt),
|
||||
repeat_count=int(args_cli.amp_keyframe_repeat),
|
||||
)
|
||||
args_cli.amp_expert_features = generated_path
|
||||
args_cli.amp_train_discriminator = True
|
||||
print(f"[INFO]: AMP expert features generated at {generated_path}, shape={feature_shape}")
|
||||
|
||||
amp_cfg = env_cfg.rewards.amp_style_prior
|
||||
amp_cfg.params["logit_scale"] = float(args_cli.amp_logit_scale)
|
||||
if args_cli.amp_train_discriminator:
|
||||
expert_path = os.path.abspath(os.path.expanduser(args_cli.amp_expert_features)) if args_cli.amp_expert_features else ""
|
||||
if not expert_path:
|
||||
raise ValueError("--amp_train_discriminator requires --amp_expert_features or --amp_from_keyframes.")
|
||||
amp_cfg.weight = float(args_cli.amp_reward_weight)
|
||||
amp_cfg.params["amp_train_enabled"] = True
|
||||
amp_cfg.params["amp_enabled"] = False
|
||||
@@ -216,8 +264,10 @@ def main():
|
||||
amp_cfg.params["disc_weight_decay"] = float(args_cli.amp_disc_weight_decay)
|
||||
amp_cfg.params["disc_update_interval"] = int(args_cli.amp_disc_update_interval)
|
||||
amp_cfg.params["disc_batch_size"] = int(args_cli.amp_disc_batch_size)
|
||||
print(f"[INFO]: AMP online discriminator enabled, expert_features={expert_path or '<missing>'}")
|
||||
amp_cfg.params["disc_history_steps"] = int(args_cli.amp_disc_history_steps)
|
||||
print(f"[INFO]: AMP online discriminator enabled, expert_features={expert_path}")
|
||||
print(f"[INFO]: AMP reward weight={amp_cfg.weight:.3f}")
|
||||
print(f"[INFO]: AMP discriminator history_steps={amp_cfg.params['disc_history_steps']}")
|
||||
elif args_cli.amp_model:
|
||||
amp_model_path = os.path.abspath(os.path.expanduser(args_cli.amp_model))
|
||||
amp_cfg.weight = float(args_cli.amp_reward_weight)
|
||||
|
||||
Reference in New Issue
Block a user