Files
Gym_GPU/rl_game/get_up/config/t1_env_cfg.py

320 lines
12 KiB
Python
Raw Normal View History

2026-03-26 04:31:10 -04:00
import torch
import random
import numpy as np
import isaaclab.envs.mdp as mdp
from isaaclab.assets import ArticulationCfg
from isaaclab.envs import ManagerBasedRLEnvCfg, ManagerBasedRLEnv
from isaaclab.managers import ObservationGroupCfg as ObsGroup
from isaaclab.managers import ObservationTermCfg as ObsTerm
from isaaclab.managers import RewardTermCfg as RewTerm
from isaaclab.managers import TerminationTermCfg as DoneTerm
from isaaclab.managers import EventTermCfg as EventTerm
from isaaclab.envs.mdp import JointPositionActionCfg
from isaaclab.managers import SceneEntityCfg
from isaaclab.utils import configclass
from rl_game.get_up.env.t1_env import T1SceneCfg
# ==========================================
# 1. Core logic: nonlinear additive shaping and penalty terms
# ==========================================
def smooth_additive_getup_reward(
    env: "ManagerBasedRLEnv",
    min_head_height: float,
    min_pelvis_height: float,
    foot_sensor_cfg: "SceneEntityCfg",
    arm_sensor_cfg: "SceneEntityCfg",
) -> torch.Tensor:
    """Smooth, purely additive get-up reward.

    Combines an exponential height/uprightness "core" reward with auxiliary
    shaping terms: keeping the head up (anti-hunchback), tucking the feet
    under the pelvis, pushing with the arms, and planting the feet on the
    ground (anti-kneeling).

    Args:
        env: Running environment; reads the "robot" articulation and the two
            contact sensors from its scene.
        min_head_height: Head height (m) at which the head term saturates at 1.
        min_pelvis_height: Pelvis height (m) at which the height term saturates.
        foot_sensor_cfg: Contact-sensor entity covering the foot links.
        arm_sensor_cfg: Contact-sensor entity covering the arm/hand links.

    Returns:
        Per-env reward tensor of shape (num_envs,).
    """
    robot = env.scene["robot"]
    # NOTE(review): find_bodies runs a regex lookup on every call; if this
    # shows up in profiling, cache the indices once at startup.
    head_idx, _ = robot.find_bodies("H2")
    pelvis_idx, _ = robot.find_bodies("Trunk")
    foot_indices, _ = robot.find_bodies(".*_foot_link")

    head_h = robot.data.body_state_w[:, head_idx[0], 2]
    pelvis_pos = robot.data.body_state_w[:, pelvis_idx[0], :3]
    pelvis_h = pelvis_pos[:, 2]
    foot_pos = robot.data.body_state_w[:, foot_indices, :3]
    feet_center_xy = torch.mean(foot_pos[:, :, :2], dim=1)
    pelvis_xy = pelvis_pos[:, :2]
    projected_gravity = robot.data.projected_gravity_b

    # --- Basic state ratios (each clamped to [0, 1]) ---
    upright_ratio = torch.clamp(1.0 - torch.norm(projected_gravity[:, :2], dim=-1), min=0.0, max=1.0)
    raw_height_ratio = torch.clamp(pelvis_h / min_pelvis_height, min=0.0, max=1.0)
    # Head-height term: discourages a hunched-over "standing" pose.
    head_ratio = torch.clamp(head_h / min_head_height, min=0.0, max=1.0)
    # Squared height ratio rewards the last stretch of standing up more steeply.
    height_ratio_sq = torch.square(raw_height_ratio)
    # 1. Core reward: height dominates; uprightness and head height push the
    #    upper body fully straight.
    core_reward = (height_ratio_sq * 3.0) + (upright_ratio * 1.0) + (head_ratio * 1.0)

    # --- Auxiliary transition reward ---
    foot_sensor = env.scene.sensors.get(foot_sensor_cfg.name)
    arm_sensor = env.scene.sensors.get(arm_sensor_cfg.name)
    foot_force_z = torch.clamp(torch.sum(foot_sensor.data.net_forces_w[:, :, 2], dim=-1), min=0.0)
    # Single clamp to [0, 200] N (the original clamped min=0 twice in a row).
    arm_force_capped = torch.clamp(torch.sum(arm_sensor.data.net_forces_w[:, :, 2], dim=-1), min=0.0, max=200.0)

    # Reward pulling the feet under the pelvis (prepares a squat-to-stand).
    feet_to_pelvis_dist = torch.norm(feet_center_xy - pelvis_xy, dim=-1)
    tuck_legs_reward = torch.exp(-3.0 * feet_to_pelvis_dist)
    arm_push_reward = arm_force_capped / 200.0
    # Foot-contact term: any real load (> 10 N) on the soles earns credit,
    # guiding the robot out of a kneeling pose.
    foot_contact_reward = (foot_force_z > 10.0).float()
    # Auxiliary shaping fades out as the pelvis rises (ground_factor -> 0),
    # except the foot-contact bonus which stays active.
    ground_factor = torch.clamp(1.0 - raw_height_ratio, min=0.0, max=1.0)
    aux_reward = ground_factor * (tuck_legs_reward * 0.5 + arm_push_reward * 0.5) + (foot_contact_reward * 0.5)
    return core_reward + aux_reward
def ground_farming_timeout(
    env: ManagerBasedRLEnv,
    max_time: float,
    height_threshold: float
) -> torch.Tensor:
    """Terminate envs that idle on the ground instead of standing up.

    An env is flagged once its episode has run past ``max_time`` seconds while
    the trunk is still below ``height_threshold`` meters.
    """
    robot = env.scene["robot"]
    trunk_ids, _ = robot.find_bodies("Trunk")
    trunk_height = robot.data.body_state_w[:, trunk_ids[0], 2]
    elapsed = env.episode_length_buf * env.step_dt
    too_long = elapsed > max_time
    still_down = trunk_height < height_threshold
    return (too_long & still_down).bool()
def root_height_below_minimum(env: ManagerBasedRLEnv, minimum_height: float) -> torch.Tensor:
    """Flag envs whose trunk height has dropped below ``minimum_height`` meters."""
    robot = env.scene["robot"]
    trunk_ids, _ = robot.find_bodies("Trunk")
    trunk_height = robot.data.body_state_w[:, trunk_ids[0], 2]
    return (trunk_height < minimum_height).bool()
def is_standing_still(
    env: "ManagerBasedRLEnv",
    min_head_height: float,
    min_pelvis_height: float,
    max_angle_error: float,
    standing_time: float,
    velocity_threshold: float = 0.15
) -> torch.Tensor:
    """Return True for envs that have held a stable standing pose long enough.

    "Stable now" means: head above ``min_head_height``, pelvis above
    ``min_pelvis_height``, tilt (xy gravity projection norm) below
    ``max_angle_error``, and root speed below ``velocity_threshold``. A
    per-env timer accumulates while stable and resets to zero otherwise; the
    term fires once the timer exceeds ``standing_time`` seconds.
    """
    robot = env.scene["robot"]
    head_idx, _ = robot.find_bodies("H2")
    pelvis_idx, _ = robot.find_bodies("Trunk")
    current_head_h = robot.data.body_state_w[:, head_idx[0], 2]
    current_pelvis_h = robot.data.body_state_w[:, pelvis_idx[0], 2]
    gravity_error = torch.norm(robot.data.projected_gravity_b[:, :2], dim=-1)
    root_vel_norm = torch.norm(robot.data.root_lin_vel_w, dim=-1)
    is_stable_now = (
        (current_head_h > min_head_height) &
        (current_pelvis_h > min_pelvis_height) &
        (gravity_error < max_angle_error) &
        (root_vel_norm < velocity_threshold)
    )
    # BUGFIX: the original used a single shared env.extras["stable_timer"].
    # This function is registered both as a reward term and as a termination
    # term (with different thresholds), so the one shared timer was advanced
    # twice per step and the two registrations clobbered each other. Key the
    # timer by this term's parameters so each registration owns its own
    # accumulator.
    timer_key = (f"stable_timer_{min_head_height}_{min_pelvis_height}_"
                 f"{max_angle_error}_{standing_time}_{velocity_threshold}")
    if timer_key not in env.extras:
        env.extras[timer_key] = torch.zeros(env.num_envs, device=env.device)
    # env.step_dt == physics_dt * decimation; used directly for consistency
    # with ground_farming_timeout above.
    env.extras[timer_key] = torch.where(
        is_stable_now,
        env.extras[timer_key] + env.step_dt,
        torch.zeros_like(env.extras[timer_key]),
    )
    # NOTE(review): the timer is not explicitly cleared on episode reset; it
    # self-clears as soon as the robot is unstable, which a fresh drop always
    # is — confirm this holds for all reset poses.
    return (env.extras[timer_key] > standing_time).bool()
def anti_flying_penalty(
    env: ManagerBasedRLEnv,
    foot_sensor_cfg: SceneEntityCfg,
    arm_sensor_cfg: SceneEntityCfg,
) -> torch.Tensor:
    """Per-env airborne indicator (1.0 when "flying", else 0.0).

    An env counts as flying only when the trunk is above 0.4 m while BOTH the
    feet and the arms/hands carry less than 10 N of upward contact force.
    """
    robot = env.scene["robot"]
    trunk_ids, _ = robot.find_bodies("Trunk")
    trunk_height = robot.data.body_state_w[:, trunk_ids[0], 2]
    foot_sensor = env.scene.sensors.get(foot_sensor_cfg.name)
    arm_sensor = env.scene.sensors.get(arm_sensor_cfg.name)
    foot_load = torch.clamp(torch.sum(foot_sensor.data.net_forces_w[:, :, 2], dim=-1), min=0.0)
    arm_load = torch.clamp(torch.sum(arm_sensor.data.net_forces_w[:, :, 2], dim=-1), min=0.0)
    airborne = (trunk_height > 0.4) & (foot_load < 10.0) & (arm_load < 10.0)
    return airborne.float()
def base_ang_vel_penalty(env: ManagerBasedRLEnv) -> torch.Tensor:
    """Squared L2 norm of the root angular velocity (world frame), per env."""
    omega = env.scene["robot"].data.root_ang_vel_w
    return torch.sum(omega * omega, dim=-1)
def anti_kneeling_penalty(env: ManagerBasedRLEnv) -> torch.Tensor:
    """Per-env kneeling indicator used as a penalty signal.

    Matches the shank bodies via the "Shank.*" regex. An env is flagged (1.0)
    when any shank link sits below 0.15 m while the trunk is already above
    0.35 m, i.e. the robot is trying to rise while resting on its shins.
    """
    robot = env.scene["robot"]
    shank_ids, _ = robot.find_bodies("Shank.*")
    trunk_ids, _ = robot.find_bodies("Trunk")
    shank_heights = robot.data.body_state_w[:, shank_ids, 2]
    trunk_height = robot.data.body_state_w[:, trunk_ids[0], 2]
    kneeling = torch.any(shank_heights < 0.15, dim=-1)
    rising = trunk_height > 0.35
    return kneeling.float() * rising.float()
# ==========================================
# 2. Configuration classes
# ==========================================
# Canonical joint ordering for the T1 robot (23 DoF): head (2), arms (4 per
# side), waist (1), legs (6 per side). The observation terms below index
# joints in this order, so it must match the articulation's joint names.
T1_JOINT_NAMES = [
    'AAHead_yaw', 'Head_pitch',
    'Left_Shoulder_Pitch', 'Left_Shoulder_Roll', 'Left_Elbow_Pitch', 'Left_Elbow_Yaw',
    'Right_Shoulder_Pitch', 'Right_Shoulder_Roll', 'Right_Elbow_Pitch', 'Right_Elbow_Yaw',
    'Waist',
    'Left_Hip_Pitch', 'Right_Hip_Pitch', 'Left_Hip_Roll', 'Right_Hip_Roll',
    'Left_Hip_Yaw', 'Right_Hip_Yaw', 'Left_Knee_Pitch', 'Right_Knee_Pitch',
    'Left_Ankle_Pitch', 'Right_Ankle_Pitch', 'Left_Ankle_Roll', 'Right_Ankle_Roll'
]
@configclass
class T1ObservationCfg:
    """Observation manager config: a single flat "policy" group."""

    @configclass
    class PolicyCfg(ObsGroup):
        # Concatenate all terms into one flat observation vector.
        concatenate_terms = True
        # Root velocities and gravity direction in the base frame.
        base_lin_vel = ObsTerm(func=mdp.base_lin_vel)
        base_ang_vel = ObsTerm(func=mdp.base_ang_vel)
        projected_gravity = ObsTerm(func=mdp.projected_gravity)
        # Joint positions/velocities relative to defaults, in T1_JOINT_NAMES order.
        joint_pos = ObsTerm(func=mdp.joint_pos_rel,
                            params={"asset_cfg": SceneEntityCfg("robot", joint_names=T1_JOINT_NAMES)})
        joint_vel = ObsTerm(func=mdp.joint_vel_rel,
                            params={"asset_cfg": SceneEntityCfg("robot", joint_names=T1_JOINT_NAMES)})
        # Previous action, so the policy can regulate its own action rate.
        actions = ObsTerm(func=mdp.last_action)

    policy = PolicyCfg()
@configclass
class T1EventCfg:
    """Event manager config: randomize the root pose on every reset."""

    # Drop the robot from 0.45-0.65 m with a fully random orientation so it
    # must learn to get up from arbitrary fallen poses (x/y stay at origin).
    reset_robot_rotation = EventTerm(
        func=mdp.reset_root_state_uniform,
        params={
            "asset_cfg": SceneEntityCfg("robot"),
            "pose_range": {
                "roll": (-3.14, 3.14),
                "pitch": (-3.14, 3.14),
                "yaw": (-3.14, 3.14),
                "x": (0.0, 0.0),
                "y": (0.0, 0.0),
                "z": (0.45, 0.65),
            },
            # Empty range — presumably zero initial velocities; verify against
            # the mdp.reset_root_state_uniform implementation.
            "velocity_range": {},
        },
        mode="reset",
    )
@configclass
class T1ActionCfg:
    """Action manager config: joint-position targets for all 23 joints.

    Every group uses scale=0.8 with offsets at the default joint pose, so a
    zero action holds the default configuration.
    """

    # Arms: 4 DoF per side.
    arm_action = JointPositionActionCfg(
        asset_name="robot",
        joint_names=[
            'Left_Shoulder_Pitch', 'Left_Shoulder_Roll', 'Left_Elbow_Pitch', 'Left_Elbow_Yaw',
            'Right_Shoulder_Pitch', 'Right_Shoulder_Roll', 'Right_Elbow_Pitch', 'Right_Elbow_Yaw'
        ],
        scale=0.8, use_default_offset=True
    )
    # Waist and head.
    torso_action = JointPositionActionCfg(
        asset_name="robot",
        joint_names=['Waist', 'AAHead_yaw', 'Head_pitch'],
        scale=0.8, use_default_offset=True
    )
    # Legs: 6 DoF per side.
    leg_action = JointPositionActionCfg(
        asset_name="robot",
        joint_names=[
            'Left_Hip_Pitch', 'Right_Hip_Pitch', 'Left_Hip_Roll', 'Right_Hip_Roll',
            'Left_Hip_Yaw', 'Right_Hip_Yaw', 'Left_Knee_Pitch', 'Right_Knee_Pitch',
            'Left_Ankle_Pitch', 'Right_Ankle_Pitch', 'Left_Ankle_Roll', 'Right_Ankle_Roll'
        ],
        scale=0.8, use_default_offset=True
    )
@configclass
class T1GetUpRewardCfg:
    """Reward manager config for the get-up task."""

    # Main shaping signal: smooth additive get-up reward (see
    # smooth_additive_getup_reward above).
    smooth_getup = RewTerm(
        func=smooth_additive_getup_reward, weight=15.0,
        params={
            "min_head_height": 1.08, "min_pelvis_height": 0.72,
            "foot_sensor_cfg": SceneEntityCfg("contact_sensor", body_names=[".*_foot_link"]),
            "arm_sensor_cfg": SceneEntityCfg("contact_sensor", body_names=["A[LR][23]", ".*_hand_link"])
        }
    )
    # Discourage launching off the ground (trunk high with no contact force).
    anti_fly = RewTerm(
        func=anti_flying_penalty,
        weight=-5.0,
        params={
            "foot_sensor_cfg": SceneEntityCfg("contact_sensor", body_names=[".*_foot_link"]),
            "arm_sensor_cfg": SceneEntityCfg("contact_sensor", body_names=["A[LR][23]", ".*_hand_link"])
        }
    )
    # Heavy penalty for kneeling (shank near the ground while the trunk is up).
    anti_kneeling = RewTerm(func=anti_kneeling_penalty, weight=-3.0)
    # Smoothness regularizers.
    base_ang_vel = RewTerm(func=base_ang_vel_penalty, weight=-0.05)
    action_rate = RewTerm(func=mdp.action_rate_l2, weight=-0.05)
    joint_vel = RewTerm(func=mdp.joint_vel_l2, weight=-0.01)
    # Large success bonus with a deliberately low bar, to coax the policy into
    # attempting to stand at all.
    is_success_bonus = RewTerm(
        func=is_standing_still,
        weight=100.0,
        params={
            "min_head_height": 1.05,
            "min_pelvis_height": 0.75,
            "max_angle_error": 0.4,  # allow some forward lean
            "standing_time": 0.2,  # hold the pose for just 0.2 s to collect the bonus
            "velocity_threshold": 0.8  # allow slight body sway
        }
    )
@configclass
class T1GetUpTerminationsCfg:
    """Termination manager config for the get-up task."""

    # Episode time limit (episode_length_s on the env config).
    time_out = DoneTerm(func=mdp.time_out)
    # Cut episodes that idle on the ground: trunk still below 0.3 m after 5 s.
    anti_farming = DoneTerm(
        func=ground_farming_timeout,
        params={"max_time": 5.0, "height_threshold": 0.3}
    )
    # NOTE(review): -0.2 m is below the ground plane, so this term likely
    # never fires as configured — confirm whether that is intentional.
    base_height = DoneTerm(func=root_height_below_minimum, params={"minimum_height": -0.2})
    # Terminate on extreme trunk impacts (> 5000 N contact force).
    illegal_contact = DoneTerm(
        func=mdp.illegal_contact,
        params={"sensor_cfg": SceneEntityCfg("contact_sensor", body_names=["Trunk"]), "threshold": 5000.0}
    )
    # Successful stand: stricter than the reward bonus (tighter lean and
    # velocity limits, pose held for 0.5 s).
    standing_success = DoneTerm(
        func=is_standing_still,
        params={
            "min_head_height": 1.05, "min_pelvis_height": 0.75,
            "max_angle_error": 0.2, "standing_time": 0.5, "velocity_threshold": 0.2
        }
    )
@configclass
class T1EnvCfg(ManagerBasedRLEnvCfg):
    """Top-level manager-based RL environment config for the T1 get-up task."""

    scene = T1SceneCfg(num_envs=8192, env_spacing=2.5)
    observations = T1ObservationCfg()
    rewards = T1GetUpRewardCfg()
    terminations = T1GetUpTerminationsCfg()
    events = T1EventCfg()
    actions = T1ActionCfg()
    # 10 s episodes; the policy acts once every 4 physics steps.
    episode_length_s = 10.0
    decimation = 4