Từ Walking đến Parkour: Bước nhảy vọt
Trong các bài trước, chúng ta đã train humanoid đi trên mặt phẳng, vượt terrain khó, chạy nhanh, và bê vật thể. Nhưng tất cả đều là continuous locomotion — chân luôn tiếp xúc mặt đất theo pattern nhất quán.
Parkour là bước nhảy vọt: robot phải nhảy qua hố, leo lên bậc cao, vượt chướng ngại vật — những motion đòi hỏi flight phase (không chân nào chạm đất) và impact management (hấp thụ va đập khi tiếp đất). Đây là frontier khó nhất của humanoid locomotion.
Parkour Skills Taxonomy
| Skill | Mô tả | Độ khó | Key Challenge |
|---|---|---|---|
| Gap crossing | Nhảy qua hố/khe | ★★★ | Timing, flight trajectory |
| Box jumping | Nhảy lên/xuống hộp | ★★★ | Impact absorption, height estimation |
| Stair climbing | Leo cầu thang lên/xuống | ★★ | Step detection, balance |
| Vaulting | Chống tay nhảy qua | ★★★★ | Contact switching, arm-leg coordination |
| Wall climbing | Bám tường và leo | ★★★★★ | Multi-contact, friction, strength |
| Running jump | Chạy đà + nhảy xa | ★★★ | Velocity → takeoff transition |
| Landing | Tiếp đất an toàn | ★★★ | Impact distribution, recovery |
Terrain Perception: Height Map
Parkour cần nhìn thấy terrain phía trước để plan motions. Hai approach chính:
Approach 1: Privileged Learning (Teacher-Student)
class ParkourTeacherEnv:
    """Teacher environment that exposes privileged terrain information.

    The teacher policy is given a forward-looking height scan sampled
    directly from the ground-truth terrain heightfield — information a
    deployed robot would not have, hence "privileged".

    NOTE(review): assumes ``self.terrain`` (with a ``get_height(x, y)``
    method), ``self.robot_pos`` and ``self.robot_heading`` are populated
    elsewhere before the observation getters are called — confirm.
    """

    def __init__(self, terrain_resolution=0.05, scan_range=2.0):
        self.resolution = terrain_resolution
        self.scan_range = scan_range
        # Number of sample points along the scan line ahead of the robot.
        self.n_points = int(scan_range / terrain_resolution)

    def get_privileged_obs(self, robot_pos, robot_heading):
        """Return terrain heights scanned ahead of the robot.

        Heights are sampled along the heading direction at multiples of
        the scan resolution and expressed relative to the robot's base z.
        """
        cos_h = np.cos(robot_heading)
        sin_h = np.sin(robot_heading)
        heights = [
            self.terrain.get_height(
                robot_pos[0] + (k + 1) * self.resolution * cos_h,
                robot_pos[1] + (k + 1) * self.resolution * sin_h,
            ) - robot_pos[2]
            for k in range(self.n_points)
        ]
        return np.array(heights)  # shape: (n_points,)

    def get_teacher_obs(self, base_obs):
        """Return the full teacher observation: base obs + height scan."""
        scan = self.get_privileged_obs(self.robot_pos, self.robot_heading)
        return np.concatenate([base_obs, scan])
class ParkourStudentPolicy:
    """Student policy that learns from proprioception only.

    Distills a privileged teacher (which sees a ground-truth height scan)
    into a student that must infer the terrain from a history of its own
    proprioceptive observations (teacher-student / privileged learning).

    NOTE(review): assumes ``obs_dim`` and ``n_terrain_points`` are defined
    at module level, and that ``self.student_policy`` and
    ``self.optimizer`` are created elsewhere before ``distill`` runs —
    confirm against the full training script.
    """

    def __init__(self, teacher_policy, obs_history_len=50):
        self.teacher = teacher_policy
        self.history_len = obs_history_len
        self.obs_history = []
        # Student estimator: flattened observation history → terrain scan.
        self.terrain_estimator = nn.Sequential(
            nn.Linear(obs_history_len * obs_dim, 256),
            nn.ReLU(),
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Linear(128, n_terrain_points),
        )

    def distill(self, env, n_episodes=10000):
        """Distill teacher knowledge into the student.

        Per step: the teacher acts from its privileged observation; the
        student estimates the terrain from its observation history and
        imitates the teacher's action. Fixes over the naive loop:

        - ``done`` is initialized per episode (was read before assignment),
        - the environment is actually stepped so the episode progresses
          and terminates (the original never advanced ``obs``),
        - gradients are zeroed before each backward pass,
        - the history is zero-padded so the estimator always receives a
          fixed-size input, even early in the episode.
        """
        for ep in range(n_episodes):
            obs = env.reset()
            self.obs_history.clear()
            done = False  # bug fix: previously undefined on first check
            while not done:
                # Teacher action from the privileged observation.
                teacher_obs = env.get_teacher_obs(obs)
                teacher_action = self.teacher.predict(teacher_obs)

                # Maintain a rolling window of the last history_len obs.
                self.obs_history.append(obs)
                if len(self.obs_history) > self.history_len:
                    self.obs_history.pop(0)

                # Zero-pad the front so the flattened tensor always has
                # history_len * obs_dim elements (the Linear input size).
                stacked = np.stack(self.obs_history)
                pad = self.history_len - stacked.shape[0]
                if pad > 0:
                    stacked = np.concatenate(
                        [np.zeros((pad,) + stacked.shape[1:]), stacked]
                    )
                history_tensor = torch.tensor(
                    stacked, dtype=torch.float32
                ).flatten()
                estimated_terrain = self.terrain_estimator(history_tensor)

                # Loss 1: match the teacher's privileged terrain scan.
                true_terrain = env.get_privileged_obs(
                    env.robot_pos, env.robot_heading
                )
                terrain_loss = F.mse_loss(
                    estimated_terrain,
                    torch.tensor(true_terrain, dtype=torch.float32),
                )

                # Loss 2: action distillation (imitate the teacher).
                student_obs = torch.cat([
                    torch.tensor(obs, dtype=torch.float32),
                    estimated_terrain,
                ])
                student_action = self.student_policy(student_obs)
                action_loss = F.mse_loss(student_action, teacher_action)

                total_loss = terrain_loss + action_loss
                self.optimizer.zero_grad()  # bug fix: avoid gradient accumulation
                total_loss.backward()
                self.optimizer.step()

                # Bug fix: advance the environment with the teacher's action
                # (DAgger-style rollout); the original never stepped.
                # NOTE(review): assumes the classic gym 4-tuple step API.
                obs, reward, done, info = env.step(teacher_action)
Approach 2: Depth Camera
class DepthBasedParkour:
    """Parkour perception from a depth camera instead of privileged scans."""

    def __init__(self):
        # CNN encoder mapping a (1, 64, 64) depth image to a 128-d latent.
        self.depth_encoder = nn.Sequential(
            nn.Conv2d(1, 32, 5, stride=2),   # (64, 64) → (30, 30)
            nn.ReLU(),
            nn.Conv2d(32, 64, 3, stride=2),  # → (14, 14)
            nn.ReLU(),
            nn.Conv2d(64, 64, 3, stride=2),  # → (6, 6)
            nn.ReLU(),
            nn.Flatten(),
            nn.Linear(64 * 6 * 6, 128),
            nn.ReLU(),
        )

    def process_depth(self, depth_image):
        """Normalize a raw depth image and encode it into a latent vector."""
        mean = depth_image.mean()
        std = depth_image.std()
        # Epsilon guards against division by zero on flat depth images.
        normalized = (depth_image - mean) / (std + 1e-6)
        # Add batch and channel dims: (H, W) → (1, 1, H, W).
        batched = torch.tensor(normalized).unsqueeze(0).unsqueeze(0).float()
        return self.depth_encoder(batched)
Reward Design cho Parkour
Multi-Phase Reward
class ParkourReward:
    """Multi-phase reward for parkour: approach, takeoff, flight, landing."""

    def __init__(self, config):
        self.config = config
        # Per-term weights, grouped by motion phase.
        self.weights = {
            # Phase 1: approach — run toward the obstacle, stay aligned.
            'approach_velocity': 2.0,
            'heading_alignment': 0.5,
            # Phase 2: takeoff — jump speed, angle, leg extension.
            'takeoff_velocity': 3.0,
            'takeoff_angle': 1.0,
            'leg_extension': 1.0,
            # Phase 3: flight — clear the obstacle, keep the body upright.
            'height_clearance': 5.0,
            'body_orientation': 2.0,
            # Phase 4: landing — stable touchdown, soft impact, quick recovery.
            'landing_stability': 5.0,
            'impact_reduction': 2.0,
            'recovery_speed': 1.0,
            # General shaping terms.
            'alive_bonus': 0.5,
            'energy_penalty': -0.01,
            'torque_penalty': -0.001,
        }

    def compute(self, state, prev_state):
        """Return the scalar reward for one step.

        ``prev_state`` is accepted for API symmetry but unused here.
        NOTE(review): only a subset of the weights is exercised below;
        the remaining phase terms are presumably computed elsewhere.
        """
        w = self.weights
        total = 0.0

        # Survival bonus while the robot has not fallen.
        if not state['fallen']:
            total += w['alive_bonus']

        # Exponential velocity-tracking kernel: 1 at zero error, → 0 as
        # error grows.
        vel_error = abs(state['forward_vel'] - self.config.target_vel)
        total += w['approach_velocity'] * np.exp(-vel_error)

        # Obstacle clearance, capped at 0.3 m so the robot doesn't over-jump.
        if state['over_obstacle']:
            clearance = state['foot_height'] - state['obstacle_height']
            if clearance > 0:
                total += w['height_clearance'] * min(clearance, 0.3)

        # Landing: reward soft touchdowns, penalize impacts beyond 3x body
        # weight, and reward quickly regaining a stable stance.
        if state['just_landed']:
            impact = state['ground_reaction_force']
            max_safe_force = 3.0 * state['robot_mass'] * 9.81
            if impact < max_safe_force:
                total += w['landing_stability']
            else:
                total -= w['impact_reduction'] * (impact / max_safe_force - 1)
            if state['steps_to_stable'] < 10:
                total += w['recovery_speed']

        # Regularizers (weights are negative, so these subtract).
        total += w['energy_penalty'] * state['total_power']
        total += w['torque_penalty'] * np.sum(state['torques'] ** 2)
        return total
Training Pipeline cho Parkour
Obstacle Course Generation
from isaacgym import gymutil, gymapi
class ParkourTerrainGenerator:
    """Generate randomized obstacle courses for parkour training.

    All heightfields are 200x200 cells at 0.1 m per cell (~20 m of
    terrain); cell values are heights in meters relative to the nominal
    ground plane (0.0), with negative values for pits and gaps.
    """

    def __init__(self, num_envs=4096, terrain_size=20.0):
        self.num_envs = num_envs
        self.terrain_size = terrain_size

    def generate_gap_terrain(self, difficulty):
        """Flat ground with randomly placed gaps to jump across.

        Gap width scales 0.3 m → 1.0 m and depth 0.5 m → 2.0 m as
        ``difficulty`` goes from 0 to 1.
        """
        hf = np.zeros((200, 200))
        gap_width = 0.3 + difficulty * 0.7
        gap_depth = 0.5 + difficulty * 1.5
        width_cells = int(gap_width / 0.1)
        for _ in range(int(3 + difficulty * 5)):
            start = np.random.randint(30, 170)
            # Gaps span the full lateral extent of the course.
            hf[start:start + width_cells, :] = -gap_depth
        return hf

    def generate_stepping_stones(self, difficulty):
        """Stepping stones over a 2 m pit; stones shrink and gaps widen
        with difficulty."""
        hf = np.full((200, 200), -2.0)
        stone_size = max(0.4, 0.8 - difficulty * 0.4)
        gap = 0.3 + difficulty * 0.5
        stone_cells = int(stone_size / 0.1)
        x = 0
        while x < 20:
            col = int(x / 0.1)
            # Random lateral offset keeps the path from being a straight line.
            y_center = 100 + int(np.random.uniform(-0.3, 0.3) / 0.1)
            hf[col:col + stone_cells,
               y_center - stone_cells:y_center + stone_cells] = 0.0
            x += stone_size + gap
        return hf

    def generate_boxes(self, difficulty):
        """Square boxes to jump on and off; height 0.2 m → 0.8 m."""
        hf = np.zeros((200, 200))
        box_height = 0.2 + difficulty * 0.6
        for _ in range(int(3 + difficulty * 5)):
            x = np.random.randint(20, 180)
            y = np.random.randint(60, 140)
            side = np.random.randint(5, 15)
            hf[x:x + side, y:y + side] = box_height
        return hf

    def curriculum_terrain(self, difficulty):
        """Sample a terrain type and build it.

        NOTE(review): there is no dedicated 'stairs' builder, so 'stairs'
        silently falls through to the mixed combination below — confirm
        this is intended.
        """
        terrain_type = np.random.choice(
            ['gaps', 'stones', 'boxes', 'stairs', 'mixed'],
            p=[0.25, 0.2, 0.25, 0.15, 0.15]
        )
        builders = {
            'gaps': self.generate_gap_terrain,
            'stones': self.generate_stepping_stones,
            'boxes': self.generate_boxes,
        }
        builder = builders.get(terrain_type)
        if builder is not None:
            return builder(difficulty)
        # Mixed course: cell-wise max of every obstacle type, each built
        # at 70% of the requested difficulty.
        combined = np.zeros((200, 200))
        for build in builders.values():
            combined = np.maximum(combined, build(difficulty * 0.7))
        return combined
Training Config
# Isaac Lab training config for humanoid parkour.
# Curriculum: terrain difficulty increases by `difficulty_increment`
# whenever the success rate exceeds `success_threshold`, capped at
# `max_difficulty`.
parkour_config = {
    # Environment
    "num_envs": 4096,
    "episode_length": 500,
    "terrain_curriculum": True,
    "initial_difficulty": 0.0,
    "max_difficulty": 1.0,
    "difficulty_increment": 0.05,
    "success_threshold": 0.7,  # 70% success → increase difficulty
    # Observation (N = number of actuated DoF, M = height-scan points)
    "obs_components": [
        "base_lin_vel",       # (3,) base linear velocity
        "base_ang_vel",       # (3,) base angular velocity
        "projected_gravity",  # (3,) gravity in body frame
        "commands",           # (3,) velocity commands
        "dof_pos",            # (N,) joint positions
        "dof_vel",            # (N,) joint velocities
        "actions",            # (N,) previous actions
        "height_scan",        # (M,) terrain heights ahead
    ],
    # PPO hyperparameters
    "policy_class": "ActorCriticRecurrent",  # LSTM for temporal reasoning
    "hidden_sizes": [512, 256, 128],
    "learning_rate": 1e-4,
    "num_learning_epochs": 5,
    "num_mini_batches": 4,
    "clip_param": 0.2,        # PPO surrogate clipping range
    "entropy_coef": 0.01,
    "value_loss_coef": 1.0,
    "max_grad_norm": 1.0,
    "gamma": 0.99,            # discount factor
    "lam": 0.95,              # GAE lambda
    # Training schedule (iterations, checkpointing, logging)
    "max_iterations": 5000,
    "save_interval": 500,
    "log_interval": 10,
}
Multi-Skill Composition
Unified vs Separate Policies
| Approach | Ưu điểm | Nhược điểm |
|---|---|---|
| Unified policy | Smooth transitions, end-to-end | Khó train, slower convergence |
| Separate policies | Easier to train, modular | Transition jitter, skill selection needed |
| Mixture of Experts | Best of both — shared backbone, expert heads | Complex architecture |
Mixture of Experts Implementation
class ParkourMoE(nn.Module):
    """Mixture-of-Experts policy for parkour skills.

    A shared backbone feeds both a set of expert action heads (intended
    roles: walk, jump, climb, land) and a softmax gating network; the
    final action is the gate-weighted sum of expert outputs.
    """

    def __init__(self, obs_dim, action_dim, n_experts=4):
        super().__init__()
        self.n_experts = n_experts

        # Shared feature extractor.
        self.backbone = nn.Sequential(
            nn.Linear(obs_dim, 512),
            nn.ELU(),
            nn.Linear(512, 256),
            nn.ELU(),
        )

        # One small head per expert skill.
        self.experts = nn.ModuleList(
            nn.Sequential(
                nn.Linear(256, 128),
                nn.ELU(),
                nn.Linear(128, action_dim),
            )
            for _ in range(n_experts)
        )

        # Gating network producing a probability over experts.
        self.gate = nn.Sequential(
            nn.Linear(256, 64),
            nn.ELU(),
            nn.Linear(64, n_experts),
            nn.Softmax(dim=-1),
        )

    def forward(self, obs):
        """Return the blended action and the gating weights.

        Returns:
            action:  (B, action_dim) gate-weighted mix of expert outputs.
            weights: (B, n_experts) softmax gate, rows sum to 1.
        """
        features = self.backbone(obs)
        # (B, n_experts, action_dim): stack every expert's proposal.
        proposals = torch.stack(
            [head(features) for head in self.experts], dim=1
        )
        weights = self.gate(features)  # (B, n_experts)
        # Weighted sum over the expert axis.
        action = torch.einsum('be,bea->ba', weights, proposals)
        return action, weights
Current State-of-the-Art và Limitations
Đã đạt được (2026)
- Gap crossing: Humanoid nhảy qua hố 1m+ trong sim (Robot Parkour Learning, Zhuang et al. 2023)
- Stair climbing: Lên/xuống cầu thang smooth trên real robot (Extreme Parkour, Cheng et al. 2024)
- Box jumping: Nhảy lên hộp 0.5m trong sim (Humanoid-Gym)
- Running: Humanoid chạy 3+ m/s trên terrain phẳng (Radosavovic et al. 2024)
Chưa giải quyết
- Vaulting: Chống tay nhảy qua — quá phức tạp cho RL hiện tại
- Wall climbing: Yêu cầu gripper + full-body contact planning
- Recovery from falls: Tự đứng dậy sau khi ngã — active research
- Outdoor parkour: Unstructured environments, variable surfaces
Tài liệu tham khảo
- Robot Parkour Learning — Zhuang et al., CoRL 2023
- Extreme Parkour with Legged Robots — Cheng et al., ICRA 2024
- Agile and Versatile Robot Locomotion via Agility Training — Radosavovic et al., 2024
- Humanoid-Gym: Reinforcement Learning for Humanoid Robot — Gu et al., 2024
Bài viết liên quan
- Unitree G1: Terrain Adaptation và Robust Walking
- Robot Parkour: Nhảy, leo cầu thang bằng RL — Locomotion series
- Sim-to-Real cho Humanoid: Deployment Best Practices — Bài tiếp theo