Bài cuối: Từ Sim đến Robot thật
Xuyên suốt 9 bài trước, chúng ta đã train humanoid đi (G1, H1), chạy (H1 advanced), bê vật (loco-manip), và parkour (bài 9) — tất cả trong simulation. Bây giờ là bước quan trọng nhất và cũng khó nhất: deploy lên robot thật.
Sim-to-real cho humanoid khó hơn quadruped (xem locomotion sim2real) vì: bipedal inherently unstable, narrow support polygon, higher center of mass, và nhiều DOF hơn. Một sai lầm nhỏ có thể khiến robot ngã và hư hỏng phần cứng trị giá hàng chục ngàn đô.

Domain Randomization Checklist cho Humanoid
Physical Parameters
class HumanoidDomainRandomization:
    """Domain randomization config for humanoid sim-to-real transfer.

    ``params`` mixes two kinds of entries:
      * two-element ``[low, high]`` ranges — sampled uniformly every time
        :meth:`randomize` is called;
      * plain scalars (e.g. ``action_noise_std``, ``push_duration``) —
        fixed values forwarded to the environment unchanged.
    """

    def __init__(self):
        self.params = {
            # --- Body dynamics ---
            "base_mass_range": [0.85, 1.15],        # ±15% total mass
            "link_mass_range": [0.9, 1.1],          # ±10% per link
            "com_offset_range": [-0.02, 0.02],      # ±2cm CoM offset
            "inertia_range": [0.8, 1.2],            # ±20% inertia
            # --- Joint properties ---
            "joint_friction_range": [0.0, 0.05],    # Joint friction
            "joint_damping_range": [0.5, 2.0],      # Damping coefficient
            "joint_armature_range": [0.01, 0.05],   # Reflected inertia
            "joint_stiffness_range": [0.0, 5.0],    # Spring stiffness
            # --- Actuator ---
            "motor_strength_range": [0.85, 1.15],   # ±15% max torque
            "motor_offset_range": [-0.02, 0.02],    # Position offset (rad)
            # --- Ground ---
            "ground_friction_range": [0.4, 1.5],    # Friction coefficient
            "ground_restitution_range": [0.0, 0.3], # Bounciness
            # --- Delays & noise ---
            "action_delay_range": [0, 30],          # ms (0-2 sim steps)
            "observation_delay_range": [0, 15],     # ms
            "action_noise_std": 0.02,               # Action noise (fixed scalar)
            "joint_pos_noise_std": 0.01,            # Joint encoder noise (fixed scalar)
            "joint_vel_noise_std": 0.1,             # Joint velocity noise (fixed scalar)
            "imu_noise_std": 0.05,                  # IMU noise (fixed scalar)
            "imu_bias_range": [-0.1, 0.1],          # IMU bias
            # --- External perturbations ---
            "push_force_range": [0, 50],            # Random push (N)
            "push_interval_range": [5, 15],         # Seconds between pushes
            "push_duration": 0.1,                   # Push duration (s, fixed scalar)
            # --- Terrain ---
            "terrain_roughness_range": [0.0, 0.03], # Height variation
        }

    def randomize(self, env):
        """Sample/apply every parameter onto ``env`` and return it.

        Fix: the original unpacked each entry as ``(low, high)``, which
        raised TypeError on the scalar entries (``action_noise_std``,
        ``push_duration``, ...).  Ranges are sampled uniformly; scalars
        are passed through unchanged.
        """
        import numpy as np
        for key, spec in self.params.items():
            if isinstance(spec, (list, tuple)) and len(spec) == 2:
                value = np.random.uniform(spec[0], spec[1])  # sample range
            else:
                value = spec  # fixed scalar — not randomized
            env.set_param(key, value)
        return env
So sánh Sim vs Real Gaps
| Parameter | Sim Default | Real Robot | Gap | Impact |
|---|---|---|---|---|
| Control latency | 0ms | 5-20ms | Critical | Oscillation, instability |
| Joint friction | 0 | 0.01-0.05 Nm | Medium | Slow response |
| Ground friction | 1.0 | 0.3-1.2 | High | Slip, fall |
| Motor strength | 100% | 85-95% | Medium | Weaker motions |
| Sensor noise | 0 | IMU: ±2°, encoder: ±0.5° | Medium | Jerky control |
| Link mass | CAD exact | +5-10% (cables, etc.) | Medium | Balance offset |
| Flexibility | Rigid | Harmonic drives flex | High | Compliance mismatch |
System Identification
Motor Identification
import numpy as np
from scipy.optimize import minimize
class MotorIdentification:
"""Identify real motor parameters from data."""
def __init__(self, joint_name, dt=0.002):
self.joint_name = joint_name
self.dt = dt
def collect_data(self, robot, duration=10.0):
"""Collect torque-position-velocity data."""
data = {'pos': [], 'vel': [], 'torque': [], 'cmd': []}
# Chirp signal for frequency response
t = 0
while t < duration:
freq = 0.5 + t / duration * 5.0 # 0.5 → 5.5 Hz
cmd = 0.3 * np.sin(2 * np.pi * freq * t)
robot.set_joint_position(self.joint_name, cmd)
state = robot.get_joint_state(self.joint_name)
data['pos'].append(state.position)
data['vel'].append(state.velocity)
data['torque'].append(state.effort)
data['cmd'].append(cmd)
t += self.dt
return {k: np.array(v) for k, v in data.items()}
def identify(self, data):
"""Fit motor model to collected data."""
pos = data['pos']
vel = data['vel']
torque = data['torque']
cmd = data['cmd']
def motor_model(params, cmd, pos, vel):
"""Simple motor model: tau = Kp*(cmd-pos) - Kd*vel - friction*sign(vel)"""
Kp, Kd, friction, damping = params
predicted_torque = (
Kp * (cmd - pos)
- Kd * vel
- friction * np.sign(vel)
- damping * vel
)
return predicted_torque
def objective(params):
pred = motor_model(params, cmd, pos, vel)
return np.mean((pred - torque) ** 2)
# Initial guess
x0 = [100.0, 5.0, 0.5, 0.1]
bounds = [(10, 500), (0.1, 50), (0, 5), (0, 2)]
result = minimize(objective, x0, bounds=bounds, method='L-BFGS-B')
Kp, Kd, friction, damping = result.x
print(f"Joint {self.joint_name}:")
print(f" Kp = {Kp:.1f}, Kd = {Kd:.2f}")
print(f" Friction = {friction:.3f}, Damping = {damping:.3f}")
print(f" MSE = {result.fun:.6f}")
return {
'Kp': Kp, 'Kd': Kd,
'friction': friction, 'damping': damping,
}
Real-Time Inference trên Onboard Computer
Unitree Robot Compute Options
| Robot | Onboard Compute | GPU | Power | Inference Speed |
|---|---|---|---|---|
| G1 | Jetson Orin NX 16GB | 1024 CUDA | 25W | ~3ms (MLP) |
| H1 | Jetson Orin NX 16GB | 1024 CUDA | 25W | ~3ms (MLP) |
| H1-2 | Jetson AGX Orin 64GB | 2048 CUDA | 60W | ~1.5ms (MLP) |
Optimized Policy Inference
import torch
import time
import numpy as np
class PolicyInference:
    """Optimized policy inference for real-time control.

    Loads a trained policy checkpoint, JIT-compiles it, and pre-allocates
    all tensors so that :meth:`get_action` does no per-step allocation.
    """

    def __init__(self, policy_path, device='cuda'):
        self.device = device
        # Load policy.  NOTE(review): torch.load unpickles arbitrary objects —
        # only load trusted checkpoint files.  weights_only=False is required
        # to restore a full nn.Module (torch >= 2.6 defaults it to True).
        checkpoint = torch.load(policy_path, map_location=device,
                                weights_only=False)
        self.policy = checkpoint['policy']
        self.policy.eval()
        # Observation normalization stats.  Loaded *before* JIT tracing
        # because they also define obs_dim — the original read self.obs_dim
        # here before it was ever assigned, raising AttributeError.
        self.obs_mean = checkpoint['obs_mean'].to(device)
        self.obs_std = checkpoint['obs_std'].to(device)
        self.obs_dim = int(self.obs_mean.shape[-1])
        # JIT compile for speed.
        dummy_input = torch.zeros(1, self.obs_dim, device=device)
        self.policy_jit = torch.jit.trace(self.policy, dummy_input)
        self.policy_jit = torch.jit.freeze(self.policy_jit)
        # Action scaling.
        self.action_scale = checkpoint.get('action_scale', 0.25)
        # Moved to `device` so the addition in get_action stays on-device.
        self.default_dof_pos = checkpoint['default_dof_pos'].to(device)
        # Pre-allocate tensors (avoid allocation in control loop).
        self.obs_tensor = torch.zeros(1, self.obs_dim, device=device)

    def normalize_obs(self, obs):
        """Normalize observation using training statistics."""
        return (obs - self.obs_mean) / (self.obs_std + 1e-8)

    @torch.no_grad()
    def get_action(self, obs_np):
        """Map a raw observation (numpy) to joint position targets (numpy)."""
        # Copy into the pre-allocated tensor.
        self.obs_tensor[0] = torch.from_numpy(obs_np).float()
        # Normalize, then run the frozen policy.
        obs_norm = self.normalize_obs(self.obs_tensor)
        action = self.policy_jit(obs_norm)
        # Scale the action around the default (standing) pose.
        target_pos = (
            self.default_dof_pos +
            action.squeeze() * self.action_scale
        )
        return target_pos.cpu().numpy()

    def benchmark(self, n_iterations=1000):
        """Benchmark inference latency; prints mean/std/max and control rate."""
        obs = np.random.randn(self.obs_dim).astype(np.float32)
        # Only synchronize when actually on CUDA — the original called
        # torch.cuda.synchronize() unconditionally, which fails on
        # CPU-only machines.
        sync_cuda = 'cuda' in str(self.device) and torch.cuda.is_available()
        # Warm up (JIT specialization, allocator, autotuning).
        for _ in range(100):
            self.get_action(obs)
        if sync_cuda:
            torch.cuda.synchronize()
        # Measure.
        times = []
        for _ in range(n_iterations):
            start = time.perf_counter()
            self.get_action(obs)
            if sync_cuda:
                torch.cuda.synchronize()  # count kernel completion, not launch
            times.append(time.perf_counter() - start)
        mean_ms = np.mean(times) * 1000
        std_ms = np.std(times) * 1000
        max_ms = np.max(times) * 1000
        print(f"Inference: {mean_ms:.2f} ± {std_ms:.2f} ms")
        print(f"Max: {max_ms:.2f} ms")
        print(f"Control freq: {1000/mean_ms:.0f} Hz")
Safety Systems
Safety Layer Architecture
class HumanoidSafetyLayer:
    """Safety shield between the RL policy and the actuators.

    Every policy output passes through fall detection, hard joint-limit
    clamping, per-step velocity limiting and soft-limit warnings before
    reaching the motors.  Once a fall is detected the layer latches into
    e-stop and keeps commanding the current pose until `reset_estop()`.
    """

    def __init__(self, robot_config):
        self.config = robot_config
        # Limits: soft -> warn only, hard -> clamp.
        self.soft_limits = robot_config['soft_joint_limits']
        self.hard_limits = robot_config['hard_joint_limits']
        # Per-step rate limit input (rad/s).
        self.max_joint_vel = robot_config['max_joint_velocity']
        # Per-joint torque ceiling (Nm).
        self.max_torque = robot_config['max_torque']
        # Fall-detection thresholds (tune per robot: G1 ~0.4 m, H1 ~0.6 m).
        self.min_base_height = 0.3  # meters
        self.max_tilt_angle = 60.0  # degrees
        # Latched until reset_estop() is called.
        self.e_stop_active = False

    def check_and_apply(self, target_positions, current_state):
        """Run all safety checks and return motor-safe joint targets."""
        # Latched e-stop: ignore the policy entirely.
        if self.e_stop_active:
            return self._damping_mode(current_state)
        # Fall detection latches the e-stop.
        if self._detect_fall(current_state):
            print("FALL DETECTED — activating damping mode")
            self.e_stop_active = True
            return self._damping_mode(current_state)
        # Hard-limit clamp.
        clamped = np.clip(
            target_positions,
            self.hard_limits[:, 0],
            self.hard_limits[:, 1],
        )
        # Rate limit: cap per-step travel to max_joint_vel * dt.
        measured = current_state['joint_positions']
        step_cap = self.max_joint_vel * self.config['dt']
        commanded = measured + np.clip(clamped - measured, -step_cap, step_cap)
        # Soft-limit warnings (no clamping here).
        for i, (pos, (lo, hi)) in enumerate(zip(commanded, self.soft_limits)):
            if not lo <= pos <= hi:
                print(f"WARNING: Joint {i} near limit: {pos:.3f} "
                      f"[{lo:.3f}, {hi:.3f}]")
        return commanded

    def _detect_fall(self, state):
        """True when base height or tilt says the robot is going down."""
        if state['base_position'][2] < self.min_base_height:
            return True
        roll_deg = np.degrees(state['base_euler'][0])
        pitch_deg = np.degrees(state['base_euler'][1])
        return max(abs(roll_deg), abs(pitch_deg)) > self.max_tilt_angle

    def _damping_mode(self, state):
        """E-stop response: command the current measured pose.

        NOTE(review): the name suggests zero-torque damping, but the
        implementation holds the present joint positions in place.
        """
        return state['joint_positions']

    def reset_estop(self):
        """Manually clear the latched e-stop."""
        self.e_stop_active = False
        print("E-stop reset — resuming normal operation")
Deployment Framework: Unitree SDK + ROS 2
Complete Control Loop
#!/usr/bin/env python3
"""Humanoid locomotion deployment on Unitree G1/H1."""
import numpy as np
import time
from unitree_sdk2py.core.channel import ChannelPublisher, ChannelSubscriber
from unitree_sdk2py.idl.unitree_go.msg.dds_ import LowCmd_, LowState_
class HumanoidDeployment:
    """Deploy an RL locomotion policy on a Unitree humanoid (G1/H1).

    Wires policy inference, action filtering and the safety layer into a
    fixed-frequency control loop.
    NOTE(review): read_robot_state()/send_command() are expected from the
    SDK glue layer and are not defined in this snippet.
    """

    def __init__(self, policy_path, robot_type='g1'):
        # Policy inference engine (JIT-compiled network).
        self.policy = PolicyInference(policy_path)
        # Safety layer between policy output and actuators.
        self.safety = HumanoidSafetyLayer(
            ROBOT_CONFIGS[robot_type]
        )
        # Robot config.
        self.config = ROBOT_CONFIGS[robot_type]
        self.dt = self.config['control_dt']  # 0.02s = 50Hz
        # Observation history buffer (reserved for history-based policies;
        # not consumed by build_observation below).
        self.obs_history = []
        self.history_len = 10
        # Exponential smoothing of raw policy actions.
        self.action_filter = ExponentialFilter(alpha=0.8)
        # Command/action state.  Fix: the original only assigned these in
        # run(), so calling control_step()/build_observation() first raised
        # AttributeError.
        self.velocity_command = np.zeros(3)
        self.previous_action = np.zeros(self.config['num_dof'])
        # Loop state.
        self.running = False
        self.iteration = 0

    def build_observation(self, state):
        """Construct the policy observation vector from raw robot state."""
        obs = np.concatenate([
            state['base_angular_velocity'],      # (3,) from IMU
            state['projected_gravity'],          # (3,) gravity in body frame
            self.velocity_command,               # (3,) vx, vy, vyaw
            (state['joint_positions'] -
             self.config['default_dof_pos']),    # (N,) joint pos offset
            state['joint_velocities'],           # (N,) joint velocities
            self.previous_action,                # (N,) last filtered action
        ])
        # Clip to guard against sensor spikes blowing up the network.
        obs = np.clip(obs, -100.0, 100.0)
        return obs

    def control_step(self, state):
        """Single control step: obs → policy → filter → safety → command."""
        # 1. Build observation.
        obs = self.build_observation(state)
        # 2. Policy inference.
        raw_action = self.policy.get_action(obs)
        # 3. Action filtering (smooths jerky commands).
        filtered_action = self.action_filter.apply(raw_action)
        # 4. Convert normalized action to absolute joint targets.
        target_positions = (
            self.config['default_dof_pos'] +
            filtered_action * self.config['action_scale']
        )
        # 5. Safety check.
        safe_positions = self.safety.check_and_apply(
            target_positions, state
        )
        # 6. Store for next iteration's observation.
        self.previous_action = filtered_action.copy()
        self.iteration += 1
        return safe_positions

    def run(self, velocity_command=(0.5, 0.0, 0.0)):
        """Main fixed-frequency control loop.

        Args:
            velocity_command: (vx, vy, vyaw) command.  Fix: a tuple default
                replaces the original mutable-list default argument.
        """
        self.velocity_command = np.array(velocity_command)
        self.previous_action = np.zeros(self.config['num_dof'])
        self.running = True
        print(f"Starting deployment: vel_cmd = {velocity_command}")
        print(f"Control frequency: {1/self.dt:.0f} Hz")
        print("Press Ctrl+C to stop")
        try:
            while self.running:
                loop_start = time.perf_counter()
                # Read state.
                state = self.read_robot_state()
                # Compute action.
                target_pos = self.control_step(state)
                # Send command.
                self.send_command(target_pos)
                # Keep the loop at self.dt; warn on serious overruns.
                elapsed = time.perf_counter() - loop_start
                sleep_time = self.dt - elapsed
                if sleep_time > 0:
                    time.sleep(sleep_time)
                elif elapsed > self.dt * 1.5:
                    print(f"WARNING: Control loop overrun: "
                          f"{elapsed*1000:.1f}ms > {self.dt*1000:.1f}ms")
        except KeyboardInterrupt:
            print("\nStopping — entering damping mode")
            self.safety.e_stop_active = True
class ExponentialFilter:
    """First-order low-pass (EMA) filter used to smooth policy actions."""

    def __init__(self, alpha=0.8):
        # alpha -> 1: trust the newest sample; alpha -> 0: heavy smoothing.
        self.alpha = alpha
        self.prev = None

    def apply(self, x):
        """Return the filtered sample; the very first call passes x through."""
        if self.prev is None:
            self.prev = x.copy()
            return x
        blended = self.alpha * x + (1 - self.alpha) * self.prev
        # Store a copy so callers mutating the returned array can't
        # corrupt the filter state.
        self.prev = blended.copy()
        return blended
# Robot configurations — per-robot deployment parameters keyed by robot type.
# Consumed by HumanoidDeployment (control_dt, action_scale, default_dof_pos)
# and HumanoidSafetyLayer (joint limits, max_joint_velocity, max_torque, dt).
# NOTE(review): the limit/torque arrays are uniform placeholders ([-2, 2]
# soft, [-2.5, 2.5] hard, one torque value for every joint) — replace with
# the per-joint values from the robot's URDF before real deployment.
ROBOT_CONFIGS = {
    'g1': {
        'num_dof': 23,                 # actuated joints driven by the policy
        'control_dt': 0.02,            # 50 Hz
        'action_scale': 0.25,          # rad of target offset per unit action
        'max_joint_velocity': 10.0,    # rad/s — input to safety rate limiter
        'default_dof_pos': np.zeros(23),  # Standing pose
        'soft_joint_limits': np.array([[-2.0, 2.0]] * 23),   # warn only
        'hard_joint_limits': np.array([[-2.5, 2.5]] * 23),   # clamped
        'max_torque': np.array([50.0] * 23),                 # Nm per joint
        'dt': 0.02,                    # duplicate of control_dt ('dt' is what
                                       # the safety layer reads)
    },
    'h1': {
        'num_dof': 19,                 # H1 has fewer actuated joints than G1
        'control_dt': 0.02,            # 50 Hz
        'action_scale': 0.25,
        'max_joint_velocity': 10.0,
        'default_dof_pos': np.zeros(19),  # Standing pose
        'soft_joint_limits': np.array([[-2.0, 2.0]] * 19),   # warn only
        'hard_joint_limits': np.array([[-2.5, 2.5]] * 19),   # clamped
        'max_torque': np.array([80.0] * 19),                 # Nm per joint
        'dt': 0.02,
    },
}

Debugging Common Failures
| Symptom | Probable Cause | Fix |
|---|---|---|
| Robot falls immediately | Observation mismatch | Verify joint order, IMU frame, gravity direction |
| Wobbling/oscillation | PD gains too high or latency | Reduce gains, add action filter |
| Drifts sideways | IMU bias or asymmetric friction | Calibrate IMU, check foot contact |
| Can't lift feet | Friction too high or motor weak | Reduce friction, check motor strength |
| Jerky motions | No action smoothing | Add EMA filter (alpha=0.7-0.9) |
| Falls on stairs | No terrain adaptation | Deploy teacher-student model with history |
Complete Deployment Checklist
Pre-Deployment
First Test Protocol
Monitoring During Deployment
Tổng kết Series
Tài liệu tham khảo
- Learning Humanoid Locomotion with Transformers — Radosavovic et al., 2024
- Real-World Humanoid Locomotion with RL — Gu et al., 2024
- Humanoid-Gym: Reinforcement Learning for Humanoid Robot — Gu et al., 2024
- Sim-to-Real Transfer for Bipedal Locomotion — Li et al., 2023
Bài viết liên quan