The Final Step: From Sim to Real Robot
Throughout the previous 9 posts, we trained humanoids to walk (G1, H1), run (H1 advanced), carry objects (loco-manip), and parkour (post 9) — all in simulation. Now comes the most important and difficult step: deploying to a real robot.
Sim-to-real for humanoids is harder than for quadrupeds (see locomotion sim2real) because: bipedal is inherently unstable, narrow support polygon, higher center of mass, and more DOF. A small mistake can cause the robot to fall and damage hardware worth tens of thousands of dollars.

Domain Randomization Checklist for Humanoids
Physical Parameters
class HumanoidDomainRandomization:
    """Domain randomization config for humanoid sim-to-real.

    `params` mixes two kinds of entries:
      - `[low, high]` ranges, sampled uniformly each episode;
      - plain scalars (noise std-devs, push duration), applied as-is.
    """

    def __init__(self):
        self.params = {
            # --- Body dynamics ---
            "base_mass_range": [0.85, 1.15],    # ±15% total mass
            "link_mass_range": [0.9, 1.1],      # ±10% per link
            "com_offset_range": [-0.02, 0.02],  # ±2cm CoM offset
            "inertia_range": [0.8, 1.2],        # ±20% inertia
            # --- Joint properties ---
            "joint_friction_range": [0.0, 0.05],
            "joint_damping_range": [0.5, 2.0],
            "joint_armature_range": [0.01, 0.05],
            "joint_stiffness_range": [0.0, 5.0],
            # --- Actuator ---
            "motor_strength_range": [0.85, 1.15],  # ±15% max torque
            "motor_offset_range": [-0.02, 0.02],   # Position offset (rad)
            # --- Ground ---
            "ground_friction_range": [0.4, 1.5],
            "ground_restitution_range": [0.0, 0.3],
            # --- Delays & noise ---
            "action_delay_range": [0, 30],        # ms
            "observation_delay_range": [0, 15],   # ms
            "action_noise_std": 0.02,
            "joint_pos_noise_std": 0.01,
            "joint_vel_noise_std": 0.1,
            "imu_noise_std": 0.05,
            "imu_bias_range": [-0.1, 0.1],
            # --- External perturbations ---
            "push_force_range": [0, 50],     # N
            "push_interval_range": [5, 15],  # seconds
            "push_duration": 0.1,            # seconds
            # --- Terrain ---
            "terrain_roughness_range": [0.0, 0.03],
        }

    def randomize(self, env):
        """Apply randomization to environment.

        BUGFIX: the original unconditionally unpacked every value as
        `(low, high)`, which raises TypeError on the scalar entries
        (e.g. "action_noise_std", "push_duration"). Ranges are sampled
        uniformly; scalars are passed through unchanged.
        """
        for key, spec in self.params.items():
            if isinstance(spec, (list, tuple)):
                low, high = spec
                value = np.random.uniform(low, high)
            else:
                value = spec  # fixed scalar (noise std, duration, ...)
            env.set_param(key, value)
        return env
Sim vs Real Gap Comparison
| Parameter | Sim Default | Real Robot | Gap | Impact |
|---|---|---|---|---|
| Control latency | 0ms | 5-20ms | Critical | Oscillation, instability |
| Joint friction | 0 | 0.01-0.05 Nm | Medium | Slow response |
| Ground friction | 1.0 | 0.3-1.2 | High | Slip, fall |
| Motor strength | 100% | 85-95% | Medium | Weaker motions |
| Sensor noise | 0 | IMU: ±2°, encoder: ±0.5° | Medium | Jerky control |
| Link mass | CAD exact | +5-10% (cables, etc.) | Medium | Balance offset |
| Flexibility | Rigid | Harmonic drives flex | High | Compliance mismatch |
System Identification
Motor Identification
import numpy as np
from scipy.optimize import minimize
class MotorIdentification:
    """Identify real motor parameters (PD gains, friction, damping) from data.

    Workflow: excite a single joint with a chirp position command
    (`collect_data`), then fit a simple actuator model to the logged
    torques (`identify`).
    """

    def __init__(self, joint_name, dt=0.002):
        self.joint_name = joint_name
        self.dt = dt  # control period in seconds (500 Hz default)

    def collect_data(self, robot, duration=10.0):
        """Collect torque-position-velocity data under a chirp excitation.

        BUGFIX: the original used `sin(2*pi*f(t)*t)` with a linearly ramping
        f(t), whose *instantaneous* frequency is f(t) + t*f'(t) — that sweeps
        0.5 -> ~10.5 Hz, not the intended 0.5 -> 5.5 Hz. A linear chirp needs
        the phase to be the time-integral of the frequency.
        """
        data = {'pos': [], 'vel': [], 'torque': [], 'cmd': []}
        f0, f1 = 0.5, 5.5               # Hz, start/end of the sweep
        rate = (f1 - f0) / duration     # linear frequency ramp (Hz/s)
        t = 0
        while t < duration:
            # phase(t) = 2*pi * (f0*t + rate*t^2 / 2)
            cmd = 0.3 * np.sin(2 * np.pi * (f0 * t + 0.5 * rate * t * t))
            robot.set_joint_position(self.joint_name, cmd)
            state = robot.get_joint_state(self.joint_name)
            data['pos'].append(state.position)
            data['vel'].append(state.velocity)
            data['torque'].append(state.effort)
            data['cmd'].append(cmd)
            t += self.dt
        return {k: np.array(v) for k, v in data.items()}

    def identify(self, data):
        """Fit the motor model to collected data and return its parameters.

        Model: tau = Kp*(cmd - pos) - Kd*vel - friction*sign(vel) - damping*vel

        NOTE(review): the -Kd*vel and -damping*vel terms are collinear, so
        only their SUM is identifiable from this data; interpret the
        individual Kd / damping values with caution.
        """
        pos, vel, torque, cmd = data['pos'], data['vel'], data['torque'], data['cmd']

        def motor_model(params, cmd, pos, vel):
            Kp, Kd, friction, damping = params
            return Kp * (cmd - pos) - Kd * vel - friction * np.sign(vel) - damping * vel

        def objective(params):
            # Mean squared torque prediction error.
            pred = motor_model(params, cmd, pos, vel)
            return np.mean((pred - torque) ** 2)

        x0 = [100.0, 5.0, 0.5, 0.1]  # initial guess: Kp, Kd, friction, damping
        bounds = [(10, 500), (0.1, 50), (0, 5), (0, 2)]
        result = minimize(objective, x0, bounds=bounds, method='L-BFGS-B')
        Kp, Kd, friction, damping = result.x
        print(f"Joint {self.joint_name}: Kp={Kp:.1f}, Kd={Kd:.2f}, "
              f"friction={friction:.3f}, damping={damping:.3f}, MSE={result.fun:.6f}")
        return {'Kp': Kp, 'Kd': Kd, 'friction': friction, 'damping': damping}
Real-Time Inference on Onboard Computer
Unitree Robot Compute Options
| Robot | Onboard Compute | GPU | Power | Inference Speed |
|---|---|---|---|---|
| G1 | Jetson Orin NX 16GB | 1024 CUDA | 25W | ~3ms (MLP) |
| H1 | Jetson Orin NX 16GB | 1024 CUDA | 25W | ~3ms (MLP) |
| H1-2 | Jetson AGX Orin 64GB | 2048 CUDA | 60W | ~1.5ms (MLP) |
Optimized Policy Inference
import torch
import time
import numpy as np
class PolicyInference:
    """Optimized policy inference for real-time control.

    Loads a checkpoint containing a pickled policy module plus observation
    normalization stats, JIT-traces it, and serves actions from a
    pre-allocated buffer.
    """

    def __init__(self, policy_path, device='cuda'):
        self.device = device
        # The checkpoint stores a full pickled nn.Module, so weights_only=False
        # is required on torch>=2.6. Only load TRUSTED checkpoints this way.
        checkpoint = torch.load(policy_path, map_location=device, weights_only=False)
        self.policy = checkpoint['policy']
        self.policy.eval()
        # Normalization stats are loaded FIRST: they define obs_dim, which the
        # original code read before it was ever assigned (AttributeError).
        self.obs_mean = checkpoint['obs_mean'].to(device)
        self.obs_std = checkpoint['obs_std'].to(device)
        self.obs_dim = self.obs_mean.shape[-1]
        self.action_scale = checkpoint.get('action_scale', 0.25)
        self.default_dof_pos = checkpoint['default_dof_pos'].to(device)
        # JIT compile for speed (trace + freeze inlines and folds constants).
        dummy_input = torch.zeros(1, self.obs_dim, device=device)
        self.policy_jit = torch.jit.trace(self.policy, dummy_input)
        self.policy_jit = torch.jit.freeze(self.policy_jit)
        # Pre-allocate the observation tensor to avoid per-step allocation.
        self.obs_tensor = torch.zeros(1, self.obs_dim, device=device)

    @torch.no_grad()
    def get_action(self, obs_np):
        """Return target joint positions for a numpy observation vector."""
        self.obs_tensor[0] = torch.from_numpy(obs_np).float()
        obs_norm = (self.obs_tensor - self.obs_mean) / (self.obs_std + 1e-8)
        action = self.policy_jit(obs_norm)
        # Policy output is an offset around the default pose.
        target_pos = self.default_dof_pos + action.squeeze() * self.action_scale
        return target_pos.cpu().numpy()

    def benchmark(self, n_iterations=1000):
        """Benchmark inference speed; prints mean ± std latency and rate."""
        obs = np.random.randn(self.obs_dim).astype(np.float32)
        # Calling cuda.synchronize() on a CPU-only setup raises; guard it.
        use_cuda = torch.cuda.is_available() and 'cuda' in str(self.device)
        for _ in range(100):  # warm-up (JIT optimization passes, caches)
            self.get_action(obs)
        times = []
        for _ in range(n_iterations):
            start = time.perf_counter()
            self.get_action(obs)
            if use_cuda:
                torch.cuda.synchronize()
            times.append(time.perf_counter() - start)
        print(f"Inference: {np.mean(times)*1000:.2f} ± {np.std(times)*1000:.2f} ms")
        print(f"Control freq: {1000/(np.mean(times)*1000):.0f} Hz")
Safety Systems
Safety Layer Architecture
class HumanoidSafetyLayer:
    """Safety layer between policy and actuators.

    Clamps policy targets to joint limits, rate-limits per-step motion, and
    latches into a hold/damping mode once a fall is detected or the e-stop
    fires.
    """

    def __init__(self, robot_config):
        self.config = robot_config
        self.soft_limits = robot_config['soft_joint_limits']  # (n_dof, 2) [min, max]
        self.hard_limits = robot_config['hard_joint_limits']  # mechanical stops
        self.max_joint_vel = robot_config['max_joint_velocity']
        self.max_torque = robot_config['max_torque']
        self.min_base_height = 0.3  # meters; below this the robot has fallen
        self.max_tilt_angle = 60.0  # degrees of roll/pitch before declaring a fall
        self.e_stop_active = False  # latched: once set, only damping commands go out

    def check_and_apply(self, target_positions, current_state):
        """Apply safety checks and return safe commands."""
        if self.e_stop_active:
            return self._damping_mode(current_state)
        # Fall detection
        if self._detect_fall(current_state):
            print("FALL DETECTED — activating damping mode")
            self.e_stop_active = True
            return self._damping_mode(current_state)
        # Joint limit clamping. BUGFIX: clamp to the SOFT limits — the
        # original clamped to hard limits, leaving soft_limits loaded but
        # unused and giving commands no margin before the mechanical stops.
        safe_positions = np.clip(
            target_positions, self.soft_limits[:, 0], self.soft_limits[:, 1]
        )
        # Velocity limiting: bound per-step motion to max_joint_vel * dt.
        current_pos = current_state['joint_positions']
        delta = safe_positions - current_pos
        max_delta = self.max_joint_vel * self.config['dt']
        safe_positions = current_pos + np.clip(delta, -max_delta, max_delta)
        return safe_positions

    def _detect_fall(self, state):
        """Return True when base height or tilt indicates a fall."""
        base_height = state['base_position'][2]
        if base_height < self.min_base_height:
            return True
        roll = np.degrees(state['base_euler'][0])
        pitch = np.degrees(state['base_euler'][1])
        if abs(roll) > self.max_tilt_angle or abs(pitch) > self.max_tilt_angle:
            return True
        return False

    def _damping_mode(self, state):
        # Commands the current pose so the PD loop acts as a damper; true
        # torque-off damping would be handled at the firmware level.
        return state['joint_positions']
Complete Deployment Framework
class HumanoidDeployment:
    """Deploy RL locomotion policy on Unitree humanoid.

    Wires together PolicyInference (network), HumanoidSafetyLayer (limits,
    fall handling) and an EMA action filter into a fixed-rate control loop.
    """

    def __init__(self, policy_path, robot_type='g1'):
        self.policy = PolicyInference(policy_path)
        self.safety = HumanoidSafetyLayer(ROBOT_CONFIGS[robot_type])
        self.config = ROBOT_CONFIGS[robot_type]
        self.dt = self.config['control_dt']
        # EMA filter smooths jerky policy output (alpha=0.7-0.9 typical).
        self.action_filter = ExponentialFilter(alpha=0.8)
        self.running = False

    def build_observation(self, state):
        """Construct observation vector from robot state.

        The concatenation order must match training exactly: angular velocity,
        projected gravity, velocity command, joint position offsets, joint
        velocities, previous action.
        """
        obs = np.concatenate([
            state['base_angular_velocity'],
            state['projected_gravity'],
            self.velocity_command,
            state['joint_positions'] - self.config['default_dof_pos'],
            state['joint_velocities'],
            self.previous_action,
        ])
        # Clip to guard against sensor spikes pushing the policy out of
        # its training distribution.
        return np.clip(obs, -100.0, 100.0)

    def control_step(self, state):
        """Single control step: obs → policy → filter → safety → command.

        BUGFIX: PolicyInference.get_action already returns target joint
        positions (default_dof_pos + action * action_scale). The original
        re-applied the default pose and action scale here, double-scaling
        every command.
        """
        obs = self.build_observation(state)
        target_positions = self.action_filter.apply(self.policy.get_action(obs))
        safe_positions = self.safety.check_and_apply(target_positions, state)
        # Recover the unscaled (filtered) policy action for the next
        # observation — presumably what the policy saw as "previous action"
        # during training; confirm against the training env.
        self.previous_action = (
            target_positions - self.config['default_dof_pos']
        ) / self.config['action_scale']
        return safe_positions

    def run(self, velocity_command=(0.5, 0.0, 0.0)):
        """Main control loop; velocity_command is [vx, vy, yaw_rate].

        The default is a tuple rather than a list to avoid the shared
        mutable-default-argument pitfall; np.array() accepts either.
        """
        self.velocity_command = np.array(velocity_command)
        self.previous_action = np.zeros(self.config['num_dof'])
        self.running = True
        print(f"Starting deployment: vel_cmd = {velocity_command}")
        print(f"Control frequency: {1/self.dt:.0f} Hz")
        try:
            while self.running:
                loop_start = time.perf_counter()
                state = self.read_robot_state()
                target_pos = self.control_step(state)
                self.send_command(target_pos)
                # Sleep the remainder of the period to hold a fixed rate.
                elapsed = time.perf_counter() - loop_start
                sleep_time = self.dt - elapsed
                if sleep_time > 0:
                    time.sleep(sleep_time)
        except KeyboardInterrupt:
            print("\nStopping — entering damping mode")
            self.safety.e_stop_active = True

Debugging Common Failures
| Symptom | Probable Cause | Fix |
|---|---|---|
| Robot falls immediately | Observation mismatch | Verify joint order, IMU frame, gravity direction |
| Wobbling/oscillation | PD gains too high or latency | Reduce gains, add action filter |
| Drifts sideways | IMU bias or asymmetric friction | Calibrate IMU, check foot contact |
| Can't lift feet | Friction too high or motor weak | Reduce friction, check motor strength |
| Jerky motions | No action smoothing | Add EMA filter (alpha=0.7-0.9) |
| Falls on stairs | No terrain adaptation | Deploy teacher-student with history |
Complete Deployment Checklist
Pre-Deployment
First Test Protocol
Monitoring During Deployment
Series Summary
References
- Learning Humanoid Locomotion with Transformers — Radosavovic et al., 2024
- Real-World Humanoid Locomotion with RL — Gu et al., 2024
- Humanoid-Gym: Reinforcement Learning for Humanoid Robot — Gu et al., 2024
- Sim-to-Real Transfer for Bipedal Locomotion — Li et al., 2023
Related Posts