Pure locomotion is only half the problem. A humanoid robot becomes truly useful when it can walk and work simultaneously — carry boxes from storage to trucks, push carts in factories, open doors and walk through. This is the loco-manipulation problem: combining locomotion quality with task completion in a single policy.
Building on H1-2 loco-manipulation basics, this post dives deep into complex tasks and training strategies.
Why Loco-Manipulation Is Harder Than the Sum of Its Parts
Challenges
| Challenge | Explanation |
|---|---|
| Dynamic CoM shift | Carrying a 5kg object shifts CoM, invalidating locomotion policy |
| Multi-objective | Balance + track velocity + hold object + reach target simultaneously |
| Contact richness | Feet-ground + hands-object + object-environment = many contact modes |
| Temporal coupling | Must coordinate timing: step, reach, grasp, lift, step again |
| Observation complexity | Object pose, contact forces, task state + locomotion state |
Hierarchical vs End-to-End
# === Approach 1: Hierarchical (easier, modular) ===
class HierarchicalLocoManip:
    """
    Hierarchical controller: a high-level task planner emits locomotion and
    arm commands, which two separate low-level policies turn into actions.

        task planner -> (walk command, arm command)
                     -> loco policy + arm policy -> joint torques
    """

    def forward(self, task_obs):
        # Split the task into a base command and a manipulator command.
        locomotion_cmd, manipulation_cmd = self.task_planner(task_obs)
        # Each body part is controlled by its own specialist policy.
        lower_body = self.loco_policy(self.loco_obs, locomotion_cmd)
        upper_body = self.arm_policy(self.arm_obs, manipulation_cmd)
        # Concatenate along the action dimension: legs first, then arms.
        return torch.cat((lower_body, upper_body), dim=1)
# === Approach 2: End-to-End (harder, higher potential) ===
class EndToEndLocoManip:
    """Monolithic controller: one network maps the full observation
    (proprioception + task state) directly to every joint action."""

    def forward(self, full_obs):
        # No hand-designed interface between locomotion and manipulation —
        # the single policy coordinates both.
        return self.policy(full_obs)
| | Hierarchical | End-to-End |
|---|---|---|
| Pros | Modular, reuse loco policy, easy debug | Optimal coordination, emergent behaviors |
| Cons | Sub-optimal coupling, hand-designed interface | Needs more data, hard to train and debug |
| Training time | ~5h | ~20h+ |
| Best for | Production, clear task structure | Research, complex interactions |
Task 1: Carry Box While Walking
import torch
class CarryBoxTask:
    """
    Task: pick up a box from a table, carry it to a destination, place it down.

    Per-env state machine: APPROACH -> GRASP -> CARRY -> PLACE, tracked in
    ``self.phase`` and advanced inside ``compute_reward``.

    Fix over the original: all per-env buffers (``box_mass``, ``box_start``,
    ``place_target``, ``phase``) are allocated once for ``num_envs`` envs in
    ``__init__`` and ``reset(env_ids)`` only writes the selected rows.  The
    original re-created every buffer with size ``len(env_ids)`` on each reset,
    which destroyed the state of all envs that were NOT being reset.
    """

    def __init__(self, num_envs, device="cuda"):
        self.num_envs = num_envs
        self.device = device
        self.box_size = [0.3, 0.3, 0.2]      # box dimensions (m)
        self.box_mass_range = [1.0, 8.0]     # mass randomized per reset (kg)
        # Phase ids for the task state machine.
        self.APPROACH = 0
        self.GRASP = 1
        self.CARRY = 2
        self.PLACE = 3
        # Full-size per-env buffers; reset() writes rows in place.
        self.box_mass = torch.zeros(num_envs, device=device)
        self.box_start = torch.zeros(num_envs, 3, device=device)
        self.place_target = torch.zeros(num_envs, 3, device=device)
        self.phase = torch.zeros(num_envs, dtype=torch.long, device=device)

    def reset(self, env_ids):
        """Re-randomize task state for the envs in ``env_ids`` only.

        Other envs' buffers are left untouched (partial-reset safe).
        """
        n = len(env_ids)
        self.box_mass[env_ids] = torch.empty(n, device=self.device).uniform_(
            self.box_mass_range[0], self.box_mass_range[1]
        )
        # Box spawns 1-2 m ahead of the robot at table height (0.8 m).
        start = torch.zeros(n, 3, device=self.device)
        start[:, 0] = torch.empty(n, device=self.device).uniform_(1, 2)
        start[:, 2] = 0.8
        self.box_start[env_ids] = start
        # Place target 3-5 m ahead, also at table height.
        target = torch.zeros(n, 3, device=self.device)
        target[:, 0] = torch.empty(n, device=self.device).uniform_(3, 5)
        target[:, 2] = 0.8
        self.place_target[env_ids] = target
        self.phase[env_ids] = self.APPROACH

    def compute_observation(self, robot_state, box_state):
        """Build the task-specific observation vector.

        Layout (per env): box position relative to base, box orientation,
        place target relative to base, one-hot phase (4), grasp force (1),
        and the distance of each hand to the box (2).
        NOTE(review): assumes batched tensors with env as dim 0; orientation
        width depends on the simulator's representation — confirm upstream.
        """
        obs = []
        # Box pose expressed relative to the robot base.
        box_rel = box_state["position"] - robot_state["base_pos"]
        obs.append(box_rel)
        obs.append(box_state["orientation"])
        # Where the box has to go, relative to the robot base.
        target_rel = self.place_target - robot_state["base_pos"]
        obs.append(target_rel)
        # Current phase as a one-hot so the policy can condition on it.
        phase_onehot = torch.nn.functional.one_hot(
            self.phase, num_classes=4
        ).float()
        obs.append(phase_onehot)
        obs.append(robot_state["grasp_force"].unsqueeze(1))
        # Per-hand distance to the box (left then right).
        for hand in ["left", "right"]:
            hand_pos = robot_state[f"{hand}_hand_pos"]
            dist = torch.norm(hand_pos - box_state["position"], dim=1)
            obs.append(dist.unsqueeze(1))
        return torch.cat(obs, dim=1)

    def compute_reward(self, robot_state, box_state, action):
        """Multi-phase reward; also advances the per-env phase machine.

        Returns (total_reward, dict of individual reward terms).
        """
        rewards = {}
        # Closest hand to the box — used for the grasp transition.
        hand_box_dist = torch.min(
            torch.norm(robot_state["left_hand_pos"] - box_state["position"], dim=1),
            torch.norm(robot_state["right_hand_pos"] - box_state["position"], dim=1),
        )
        # Phase 0: APPROACH — walk toward the box spawn location.
        approach_mask = (self.phase == self.APPROACH).float()
        robot_box_dist = torch.norm(
            robot_state["base_pos"][:, :2] - self.box_start[:, :2], dim=1
        )
        rewards["approach"] = 0.5 * approach_mask * torch.exp(
            -robot_box_dist / 1.0
        )
        close_to_box = (robot_box_dist < 0.5) & (self.phase == self.APPROACH)
        self.phase[close_to_box] = self.GRASP
        # Phase 1: GRASP — bring a hand onto the box.
        grasp_mask = (self.phase == self.GRASP).float()
        rewards["reach"] = 0.8 * grasp_mask * torch.exp(
            -hand_box_dist / 0.1
        )
        grasped = (hand_box_dist < 0.05) & (self.phase == self.GRASP)
        self.phase[grasped] = self.CARRY
        # Phase 2: CARRY — move the box toward the place target while
        # keeping it steady and at carrying height (~0.9 m).
        carry_mask = (self.phase == self.CARRY).float()
        box_target_dist = torch.norm(
            box_state["position"][:, :2] - self.place_target[:, :2], dim=1
        )
        rewards["carry_progress"] = 1.0 * carry_mask * torch.exp(
            -box_target_dist / 2.0
        )
        # Penalize box rotation (proxy for dropping/tilting it).
        box_tilt = torch.norm(box_state["angular_velocity"], dim=1)
        rewards["box_stability"] = -0.3 * carry_mask * box_tilt
        box_height_error = torch.abs(box_state["position"][:, 2] - 0.9)
        rewards["box_height"] = -0.5 * carry_mask * box_height_error
        near_target = (box_target_dist < 0.3) & (self.phase == self.CARRY)
        self.phase[near_target] = self.PLACE
        # Phase 3: PLACE — tight 3D kernel around the place target,
        # plus a one-shot completion bonus.
        place_mask = (self.phase == self.PLACE).float()
        place_dist = torch.norm(
            box_state["position"] - self.place_target, dim=1
        )
        rewards["place"] = 2.0 * place_mask * torch.exp(
            -place_dist / 0.05
        )
        task_complete = (place_dist < 0.1) & (self.phase == self.PLACE)
        rewards["completion_bonus"] = 10.0 * task_complete.float()
        total = sum(rewards.values())
        return total, rewards
Task 2: Push Cart
class PushCartTask:
    """
    Task: push a cart from point A to B.
    Challenge: maintain contact while walking.
    """

    def __init__(self):
        self.cart_friction = 0.3   # cart wheel friction coefficient
        self.cart_mass = 15.0      # kg

    def compute_reward(self, robot_state, cart_state, target_pos):
        """Return (total_reward, per-term dict) for a batch of envs."""
        rewards = {}
        # Progress: exponential kernel on the planar cart-to-target distance.
        dist_to_goal = torch.norm(
            cart_state["position"][:, :2] - target_pos[:, :2], dim=1
        )
        rewards["cart_progress"] = 1.0 * torch.exp(-dist_to_goal / 3.0)
        # Contact: reward the WEAKER of the two hand contact forces, so both
        # hands must stay on the cart; saturated at 1 so pushing harder than
        # necessary earns nothing extra.
        weakest_contact = torch.min(
            robot_state["left_hand_contact_force"],
            robot_state["right_hand_contact_force"],
        )
        rewards["contact"] = 0.3 * torch.clamp(weakest_contact, max=1.0)
        # Heading: keep the cart pointed at the target.
        desired_heading = torch.atan2(
            target_pos[:, 1] - cart_state["position"][:, 1],
            target_pos[:, 0] - cart_state["position"][:, 0],
        )
        heading_err = torch.abs(cart_state["heading"] - desired_heading)
        rewards["heading"] = 0.2 * torch.exp(-heading_err / 0.3)
        # Smoothness: penalize planar cart acceleration (no jerky shoves).
        planar_accel = torch.norm(cart_state["acceleration"][:, :2], dim=1)
        rewards["smooth_push"] = -0.1 * planar_accel
        return sum(rewards.values()), rewards
Task 3: Open Door + Walk Through
class OpenDoorTask:
    """
    Most complex task: open door + walk through.
    Requires precise coordination between arm and legs.

    Fix over the original: ``compute_reward`` reads ``self.phase`` but the
    original class never initialized it anywhere (there was no reset), so the
    very first reward call raised ``AttributeError``.  ``reset`` now allocates
    the per-env phase buffer.
    """

    def __init__(self):
        self.door_width = 0.9      # m
        self.handle_height = 1.0   # m
        self.door_mass = 8.0       # kg
        # Phase ids for the door state machine.
        self.APPROACH_DOOR = 0
        self.REACH_HANDLE = 1
        self.TURN_HANDLE = 2
        self.PUSH_DOOR = 3
        self.WALK_THROUGH = 4
        self.phase = None  # allocated in reset()

    def reset(self, num_envs, device="cpu"):
        """Allocate/reset per-env phase state; all envs start at APPROACH_DOOR."""
        self.phase = torch.zeros(num_envs, dtype=torch.long, device=device)

    def compute_reward(self, robot_state, door_state):
        """Phase-masked reward terms; returns (total, per-term dict).

        NOTE(review): unlike CarryBoxTask, no phase transitions are performed
        here, so envs stay in APPROACH_DOOR unless something external advances
        ``self.phase`` — confirm where transitions are meant to happen.
        """
        rewards = {}
        # Phase 0: walk to the door handle.
        approach_mask = (self.phase == self.APPROACH_DOOR).float()
        door_dist = torch.norm(
            robot_state["base_pos"][:, :2] - door_state["handle_pos"][:, :2],
            dim=1
        )
        rewards["approach"] = 0.3 * approach_mask * torch.exp(
            -door_dist / 1.0
        )
        # Phase 1: bring the right hand onto the handle (tight 5 cm kernel).
        reach_mask = (self.phase == self.REACH_HANDLE).float()
        hand_handle_dist = torch.norm(
            robot_state["right_hand_pos"] - door_state["handle_pos"],
            dim=1
        )
        rewards["reach_handle"] = 0.5 * reach_mask * torch.exp(
            -hand_handle_dist / 0.05
        )
        # Phase 2: turn the handle; full reward at 45 degrees.
        turn_mask = (self.phase == self.TURN_HANDLE).float()
        handle_angle = door_state["handle_angle"]
        rewards["turn_handle"] = 0.5 * turn_mask * (
            handle_angle / (torch.pi / 4)
        )
        # Phase 3: swing the door open; full reward at 90 degrees.
        push_mask = (self.phase == self.PUSH_DOOR).float()
        door_angle = door_state["door_angle"]
        rewards["push_door"] = 1.0 * push_mask * (
            door_angle / (torch.pi / 2)
        )
        # Phase 4: bonus once the base is 0.5 m past the door frame.
        walk_mask = (self.phase == self.WALK_THROUGH).float()
        through_door = robot_state["base_pos"][:, 0] > door_state["frame_pos"][:, 0] + 0.5
        rewards["walk_through"] = 5.0 * walk_mask * through_door.float()
        # Always-on regularizers: stay upright, don't fall/terminate.
        upright = torch.sum(
            torch.square(robot_state["projected_gravity"][:, :2]), dim=1
        )
        rewards["upright"] = -1.5 * upright
        rewards["termination"] = -200.0 * robot_state["terminated"].float()
        total = sum(rewards.values())
        return total, rewards
Full Training Pipeline
class LocoManipTrainer:
    """End-to-end training pipeline for loco-manipulation tasks."""

    def __init__(self, env, task, device="cuda"):
        self.env = env
        self.task = task
        self.device = device
        import torch.nn as nn
        # Policy MLP: 78 proprioceptive dims + 17 task dims -> 35 joint actions.
        layers = []
        in_dim = 78 + 17
        for width in (512, 512, 256):
            layers.append(nn.Linear(in_dim, width))
            layers.append(nn.ELU())
            in_dim = width
        layers.append(nn.Linear(in_dim, 35))
        self.policy = nn.Sequential(*layers).to(device)
        self.optimizer = torch.optim.Adam(self.policy.parameters(), lr=3e-4)

    def train(self, num_iterations=20000):
        """Run the training loop sketch.

        NOTE(review): ``loss = -total_reward.mean()`` only produces gradients
        if the reward tensor is differentiable w.r.t. the actions; with a
        typical (non-differentiable) simulator step this needs a
        policy-gradient estimator such as PPO instead — confirm intent.
        """
        for iteration in range(num_iterations):
            # Proprioceptive observation + task-specific observation.
            proprio_obs = self.env.get_observations()
            task_obs = self.task.compute_observation(
                self.env.robot_state, self.env.object_state
            )
            full_obs = torch.cat([proprio_obs, task_obs], dim=1)
            actions = self.policy(full_obs)
            next_obs, base_reward, done, info = self.env.step(actions)
            # Task reward is added on top of the base locomotion reward.
            task_reward, task_reward_terms = self.task.compute_reward(
                self.env.robot_state, self.env.object_state, actions
            )
            total_reward = base_reward + task_reward
            loss = -total_reward.mean()
            self.optimizer.zero_grad()
            loss.backward()
            # Gradient clipping keeps the large multi-term reward stable.
            torch.nn.utils.clip_grad_norm_(self.policy.parameters(), 1.0)
            self.optimizer.step()
            if iteration % 500 == 0:
                success_rate = info.get("task_success_rate", 0)
                print(f"Iter {iteration}: reward={total_reward.mean():.2f}, "
                      f"success={success_rate:.1%}")
For locomotion fundamentals, see Locomotion Basics. For RL basics, see RL for Robotics.
Summary
Loco-manipulation is a significant step beyond pure locomotion:
- Multi-phase tasks (approach, grasp, carry, place) require complex reward design
- Dynamic CoM shift when carrying objects forces the locomotion policy to adapt online
- Hierarchical vs end-to-end: hierarchical is easier for production, end-to-end has more potential
- 3 task examples: carry box, push cart, open door — each with unique challenges
- Training time: 10-20h for loco-manipulation vs 1-2h for pure locomotion
Next post — Humanoid Parkour — tackles jumping, climbing, and obstacle courses.
References
- HumanPlus: Humanoid Shadowing and Imitation from Humans — Fu et al., 2024
- OmniH2O: Universal and Dexterous Human-to-Humanoid Whole-Body Teleoperation and Learning — He et al., 2024
- Humanoid Loco-Manipulation with Learned Whole-Body Control — Chen et al., CoRL 2024
- Learning Whole-Body Manipulation for Quadrupedal Robot — Ha et al., 2024