Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
176 changes: 176 additions & 0 deletions configs/examples/pi05_robocasa_eval_config.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,176 @@
{
"dataset_mixture": {
"datasets": [
{
"repo_id": "physical-intelligence/libero"
Copy link
Copy Markdown
Member

@shuheng-liu shuheng-liu May 12, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we have an example robocasa dataset?

}
],
"weights": [
1.0
],
"action_freq": 10.0,
"image_resample_strategy": "nearest",
"vector_resample_strategy": "nearest"
},
"policy": {
"type": "pi05",
"pretrained_path": "TensorAuto/tPi0.5-base",
"n_obs_steps": 1,
"input_features": {
"camera0": {
"shape": [
3,
224,
224
],
"type": "VISUAL"
},
"camera1": {
"shape": [
3,
224,
224
],
"type": "VISUAL"
},
"camera2": {
"shape": [
3,
224,
224
],
"type": "VISUAL"
},
"state": {
"shape": [
32
],
"type": "STATE"
}
},
"output_features": {
"actions": {
"shape": [
32
],
"type": "ACTION"
}
},
"normalization_mapping": {
"VISUAL": "IDENTITY",
"STATE": "MIN_MAX",
"ACTION": "MEAN_STD"
},
"chunk_size": 10,
"n_action_steps": 10,
"max_state_dim": 32,
"max_action_dim": 32,
"proj_width": 1024,
"num_steps": 10,
"attention_implementation": "eager",
"freeze_vision_encoder": true,
"train_expert_only": true,
"prompt_max_length": 256,
"discrete_action_max_length": 60,
"optimizer_lr": 2.5e-05,
"optimizer_betas": [
0.9,
0.95
],
"optimizer_eps": 1e-08,
"optimizer_weight_decay": 1e-10,
"scheduler_warmup_steps": 1000,
"scheduler_decay_steps": 30000,
"scheduler_decay_lr": 2.5e-06
},
"output_dir": "outputs/pi05_robocasa_eval",
"resume": false,
"seed": 1000,
"resolution": [
224,
224
],
"num_cams": 3,
"max_state_dim": 32,
"max_action_dim": 32,
"action_chunk": 10,
"loss_weighting": {
"MSE": 1.0,
"CE": 1.0
},
"num_workers": 4,
"batch_size": 2,
"gradient_accumulation_steps": 1,
"dataloader_batch_size": 2,
"prefetch_factor": 8,
"steps": 100,
"log_freq": 1,
"save_checkpoint": true,
"save_freq": 100,
"eval_freq": 10,
"use_policy_training_preset": true,
"trace_nans": false,
"optimizer": {
"type": "adamw",
"lr": 2.5e-05,
"weight_decay": 1e-10,
"grad_clip_norm": 10.0,
"betas": [
0.9,
0.95
],
"eps": 1e-08
},
"env": {
"type": "robocasa",
"task": "",
"fps": 20,
"max_parallel_tasks": 1,
"task_ids": [
0,
12,
267
],
"episode_length": 150,
"camera_names": [
"robot0_eye_in_hand",
"robot0_agentview_left",
"robot0_agentview_right"
],
"camera_height": 256,
"camera_width": 256,
"num_steps_wait": 10,
"render_cam": null,
"split": "all",
"seed_base": 0
},
"eval": {
"n_episodes": 1,
"batch_size": 1,
"use_async_envs": true,
"max_episodes_rendered": 1,
"recording_root": null,
"grid_size": null
},
"scheduler": {
"type": "cosine_decay_with_warmup",
"num_warmup_steps": 1000,
"num_decay_steps": 30000,
"peak_lr": 2.5e-05,
"decay_lr": 2.5e-06
},
"wandb": {
"enable": true,
"entity": "wyautox-autox",
"project": "pi05",
"run_id": null,
"name": null,
"notes": "Evaluating pi0.5 on robocasa",
"tags": [],
"group": null,
"job_type": null,
"mode": null,
"allow_resume": true,
"disable_artifact": false
}
}
59 changes: 59 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -140,12 +140,37 @@ libero = [
urdf = [
"rerun-sdk>=0.28.2",
]
robocasa = [
# RoboCasa kitchen sim. Most runtime deps (numpy, mujoco, lerobot, gymnasium,
# opencv-python, imageio, termcolor, lxml, hidapi, numba, pygame) come from
# robocasa's own setup.py — listed below only when robocasa needs them but does
# not declare them, or when we need a tighter version floor than robocasa does.
# Mutually exclusive with the `libero` extra (numpy 2.x vs <2).
"robocasa",
"robosuite>=1.5.0", # robocasa imports robosuite at top-level but does not declare it
"glfw>=2.0", # required by mujoco's viewer; not pulled by robocasa
"msgpack>=1.0", # used by scripts/robocasa/server.py
"websockets>=11.0", # used by scripts/robocasa/server.py
]
trt = [
"tensorrt>=10.15.1.29 ; (sys_platform == 'linux' and platform_machine == 'x86_64') or (sys_platform == 'win32' and (platform_machine == 'AMD64' or platform_machine == 'x86_64'))",
]

[tool.uv.sources]
libero = { git = "https://github.com/shuheng-liu/LIBERO" , branch = "master" } # the official libero repo is misconfigured for pip install with git
# RoboCasa is shipped as a separate sim package; track upstream main. Replace with a
# fork (e.g. `{ git = "https://github.com/<your-fork>/robocasa", branch = "..." }`) or a
# local checkout (`{ path = "../robocasa", editable = true }`) if you carry local
# patches. The OpenTau wrapper only depends on `robocasa.utils.env_utils.create_env`,
# so an unmodified upstream install is sufficient.
robocasa = { git = "https://github.com/robocasa/robocasa", branch = "main" }
# RoboCasa uses upstream robosuite **master** APIs (`get_elements`,
# `xml_path_completion`-variants) that aren't in PyPI's `robosuite==1.5.x`. Scope this
# source to the robocasa extra via `extra = "robocasa"` so libero — which pins
# `robosuite==1.4.0` from PyPI — falls back to the index resolution.
robosuite = [
{ git = "https://github.com/ARISE-Initiative/robosuite", branch = "master", extra = "robocasa" },
]

# libero depends on gym, which depends on numpy 1.x, while rerun only supports urdf in v0.28 which requires numpy 2.x
[tool.uv]
Expand All @@ -154,11 +179,43 @@ libero = { git = "https://github.com/shuheng-liu/LIBERO" , branch = "master" }
# and the build regresses to the original CMake 4 failure with a confusing
# message. Bumping this floor errors loudly with a clear pointer instead.
required-version = ">=0.8.4"
# RoboCasa's setup.py declares hard equality pins (numpy==2.2.5, numba==0.61.2,
# scipy==1.15.3) that collide with opentau's own floors (numba>=0.62.0) and with
# whatever other extras pull in. Force compatible versions across the whole resolve so
# `uv sync --extra robocasa` succeeds without forking robocasa's setup.py. The override
# applies regardless of the active extra — it's a no-op for libero/urdf because they
# don't repin numba, and no numpy override is listed below, so libero's numpy<2
# constraint is left untouched.
override-dependencies = [
"numba>=0.62.0",
"scipy>=1.13",
# Both opentau and lerobot want draccus>=0.10, but the robocasa-extras resolve
# currently picks 0.8.0 (an old shared transitive constraint). Force the modern
# version explicitly so `singledispatch(include_subclasses=...)` is available.
"draccus>=0.10.0",
# robocasa's setup.py pins tianshou==0.4.10, which transitively requires
# protobuf<3.20 — clashing with opentau's protobuf>=4.25. Tianshou 1.x requires
# Python>=3.11 (we're on 3.10), so we can't bump it. Force protobuf to opentau's
# range instead; opentau never actually exercises tianshou code paths, and
# tianshou's protobuf usage is limited to a tensorboard pb shim that still imports
# on protobuf 4.x at runtime even if its declared pin says otherwise.
"protobuf>=4.25",
]
conflicts = [
[
{ extra = "libero" },
{ extra = "urdf" },
],
# robocasa pins numpy 2.x; libero pulls numpy<2. Force users to pick one sim at a time.
[
{ extra = "libero" },
{ extra = "robocasa" },
],
# robocasa transitively pulls rerun-sdk<0.23 (via lerobot 0.3.3), while urdf needs
# rerun-sdk>=0.28.2. Declare them mutually exclusive so the resolver bails fast.
[
{ extra = "urdf" },
{ extra = "robocasa" },
],
]
# egl-probe (transitive via robomimic in the libero extra) ships an sdist whose
# CMakeLists declares cmake_minimum_required(VERSION 2.8.12). CMake 4.0 dropped
Expand Down Expand Up @@ -201,6 +258,8 @@ default.extend-ignore-identifiers-re = [
# Add individual words here to ignore them
"2nd",
"pn",
"Pn", # RoboCasa task names use "Pn" prefix (e.g. PnPCounterToCab)
"Metalic", # upstream RoboCasa class name (OrganizeMetalicUtensils)
"ser",
"ein",
]
Expand Down
91 changes: 91 additions & 0 deletions src/opentau/configs/robocasa.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
# Copyright 2026 Tensor Auto Inc. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""RoboCasa environment configuration module.

Mirrors :mod:`opentau.configs.libero` for the RoboCasa kitchen benchmark — extends the
base training pipeline configuration with RoboCasa-specific evaluation parameters used by
``scripts/robocasa_eval/eval.py`` and the bundled-in evaluation hook in
``scripts/train.py``.
"""

from dataclasses import dataclass

from opentau.configs.train import TrainPipelineConfig


@dataclass
class RoboCasaEvalConfig:
"""Configuration for RoboCasa environment evaluation.

Args:
task: Comma-separated RoboCasa task class names (e.g. ``"PnPCounterToCab"``).
Each name becomes one logical evaluation group, vectorised across rollouts.
max_steps: Hard cap on policy steps per episode (in addition to ``_check_success``).
Defaults to 1500.
chunk_usage: Number of actions to execute per policy call before re-querying.
If ``None``, defaults to the training config's ``action_chunk``.
n_simulations: Number of rollouts to run per task. Defaults to 50.
video_dir: Optional output directory for rollout videos.
action_dim: Effective action dim sent to RoboCasa (the policy output is
truncated or zero-padded to this size). Defaults to 16 (PI 0.5 action head).
seed_base: Starting seed for rollout 0; rollout ``i`` uses ``seed_base + i``.
camera_height: Off-screen camera height (pixels). Defaults to 256.
camera_width: Off-screen camera width (pixels). Defaults to 256.
split: Dataset split passed to ``robocasa.utils.env_utils.create_env``.

Raises:
ValueError: If ``task`` is empty.
"""

task: str = "PnPCounterToCab"
max_steps: int = 1500
chunk_usage: int | None = None
n_simulations: int = 50
video_dir: str | None = None
action_dim: int = 16
seed_base: int = 0
camera_height: int = 256
camera_width: int = 256
split: str | None = "all"

def __post_init__(self):
task_names = [s.strip() for s in str(self.task).split(",") if s.strip()]
if not task_names:
raise ValueError("RoboCasaEvalConfig.task must contain at least one task class name.")
self.task_names = task_names


@dataclass
class TrainConfigWithRoboCasaEval(TrainPipelineConfig):
    """Training pipeline configuration that also carries RoboCasa evaluation settings.

    Args:
        robocasa: RoboCasa evaluation configuration. The field defaults to ``None``
            but a value is mandatory; validation happens in ``__post_init__``.

    Raises:
        ValueError: If ``robocasa`` is None, or if the resolved ``chunk_usage`` does
            not lie within ``[1, action_chunk]``.
    """

    robocasa: RoboCasaEvalConfig = None

    def __post_init__(self):
        """Run the parent validation, then resolve and bound-check ``chunk_usage``."""
        super().__post_init__()

        eval_cfg = self.robocasa
        if eval_cfg is None:
            raise ValueError("RoboCasa config must be provided.")

        # An unset chunk_usage falls back to this config's action_chunk.
        if eval_cfg.chunk_usage is None:
            eval_cfg.chunk_usage = self.action_chunk

        within_bounds = 1 <= eval_cfg.chunk_usage <= self.action_chunk
        if within_bounds:
            return

        # The ``{expr=}`` form embeds the expression text in the message, so the
        # expressions are spelled via ``self`` rather than the local alias.
        raise ValueError(
            f"Chunk usage must be between 1 and {self.action_chunk=}, got {self.robocasa.chunk_usage=}."
        )
Loading
Loading