TensorAuto · akshay18iitg · May 11, 2026 · shuheng-liu · May 12, 2026
diff --git a/configs/examples/pi05_robocasa_eval_config.json b/configs/examples/pi05_robocasa_eval_config.json
@@ -0,0 +1,176 @@
+{
+    "dataset_mixture": {
+        "datasets": [
+            {
+                "repo_id": "physical-intelligence/libero"
+            }
+        ],
+        "weights": [
+            1.0
+        ],
+        "action_freq": 10.0,
+        "image_resample_strategy": "nearest",
+        "vector_resample_strategy": "nearest"
+    },
+    "policy": {
+        "type": "pi05",
+        "pretrained_path": "TensorAuto/tPi0.5-base",
+        "n_obs_steps": 1,
+        "input_features": {
+            "camera0": {
+                "shape": [
+                    3,
+                    224,
+                    224
+                ],
+                "type": "VISUAL"
+            },
+            "camera1": {
+                "shape": [
+                    3,
+                    224,
+                    224
+                ],
+                "type": "VISUAL"
+            },
+            "camera2": {
+                "shape": [
+                    3,
+                    224,
+                    224
+                ],
+                "type": "VISUAL"
+            },
+            "state": {
+                "shape": [
+                    32
+                ],
+                "type": "STATE"
+            }
+        },
+        "output_features": {
+            "actions": {
+                "shape": [
+                    32
+                ],
+                "type": "ACTION"
+            }
+        },
+        "normalization_mapping": {
+            "VISUAL": "IDENTITY",
+            "STATE": "MIN_MAX",
+            "ACTION": "MEAN_STD"
+        },
+        "chunk_size": 10,
+        "n_action_steps": 10,
+        "max_state_dim": 32,
+        "max_action_dim": 32,
+        "proj_width": 1024,
+        "num_steps": 10,
+        "attention_implementation": "eager",
+        "freeze_vision_encoder": true,
+        "train_expert_only": true,
+        "prompt_max_length": 256,
+        "discrete_action_max_length": 60,
+        "optimizer_lr": 2.5e-05,
+        "optimizer_betas": [
+            0.9,
+            0.95
+        ],
+        "optimizer_eps": 1e-08,
+        "optimizer_weight_decay": 1e-10,
+        "scheduler_warmup_steps": 1000,
+        "scheduler_decay_steps": 30000,
+        "scheduler_decay_lr": 2.5e-06
+    },
+    "output_dir": "outputs/pi05_robocasa_eval",
+    "resume": false,
+    "seed": 1000,
+    "resolution": [
+        224,
+        224
+    ],
+    "num_cams": 3,
+    "max_state_dim": 32,
+    "max_action_dim": 32,
+    "action_chunk": 10,
+    "loss_weighting": {
+        "MSE": 1.0,
+        "CE": 1.0
+    },
+    "num_workers": 4,
+    "batch_size": 2,
+    "gradient_accumulation_steps": 1,
+    "dataloader_batch_size": 2,
+    "prefetch_factor": 8,
+    "steps": 100,
+    "log_freq": 1,
+    "save_checkpoint": true,
+    "save_freq": 100,
+    "eval_freq": 10,
+    "use_policy_training_preset": true,
+    "trace_nans": false,
+    "optimizer": {
+        "type": "adamw",
+        "lr": 2.5e-05,
+        "weight_decay": 1e-10,
+        "grad_clip_norm": 10.0,
+        "betas": [
+            0.9,
+            0.95
+        ],
+        "eps": 1e-08
+    },
+    "env": {
+        "type": "robocasa",
+        "task": "",
+        "fps": 20,
+        "max_parallel_tasks": 1,
+        "task_ids": [
+            0,
+            12,
+            267
+        ],
+        "episode_length": 150,
+        "camera_names": [
+            "robot0_eye_in_hand",
+            "robot0_agentview_left",
+            "robot0_agentview_right"
+        ],
+        "camera_height": 256,
+        "camera_width": 256,
+        "num_steps_wait": 10,
+        "render_cam": null,
+        "split": "all",
+        "seed_base": 0
+    },
+    "eval": {
+        "n_episodes": 1,
+        "batch_size": 1,
+        "use_async_envs": true,
+        "max_episodes_rendered": 1,
+        "recording_root": null,
+        "grid_size": null
+    },
+    "scheduler": {
+        "type": "cosine_decay_with_warmup",
+        "num_warmup_steps": 1000,
+        "num_decay_steps": 30000,
+        "peak_lr": 2.5e-05,
+        "decay_lr": 2.5e-06
+    },
+    "wandb": {
+        "enable": true,
+        "entity": "wyautox-autox",
+        "project": "pi05",
+        "run_id": null,
+        "name": null,
+        "notes": "Evaluating pi0.5 on robocasa",
+        "tags": [],
+        "group": null,
+        "job_type": null,
+        "mode": null,
+        "allow_resume": true,
+        "disable_artifact": false
+    }
+}
diff --git a/pyproject.toml b/pyproject.toml
@@ -140,12 +140,37 @@ libero = [
 urdf = [
     "rerun-sdk>=0.28.2",
 ]
+robocasa = [
+    # RoboCasa kitchen sim. Most runtime deps (numpy, mujoco, lerobot, gymnasium,
+    # opencv-python, imageio, termcolor, lxml, hidapi, numba, pygame) come from
+    # robocasa's own setup.py — listed below only when robocasa needs them but does
+    # not declare them, or when we need a tighter version floor than robocasa does.
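+    # Mutually exclusive with the `libero` extra (numpy 2.x vs <2) and with the `urdf`
+    # extra (rerun-sdk version clash); both are declared in `conflicts` under [tool.uv].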
+    "robocasa",
+    "robosuite>=1.5.0",         # robocasa imports robosuite at top-level but does not declare it
+    "glfw>=2.0",                # required by mujoco's viewer; not pulled by robocasa
+    "msgpack>=1.0",             # used by scripts/robocasa/server.py
+    "websockets>=11.0",         # used by scripts/robocasa/server.py
+]
 trt = [
     "tensorrt>=10.15.1.29 ; (sys_platform == 'linux' and platform_machine == 'x86_64') or (sys_platform == 'win32' and (platform_machine == 'AMD64' or platform_machine == 'x86_64'))",
 ]
 
 [tool.uv.sources]
 libero = { git = "https://github.com/shuheng-liu/LIBERO" , branch = "master" }  # the official libero repo is misconfigured for pip install with git
+# RoboCasa is shipped as a separate sim package; track upstream main. Replace with a
+# fork (e.g. `{ git = "https://github.com/<your-fork>/robocasa", branch = "..." }`) or a
+# local checkout (`{ path = "../robocasa", editable = true }`) if you carry local
+# patches. The OpenTau wrapper only depends on `robocasa.utils.env_utils.create_env`,
+# so an unmodified upstream install is sufficient.
+robocasa = { git = "https://github.com/robocasa/robocasa", branch = "main" }
+# RoboCasa uses upstream robosuite **master** APIs (`get_elements`,
+# `xml_path_completion`-variants) that aren't in PyPI's `robosuite==1.5.x`. Scope this
+# source to the robocasa extra via `extra = "robocasa"` so libero — which pins
+# `robosuite==1.4.0` from PyPI — falls back to the index resolution.
+robosuite = [
+    { git = "https://github.com/ARISE-Initiative/robosuite", branch = "master", extra = "robocasa" },
+]
 
 # libero depends on gym, which depends on numpy 1.x, while rerun only supports urdf in v0.28 which requires numpy 2.x
 [tool.uv]
@@ -154,11 +179,43 @@ libero = { git = "https://github.com/shuheng-liu/LIBERO" , branch = "master" }
 # and the build regresses to the original CMake 4 failure with a confusing
 # message. Bumping this floor errors loudly with a clear pointer instead.
 required-version = ">=0.8.4"
+# RoboCasa's setup.py declares hard equality pins (numpy==2.2.5, numba==0.61.2,
+# scipy==1.15.3) that collide with opentau's own floors (numba>=0.62.0) and with
+# whatever other extras pull in. Force compatible versions across the whole resolve so
+# `uv sync --extra robocasa` succeeds without forking robocasa's setup.py. The override
+# applies regardless of the active extra — it's a no-op for libero/urdf because they
+# don't repin numba. numpy itself is not overridden below: the numpy 2.x (robocasa)
+# vs numpy<2 (libero) split is handled by the `conflicts` table instead.
+override-dependencies = [
+    "numba>=0.62.0",
+    "scipy>=1.13",
+    # Both opentau and lerobot want draccus>=0.10, but the robocasa-extras resolve
+    # currently picks 0.8.0 (an old shared transitive constraint). Force the modern
+    # version explicitly so `singledispatch(include_subclasses=...)` is available.
+    "draccus>=0.10.0",
+    # robocasa's setup.py pins tianshou==0.4.10, which transitively requires
+    # protobuf<3.20 — clashing with opentau's protobuf>=4.25. Tianshou 1.x requires
+    # Python>=3.11 (we're on 3.10), so we can't bump it. Force protobuf to opentau's
+    # range instead; opentau never actually exercises tianshou code paths, and
+    # tianshou's protobuf usage is limited to a tensorboard pb shim that still imports
+    # on protobuf 4.x at runtime even if its declared pin says otherwise.
+    "protobuf>=4.25",
+]
 conflicts = [
   [
     { extra = "libero" },
     { extra = "urdf" },
   ],
+  # robocasa pins numpy 2.x; libero pulls numpy<2. Force users to pick one sim at a time.
+  [
+    { extra = "libero" },
+    { extra = "robocasa" },
+  ],
+  # robocasa transitively pulls rerun-sdk<0.23 (via lerobot 0.3.3), while urdf needs
+  # rerun-sdk>=0.28.2. Declare them mutually exclusive so the resolver bails fast.
+  [
+    { extra = "urdf" },
+    { extra = "robocasa" },
+  ],
 ]
 # egl-probe (transitive via robomimic in the libero extra) ships an sdist whose
 # CMakeLists declares cmake_minimum_required(VERSION 2.8.12). CMake 4.0 dropped
@@ -201,6 +258,8 @@ default.extend-ignore-identifiers-re = [
     # Add individual words here to ignore them
     "2nd",
     "pn",
+    "Pn",  # RoboCasa task names use "Pn" prefix (e.g. PnPCounterToCab)
+    "Metalic",  # upstream RoboCasa class name (OrganizeMetalicUtensils)
     "ser",
     "ein",
 ]

diff --git a/src/opentau/configs/robocasa.py b/src/opentau/configs/robocasa.py
@@ -0,0 +1,91 @@
+# Copyright 2026 Tensor Auto Inc. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""RoboCasa environment configuration module.
+
+Mirrors :mod:`opentau.configs.libero` for the RoboCasa kitchen benchmark — extends the
+base training pipeline configuration with RoboCasa-specific evaluation parameters used by
+``scripts/robocasa_eval/eval.py`` and the bundled-in evaluation hook in
+``scripts/train.py``.
+"""
+
+from dataclasses import dataclass
+
+from opentau.configs.train import TrainPipelineConfig
+
+
+@dataclass
+class RoboCasaEvalConfig:
+    """Configuration for RoboCasa environment evaluation.
+
+    Args:
+        task: Comma-separated RoboCasa task class names (e.g. ``"PnPCounterToCab"``).
+            Each name becomes one logical evaluation group, vectorised across rollouts.
+        max_steps: Hard cap on policy steps per episode (in addition to ``_check_success``).
+            Defaults to 1500.
+        chunk_usage: Number of actions to execute per policy call before re-querying.
+            If ``None``, defaults to the training config's ``action_chunk``.
+        n_simulations: Number of rollouts to run per task. Defaults to 50.
+        video_dir: Optional output directory for rollout videos. Defaults to ``None``.
+        action_dim: Effective action dim sent to RoboCasa (the policy output is
+            truncated or zero-padded to this size). Defaults to 16 (PI 0.5 action head).
+        seed_base: Starting seed for rollout 0; rollout ``i`` uses ``seed_base + i``.
+            Defaults to 0.
+        camera_height: Off-screen camera height (pixels). Defaults to 256.
+        camera_width: Off-screen camera width (pixels). Defaults to 256.
+        split: Dataset split passed to ``robocasa.utils.env_utils.create_env``.
+            Defaults to ``"all"``.
+
+    Attributes:
+        task_names: List of task class names parsed from ``task`` in
+            ``__post_init__`` (split on commas, whitespace-stripped, blank entries
+            dropped). Assigned as a plain instance attribute, not a dataclass field.
+
+    Raises:
+        ValueError: If ``task`` contains no non-blank task name.
+    """
+
+    task: str = "PnPCounterToCab"
+    max_steps: int = 1500
+    chunk_usage: int | None = None
+    n_simulations: int = 50
+    video_dir: str | None = None
+    action_dim: int = 16
+    seed_base: int = 0
+    camera_height: int = 256
+    camera_width: int = 256
+    split: str | None = "all"
+
+    def __post_init__(self):
+        """Parse ``task`` into ``task_names`` and reject an empty task list."""
+        # ``str(...)`` coerces non-string values before splitting; segments that are
+        # blank after stripping (e.g. from a trailing comma) are discarded.
+        # NOTE(review): a ``None`` task is stringified to "None" and therefore passes
+        # the emptiness check below — confirm callers never pass ``None``.
+        task_names = [s.strip() for s in str(self.task).split(",") if s.strip()]
+        if not task_names:
+            raise ValueError("RoboCasaEvalConfig.task must contain at least one task class name.")
+        # Stored outside the declared fields, so it is not part of the generated
+        # ``__init__``/``__repr__``/``__eq__``.
+        self.task_names = task_names
+
+
+@dataclass
+class TrainConfigWithRoboCasaEval(TrainPipelineConfig):
+    """Training configuration extended with RoboCasa evaluation settings.
+
+    Args:
+        robocasa: Configuration for RoboCasa environment evaluation. Must be provided;
+            the ``None`` default is only a placeholder and is rejected in
+            ``__post_init__``.
+
+    Raises:
+        ValueError: If ``robocasa`` is None or ``chunk_usage`` is outside ``[1, action_chunk]``.
+    """
+
+    # NOTE(review): the annotation is non-optional although the default is ``None``;
+    # presumably the default is needed because inherited fields already have
+    # defaults — confirm against ``TrainPipelineConfig``.
+    robocasa: RoboCasaEvalConfig = None
+
+    def __post_init__(self):
+        """Validate ``robocasa`` and resolve its ``chunk_usage`` default."""
+        # Let the parent config run its own post-init before the RoboCasa checks.
+        super().__post_init__()
+        if self.robocasa is None:
+            raise ValueError("RoboCasa config must be provided.")
+        # An unset ``chunk_usage`` falls back to ``self.action_chunk``, i.e. the whole
+        # chunk is executed before the policy is queried again.
+        if self.robocasa.chunk_usage is None:
+            self.robocasa.chunk_usage = self.action_chunk
+        # An explicit value must lie within ``[1, action_chunk]`` (inclusive).
+        if not 1 <= self.robocasa.chunk_usage <= self.action_chunk:
+            raise ValueError(
+                f"Chunk usage must be between 1 and {self.action_chunk=}, got {self.robocasa.chunk_usage=}."
+            )