From f34031ae881c96113fcf61a18bce0ae9dfb166f8 Mon Sep 17 00:00:00 2001 From: Hao Liang Date: Tue, 16 Jun 2026 20:40:31 -0700 Subject: [PATCH 1/3] DROID action-policy recipe + multi-node launcher + episode-shuffle (rebased on main) Rebased onto current main. main #34 upstreamed the DROID dataset (joint_pos, use_state, keep-ranges filter, action_space) so droid_lerobot_dataset.py now carries only the get_shuffle_blocks helper grafted onto main's version; #29's recipe change (dropped /cluster override) is incorporated. Remaining contribution: action_policy_droid_nano recipe (mode=policy, lr=2e-4 @ 8192 global, max_num_tokens_after_packing=-1, scrubbed comments), the episode-shuffle stream (action_sft_dataset.py), the multi-node-capable SFT launcher (NNODES/NODE_RANK/MASTER_ADDR passthrough + EXTRA_TAIL_OVERRIDES), and the post-train doc. Co-Authored-By: Claude Opus 4.8 Signed-off-by: Hao Liang --- .../action_policy_droid_nano.py | 60 ++++++---- .../vfm/action/datasets/action_sft_dataset.py | 67 ++++++++++- .../action/datasets/droid_lerobot_dataset.py | 16 +++ docs/action_policy_droid_posttrain.md | 111 ++++++++++-------- examples/_sft_launcher_common.sh | 14 ++- examples/launch_sft_action_policy_droid.sh | 17 ++- 6 files changed, 205 insertions(+), 80 deletions(-) diff --git a/cosmos_framework/configs/base/experiment/action/posttrain_config/action_policy_droid_nano.py b/cosmos_framework/configs/base/experiment/action/posttrain_config/action_policy_droid_nano.py index 4bc0c29..8880295 100644 --- a/cosmos_framework/configs/base/experiment/action/posttrain_config/action_policy_droid_nano.py +++ b/cosmos_framework/configs/base/experiment/action/posttrain_config/action_policy_droid_nano.py @@ -5,9 +5,8 @@ Mirrors the vision SFT stack (PackingDataLoader + RankPartitionedDataLoader), but feeds the DROID action dataset (``joint_pos`` 8D + ``use_state``, raw/ -un-normalized — same as the internal ``droid_lerobot_8b_policy`` run) through -``ActionTransformPipeline``, and trains 
the generation + action heads from the -public ``nvidia/Cosmos3-Nano`` base. +un-normalized) through ``ActionTransformPipeline``, and trains the generation + +action heads from the public ``nvidia/Cosmos3-Nano`` base. Usage (1 node, 8 GPU):: @@ -41,13 +40,10 @@ {"override /model": "mot_fsdp"}, {"override /data_train": None}, {"override /data_val": None}, - # Match internal droid_lerobot_8b_policy: apex FusedAdam with fp32 - # master_weights + eps 1e-8. adamw + fused + eps 1e-6 (bf16, no fp32 - # master) under-steps the small 5x-lr action heads and leaves the action - # loss on a noisy high plateau; an exact-match forward/optimizer test - # confirmed the convergence gap was the optimizer, not the model. + # FusedAdam with fp32 master_weights + eps 1e-8 (bf16 params + eps 1e-6 + # diverged on the action loss). {"override /optimizer": "fusedadamw"}, - {"override /scheduler": "lambdalinear"}, # matches internal droid_lerobot_8b (was lambdacosine) + {"override /scheduler": "lambdalinear"}, # linear LR decay {"override /checkpoint": "s3"}, { "override /callbacks": [ @@ -76,7 +72,7 @@ betas=[0.9, 0.99], eps=1.0e-08, fused=True, # popped by build_optimizer for FusedAdam (fused by construction) - # Generation + action heads (mirrors internal droid_lerobot_8b_policy). + # Train the generation + action heads. 
keys_to_select=[ "moe_gen", "time_embedder", @@ -86,7 +82,7 @@ "llm2action", "action_modality_embed", ], - lr=2.0e-04, # matches internal droid_lerobot_8b_policy submit (--lr 2e-4) + lr=2.0e-04, # for the 8192 global batch lr_multipliers={ "action2llm": 5.0, "llm2action": 5.0, @@ -96,7 +92,7 @@ weight_decay=0.05, ), scheduler=dict( - lr_scheduler_type="LambdaLinear", # matches internal droid_lerobot_8b (was LambdaCosine) + lr_scheduler_type="LambdaLinear", cycle_lengths=[100], # smoke: 100 iters (real run sets via TOML) f_max=[0.4], f_min=[0.0], @@ -125,7 +121,7 @@ device_monitor=dict( every_n=200, log_memory_detail=True, save_s3=False, step_size=1, upload_every_n_mul=5 ), - grad_clip=dict(clip_norm=1.0, force_finite=True), # matches internal make_8b + grad_clip=dict(clip_norm=1.0, force_finite=True), heart_beat=dict(every_n=200, save_s3=False, step_size=1, update_interval_in_minute=20), iter_speed=dict(every_n=1, hit_thres=50, save_s3=False, save_s3_every_log_n=500), low_precision=dict(update_iter=1), @@ -140,10 +136,9 @@ dcp_async_mode_enabled=False, enable_gcs_patch_in_boto3=True, keys_not_to_resume=[], - # Skip net_ema. (→ EMA warm-start copies net→net_ema, see dcp.py) AND the - # action heads, so they init fresh from the base — matches internal - # make_8b _DEFAULT_KEYS_TO_SKIP (Cosmos3-Nano's action heads are not - # DROID-policy-trained). + # Skip net_ema. (EMA warm-starts from net, see dcp.py) and the action + # heads, so they init fresh from the base (the base has no DROID-trained + # action heads). 
keys_to_skip_loading=[ "net_ema.", "action2llm", @@ -171,7 +166,7 @@ dataloader_train=L(PackingDataLoader)( audio_sample_rate=48000, dataset_name="action_droid", - max_samples_per_batch=128, # count-based batch (matches internal res480 8B) + max_samples_per_batch=128, # per rank -> 8192 global batch at 64 ranks (16 nodes, shard 8 x replicate 8) max_sequence_length=None, # None disables token packing (TOML can't express null) patch_spatial=2, sound_latent_fps=0, @@ -185,6 +180,13 @@ pin_memory=True, prefetch_factor=4, sampler=None, + # Shuffling is handled by the dataset (iterable_shuffle=True below): + # ActionIterableShuffleDataset streams rank x worker-sharded, episode-order- + # shuffled, sequential-within-episode. The map-style dataset has no internal + # shuffle, so a SequentialSampler would feed every rank the SAME consecutive + # overlapping windows -> global batch ~1 episode -> unstable grad-norm; a plain + # RandomSampler decorrelates but does random-access I/O -> slow + OOM. The + # iterable gives decorrelation with sequential reads. datasets=dict( droid=dict( ratio=1, @@ -193,15 +195,21 @@ fps=15.0, chunk_length=32, action_space="joint_pos", + # Policy-only task mode. "joint" would randomly pick + # forward_dynamics/inverse_dynamics/policy per sample (multi-task), + # which dilutes each per-task loss by ~1/3. + mode="policy", use_state=True, + iterable_shuffle=True, # rank x worker episode-shuffle stream + episode_shuffle_seed=42, use_image_augmentation=True, # SR boost (random crop+rescale + color jitter) # Keep-ranges window filter (drops idle/non-task frames). Off by default; - # the launcher sets use_filter_dict=True + filter_dict_path for internal parity. + # set use_filter_dict=True + filter_dict_path to enable. 
use_filter_dict=False, filter_dict_path=None, action_normalization=None, viewpoint="concat_view", # wrist 480p (top) + L/R shoulder 320x180 (bottom) - resolution="480", # 640x360 data @ 480p (matches internal res480 run) + resolution="480", # 640x360 data @ 480p max_action_dim="${model.config.max_action_dim}", cfg_dropout_rate=0.1, tokenizer_config="${model.config.vlm_config.tokenizer}", @@ -217,12 +225,18 @@ ) -# chunk_length=32 → 33 observation frames; pin the VAE encode duration to match -# (internal used [17] for chunk_length=16). Set post-construction so it lands on -# the deep-copied NANO_MODEL_CONFIG.tokenizer. +# chunk_length=32 -> 33 observation frames; pin the VAE encode duration to match. +# Set post-construction so it lands on the deep-copied NANO_MODEL_CONFIG.tokenizer. action_policy_droid_nano["model"]["config"]["tokenizer"]["encode_exact_durations"] = [33] +# Uncap the packed-sequence length. The NANO default (45056) caps the packed sequence, +# truncating long DROID windows to ~1/4 of their natural length; -1 (uncapped) processes +# the full vision sequence per step. Does not change the per-token loss; widens the +# effective vision context per step. 
+action_policy_droid_nano["model"]["config"]["max_num_tokens_after_packing"] = -1 + + for _item in [action_policy_droid_nano]: _name = [k for k, v in globals().items() if v is _item][0] cs.store(group="experiment", package="_global_", name=_name, node=_item) diff --git a/cosmos_framework/data/vfm/action/datasets/action_sft_dataset.py b/cosmos_framework/data/vfm/action/datasets/action_sft_dataset.py index 5d5b74e..1790de5 100644 --- a/cosmos_framework/data/vfm/action/datasets/action_sft_dataset.py +++ b/cosmos_framework/data/vfm/action/datasets/action_sft_dataset.py @@ -16,7 +16,7 @@ from typing import Any -from torch.utils.data import Dataset +from torch.utils.data import Dataset, IterableDataset, get_worker_info from cosmos_framework.data.vfm.action.datasets.droid_lerobot_dataset import DROIDLeRobotDataset from cosmos_framework.data.vfm.action.transforms import ActionTransformPipeline @@ -37,6 +37,55 @@ def __len__(self) -> int: def __getitem__(self, idx: int) -> dict[str, Any]: return self._transform(self._dataset[idx], self._resolution) + def get_shuffle_blocks(self): + """Delegate to the inner DROIDLeRobotDataset (per-episode/segment flat-index blocks).""" + return self._dataset.get_shuffle_blocks() + + + +class ActionIterableShuffleDataset(IterableDataset): + """Streaming view of a map-style ``ActionSFTDataset``. + + Each ``(rank, worker)`` is assigned a DISJOINT subset of episodes (sharded over + ``shard_world_size * num_workers``), shuffles its episode ORDER, and streams the + windows WITHIN each episode sequentially -> within-rank batch diversity (the N + workers of a rank stream N different episodes) AND cross-rank diversity, while + keeping reads sequential (I/O locality + COW; no RandomSampler random-access OOM). + Re-shuffles each epoch and streams indefinitely (the trainer stops at ``max_iter``). + + ``shard_world_size`` / ``shard_rank`` are set by ``RankPartitionedDataLoader``. 
+ """ + + def __init__(self, dataset: "ActionSFTDataset", seed: int = 42): + super().__init__() + self._dataset = dataset + self._seed = int(seed) + self.shard_world_size = 1 + self.shard_rank = 0 + + def __len__(self) -> int: # informational only; iteration is infinite + return len(self._dataset) + + def __iter__(self): + import torch + + blocks = self._dataset.get_shuffle_blocks() + wi = get_worker_info() + wid = wi.id if wi is not None else 0 + nw = wi.num_workers if wi is not None else 1 + global_shard = int(self.shard_rank) * nw + wid + total_shards = max(1, int(self.shard_world_size) * nw) + epoch = 0 + while True: + g = torch.Generator() + g.manual_seed(self._seed + epoch) # same permutation across all (rank,worker) -> disjoint shard + order = torch.randperm(len(blocks), generator=g).tolist() + for b in order[global_shard::total_shards]: + start, length = blocks[b] + for idx in range(start, start + length): + yield self._dataset[idx] + epoch += 1 + def get_action_droid_sft_dataset( *, @@ -44,6 +93,7 @@ def get_action_droid_sft_dataset( fps: float = 15.0, chunk_length: int = 32, action_space: str = "joint_pos", + mode: str = "policy", use_state: bool = True, action_normalization: str | None = None, viewpoint: str = "concat_view", @@ -58,16 +108,18 @@ def get_action_droid_sft_dataset( append_duration_fps_timestamps: bool = True, append_resolution_info: bool = True, append_idle_frames: bool = False, -) -> ActionSFTDataset: - """Build the DROID action SFT dataset (joint_pos 8D by default), matching the - internal ``droid_lerobot_8b_policy`` data: ``action_space='joint_pos'`` + - ``use_state`` (8D, raw/un-normalized), concat_view, chunk_length 32.""" + iterable_shuffle: bool = False, + episode_shuffle_seed: int = 42, +) -> Dataset: + """Build the DROID action SFT dataset: ``action_space='joint_pos'`` (8D) + + ``use_state`` (raw/un-normalized), concat_view, chunk_length 32.""" dataset = DROIDLeRobotDataset( root=root, fps=fps, chunk_length=chunk_length, 
viewpoint=viewpoint, action_space=action_space, + mode=mode, use_state=use_state, action_normalization=action_normalization, use_image_augmentation=use_image_augmentation, @@ -83,4 +135,7 @@ def get_action_droid_sft_dataset( append_resolution_info=append_resolution_info, append_idle_frames=append_idle_frames, ) - return ActionSFTDataset(dataset, transform, resolution) + sft = ActionSFTDataset(dataset, transform, resolution) + if iterable_shuffle: + return ActionIterableShuffleDataset(sft, seed=episode_shuffle_seed) + return sft diff --git a/cosmos_framework/data/vfm/action/datasets/droid_lerobot_dataset.py b/cosmos_framework/data/vfm/action/datasets/droid_lerobot_dataset.py index 631f1e9..3bd1859 100644 --- a/cosmos_framework/data/vfm/action/datasets/droid_lerobot_dataset.py +++ b/cosmos_framework/data/vfm/action/datasets/droid_lerobot_dataset.py @@ -351,3 +351,19 @@ def __len__(self) -> int: if self._use_filter_dict: return int(self._seg_cum[-1]) if self._seg_cum.size else 0 return int(self._valid_cum[-1]) if self._valid_cum.size else 0 + + def get_shuffle_blocks(self) -> list[tuple[int, int]]: + """Per-episode (or per kept-segment, when ``use_filter_dict``) flat-index blocks + ``(start, length)``. 
``ActionIterableShuffleDataset`` shuffles the ORDER of these + blocks and shards them disjointly across ranks, while keeping windows *within* a + block sequential -> decorrelates batches across ranks without random-access I/O + (preserves locality + copy-on-write memory sharing across workers).""" + cum = self._seg_cum if self._use_filter_dict else self._valid_cum + blocks: list[tuple[int, int]] = [] + prev = 0 + for c in np.asarray(cum).tolist(): + c = int(c) + if c > prev: + blocks.append((prev, c - prev)) + prev = c + return blocks diff --git a/docs/action_policy_droid_posttrain.md b/docs/action_policy_droid_posttrain.md index ac5c3fb..dbd46da 100644 --- a/docs/action_policy_droid_posttrain.md +++ b/docs/action_policy_droid_posttrain.md @@ -1,23 +1,32 @@ - - +# Cosmos3-Nano-Policy-DROID Post-Training -# DROID Action-Policy Post-Training — `Cosmos3-Nano-Policy-DROID` +[Cosmos3-Nano-Policy-DROID](https://huggingface.co/nvidia/Cosmos3-Nano-Policy-DROID) is an action policy fine-tuned from [`Cosmos3-Nano`](https://huggingface.co/nvidia/Cosmos3-Nano) (the 8B MoT) on the **DROID LeRobot** dataset, using absolute joint-position actions plus proprioceptive state at 480p. This example reproduces that post-training. The registered `action_policy_droid_nano` experiment, the DROID action dataset class (`joint_pos` 8-D + `use_state`), and the EMA warm-start fix all ship in this package; you supply two external inputs — a prepared DROID LeRobot dataset and a DCP base checkpoint converted from `nvidia/Cosmos3-Nano` (see [Inputs You Provide](#inputs-you-provide)). Validated end-to-end on H200: 1 node / 8 GPU and 2 nodes / 16 ranks (HSDP). -> **STATUS: recipe ships in this package.** The registered experiment, the DROID action -> dataset class (`joint_pos` 8D + `use_state`), and the EMA warm-start fix land here. 
-> To run it you supply two external inputs — a prepared **DROID LeRobot v3.0** dataset and -> a **DCP base checkpoint** converted from `nvidia/Cosmos3-Nano` (see -> [Inputs you provide](#inputs-you-provide)). Validated end-to-end on H200: 1 node / 8 GPU -> and 2 nodes / 16 ranks (HSDP). + -Fine-tune `Cosmos3-Nano` (the 8B MoT) into an action policy on the **DROID LeRobot** dataset, -reproducing `Cosmos3-Nano-Policy-DROID`. The policy is initialized from **`nvidia/Cosmos3-Nano`** -(public Hugging Face repo) and trained with absolute joint-position actions + proprioceptive -state at 480p. +______________________________________________________________________ + +**Table of Contents** + +- [Inputs You Provide](#inputs-you-provide) +- [Dataset](#dataset) +- [Recipe](#recipe) +- [Full Reproduction](#full-reproduction) +- [Checkpoints](#checkpoints) ______________________________________________________________________ -## Inputs you provide + + +Prerequisites: + +- [Setup](../README.md#setup) — clone the repo, install the training extras (`uv sync --all-extras --group=cu130-train`), and activate the environment. +- [Environment Variables](./environment_variables.md) +- [FAQ](./faq.md) — troubleshooting (OOM during SFT, defaults), common pitfalls. + +The runnable artifacts (TOML recipe, paired launch shell) live in [`examples/`](../examples/README.md); all commands below run from the repo root with the environment activated. + +## Inputs You Provide This package ships the training stack — the registered `action_policy_droid_nano` experiment, the DROID action dataset class with the recipe knobs (`action_space=joint_pos`, `use_state`, @@ -28,45 +37,63 @@ be provided per environment: filtering is run out-of-band (not yet in this repo). Point `DROID_ROOT` at the resulting `…/droid_lerobot/success` directory (must contain `meta/info.json`). 2. 
**DCP base checkpoint** — convert `nvidia/Cosmos3-Nano` to DCP and point - `BASE_CHECKPOINT_PATH` at it (see [Full reproduction](#full-reproduction)). Action heads are + `BASE_CHECKPOINT_PATH` at it (see [Full Reproduction](#full-reproduction)). Action heads are not loaded from it (they init fresh). -## Dataset — DROID LeRobot +## Dataset -To be released. +The **DROID LeRobot** dataset. To be released. ## Recipe -| knob | value | -| ----------------- | ------------------------------------------------------------------- | -| init | `nvidia/Cosmos3-Nano` (public Hugging Face repo) | -| action space | `joint_pos` (absolute joint position, 8-D incl. gripper) | -| state | `use_state=true` (proprioception; valid only with `joint_pos`) | -| resolution | `480` | -| viewpoint / video | `concat_view` / `video_mode=null` | -| chunk length | `32` (tokenizer `encode_exact_durations=[33]`) | -| lr | `2e-4` | -| samples/rank | `32` (H200-safe; 64 OOMs at 480p). global batch = `32 × world_size` | -| eval | disabled for the reproduction run | - -## Full reproduction +| knob | value | +| ----------------- | ----------------------------------------------------------------------------------------------------- | +| init | `nvidia/Cosmos3-Nano` (public Hugging Face repo) | +| action space | `joint_pos` (absolute joint position, 8-D incl. gripper) | +| state | `use_state=true` (proprioception; valid only with `joint_pos`) | +| task mode | `policy` (single-task; the `joint` multi-task default is avoided) | +| resolution | `480` | +| viewpoint / video | `concat_view` / `video_mode=null` | +| chunk length | `32` (tokenizer `encode_exact_durations=[33]`) | +| sequence packing | `max_num_tokens_after_packing=-1` (full vision sequence per step) | +| shuffle | episode-shuffle stream (decorrelates the per-step global batch) | +| window filter | keep-ranges (`KarlP/droid`) — trains the curated ≈74% window set | +| lr | `2e-4` | +| global batch | `8192` (e.g. 
128 samples/rank × 64 ranks; lower per-rank + raise `grad_accum_iter` to fit GPU memory) | +| eval | disabled for the reproduction run | + +> The dataset streams an **episode-shuffle** order (decorrelates the per-step global batch — a +> plain sequential read feeds every rank the same overlapping windows → unstable grad-norm). The +> **keep-ranges window filter** drops idle/non-task frames (trains the curated ≈74% window set); +> it is off in the registered experiment, and the reproduction commands enable it explicitly — see [Full Reproduction](#full-reproduction). + +## Full Reproduction The OSS flow mirrors the other recipes (see [docs/training.md](./training.md)): ```shell -# Step 1: prepare DROID LeRobot v3.0 success split -> $DATASET_PATH (see "Inputs you provide") +# Step 1: prepare DROID LeRobot v3.0 success split -> $DATASET_PATH (see "Inputs You Provide") # Step 2: convert the base checkpoint -> $BASE_CHECKPOINT_PATH python -m cosmos_framework.scripts.convert_model_to_dcp \ - --checkpoint-path Cosmos3-Nano \ - -o $BASE_CHECKPOINT_PATH + -o $BASE_CHECKPOINT_PATH \ + --checkpoint-path Cosmos3-Nano + +# Step 3: download the keep-ranges window filter into $FILTER_DIR (set it to any local directory; the filter drops idle/non-task frames -> trains +# the curated ~74% window set, matching the released model). +hf download KarlP/droid keep_ranges_1_0_1.json --local-dir $FILTER_DIR -# Step 3: launch. The TOML selects the experiment + scalars; the dataset/action +# Step 4: launch. The TOML selects the experiment + scalars; the dataset/action # knobs come from the registered experiment. export DATASET_PATH=/path/to/dataset/success export BASE_CHECKPOINT_PATH=/path/to/base_checkpoint export WAN_VAE_PATH=/path/to/Wan2.2_VAE.pth export NPROC_PER_NODE=8 +# Enable the keep-ranges filter via EXTRA_TAIL_OVERRIDES (space-separated Hydra +# overrides; an exported string is inherited by the child `bash` process that runs the launcher). 
+export EXTRA_TAIL_OVERRIDES="\ +dataloader_train.dataloader.datasets.droid.dataset.use_filter_dict=True \ +dataloader_train.dataloader.datasets.droid.dataset.filter_dict_path=$FILTER_DIR/keep_ranges_1_0_1.json" bash examples/launch_sft_action_policy_droid.sh ``` @@ -74,14 +101,11 @@ The recipe TOML (`examples/toml/sft_config/action_policy_droid_repro.toml`) sets knobs (`max_iter`, `save_iter`, `grad_clip`, parallelism, wandb); the dataset/action knobs (`joint_pos`, `use_state`, `concat_view`, 480p, chunk 32, count-based batch) live in the registered `action_policy_droid_nano` experiment per the schema's design. For multi-node HSDP, -set `model.parallelism.data_parallel_replicate_degree = ` (intra-node shard stays 8). +set `model.config.parallelism.data_parallel_replicate_degree = <num_nodes>` (intra-node shard stays 8). -## Smoke reproduction - -Config/import/data sanity without burning a full run: small node count + a handful of iters via -`--config-overrides "trainer.max_iter=10" "checkpoint.save_iter=10"` (and a small -`data_parallel_shard_degree`). Use this to validate the recipe composes and the dataset opens -before any large allocation. +The **keep-ranges filter** maps each DROID trajectory key to a list of `[start, end]` frame +ranges; only windows whose start falls inside a kept range are trained on (episodes absent from +the dict are dropped). To train on the full window set instead, leave `EXTRA_TAIL_OVERRIDES` unset. ## Checkpoints @@ -89,8 +113,3 @@ before any large allocation. `////checkpoints/iter_/`. - The run is **resumable** from the latest checkpoint (re-launch with the same `job.name`). - Export to HF safetensors via `cosmos_framework.scripts.export_model` (see [docs/training.md](./training.md)). - -## Non-goals - -- **Closed-loop / action evaluation is out of scope** for this reproduction pass (training - reproduction only), unless explicitly expanded. 
diff --git a/examples/_sft_launcher_common.sh b/examples/_sft_launcher_common.sh index 684acf8..5920ae8 100644 --- a/examples/_sft_launcher_common.sh +++ b/examples/_sft_launcher_common.sh @@ -20,6 +20,10 @@ # (e.g. data_setting.max_tokens=16000 for VLM smokes). # MASTER_PORT torchrun --master_port; default 50012. # NPROC_PER_NODE torchrun --nproc_per_node; default 8. +# NNODES torchrun --nnodes; multi-node only (unset = single-node). +# NODE_RANK torchrun --node_rank; this worker's 0-based index. +# MASTER_ADDR torchrun --master_addr; rank-0 host (multi-node only — it +# has no torchrun env fallback, so it must be passed here). # LOG_FILENAME override $LOG_DIR/${LOG_FILENAME} # (default _sft.log). # @@ -83,8 +87,16 @@ if (( ${#TAIL_OVERRIDES[@]} > 0 )); then TRAILING_ARGS=(-- "${TAIL_OVERRIDES[@]}") fi +# torchrun topology. Single-node by default; a SLURM/Lepton wrapper sets NNODES / +# NODE_RANK / MASTER_ADDR for multi-node. Each is appended only when set, so with all +# three unset the invocation is identical to the single-node case. +TORCHRUN_ARGS=(--nproc_per_node="${NPROC_PER_NODE:-8}" --master_port="${MASTER_PORT:-50012}") +[[ -n "${NNODES:-}" ]] && TORCHRUN_ARGS+=(--nnodes="$NNODES") +[[ -n "${NODE_RANK:-}" ]] && TORCHRUN_ARGS+=(--node_rank="$NODE_RANK") +[[ -n "${MASTER_ADDR:-}" ]] && TORCHRUN_ARGS+=(--master_addr="$MASTER_ADDR") + IMAGINAIRE_OUTPUT_ROOT="$IMAGINAIRE_OUTPUT_ROOT" PYTHONPATH=. 
\ - torchrun --nproc_per_node="${NPROC_PER_NODE:-8}" --master_port="${MASTER_PORT:-50012}" -m cosmos_framework.scripts.train \ + torchrun "${TORCHRUN_ARGS[@]}" -m cosmos_framework.scripts.train \ --sft-toml="$TOML_FILE" \ "${TRAILING_ARGS[@]}" \ 2>&1 | tee "$LOG_FILE" diff --git a/examples/launch_sft_action_policy_droid.sh b/examples/launch_sft_action_policy_droid.sh index 6ab0bc9..a6b80ee 100755 --- a/examples/launch_sft_action_policy_droid.sh +++ b/examples/launch_sft_action_policy_droid.sh @@ -8,7 +8,7 @@ # examples/toml/sft_config/action_policy_droid_repro.toml (selects the # registered `action_policy_droid_nano` experiment; res480, joint_pos 8D + # use_state, trains the generation + action heads). See -# docs/action_policy_droid_posttraining.md. +# docs/action_policy_droid_posttrain.md. # # Env vars (override for your filesystem): # DATASET_PATH DROID LeRobot v3.0 success split (…/droid_lerobot/success) @@ -16,10 +16,11 @@ # WAN_VAE_PATH Wan2.2 VAE .pth (Wan-AI/Wan2.2-TI2V-5B) # WANDB_API_KEY for online logging (TOML wandb_mode="online") # NPROC_PER_NODE torchrun --nproc_per_node (default 8) +# EXTRA_TAIL_OVERRIDES space-separated Hydra overrides (e.g. the keep-ranges filter) # # Single-node smoke (config/data sanity, a few iters): -# TAIL_OVERRIDES=(trainer.max_iter=10 checkpoint.save_iter=10 \ -# dataloader_train.max_samples_per_batch=32) +# export EXTRA_TAIL_OVERRIDES="trainer.max_iter=10 checkpoint.save_iter=10 \ +# dataloader_train.max_samples_per_batch=32" # bash examples/launch_sft_action_policy_droid.sh # # Multi-node: launch on every worker; the trainer reads torchrun's @@ -34,6 +35,14 @@ TOML_FILE="examples/toml/sft_config/action_policy_droid_repro.toml" # The experiment reads ${oc.env:DROID_ROOT}; bridge the launcher's DATASET_PATH to it. 
export DROID_ROOT="${DROID_ROOT:-$DATASET_PATH}" -EXTRA_DATASET_CHECK='[[ -f "$DROID_ROOT/meta/info.json" ]] || { echo "ERROR: missing $DROID_ROOT/meta/info.json (prepare DROID LeRobot v3.0 — see docs/action_policy_droid_posttraining.md)" >&2; exit 1; }' +EXTRA_DATASET_CHECK='[[ -f "$DROID_ROOT/meta/info.json" ]] || { echo "ERROR: missing $DROID_ROOT/meta/info.json (prepare DROID LeRobot v3.0 — see docs/action_policy_droid_posttrain.md)" >&2; exit 1; }' + +# Extra Hydra overrides from the environment: a space-separated string word-split into +# the TAIL_OVERRIDES array. An exported string is inherited by the child `bash` +# process, unlike a TAIL_OVERRIDES array set in your shell. Use it e.g. to enable the +# keep-ranges window filter (see docs/action_policy_droid_posttrain.md). +TAIL_OVERRIDES=( + ${EXTRA_TAIL_OVERRIDES:-} +) source "$(dirname "${BASH_SOURCE[0]}")/_sft_launcher_common.sh" From 3d8c662a73314232312a2f28b08f70510756f54c Mon Sep 17 00:00:00 2001 From: Hao Liang Date: Tue, 16 Jun 2026 22:12:16 -0700 Subject: [PATCH 2/3] action base dataset: skip normalization when action_normalization is None MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit joint_pos uses raw (un-normalized) joint actions, so DROIDLeRobotDataset sets action_normalization=None — but _build_result called normalize_action() unconditionally, which raises 'Unknown normalization method: None'. Guard it so None means raw actions (caught by a 2-node sanity run on the rebased branch). 
Co-Authored-By: Claude Opus 4.8 Signed-off-by: Hao Liang --- cosmos_framework/data/vfm/action/datasets/base_dataset.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/cosmos_framework/data/vfm/action/datasets/base_dataset.py b/cosmos_framework/data/vfm/action/datasets/base_dataset.py index 564d48e..56e9599 100644 --- a/cosmos_framework/data/vfm/action/datasets/base_dataset.py +++ b/cosmos_framework/data/vfm/action/datasets/base_dataset.py @@ -186,7 +186,11 @@ def _build_result( **extras: Any, ) -> dict[str, Any]: idle_frames = self._compute_idle_frames(action) - normalized_action = normalize_action(action, self.action_normalization, self._load_norm_stats()) + # action_normalization=None -> use raw actions (no normalization), e.g. joint_pos. + if self.action_normalization is None: + normalized_action = action + else: + normalized_action = normalize_action(action, self.action_normalization, self._load_norm_stats()) formatted_video = (video * 255.0).clamp(0.0, 255.0).to(torch.uint8).permute(1, 0, 2, 3) return { "ai_caption": ai_caption, From c0fde7934bb33a558558b25d5ad48bd28cd80c8f Mon Sep 17 00:00:00 2001 From: Hao Liang Date: Wed, 17 Jun 2026 06:28:33 -0700 Subject: [PATCH 3/3] action recipe: default loss_scale=10 (vision FM weight) to match the reference The bare recipe trained with the NANO default loss_scale=1.0, weighting the vision flow-matching loss 10x lower than the Cosmos3-Nano-Policy-DROID reference (which uses 10.0). Set it post-construction so the recipe reproduces without launcher overrides. 
Co-Authored-By: Claude Opus 4.8 Signed-off-by: Hao Liang --- .../action/posttrain_config/action_policy_droid_nano.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/cosmos_framework/configs/base/experiment/action/posttrain_config/action_policy_droid_nano.py b/cosmos_framework/configs/base/experiment/action/posttrain_config/action_policy_droid_nano.py index 8880295..c1972d2 100644 --- a/cosmos_framework/configs/base/experiment/action/posttrain_config/action_policy_droid_nano.py +++ b/cosmos_framework/configs/base/experiment/action/posttrain_config/action_policy_droid_nano.py @@ -237,6 +237,12 @@ action_policy_droid_nano["model"]["config"]["max_num_tokens_after_packing"] = -1 +# Weight the vision flow-matching loss 10x in the total loss (the NANO default is 1.0). +# loss_scale multiplies only the vision term, balancing it against the action loss +# (action_loss_weight=10) so both heads train at comparable gradient magnitude. +action_policy_droid_nano["model"]["config"]["rectified_flow_training_config"]["loss_scale"] = 10.0 + + for _item in [action_policy_droid_nano]: _name = [k for k, v in globals().items() if v is _item][0] cs.store(group="experiment", package="_global_", name=_name, node=_item)