diff --git a/.agents/knowledge/constraints.md b/.agents/knowledge/constraints.md
index 3623f515..0e0fd1c7 100644
--- a/.agents/knowledge/constraints.md
+++ b/.agents/knowledge/constraints.md
@@ -131,6 +131,7 @@ The adapter sets inference dtype for frozen components and training dtype for tr
 - Use relative imports within `flow_factory` package (e.g., `from ..hparams import *`)
 - Use absolute imports for external packages
 - Follow existing wildcard import patterns for `hparams`
+- **Top-level imports only**: All `import` / `from ... import ...` statements MUST live at the top of the module, never inside function bodies, methods (including `__init__`), or conditional branches. Sanctioned exceptions: (a) optional dependencies wrapped in `try/except ImportError` (e.g., `deepspeed`, `xformers`); (b) backend-gated imports whose target symbol is only resolvable under a specific runtime backend that a preceding feature check has already selected (e.g., DeepSpeed/FSDP submodules guarded by `is_deepspeed()` / `is_fsdp2()` in `models/abc.py`); (c) genuinely unresolvable circular imports, documented inline. Lazy imports added merely for "import speed" or "to keep the module light" are NOT acceptable — a hard dependency is almost always already loaded by some other module on a typical import path, so deferring it saves nothing. Inline imports hide the dependency surface from readers, `isort`, and static-analysis tools, and re-run the import statement (a `sys.modules` lookup plus name binding) on every call, which adds up in hot loops.
 
 ### 23. Type Annotations
 All public methods must have type annotations. Use `typing` module types (`List`, `Dict`, `Optional`, `Tuple`, `Union`) for Python 3.10 compatibility.
diff --git a/.agents/skills/ff-review/SKILL.md b/.agents/skills/ff-review/SKILL.md
index de301a1a..ae85957b 100644
--- a/.agents/skills/ff-review/SKILL.md
+++ b/.agents/skills/ff-review/SKILL.md
@@ -60,6 +60,7 @@ git status             # Modified files
 - [ ] English comments and docstrings
 - [ ] Apache 2.0 license header on new files
 - [ ] No unnecessary wildcard imports (except `hparams`)
+- [ ] **Top-level imports only** (constraint #22 in `.agents/knowledge/constraints.md`) — that entry lists the three sanctioned exceptions (optional deps via `try/except ImportError`, backend-gated imports behind runtime feature checks like DeepSpeed/FSDP, unresolvable circular imports).
 
 ### Documentation
 - [ ] `guidance/` docs updated if behavior changed
diff --git a/examples/awm/lora/flux1/default.yaml b/examples/awm/lora/flux1/default.yaml
index 075e8e03..01ba0de4 100644
--- a/examples/awm/lora/flux1/default.yaml
+++ b/examples/awm/lora/flux1/default.yaml
@@ -23,7 +23,7 @@ model:
   target_modules: "default" # Options: all, default, or list of module names like ["to_k", "to_q", "to_v", "to_out.0"]
   model_name_or_path: "black-forest-labs/FLUX.1-dev"  # HuggingFace model ID or local path
   model_type: "flux1"
-  resume_path: null # Path to load previous checkpoint/lora adapter
+  resume_path: null # Local path or HF repo id (e.g. 'owner/repo[/subdir][@rev]') for previous checkpoint/lora adapter
   resume_type: null # Options: lora, full, state. Null to auto-detect based on `finetune_type`
   # attn_backend: '_flash_3_hub'
 
diff --git a/examples/awm/lora/flux2_klein_base/default.yaml b/examples/awm/lora/flux2_klein_base/default.yaml
index 1986a2e4..f05d3855 100644
--- a/examples/awm/lora/flux2_klein_base/default.yaml
+++ b/examples/awm/lora/flux2_klein_base/default.yaml
@@ -23,7 +23,7 @@ model:
   target_modules: "default" # Options: all, default, or list of module names like ["to_k", "to_q", "to_v", "to_out.0"]
   model_name_or_path: "black-forest-labs/FLUX.2-klein-base-4B"  # Options: black-forest-labs/FLUX.2-klein-base-4B, black-forest-labs/FLUX.2-klein-base-9B
   model_type: "flux2-klein"
-  resume_path: null # Path to load previous checkpoint/lora adapter
+  resume_path: null # Local path or HF repo id (e.g. 'owner/repo[/subdir][@rev]') for previous checkpoint/lora adapter
   resume_type: null # Options: lora, full, state. Null to auto-detect based on `finetune_type`
   # attn_backend: '_flash_3_hub' # Attention backend for training.
 
diff --git a/examples/awm/lora/sd3_5/default.yaml b/examples/awm/lora/sd3_5/default.yaml
index bc32cf89..625cb99f 100644
--- a/examples/awm/lora/sd3_5/default.yaml
+++ b/examples/awm/lora/sd3_5/default.yaml
@@ -24,7 +24,7 @@ model:
   target_modules: "default" # Options: all, default, or list of module names like ["to_k", "to_q", "to_v", "to_out.0"]
   model_name_or_path: "stabilityai/stable-diffusion-3.5-medium"
   model_type: "sd3-5"
-  resume_path: null # Path to load previous checkpoint/lora adapter
+  resume_path: null # Local path or HF repo id (e.g. 'owner/repo[/subdir][@rev]') for previous checkpoint/lora adapter
   resume_type: null # Options: lora, full, state. Null to auto-detect based on `finetune_type`
   # attn_backend: '_flash_3_hub' # Attention backend for training.
 
diff --git a/examples/crd/lora/sd3_5/default.yaml b/examples/crd/lora/sd3_5/default.yaml
index cea6a766..2ef8aaed 100644
--- a/examples/crd/lora/sd3_5/default.yaml
+++ b/examples/crd/lora/sd3_5/default.yaml
@@ -25,7 +25,7 @@ model:
   target_modules: "default" # Options: all, default, or list of module names like ["to_k", "to_q", "to_v", "to_out.0"]
   model_name_or_path: "stabilityai/stable-diffusion-3.5-medium"
   model_type: "sd3-5"
-  resume_path: null # Path to load previous checkpoint/lora adapter
+  resume_path: null # Local path or HF repo id (e.g. 'owner/repo[/subdir][@rev]') for previous checkpoint/lora adapter
   resume_type: null # Options: lora, full, state. Null to auto-detect based on `finetune_type`
   # attn_backend: '_flash_3_hub' # Attention backend for training.
 
diff --git a/examples/dgpo/lora/sd3_5/default.yaml b/examples/dgpo/lora/sd3_5/default.yaml
index e7794146..6876e14a 100644
--- a/examples/dgpo/lora/sd3_5/default.yaml
+++ b/examples/dgpo/lora/sd3_5/default.yaml
@@ -48,7 +48,7 @@ model:
   target_modules: "default"
   model_name_or_path: "stabilityai/stable-diffusion-3.5-medium"  # config.pretrained.model
   model_type: "sd3-5"
-  resume_path: null
+  resume_path: null # Local path or HF repo id (e.g. 'owner/repo[/subdir][@rev]') for previous checkpoint/lora adapter
   resume_type: null
 
 # Training Configuration
diff --git a/examples/dgpo/lora/sd3_5/nocfg.yaml b/examples/dgpo/lora/sd3_5/nocfg.yaml
index b5519b93..23fd6295 100644
--- a/examples/dgpo/lora/sd3_5/nocfg.yaml
+++ b/examples/dgpo/lora/sd3_5/nocfg.yaml
@@ -39,7 +39,7 @@ model:
   target_modules: "default"
   model_name_or_path: "stabilityai/stable-diffusion-3.5-medium"  # config.pretrained.model
   model_type: "sd3-5"
-  resume_path: null
+  resume_path: null # Local path or HF repo id (e.g. 'owner/repo[/subdir][@rev]') for previous checkpoint/lora adapter
   resume_type: null
 
 # Training Configuration
diff --git a/examples/dpo/lora/sd3_5/default.yaml b/examples/dpo/lora/sd3_5/default.yaml
index 8d885f51..e4d48884 100644
--- a/examples/dpo/lora/sd3_5/default.yaml
+++ b/examples/dpo/lora/sd3_5/default.yaml
@@ -50,7 +50,7 @@ model:
   target_modules: "default"
   model_name_or_path: "stabilityai/stable-diffusion-3.5-medium"  # Same as flow_grpo
   model_type: "sd3-5"
-  resume_path: null
+  resume_path: null # Local path or HF repo id (e.g. 'owner/repo[/subdir][@rev]') for previous checkpoint/lora adapter
   resume_type: null
 
 log:
diff --git a/examples/grpo/full/flux1/default.yaml b/examples/grpo/full/flux1/default.yaml
index ab7b2514..4845e4f7 100644
--- a/examples/grpo/full/flux1/default.yaml
+++ b/examples/grpo/full/flux1/default.yaml
@@ -21,7 +21,7 @@ model:
   target_modules: "default" # Options: all, default, or list of module names like ["to_k", "to_q", "to_v", "to_out.0"]
   model_name_or_path: "black-forest-labs/FLUX.1-dev"  # HuggingFace model ID or local path
   model_type: "flux1"
-  resume_path: null # Directory contains previous checkpoint/lora adapter
+  resume_path: null # Local path or HF repo id (e.g. 'owner/repo[/subdir][@rev]') for previous checkpoint/lora adapter
   resume_type: null # Options: lora, full, state. Null to auto-detect based on `finetune_type`
 
 log:
diff --git a/examples/grpo/full/flux1_kontext/default.yaml b/examples/grpo/full/flux1_kontext/default.yaml
index 8cefd8cf..99a57442 100644
--- a/examples/grpo/full/flux1_kontext/default.yaml
+++ b/examples/grpo/full/flux1_kontext/default.yaml
@@ -21,7 +21,7 @@ model:
   target_modules: "default" # Options: all, default, or list of module names like ["to_k", "to_q", "to_v", "to_out.0"]
   model_name_or_path: "black-forest-labs/FLUX.1-Kontext-dev"  # HuggingFace model ID or local path
   model_type: "flux1-kontext"
-  resume_path: null # Directory contains previous checkpoint/lora adapter
+  resume_path: null # Local path or HF repo id (e.g. 'owner/repo[/subdir][@rev]') for previous checkpoint/lora adapter
   resume_type: null # Options: lora, full, state. Null to auto-detect based on `finetune_type`
 
 log:
diff --git a/examples/grpo/full/flux2/i2i.yaml b/examples/grpo/full/flux2/i2i.yaml
index 59d731a0..32ea5c04 100644
--- a/examples/grpo/full/flux2/i2i.yaml
+++ b/examples/grpo/full/flux2/i2i.yaml
@@ -24,7 +24,7 @@ model:
   target_modules: ["attn.to_q", "attn.to_k", "attn.to_v", "attn.to_out.0"]
   model_name_or_path: "black-forest-labs/FLUX.2-dev"  # HuggingFace model ID or local path
   model_type: "flux2"  # Options: flux1, flux1-kontext, flux2, qwenimage, qwenimage-edit
-  resume_path: null # Directory contains previous checkpoint/lora adapter
+  resume_path: null # Local path or HF repo id (e.g. 'owner/repo[/subdir][@rev]') for previous checkpoint/lora adapter
   resume_type: null # Options: lora, full, state. Null to auto-detect based on `finetune_type`
 
 log:
diff --git a/examples/grpo/full/flux2/t2i.yaml b/examples/grpo/full/flux2/t2i.yaml
index 9d9691c0..b9af3835 100644
--- a/examples/grpo/full/flux2/t2i.yaml
+++ b/examples/grpo/full/flux2/t2i.yaml
@@ -24,7 +24,7 @@ model:
   target_modules: ["attn.to_q", "attn.to_k", "attn.to_v", "attn.to_out.0"]
   model_name_or_path: "black-forest-labs/FLUX.2-dev"  # HuggingFace model ID or local path
   model_type: "flux2"  # Options: flux1, flux1-kontext, flux2, qwenimage, qwenimage-edit
-  resume_path: null # Path to load previous checkpoint/lora adapter
+  resume_path: null # Local path or HF repo id (e.g. 'owner/repo[/subdir][@rev]') for previous checkpoint/lora adapter
   resume_type: null # Options: lora, full, state. Null to auto-detect based on `finetune_type`
 
 log:
diff --git a/examples/grpo/full/flux2_klein/default.yaml b/examples/grpo/full/flux2_klein/default.yaml
index 2b834d11..fc515f68 100644
--- a/examples/grpo/full/flux2_klein/default.yaml
+++ b/examples/grpo/full/flux2_klein/default.yaml
@@ -21,7 +21,7 @@ model:
   target_modules: "default" # Options: all, default, or list of module names like ["to_k", "to_q", "to_v", "to_out.0"]
   model_name_or_path: "black-forest-labs/FLUX.2-klein-4B"  # Options: black-forest-labs/FLUX.2-klein-4B, black-forest-labs/FLUX.2-klein-9B
   model_type: "flux2-klein"
-  resume_path: null # Path to load previous checkpoint/lora adapter
+  resume_path: null # Local path or HF repo id (e.g. 'owner/repo[/subdir][@rev]') for previous checkpoint/lora adapter
   resume_type: null # Options: lora, full, state. Null to auto-detect based on `finetune_type`
 
 log:
diff --git a/examples/grpo/full/flux2_klein_base/default.yaml b/examples/grpo/full/flux2_klein_base/default.yaml
index 2cab32ce..af13dbeb 100644
--- a/examples/grpo/full/flux2_klein_base/default.yaml
+++ b/examples/grpo/full/flux2_klein_base/default.yaml
@@ -21,7 +21,7 @@ model:
   target_modules: "default" # Options: all, default, or list of module names like ["to_k", "to_q", "to_v", "to_out.0"]
   model_name_or_path: "black-forest-labs/FLUX.2-klein-base-4B"  # Options: black-forest-labs/FLUX.2-klein-base-4B, black-forest-labs/FLUX.2-klein-base-9B
   model_type: "flux2-klein"
-  resume_path: null # Directory contains previous checkpoint/lora adapter
+  resume_path: null # Local path or HF repo id (e.g. 'owner/repo[/subdir][@rev]') for previous checkpoint/lora adapter
   resume_type: null # Options: lora, full, state. Null to auto-detect based on `finetune_type`
 
 log:
diff --git a/examples/grpo/full/qwen_image/default.yaml b/examples/grpo/full/qwen_image/default.yaml
index 538d43e9..dc6684ba 100644
--- a/examples/grpo/full/qwen_image/default.yaml
+++ b/examples/grpo/full/qwen_image/default.yaml
@@ -22,7 +22,7 @@ model:
   target_modules: "default" # Options: all, default, or list of module names like ["to_k", "to_q", "to_v", "to_out.0"]
   model_name_or_path: "Qwen/Qwen-Image"  # HuggingFace model ID or local path
   model_type: "qwen-image"  # Options: flux1, flux1-kontext, flux2, qwenimage, qwenimage-edit
-  resume_path: null # Path to load previous checkpoint/lora adapter
+  resume_path: null # Local path or HF repo id (e.g. 'owner/repo[/subdir][@rev]') for previous checkpoint/lora adapter
   resume_type: null # Options: lora, full, state. Null to auto-detect based on `finetune_type`
   # attn_backend: '_flash_3_varlen_hub' # Attention backend for Qwen-Image Series, which uses masked attention with variable sequence length.
 
diff --git a/examples/grpo/full/qwen_image_edit_plus/default.yaml b/examples/grpo/full/qwen_image_edit_plus/default.yaml
index f81b515e..b0ba0730 100644
--- a/examples/grpo/full/qwen_image_edit_plus/default.yaml
+++ b/examples/grpo/full/qwen_image_edit_plus/default.yaml
@@ -21,7 +21,7 @@ model:
   target_modules: "default" # Options: all, default, or list of module names like ["to_k", "to_q", "to_v", "to_out.0"]
   model_name_or_path: "Qwen/Qwen-Image-Edit-2509"  # Qwen/Qwen-Image-Edit-2509 or Qwen/Qwen-Image-Edit-2511
   model_type: "qwen-image-edit-plus"
-  resume_path: null # Path to load previous checkpoint/lora adapter
+  resume_path: null # Local path or HF repo id (e.g. 'owner/repo[/subdir][@rev]') for previous checkpoint/lora adapter
   resume_type: null # Options: lora, full, state. Null to auto-detect based on `finetune_type`
   # attn_backend: '_flash_3_varlen_hub' # Attention backend for Qwen-Image Series, which uses masked attention with variable sequence length.
 
diff --git a/examples/grpo/full/sd3_5/default.yaml b/examples/grpo/full/sd3_5/default.yaml
index 6b02f8a7..b44f2a65 100644
--- a/examples/grpo/full/sd3_5/default.yaml
+++ b/examples/grpo/full/sd3_5/default.yaml
@@ -21,7 +21,7 @@ model:
   target_modules: "default" # Options: all, default, or list of module names like ["to_k", "to_q", "to_v", "to_out.0"]
   model_name_or_path: "stabilityai/stable-diffusion-3.5-medium"  # HuggingFace model ID or local path
   model_type: "sd3-5"  # Options: flux1, flux1-kontext, flux2, qwenimage, qwenimage-edit, sd3-5
-  resume_path: null # Path to load previous checkpoint/lora adapter
+  resume_path: null # Local path or HF repo id (e.g. 'owner/repo[/subdir][@rev]') for previous checkpoint/lora adapter
   resume_type: null # Options: lora, full, state. Null to auto-detect based on `finetune_type`
 
 log:
diff --git a/examples/grpo/full/wan21/i2v.yaml b/examples/grpo/full/wan21/i2v.yaml
index 8e6a363d..d7c5dc34 100644
--- a/examples/grpo/full/wan21/i2v.yaml
+++ b/examples/grpo/full/wan21/i2v.yaml
@@ -22,7 +22,7 @@ model:
   target_modules: "default" # Options: all, default, or list of module names like ["to_k", "to_q", "to_v", "to_out.0"]
   model_name_or_path: "Wan-AI/Wan2.1-I2V-14B-480P-Diffusers"  # Wan-AI/Wan2.1-I2V-14B-480P-Diffusers / Wan-AI/Wan2.1-I2V-14B-720P-Diffusers
   model_type: "wan2_i2v"  # wan2_t2v, wan2_i2v, wan2_v2v
-  resume_path: null # Path to load previous checkpoint/lora adapter
+  resume_path: null # Local path or HF repo id (e.g. 'owner/repo[/subdir][@rev]') for previous checkpoint/lora adapter
   resume_type: null # Options: lora, full, state. Null to auto-detect based on `finetune_type`
 
 log:
diff --git a/examples/grpo/full/wan21/t2v.yaml b/examples/grpo/full/wan21/t2v.yaml
index 09c8bef5..fea1d8b9 100644
--- a/examples/grpo/full/wan21/t2v.yaml
+++ b/examples/grpo/full/wan21/t2v.yaml
@@ -22,7 +22,7 @@ model:
   target_modules: "default" # Options: all, default, or list of module names like ["to_k", "to_q", "to_v", "to_out.0"]
   model_name_or_path: "Wan-AI/Wan2.1-T2V-14B-Diffusers"  # Wan-AI/Wan2.1-T2V-14B-Diffusers / Wan-AI/Wan2.2-T2V-A14B-Diffusers
   model_type: "wan2_t2v"  # wan2_t2v, wan2_i2v, wan2_v2v
-  resume_path: null # Path to load previous checkpoint/lora adapter
+  resume_path: null # Local path or HF repo id (e.g. 'owner/repo[/subdir][@rev]') for previous checkpoint/lora adapter
   resume_type: null # Options: lora, full, state. Null to auto-detect based on `finetune_type`
 
 log:
diff --git a/examples/grpo/full/wan22/i2v.yaml b/examples/grpo/full/wan22/i2v.yaml
index 63731384..6ffa6a5e 100644
--- a/examples/grpo/full/wan22/i2v.yaml
+++ b/examples/grpo/full/wan22/i2v.yaml
@@ -28,7 +28,7 @@ model:
   target_modules: "default"
   model_name_or_path: "Wan-AI/Wan2.2-I2V-A14B-Diffusers" # Wan-AI/Wan2.2-TI2V-5B-Diffusers / Wan-AI/Wan2.2-I2V-A14B-Diffusers
   model_type: "wan2_i2v"  # wan2_t2v, wan2_i2v, wan2_v2v
-  resume_path: null # Path to load previous checkpoint/lora adapter
+  resume_path: null # Local path or HF repo id (e.g. 'owner/repo[/subdir][@rev]') for previous checkpoint/lora adapter
   resume_type: null # Options: lora, full, state. Null to auto-detect based on `finetune_type`
 
 log:
diff --git a/examples/grpo/full/wan22/t2v.yaml b/examples/grpo/full/wan22/t2v.yaml
index 74d10030..a188a0f9 100644
--- a/examples/grpo/full/wan22/t2v.yaml
+++ b/examples/grpo/full/wan22/t2v.yaml
@@ -28,7 +28,7 @@ model:
   target_modules: "default"
   model_name_or_path: "Wan-AI/Wan2.2-T2V-A14B-Diffusers"  # Wan-AI/Wan2.1-T2V-14B-Diffusers / Wan-AI/Wan2.2-T2V-A14B-Diffusers
   model_type: "wan2_t2v"  # wan2_t2v, wan2_i2v, wan2_v2v
-  resume_path: null # Path to load previous checkpoint/lora adapter
+  resume_path: null # Local path or HF repo id (e.g. 'owner/repo[/subdir][@rev]') for previous checkpoint/lora adapter
   resume_type: null # Options: lora, full, state. Null to auto-detect based on `finetune_type`
 
 log:
diff --git a/examples/grpo/full/z_image/default.yaml b/examples/grpo/full/z_image/default.yaml
index 3abeab6a..e7794942 100644
--- a/examples/grpo/full/z_image/default.yaml
+++ b/examples/grpo/full/z_image/default.yaml
@@ -22,7 +22,7 @@ model:
   target_modules: "default" # Options: all, default, or list of module names like ["to_k", "to_q", "to_v", "to_out.0"]
   model_name_or_path: "Tongyi-MAI/Z-Image"  # Options: Tongyi-MAI/Z-Image, Tongyi-MAI/Z-Image-Turbo
   model_type: "z-image"
-  resume_path: null # Path to load previous checkpoint/lora adapter
+  resume_path: null # Local path or HF repo id (e.g. 'owner/repo[/subdir][@rev]') for previous checkpoint/lora adapter
   resume_type: null # Options: lora, full, state. Null to auto-detect based on `finetune_type`
 
 log:
diff --git a/examples/grpo/full/z_image_turbo/default.yaml b/examples/grpo/full/z_image_turbo/default.yaml
index 298a34bf..427e663b 100644
--- a/examples/grpo/full/z_image_turbo/default.yaml
+++ b/examples/grpo/full/z_image_turbo/default.yaml
@@ -22,7 +22,7 @@ model:
   target_modules: "default" # Options: all, default, or list of module names like ["to_k", "to_q", "to_v", "to_out.0"]
   model_name_or_path: "Tongyi-MAI/Z-Image-Turbo"  # HuggingFace model ID or local path
   model_type: "z-image"
-  resume_path: null # Path to load previous checkpoint/lora adapter
+  resume_path: null # Local path or HF repo id (e.g. 'owner/repo[/subdir][@rev]') for previous checkpoint/lora adapter
   resume_type: null # Options: lora, full, state. Null to auto-detect based on `finetune_type`
 
 log:
diff --git a/examples/grpo/lora/flux1/default.yaml b/examples/grpo/lora/flux1/default.yaml
index 24fab9c2..9d814985 100644
--- a/examples/grpo/lora/flux1/default.yaml
+++ b/examples/grpo/lora/flux1/default.yaml
@@ -23,7 +23,7 @@ model:
   target_modules: "default" # Options: all, default, or list of module names like ["to_k", "to_q", "to_v", "to_out.0"]
   model_name_or_path: "black-forest-labs/FLUX.1-dev"  # HuggingFace model ID or local path
   model_type: "flux1"  # Options: flux1, flux1-kontext, flux2, qwenimage, qwenimage-edit
-  resume_path: null # Path to load previous checkpoint/lora adapter
+  resume_path: null # Local path or HF repo id (e.g. 'owner/repo[/subdir][@rev]') for previous checkpoint/lora adapter
   resume_type: null # Options: lora, full, state. Null to auto-detect based on `finetune_type`
 
 log:
diff --git a/examples/grpo/lora/flux1_kontext/default.yaml b/examples/grpo/lora/flux1_kontext/default.yaml
index b6c6084d..269f4b03 100644
--- a/examples/grpo/lora/flux1_kontext/default.yaml
+++ b/examples/grpo/lora/flux1_kontext/default.yaml
@@ -23,7 +23,7 @@ model:
   target_modules: "default" # Options: all, default, or list of module names like ["to_k", "to_q", "to_v", "to_out.0"]
   model_name_or_path: "black-forest-labs/FLUX.1-Kontext-dev"  # HuggingFace model ID or local path
   model_type: "flux1-kontext"
-  resume_path: null # Directory contains previous checkpoint/lora adapter
+  resume_path: null # Local path or HF repo id (e.g. 'owner/repo[/subdir][@rev]') for previous checkpoint/lora adapter
   resume_type: null # Options: lora, full, state. Null to auto-detect based on `finetune_type`
 
 log:
diff --git a/examples/grpo/lora/flux2/i2i.yaml b/examples/grpo/lora/flux2/i2i.yaml
index b3185cb3..21ef994c 100644
--- a/examples/grpo/lora/flux2/i2i.yaml
+++ b/examples/grpo/lora/flux2/i2i.yaml
@@ -23,7 +23,7 @@ model:
   target_modules: "default" # Options: all, default, or list of module names like ["to_k", "to_q", "to_v", "to_out.0"]
   model_name_or_path: "black-forest-labs/FLUX.2-dev"  # HuggingFace model ID or local path
   model_type: "flux2"
-  resume_path: null # Path to load previous checkpoint/lora adapter
+  resume_path: null # Local path or HF repo id (e.g. 'owner/repo[/subdir][@rev]') for previous checkpoint/lora adapter
   resume_type: null # Options: lora, full, state. Null to auto-detect based on `finetune_type`
 
 log:
diff --git a/examples/grpo/lora/flux2/t2i.yaml b/examples/grpo/lora/flux2/t2i.yaml
index 56dd92c5..17b89c83 100644
--- a/examples/grpo/lora/flux2/t2i.yaml
+++ b/examples/grpo/lora/flux2/t2i.yaml
@@ -23,7 +23,7 @@ model:
   target_modules: "default" # Options: all, default, or list of module names like ["to_k", "to_q", "to_v", "to_out.0"]
   model_name_or_path: "black-forest-labs/FLUX.2-dev"  # HuggingFace model ID or local path
   model_type: "flux2"
-  resume_path: null # Path to load previous checkpoint/lora adapter
+  resume_path: null # Local path or HF repo id (e.g. 'owner/repo[/subdir][@rev]') for previous checkpoint/lora adapter
   resume_type: null # Options: lora, full, state. Null to auto-detect based on `finetune_type`
 
 log:
diff --git a/examples/grpo/lora/flux2_klein/default.yaml b/examples/grpo/lora/flux2_klein/default.yaml
index ce2e9fc4..286be971 100644
--- a/examples/grpo/lora/flux2_klein/default.yaml
+++ b/examples/grpo/lora/flux2_klein/default.yaml
@@ -23,7 +23,7 @@ model:
   target_modules: "default" # Options: all, default, or list of module names like ["to_k", "to_q", "to_v", "to_out.0"]
   model_name_or_path: "black-forest-labs/FLUX.2-klein-9B"  # Options: black-forest-labs/FLUX.2-klein-4B, black-forest-labs/FLUX.2-klein-9B
   model_type: "flux2-klein"
-  resume_path: null # Path to load previous checkpoint/lora adapter
+  resume_path: null # Local path or HF repo id (e.g. 'owner/repo[/subdir][@rev]') for previous checkpoint/lora adapter
   resume_type: null # Options: lora, full, state. Null to auto-detect based on `finetune_type`
 
 log:
diff --git a/examples/grpo/lora/flux2_klein_base/default.yaml b/examples/grpo/lora/flux2_klein_base/default.yaml
index 21c8a4bd..890484d3 100644
--- a/examples/grpo/lora/flux2_klein_base/default.yaml
+++ b/examples/grpo/lora/flux2_klein_base/default.yaml
@@ -23,7 +23,7 @@ model:
   target_modules: "default" # Options: all, default, or list of module names like ["to_k", "to_q", "to_v", "to_out.0"]
   model_name_or_path: "black-forest-labs/FLUX.2-klein-base-9B"  # Options: black-forest-labs/FLUX.2-klein-base-4B, black-forest-labs/FLUX.2-klein-base-9B
   model_type: "flux2-klein"
-  resume_path: null # Path to load previous checkpoint/lora adapter
+  resume_path: null # Local path or HF repo id (e.g. 'owner/repo[/subdir][@rev]') for previous checkpoint/lora adapter
   resume_type: null # Options: lora, full, state. Null to auto-detect based on `finetune_type`
 
 log:
diff --git a/examples/grpo/lora/ltx2/i2av.yaml b/examples/grpo/lora/ltx2/i2av.yaml
index c960506b..1e4c2872 100644
--- a/examples/grpo/lora/ltx2/i2av.yaml
+++ b/examples/grpo/lora/ltx2/i2av.yaml
@@ -28,7 +28,7 @@ model:
   target_modules: "default"  # 28 Linear layers per block (video/audio attn + cross-modal attn + FFN)
   model_name_or_path: "Lightricks/LTX-2"  # Options: Lightricks/LTX-2, dg845/LTX-2.3-Diffusers
   model_type: "ltx2_i2av"
-  resume_path: null  # Path to load previous checkpoint/lora adapter
+  resume_path: null  # Local path or HF repo id (e.g. 'owner/repo[/subdir][@rev]') for previous checkpoint/lora adapter
   resume_type: null  # Options: lora, full, state. Null to auto-detect based on `finetune_type`
 
 log:
diff --git a/examples/grpo/lora/ltx2/t2av.yaml b/examples/grpo/lora/ltx2/t2av.yaml
index fb093871..ae27c722 100644
--- a/examples/grpo/lora/ltx2/t2av.yaml
+++ b/examples/grpo/lora/ltx2/t2av.yaml
@@ -27,7 +27,7 @@ model:
   target_modules: "default"  # 28 Linear layers per block (video/audio attn + cross-modal attn + FFN)
   model_name_or_path: "Lightricks/LTX-2"  # Options: Lightricks/LTX-2, dg845/LTX-2.3-Diffusers
   model_type: "ltx2_t2av"
-  resume_path: null  # Path to load previous checkpoint/lora adapter
+  resume_path: null  # Local path or HF repo id (e.g. 'owner/repo[/subdir][@rev]') for previous checkpoint/lora adapter
   resume_type: null  # Options: lora, full, state. Null to auto-detect based on `finetune_type`
   # attn_backend: '_flash_3_hub'  # Attention backend.
 
diff --git a/examples/grpo/lora/ltx2/t2av_pickscore.yaml b/examples/grpo/lora/ltx2/t2av_pickscore.yaml
index e358a48a..9dfeae3d 100644
--- a/examples/grpo/lora/ltx2/t2av_pickscore.yaml
+++ b/examples/grpo/lora/ltx2/t2av_pickscore.yaml
@@ -25,7 +25,7 @@ model:
   target_modules: "default"  # 28 Linear layers per block (video/audio attn + cross-modal attn + FFN)
   model_name_or_path: "dg845/LTX-2.3-Diffusers"  # Options: Lightricks/LTX-2, dg845/LTX-2.3-Diffusers
   model_type: "ltx2_t2av"
-  resume_path: null  # Path to load previous checkpoint/lora adapter
+  resume_path: null  # Local path or HF repo id (e.g. 'owner/repo[/subdir][@rev]') for previous checkpoint/lora adapter
   resume_type: null  # Options: lora, full, state. Null to auto-detect based on `finetune_type`
   # attn_backend: '_flash_3_hub'  # Attention backend.
 
diff --git a/examples/grpo/lora/qwen_image/default.yaml b/examples/grpo/lora/qwen_image/default.yaml
index cc34ddcb..ef785057 100644
--- a/examples/grpo/lora/qwen_image/default.yaml
+++ b/examples/grpo/lora/qwen_image/default.yaml
@@ -23,7 +23,7 @@ model:
   target_modules: "default" # Options: all, default, or list of module names like ["to_k", "to_q", "to_v", "to_out.0"]
   model_name_or_path: "Qwen/Qwen-Image-2512"  # Qwen/Qwen-Image or Qwen/Qwen-Image-2512
   model_type: "qwen-image"
-  resume_path: null # Path to load previous checkpoint/lora adapter
+  resume_path: null # Local path or HF repo id (e.g. 'owner/repo[/subdir][@rev]') for previous checkpoint/lora adapter
   resume_type: null # Options: lora, full, state. Null to auto-detect based on `finetune_type`
   # attn_backend: '_flash_3_varlen_hub' # Attention backend for Qwen-Image Series, which uses masked attention with variable sequence length.
 
diff --git a/examples/grpo/lora/qwen_image_edit_plus/default.yaml b/examples/grpo/lora/qwen_image_edit_plus/default.yaml
index 42302ee6..cbe66e45 100644
--- a/examples/grpo/lora/qwen_image_edit_plus/default.yaml
+++ b/examples/grpo/lora/qwen_image_edit_plus/default.yaml
@@ -25,7 +25,7 @@ model:
   target_modules: "default" # Options: all, default, or list of module names like ["to_k", "to_q", "to_v", "to_out.0"]
   model_name_or_path: "Qwen/Qwen-Image-Edit-2509"  # Qwen/Qwen-Image-Edit-2509 or Qwen/Qwen-Image-Edit-2511
   model_type: "qwen-image-edit-plus"
-  resume_path: null # Path to load previous checkpoint/lora adapter
+  resume_path: null # Local path or HF repo id (e.g. 'owner/repo[/subdir][@rev]') for previous checkpoint/lora adapter
   resume_type: null # Options: lora, full, state. Null to auto-detect based on `finetune_type`
   # attn_backend: '_flash_3_varlen_hub' # Attention backend for Qwen-Image Series, which uses masked attention with variable sequence length.
 
diff --git a/examples/grpo/lora/sd3_5/default.yaml b/examples/grpo/lora/sd3_5/default.yaml
index 31f1b86b..f52936ee 100644
--- a/examples/grpo/lora/sd3_5/default.yaml
+++ b/examples/grpo/lora/sd3_5/default.yaml
@@ -26,7 +26,7 @@ model:
   target_modules: "default" # Options: all, default, or list of module names like ["to_k", "to_q", "to_v", "to_out.0"]
   model_name_or_path: "stabilityai/stable-diffusion-3.5-medium"  # HuggingFace model ID or local path
   model_type: "sd3-5"
-  resume_path: null # Path to load previous checkpoint/lora adapter
+  resume_path: null # Local path or HF repo id (e.g. 'owner/repo[/subdir][@rev]') for previous checkpoint/lora adapter
   resume_type: null # Options: lora, full, state. Null to auto-detect based on `finetune_type`
   # attn_backend: '_flash_3_hub' # Attention backend for training.
 
diff --git a/examples/grpo/lora/sd3_5/nocfg.yaml b/examples/grpo/lora/sd3_5/nocfg.yaml
index 29cd7938..bf1d6931 100644
--- a/examples/grpo/lora/sd3_5/nocfg.yaml
+++ b/examples/grpo/lora/sd3_5/nocfg.yaml
@@ -26,7 +26,7 @@ model:
   target_modules: "default" # Options: all, default, or list of module names like ["to_k", "to_q", "to_v", "to_out.0"]
   model_name_or_path: "stabilityai/stable-diffusion-3.5-medium"  # HuggingFace model ID or local path
   model_type: "sd3-5"
-  resume_path: null # Path to load previous checkpoint/lora adapter
+  resume_path: null # Local path or HF repo id (e.g. 'owner/repo[/subdir][@rev]') for previous checkpoint/lora adapter
   resume_type: null # Options: lora, full, state. Null to auto-detect based on `finetune_type`
   # attn_backend: '_flash_3_hub' # Attention backend for training.
 
diff --git a/examples/grpo/lora/wan21/i2v.yaml b/examples/grpo/lora/wan21/i2v.yaml
index 144f801f..52f0f13a 100644
--- a/examples/grpo/lora/wan21/i2v.yaml
+++ b/examples/grpo/lora/wan21/i2v.yaml
@@ -24,7 +24,7 @@ model:
   target_modules: "default" # Options: all, default, or list of module names like ["to_k", "to_q", "to_v", "to_out.0"]
   model_name_or_path: "Wan-AI/Wan2.1-I2V-14B-480P-Diffusers"  # Wan-AI/Wan2.1-I2V-14B-480P-Diffusers / Wan-AI/Wan2.1-I2V-14B-720P-Diffusers
   model_type: "wan2_i2v"  # wan2_t2v, wan2_i2v, wan2_v2v
-  resume_path: null # Path to load previous checkpoint/lora adapter
+  resume_path: null # Local path or HF repo id (e.g. 'owner/repo[/subdir][@rev]') for previous checkpoint/lora adapter
   resume_type: null # Options: lora, full, state. Null to auto-detect based on `finetune_type`
 
 log:
diff --git a/examples/grpo/lora/wan21/t2v.yaml b/examples/grpo/lora/wan21/t2v.yaml
index 0d68323c..55a46b9c 100644
--- a/examples/grpo/lora/wan21/t2v.yaml
+++ b/examples/grpo/lora/wan21/t2v.yaml
@@ -24,7 +24,7 @@ model:
   target_modules: "default" # Options: all, default, or list of module names like ["to_k", "to_q", "to_v", "to_out.0"]
   model_name_or_path: "Wan-AI/Wan2.1-T2V-14B-Diffusers"  # Wan-AI/Wan2.1-T2V-14B-Diffusers / Wan-AI/Wan2.2-T2V-A14B-Diffusers
   model_type: "wan2_t2v"  # wan2_t2v, wan2_i2v, wan2_v2v
-  resume_path: null # Path to load previous checkpoint/lora adapter
+  resume_path: null # Local path or HF repo id (e.g. 'owner/repo[/subdir][@rev]') for previous checkpoint/lora adapter
   resume_type: null # Options: lora, full, state. Null to auto-detect based on `finetune_type`
 
 log:
diff --git a/examples/grpo/lora/wan21/v2v.yaml b/examples/grpo/lora/wan21/v2v.yaml
index 44fd3cf8..93929834 100644
--- a/examples/grpo/lora/wan21/v2v.yaml
+++ b/examples/grpo/lora/wan21/v2v.yaml
@@ -24,7 +24,7 @@ model:
   target_modules: "default" # Options: all, default, or list of module names like ["to_k", "to_q", "to_v", "to_out.0"]
   model_name_or_path: "Wan-AI/Wan2.1-T2V-14B-Diffusers"  # Wan-AI/Wan2.1-T2V-1.3B-Diffusers / Wan-AI/Wan2.1-T2V-14B-Diffusers
   model_type: "wan2_v2v"
-  resume_path: null # Path to load previous checkpoint/lora adapter
+  resume_path: null # Local path or HF repo id (e.g. 'owner/repo[/subdir][@rev]') for previous checkpoint/lora adapter
   resume_type: null # Options: lora, full, state. Null to auto-detect based on `finetune_type`
 
 log:
diff --git a/examples/grpo/lora/wan22/i2v.yaml b/examples/grpo/lora/wan22/i2v.yaml
index 96db5fac..105e62c4 100644
--- a/examples/grpo/lora/wan22/i2v.yaml
+++ b/examples/grpo/lora/wan22/i2v.yaml
@@ -33,7 +33,7 @@ model:
   target_modules: "transformer.default"
   model_name_or_path: "Wan-AI/Wan2.2-I2V-A14B-Diffusers" # Wan-AI/Wan2.2-TI2V-5B-Diffusers / Wan-AI/Wan2.2-I2V-A14B-Diffusers
   model_type: "wan2_i2v"  # wan2_t2v, wan2_i2v, wan2_v2v
-  resume_path: null # Path to load previous checkpoint/lora adapter
+  resume_path: null # Local path or HF repo id (e.g. 'owner/repo[/subdir][@rev]') for previous checkpoint/lora adapter
   resume_type: null # Options: lora, full, state. Null to auto-detect based on `finetune_type`
 
 log:
diff --git a/examples/grpo/lora/wan22/t2v.yaml b/examples/grpo/lora/wan22/t2v.yaml
index d279ed84..3d3969d7 100644
--- a/examples/grpo/lora/wan22/t2v.yaml
+++ b/examples/grpo/lora/wan22/t2v.yaml
@@ -33,7 +33,7 @@ model:
   target_modules: "default"
   model_name_or_path: "Wan-AI/Wan2.2-T2V-A14B-Diffusers"  # Wan-AI/Wan2.1-T2V-14B-Diffusers / Wan-AI/Wan2.2-T2V-A14B-Diffusers
   model_type: "wan2_t2v"  # wan2_t2v, wan2_i2v, wan2_v2v
-  resume_path: null # Path to load previous checkpoint/lora adapter
+  resume_path: null # Local path or HF repo id (e.g. 'owner/repo[/subdir][@rev]') for previous checkpoint/lora adapter
   resume_type: null # Options: lora, full, state. Null to auto-detect based on `finetune_type`
 
 log:
diff --git a/examples/grpo/lora/z_image/default.yaml b/examples/grpo/lora/z_image/default.yaml
index a517bd2b..07da324b 100644
--- a/examples/grpo/lora/z_image/default.yaml
+++ b/examples/grpo/lora/z_image/default.yaml
@@ -24,7 +24,7 @@ model:
   target_modules: "default" # Options: all, default, or list of module names like ["to_k", "to_q", "to_v", "to_out.0"]
   model_name_or_path: "Tongyi-MAI/Z-Image"  # Options: Tongyi-MAI/Z-Image, Tongyi-MAI/Z-Image-Turbo
   model_type: "z-image"
-  resume_path: null # Path to load previous checkpoint/lora adapter
+  resume_path: null # Local path or HF repo id (e.g. 'owner/repo[/subdir][@rev]') for previous checkpoint/lora adapter
   resume_type: null # Options: lora, full, state. Null to auto-detect based on `finetune_type`
 
 log:
diff --git a/examples/grpo/lora/z_image_turbo/default.yaml b/examples/grpo/lora/z_image_turbo/default.yaml
index 293dc222..fdac1937 100644
--- a/examples/grpo/lora/z_image_turbo/default.yaml
+++ b/examples/grpo/lora/z_image_turbo/default.yaml
@@ -24,7 +24,7 @@ model:
   target_modules: "default" # Options: all, default, or list of module names like ["to_k", "to_q", "to_v", "to_out.0"]
   model_name_or_path: "Tongyi-MAI/Z-Image-Turbo"  # HuggingFace model ID or local path
   model_type: "z-image"  # Options: flux1, flux1-kontext, flux2, qwenimage, qwenimage-edit, z-image
-  resume_path: null # Path to load previous checkpoint/lora adapter
+  resume_path: null # Local path or HF repo id (e.g. 'owner/repo[/subdir][@rev]') for previous checkpoint/lora adapter
   resume_type: null # Options: lora, full, state. Null to auto-detect based on `finetune_type`
 
 log:
diff --git a/examples/nft/full/flux1/default.yaml b/examples/nft/full/flux1/default.yaml
index 61d90c45..502a2235 100644
--- a/examples/nft/full/flux1/default.yaml
+++ b/examples/nft/full/flux1/default.yaml
@@ -21,7 +21,7 @@ model:
   target_modules: "default" # Options: all, default, or list of module names like ["to_k", "to_q", "to_v", "to_out.0"]
   model_name_or_path: "black-forest-labs/FLUX.1-dev"  # HuggingFace model ID or local path
   model_type: "flux1"
-  resume_path: null # Path to load previous checkpoint/lora adapter
+  resume_path: null # Local path or HF repo id (e.g. 'owner/repo[/subdir][@rev]') for previous checkpoint/lora adapter
   resume_type: null # Options: lora, full, state. Null to auto-detect based on `finetune_type`
 
 log:
diff --git a/examples/nft/full/flux2_klein_base/default.yaml b/examples/nft/full/flux2_klein_base/default.yaml
index d66bb878..1ddd0675 100644
--- a/examples/nft/full/flux2_klein_base/default.yaml
+++ b/examples/nft/full/flux2_klein_base/default.yaml
@@ -21,7 +21,7 @@ model:
   target_modules: "default" # Options: all, default, or list of module names like ["to_k", "to_q", "to_v", "to_out.0"]
   model_name_or_path: "black-forest-labs/FLUX.2-klein-base-4B"  # Options: black-forest-labs/FLUX.2-klein-base-4B, black-forest-labs/FLUX.2-klein-base-9B
   model_type: "flux2-klein"
-  resume_path: null # Path to load previous checkpoint/lora adapter
+  resume_path: null # Local path or HF repo id (e.g. 'owner/repo[/subdir][@rev]') for previous checkpoint/lora adapter
   resume_type: null # Options: lora, full, state. Null to auto-detect based on `finetune_type`
 
 log:
diff --git a/examples/nft/full/wan22/t2v.yaml b/examples/nft/full/wan22/t2v.yaml
index 6cf4ddaa..4e788752 100644
--- a/examples/nft/full/wan22/t2v.yaml
+++ b/examples/nft/full/wan22/t2v.yaml
@@ -26,7 +26,7 @@ model:
   target_modules: "default"
   model_name_or_path: "Wan-AI/Wan2.2-T2V-A14B-Diffusers"  # Wan-AI/Wan2.1-T2V-14B-Diffusers / Wan-AI/Wan2.2-T2V-A14B-Diffusers
   model_type: "wan2_t2v"  # wan2_t2v, wan2_i2v, wan2_v2v
-  resume_path: null # Path to load previous checkpoint/lora adapter
+  resume_path: null # Local path or HF repo id (e.g. 'owner/repo[/subdir][@rev]') for previous checkpoint/lora adapter
   resume_type: null # Options: lora, full, state. Null to auto-detect based on `finetune_type`
 
 log:
diff --git a/examples/nft/full/z_image/default.yaml b/examples/nft/full/z_image/default.yaml
index 83d6b4c5..50252fb4 100644
--- a/examples/nft/full/z_image/default.yaml
+++ b/examples/nft/full/z_image/default.yaml
@@ -22,7 +22,7 @@ model:
   target_modules: "default" # Options: all, default, or list of module names like ["to_k", "to_q", "to_v", "to_out.0"]
   model_name_or_path: "Tongyi-MAI/Z-Image"  # Options: Tongyi-MAI/Z-Image, Tongyi-MAI/Z-Image-Turbo
   model_type: "z-image"
-  resume_path: null # Path to load previous checkpoint/lora adapter
+  resume_path: null # Local path or HF repo id (e.g. 'owner/repo[/subdir][@rev]') for previous checkpoint/lora adapter
   resume_type: null # Options: lora, full, state. Null to auto-detect based on `finetune_type`
 
 log:
diff --git a/examples/nft/full/z_image_turbo/default.yaml b/examples/nft/full/z_image_turbo/default.yaml
index b08702f5..3af795a2 100644
--- a/examples/nft/full/z_image_turbo/default.yaml
+++ b/examples/nft/full/z_image_turbo/default.yaml
@@ -22,7 +22,7 @@ model:
   target_modules: "default" # Options: all, default, or list of module names like ["to_k", "to_q", "to_v", "to_out.0"]
   model_name_or_path: "Tongyi-MAI/Z-Image-Turbo"  # Options: Tongyi-MAI/Z-Image, Tongyi-MAI/Z-Image-Turbo
   model_type: "z-image"
-  resume_path: null # Path to load previous checkpoint/lora adapter
+  resume_path: null # Local path or HF repo id (e.g. 'owner/repo[/subdir][@rev]') for previous checkpoint/lora adapter
   resume_type: null # Options: lora, full, state. Null to auto-detect based on `finetune_type`
 
 log:
diff --git a/examples/nft/lora/flux1/default.yaml b/examples/nft/lora/flux1/default.yaml
index 93cdb5ac..88d33b52 100644
--- a/examples/nft/lora/flux1/default.yaml
+++ b/examples/nft/lora/flux1/default.yaml
@@ -23,7 +23,7 @@ model:
   target_modules: "default" # Options: all, default, or list of module names like ["to_k", "to_q", "to_v", "to_out.0"]
   model_name_or_path: "black-forest-labs/FLUX.1-dev"  # HuggingFace model ID or local path
   model_type: "flux1"
-  resume_path: null # Path to load previous checkpoint/lora adapter
+  resume_path: null # Local path or HF repo id (e.g. 'owner/repo[/subdir][@rev]') for previous checkpoint/lora adapter
   resume_type: null # Options: lora, full, state. Null to auto-detect based on `finetune_type`
   # attn_backend: '_flash_3_hub' # Use flash attention 3 backend.
 
diff --git a/examples/nft/lora/flux1/rational_rewards_t2i.yaml b/examples/nft/lora/flux1/rational_rewards_t2i.yaml
index 38071fb9..ff40b2c3 100644
--- a/examples/nft/lora/flux1/rational_rewards_t2i.yaml
+++ b/examples/nft/lora/flux1/rational_rewards_t2i.yaml
@@ -24,7 +24,7 @@ model:
   target_modules: "default"
   model_name_or_path: "black-forest-labs/FLUX.1-dev"
   model_type: "flux1"
-  resume_path: null
+  resume_path: null # Local path or HF repo id (e.g. 'owner/repo[/subdir][@rev]') for previous checkpoint/lora adapter
   resume_type: null
 
 log:
diff --git a/examples/nft/lora/flux1_kontext/rational_rewards_edit.yaml b/examples/nft/lora/flux1_kontext/rational_rewards_edit.yaml
index bf0f900e..ad2ebdc8 100644
--- a/examples/nft/lora/flux1_kontext/rational_rewards_edit.yaml
+++ b/examples/nft/lora/flux1_kontext/rational_rewards_edit.yaml
@@ -24,7 +24,7 @@ model:
   target_modules: "default"
   model_name_or_path: "black-forest-labs/FLUX.1-Kontext-dev"
   model_type: "flux1-kontext"
-  resume_path: null
+  resume_path: null # Local path or HF repo id (e.g. 'owner/repo[/subdir][@rev]') for previous checkpoint/lora adapter
   resume_type: null
 
 log:
diff --git a/examples/nft/lora/flux2_klein_base/default.yaml b/examples/nft/lora/flux2_klein_base/default.yaml
index 9274f075..a79fedb8 100644
--- a/examples/nft/lora/flux2_klein_base/default.yaml
+++ b/examples/nft/lora/flux2_klein_base/default.yaml
@@ -23,7 +23,7 @@ model:
   target_modules: "default" # Options: all, default, or list of module names like ["to_k", "to_q", "to_v", "to_out.0"]
   model_name_or_path: "black-forest-labs/FLUX.2-klein-base-4B"  # Options: black-forest-labs/FLUX.2-klein-base-4B, black-forest-labs/FLUX.2-klein-base-9B
   model_type: "flux2-klein"
-  resume_path: null # Path to load previous checkpoint/lora adapter
+  resume_path: null # Local path or HF repo id (e.g. 'owner/repo[/subdir][@rev]') for previous checkpoint/lora adapter
   resume_type: null # Options: lora, full, state. Null to auto-detect based on `finetune_type`
 
 log:
diff --git a/examples/nft/lora/qwen_image/rational_rewards_t2i.yaml b/examples/nft/lora/qwen_image/rational_rewards_t2i.yaml
index 7559e802..e99289f8 100644
--- a/examples/nft/lora/qwen_image/rational_rewards_t2i.yaml
+++ b/examples/nft/lora/qwen_image/rational_rewards_t2i.yaml
@@ -25,7 +25,7 @@ model:
   target_modules: "default"
   model_name_or_path: "Qwen/Qwen-Image-2512"
   model_type: "qwen-image"
-  resume_path: null
+  resume_path: null # Local path or HF repo id (e.g. 'owner/repo[/subdir][@rev]') for previous checkpoint/lora adapter
   resume_type: null
 
 log:
diff --git a/examples/nft/lora/qwen_image_edit_plus/rational_rewards_edit.yaml b/examples/nft/lora/qwen_image_edit_plus/rational_rewards_edit.yaml
index 8137aade..b326cf76 100644
--- a/examples/nft/lora/qwen_image_edit_plus/rational_rewards_edit.yaml
+++ b/examples/nft/lora/qwen_image_edit_plus/rational_rewards_edit.yaml
@@ -24,7 +24,7 @@ model:
   target_modules: "default"
   model_name_or_path: "Qwen/Qwen-Image-Edit-2509"
   model_type: "qwen-image-edit-plus"
-  resume_path: null
+  resume_path: null # Local path or HF repo id (e.g. 'owner/repo[/subdir][@rev]') for previous checkpoint/lora adapter
   resume_type: null
 
 log:
diff --git a/examples/nft/lora/sd3_5/default.yaml b/examples/nft/lora/sd3_5/default.yaml
index 7eafa33b..ba2b5cb8 100644
--- a/examples/nft/lora/sd3_5/default.yaml
+++ b/examples/nft/lora/sd3_5/default.yaml
@@ -24,7 +24,7 @@ model:
   target_modules: "default" # Options: all, default, or list of module names like ["to_k", "to_q", "to_v", "to_out.0"]
   model_name_or_path: "stabilityai/stable-diffusion-3.5-medium"
   model_type: "sd3-5"
-  resume_path: null # Path to load previous checkpoint/lora adapter
+  resume_path: null # Local path or HF repo id (e.g. 'owner/repo[/subdir][@rev]') for previous checkpoint/lora adapter
   resume_type: null # Options: lora, full, state. Null to auto-detect based on `finetune_type`
   # attn_backend: '_flash_3_hub' # Attention backend for training.
 
diff --git a/examples/nft/lora/wan21/i2v.yaml b/examples/nft/lora/wan21/i2v.yaml
index 23680484..78b9552f 100644
--- a/examples/nft/lora/wan21/i2v.yaml
+++ b/examples/nft/lora/wan21/i2v.yaml
@@ -24,7 +24,7 @@ model:
   target_modules: "default"
   model_name_or_path: "Wan-AI/Wan2.1-I2V-14B-720P-Diffusers"   # Wan-AI/Wan2.1-I2V-14B-480P-Diffusers / Wan-AI/Wan2.1-I2V-14B-480P-Diffusers
   model_type: "wan2_i2v"  # wan2_t2v, wan2_i2v, wan2_v2v
-  resume_path: null # Path to load previous checkpoint/lora adapter
+  resume_path: null # Local path or HF repo id (e.g. 'owner/repo[/subdir][@rev]') for previous checkpoint/lora adapter
   resume_type: null # Options: lora, full, state. Null to auto-detect based on `finetune_type`
   # attn_backend: '_flash_3_hub' # Use flash attention 3 backend.
 
diff --git a/examples/nft/lora/wan21/t2v.yaml b/examples/nft/lora/wan21/t2v.yaml
index b2a215ae..3c7ce05f 100644
--- a/examples/nft/lora/wan21/t2v.yaml
+++ b/examples/nft/lora/wan21/t2v.yaml
@@ -24,7 +24,7 @@ model:
   target_modules: "default"
   model_name_or_path: "Wan-AI/Wan2.1-T2V-1.3B-Diffusers"  # Wan-AI/Wan2.1-T2V-1.3B-Diffusers / Wan-AI/Wan2.1-T2V-14B-Diffusers
   model_type: "wan2_t2v"  # wan2_t2v, wan2_i2v, wan2_v2v
-  resume_path: null # Path to load previous checkpoint/lora adapter
+  resume_path: null # Local path or HF repo id (e.g. 'owner/repo[/subdir][@rev]') for previous checkpoint/lora adapter
   resume_type: null # Options: lora, full, state. Null to auto-detect based on `finetune_type`
   # attn_backend: '_flash_3_hub' # Use flash attention 3 backend.
 
diff --git a/examples/nft/lora/wan22/t2v.yaml b/examples/nft/lora/wan22/t2v.yaml
index a493ec46..546e592a 100644
--- a/examples/nft/lora/wan22/t2v.yaml
+++ b/examples/nft/lora/wan22/t2v.yaml
@@ -24,7 +24,7 @@ model:
   target_modules: "default"
   model_name_or_path: "Wan-AI/Wan2.2-T2V-A14B-Diffusers"  # Wan-AI/Wan2.2-TI2V-5B-Diffusers / Wan-AI/Wan2.2-T2V-A14B-Diffusers
   model_type: "wan2_t2v"  # wan2_t2v, wan2_i2v, wan2_v2v
-  resume_path: null # Path to load previous checkpoint/lora adapter
+  resume_path: null # Local path or HF repo id (e.g. 'owner/repo[/subdir][@rev]') for previous checkpoint/lora adapter
   resume_type: null # Options: lora, full, state. Null to auto-detect based on `finetune_type`
   # attn_backend: '_flash_3_hub' # Use flash attention 3 backend.
 
diff --git a/examples/nft/lora/z_image/default.yaml b/examples/nft/lora/z_image/default.yaml
index 50e9fdce..c0b809dd 100644
--- a/examples/nft/lora/z_image/default.yaml
+++ b/examples/nft/lora/z_image/default.yaml
@@ -24,7 +24,7 @@ model:
   target_modules: "default" # Options: all, default, or list of module names like ["to_k", "to_q", "to_v", "to_out.0"]
   model_name_or_path: "Tongyi-MAI/Z-Image"  # Options: Tongyi-MAI/Z-Image, Tongyi-MAI/Z-Image-Turbo
   model_type: "z-image"
-  resume_path: null # Path to load previous checkpoint/lora adapter
+  resume_path: null # Local path or HF repo id (e.g. 'owner/repo[/subdir][@rev]') for previous checkpoint/lora adapter
   resume_type: null # Options: lora, full, state. Null to auto-detect based on `finetune_type`
 
 log:
diff --git a/examples/template/sd3_5/async_reward.yaml b/examples/template/sd3_5/async_reward.yaml
index a7f19e1d..db004bfb 100644
--- a/examples/template/sd3_5/async_reward.yaml
+++ b/examples/template/sd3_5/async_reward.yaml
@@ -23,7 +23,7 @@ model:
   target_modules: "default" # Options: all, default, or list of module names like ["to_k", "to_q", "to_v", "to_out.0"]
   model_name_or_path: "stabilityai/stable-diffusion-3.5-medium"  # HuggingFace model ID or local path
   model_type: "sd3-5"
-  resume_path: null # Path to load previous checkpoint/lora adapter
+  resume_path: null # Local path or HF repo id (e.g. 'owner/repo[/subdir][@rev]') for previous checkpoint/lora adapter
   resume_type: null # Options: lora, full, state. Null to auto-detect based on `finetune_type`
   # attn_backend: '_flash_3_hub' # Attention backend for training.
 
diff --git a/src/flow_factory/hparams/model_args.py b/src/flow_factory/hparams/model_args.py
index 9c7de4e0..d86c7947 100644
--- a/src/flow_factory/hparams/model_args.py
+++ b/src/flow_factory/hparams/model_args.py
@@ -80,7 +80,15 @@ class ModelArguments(ArgABC):
 
     resume_path : Optional[str] = field(
         default=None,
-        metadata={"help": "Resume from checkpoint directory."}
+        metadata={
+            "help": "Resume from checkpoint. Accepts either a local directory or a "
+                    "Hugging Face repo spec ('owner/repo[/subfolder][@revision]', or "
+                    "explicit 'hf://owner/repo[/subfolder][@revision]'). When a local "
+                    "path doesn't exist, falls back to Hugging Face Hub download. "
+                    "Multi-node: HF_TOKEN must be set on every node; downloads happen "
+                    "once per node; consider HF_HUB_ENABLE_HF_TRANSFER=1 for large "
+                    "checkpoints to avoid NCCL watchdog timeouts."
+        }
     )
 
     resume_type : Optional[Literal['lora', 'full', 'state']] = field(
diff --git a/src/flow_factory/models/abc.py b/src/flow_factory/models/abc.py
index d3a00572..d1a36818 100644
--- a/src/flow_factory/models/abc.py
+++ b/src/flow_factory/models/abc.py
@@ -38,6 +38,7 @@
 from peft import get_peft_model, LoraConfig, PeftModel
 
 from huggingface_hub import split_torch_state_dict_into_shards
+from huggingface_hub.errors import RepositoryNotFoundError, HfHubHTTPError
 from accelerate import Accelerator, DistributedType
 from accelerate.state import PartialState
 from accelerate.utils.modeling import (
@@ -58,6 +59,9 @@
     mapping_lora_state_dict,
     infer_lora_config,
     infer_target_modules,
+    parse_hf_checkpoint_path,
+    download_hf_checkpoint,
+    HF_PATH_PREFIX,
 )
 from ..samples import BaseSample
 from ..ema import EMAModuleWrapper
@@ -1455,6 +1459,70 @@ def save_checkpoint(
             logger.info(f"Checkpoint saved successfully to {save_directory}")
 
     # -------------------------------------------- Load -------------------------------------------
+    def _resolve_checkpoint_path(self, path: str) -> str:
+        """
+        Resolve `path` to a local directory, downloading from Hugging Face Hub when needed.
+
+        Resolution order:
+            1. If `path` starts with ``hf://``, strip the prefix and force HF download
+               (lets users override a colliding local directory).
+            2. Otherwise, if `path` exists locally, return it as-is.
+            3. Otherwise, parse as ``owner/repo[/subfolder][@revision]`` and download
+               via Hugging Face Hub.
+
+        Multi-node-safe: all ranks call ``snapshot_download`` directly. Hugging
+        Face Hub's per-blob ``WeakFileLock`` serializes concurrent calls within
+        each filesystem domain (cross-node on POSIX-locking shared FS, per-node
+        on non-shared FS), so exactly one rank per filesystem domain actually
+        transfers bytes. Un-gated (rather than ``is_local_main_process`` plus a
+        barrier) so a failed download raises uniformly on every affected rank
+        instead of leaving siblings deadlocked at a barrier the failing rank
+        never reaches. Residual hazard: a rare single-rank transient failure
+        (e.g. one node's network blip) can produce asymmetric progress, in
+        which case the surviving ranks will eventually trip the NCCL watchdog
+        on the final barrier below.
+
+        Args:
+            path: Local filesystem path or HF spec (with or without ``hf://`` prefix).
+
+        Returns:
+            Local directory path ready for the existing checkpoint loaders; an existing local ``path`` is returned unchanged.
+
+        Raises:
+            FileNotFoundError: When the spec is neither a local path nor a reachable HF repo (a malformed spec raises ValueError from the parser).
+        """
+        force_hf = path.startswith(HF_PATH_PREFIX)
+        spec = path[len(HF_PATH_PREFIX):] if force_hf else path
+
+        if not force_hf and os.path.exists(spec):
+            return spec
+
+        repo_id, subfolder, revision = parse_hf_checkpoint_path(spec)
+
+        try:
+            local_path = download_hf_checkpoint(repo_id, subfolder, revision)
+        except (RepositoryNotFoundError, HfHubHTTPError) as e:
+            raise FileNotFoundError(
+                f"Checkpoint {path!r} not found locally and could not be fetched "
+                f"from Hugging Face Hub (repo={repo_id!r}, subfolder={subfolder!r}, "
+                f"revision={revision!r}). For private repos, ensure HF_TOKEN is set "
+                f"on ALL nodes."
+            ) from e
+
+        # Sync after download so downstream loaders enter the lockstep dispatch
+        # together. On symmetric failure every rank raises above before this
+        # barrier is reached, so no deadlock; the residual asymmetric-failure
+        # case is documented in the docstring.
+        self.accelerator.wait_for_everyone()
+
+        if self.accelerator.is_local_main_process:
+            logger.info(
+                f"[local rank 0 / global rank {self.accelerator.process_index}] "
+                f"resolved checkpoint '{path}' -> {local_path}"
+            )
+
+        return local_path
+
     @staticmethod
     def load_sharded_checkpoint(checkpoint_dir: str, index_file: str) -> Dict[str, torch.Tensor]:
         """Load sharded safetensors checkpoint."""
@@ -1674,8 +1742,11 @@ def load_checkpoint(
                 - None: Auto-detect based on checkpoint directory contents
         """
         path = os.path.expanduser(path)
+        path = self._resolve_checkpoint_path(path)
         if not os.path.exists(path):
-            raise FileNotFoundError(f"Checkpoint path not found: {path}")
+            raise FileNotFoundError(
+                f"Checkpoint path not found locally or on Hugging Face Hub: {path!r}"
+            )
 
         # Auto-detect if not specified
         if resume_type is None:
diff --git a/src/flow_factory/utils/checkpoint.py b/src/flow_factory/utils/checkpoint.py
index 14c79227..fa101246 100644
--- a/src/flow_factory/utils/checkpoint.py
+++ b/src/flow_factory/utils/checkpoint.py
@@ -24,6 +24,7 @@
 from typing import Dict, Optional, List, Tuple, Literal
 
 from safetensors.torch import save_file, load_file
+from huggingface_hub import snapshot_download
 
 def mapping_lora_state_dict(
         state_dict: Dict[str, torch.Tensor],
@@ -137,4 +138,121 @@ def infer_target_modules(
         if match:
             target_modules.add(match.group(1))
     
-    return sorted(target_modules)
\ No newline at end of file
+    return sorted(target_modules)
+
+
+# ================================ Hugging Face Hub ================================
+HF_PATH_PREFIX = "hf://"
+
+
+def parse_hf_checkpoint_path(path: str) -> Tuple[str, Optional[str], Optional[str]]:
+    """
+    Parse a Hugging Face checkpoint path spec into ``(repo_id, subfolder, revision)``.
+
+    Accepts both bare and ``hf://``-prefixed specs:
+      - ``owner/repo``                          -> (``owner/repo``,  None,           None)
+      - ``hf://owner/repo``                     -> (``owner/repo``,  None,           None)
+      - ``owner/repo/sub/dir``                  -> (``owner/repo``,  ``sub/dir``,    None)
+      - ``owner/repo@v1.0``                     -> (``owner/repo``,  None,           ``v1.0``)
+      - ``hf://owner/repo/sub/dir@v1.0``        -> (``owner/repo``,  ``sub/dir``,    ``v1.0``)
+
+    Args:
+        path: A bare or ``hf://``-prefixed checkpoint spec.
+
+    Returns:
+        Tuple of (repo_id, subfolder, revision); subfolder and revision are ``None`` when absent.
+
+    Raises:
+        ValueError: If the spec lacks the ``owner/repo`` form, has an invalid revision, or contains '.', '..', or backslash segments (TypeError for non-str input).
+    """
+    if not isinstance(path, str):
+        raise TypeError(
+            f"expected str for path, got {type(path).__name__}: {path!r}"
+        )
+
+    spec = path[len(HF_PATH_PREFIX):] if path.startswith(HF_PATH_PREFIX) else path
+
+    # Split off optional @revision (revision token cannot contain '/' or '@').
+    revision: Optional[str] = None
+    if "@" in spec:
+        spec, revision = spec.rsplit("@", 1)
+        if not revision or "/" in revision:
+            raise ValueError(
+                f"invalid revision in HF checkpoint path: {path!r} "
+                f"(expected 'owner/repo[/subfolder][@revision]', got revision={revision!r})"
+            )
+
+    parts = [p for p in spec.split("/") if p]
+    if len(parts) < 2:
+        raise ValueError(
+            f"invalid HF checkpoint path: {path!r} "
+            f"(expected at least 'owner/repo', got {len(parts)} non-empty segments)"
+        )
+
+    # Reject path-traversal segments. Without this, a spec like
+    # 'owner/repo/..' would resolve via os.path.join to a directory outside
+    # the snapshot root and let downstream loaders read from unintended
+    # locations. Backslashes are rejected to block Windows-style traversal.
+    for seg in parts:
+        if seg in (".", "..") or "\\" in seg:
+            raise ValueError(
+                f"invalid segment {seg!r} in HF checkpoint path: {path!r} "
+                f"('.', '..', and backslashes are not allowed)"
+            )
+
+    repo_id = "/".join(parts[:2])
+    subfolder = "/".join(parts[2:]) if len(parts) > 2 else None
+    return repo_id, subfolder, revision
+
+
+def download_hf_checkpoint(
+    repo_id: str,
+    subfolder: Optional[str] = None,
+    revision: Optional[str] = None,
+) -> str:
+    """
+    Download a Hugging Face checkpoint snapshot and return the local directory path.
+
+    Thin wrapper over ``huggingface_hub.snapshot_download``. When ``subfolder`` is
+    provided, restricts the download to that subtree via ``allow_patterns`` and
+    returns the path joined with the subfolder so the caller receives the directory
+    layout that the existing local-checkpoint loaders expect.
+
+    Authentication is taken from the standard ``HF_TOKEN`` / ``HUGGING_FACE_HUB_TOKEN``
+    environment variables (and the local ``~/.cache/huggingface/token`` cache). For
+    multi-node training the token must be available on every node.
+
+    Args:
+        repo_id: HF repository identifier in ``owner/repo`` form.
+        subfolder: Optional subdirectory within the repo to fetch.
+        revision: Optional git revision (branch, tag, or commit SHA).
+
+    Returns:
+        Absolute local directory path containing the snapshot (with ``subfolder`` appended when set).
+    """
+    if not isinstance(repo_id, str) or "/" not in repo_id:
+        raise ValueError(
+            f"expected 'owner/repo' for repo_id, got {repo_id!r}"
+        )
+
+    allow_patterns: Optional[List[str]] = None
+    if subfolder:
+        # Match the subfolder itself plus everything beneath it.
+        allow_patterns = [f"{subfolder}/*", f"{subfolder}/**"]
+
+    local_root = snapshot_download(
+        repo_id=repo_id,
+        revision=revision,
+        allow_patterns=allow_patterns,
+    )
+
+    if subfolder:
+        local_path = os.path.join(local_root, subfolder)
+        if not os.path.isdir(local_path):
+            raise FileNotFoundError(
+                f"HF snapshot for repo_id={repo_id!r} (revision={revision!r}) did not "
+                f"contain expected subfolder {subfolder!r}; downloaded root={local_root!r}"
+            )
+        return local_path
+
+    return local_root
\ No newline at end of file