From cf890fd5ab7b8a8025e1167f2578aef12a93cca9 Mon Sep 17 00:00:00 2001
From: Sam Oluwalana <soluwalana@nvidia.com>
Date: Tue, 16 Jun 2026 15:24:31 -0600
Subject: [PATCH 1/3] feat(customizer): expand skill to help evaluate lift on
 adapters trained

Signed-off-by: Sam Oluwalana <soluwalana@nvidia.com>
---
 .../skills/nemo-customizer/SKILL.md           |  81 +-
 .../references/dataset-formats.md             |  12 +
 .../references/eval_helpers.py                | 737 ++++++++++++++++++
 .../references/post-training-eval.md          | 209 +++++
 .../tests/test_eval_helpers.py                | 221 ++++++
 5 files changed, 1254 insertions(+), 6 deletions(-)
 create mode 100644 plugins/nemo-customizer/src/nemo_customizer/skills/nemo-customizer/references/eval_helpers.py
 create mode 100644 plugins/nemo-customizer/src/nemo_customizer/skills/nemo-customizer/references/post-training-eval.md
 create mode 100644 plugins/nemo-customizer/tests/test_eval_helpers.py
diff --git a/plugins/nemo-customizer/src/nemo_customizer/skills/nemo-customizer/SKILL.md b/plugins/nemo-customizer/src/nemo_customizer/skills/nemo-customizer/SKILL.md
index b907063734..895ba73b17 100644
--- a/plugins/nemo-customizer/src/nemo_customizer/skills/nemo-customizer/SKILL.md
+++ b/plugins/nemo-customizer/src/nemo_customizer/skills/nemo-customizer/SKILL.md
@@ -141,6 +141,8 @@ Training never runs inside the `nemo` CLI process. After `submit`, the platform'
 - For submit/image/plugin errors (both backends), read `references/troubleshooting.md`. Unsloth needs the `nmp-unsloth-training` container image on the **platform host's** Docker daemon (see `docker/unsloth/README.md`).
 - **Missing training image on a remote platform** — if the user gave a non-localhost `NEMO_BASE_URL` / `NMP_BASE_URL` (e.g. `10.0.0.51:8080`) and the job errors with `Failed to pull image`, `manifest unknown`, or missing `nmp-unsloth-training` / automodel training image: **do not** run `docker build`, `docker pull`, or `docker buildx bake` on the agent machine. Report with **Report to user** (use **Output adapter fileset (planned):** on error), then append on-target build steps from `references/troubleshooting.md` § **Missing training images**.
 - **Gated HuggingFace models** (Llama, Gemma, …) — confirm `hf-token` + fileset `token_secret` before submit; download fails with `Failed to access upstream storage` / 502 when missing. See **HuggingFace token (gated models)** and `references/troubleshooting.md` § **Gated HuggingFace models**.
+- **Post-training eval format** — use the same CHAT `messages` JSONL as training. **Do not** flatten rows to `prompt`/`expected` for the evaluator. Send `messages[:-1]` at inference (exclude final assistant label); score against `messages[-1].content`. See `references/post-training-eval.md` and `references/eval_helpers.py`.
+- **LoRA adapters load automatically for eval** — when a job completes, the adapter is registered on the model entity and hot-reloaded on any **READY** deployment with `lora_enabled: true`. **Do not** update deployments or providers before eval. **Do** route LoRA eval through the **provider** gateway (`/provider/<name>/-/v1` with `model: default--<adapter>`); the model-entity path (`/model/<entity>/-/v1`) always hits the base model. See `references/post-training-eval.md` § **Request routing (base vs LoRA)**.
 
 ## Workflow
 
@@ -162,12 +164,14 @@ Common steps then **branch by plugin pick**:
 - [ ] nemo customization automodel submit /tmp/job.json --workspace default
 - [ ] Poll until top-level terminal (`poll_customization_job.sh`; default 15s interval, or 30–60s manual polls)
 - [ ] Report using output template below
+- [ ] Optional: compare base vs adapter on validation — `references/eval_helpers.py …` (CHAT format; adapters hot-reload automatically; see `references/post-training-eval.md`)
 
 # unsloth branch (submit → Docker GPU job)
 - [ ] Write /tmp/job.json using the UnslothJobInput shape (see Fast path — unsloth)
 - [ ] nemo customization unsloth submit /tmp/job.json --workspace default [--profile <gpu-profile>]
 - [ ] Poll until top-level terminal (`poll_customization_job.sh unsloth-<job-id>`; default 15s interval)
 - [ ] Report using output template below
+- [ ] Optional: compare base vs adapter on validation — `references/eval_helpers.py …` (CHAT format; adapters hot-reload automatically; see `references/post-training-eval.md`)
 ```
 
 ## Fast path — automodel
@@ -513,7 +517,7 @@ After polling reaches a **terminal** status (`completed`, `error`, or `cancelled
 
 | Status | Notes |
 |--------|-------|
-| `completed` | Brief success summary (e.g. adapter registered on model entity). When `metrics.train_loss` has ≥2 entries, add a loss-drop sentence: *Loss dropped from \<first value, 1 dp\> at step 1 to \<last value, 3 dp\> at step \<N\>; validation loss was \<val or n/a\>.* |
+| `completed` | Brief success summary (e.g. adapter registered on model entity). When `metrics.train_loss` has ≥2 entries, add a loss-drop sentence: *Loss dropped from \<first value, 1 dp\> at step 1 to \<last value, 3 dp\> at step \<N\>; validation loss was \<val or n/a\>.* Always append **Using the adapter** with discovered provider name and concrete gateway URLs (see below). |
 | `error` | Quote `error_details.message` or the failing step; note setup that succeeded before the failure (auth, dataset upload, submit). |
 | `cancelled` | Cancellation reason if available. |
 
@@ -580,21 +584,85 @@ After polling reaches a **terminal** status (`completed`, `error`, or `cancelled
 | Output save method | lora |
 ```
 
-**Using the adapter (`completed` only)** — after **Training configuration**, run `nemo models get <model-entity> --workspace default` (parse stdout only) to confirm the adapter is listed under `adapters`. Append this section:
+**Using the adapter (`completed` only)** — after **Training configuration**, run these discovery commands (parse stdout only; do not pipe `2>&1` into JSON parsers):
+
+1. `nemo models get <model-entity> --workspace default` — confirm `<output.name>` appears under `adapters` with `enabled: true`.
+2. `nemo inference providers list --workspace default -f json` — pick a **READY** provider whose `served_models` includes `default/<model-entity>` (base or LoRA composite). Record its `name` as `<provider>` (often matches the deployment name).
+
+On a deployment with `lora_enabled: true`, the adapter is **hot-reloaded automatically** — no deployment update or provider reconfiguration is required before inference or post-training eval. Append this section with **concrete URLs and provider name** from discovery:
 
 ```markdown
 ### Using the adapter
 
-The adapter `<output.name>` is attached to `default/<model-entity>`. List adapters with:
+The adapter `<output.name>` is registered on `default/<model-entity>`. Weights are hot-reloaded on LoRA-enabled deployments — no deployment or provider update is required after training.
+
+#### Request routing (base vs LoRA)
+
+| Target | Gateway path | OpenAI base URL | Request `"model"` field |
+|--------|--------------|-----------------|-------------------------|
+| **Base** weights | model-entity | `<NEMO_BASE_URL>/apis/inference-gateway/v2/workspaces/default/model/<model-entity>/-/v1` | `default/<model-entity>` |
+| **LoRA adapter** | **provider** | `<NEMO_BASE_URL>/apis/inference-gateway/v2/workspaces/default/provider/<provider>/-/v1` | `default--<output.name>` |
+
+**Common mistake:** posting to the model-entity URL with `"model": "default--<output.name>"` still runs the **base** model. Base-vs-adapter eval will look identical until LoRA requests use the **provider** URL above. See `references/post-training-eval.md` § **Request routing (base vs LoRA)**.
+
+#### Chat inference (CHAT-trained models)
+
+Match training context at inference — send **`messages[:-1]`** (all turns except the final assistant label). Single-turn rows are just the user message; multi-turn rows keep prior user/assistant history.
+
+| Setting | Value | Why |
+|---------|-------|-----|
+| `messages` | All turns except the final assistant label from the JSONL row | Same decode path as SFT |
+| `max_tokens` | `64` for short assistant labels | Training targets are brief (e.g. MCQA choice text) |
+| `temperature` | `0` | Reproducible eval / regression checks |
+| `chat_template_kwargs.enable_thinking` | `false` for Qwen3 short-answer SFT | Thinking mode needs extra tokens and changes output shape vs training |
+
+#### Example — LoRA adapter via provider
+
+\`\`\`bash
+export NEMO_BASE_URL=<platform-url>   # omit when using default localhost
+nemo inference gateway provider post v1/chat/completions <provider> --workspace default \\
+  --body '{
+    "model": "default--<output.name>",
+    "messages": [<all turns except final assistant label from the eval row>],
+    "max_tokens": 64,
+    "temperature": 0,
+    "chat_template_kwargs": {"enable_thinking": false}
+  }'
+\`\`\`
+
+#### Example — base model via model-entity (comparison)
+
+\`\`\`bash
+export NEMO_BASE_URL=<platform-url>
+nemo inference gateway model post v1/chat/completions <model-entity> --workspace default \\
+  --body '{
+    "model": "default/<model-entity>",
+    "messages": [<same prompt turns as LoRA example — exclude final assistant label>],
+    "max_tokens": 64,
+    "temperature": 0,
+    "chat_template_kwargs": {"enable_thinking": false}
+  }'
+\`\`\`
+
+#### Post-training eval (optional)
+
+Validation loss from training is **not** accuracy. To compare base vs adapter on the validation split with correct routing:
 
 \`\`\`bash
-export NEMO_BASE_URL=<platform-url>   # omit line when using default localhost
 cd /path/to/nemo-platform
-nemo models get <model-entity> --workspace default
+uv run python plugins/nemo-customizer/src/nemo_customizer/skills/nemo-customizer/references/eval_helpers.py \\
+  --base-url <platform-url> \\
+  --model-entity <model-entity> \\
+  --adapter <output.name> \\
+  --provider <provider> \\
+  --dataset-fileset <dataset-fileset> \\
+  --split validation.jsonl
 \`\`\`
+
+Uses CHAT `messages` rows unchanged from the training fileset (`messages[:-1]` at inference). Repeat `--adapter` for multi-adapter compare. `--provider` is optional: when omitted, a READY provider serving the model entity is auto-discovered (the run fails if none is found).
 ```
 
-Use the user's platform URL in `NEMO_BASE_URL` when they overrode it; omit the export line for default `http://127.0.0.1:8080`. The JSON `adapters` array shows `name`, `fileset`, `finetuning_type`, and `lora_config` for each registered adapter.
+Use the user's platform URL in `NEMO_BASE_URL` when they overrode it; omit the export line for default `http://127.0.0.1:8080`. Substitute `<provider>`, `<NEMO_BASE_URL>`, and entity/adapter names with values from discovery — do not leave generic placeholders in the user-facing report. Do **not** tell the user to update the deployment or add the adapter to a provider before calling it — registration on the model entity is sufficient.
 
 **Save report to `/tmp`** — unless the user opts out, write the full Markdown report (header, **Training configuration**, **Using the adapter** when `completed`, and **Resources created** when a slug or new filesets were used) to `/tmp/fine-tune-result-<slug-or-job-suffix>.md`. Use the random slug from the run when one was assigned; otherwise use the job id suffix (e.g. `a925b07ff678`).
 
@@ -626,5 +694,6 @@ For other terminal errors, keep the same header template; put remediation detail
 | W&B / MLflow field reference | `references/hyperparameters.md` § **Integrations (automodel + unsloth)** |
 | W&B secret + MLflow local server + jobs-launcher | `references/integrations-setup.md` |
 | Gated HF model auth (`hf-token`, fileset `token_secret`) | `references/troubleshooting.md` § **Gated HuggingFace models** |
+| Post-training eval (base vs LoRA, CHAT format parity) | `references/post-training-eval.md`, `references/eval_helpers.py` |
 
 Related: `plugins/nemo-automodel/README.md`, `plugins/nemo-unsloth/README.md`, `plugins/nemo-customizer/docs/CUSTOMIZATION.md`, skills **`nemo-files`**, **`nemo-status`**, **`nemo-secrets`**.
diff --git a/plugins/nemo-customizer/src/nemo_customizer/skills/nemo-customizer/references/dataset-formats.md b/plugins/nemo-customizer/src/nemo_customizer/skills/nemo-customizer/references/dataset-formats.md
index b03b43e12c..4677f68474 100644
--- a/plugins/nemo-customizer/src/nemo_customizer/skills/nemo-customizer/references/dataset-formats.md
+++ b/plugins/nemo-customizer/src/nemo_customizer/skills/nemo-customizer/references/dataset-formats.md
@@ -54,3 +54,15 @@ Optional fields on the unsloth `dataset` block:
 - The automodel SFT format `{"prompt": "...", "completion": "..."}` is **not** directly consumable by unsloth — unsloth has no built-in `prompt`/`completion` concatenation. Convert to either messages or pre-rendered text before upload.
 
 EMBEDDING and CUSTOM (automodel-only schemas) are not supported by unsloth today.
+
+## Post-training evaluation
+
+Eval rows must use the **same CHAT `messages` shape** as training. Do not flatten to `prompt`/`expected` for the evaluator.
+
+| Training JSONL | Eval dataset | Eval `prompt_template` | Metric reference |
+|----------------|--------------|------------------------|------------------|
+| `messages` (single- or multi-turn) | Same fileset split (`validation.jsonl`) | `messages[:-1]` — exclude final assistant label — see `post-training-eval.md` | `{{ item.messages[-1].content }}` |
+
+LoRA inference and eval use the **provider** gateway (`/provider/<name>/-/v1`, `model: default--<adapter>`). Base uses the **model-entity** path. See `post-training-eval.md` § **Request routing** and the **Using the adapter** section in `SKILL.md`.
+
+Shared helpers and compare CLI: `references/eval_helpers.py`. Full workflow: `references/post-training-eval.md`.
diff --git a/plugins/nemo-customizer/src/nemo_customizer/skills/nemo-customizer/references/eval_helpers.py b/plugins/nemo-customizer/src/nemo_customizer/skills/nemo-customizer/references/eval_helpers.py
new file mode 100644
index 0000000000..dfdddca7e9
--- /dev/null
+++ b/plugins/nemo-customizer/src/nemo_customizer/skills/nemo-customizer/references/eval_helpers.py
@@ -0,0 +1,737 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+"""Post-training evaluation helpers — keep eval dataset shape aligned with CHAT training JSONL.
+
+LoRA adapters registered on the model entity are hot-reloaded automatically on
+deployments with ``lora_enabled: true`` — no deployment update before eval.
+
+Run from the nemo-platform git root::
+
+    uv run python plugins/nemo-customizer/src/nemo_customizer/skills/nemo-customizer/references/eval_helpers.py \\
+        --model-entity qwen3-1.7b --adapter lora-a --adapter lora-b \\
+        --provider qwen3-1.7b-csqa-lora-deploy --dataset-fileset commonsense_qa --split validation.jsonl
+
+Import in agent scripts (add references/ to sys.path or run via uv from repo root).
+"""
+
+from __future__ import annotations
+
+import argparse
+import json
+import re
+import urllib.error
+import urllib.request
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Any, Sequence
+
+# --- Train/eval format contract (CHAT JSONL) --------------------------------
+
+CHAT_ROW_KEYS = frozenset({"messages"})
+
+# Inference: all turns except the final assistant label (single- or multi-turn).
+CHAT_USER_PROMPT_TEMPLATE: dict[str, Any] = {
+    "messages": "{{ item.messages[:-1] }}",
+}
+
+# Metric reference: final assistant turn (the label to predict).
+CHAT_REFERENCE_TEMPLATE = "{{ item.messages[-1].content }}"
+
+# Back-compat template for single-turn MCQA docs/snippets: sends only messages[0] as the user turn.
+CHAT_SINGLE_TURN_USER_PROMPT_TEMPLATE = {
+    "messages": [{"role": "user", "content": "{{ item.messages[0].content }}"}],
+}
+
+
+def assert_chat_row(row: dict[str, Any], *, index: int | None = None) -> None:
+    """Validate one dataset row matches automodel/unsloth CHAT training shape."""
+    label = f"row {index}" if index is not None else "row"
+    if "messages" not in row:
+        raise ValueError(
+            f"{label}: expected CHAT format with 'messages' array; got keys {sorted(row)}. "
+            "Do not flatten to prompt/expected — use references/post-training-eval.md."
+        )
+    messages = row["messages"]
+    if not isinstance(messages, list) or len(messages) < 2:
+        raise ValueError(f"{label}: messages must be a list with at least one prompt turn + final assistant label")
+    if messages[0].get("role") != "user":
+        raise ValueError(f"{label}: expected messages[0]=user")
+    if messages[-1].get("role") != "assistant":
+        raise ValueError(f"{label}: expected final messages[-1]=assistant (the label to score)")
+
+
+def reference_content(row: dict[str, Any]) -> str:
+    """Return the assistant label for a CHAT row (final turn)."""
+    assert_chat_row(row)
+    return row["messages"][-1]["content"]
+
+
+def load_chat_jsonl(path: Path | str) -> list[dict[str, Any]]:
+    """Load JSONL rows; validate CHAT shape; return rows unchanged."""
+    rows: list[dict[str, Any]] = []
+    with Path(path).open(encoding="utf-8") as handle:
+        for index, line in enumerate(handle, start=1):
+            if not line.strip():
+                continue
+            row = json.loads(line)
+            assert_chat_row(row, index=index)
+            rows.append(row)
+    return rows
+
+
+def load_chat_jsonl_from_platform(
+    *,
+    base_url: str,
+    workspace: str,
+    fileset: str,
+    remote_path: str,
+) -> list[dict[str, Any]]:
+    """Download a JSONL split from a platform fileset and validate CHAT rows."""
+    url = (
+        f"{base_url.rstrip('/')}/apis/files/v2/workspaces/{workspace}/filesets/"
+        f"{fileset}/-/{remote_path.lstrip('/')}"
+    )
+    with urllib.request.urlopen(url) as response:
+        content = response.read().decode("utf-8")
+    rows: list[dict[str, Any]] = []
+    for index, line in enumerate(content.splitlines(), start=1):
+        if not line.strip():
+            continue
+        row = json.loads(line)
+        assert_chat_row(row, index=index)
+        rows.append(row)
+    return rows
+
+
+def chat_metrics():
+    """Build default metrics for CHAT SFT eval (exact match + ROUGE + BLEU)."""
+    from nemo_evaluator_sdk import BLEUMetric, ROUGEMetric
+    from nemo_evaluator_sdk.metrics.exact_match import ExactMatchMetric
+
+    ref = CHAT_REFERENCE_TEMPLATE
+    return [
+        ExactMatchMetric(reference=ref),
+        ROUGEMetric(reference=ref),
+        BLEUMetric(references=[ref]),
+    ]
+
+
+def normalize_mcqa_answer(text: str) -> str:
+    """Normalize MCQA model output for comparison with bare choice-text references."""
+    text = text.strip()
+    bold = re.search(r"\*\*(?:[A-E]\.\s*)?([^*]+)\*\*", text)
+    if bold:
+        text = bold.group(1)
+    text = re.sub(r"^[A-E]\.\s*", "", text)
+    text = re.sub(r"\*\*([^*]+)\*\*", r"\1", text)
+    return text.strip().lower()
+
+
+def served_model_name(*, workspace: str, entity_or_adapter: str, finetuning: str = "base") -> str:
+    """Return the ``model`` field for base entity or LoRA adapter requests."""
+    if finetuning == "base":
+        return f"{workspace}/{entity_or_adapter}"
+    if finetuning == "lora":
+        return f"{workspace}--{entity_or_adapter}"
+    raise ValueError("finetuning must be 'base' or 'lora'")
+
+
+def adapter_composite_entity_name(
+    *, model_entity: str, workspace: str, adapter_name: str
+) -> str:
+    """LoRA composite model-entity path segment (for reference / OpenAI-route body only).
+
+    The model-entity proxy path ``model/{composite}/-/v1`` requires a dedicated
+    VirtualModel per composite and typically 404s on stock deployments. Prefer
+    :func:`provider_gateway_url` for adapter eval.
+    """
+    return f"{model_entity}&adapters/{workspace}/{adapter_name}"
+
+
+def model_entity_gateway_url(*, base_url: str, workspace: str, model_entity: str) -> str:
+    """OpenAI-compatible inference-gateway URL for a registered base model entity."""
+    return (
+        f"{base_url.rstrip('/')}/apis/inference-gateway/v2/workspaces/{workspace}/"
+        f"model/{model_entity}/-/v1"
+    )
+
+
+def provider_gateway_url(*, base_url: str, workspace: str, provider_name: str) -> str:
+    """OpenAI-compatible inference-gateway URL for a model provider (LoRA eval route)."""
+    return (
+        f"{base_url.rstrip('/')}/apis/inference-gateway/v2/workspaces/{workspace}/"
+        f"provider/{provider_name}/-/v1"
+    )
+
+
+def gateway_path_from_url(url: str) -> str:
+    """Return ``provider``, ``model-entity``, or ``unknown`` for a gateway base URL."""
+    if "/provider/" in url:
+        return "provider"
+    if "/model/" in url:
+        return "model-entity"
+    return "unknown"
+
+
+def _platform_get_json(url: str) -> dict[str, Any]:
+    with urllib.request.urlopen(url) as response:
+        return json.loads(response.read().decode("utf-8"))
+
+
+def find_ready_provider_for_model_entity(
+    *,
+    base_url: str,
+    workspace: str,
+    model_entity: str,
+) -> str | None:
+    """Return a READY provider name that serves ``workspace/model_entity`` (base or LoRA)."""
+    url = (
+        f"{base_url.rstrip('/')}/apis/models/v2/workspaces/{workspace}/providers"
+        f"?page_size=100&filter.status=READY"
+    )
+    payload = _platform_get_json(url)
+    base_entity_id = f"{workspace}/{model_entity}"
+    matches: list[str] = []
+    for provider in payload.get("data", []):
+        if provider.get("status") != "READY":
+            continue
+        for served in provider.get("served_models") or []:
+            entity_id = served.get("model_entity_id") or ""
+            if entity_id == base_entity_id or entity_id.startswith(f"{base_entity_id}&adapters/"):
+                matches.append(provider["name"])
+                break
+    if not matches:
+        return None
+    # Sort names so the pick is deterministic rather than dependent on API response order.
+    return sorted(set(matches))[0]
+
+
+@dataclass
+class JobAdapterInfo:
+    job_name: str
+    adapter_name: str
+    epochs: int | None
+    backend: str
+    model_entity: str
+    dataset_ref: str
+    status: str
+    created_at: str | None = None
+
+    def to_dict(self) -> dict[str, Any]:
+        return {
+            "job_name": self.job_name,
+            "adapter_name": self.adapter_name,
+            "epochs": self.epochs,
+            "backend": self.backend,
+            "model_entity": self.model_entity,
+            "dataset_ref": self.dataset_ref,
+            "status": self.status,
+            "created_at": self.created_at,
+        }
+
+
+def adapter_from_completed_job(
+    *,
+    base_url: str,
+    workspace: str,
+    job_name: str,
+) -> JobAdapterInfo:
+    """Resolve adapter output name and training epochs from a platform job."""
+    url = f"{base_url.rstrip('/')}/apis/jobs/v2/workspaces/{workspace}/jobs/{job_name}"
+    try:
+        job = _platform_get_json(url)
+    except urllib.error.HTTPError as exc:
+        raise ValueError(f"Job not found: {workspace}/{job_name}") from exc
+    spec = job.get("spec") or {}
+    output_name = (spec.get("output") or {}).get("name") or spec.get("name")
+    if not output_name:
+        raise ValueError(f"Job {job_name} has no output adapter name in spec")
+    model = spec.get("model")
+    model_entity = model.get("name", "") if isinstance(model, dict) else (model or "")
+    if model_entity.startswith(f"{workspace}/"):
+        model_entity = model_entity.split("/", 1)[1]
+    dataset = spec.get("dataset") or {}
+    dataset_ref = dataset.get("path") or dataset.get("training") or ""
+    backend = job_name.split("-", 1)[0] if "-" in job_name else "unknown"
+    return JobAdapterInfo(
+        job_name=job_name,
+        adapter_name=output_name,
+        epochs=(spec.get("schedule") or {}).get("epochs"),
+        backend=backend,
+        model_entity=model_entity,
+        dataset_ref=dataset_ref,
+        status=job.get("status", "unknown"),
+        created_at=job.get("created_at"),
+    )
+
+
+def list_completed_job_adapters(
+    *,
+    base_url: str,
+    workspace: str,
+    model_entity: str,
+    dataset_fileset: str | None = None,
+    page_size: int = 500,
+) -> list[JobAdapterInfo]:
+    """List completed jobs (newest first) for ``model_entity`` with their output adapter names."""
+    url = (
+        f"{base_url.rstrip('/')}/apis/jobs/v2/workspaces/{workspace}/jobs"
+        f"?page_size={page_size}&filter.status=completed"
+    )
+    payload = _platform_get_json(url)
+    dataset_ref = f"{workspace}/{dataset_fileset}" if dataset_fileset else None
+    model_ref = f"{workspace}/{model_entity}"
+    results: list[JobAdapterInfo] = []
+    for job in payload.get("data", []):
+        if job.get("status") != "completed":
+            continue
+        spec = job.get("spec") or {}
+        out = (spec.get("output") or {}).get("name") or spec.get("name")
+        if not out:
+            continue
+        model = spec.get("model")
+        job_model = model.get("name", "") if isinstance(model, dict) else (model or "")
+        ds = spec.get("dataset") or {}
+        job_ds = ds.get("path") or ds.get("training") or ""
+        if model_ref not in str(job_model):
+            continue
+        if dataset_ref and dataset_ref not in str(job_ds):
+            continue
+        backend = job["name"].split("-", 1)[0] if "-" in job["name"] else "unknown"
+        results.append(
+            JobAdapterInfo(
+                job_name=job["name"],
+                adapter_name=out,
+                epochs=(spec.get("schedule") or {}).get("epochs"),
+                backend=backend,
+                model_entity=model_entity,
+                dataset_ref=job_ds,
+                status=job.get("status", "completed"),
+                created_at=job.get("created_at"),
+            )
+        )
+    results.sort(key=lambda item: item.created_at or "", reverse=True)
+    return results
+
+
+def build_online_eval_config(
+    *,
+    max_tokens: int = 64,
+    temperature: float = 0,
+    parallelism: int = 8,
+    enable_thinking: bool = False,
+    limit_samples: int | None = None,
+):
+    """RunConfigOnlineModel defaults aligned with Qwen3 CHAT SFT eval."""
+    from nemo_evaluator_sdk.values import InferenceParams, RunConfigOnlineModel
+
+    extra_body = {"chat_template_kwargs": {"enable_thinking": enable_thinking}} if not enable_thinking else None
+    inference_kwargs: dict[str, Any] = {"max_tokens": max_tokens, "temperature": temperature}
+    if extra_body:
+        inference_kwargs["extra_body"] = extra_body
+    return RunConfigOnlineModel(
+        parallelism=parallelism,
+        limit_samples=limit_samples,
+        inference=InferenceParams(**inference_kwargs),
+    )
+
+
+def build_platform_model_target(
+    *,
+    base_url: str,
+    workspace: str,
+    model_entity: str,
+    adapter_name: str | None = None,
+    provider_name: str | None = None,
+):
+    """SDK Model target for base entity or LoRA adapter on the platform gateway.
+
+    Base weights use the **model-entity** proxy
+    (``/model/{entity}/-/v1``). LoRA adapters must use the **provider** proxy
+    (``/provider/{name}/-/v1``) with ``model: {workspace}--{adapter}`` — the
+    model-entity path always routes to the base VirtualModel and ignores adapter
+    names in the request body.
+    """
+    from nemo_evaluator_sdk.enums import ModelFormat
+    from nemo_evaluator_sdk.values.models import Model
+
+    if adapter_name:
+        resolved_provider = provider_name or find_ready_provider_for_model_entity(
+            base_url=base_url,
+            workspace=workspace,
+            model_entity=model_entity,
+        )
+        if not resolved_provider:
+            raise ValueError(
+                f"No READY inference provider serves {workspace}/{model_entity}. "
+                "Deploy the base model with lora_enabled: true or pass --provider <name>."
+            )
+        return Model(
+            url=provider_gateway_url(
+                base_url=base_url,
+                workspace=workspace,
+                provider_name=resolved_provider,
+            ),
+            name=served_model_name(
+                workspace=workspace, entity_or_adapter=adapter_name, finetuning="lora"
+            ),
+            format=ModelFormat.NVIDIA_NIM,
+        )
+
+    return Model(
+        url=model_entity_gateway_url(base_url=base_url, workspace=workspace, model_entity=model_entity),
+        name=served_model_name(workspace=workspace, entity_or_adapter=model_entity, finetuning="base"),
+        format=ModelFormat.NVIDIA_NIM,
+    )
+
+
+@dataclass
+class EvalSummary:
+    target: str
+    model_name: str
+    gateway_url: str
+    gateway_path: str
+    num_samples: int
+    raw_exact_match: float
+    normalized_accuracy: float
+    aggregate_metrics: dict[str, dict[str, float | None]]
+
+    def to_dict(self) -> dict[str, Any]:
+        return {
+            "target": self.target,
+            "model_name": self.model_name,
+            "gateway_url": self.gateway_url,
+            "gateway_path": self.gateway_path,
+            "num_samples": self.num_samples,
+            "raw_exact_match": self.raw_exact_match,
+            "normalized_accuracy": self.normalized_accuracy,
+            "metrics": self.aggregate_metrics,
+        }
+
+
+def summarize_chat_eval_result(*, target: str, model_name: str, gateway_url: str, result) -> EvalSummary:
+    """Summarize Evaluator benchmark result for CHAT rows."""
+    em_rows = result.per_metric["exact-match"].row_scores
+    num_samples = len(em_rows)
+    raw_correct = sum(
+        1
+        for rs in em_rows
+        if rs.sample.get("output_text", "").strip() == reference_content(rs.item).strip()
+    )
+    norm_correct = sum(
+        1
+        for rs in em_rows
+        if normalize_mcqa_answer(rs.sample.get("output_text", ""))
+        == normalize_mcqa_answer(reference_content(rs.item))
+    )
+    aggregate_metrics: dict[str, dict[str, float | None]] = {}
+    for metric_name, metric_result in result.per_metric.items():
+        aggregate_metrics[metric_name] = {
+            score.name.split(".")[-1]: round(score.mean, 4) if score.mean is not None else None
+            for score in metric_result.aggregate_scores.scores
+        }
+    return EvalSummary(
+        target=target,
+        model_name=model_name,
+        gateway_url=gateway_url,
+        gateway_path=gateway_path_from_url(gateway_url),
+        num_samples=num_samples,
+        raw_exact_match=round(raw_correct / num_samples, 4) if num_samples else 0.0,
+        normalized_accuracy=round(norm_correct / num_samples, 4) if num_samples else 0.0,
+        aggregate_metrics=aggregate_metrics,
+    )
+
+
+def run_chat_online_eval(
+    *,
+    rows: Sequence[dict[str, Any]],
+    target,
+    config,
+    metrics=None,
+    prompt_template: dict[str, Any] | None = None,
+):
+    """Run online eval on CHAT rows using shared templates."""
+    from nemo_evaluator_sdk import Evaluator
+
+    for index, row in enumerate(rows):
+        assert_chat_row(row, index=index)
+    if metrics is None:
+        metrics = chat_metrics()
+    return Evaluator().run_sync(
+        metrics=metrics,
+        dataset=list(rows),
+        target=target,
+        prompt_template=prompt_template or CHAT_USER_PROMPT_TEMPLATE,
+        config=config,
+    )
+
+
+def _eval_target(  # resolve one gateway target, run the CHAT eval against it, and summarize the result
+    *,
+    base_url: str,
+    workspace: str,
+    model_entity: str,
+    adapter_name: str | None,  # compare_adapters passes None for the base run and the adapter name for LoRA runs
+    provider_name: str | None,
+    rows: Sequence[dict[str, Any]],
+    config,
+    target_label: str,  # passed to the summary as `target` ("base" or the adapter name when called from compare_adapters)
+) -> EvalSummary:
+    target = build_platform_model_target(
+        base_url=base_url,
+        workspace=workspace,
+        model_entity=model_entity,
+        adapter_name=adapter_name,
+        provider_name=provider_name,
+    )
+    result = run_chat_online_eval(rows=rows, target=target, config=config)  # default metrics and prompt template
+    return summarize_chat_eval_result(
+        target=target_label,
+        model_name=target.name,  # name and URL come from the resolved target, not from the caller's arguments
+        gateway_url=target.url,
+        result=result,
+    )
+
+
+def compare_adapters(
+    *,
+    base_url: str,
+    workspace: str,
+    model_entity: str,
+    adapter_names: Sequence[str],  # evaluated in the given order, one EvalSummary each
+    rows: Sequence[dict[str, Any]],  # the same rows are sent to every target
+    include_base: bool = True,  # True -> a "base" summary is placed first in the result
+    provider_name: str | None = None,
+    max_tokens: int = 64,
+    enable_thinking: bool = False,
+    parallelism: int = 8,
+    limit_samples: int | None = None,
+) -> list[EvalSummary]:
+    """Compare base (optional) and one or more LoRA adapters on the same CHAT rows."""
+    config = build_online_eval_config(  # built once and shared by every target below
+        max_tokens=max_tokens,
+        enable_thinking=enable_thinking,
+        parallelism=parallelism,
+        limit_samples=limit_samples,
+    )
+    summaries: list[EvalSummary] = []
+    if include_base:
+        summaries.append(
+            _eval_target(
+                base_url=base_url,
+                workspace=workspace,
+                model_entity=model_entity,
+                adapter_name=None,  # no adapter: this is the baseline run
+                provider_name=provider_name,
+                rows=rows,
+                config=config,
+                target_label="base",  # lift_vs_base and routing_sanity_warnings look this label up
+            )
+        )
+    for adapter_name in adapter_names:  # targets are evaluated one after another
+        summaries.append(
+            _eval_target(
+                base_url=base_url,
+                workspace=workspace,
+                model_entity=model_entity,
+                adapter_name=adapter_name,
+                provider_name=provider_name,
+                rows=rows,
+                config=config,
+                target_label=adapter_name,  # adapter summaries are labelled with the adapter name itself
+            )
+        )
+    return summaries
+
+
+def compare_base_vs_adapter(
+    *,
+    base_url: str,
+    workspace: str,
+    model_entity: str,
+    adapter_name: str,
+    rows: Sequence[dict[str, Any]],
+    provider_name: str | None = None,
+    max_tokens: int = 64,
+    enable_thinking: bool = False,
+    parallelism: int = 8,
+    limit_samples: int | None = None,
+) -> list[EvalSummary]:
+    """Compare base model vs one LoRA adapter on the same CHAT validation rows."""
+    summaries = compare_adapters(  # thin wrapper: single-adapter list with the base run always included
+        base_url=base_url,
+        workspace=workspace,
+        model_entity=model_entity,
+        adapter_names=[adapter_name],
+        rows=rows,
+        include_base=True,
+        provider_name=provider_name,
+        max_tokens=max_tokens,
+        enable_thinking=enable_thinking,
+        parallelism=parallelism,
+        limit_samples=limit_samples,
+    )
+    if len(summaries) == 2:  # expected shape is [base, adapter]
+        summaries[1].target = "lora"  # relabel in place: the adapter summary reports as "lora" instead of its name
+    return summaries
+
+
+def lift_vs_base(summaries: Sequence[EvalSummary]) -> dict[str, float]:
+    """Normalized accuracy delta vs the base summary (if present)."""
+    base = next((summary for summary in summaries if summary.target == "base"), None)  # first summary labelled "base"
+    if base is None:
+        return {}  # no baseline -> nothing to compare against
+    return {
+        summary.target: round(summary.normalized_accuracy - base.normalized_accuracy, 4)  # positive = above base
+        for summary in summaries
+        if summary.target != "base"
+    }  # keyed by target label, so summaries sharing a label collapse to the last one
+
+
+def routing_sanity_warnings(
+    summaries: Sequence[EvalSummary],
+    *,
+    routing_tolerance_pp: float = 0.015,  # compared against a difference of normalized_accuracy values; rendered with :.1% below
+) -> list[str]:
+    """Return human-readable warnings when LoRA routing or scores look suspicious."""
+    warnings: list[str] = []
+    base = next((summary for summary in summaries if summary.target == "base"), None)  # None when no base run is present
+    for summary in summaries:
+        if summary.target == "base":
+            if summary.gateway_path != "model-entity":  # the base run is expected on the model-entity route
+                warnings.append(
+                    f"base eval used {summary.gateway_path} route; expected model-entity "
+                    f"({summary.gateway_url})"
+                )
+            continue  # the remaining checks apply to non-base summaries only
+        if summary.gateway_path != "provider":  # every non-base summary is treated as a LoRA run
+            warnings.append(
+                f"{summary.target}: LoRA eval used {summary.gateway_path} route "
+                f"({summary.gateway_url}); expected provider gateway — scores may match base"
+            )
+        if base and abs(summary.normalized_accuracy - base.normalized_accuracy) <= routing_tolerance_pp:  # near-identical to base
+            warnings.append(
+                f"{summary.target}: normalized accuracy {summary.normalized_accuracy:.1%} is within "
+                f"{routing_tolerance_pp:.1%} of base ({base.normalized_accuracy:.1%}) — verify provider routing"
+            )
+    return warnings  # empty list when nothing looks suspicious
+
+
+def build_eval_payload(
+    *,
+    summaries: Sequence[EvalSummary],
+    base_url: str,
+    workspace: str,
+    model_entity: str,
+    adapter_names: Sequence[str],  # drives the per-adapter "routing" entries below
+    provider_name: str | None,  # required keyword (no default); None is forwarded to build_platform_model_target
+) -> dict[str, Any]:
+    """Assemble CLI/programmatic JSON output with routing metadata and warnings."""
+    routing: dict[str, Any] = {}
+    if any(summary.target == "base" for summary in summaries):  # base routing is reported only when a base run exists
+        routing["base"] = {
+            "gateway_path": "model-entity",
+            "url": model_entity_gateway_url(
+                base_url=base_url, workspace=workspace, model_entity=model_entity
+            ),
+            "model_field": served_model_name(
+                workspace=workspace, entity_or_adapter=model_entity, finetuning="base"
+            ),
+        }
+    for adapter_name in adapter_names:  # routing is re-derived from adapter_names, not read back from summaries
+        target = build_platform_model_target(  # resolved again here, independently of the eval run
+            base_url=base_url,
+            workspace=workspace,
+            model_entity=model_entity,
+            adapter_name=adapter_name,
+            provider_name=provider_name,
+        )
+        routing[adapter_name] = {
+            "gateway_path": "provider",  # fixed literal, not computed from target.url
+            "url": target.url,
+            "model_field": target.name,
+        }
+    warnings = routing_sanity_warnings(summaries)  # default tolerance is used
+    payload: dict[str, Any] = {
+        "dataset_format": "chat (messages)",
+        "prompt_template": CHAT_USER_PROMPT_TEMPLATE,
+        "reference_template": CHAT_REFERENCE_TEMPLATE,
+        "routing": routing,
+        "results": [summary.to_dict() for summary in summaries],
+        "lift_vs_base": lift_vs_base(summaries),
+        "primary_metric": "normalized_accuracy",
+    }
+    if warnings:  # the "warnings" key is omitted entirely when there is nothing to report
+        payload["warnings"] = warnings
+    return payload
+
+
+def _parse_args() -> argparse.Namespace:  # defines the CLI flags consumed by main()
+    parser = argparse.ArgumentParser(description="Compare base vs LoRA on CHAT validation JSONL")
+    parser.add_argument("--base-url", default="http://127.0.0.1:8080")
+    parser.add_argument("--workspace", default="default")
+    parser.add_argument("--model-entity", required=True)
+    parser.add_argument(
+        "--adapter",
+        action="append",  # each repeated --adapter flag appends to a list
+        required=True,  # at least one adapter must be given
+        help="Adapter name(s) registered on the model entity (repeat for multi-adapter compare)",
+    )
+    parser.add_argument(
+        "--provider",
+        default=None,
+        help="Inference provider name for LoRA requests (auto-discovered when omitted)",
+    )
+    parser.add_argument("--dataset-fileset", required=True)
+    parser.add_argument("--split", default="validation.jsonl")  # main() passes this as remote_path when loading rows
+    parser.add_argument("--max-tokens", type=int, default=64)
+    parser.add_argument("--enable-thinking", action="store_true")  # off unless the flag is present
+    parser.add_argument("--limit-samples", type=int, default=None)
+    parser.add_argument("--output", type=Path, default=None)  # parsed into a Path; None means no file is written by main()
+    parser.add_argument(
+        "--no-base",
+        action="store_true",
+        help="Skip base-model eval (adapter-only comparison)",
+    )
+    return parser.parse_args()  # reads sys.argv
+
+
+def main() -> int:  # CLI entry point: load rows, evaluate base/adapters, print (and optionally save) the JSON payload
+    args = _parse_args()
+    rows = load_chat_jsonl_from_platform(
+        base_url=args.base_url,
+        workspace=args.workspace,
+        fileset=args.dataset_fileset,
+        remote_path=args.split,
+    )
+    summaries = compare_adapters(
+        base_url=args.base_url,
+        workspace=args.workspace,
+        model_entity=args.model_entity,
+        adapter_names=args.adapter,
+        rows=rows,
+        include_base=not args.no_base,  # --no-base turns the baseline run off
+        provider_name=args.provider,
+        max_tokens=args.max_tokens,
+        enable_thinking=args.enable_thinking,
+        limit_samples=args.limit_samples,  # parallelism is not a CLI flag; compare_adapters' default applies
+    )
+    payload = build_eval_payload(
+        summaries=summaries,
+        base_url=args.base_url,
+        workspace=args.workspace,
+        model_entity=args.model_entity,
+        adapter_names=args.adapter,
+        provider_name=args.provider,
+    )
+    text = json.dumps(payload, indent=2)
+    print(text)  # the payload is always written to stdout
+    if args.output:  # optionally persist the same text to a file
+        args.output.parent.mkdir(parents=True, exist_ok=True)  # create missing parent directories first
+        args.output.write_text(text, encoding="utf-8")
+    return 0  # used as the process exit status by the __main__ guard
+
+
+if __name__ == "__main__":  # run the CLI only when executed as a script, not when imported
+    raise SystemExit(main())  # main()'s return value becomes the exit status
diff --git a/plugins/nemo-customizer/src/nemo_customizer/skills/nemo-customizer/references/post-training-eval.md b/plugins/nemo-customizer/src/nemo_customizer/skills/nemo-customizer/references/post-training-eval.md
new file mode 100644
index 0000000000..0972583d18
--- /dev/null
+++ b/plugins/nemo-customizer/src/nemo_customizer/skills/nemo-customizer/references/post-training-eval.md
@@ -0,0 +1,209 @@
+# Post-training evaluation (train/eval format parity)
+
+Use after a customization job reaches **`completed`** when the user wants to compare **base vs LoRA** on the validation split.
+
+## Format contract
+
+Training and evaluation must use the **same CHAT JSONL row shape**:
+
+```json
+{
+  "messages": [
+    {"role": "user", "content": "Question: …\nChoices:\n…\nAnswer:"},
+    {"role": "assistant", "content": "bank"}
+  ]
+}
+```
+
+Multi-turn rows use the same rule: the **final** `messages[-1]` turn is the assistant label; all preceding turns are context.
+
+| Do | Don't |
+|----|-------|
+| Pass rows with `messages` unchanged from the training fileset | Flatten to `prompt` / `expected` or `prompt` / `completion` for eval |
+| Send **`messages[:-1]`** at inference (exclude only the final assistant label) | Pass full `messages` including the label turn, or use `{"messages": "{{ item.messages }}"}` unfiltered |
+| Score against **`messages[-1].content`** (final assistant turn) | Score against a renamed `expected` field unless you also keep `messages` |
+
+Single-turn MCQA (user + assistant) is the degenerate case: `messages[:-1]` is just the user turn.
+
+Automodel and unsloth both train on this shape when `has_chat` is true (see `hf-conversion.md`, `dataset-formats.md`).
+
+## Evaluator templates (required)
+
+```python
+CHAT_USER_PROMPT_TEMPLATE = {
+    "messages": "{{ item.messages[:-1] }}",
+}
+CHAT_REFERENCE_TEMPLATE = "{{ item.messages[-1].content }}"
+```
+
+Import from `references/eval_helpers.py` — do not re-type these in one-off scripts.
+
+## Inference defaults (Qwen3 / thinking models)
+
+| Setting | Recommended | Avoid |
+|---------|-------------|-------|
+| `enable_thinking` | `false` via `chat_template_kwargs` for short-answer SFT | Thinking on without enough tokens — model never closes `</think>`, strip hook fails |
+| `max_tokens` | `64` (short assistant labels) | `16` with thinking on; `1024` thinking on without strip (verbose prose) |
+| System prompt | Omit unless user asks — matches training | Extra system prompt changes decode path vs SFT |
+
+For thinking-enabled eval, set `reasoning=ReasoningParams(end_token="</think>")` **and** ensure `max_tokens` is large enough for the model to emit the end token before generating the answer.
+
+## Inference after customization (wrap-up)
+
+Include this in the **Using the adapter** section of every completed customization report. Agents must discover `<provider>` from `nemo inference providers list --workspace default -f json` and fill concrete URLs — do not leave placeholders.
+
+### LoRA adapters load automatically
+
+After a customization job reaches **`completed`**, the platform registers the adapter on the base **model entity**. On a deployment with **`lora_enabled: true`**, enabled adapters are **hot-reloaded automatically** (adapter sidecar → vLLM). **Do not** update the deployment, re-create providers, or add the adapter to a `served_models` list before post-training eval — run eval as soon as the job completes.
+
+| Prerequisite (one-time) | Per-adapter step after training |
+|-------------------------|----------------------------------|
+| A **READY** inference deployment for the base model entity with `lora_enabled: true` | Confirm adapter appears under `nemo models get <model-entity>` → `adapters` |
+| Gateway reachable at the model-entity URL below | Target the adapter by name in the eval request (see table) |
+
+### Request routing (base vs LoRA)
+
+The model-entity proxy path **always** resolves to the base VirtualModel. Setting `"model": "default--<adapter-name>"` on `/model/<base-entity>/-/v1` does **not** select the adapter — gateway logs will show only the base path.
+
+| Target | Gateway route | URL pattern | Request `model` field |
+|--------|---------------|-------------|------------------------|
+| Base entity | **Model entity** | `{NEMO_BASE_URL}/apis/inference-gateway/v2/workspaces/default/model/<model-entity>/-/v1` | `default/<model-entity>` |
+| LoRA adapter | **Provider** | `{NEMO_BASE_URL}/apis/inference-gateway/v2/workspaces/default/provider/<provider>/-/v1` | `default--<adapter-name>` |
+
+`eval_helpers.py` auto-discovers a READY provider that serves the base entity (or pass `--provider <name>`). Adapter weights still hot-reload on the deployment — no provider update per adapter.
+
+Optional sanity checks:
+
+- `nemo models get <model-entity> --workspace default` — adapter listed with `enabled: true`
+- `nemo inference providers list --workspace default` — provider status **READY**
+- LoRA eval/inference logs should show `path=…/provider/<provider>/-/v1/chat/completions`, **not** `…/model/<model-entity>/-/v1`
+- JSON output includes `warnings` when routing looks wrong or adapter scores match base within ~1.5 pp
+
+### Why earlier evals looked wrong
+
+If base and LoRA scores were identical (~99% same outputs), the adapter was almost certainly called through the **model-entity** path. That path always resolves to the base VirtualModel — the `"model": "default--<adapter>"` field in the body is ignored. Fix: route LoRA through the **provider** URL with the same `model` field. `eval_helpers.build_platform_model_target()` and the CLI implement this split automatically.
+
+### MCQA metric interpretation
+
+For commonsense_qa-style MCQA, treat **`normalized_accuracy`** as the primary metric (`normalize_mcqa_answer` strips `A. foo` / markdown).
+
+| Observation | Likely meaning |
+|-------------|----------------|
+| Base & LoRA both ~59% normalized, within ~1 pp | LoRA hit **model-entity** path (base only) — check `warnings` and gateway logs |
+| Base raw exact 0%, normalized ~59% | Normal for base on MCQA (formatted prose answers) |
+| LoRA normalized >> base (e.g. 76% vs 59%) | Correct provider routing and real adapter lift |
+| Train loss dropped sharply but eval flat | Wrong eval routing or need more epochs — val loss ≠ accuracy |
+
+### Epoch / adapter ablations
+
+Resolve adapter names from completed job specs instead of guessing:
+
+```python
+from eval_helpers import list_completed_job_adapters, compare_adapters, build_eval_payload
+
+jobs = list_completed_job_adapters(
+    base_url="http://10.0.0.51:8080",
+    workspace="default",
+    model_entity="qwen3-1.7b",
+    dataset_fileset="commonsense_qa",
+)
+# jobs[0].epochs, jobs[0].adapter_name, jobs[0].backend — sorted newest first
+
+summaries = compare_adapters(
+    base_url="...",
+    workspace="default",
+    model_entity="qwen3-1.7b",
+    adapter_names=[jobs[0].adapter_name, jobs[2].adapter_name],
+    rows=rows,
+)
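+payload = build_eval_payload(summaries=summaries, base_url="...", workspace="default", model_entity="qwen3-1.7b", adapter_names=[jobs[0].adapter_name, jobs[2].adapter_name], provider_name=None)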
+# payload["lift_vs_base"], payload.get("warnings")
+```
+
+When comparing adapters from **different backends** (automodel vs unsloth) or batch configs, note confounds — epoch count alone may not explain the gap.
+
+### Production chat requests (same rules as eval)
+
+| Piece | LoRA adapter | Base model |
+|-------|--------------|------------|
+| HTTP base URL | `…/provider/<provider>/-/v1` | `…/model/<model-entity>/-/v1` |
+| `"model"` | `default--<adapter-name>` | `default/<model-entity>` |
+| `messages` | `messages[:-1]` from the training row (exclude final assistant label) | Same |
+| Qwen3 short SFT | `"chat_template_kwargs": {"enable_thinking": false}` | Same |
+| `max_tokens` / `temperature` | `64` / `0` typical for short labels | Same |
+
+CLI shortcuts (substitute names from the job):
+
+```bash
+# LoRA
+nemo inference gateway provider post v1/chat/completions <provider> --workspace default \
+  --body '{"model":"default--<adapter>","messages":[{"role":"user","content":"…"}],"max_tokens":64,"temperature":0,"chat_template_kwargs":{"enable_thinking":false}}'
+
+# Base
+nemo inference gateway model post v1/chat/completions <model-entity> --workspace default \
+  --body '{"model":"default/<model-entity>","messages":[{"role":"user","content":"…"}],"max_tokens":64,"temperature":0,"chat_template_kwargs":{"enable_thinking":false}}'
+```
+
+## Metrics
+
+| Task | Metrics |
+|------|---------|
+| MCQA / exact label | `ExactMatchMetric` + `normalize_mcqa_answer()` when models return `A. foo` or markdown |
+| Similarity | `ROUGEMetric`, `BLEUMetric` with `CHAT_REFERENCE_TEMPLATE` |
+
+Val loss from training is **not** accuracy — always run a generation eval for user-facing quality.
+
+## Helper script
+
+From **nemo-platform** git root:
+
+```bash
+# Base vs one adapter
+uv run python plugins/nemo-customizer/src/nemo_customizer/skills/nemo-customizer/references/eval_helpers.py \
+  --base-url http://10.0.0.51:8080 \
+  --model-entity qwen3-1.7b \
+  --adapter qwen3-1.7b-csqa-unsloth-jun16-e3 \
+  --provider qwen3-1.7b-csqa-lora-deploy \
+  --dataset-fileset commonsense_qa \
+  --split validation.jsonl \
+  --output /tmp/fine-tune-eval.json
+
+# Base vs multiple adapters (epoch ablation)
+uv run python plugins/nemo-customizer/src/nemo_customizer/skills/nemo-customizer/references/eval_helpers.py \
+  --base-url http://10.0.0.51:8080 \
+  --model-entity qwen3-1.7b \
+  --adapter qwen3-1.7b-commonsense-qa-lora-jun12-v2 \
+  --adapter qwen3-1.7b-csqa-unsloth-jun16-e3 \
+  --dataset-fileset commonsense_qa \
+  --split validation.jsonl \
+  --output /tmp/fine-tune-eval-multi.json
+```
+
+Programmatic use:
+
+```python
+from eval_helpers import (
+    load_chat_jsonl_from_platform,
+    compare_adapters,
+    compare_base_vs_adapter,
+    build_eval_payload,
+    list_completed_job_adapters,
+    routing_sanity_warnings,
+    CHAT_USER_PROMPT_TEMPLATE,
+)
+```
+
+(Add `references/` to `sys.path` or run via `uv run python` from repo root.)
+
+## Report to user
+
+After compare, report for **base and each adapter**:
+
+- **Normalized accuracy** (primary for MCQA)
+- Raw exact match (strict string — often 0% on base for formatted answers)
+- Lift vs base (`lift_vs_base` in JSON output)
+- ROUGE / BLEU aggregates if requested
+- Any `warnings` from routing sanity checks
+- Inference settings (`enable_thinking`, `max_tokens`) and dataset fileset ref
+
+Uses the **nemo-evaluator SDK** (`Evaluator`, metrics, `RunConfigOnlineModel`) under the hood — no separate evaluator skill doc required. For general BYOB/rubric eval outside customization, use the **nemo-evaluator** skill.
diff --git a/plugins/nemo-customizer/tests/test_eval_helpers.py b/plugins/nemo-customizer/tests/test_eval_helpers.py
new file mode 100644
index 0000000000..26755d53dc
--- /dev/null
+++ b/plugins/nemo-customizer/tests/test_eval_helpers.py
@@ -0,0 +1,221 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+from __future__ import annotations
+
+import json
+import sys
+from pathlib import Path
+
+import pytest
+
+SKILL_REFERENCES = (  # absolute path to the skill's references/ directory, built relative to this test file
+    Path(__file__).resolve().parents[1]
+    / "src"
+    / "nemo_customizer"
+    / "skills"
+    / "nemo-customizer"
+    / "references"
+)
+sys.path.insert(0, str(SKILL_REFERENCES))  # put references/ first on sys.path so the eval_helpers import below resolves
+
+import eval_helpers  # noqa: E402
+
+
+def test_served_model_names() -> None:  # default form joins with "/", finetuning="lora" joins with "--"
+    assert eval_helpers.served_model_name(workspace="default", entity_or_adapter="qwen3-1.7b") == "default/qwen3-1.7b"
+    assert (
+        eval_helpers.served_model_name(workspace="default", entity_or_adapter="my-lora", finetuning="lora")
+        == "default--my-lora"
+    )
+
+
+def test_adapter_composite_entity_name() -> None:  # pins the "<entity>&adapters/<workspace>/<adapter>" composite form
+    assert (
+        eval_helpers.adapter_composite_entity_name(
+            model_entity="qwen3-1.7b",
+            workspace="default",
+            adapter_name="my-lora",
+        )
+        == "qwen3-1.7b&adapters/default/my-lora"
+    )
+
+
+def test_build_platform_model_target_routes_lora_via_provider() -> None:  # adapter + explicit provider -> provider URL
+    target = eval_helpers.build_platform_model_target(
+        base_url="http://10.0.0.51:8080",
+        workspace="default",
+        model_entity="qwen3-1.7b",
+        adapter_name="my-lora",
+        provider_name="my-provider",  # given explicitly, so no provider lookup is exercised here
+    )
+    assert "/provider/my-provider/-/v1" in target.url
+    assert "/model/qwen3-1.7b/-/v1" not in target.url  # must not use the model-entity route
+    assert target.name == "default--my-lora"
+
+
+def test_build_platform_model_target_routes_base_via_model_entity() -> None:  # no adapter -> model-entity URL
+    target = eval_helpers.build_platform_model_target(
+        base_url="http://10.0.0.51:8080",
+        workspace="default",
+        model_entity="qwen3-1.7b",
+    )
+    assert "/model/qwen3-1.7b/-/v1" in target.url
+    assert target.name == "default/qwen3-1.7b"
+
+
+def test_gateway_path_from_url() -> None:  # URL classification: /provider/ -> "provider", /model/ -> "model-entity"
+    assert eval_helpers.gateway_path_from_url("http://x/provider/p/-/v1") == "provider"
+    assert eval_helpers.gateway_path_from_url("http://x/model/m/-/v1") == "model-entity"
+
+
+def test_normalize_mcqa_answer() -> None:  # all three spellings must reduce to the bare label "bank"
+    assert eval_helpers.normalize_mcqa_answer("bank") == "bank"
+    assert eval_helpers.normalize_mcqa_answer("A. bank") == "bank"  # choice-letter prefix removed
+    assert eval_helpers.normalize_mcqa_answer("The correct answer is: **A. bank**") == "bank"  # prose + markdown removed
+
+
+def test_assert_chat_row_rejects_flattened() -> None:  # a prompt/expected row without "messages" must be rejected
+    with pytest.raises(ValueError, match="messages"):  # the error text has to mention "messages"
+        eval_helpers.assert_chat_row({"prompt": "hi", "expected": "bye"})
+
+
+def test_assert_chat_row_accepts_single_turn() -> None:  # minimal valid row: one user turn plus the assistant label
+    row = {
+        "messages": [
+            {"role": "user", "content": "Question?"},
+            {"role": "assistant", "content": "yes"},
+        ]
+    }
+    eval_helpers.assert_chat_row(row)  # passes by not raising
+
+
+def test_assert_chat_row_accepts_multi_turn() -> None:  # earlier assistant turns are allowed; the last one is the label
+    row = {
+        "messages": [
+            {"role": "user", "content": "Turn 1"},
+            {"role": "assistant", "content": "Reply 1"},
+            {"role": "user", "content": "Turn 2"},
+            {"role": "assistant", "content": "final label"},
+        ]
+    }
+    eval_helpers.assert_chat_row(row)  # passes by not raising
+    assert eval_helpers.reference_content(row) == "final label"  # reference is the final turn's content
+
+
+def test_assert_chat_row_rejects_missing_final_assistant() -> None:  # a row ending on a user turn has no label to score
+    row = {
+        "messages": [
+            {"role": "user", "content": "Turn 1"},
+            {"role": "assistant", "content": "Reply 1"},
+            {"role": "user", "content": "Turn 2"},
+        ]
+    }
+    with pytest.raises(ValueError, match="assistant"):  # the error text has to mention "assistant"
+        eval_helpers.assert_chat_row(row)
+
+
+def test_load_chat_jsonl(tmp_path: Path) -> None:  # round-trip: write one CHAT row as JSONL, then load it back
+    path = tmp_path / "val.jsonl"
+    path.write_text(
+        json.dumps(
+            {
+                "messages": [
+                    {"role": "user", "content": "Q"},
+                    {"role": "assistant", "content": "A"},
+                ]
+            }
+        )
+        + "\n",  # single record terminated by a newline
+        encoding="utf-8",
+    )
+    rows = eval_helpers.load_chat_jsonl(path)
+    assert len(rows) == 1
+    assert rows[0]["messages"][-1]["content"] == "A"  # the row comes back as a parsed dict
+
+
+def test_chat_templates_use_messages_slice() -> None:  # guards the prompt/reference split on the shared templates
+    assert "item.messages[:-1]" in eval_helpers.CHAT_USER_PROMPT_TEMPLATE["messages"]  # prompt excludes the final turn
+    assert "item.messages[-1]" in eval_helpers.CHAT_REFERENCE_TEMPLATE  # reference reads the final turn
+
+
+def test_lift_vs_base() -> None:  # lift is adapter normalized_accuracy minus base normalized_accuracy
+    summaries = [
+        eval_helpers.EvalSummary(
+            target="base",
+            model_name="default/m",
+            gateway_url="http://x/model/m/-/v1",
+            gateway_path="model-entity",
+            num_samples=10,
+            raw_exact_match=0.0,
+            normalized_accuracy=0.5,
+            aggregate_metrics={},
+        ),
+        eval_helpers.EvalSummary(
+            target="lora-a",
+            model_name="default--a",
+            gateway_url="http://x/provider/p/-/v1",
+            gateway_path="provider",
+            num_samples=10,
+            raw_exact_match=0.7,
+            normalized_accuracy=0.75,
+            aggregate_metrics={},
+        ),
+    ]
+    assert eval_helpers.lift_vs_base(summaries) == {"lora-a": 0.25}  # 0.75 - 0.5; the base entry itself is excluded
+
+
+def test_routing_sanity_warnings_detects_flat_scores() -> None:  # misrouted adapter with base-identical score -> two warnings
+    summaries = [
+        eval_helpers.EvalSummary(
+            target="base",
+            model_name="default/m",
+            gateway_url="http://x/model/m/-/v1",
+            gateway_path="model-entity",
+            num_samples=10,
+            raw_exact_match=0.0,
+            normalized_accuracy=0.59,
+            aggregate_metrics={},
+        ),
+        eval_helpers.EvalSummary(
+            target="lora-a",
+            model_name="default--a",
+            gateway_url="http://x/model/m/-/v1",
+            gateway_path="model-entity",  # deliberately the wrong route for an adapter
+            num_samples=10,
+            raw_exact_match=0.0,
+            normalized_accuracy=0.59,  # identical to base, so the tolerance check fires as well
+            aggregate_metrics={},
+        ),
+    ]
+    warnings = eval_helpers.routing_sanity_warnings(summaries)
+    assert any("provider" in warning for warning in warnings)  # wrong-route warning
+    assert any("within" in warning for warning in warnings)  # flat-score warning
+
+
+def test_adapter_from_completed_job_parses_spec(monkeypatch: pytest.MonkeyPatch) -> None:  # no network: the GET helper is stubbed
+    payload = {
+        "name": "unsloth-abc",
+        "status": "completed",
+        "created_at": "2026-06-16T20:22:09",
+        "spec": {
+            "schedule": {"epochs": 3},
+            "output": {"name": "my-adapter"},
+            "model": {"name": "default/qwen3-1.7b"},
+            "dataset": {"path": "default/commonsense_qa"},
+        },
+    }
+
+    def fake_get(url: str) -> dict:  # stand-in for the platform GET helper
+        assert url.endswith("/jobs/unsloth-abc")  # the job name must end the requested URL
+        return payload
+
+    monkeypatch.setattr(eval_helpers, "_platform_get_json", fake_get)
+    info = eval_helpers.adapter_from_completed_job(
+        base_url="http://10.0.0.51:8080",
+        workspace="default",
+        job_name="unsloth-abc",
+    )
+    assert info.adapter_name == "my-adapter"  # from spec.output.name
+    assert info.epochs == 3  # from spec.schedule.epochs
+    assert info.backend == "unsloth"  # not a payload field; presumably derived from the job name prefix — TODO confirm

From f75c273453176348429e491870fd11a3fd84f572 Mon Sep 17 00:00:00 2001
From: Sam Oluwalana <soluwalana@nvidia.com>
Date: Tue, 16 Jun 2026 15:45:43 -0600
Subject: [PATCH 2/3] Make the skill more generic, fix lint, fix code-rabbit

Signed-off-by: Sam Oluwalana <soluwalana@nvidia.com>
---
 .../skills/nemo-customizer/SKILL.md           |  17 +-
 .../references/eval_helpers.py                |  88 ++--
 .../references/post-training-eval.md          |  69 +--
 .../tests/test_eval_helpers.py                |  31 +-
 third_party/requirements-main.txt             | 472 +++++++++++++++++-
 5 files changed, 568 insertions(+), 109 deletions(-)

diff --git a/plugins/nemo-customizer/src/nemo_customizer/skills/nemo-customizer/SKILL.md b/plugins/nemo-customizer/src/nemo_customizer/skills/nemo-customizer/SKILL.md
index 895ba73b17..745bc56c10 100644
--- a/plugins/nemo-customizer/src/nemo_customizer/skills/nemo-customizer/SKILL.md
+++ b/plugins/nemo-customizer/src/nemo_customizer/skills/nemo-customizer/SKILL.md
@@ -113,7 +113,7 @@ Training never runs inside the `nemo` CLI process. After `submit`, the platform'
 - Resolve the CLI per **Pre-flight — CLI resolution** before any `nemo …` command; run from the **nemo-platform** git root, not a plugin subfolder.
 - Set `NEMO_BASE_URL` (or `NMP_BASE_URL`) only when the user gives a platform URL; default `http://127.0.0.1:8080` (same as `http://localhost:8080`). Track whether the user **overrode** the base URL — see **Platform unreachable** below.
 - **Platform unreachable** — if any platform API call fails with a connection error (`Connection error`, timeout, refused):
-  - **User gave a custom URL** (e.g. `10.0.0.51:8080`) or you exported a non-default `NEMO_BASE_URL` / `NMP_BASE_URL`: stop and tell the user the platform is not reachable at that address. Do **not** offer to start local services.
+  - **User gave a custom URL** (e.g. `$NMP_BASE_URL`) or you exported a non-default `NMP_BASE_URL` / `NEMO_BASE_URL`: stop and tell the user the platform is not reachable at that address. Do **not** offer to start local services.
   - **Default URL only** (no user override): **ask** whether to start the platform locally. If they agree, from the **nemo-platform** git root run in the **background**:
 
     ```bash
@@ -139,7 +139,7 @@ Training never runs inside the `nemo` CLI process. After `submit`, the platform'
 - **Do not use local `docker info`** to pick automodel vs unsloth. Run `nemo jobs list-execution-profiles -f json` against the user's platform (login first only if auth is enabled — see **Authentication**; see `references/troubleshooting.md`). Default output is a table — **`-f json` is required** for scripting; parse **stdout only** (do not pipe `2>&1` into `json.load`).
 - **Do not merge stderr into stdout when parsing JSON** — `submit`, `explain`, and `-f json` commands write **JSON on stdout**; harmless warnings like `Configuration file not found, using defaults` go to **stderr**. Piping with **`2>&1`** before `json.load` raises `JSONDecodeError` even when submit **succeeded** — a common cause of **duplicate jobs** when the agent re-submits after a parse error. Parse stdout only; redirect stderr if needed (`2>/dev/null`). See `references/troubleshooting.md` § **Parsing CLI JSON**.
 - For submit/image/plugin errors (both backends), read `references/troubleshooting.md`. Unsloth needs the `nmp-unsloth-training` container image on the **platform host's** Docker daemon (see `docker/unsloth/README.md`).
-- **Missing training image on a remote platform** — if the user gave a non-localhost `NEMO_BASE_URL` / `NMP_BASE_URL` (e.g. `10.0.0.51:8080`) and the job errors with `Failed to pull image`, `manifest unknown`, or missing `nmp-unsloth-training` / automodel training image: **do not** run `docker build`, `docker pull`, or `docker buildx bake` on the agent machine. Report with **Report to user** (use **Output adapter fileset (planned):** on error), then append on-target build steps from `references/troubleshooting.md` § **Missing training images**.
+- **Missing training image on a remote platform** — if the user gave a non-localhost `NMP_BASE_URL` / `NEMO_BASE_URL` and the job errors with `Failed to pull image`, `manifest unknown`, or missing `nmp-unsloth-training` / automodel training image: **do not** run `docker build`, `docker pull`, or `docker buildx bake` on the agent machine. Report with **Report to user** (use **Output adapter fileset (planned):** on error), then append on-target build steps from `references/troubleshooting.md` § **Missing training images**.
 - **Gated HuggingFace models** (Llama, Gemma, …) — confirm `hf-token` + fileset `token_secret` before submit; download fails with `Failed to access upstream storage` / 502 when missing. See **HuggingFace token (gated models)** and `references/troubleshooting.md` § **Gated HuggingFace models**.
 - **Post-training eval format** — use the same CHAT `messages` JSONL as training. **Do not** flatten rows to `prompt`/`expected` for the evaluator. Send `messages[:-1]` at inference (exclude final assistant label); score against `messages[-1].content`. See `references/post-training-eval.md` and `references/eval_helpers.py`.
 - **LoRA adapters load automatically for eval** — when a job completes, the adapter is registered on the model entity and hot-reloaded on any **READY** deployment with `lora_enabled: true`. **Do not** update deployments or providers before eval. **Do** route LoRA eval through the **provider** gateway (`/provider/<name>/-/v1` with `model: default--<adapter>`); the model-entity path (`/model/<entity>/-/v1`) always hits the base model. See `references/post-training-eval.md` § **Request routing (base vs LoRA)**.
@@ -600,8 +600,8 @@ The adapter `<output.name>` is registered on `default/<model-entity>`. Weights a
 
 | Target | Gateway path | OpenAI base URL | Request `"model"` field |
 |--------|--------------|-----------------|-------------------------|
-| **Base** weights | model-entity | `<NEMO_BASE_URL>/apis/inference-gateway/v2/workspaces/default/model/<model-entity>/-/v1` | `default/<model-entity>` |
-| **LoRA adapter** | **provider** | `<NEMO_BASE_URL>/apis/inference-gateway/v2/workspaces/default/provider/<provider>/-/v1` | `default--<output.name>` |
+| **Base** weights | model-entity | `$NMP_BASE_URL/apis/inference-gateway/v2/workspaces/default/model/<model-entity>/-/v1` | `default/<model-entity>` |
+| **LoRA adapter** | **provider** | `$NMP_BASE_URL/apis/inference-gateway/v2/workspaces/default/provider/<provider>/-/v1` | `default--<output.name>` |
 
 **Common mistake:** posting to the model-entity URL with `"model": "default--<output.name>"` still runs the **base** model. Base-vs-adapter eval will look identical until LoRA requests use the **provider** URL above. See `references/post-training-eval.md` § **Request routing (base vs LoRA)**.
 
@@ -619,7 +619,7 @@ Match training context at inference — send **`messages[:-1]`** (all turns exce
 #### Example — LoRA adapter via provider
 
 \`\`\`bash
-export NEMO_BASE_URL=<platform-url>   # omit when using default localhost
+export NMP_BASE_URL=<platform-url>   # omit when using default localhost; NEMO_BASE_URL also works
 nemo inference gateway provider post v1/chat/completions <provider> --workspace default \\
   --body '{
     "model": "default--<output.name>",
@@ -633,7 +633,7 @@ nemo inference gateway provider post v1/chat/completions <provider> --workspace
 #### Example — base model via model-entity (comparison)
 
 \`\`\`bash
-export NEMO_BASE_URL=<platform-url>
+export NMP_BASE_URL=<platform-url>
 nemo inference gateway model post v1/chat/completions <model-entity> --workspace default \\
   --body '{
     "model": "default/<model-entity>",
@@ -651,7 +651,6 @@ Validation loss from training is **not** accuracy. To compare base vs adapter on
 \`\`\`bash
 cd /path/to/nemo-platform
 uv run python plugins/nemo-customizer/src/nemo_customizer/skills/nemo-customizer/references/eval_helpers.py \\
-  --base-url <platform-url> \\
   --model-entity <model-entity> \\
   --adapter <output.name> \\
   --provider <provider> \\
@@ -659,10 +658,10 @@ uv run python plugins/nemo-customizer/src/nemo_customizer/skills/nemo-customizer
   --split validation.jsonl
 \`\`\`
 
-Uses CHAT `messages` rows unchanged from the training fileset (`messages[:-1]` at inference). Repeat `--adapter` for multi-adapter compare. `--provider` is optional when a READY provider is auto-discovered.
+Uses CHAT `messages` rows unchanged from the training fileset (`messages[:-1]` at inference). Repeat `--adapter` for multi-adapter compare. `--provider` is optional when a READY provider is auto-discovered. Set `NMP_BASE_URL` (or pass `--base-url`) when the platform is not localhost — the helper reads `$NMP_BASE_URL` / `$NEMO_BASE_URL` by default.
 ```
 
-Use the user's platform URL in `NEMO_BASE_URL` when they overrode it; omit the export line for default `http://127.0.0.1:8080`. Substitute `<provider>`, `<NEMO_BASE_URL>`, and entity/adapter names with values from discovery — do not leave generic placeholders in the user-facing report. Do **not** tell the user to update the deployment or add the adapter to a provider before calling it — registration on the model entity is sufficient.
+Use the user's platform URL in `NMP_BASE_URL` when they overrode it; omit the export line for default `http://127.0.0.1:8080`. Substitute `<provider>`, concrete URLs, and entity/adapter names with values from discovery — do not leave generic placeholders in the user-facing report. Do **not** tell the user to update the deployment or add the adapter to a provider before calling it — registration on the model entity is sufficient.
 
 **Save report to `/tmp`** — unless the user opts out, write the full Markdown report (header, **Training configuration**, **Using the adapter** when `completed`, and **Resources created** when a slug or new filesets were used) to `/tmp/fine-tune-result-<slug-or-job-suffix>.md`. Use the random slug from the run when one was assigned; otherwise use the job id suffix (e.g. `a925b07ff678`).
 
diff --git a/plugins/nemo-customizer/src/nemo_customizer/skills/nemo-customizer/references/eval_helpers.py b/plugins/nemo-customizer/src/nemo_customizer/skills/nemo-customizer/references/eval_helpers.py
index dfdddca7e9..4bb2280d8f 100644
--- a/plugins/nemo-customizer/src/nemo_customizer/skills/nemo-customizer/references/eval_helpers.py
+++ b/plugins/nemo-customizer/src/nemo_customizer/skills/nemo-customizer/references/eval_helpers.py
@@ -6,11 +6,13 @@
 LoRA adapters registered on the model entity are hot-reloaded automatically on
 deployments with ``lora_enabled: true`` — no deployment update before eval.
 
-Run from the nemo-platform git root::
+Run from the nemo-platform git root (reads ``$NMP_BASE_URL`` / ``$NEMO_BASE_URL`` when
+``--base-url`` is omitted)::
 
+    export NMP_BASE_URL=http://127.0.0.1:8080
     uv run python plugins/nemo-customizer/src/nemo_customizer/skills/nemo-customizer/references/eval_helpers.py \\
-        --model-entity qwen3-1.7b --adapter lora-a --adapter lora-b \\
-        --provider qwen3-1.7b-csqa-lora-deploy --dataset-fileset commonsense_qa --split validation
+        --model-entity <model-entity> --adapter <adapter-a> --adapter <adapter-b> \\
+        --provider <provider> --dataset-fileset <dataset-fileset> --split validation.jsonl
 
 Import in agent scripts (add references/ to sys.path or run via uv from repo root).
 """
@@ -19,6 +21,7 @@
 
 import argparse
 import json
+import os
 import re
 import urllib.error
 import urllib.request
@@ -43,6 +46,15 @@
     "messages": [{"role": "user", "content": "{{ item.messages[0].content }}"}],
 }
 
+PLATFORM_HTTP_TIMEOUT_SEC = 60
+
+
+def _assert_message_turn(turn: Any, *, label: str, index: int | str) -> dict[str, Any]:
+    """Validate one messages[] element is a dict before reading role/content."""
+    if not isinstance(turn, dict):
+        raise ValueError(f"{label}: messages[{index}] must be an object with role/content, got {type(turn).__name__}")
+    return turn
+
 
 def assert_chat_row(row: dict[str, Any], *, index: int | None = None) -> None:
     """Validate one dataset row matches automodel/unsloth CHAT training shape."""
@@ -55,9 +67,11 @@ def assert_chat_row(row: dict[str, Any], *, index: int | None = None) -> None:
     messages = row["messages"]
     if not isinstance(messages, list) or len(messages) < 2:
         raise ValueError(f"{label}: messages must be a list with at least one prompt turn + final assistant label")
-    if messages[0].get("role") != "user":
+    first = _assert_message_turn(messages[0], label=label, index=0)
+    if first.get("role") != "user":
         raise ValueError(f"{label}: expected messages[0]=user")
-    if messages[-1].get("role") != "assistant":
+    last = _assert_message_turn(messages[-1], label=label, index=-1)
+    if last.get("role") != "assistant":
         raise ValueError(f"{label}: expected final messages[-1]=assistant (the label to score)")
 
 
@@ -88,11 +102,8 @@ def load_chat_jsonl_from_platform(
     remote_path: str,
 ) -> list[dict[str, Any]]:
     """Download a JSONL split from a platform fileset and validate CHAT rows."""
-    url = (
-        f"{base_url.rstrip('/')}/apis/files/v2/workspaces/{workspace}/filesets/"
-        f"{fileset}/-/{remote_path.lstrip('/')}"
-    )
-    with urllib.request.urlopen(url) as response:
+    url = f"{base_url.rstrip('/')}/apis/files/v2/workspaces/{workspace}/filesets/{fileset}/-/{remote_path.lstrip('/')}"
+    with urllib.request.urlopen(url, timeout=PLATFORM_HTTP_TIMEOUT_SEC) as response:
         content = response.read().decode("utf-8")
     rows: list[dict[str, Any]] = []
     for index, line in enumerate(content.splitlines(), start=1):
@@ -137,9 +148,7 @@ def served_model_name(*, workspace: str, entity_or_adapter: str, finetuning: str
     raise ValueError("finetuning must be 'base' or 'lora'")
 
 
-def adapter_composite_entity_name(
-    *, model_entity: str, workspace: str, adapter_name: str
-) -> str:
+def adapter_composite_entity_name(*, model_entity: str, workspace: str, adapter_name: str) -> str:
     """LoRA composite model-entity path segment (for reference / OpenAI-route body only).
 
     The model-entity proxy path ``model/{composite}/-/v1`` requires a dedicated
@@ -151,18 +160,12 @@ def adapter_composite_entity_name(
 
 def model_entity_gateway_url(*, base_url: str, workspace: str, model_entity: str) -> str:
     """OpenAI-compatible inference-gateway URL for a registered base model entity."""
-    return (
-        f"{base_url.rstrip('/')}/apis/inference-gateway/v2/workspaces/{workspace}/"
-        f"model/{model_entity}/-/v1"
-    )
+    return f"{base_url.rstrip('/')}/apis/inference-gateway/v2/workspaces/{workspace}/model/{model_entity}/-/v1"
 
 
 def provider_gateway_url(*, base_url: str, workspace: str, provider_name: str) -> str:
     """OpenAI-compatible inference-gateway URL for a model provider (LoRA eval route)."""
-    return (
-        f"{base_url.rstrip('/')}/apis/inference-gateway/v2/workspaces/{workspace}/"
-        f"provider/{provider_name}/-/v1"
-    )
+    return f"{base_url.rstrip('/')}/apis/inference-gateway/v2/workspaces/{workspace}/provider/{provider_name}/-/v1"
 
 
 def gateway_path_from_url(url: str) -> str:
@@ -175,7 +178,7 @@ def gateway_path_from_url(url: str) -> str:
 
 
 def _platform_get_json(url: str) -> dict[str, Any]:
-    with urllib.request.urlopen(url) as response:
+    with urllib.request.urlopen(url, timeout=PLATFORM_HTTP_TIMEOUT_SEC) as response:
         return json.loads(response.read().decode("utf-8"))
 
 
@@ -186,10 +189,7 @@ def find_ready_provider_for_model_entity(
     model_entity: str,
 ) -> str | None:
     """Return a READY provider name that serves ``workspace/model_entity`` (base or LoRA)."""
-    url = (
-        f"{base_url.rstrip('/')}/apis/models/v2/workspaces/{workspace}/providers"
-        f"?page_size=100&filter.status=READY"
-    )
+    url = f"{base_url.rstrip('/')}/apis/models/v2/workspaces/{workspace}/providers?page_size=100&filter.status=READY"
     payload = _platform_get_json(url)
     base_entity_id = f"{workspace}/{model_entity}"
     matches: list[str] = []
@@ -276,8 +276,7 @@ def list_completed_job_adapters(
 ) -> list[JobAdapterInfo]:
     """List completed customization jobs and their output adapter names."""
     url = (
-        f"{base_url.rstrip('/')}/apis/jobs/v2/workspaces/{workspace}/jobs"
-        f"?page_size={page_size}&filter.status=completed"
+        f"{base_url.rstrip('/')}/apis/jobs/v2/workspaces/{workspace}/jobs?page_size={page_size}&filter.status=completed"
     )
     payload = _platform_get_json(url)
     dataset_ref = f"{workspace}/{dataset_fileset}" if dataset_fileset else None
@@ -373,9 +372,7 @@ def build_platform_model_target(
                 workspace=workspace,
                 provider_name=resolved_provider,
             ),
-            name=served_model_name(
-                workspace=workspace, entity_or_adapter=adapter_name, finetuning="lora"
-            ),
+            name=served_model_name(workspace=workspace, entity_or_adapter=adapter_name, finetuning="lora"),
             format=ModelFormat.NVIDIA_NIM,
         )
 
@@ -415,15 +412,12 @@ def summarize_chat_eval_result(*, target: str, model_name: str, gateway_url: str
     em_rows = result.per_metric["exact-match"].row_scores
     num_samples = len(em_rows)
     raw_correct = sum(
-        1
-        for rs in em_rows
-        if rs.sample.get("output_text", "").strip() == reference_content(rs.item).strip()
+        1 for rs in em_rows if rs.sample.get("output_text", "").strip() == reference_content(rs.item).strip()
     )
     norm_correct = sum(
         1
         for rs in em_rows
-        if normalize_mcqa_answer(rs.sample.get("output_text", ""))
-        == normalize_mcqa_answer(reference_content(rs.item))
+        if normalize_mcqa_answer(rs.sample.get("output_text", "")) == normalize_mcqa_answer(reference_content(rs.item))
     )
     aggregate_metrics: dict[str, dict[str, float | None]] = {}
     for metric_name, metric_result in result.per_metric.items():
@@ -601,8 +595,7 @@ def routing_sanity_warnings(
         if summary.target == "base":
             if summary.gateway_path != "model-entity":
                 warnings.append(
-                    f"base eval used {summary.gateway_path} route; expected model-entity "
-                    f"({summary.gateway_url})"
+                    f"base eval used {summary.gateway_path} route; expected model-entity ({summary.gateway_url})"
                 )
             continue
         if summary.gateway_path != "provider":
@@ -632,12 +625,8 @@ def build_eval_payload(
     if any(summary.target == "base" for summary in summaries):
         routing["base"] = {
             "gateway_path": "model-entity",
-            "url": model_entity_gateway_url(
-                base_url=base_url, workspace=workspace, model_entity=model_entity
-            ),
-            "model_field": served_model_name(
-                workspace=workspace, entity_or_adapter=model_entity, finetuning="base"
-            ),
+            "url": model_entity_gateway_url(base_url=base_url, workspace=workspace, model_entity=model_entity),
+            "model_field": served_model_name(workspace=workspace, entity_or_adapter=model_entity, finetuning="base"),
         }
     for adapter_name in adapter_names:
         target = build_platform_model_target(
@@ -667,9 +656,18 @@ def build_eval_payload(
     return payload
 
 
+def default_base_url() -> str:
+    """Platform URL from env or localhost default."""
+    return os.environ.get("NMP_BASE_URL") or os.environ.get("NEMO_BASE_URL") or "http://127.0.0.1:8080"
+
+
 def _parse_args() -> argparse.Namespace:
     parser = argparse.ArgumentParser(description="Compare base vs LoRA on CHAT validation JSONL")
-    parser.add_argument("--base-url", default="http://127.0.0.1:8080")
+    parser.add_argument(
+        "--base-url",
+        default=default_base_url(),
+        help="Platform URL (default: $NMP_BASE_URL, $NEMO_BASE_URL, or http://127.0.0.1:8080)",
+    )
     parser.add_argument("--workspace", default="default")
     parser.add_argument("--model-entity", required=True)
     parser.add_argument(
diff --git a/plugins/nemo-customizer/src/nemo_customizer/skills/nemo-customizer/references/post-training-eval.md b/plugins/nemo-customizer/src/nemo_customizer/skills/nemo-customizer/references/post-training-eval.md
index 0972583d18..2e843f1732 100644
--- a/plugins/nemo-customizer/src/nemo_customizer/skills/nemo-customizer/references/post-training-eval.md
+++ b/plugins/nemo-customizer/src/nemo_customizer/skills/nemo-customizer/references/post-training-eval.md
@@ -9,8 +9,8 @@ Training and evaluation must use the **same CHAT JSONL row shape**:
 ```json
 {
   "messages": [
-    {"role": "user", "content": "Question: …\nChoices:\n…\nAnswer:"},
-    {"role": "assistant", "content": "bank"}
+    {"role": "user", "content": "<user turn or multi-turn prompt>"},
+    {"role": "assistant", "content": "<label to predict>"}
   ]
 }
 ```
@@ -23,7 +23,7 @@ Multi-turn rows use the same rule: the **final** `messages[-1]` turn is the assi
 | Send **`messages[:-1]`** at inference (exclude only the final assistant label) | Pass full `messages` including the label turn, or use `{"messages": "{{ item.messages }}"}` unfiltered |
 | Score against **`messages[-1].content`** (final assistant turn) | Score against a renamed `expected` field unless you also keep `messages` |
 
-Single-turn MCQA (user + assistant) is the degenerate case: `messages[:-1]` is just the user turn.
+Single-turn rows (one user prompt + one assistant label) are the degenerate case: `messages[:-1]` is just the user turn.
 
 Automodel and unsloth both train on this shape when `has_chat` is true (see `hf-conversion.md`, `dataset-formats.md`).
 
@@ -38,15 +38,15 @@ CHAT_REFERENCE_TEMPLATE = "{{ item.messages[-1].content }}"
 
 Import from `references/eval_helpers.py` — do not re-type these in one-off scripts.
 
-## Inference defaults (Qwen3 / thinking models)
+## Inference defaults (thinking models, e.g. Qwen3)
 
 | Setting | Recommended | Avoid |
 |---------|-------------|-------|
-| `enable_thinking` | `false` via `chat_template_kwargs` for short-answer SFT | Thinking on without enough tokens — model never closes ``, strip hook fails |
+| `enable_thinking` | `false` via `chat_template_kwargs` for short-answer SFT | Thinking on without enough tokens — the model never closes its `</think>` tag, so the strip hook fails |
 | `max_tokens` | `64` (short assistant labels) | `16` with thinking on; `1024` thinking on without strip (verbose prose) |
 | System prompt | Omit unless user asks — matches training | Extra system prompt changes decode path vs SFT |
 
-For thinking-enabled eval, set `reasoning=ReasoningParams(end_token="")` **and** ensure `max_tokens` is large enough for the model to emit the end token before generating the answer.
+For thinking-enabled eval, set `reasoning=ReasoningParams(end_token="</think>")` **and** ensure `max_tokens` is large enough for the model to emit the end token before generating the answer.
 
 ## Inference after customization (wrap-up)
 
@@ -67,8 +67,10 @@ The model-entity proxy path **always** resolves to the base VirtualModel. Settin
 
 | Target | Gateway route | URL pattern | Request `model` field |
 |--------|---------------|-------------|------------------------|
-| Base entity | **Model entity** | `{NEMO_BASE_URL}/apis/inference-gateway/v2/workspaces/default/model/<model-entity>/-/v1` | `default/<model-entity>` |
-| LoRA adapter | **Provider** | `{NEMO_BASE_URL}/apis/inference-gateway/v2/workspaces/default/provider/<provider>/-/v1` | `default--<adapter-name>` |
+| Base entity | **Model entity** | `$NMP_BASE_URL/apis/inference-gateway/v2/workspaces/default/model/<model-entity>/-/v1` | `default/<model-entity>` |
+| LoRA adapter | **Provider** | `$NMP_BASE_URL/apis/inference-gateway/v2/workspaces/default/provider/<provider>/-/v1` | `default--<adapter-name>` |
+
+(`NEMO_BASE_URL` is an alias for `NMP_BASE_URL`.)
 
 `eval_helpers.py` auto-discovers a READY provider that serves the base entity (or pass `--provider <name>`). Adapter weights still hot-reload on the deployment — no provider update per adapter.
 
@@ -83,37 +85,40 @@ Optional sanity checks:
 
 If base and LoRA scores were identical (~99% same outputs), the adapter was almost certainly called through the **model-entity** path. That path always resolves to the base VirtualModel — the `"model": "default--<adapter>"` field in the body is ignored. Fix: route LoRA through the **provider** URL with the same `model` field. `eval_helpers.build_platform_model_target()` and the CLI implement this split automatically.
 
-### MCQA metric interpretation
+### Classification / short-answer metric interpretation
 
-For commonsense_qa-style MCQA, treat **`normalized_accuracy`** as the primary metric (`normalize_mcqa_answer` strips `A. foo` / markdown).
+For multiple-choice or short-label SFT, treat **`normalized_accuracy`** as the primary metric when labels need normalization (`normalize_mcqa_answer` strips `A. foo`, markdown, etc.).
 
 | Observation | Likely meaning |
 |-------------|----------------|
-| Base & LoRA both ~59% normalized, within ~1 pp | LoRA hit **model-entity** path (base only) — check `warnings` and gateway logs |
-| Base raw exact 0%, normalized ~59% | Normal for base on MCQA (formatted prose answers) |
-| LoRA normalized >> base (e.g. 76% vs 59%) | Correct provider routing and real adapter lift |
-| Train loss dropped sharply but eval flat | Wrong eval routing or need more epochs — val loss ≠ accuracy |
+| Base & LoRA normalized scores match within ~1–2 pp | LoRA likely hit **model-entity** path (base only) — check `warnings` and gateway logs |
+| Base raw exact match low, normalized much higher | Normal when the base model emits formatted prose but normalized labels match |
+| LoRA normalized clearly above base | Correct provider routing and real adapter lift |
+| Train loss dropped sharply but eval flat | Wrong eval routing, mismatched inference settings, or need more epochs — val loss ≠ accuracy |
 
 ### Epoch / adapter ablations
 
 Resolve adapter names from completed job specs instead of guessing:
 
 ```python
+import os
 from eval_helpers import list_completed_job_adapters, compare_adapters, build_eval_payload
 
+base_url = os.environ.get("NMP_BASE_URL") or os.environ.get("NEMO_BASE_URL") or "http://127.0.0.1:8080"
+
 jobs = list_completed_job_adapters(
-    base_url="http://10.0.0.51:8080",
+    base_url=base_url,
     workspace="default",
-    model_entity="qwen3-1.7b",
-    dataset_fileset="commonsense_qa",
+    model_entity="<model-entity>",
+    dataset_fileset="<dataset-fileset>",
 )
 # jobs[0].epochs, jobs[0].adapter_name, jobs[0].backend — sorted newest first
 
 summaries = compare_adapters(
-    base_url="...",
+    base_url=base_url,
     workspace="default",
-    model_entity="qwen3-1.7b",
-    adapter_names=[jobs[0].adapter_name, jobs[2].adapter_name],
+    model_entity="<model-entity>",
+    adapter_names=[jobs[0].adapter_name, jobs[1].adapter_name],
     rows=rows,
 )
 payload = build_eval_payload(..., summaries=summaries, adapter_names=[...])
@@ -129,7 +134,7 @@ When comparing adapters from **different backends** (automodel vs unsloth) or ba
 | HTTP base URL | `…/provider/<provider>/-/v1` | `…/model/<model-entity>/-/v1` |
 | `"model"` | `default--<adapter-name>` | `default/<model-entity>` |
 | `messages` | `messages[:-1]` from the training row (exclude final assistant label) | Same |
-| Qwen3 short SFT | `"chat_template_kwargs": {"enable_thinking": false}` | Same |
+| Short-answer SFT (e.g. Qwen3) | `"chat_template_kwargs": {"enable_thinking": false}` | Same |
 | `max_tokens` / `temperature` | `64` / `0` typical for short labels | Same |
 
 CLI shortcuts (substitute names from the job):
@@ -158,23 +163,23 @@ Val loss from training is **not** accuracy — always run a generation eval for
 From **nemo-platform** git root:
 
 ```bash
-# Base vs one adapter
+export NMP_BASE_URL=http://127.0.0.1:8080   # user platform URL when not localhost
+
+# Base vs one adapter (--base-url optional when NMP_BASE_URL is set)
 uv run python plugins/nemo-customizer/src/nemo_customizer/skills/nemo-customizer/references/eval_helpers.py \
-  --base-url http://10.0.0.51:8080 \
-  --model-entity qwen3-1.7b \
-  --adapter qwen3-1.7b-csqa-unsloth-jun16-e3 \
-  --provider qwen3-1.7b-csqa-lora-deploy \
-  --dataset-fileset commonsense_qa \
+  --model-entity <model-entity> \
+  --adapter <adapter-name> \
+  --provider <provider> \
+  --dataset-fileset <dataset-fileset> \
   --split validation.jsonl \
   --output /tmp/fine-tune-eval.json
 
 # Base vs multiple adapters (epoch ablation)
 uv run python plugins/nemo-customizer/src/nemo_customizer/skills/nemo-customizer/references/eval_helpers.py \
-  --base-url http://10.0.0.51:8080 \
-  --model-entity qwen3-1.7b \
-  --adapter qwen3-1.7b-commonsense-qa-lora-jun12-v2 \
-  --adapter qwen3-1.7b-csqa-unsloth-jun16-e3 \
-  --dataset-fileset commonsense_qa \
+  --model-entity <model-entity> \
+  --adapter <adapter-a> \
+  --adapter <adapter-b> \
+  --dataset-fileset <dataset-fileset> \
   --split validation.jsonl \
   --output /tmp/fine-tune-eval-multi.json
 ```
diff --git a/plugins/nemo-customizer/tests/test_eval_helpers.py b/plugins/nemo-customizer/tests/test_eval_helpers.py
index 26755d53dc..3ed1dfeec5 100644
--- a/plugins/nemo-customizer/tests/test_eval_helpers.py
+++ b/plugins/nemo-customizer/tests/test_eval_helpers.py
@@ -3,6 +3,7 @@
 
 from __future__ import annotations
 
+import importlib.util
 import json
 import sys
 from pathlib import Path
@@ -10,16 +11,25 @@
 import pytest
 
 SKILL_REFERENCES = (
-    Path(__file__).resolve().parents[1]
-    / "src"
-    / "nemo_customizer"
-    / "skills"
-    / "nemo-customizer"
-    / "references"
+    Path(__file__).resolve().parents[1] / "src" / "nemo_customizer" / "skills" / "nemo-customizer" / "references"
 )
-sys.path.insert(0, str(SKILL_REFERENCES))
 
-import eval_helpers  # noqa: E402
+
+def _load_eval_helpers():
+    module_name = "nemo_customizer_eval_helpers_test"
+    spec = importlib.util.spec_from_file_location(
+        module_name,
+        SKILL_REFERENCES / "eval_helpers.py",
+    )
+    assert spec is not None and spec.loader is not None
+    module = importlib.util.module_from_spec(spec)
+    # Dataclasses resolve cls.__module__ during decoration; register before exec.
+    sys.modules[module_name] = module
+    spec.loader.exec_module(module)
+    return module
+
+
+eval_helpers = _load_eval_helpers()
 
 
 def test_served_model_names() -> None:
@@ -103,6 +113,11 @@ def test_assert_chat_row_accepts_multi_turn() -> None:
     assert eval_helpers.reference_content(row) == "final label"
 
 
+def test_assert_chat_row_rejects_non_dict_message_turns() -> None:
+    with pytest.raises(ValueError, match="messages\\[0\\] must be an object"):
+        eval_helpers.assert_chat_row({"messages": ["x", "y"]})
+
+
 def test_assert_chat_row_rejects_missing_final_assistant() -> None:
     row = {
         "messages": [
diff --git a/third_party/requirements-main.txt b/third_party/requirements-main.txt
index fd7c9edde3..33c782572b 100644
--- a/third_party/requirements-main.txt
+++ b/third_party/requirements-main.txt
@@ -48,10 +48,13 @@
     #   nemo-safe-synthesizer-plugin
     #   nemo-switchyard
     #   nemo-unsloth-plugin
+    #   nmp-automodel
     #   nmp-common
+    #   nmp-customization-common
     #   nmp-inference-gateway
     #   nmp-platform
     #   nmp-platform-runner
+    #   nmp-unsloth
 -e ./packages/nmp_common ; (platform_machine == 'arm64' and sys_platform == 'darwin') or (platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')
     # via
     #   nemo-platform
@@ -59,6 +62,7 @@
     #   nmp-auth
     #   nmp-automodel
     #   nmp-core-mcp
+    #   nmp-customization-common
     #   nmp-entities
     #   nmp-files
     #   nmp-guardrails
@@ -73,6 +77,12 @@
     #   nmp-secrets
     #   nmp-studio
     #   nmp-unsloth
+-e ./packages/nmp_customization_common ; (platform_machine == 'arm64' and sys_platform == 'darwin') or (platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')
+    # via
+    #   nemo-automodel-plugin
+    #   nemo-unsloth-plugin
+    #   nmp-automodel
+    #   nmp-unsloth
 -e ./packages/nmp_platform ; (platform_machine == 'arm64' and sys_platform == 'darwin') or (platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')
     # via nemoplatform
 -e ./packages/nmp_platform_runner ; (platform_machine == 'arm64' and sys_platform == 'darwin') or (platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')
@@ -104,6 +114,7 @@
     #   nmp-automodel
     #   nmp-common
     #   nmp-core-mcp
+    #   nmp-customization-common
     #   nmp-entities
     #   nmp-unsloth
 -e ./services/automodel ; (platform_machine == 'arm64' and sys_platform == 'darwin') or (platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')
@@ -187,24 +198,48 @@ aiohappyeyeballs==2.6.1 ; (platform_machine == 'arm64' and sys_platform == 'darw
     # via aiohttp
 aiohttp==3.13.5 ; (platform_machine == 'arm64' and sys_platform == 'darwin') or (platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux') \
     --hash=sha256:023ecba036ddd840b0b19bf195bfae970083fd7024ce1ac22e9bba90464620e9 \
+    --hash=sha256:02e048037a6501a5ec1f6fc9736135aec6eb8a004ce48838cb951c515f32c80b \
+    --hash=sha256:2294172ce08a82fb7c7273485895de1fa1186cc8294cfeb6aef4af42ad261174 \
+    --hash=sha256:26d2f8546f1dfa75efa50c3488215a903c0168d253b75fba4210f57ab77a0fb8 \
     --hash=sha256:2d6d44a5b48132053c2f6cd5c8cb14bc67e99a63594e336b0f2af81e94d5530c \
+    --hash=sha256:31cebae8b26f8a615d2b546fee45d5ffb76852ae6450e2a03f42c9102260d6fe \
     --hash=sha256:327cc432fdf1356fb4fbc6fe833ad4e9f6aacb71a8acaa5f1855e4b25910e4a9 \
+    --hash=sha256:33add2463dde55c4f2d9635c6ab33ce154e5ecf322bd26d09af95c5f81cfa286 \
     --hash=sha256:3a807cabd5115fb55af198b98178997a5e0e57dead43eb74a93d9c07d6d4a7dc \
     --hash=sha256:3df334e39d4c2f899a914f1dba283c1aadc311790733f705182998c6f7cae665 \
+    --hash=sha256:55b3bdd3292283295774ab585160c4004f4f2f203946997f49aac032c84649e9 \
     --hash=sha256:57653eac22c6a4c13eb22ecf4d673d64a12f266e72785ab1c8b8e5940d0e8090 \
     --hash=sha256:60869c7ac4aaabe7110f26499f3e6e5696eae98144735b12a9c3d9eae2b51a49 \
+    --hash=sha256:676e5651705ad5d8a70aeb8eb6936c436d8ebbd56e63436cb7dd9bb36d2a9a46 \
     --hash=sha256:7996023b2ed59489ae4762256c8516df9820f751cf2c5da8ed2fb20ee50abab3 \
     --hash=sha256:7ab7229b6f9b5c1ba4910d6c41a9eb11f543eadb3f384df1b4c293f4e73d44d6 \
     --hash=sha256:7becdf835feff2f4f335d7477f121af787e3504b48b449ff737afb35869ba7bb \
+    --hash=sha256:7c35b0bf0b48a70b4cb4fc5d7bed9b932532728e124874355de1a0af8ec4bc88 \
     --hash=sha256:888e78eb5ca55a615d285c3c09a7a91b42e9dd6fc699b166ebd5dee87c9ccf14 \
+    --hash=sha256:8b14eb3262fad0dc2f89c1a43b13727e709504972186ff6a99a3ecaa77102b6c \
+    --hash=sha256:999802d5fa0389f58decd24b537c54aa63c01c3219ce17d1214cbda3c2b22d2d \
+    --hash=sha256:9b16c653d38eb1a611cc898c41e76859ca27f119d25b53c12875fd0474ae31a8 \
     --hash=sha256:9d98cc980ecc96be6eb4c1994ce35d28d8b1f5e5208a23b421187d1209dbb7d1 \
     --hash=sha256:a2567b72e1ffc3ab25510db43f355b29eeada56c0a622e58dcdb19530eb0a3cb \
     --hash=sha256:a5029cc80718bbd545123cd8fe5d15025eccaaaace5d0eeec6bd556ad6163d61 \
     --hash=sha256:a60eaa2d440cd4707696b52e40ed3e2b0f73f65be07fd0ef23b6b539c9c0b0b4 \
     --hash=sha256:a84792f8631bf5a94e52d9cc881c0b824ab42717165a5579c760b830d9392ac9 \
+    --hash=sha256:aa6d0d932e0f39c02b80744273cd5c388a2d9bc07760a03164f229c8e02662f6 \
     --hash=sha256:ab2899f9fa2f9f741896ebb6fa07c4c883bfa5c7f2ddd8cf2aafa86fa981b2d2 \
     --hash=sha256:b18f31b80d5a33661e08c89e202edabf1986e9b49c42b4504371daeaa11b47c1 \
-    --hash=sha256:e7d2f8616f0ff60bd332022279011776c3ac0faa0f1b463f7bb12326fbc97a1c
+    --hash=sha256:b38765950832f7d728297689ad78f5f2cf79ff82487131c4d26fe6ceecdc5f8e \
+    --hash=sha256:c2b2355dc094e5f7d45a7bb262fe7207aa0460b37a0d87027dcf21b5d890e7d5 \
+    --hash=sha256:c719f65bebcdf6716f10e9eff80d27567f7892d8988c06de12bbbd39307c6e3a \
+    --hash=sha256:c9883051c6972f58bfc4ebb2116345ee2aa151178e99c3f2b2bbe2af712abd13 \
+    --hash=sha256:ca9ac61ac6db4eb6c2a0cd1d0f7e1357647b638ccc92f7e9d8d133e71ed3c6ac \
+    --hash=sha256:d97f93fdae594d886c5a866636397e2bcab146fd7a132fd6bb9ce182224452f8 \
+    --hash=sha256:df23d57718f24badef8656c49743e11a89fd6f5358fa8a7b96e728fda2abf7d3 \
+    --hash=sha256:e5e5f7debc7a57af53fdf5c5009f9391d9f4c12867049d509bf7bb164a6e295b \
+    --hash=sha256:e7d2f8616f0ff60bd332022279011776c3ac0faa0f1b463f7bb12326fbc97a1c \
+    --hash=sha256:ec707059ee75732b1ba130ed5f9580fe10ff75180c812bc267ded039db5128c6 \
+    --hash=sha256:f1162a1492032c82f14271e831c8f4b49f2b6078f4f5fc74de2c912fa225d51d \
+    --hash=sha256:fb0540c854ac9c0c5ad495908fdfd3e332d553ec731698c0e29b1877ba0d2ec6 \
+    --hash=sha256:fe6970addfea9e5e081401bcbadf865d2b6da045472f58af08427e108d618540
     # via
     #   aiobotocore
     #   aiohttp-retry
@@ -401,6 +436,7 @@ certifi==2026.2.25 ; (platform_machine == 'arm64' and sys_platform == 'darwin')
     #   requests
     #   sentry-sdk
 cffi==2.0.0 ; (platform_machine == 'arm64' and platform_python_implementation != 'PyPy' and sys_platform == 'darwin') or (platform_machine == 'aarch64' and platform_python_implementation != 'PyPy' and sys_platform == 'linux') or (platform_machine == 'x86_64' and platform_python_implementation != 'PyPy' and sys_platform == 'linux') \
+    --hash=sha256:1e3a615586f05fc4065a8b22b8152f0c1b00cdbc60596d187c2a74f9e3036e4e \
     --hash=sha256:2c8f814d84194c9ea681642fd164267891702542f028a15fc97d4674b6206187 \
     --hash=sha256:2de9a304e27f7596cd03d16f1b7c72219bd944e99cc52b84d0145aefb07cbd3c \
     --hash=sha256:3925dd22fa2b7699ed2617149842d2e6adde22b262fcbfada50e3d195e4b3a94 \
@@ -408,15 +444,20 @@ cffi==2.0.0 ; (platform_machine == 'arm64' and platform_python_implementation !=
     --hash=sha256:44d1b5909021139fe36001ae048dbdde8214afa20200eda0f64c068cac5d5529 \
     --hash=sha256:45d5e886156860dc35862657e1494b9bae8dfa63bf56796f2fb56e1679fc0bca \
     --hash=sha256:5fed36fccc0612a53f1d4d9a816b50a36702c28a2aa880cb8a122b3466638743 \
+    --hash=sha256:6824f87845e3396029f3820c206e459ccc91760e8fa24422f8b0c3d1731cbec5 \
     --hash=sha256:6d50360be4546678fc1b79ffe7a66265e28667840010348dd69a314145807a1b \
     --hash=sha256:730cacb21e1bdff3ce90babf007d0a0917cc3e6492f336c2f0134101e0944f93 \
+    --hash=sha256:81afed14892743bbe14dacb9e36d9e0e504cd204e0b165062c488942b9718037 \
     --hash=sha256:8941aaadaf67246224cee8c3803777eed332a19d909b47e29c9842ef1e79ac26 \
     --hash=sha256:8eca2a813c1cb7ad4fb74d368c2ffbbb4789d377ee5bb8df98373c2cc0dee76c \
+    --hash=sha256:9de40a7b0323d889cf8d23d1ef214f565ab154443c42737dfe52ff82cf857664 \
     --hash=sha256:a05d0c237b3349096d3981b727493e22147f934b20f6f125a3eba8f994bec4a9 \
     --hash=sha256:b21e08af67b8a103c71a250401c78d5e0893beff75e28c53c98f4de42f774062 \
     --hash=sha256:c8d3b5532fc71b7a77c09192b4a5a200ea992702734a2e9279a37f2478236f26 \
     --hash=sha256:d48a880098c96020b02d5a1f7d9251308510ce8858940e6fa99ece33f610838b \
-    --hash=sha256:d9b29c1f0ae438d5ee9acb31cadee00a58c46cc9c0b2f9038c6b0b3470877a8c
+    --hash=sha256:d9b29c1f0ae438d5ee9acb31cadee00a58c46cc9c0b2f9038c6b0b3470877a8c \
+    --hash=sha256:dd4f05f54a52fb558f1ba9f528228066954fee3ebe629fc1660d874d040ae5a3 \
+    --hash=sha256:f93fd8e5c8c0a4aa1f424d6173f14a892044054871c771f8566e4008eaa359d2
     # via cryptography
 chardet==5.2.0 ; (platform_machine == 'arm64' and sys_platform == 'darwin') or (platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux') \
     --hash=sha256:1b3b6ff479a8c414bc3fa2c0852995695c4a026dcd6d0633b2dd092ca39c1cf7 \
@@ -426,23 +467,47 @@ chardet==5.2.0 ; (platform_machine == 'arm64' and sys_platform == 'darwin') or (
     #   diff-cover
     #   sqlfluff
 charset-normalizer==3.4.6 ; (platform_machine == 'arm64' and sys_platform == 'darwin') or (platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux') \
+    --hash=sha256:0c173ce3a681f309f31b87125fecec7a5d1347261ea11ebbb856fa6006b23c8c \
     --hash=sha256:0e28d62a8fc7a1fa411c43bd65e346f3bce9716dc51b897fbe930c5987b402d5 \
     --hash=sha256:11afb56037cbc4b1555a34dd69151e8e069bee82e613a73bef6e714ce733585f \
     --hash=sha256:1ae6b62897110aa7c79ea2f5dd38d1abca6db663687c0b1ad9aed6f6bae3d9d6 \
+    --hash=sha256:231d4da14bcd9301310faf492051bee27df11f2bc7549bc0bb41fef11b82daa2 \
+    --hash=sha256:2b1a63e8224e401cafe7739f77efd3f9e7f5f2026bda4aead8e59afab537784f \
     --hash=sha256:2ef7fedc7a6ecbe99969cd09632516738a97eeb8bd7258bf8a0f23114c057dab \
+    --hash=sha256:30f445ae60aad5e1f8bdbb3108e39f6fbc09f4ea16c815c66578878325f8f15a \
+    --hash=sha256:34315ff4fc374b285ad7f4a0bf7dcbfe769e1b104230d40f49f700d4ab6bbd84 \
     --hash=sha256:404a1e552cf5b675a87f0651f8b79f5f1e6fd100ee88dc612f89aa16abd4486f \
     --hash=sha256:423fb7e748a08f854a08a222b983f4df1912b1daedce51a72bd24fe8f26a1843 \
+    --hash=sha256:530d548084c4a9f7a16ed4a294d459b4f229db50df689bfe92027452452943a0 \
     --hash=sha256:530e8cebeea0d76bdcf93357aa5e41336f48c3dc709ac52da2bb167c5b8271d9 \
     --hash=sha256:5f8ddd609f9e1af8c7bd6e2aca279c931aefecd148a14402d4e368f3171769fd \
+    --hash=sha256:5feb91325bbceade6afab43eb3b508c63ee53579fe896c77137ded51c6b6958e \
     --hash=sha256:60c74963d8350241a79cb8feea80e54d518f72c26db618862a8f53e5023deaf9 \
+    --hash=sha256:613f19aa6e082cf96e17e3ffd89383343d0d589abda756b7764cf78361fd41dc \
+    --hash=sha256:695f5c2823691a25f17bc5d5ffe79fa90972cc34b002ac6c843bb8a1720e950d \
     --hash=sha256:6cceb5473417d28edd20c6c984ab6fee6c6267d38d906823ebfe20b03d607dc2 \
+    --hash=sha256:7a6967aaf043bceabab5412ed6bd6bd26603dae84d5cb75bf8d9a74a4959d398 \
+    --hash=sha256:80d0a5615143c0b3225e5e3ef22c8d5d51f3f72ce0ea6fb84c943546c7b25b6c \
     --hash=sha256:82060f995ab5003a2d6e0f4ad29065b7672b6593c8c63559beefe5b443242c3e \
+    --hash=sha256:836ab36280f21fc1a03c99cd05c6b7af70d2697e374c7af0b61ed271401a72a2 \
+    --hash=sha256:8e5a94886bedca0f9b78fecd6afb6629142fd2605aa70a125d49f4edc6037ee6 \
+    --hash=sha256:90ca27cd8da8118b18a52d5f547859cc1f8354a00cd1e8e5120df3e30d6279e5 \
+    --hash=sha256:92734d4d8d187a354a556626c221cd1a892a4e0802ccb2af432a1d85ec012194 \
     --hash=sha256:947cf925bc916d90adba35a64c82aace04fa39b46b52d4630ece166655905a69 \
     --hash=sha256:9cc4fc6c196d6a8b76629a70ddfcd4635a6898756e2d9cac5565cf0654605d73 \
     --hash=sha256:a056d1ad2633548ca18ffa2f85c202cfb48b68615129143915b8dc72a806a923 \
+    --hash=sha256:a26611d9987b230566f24a0a125f17fe0de6a6aff9f25c9f564aaa2721a5fb88 \
     --hash=sha256:a4ea868bc28109052790eb2b52a9ab33f3aa7adc02f96673526ff47419490e21 \
     --hash=sha256:ac2393c73378fea4e52aa56285a3d64be50f1a12395afef9cce47772f60334c2 \
-    --hash=sha256:b35b200d6a71b9839a46b9b7fff66b6638bb52fc9658aa58796b0326595d3021
+    --hash=sha256:b35b200d6a71b9839a46b9b7fff66b6638bb52fc9658aa58796b0326595d3021 \
+    --hash=sha256:bc72863f4d9aba2e8fd9085e63548a324ba706d2ea2c83b260da08a59b9482de \
+    --hash=sha256:c907cdc8109f6c619e6254212e794d6548373cc40e1ec75e6e3823d9135d29cc \
+    --hash=sha256:d60377dce4511655582e300dc1e5a5f24ba0cb229005a1d5c8d0cb72bb758ab8 \
+    --hash=sha256:d73beaac5e90173ac3deb9928a74763a6d230f494e4bfb422c217a0ad8e629bf \
+    --hash=sha256:e3c701e954abf6fc03a49f7c579cc80c2c6cc52525340ca3186c41d3f33482ef \
+    --hash=sha256:f1ce721c8a7dfec21fcbdfe04e8f68174183cf4e8188e0645e92aa23985c57ff \
+    --hash=sha256:f6e4333fb15c83f7d1482a76d45a0818897b3d33f00efd215528ff7c51b8e35d \
+    --hash=sha256:f820f24b09e3e779fe84c3c456cb4108a7aa639b0d1f02c28046e11bfcd088ed
     # via requests
 circuitbreaker==2.1.3 ; (platform_machine == 'arm64' and sys_platform == 'darwin') or (platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux') \
     --hash=sha256:1a4baee510f7bea3c91b194dcce7c07805fe96c4423ed5594b75af438531d084 \
@@ -497,6 +562,7 @@ cryptography==46.0.7 ; (platform_machine == 'arm64' and sys_platform == 'darwin'
     --hash=sha256:04959522f938493042d595a736e7dbdff6eb6cc2339c11465b3ff89343b65f65 \
     --hash=sha256:128c5edfe5e5938b86b03941e94fac9ee793a94452ad1365c9fc3f4f62216832 \
     --hash=sha256:1d25aee46d0c6f1a501adcddb2d2fee4b979381346a78558ed13e50aa8a59067 \
+    --hash=sha256:24402210aa54baae71d99441d15bb5a1919c195398a87b563df84468160a65de \
     --hash=sha256:35719dc79d4730d30f1c2b6474bd6acda36ae2dfae1e3c16f2051f215df33ce0 \
     --hash=sha256:3986ac1dee6def53797289999eabe84798ad7817f3e97779b5061a95b0ee4968 \
     --hash=sha256:420b1e4109cc95f0e5700eed79908cef9268265c773d3a66f7af1eef53d409ef \
@@ -510,9 +576,14 @@ cryptography==46.0.7 ; (platform_machine == 'arm64' and sys_platform == 'darwin'
     --hash=sha256:84d4cced91f0f159a7ddacad249cc077e63195c36aac40b4150e7a57e84fffe7 \
     --hash=sha256:8a469028a86f12eb7d2fe97162d0634026d92a21f3ae0ac87ed1c4a447886c83 \
     --hash=sha256:91bbcb08347344f810cbe49065914fe048949648f6bd5c2519f34619142bbe85 \
+    --hash=sha256:935ce7e3cfdb53e3536119a542b839bb94ec1ad081013e9ab9b7cfd478b05006 \
+    --hash=sha256:9694078c5d44c157ef3162e3bf3946510b857df5a3955458381d1c7cfc143ddb \
     --hash=sha256:a1529d614f44b863a7b480c6d000fe93b59acee9c82ffa027cfadc77521a9f5e \
     --hash=sha256:abad9dac36cbf55de6eb49badd4016806b3165d396f64925bf2999bcb67837ba \
     --hash=sha256:b36a4695e29fe69215d75960b22577197aca3f7a25b9cf9d165dcfe9d80bc325 \
+    --hash=sha256:cbd5fb06b62bd0721e1170273d3f4d5a277044c47ca27ee257025146c34cbdd1 \
+    --hash=sha256:cdf1a610ef82abb396451862739e3fc93b071c844399e15b90726ef7470eeaf2 \
+    --hash=sha256:cdfbe22376065ffcf8be74dc9a909f032df19bc58a699456a21712d6e5eabfd0 \
     --hash=sha256:d02c738dacda7dc2a74d1b2b3177042009d5cab7c7079db74afc19e56ca1b455 \
     --hash=sha256:d3b99c535a9de0adced13d159c5a9cf65c325601aa30f4be08afd680643e9c15 \
     --hash=sha256:e4cfd68c5f3e0bfdad0d38e023239b96a2fe84146481852dffbcca442c245aa5 \
@@ -699,24 +770,44 @@ fastar==0.9.0 ; (platform_machine == 'arm64' and sys_platform == 'darwin') or (p
     --hash=sha256:108bb46c080ca152bb331f1e0576177d36e9badba51b1d5724d2823542e0dd1f \
     --hash=sha256:17e2c3b46408193ea13c1e1177275ca7951e88bd3dce16baccb8de4f5e0dc2e8 \
     --hash=sha256:2394980cc126a3263e115600bc4ff9e7320cddde83c99fc334ab530be5b7166e \
+    --hash=sha256:24b13fc4ef3f1e3c9cc2dcf07ad9445900db9d3ce09b73021547a55994d0407f \
+    --hash=sha256:3feede2d72ec0782b5ccc18568f36cbe33816be396551aa47b3e1b73c322cdd2 \
+    --hash=sha256:40b8c08df809e5e58d1839ccb37bafe4485deb6ee56bb7c5f0cbb72d701eb965 \
+    --hash=sha256:4a734506b071d2a8844771fe735fbd6d67dd0eec80eef5f189bbe763ebe7a0b8 \
+    --hash=sha256:4d012644421d669d9746157193f4eafd371e8ae56ff7aef97612a4922418664c \
+    --hash=sha256:52f96a3d4cfbe4f06b376706fa0562f3a1d2329bc37168119af0e47e1ac21cab \
+    --hash=sha256:57e9b94e485713c79bb259f7ecff1213527d05e9aa43a157c3fbc88812cf163e \
     --hash=sha256:59bc500d7b6bdaf2ffb2b632bc6b0f97ddfb3bb7d31b54d61ceb00b5698d6484 \
+    --hash=sha256:59d860e82a531e9cc67e7f500a299bffbe6e93d80bbf48401fd8f452a0c58f28 \
     --hash=sha256:5a67b061b1099cf3b8b6234dd3605fa16f5078ab6b51c8d77ad7a5d11c3cf834 \
     --hash=sha256:5c03fad1ad9ac57cf03a4db9e18c7109c37416ff4eb9ebfca98fcd2b233a26c4 \
+    --hash=sha256:75c70be3a7da3ff9342f64c15ec3749c13ef56bc28e69075d82d03768532a8d0 \
     --hash=sha256:76be31936cabce31cbb6381128f851cf0a6da2d5c25357615cd1504b26dc31cf \
+    --hash=sha256:7bf6958bb6f94e5ec522e4a255b8e940d3561ad973f0be5dde6115b5a0854af5 \
     --hash=sha256:87006c8770dfc558aefe927590bbcdaf9648ca4472a9ee6d10dfb7c0bda4ce5b \
+    --hash=sha256:8eac084ab215aaf65fa406c9b9da1ac4e697c3d3a1a183e09c488e555802f62d \
+    --hash=sha256:912efe3121dc1f3c05940cfa1c6b09b8868d702d24566506aa1d0d96e429923a \
     --hash=sha256:9ec841a69fea73361c6df6d9183915c09e9ce3bd96493763fa46019e79918400 \
+    --hash=sha256:a79c53c3003958dca88a7ec3dd805bf9c2fb2a659110039f44571d57e329e3d4 \
     --hash=sha256:acb62e2369834fb23d26327157f0a2dbec40b230c709fa85b1ce96cf010e6fbf \
     --hash=sha256:b665c33afcd1d581b82235b690d999c5446ccc2c4d80c4a95f30df3b43d22494 \
     --hash=sha256:c75e779f72d845037d4bf6692d01ac66f014eaef965c9231d41d5cc1276b89fc \
     --hash=sha256:c8ac3e8aaee57dfc822b04f570f0a963c2381a9dc8990fe0c6e965efd23fd451 \
+    --hash=sha256:c93bf4732d0dd6adae4a8b3bbebe19af76ee1072b7688bf39c5a1d120425a772 \
     --hash=sha256:c9bd8879ebf05aa247e60e454bb7568cbdd44f016b8c58e31e5398039403e61d \
+    --hash=sha256:d0aff74ea98642784c941d3cd8c35943258d4b9626157858901c5b181683339b \
+    --hash=sha256:d17d311cfbb559154ba940972b6d07a3a7ac221a2a01208f119ad03495f01d32 \
+    --hash=sha256:d2a9a49f9217f4f60f9ba23fdd1f7f3f04fed97391145eb9460ec83ca0b4bd33 \
+    --hash=sha256:d2ef34e7088f308e73460e1b8d9b0479a743f679816782a80db6ae87ee68714a \
     --hash=sha256:d49114d5f0b76c5cc242875d90fa4706de45e0456ddedf416608ecd0787fb410 \
     --hash=sha256:d62a4fd86eda3bea7cc32efd64d43b6d0fcdbbec009558b750fc362f20142789 \
     --hash=sha256:d9ac410d32cbb514e966c45f0fedd0f9447b0dea9e734af714648da503603df6 \
     --hash=sha256:de264da9e8ef6407aa0b23c7c47ed4e34fde867e7c1f6e3cb98945a93e5f89f2 \
     --hash=sha256:ec7852de506d022ad36ad56f4aefb10c259dd59e485bf87af827954d404ba9d5 \
     --hash=sha256:f07c6bdeedfeb30ef459f21fa9ab06e2b6727f7e7653176d3abb7a85f447c400 \
-    --hash=sha256:fad70e257daefb42bab68dcd68beaf2e2a99da056d65f2c9f988449a4e869306
+    --hash=sha256:f2f399fffb74bcd9e9d4507e253ace2430b5ccf61000596bda41e90414bcf4f2 \
+    --hash=sha256:fad70e257daefb42bab68dcd68beaf2e2a99da056d65f2c9f988449a4e869306 \
+    --hash=sha256:fb06d0a0cc3cf52a9c07559bb16ab99eb75afe0b3d5ce68f5c299569460851ac
     # via fastapi-cloud-cli
 fastembed==0.8.0 ; (platform_machine == 'arm64' and sys_platform == 'darwin') or (platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux') \
     --hash=sha256:40bee672657574a1009e35ec50030a55f2b426842cb011845379817641bbbbd0 \
@@ -763,29 +854,53 @@ flatbuffers==25.12.19 ; (platform_machine == 'arm64' and sys_platform == 'darwin
     --hash=sha256:7634f50c427838bb021c2d66a3d1168e9d199b0607e6329399f04846d42e20b4
     # via onnxruntime
 frozenlist==1.8.0 ; (platform_machine == 'arm64' and sys_platform == 'darwin') or (platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux') \
+    --hash=sha256:032efa2674356903cd0261c4317a561a6850f3ac864a63fc1583147fb05a79b0 \
+    --hash=sha256:03ae967b4e297f58f8c774c7eabcce57fe3c2434817d4385c50661845a058121 \
     --hash=sha256:09474e9831bc2b2199fad6da3c14c7b0fbdd377cce9d3d77131be28906cb7d84 \
     --hash=sha256:0c18a16eab41e82c295618a77502e17b195883241c563b00f0aa5106fc4eaa0d \
     --hash=sha256:11847b53d722050808926e785df837353bd4d75f1d494377e59b23594d834967 \
+    --hash=sha256:1a7fa382a4a223773ed64242dbe1c9c326ec09457e6b8428efb4118c685c3dfd \
     --hash=sha256:21900c48ae04d13d416f0e1e0c4d81f7931f73a9dfa0b7a8746fb2fe7dd970ed \
     --hash=sha256:2552f44204b744fba866e573be4c1f9048d6a324dfe14475103fd51613eb1d1f \
+    --hash=sha256:294e487f9ec720bd8ffcebc99d575f7eff3568a08a253d1ee1a0378754b74143 \
+    --hash=sha256:29548f9b5b5e3460ce7378144c3010363d8035cea44bc0bf02d57f5a685e084e \
+    --hash=sha256:3462dd9475af2025c31cc61be6652dfa25cbfb56cbbf52f4ccfe029f38decaf8 \
     --hash=sha256:3ede829ed8d842f6cd48fc7081d7a41001a56f1f38603f9d49bf3020d59a31ad \
+    --hash=sha256:3ef2d026f16a2b1866e1d86fc4e1291e1ed8a387b2c333809419a2f8b3a77b82 \
     --hash=sha256:405e8fe955c2280ce66428b3ca55e12b3c4e9c336fb2103a4937e891c69a4a29 \
     --hash=sha256:494a5952b1c597ba44e0e78113a7266e656b9794eec897b19ead706bd7074383 \
     --hash=sha256:4e0c11f2cc6717e0a741f84a527c52616140741cd812a50422f83dc31749fb52 \
+    --hash=sha256:50066c3997d0091c411a66e710f4e11752251e6d2d73d70d8d5d4c76442a199d \
+    --hash=sha256:517279f58009d0b1f2e7c1b130b377a349405da3f7621ed6bfae50b10adf20c1 \
+    --hash=sha256:5500ef82073f599ac84d888e3a8c1f77ac831183244bfd7f11eaa0289fb30714 \
     --hash=sha256:581ef5194c48035a7de2aefc72ac6539823bb71508189e5de01d60c9dcd5fa65 \
     --hash=sha256:5c1c8e78426e59b3f8005e9b19f6ff46e5845895adbde20ece9218319eca6506 \
     --hash=sha256:6da155091429aeba16851ecb10a9104a108bcd32f6c1642867eadaee401c1c41 \
+    --hash=sha256:74c51543498289c0c43656701be6b077f4b265868fa7f8a8859c197006efb608 \
     --hash=sha256:776f352e8329135506a1d6bf16ac3f87bc25b28e765949282dcc627af36123aa \
     --hash=sha256:78f7b9e5d6f2fdb88cdde9440dc147259b62b9d3b019924def9f6478be254ac1 \
+    --hash=sha256:8585e3bb2cdea02fc88ffa245069c36555557ad3609e83be0ec71f54fd4abb52 \
     --hash=sha256:8d92f1a84bb12d9e56f818b3a746f3efba93c1b63c8387a73dde655e1e42282a \
+    --hash=sha256:908bd3f6439f2fef9e85031b59fd4f1297af54415fb60e4254a95f75b3cab3f3 \
     --hash=sha256:957e7c38f250991e48a9a73e6423db1bb9dd14e722a10f6b8bb8e16a0f55f695 \
     --hash=sha256:96f423a119f4777a4a056b66ce11527366a8bb92f54e541ade21f2374433f6d4 \
     --hash=sha256:b3210649ee28062ea6099cfda39e147fa1bc039583c8ee4481cb7811e2448c51 \
+    --hash=sha256:b4dec9482a65c54a5044486847b8a66bf10c9cb4926d42927ec4e8fd5db7fed8 \
     --hash=sha256:b6db2185db9be0a04fecf2f241c70b63b1a242e2805be291855078f2b404dd6b \
+    --hash=sha256:c4c800524c9cd9bac5166cd6f55285957fcfc907db323e193f2afcd4d9abd69b \
+    --hash=sha256:c8d1634419f39ea6f5c427ea2f90ca85126b54b50837f31497f3bf38266e853d \
+    --hash=sha256:c9a63152fe95756b85f31186bddf42e4c02c6321207fd6601a1c89ebac4fe567 \
+    --hash=sha256:cf253e0e1c3ceb4aaff6df637ce033ff6535fb8c70a764a8f46aafd3d6ab798e \
+    --hash=sha256:d6a5df73acd3399d893dafc71663ad22534b5aa4f94e8a2fabfe856c3c1b6a52 \
+    --hash=sha256:db1e72ede2d0d7ccb213f218df6a078a9c09a7de257c2fe8fcef16d5925230b1 \
     --hash=sha256:e25ac20a2ef37e91c1b39938b591457666a0fa835c7783c3a8f33ea42870db94 \
     --hash=sha256:eaa352d7047a31d87dafcacbabe89df0aa506abb5b1b85a2fb91bc3faa02d822 \
     --hash=sha256:ec3cc8c5d4084591b4237c0a272cc4f50a5b03396a47d9caaf76f5d7b38a4f11 \
+    --hash=sha256:edee74874ce20a373d62dc28b0b18b93f645633c2943fd90ee9d898550770581 \
+    --hash=sha256:eefdba20de0d938cec6a89bd4d70f346a03108a19b9df4248d3cf0d88f1b0f51 \
     --hash=sha256:f21f00a91358803399890ab167098c131ec2ddd5f8f5fd5fe9c9f2c6fcd91e40 \
+    --hash=sha256:f4be2e3d8bc8aabd566f8d5b8ba7ecc09249d74ba3c9ed52e54dc23a293f0b92 \
+    --hash=sha256:f6292f1de555ffcc675941d65fffffb0a5bcd992905015f85d0592201793e0e5 \
     --hash=sha256:f833670942247a14eafbb675458b4e61c82e002a148f49e68257b79296e865c4 \
     --hash=sha256:fa47e444b8ba08fffd1c18e8cdb9a75db1b6a27f17507522834ad13ed5922b93 \
     --hash=sha256:fb30f9626572a76dfe4293c7194a09fb1fe93ba94c7d4f720dfae3b646b45027
@@ -822,6 +937,7 @@ greenlet==3.3.2 ; (platform_machine == 'arm64' and sys_platform == 'darwin') or
     --hash=sha256:1ebd458fa8285960f382841da585e02201b53a5ec2bac6b156fc623b5ce4499f \
     --hash=sha256:2eaf067fc6d886931c7962e8c6bede15d2f01965560f3359b27c80bde2d151f2 \
     --hash=sha256:3ceec72030dae6ac0c8ed7591b96b70410a8be370b6a477b1dbc072856ad02bd \
+    --hash=sha256:4375a58e49522698d3e70cc0b801c19433021b5c37686f7ce9c65b0d5c8677d2 \
     --hash=sha256:43e99d1749147ac21dde49b99c9abffcbc1e2d55c67501465ef0930d6e78e070 \
     --hash=sha256:442b6057453c8cb29b4fb36a2ac689382fc71112273726e2423f7f17dc73bf99 \
     --hash=sha256:45abe8eb6339518180d5a7fa47fa01945414d7cca5ecb745346fc6a87d2750be \
@@ -829,11 +945,16 @@ greenlet==3.3.2 ; (platform_machine == 'arm64' and sys_platform == 'darwin') or
     --hash=sha256:64970c33a50551c7c50491671265d8954046cb6e8e2999aacdd60e439b70418a \
     --hash=sha256:6c6f8ba97d17a1e7d664151284cb3315fc5f8353e75221ed4324f84eb162b395 \
     --hash=sha256:8e2cd90d413acbf5e77ae41e5d3c9b3ac1d011a756d7284d7f3f2b806bbd6358 \
+    --hash=sha256:a2a5be83a45ce6188c045bcc44b0ee037d6a518978de9a5d97438548b953a1ac \
+    --hash=sha256:a443358b33c4ec7b05b79a7c8b466f5d275025e750298be7340f8fc63dff2a55 \
     --hash=sha256:aa6ac98bdfd716a749b84d4034486863fd81c3abde9aa3cf8eff9127981a4ae4 \
     --hash=sha256:ab0c7e7901a00bc0a7284907273dc165b32e0d109a6713babd04471327ff7986 \
     --hash=sha256:ac8d61d4343b799d1e526db579833d72f23759c71e07181c2d2944e429eb09cd \
+    --hash=sha256:ae9e21c84035c490506c17002f5c8ab25f980205c3e61ddb3a2a2a2e6c411fcb \
     --hash=sha256:b6997d360a4e6a4e936c0f9625b1c20416b8a0ea18a8e19cabbefc712e7397ab \
-    --hash=sha256:c56692189a7d1c7606cb794be0a8381470d95c57ce5be03fb3d0ef57c7853b86
+    --hash=sha256:c56692189a7d1c7606cb794be0a8381470d95c57ce5be03fb3d0ef57c7853b86 \
+    --hash=sha256:ccd21bb86944ca9be6d967cf7691e658e43417782bce90b5d2faeda0ff78a7dd \
+    --hash=sha256:d248d8c23c67d2291ffd47af766e2a3aa9fa1c6703155c099feb11f526c63a92
     # via
     #   nmp-entities
     #   sqlalchemy
@@ -851,8 +972,11 @@ grpcio==1.80.0 ; (platform_machine == 'arm64' and sys_platform == 'darwin') or (
     --hash=sha256:92d787312e613754d4d8b9ca6d3297e69994a7912a32fa38c4c4e01c272974b0 \
     --hash=sha256:9a6284a5d907c37db53350645567c522be314bac859a64a7a5ca63b77bb7958f \
     --hash=sha256:ba0915d51fd4ced2db5ff719f84e270afe0e2d4c45a7bdb1e8d036e4502928c2 \
+    --hash=sha256:c624cc9f1008361014378c9d776de7182b11fe8b2e5a81bc69f23a295f2a1ad0 \
     --hash=sha256:ce1794f4ea6cc3ca29463f42d665c32ba1b964b48958a66497917fe9069f26e6 \
     --hash=sha256:d334591df610ab94714048e0d5b4f3dd5ad1bee74dfec11eee344220077a79de \
+    --hash=sha256:dfab85db094068ff42e2a3563f60ab3dddcc9d6488a35abf0132daec13209c8a \
+    --hash=sha256:e9e408fc016dffd20661f0126c53d8a31c2821b5c13c5d67a0f5ed5de93319ad \
     --hash=sha256:f49eddcac43c3bf350c0385366a58f36bed8cc2c0ec35ef7b74b49e56552c0c2
     # via
     #   opentelemetry-exporter-otlp-proto-grpc
@@ -940,6 +1064,7 @@ httpx==0.28.1 ; (platform_machine == 'arm64' and sys_platform == 'darwin') or (p
     #   nemoguardrails
     #   nmp-auth
     #   nmp-automodel
+    #   nmp-customization-common
     #   nmp-guardrails
     #   nmp-unsloth
     #   nvidia-nat-core
@@ -1037,23 +1162,32 @@ jinja2==3.1.6 ; (platform_machine == 'arm64' and sys_platform == 'darwin') or (p
     #   sqlfluff
 jiter==0.10.0 ; (platform_machine == 'arm64' and sys_platform == 'darwin') or (platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux') \
     --hash=sha256:07a7142c38aacc85194391108dc91b5b57093c978a9932bd86a36862759d9500 \
+    --hash=sha256:0c5867d40ab716e4684858e4887489685968a47e3ba222e44cde6e4a2154f959 \
     --hash=sha256:13252b58c1f4d8c5b63ab103c03d909e8e1e7842d302473f482915d95fefd605 \
+    --hash=sha256:13ddbc6ae311175a3b03bd8994881bc4635c923754932918e18da841632349db \
     --hash=sha256:14a4c418b1ec86a195f1ca69da8b23e8926c752b685af665ce30777233dfe070 \
     --hash=sha256:23ba7722d6748b6920ed02a8f1726fb4b33e0fd2f3f621816a8b486c66410ab2 \
     --hash=sha256:28ed2a4c05a1f32ef0e1d24c2611330219fed727dae01789f4a335617634b1ca \
     --hash=sha256:2e2227db6ba93cb3e2bf67c87e594adde0609f146344e8207e8730364db27041 \
+    --hash=sha256:371eab43c0a288537d30e1f0b193bc4eca90439fc08a022dd83e5e07500ed026 \
     --hash=sha256:395bb9a26111b60141757d874d27fdea01b17e8fac958b91c20128ba8f4acc8a \
     --hash=sha256:4c440ea003ad10927a30521a9062ce10b5479592e8a70da27f21eeb457b4a9c5 \
     --hash=sha256:4d613e4b379a07d7c8453c5712ce7014e86c6ac93d990a0b8e7377e18505e98d \
+    --hash=sha256:5161e201172de298a8a1baad95eb85db4fb90e902353b1f6a41d64ea64644e25 \
     --hash=sha256:520ef6d981172693786a49ff5b09eda72a42e539f14788124a07530f785c3ad6 \
     --hash=sha256:533efbce2cacec78d5ba73a41756beff8431dfa1694b6346ce7af3a12c42202b \
+    --hash=sha256:554dedfd05937f8fc45d17ebdf298fe7e0c77458232bcb73d9fbbf4c6455f5b3 \
     --hash=sha256:558cc7e44fd8e507a236bee6a02fa17199ba752874400a0ca6cd6e2196cdb7dc \
+    --hash=sha256:5bc299da7789deacf95f64052d97f75c16d4fc8c4c214a22bf8d859a4288a1c2 \
     --hash=sha256:62755d1bcea9876770d4df713d82606c8c1a3dca88ff39046b85a048566d56ea \
+    --hash=sha256:6c675736059020365cebc845a820214765162728b51ab1e03a1b7b3abb70f74c \
     --hash=sha256:7202ae396446c988cb2a5feb33a543ab2165b786ac97f53b59aafb803fef0744 \
     --hash=sha256:7d1bbf3c465de4a24ab12fb7766a0003f6f9bce48b8b6a886158c4d569452dc5 \
     --hash=sha256:901b92f2e2947dc6dfcb52fd624453862e16665ea909a08398dde19c0731b7f4 \
+    --hash=sha256:919d139cdfa8ae8945112398511cb7fca58a77382617d279556b344867a37e61 \
     --hash=sha256:cafc4628b616dc32530c20ee53d71589816cf385dd9449633e910d596b1f5c8a \
-    --hash=sha256:d0cb9a125d5a3ec971a094a845eadde2db0de85b33c9f13eb94a0c63d463879e
+    --hash=sha256:d0cb9a125d5a3ec971a094a845eadde2db0de85b33c9f13eb94a0c63d463879e \
+    --hash=sha256:f62cf8ba0618eda841b9bf61797f21c5ebd15a7a1e19daab76e4e4b498d515b2
     # via
     #   anthropic
     #   instructor
@@ -1089,23 +1223,39 @@ jsonpath-ng==1.8.0 ; (platform_machine == 'arm64' and sys_platform == 'darwin')
     #   nvidia-nat-core
 jsonpath-rust-bindings==1.1.1 ; (platform_machine == 'arm64' and sys_platform == 'darwin') or (platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux') \
     --hash=sha256:0017af7054fb6bce55863a7065ae465a9c47fd93fb94f002ca98bb8adf15101a \
+    --hash=sha256:02373d581a093d0640e60858884d67ec93259e7b6d6bd8e5874400ad99558e00 \
     --hash=sha256:0ca169ac219bc141775fb19df8165d4d0162e6ed77102e1ab19a74a80c1f9051 \
     --hash=sha256:13446ad021abe05d622a01eaa648c238ef3b98e9fc0bd837a589bafb246ca3bc \
+    --hash=sha256:146b69ce20cb9869e05a6d369f4a10b52f98e1f8575f1ac5b49e285fa2032380 \
+    --hash=sha256:1ff4cd052f733d5f270329c552a04e08a1520053355d35f0be886714dff46955 \
     --hash=sha256:26955685acf0208b6061419cab4bd79fe869ebce57f3cec1e9b20f0e0af56b35 \
+    --hash=sha256:330f457556d06abc1ea36b6738eb172288afff6bd251350eaba42bed2f459fd3 \
+    --hash=sha256:366cba544c080c08530cef0cc19922f0380f0caab6e7e5a0ddfb70de288d5abc \
+    --hash=sha256:36a40ed04d2db70897cde2ac92f6c9aae2ed1b426aa4c97a47f3e2be911ea4ba \
     --hash=sha256:3c220c2d27ab6a0791e3af10e2a7c53ccd1dc2dfc8681999fed4458392aa0372 \
     --hash=sha256:40c23781d28a8b126c8a2b337e4fe275cc8f35a149bda769e3ec2760dfb58b91 \
     --hash=sha256:44de7464ad227028c36e8d713653b4bfe5eb7524ac1a4b0a71e8bcb3bd4f4f3a \
+    --hash=sha256:4eacb98f80fff7d43956503ca7b42e491f7084c7b9bd8b5b6bad3f50d08480df \
+    --hash=sha256:50f16c3dd6eb572dda74731508d2fca1abbb927ab4f6511fb65eeba6e59fd041 \
+    --hash=sha256:6716caa0855dbf9d021509a3caa00a9fa7cc241930f40830c24e85d0e17a6246 \
     --hash=sha256:734eee89754c829a0fb55a30467c8a33081976375b763c907f71f7018682c26c \
+    --hash=sha256:7bf30e27a81d07c79cc58c86600687e5adfe0f7b1aaf8069a737085bebfaea71 \
     --hash=sha256:7f2a526c87a245f708dc1d8d4988c471384c369a5909b8b730e63b6a7f0c2d60 \
     --hash=sha256:8c390c33582cd268d35b86eb0f550229e0cf26f03bb06c470db4712d6fa4dc0f \
+    --hash=sha256:9212d3746a57015fc3722488f61c4afc465d993f68371d864be8fa5b0c58d635 \
     --hash=sha256:9d656507b5913f9515ff136797c5850df907c5040fa1368baa428f7e829e33f0 \
     --hash=sha256:a239166bd1418897de327c952a9d9ff912d1fabc9da82e688204ccfcd7b22584 \
     --hash=sha256:a43107f6efc4e66ee046c338741429a268fd972e887721b01bf0f32e47387e30 \
+    --hash=sha256:aa7e9d25b00c227c51e7a916a13fbf22cf483df622699dbc3ef051861ec1de85 \
     --hash=sha256:b06b24668085b2791acbfefdfe2f2824d36be539c7647c00aee33242b4d3385d \
+    --hash=sha256:b9583e965fe5f8f21cd0d047244db9716a119e0e82a06f2336e6b14c9a9637af \
     --hash=sha256:ce1c6804706012c3c7a194903ef20befafa3cc913a4ef553696bc837ac738a66 \
     --hash=sha256:ce7039a2f497674785a423076e803a1fa547c2f9cf568b25e2ac83ff5890b98f \
     --hash=sha256:d21101114514d34b21ab216eef1d7bb41155311fa61284e8f2dbdb93bde41c78 \
+    --hash=sha256:dc0c3488f04dbd318fa876fb880e8cb7d1e53abcf8b0d9e697e10a0a15ac3158 \
     --hash=sha256:ddbf025592bf88fc5395d9d023d7bcc8fab977898c406e0a5722925c3b887c71 \
+    --hash=sha256:e423363b47080830bbb4d8257c0f26bda8ee655a18c4f934952bfe4c46e8d510 \
+    --hash=sha256:ebb9a05a2b80195ac47aec0ce98d861c102459d16225fefb0f7e0158196c4a58 \
     --hash=sha256:f55ee1e7fdb6bb2363c40a6d6ce0285e53bd52b4ecae7bef3909eeb11a9b4cd2 \
     --hash=sha256:fbfeb05c7a6854104e97a0e3234f312004b3f4e678d14b68180a6a4f33f4d7c3 \
     --hash=sha256:fe44737c6c72079ef30c85f975c19fa0114c13039fe538d8c5b259007a35a0ff
@@ -1294,30 +1444,46 @@ loguru==0.7.3 ; (platform_machine == 'arm64' and sys_platform == 'darwin') or (p
     --hash=sha256:31a33c10c8e1e10422bfd431aeb5d351c7cf7fa671e3c4df004162264b28220c
     # via fastembed
 lxml==6.1.0 ; (platform_machine == 'arm64' and sys_platform == 'darwin') or (platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux') \
+    --hash=sha256:05b9b8787e35bec69e68daf4952b2e6dfcfb0db7ecf1a06f8cdfbbac4eb71aad \
     --hash=sha256:07f98f5496f96bf724b1e3c933c107f0cbf2745db18c03d2e13a291c3afd2635 \
+    --hash=sha256:0d082495c5fcf426e425a6e28daaba1fcb6d8f854a4ff01effb1f1f381203eb9 \
+    --hash=sha256:0f0f08beb0182e3e9a86fae124b3c47a7b41b7b69b225e1377db983802404e54 \
+    --hash=sha256:1ae225f66e5938f4fa29d37e009a3bb3b13032ac57eb4eb42afa44f6e4054e69 \
+    --hash=sha256:23a5dc68e08ed13331d61815c08f260f46b4a60fdd1640bbeb82cf89a9d90289 \
     --hash=sha256:264c605ab9c0e4aa1a679636f4582c4d3313700009fac3ec9c3412ed0d8f3e1d \
     --hash=sha256:363e47283bde87051b821826e71dde47f107e08614e1aa312ba0c5711e77738c \
     --hash=sha256:37fabd1452852636cf38ecdcc9dd5ca4bba7a35d6c53fa09725deeb894a87491 \
     --hash=sha256:3ae5d8d5427f3cc317e7950f2da7ad276df0cfa37b8de2f5658959e618ea8512 \
     --hash=sha256:419c58fc92cc3a2c3fa5f78c63dbf5da70c1fa9c1b25f25727ecee89a96c7de2 \
+    --hash=sha256:43e4d297f11080ec9d64a4b1ad7ac02b4484c9f0e2179d9c4ef78e886e747b88 \
     --hash=sha256:4642e04449a1e164b5ff71ffd901ddb772dfabf5c9adf1b7be5dffe1212bc037 \
+    --hash=sha256:4937460dc5df0cdd2f06a86c285c28afda06aefa3af949f9477d3e8df430c485 \
     --hash=sha256:5715e0e28736a070f3f34a7ccc09e2fdcba0e3060abbcf61a1a5718ff6d6b105 \
     --hash=sha256:5cfa1a34df366d9dc0d5eaf420f4cf2bb1e1bebe1066d1c2fc28c179f8a4004c \
+    --hash=sha256:63aeafc26aac0be8aff14af7871249e87ea1319be92090bfd632ec68e03b16a5 \
+    --hash=sha256:690022c7fae793b0489aa68a658822cea83e0d5933781811cabbf5ea3bcfe73d \
     --hash=sha256:73becf6d8c81d4c76b1014dbd3584cb26d904492dcf73ca85dc8bff08dcd6d2d \
+    --hash=sha256:7e39ab3a28af7784e206d8606ec0e4bcad0190f63a492bca95e94e5a4aef7f6e \
     --hash=sha256:7f4a77d6f7edf9230cee3e1f7f6764722a41604ee5681844f18db9a81ea0ec33 \
+    --hash=sha256:8e369cbd690e788c8d15e56222d91a09c6a417f49cbc543040cba0fe2e25a79e \
     --hash=sha256:9147d8e386ec3b82c3b15d88927f734f565b0aaadef7def562b853adca45784a \
     --hash=sha256:942454ff253da14218f972b23dc72fa4edf6c943f37edd19cd697618b626fac5 \
     --hash=sha256:976a6b39b1b13e8c354ad8d3f261f3a4ac6609518af91bdb5094760a08f132c4 \
+    --hash=sha256:9eb667bf50856c4a58145f8ca2d5e5be160191e79eb9e30855a476191b3c3495 \
     --hash=sha256:a0092f2b107b69601adf562a57c956fbb596e05e3e6651cabd3054113b007e45 \
     --hash=sha256:a2853c8b2170cc6cd54a6b4d50d2c1a8a7aeca201f23804b4898525c7a152cfc \
     --hash=sha256:bc783ee3147e60a25aa0445ea82b3e8aabb83b240f2b95d32cb75587ff781814 \
     --hash=sha256:bfd57d8008c4965709a919c3e9a98f76c2c7cb319086b3d26858250620023b13 \
+    --hash=sha256:cbd7b79cdcb4986ad78a2662625882747f09db5e4cd7b2ae178a88c9c51b3dfe \
     --hash=sha256:cc16682cc987a3da00aa56a3aa3075b08edb10d9b1e476938cfdbee8f3b67181 \
     --hash=sha256:cec05be8c876f92a5aa07b01d60bbb4d11cfbdd654cad0561c0d7b5c043a61b9 \
     --hash=sha256:d036ee7b99d5148072ac7c9b847193decdfeac633db350363f7bce4fff108f0e \
     --hash=sha256:d2f17a16cd8751e8eb233a7e41aecdf8e511712e00088bf9be455f604cd0d28d \
+    --hash=sha256:d6d8efe71429635f0559579092bb5e60560d7b9115ee38c4adbea35632e7fa24 \
     --hash=sha256:db88156fcf544cdbf0d95588051515cfdfd4c876fc66444eb98bceb5d6db76de \
+    --hash=sha256:e3c4f84b24a1fcba435157d111c4b755099c6ff00a3daee1ad281817de75ed11 \
     --hash=sha256:e69aa6805905807186eb00e66c6d97a935c928275182eb02ee40ba00da9623b2 \
+    --hash=sha256:f15401d8d3dbf239e23c818afc10c7207f7b95f9a307e092122b6f86dd43209a \
     --hash=sha256:fc46da94826188ed45cb53bd8e3fc076ae22675aea2087843d4735627f867c6d \
     --hash=sha256:fcf3da95e93349e0647d48d4b36a12783105bcc74cb0c416952f9988410846a3
     # via
@@ -1354,20 +1520,28 @@ markupsafe==3.0.3 ; (platform_machine == 'arm64' and sys_platform == 'darwin') o
     --hash=sha256:068f375c472b3e7acbe2d5318dea141359e6900156b5b2ba06a30b169086b91a \
     --hash=sha256:0bf2a864d67e76e5c9a34dc26ec616a66b9888e25e7b9460e1c76d3293bd9dbf \
     --hash=sha256:116bb52f642a37c115f517494ea5feb03889e04df47eeff5b130b1808ce7c219 \
+    --hash=sha256:12c63dfb4a98206f045aa9563db46507995f7ef6d83b2f68eda65c307c6829eb \
     --hash=sha256:133a43e73a802c5562be9bbcd03d090aa5a1fe899db609c29e8c8d815c5f6de6 \
     --hash=sha256:1872df69a4de6aead3491198eaf13810b565bdbeec3ae2dc8780f14458ec73ce \
     --hash=sha256:3524b778fe5cfb3452a09d31e7b5adefeea8c5be1d43c4f810ba09f2ceb29d37 \
     --hash=sha256:3a7e8ae81ae39e62a41ec302f972ba6ae23a5c5396c8e60113e9066ef893da0d \
     --hash=sha256:4bd4cd07944443f5a265608cc6aab442e4f74dff8088b0dfc8238647b8f6ae9a \
     --hash=sha256:4e885a3d1efa2eadc93c894a21770e4bc67899e3543680313b09f139e149ab19 \
+    --hash=sha256:509fa21c6deb7a7a273d629cf5ec029bc209d1a51178615ddf718f5918992ab9 \
     --hash=sha256:6b5420a1d9450023228968e7e6a9ce57f65d148ab56d2313fcd589eee96a7a50 \
     --hash=sha256:722695808f4b6457b320fdc131280796bdceb04ab50fe1795cd540799ebe1698 \
     --hash=sha256:77f0643abe7495da77fb436f50f8dab76dbc6e5fd25d39589a0f1fe6548bfa2b \
+    --hash=sha256:795e7751525cae078558e679d646ae45574b47ed6e7771863fcc079a6171a0fc \
+    --hash=sha256:7be7b61bb172e1ed687f1754f8e7484f1c8019780f6f6b0786e76bb01c2ae115 \
+    --hash=sha256:83891d0e9fb81a825d9a6d61e3f07550ca70a076484292a70fde82c4b807286f \
     --hash=sha256:8485f406a96febb5140bfeca44a73e3ce5116b2501ac54fe953e488fb1d03b12 \
     --hash=sha256:8709b08f4a89aa7586de0aadc8da56180242ee0ada3999749b183aa23df95025 \
     --hash=sha256:8f71bc33915be5186016f675cd83a1e08523649b0e33efdb898db577ef5bb009 \
+    --hash=sha256:94c6f0bb423f739146aec64595853541634bde58b2135f27f61c1ffd1cd4d16a \
     --hash=sha256:9b79b7a16f7fedff2495d684f2b59b0457c3b493778c9eed31111be64d58279f \
     --hash=sha256:a4afe79fb3de0b7097d81da19090f4df4f8d3a2b3adaa8764138aac2e44f3af1 \
+    --hash=sha256:b8512a91625c9b3da6f127803b166b629725e68af71f8184ae7e7d54686a56d6 \
+    --hash=sha256:bc51efed119bc9cfdf792cdeaa4d67e8f6fcccab66ed4bfdd6bde3e59bfcbb2f \
     --hash=sha256:be8813b57049a7dc738189df53d69395eba14fb99345e0a5994914a3864c8a4b \
     --hash=sha256:ccfcd093f13f0f0b7fdd0f198b90053bf7b2f02a3927a30e63f3ccc9df56b676 \
     --hash=sha256:d6dd0be5b5b189d31db7cda48b91d7e0a9795f31430b7f271219ab30f1d3ac9d \
@@ -1396,21 +1570,33 @@ mmh3==5.2.1 ; (platform_machine == 'arm64' and sys_platform == 'darwin') or (pla
     --hash=sha256:17fbb47f0885ace8327ce1235d0416dc86a211dcd8cc1e703f41523be32cfec8 \
     --hash=sha256:1d9f9a3ce559a5267014b04b82956993270f63ec91765e13e9fd73daf2d2738e \
     --hash=sha256:26fb5b9c3946bf7f1daed7b37e0c03898a6f062149127570f8ede346390a0825 \
+    --hash=sha256:2778fed822d7db23ac5008b181441af0c869455b2e7d001f4019636ac31b6fe4 \
+    --hash=sha256:2bd9f19f7f1fcebd74e830f4af0f28adad4975d40d80620be19ffb2b2af56c9f \
     --hash=sha256:3737303ca9ea0f7cb83028781148fcda4f1dac7821db0c47672971dabcf63593 \
+    --hash=sha256:3c38d142c706201db5b2345166eeef1e7740e3e2422b470b8ba5c8727a9b4c7a \
     --hash=sha256:3d74a03fb57757ece25aa4b3c1c60157a1cece37a020542785f942e2f827eed5 \
+    --hash=sha256:41aac7002a749f08727cb91babff1daf8deac317c0b1f317adc69be0e6c375d1 \
+    --hash=sha256:50885073e2909251d4718634a191c49ae5f527e5e1736d738e365c3e8be8f22b \
+    --hash=sha256:67e41a497bac88cc1de96eeba56eeb933c39d54bc227352f8455aa87c4ca4000 \
     --hash=sha256:707151644085dd0f20fe4f4b573d28e5130c4aaa5f587e95b60989c5926653b5 \
     --hash=sha256:82f3802bfc4751f420d591c5c864de538b71cea117fce67e4595c2afede08a15 \
     --hash=sha256:8e6c219e375f6341d0959af814296372d265a8ca1af63825f65e2e87c618f006 \
+    --hash=sha256:8f767ba0911602ddef289404e33835a61168314ebd3c729833db2ed685824211 \
+    --hash=sha256:960b1b3efa39872ac8b6cc3a556edd6fb90ed74f08c9c45e028f1005b26aa55d \
     --hash=sha256:9d8089d853c7963a8ce87fff93e2a67075c0bc08684a08ea6ad13577c38ffc38 \
     --hash=sha256:a482ac121de6973897c92c2f31defc6bafb11c83825109275cffce54bb64933f \
     --hash=sha256:b3f99e1756fc48ad507b95e5d86f2fb21b3d495012ff13e6592ebac14033f166 \
     --hash=sha256:bbea5b775f0ac84945191fb83f845a6fd9a21a03ea7f2e187defac7e401616ad \
     --hash=sha256:be77c402d5e882b6fbacfd90823f13da8e0a69658405a39a569c6b58fdb17b03 \
+    --hash=sha256:c88653877aeb514c089d1b3d473451677b8b9a6d1497dbddf1ae7934518b06d2 \
+    --hash=sha256:d30b650595fdbe32366b94cb14f30bb2b625e512bd4e1df00611f99dc5c27fd4 \
     --hash=sha256:d51fde50a77f81330523562e3c2734ffdca9c4c9e9d355478117905e1cfe16c6 \
+    --hash=sha256:d57dea657357230cc780e13920d7fa7db059d58fe721c80020f94476da4ca0a1 \
     --hash=sha256:dae0f0bd7d30c0ad61b9a504e8e272cb8391eed3f1587edf933f4f6b33437450 \
     --hash=sha256:db0562c5f71d18596dcd45e854cf2eeba27d7543e1a3acdafb7eef728f7fe85d \
     --hash=sha256:e48d4dbe0f88e53081da605ae68644e5182752803bbc2beb228cca7f1c4454d6 \
     --hash=sha256:eee884572b06bbe8a2b54f424dbd996139442cf83c76478e1ec162512e0dd2c7 \
+    --hash=sha256:fc78739b5ec6e4fb02301984a3d442a91406e7700efbe305071e7fd1c78278f2 \
     --hash=sha256:fceef7fe67c81e1585198215e42ad3fdba3a25644beda8fbdaf85f4d7b93175a
     # via fastembed
 more-itertools==10.8.0 ; (platform_machine == 'arm64' and sys_platform == 'darwin') or (platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux') \
@@ -1424,31 +1610,55 @@ mpmath==1.3.0 ; (platform_machine == 'arm64' and sys_platform == 'darwin') or (p
     --hash=sha256:a0b2b9fe80bbcd81a6647ff13108738cfb482d481d826cc0e02f5b35e5c88d2c
     # via sympy
 multidict==6.7.1 ; (platform_machine == 'arm64' and sys_platform == 'darwin') or (platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux') \
+    --hash=sha256:03ede2a6ffbe8ef936b92cb4529f27f42be7f56afcdab5ab739cd5f27fb1cbf9 \
+    --hash=sha256:0b4c48648d7649c9335cf1927a8b87fa692de3dcb15faa676c6a6f1f1aabda43 \
+    --hash=sha256:10ae39c9cfe6adedcdb764f5e8411d4a92b055e35573a2eaa88d3323289ef93c \
     --hash=sha256:128441d052254f42989ef98b7b6a6ecb1e6f708aa962c7984235316db59f50fa \
     --hash=sha256:12fad252f8b267cc75b66e8fc51b3079604e8d43a75428ffe193cd9e2195dfd6 \
+    --hash=sha256:1b99af4d9eec0b49927b4402bcbb58dea89d3e0db8806a4086117019939ad3dd \
+    --hash=sha256:1d540e51b7e8e170174555edecddbd5538105443754539193e3e1061864d444d \
     --hash=sha256:21f830fe223215dffd51f538e78c172ed7c7f60c9b96a2bf05c4848ad49921c3 \
+    --hash=sha256:25167cc263257660290fba06b9318d2026e3c910be240a146e1f66dd114af2b0 \
     --hash=sha256:253282d70d67885a15c8a7716f3a73edf2d635793ceda8173b9ecc21f2fb8292 \
+    --hash=sha256:273d23f4b40f3dce4d6c8a821c741a86dec62cded82e1175ba3d99be128147ed \
     --hash=sha256:2b41f5fed0ed563624f1c17630cb9941cf2309d4df00e494b551b5f3e3d67a23 \
     --hash=sha256:2e2d2ed645ea29f31c4c7ea1552fcfd7cb7ba656e1eafd4134a6620c9f5fdd9e \
     --hash=sha256:3758692429e4e32f1ba0df23219cd0b4fc0a52f476726fff9337d1a57676a582 \
+    --hash=sha256:38fb49540705369bab8484db0689d86c0a33a0a9f2c1b197f506b71b4b6c19b0 \
+    --hash=sha256:398c1478926eca669f2fd6a5856b6de9c0acf23a2cb59a14c0ba5844fa38077e \
+    --hash=sha256:3d51ff4785d58d3f6c91bdbffcb5e1f7ddfda557727043aa20d20ec4f65e324a \
+    --hash=sha256:401c5a650f3add2472d1d288c26deebc540f99e2fb83e9525007a74cd2116f1d \
     --hash=sha256:41f2952231456154ee479651491e94118229844dd7226541788be783be2b5108 \
     --hash=sha256:439cbebd499f92e9aa6793016a8acaa161dfa749ae86d20960189f5398a19144 \
+    --hash=sha256:4cfb48c6ea66c83bcaaf7e4dfa7ec1b6bbcf751b7db85a328902796dfde4c060 \
     --hash=sha256:55d97cc6dae627efa6a6e548885712d4864b81110ac76fa4e534c03819fa4a56 \
+    --hash=sha256:5c4b9bfc148f5a91be9244d6264c53035c8a0dcd2f51f1c3c6e30e30ebaa1c84 \
+    --hash=sha256:619e5a1ac57986dbfec9f0b301d865dddf763696435e2962f6d9cf2fdff2bb71 \
     --hash=sha256:6aac4f16b472d5b7dc6f66a0d49dd57b0e0902090be16594dc9ebfd3d17c47e7 \
+    --hash=sha256:6b83cabdc375ffaaa15edd97eb7c0c672ad788e2687004990074d7d6c9b140c8 \
     --hash=sha256:6d3bc717b6fe763b8be3f2bee2701d3c8eb1b2a8ae9f60910f1b2860c82b6c49 \
     --hash=sha256:7ff981b266af91d7b4b3793ca3382e53229088d193a85dfad6f5f4c27fc73e5d \
     --hash=sha256:935434b9853c7c112eee7ac891bc4cb86455aa631269ae35442cb316790c1445 \
+    --hash=sha256:95922cee9a778659e91db6497596435777bd25ed116701a4c034f8e46544955a \
     --hash=sha256:97891f3b1b3ffbded884e2916cacf3c6fc87b66bb0dde46f7357404750559f33 \
+    --hash=sha256:98bc624954ec4d2c7cb074b8eefc2b5d0ce7d482e410df446414355d158fe4ca \
+    --hash=sha256:9c90fed18bffc0189ba814749fdcc102b536e83a9f738a9003e569acd540a733 \
     --hash=sha256:9d624335fd4fa1c08a53f8b4be7676ebde19cd092b3895c421045ca87895b429 \
+    --hash=sha256:a088b62bd733e2ad12c50dad01b7d0166c30287c166e137433d3b410add807a6 \
     --hash=sha256:a90f75c956e32891a4eda3639ce6dd86e87105271f43d43442a3aedf3cddf172 \
+    --hash=sha256:af959b9beeb66c822380f222f0e0a1889331597e81f1ded7f374f3ecb0fd6c52 \
     --hash=sha256:b0fa96985700739c4c7853a43c0b3e169360d6855780021bfc6d0f1ce7c123e7 \
     --hash=sha256:bfde23ef6ed9db7eaee6c37dcec08524cb43903c60b285b172b6c094711b3961 \
+    --hash=sha256:c3a32d23520ee37bf327d1e1a656fec76a2edd5c038bf43eddfa0572ec49c60b \
     --hash=sha256:c76c4bec1538375dad9d452d246ca5368ad6e1c9039dadcf007ae59c70619ea1 \
+    --hash=sha256:c9035dde0f916702850ef66460bc4239d89d08df4d02023a5926e7446724212c \
+    --hash=sha256:da62917e6076f512daccfbbde27f46fed1c98fee202f0559adec8ee0de67f71a \
     --hash=sha256:e82d14e3c948952a1a85503817e038cba5905a3352de76b9a465075d072fba23 \
     --hash=sha256:e954b24433c768ce78ab7929e84ccf3422e46deb45a4dc9f93438f8217fa2d34 \
     --hash=sha256:eb0ce7b2a32d09892b3dd6cc44877a0d02a33241fafca5f25c8b6b62374f8b75 \
     --hash=sha256:ec6652a1bee61c53a3e5776b6049172c53b6aaba34f18c9ad04f82712bac623d \
     --hash=sha256:f2a0a924d4c2e9afcd7ec64f9de35fcd96915149b2216e1cb2c10a56df483855 \
+    --hash=sha256:f5dd81c45b05518b9aa4da4aa74e1c93d715efa234fd3e8a179df611cc85e5f4 \
     --hash=sha256:fc5907494fccf3e7d3f94f95c91d6336b092b5fc83811720fae5e2765890dfba
     # via
     #   aiobotocore
@@ -1791,11 +2001,20 @@ orjson==3.11.8 ; (platform_machine == 'arm64' and sys_platform == 'darwin') or (
     --hash=sha256:0022bb50f90da04b009ce32c512dc1885910daa7cb10b7b0cba4505b16db82a8 \
     --hash=sha256:003646067cc48b7fcab2ae0c562491c9b5d2cbd43f1e5f16d98fd118c5522d34 \
     --hash=sha256:093d489fa039ddade2db541097dbb484999fcc65fc2b0ff9819141e2ab364f25 \
+    --hash=sha256:14778ffd0f6896aa613951a7fbf4690229aa7a543cb2bfbe9f358e08aafa9546 \
     --hash=sha256:1cd0b77e77c95758f8e1100139844e99f3ccc87e71e6fc8e1c027e55807c549f \
+    --hash=sha256:29c009e7a2ca9ad0ed1376ce20dd692146a5d9fe4310848904b6b4fee5c5c137 \
     --hash=sha256:3f23426851d98478c8970da5991f84784a76682213cd50eb73a1da56b95239dc \
+    --hash=sha256:3f262401086a3960586af06c054609365e98407151f5ea24a62893a40d80dbbb \
+    --hash=sha256:436c4922968a619fb7fef1ccd4b8b3a76c13b67d607073914d675026e911a65c \
     --hash=sha256:53a0f57e59a530d18a142f4d4ba6dfc708dc5fdedce45e98ff06b44930a2a48f \
+    --hash=sha256:54153d21520a71a4c82a0dbb4523e468941d549d221dc173de0f019678cf3813 \
+    --hash=sha256:55120759e61309af7fcf9e961c6f6af3dde5921cdb3ee863ef63fd9db126cae6 \
+    --hash=sha256:58a4a208a6fbfdb7a7327b8f201c6014f189f721fd55d047cafc4157af1bc62a \
+    --hash=sha256:5d8b5231de76c528a46b57010bbd83fb51e056aa0220a372fd5065e978406f1c \
     --hash=sha256:5f8952d6d2505c003e8f0224ff7858d341fa4e33fef82b91c4ff0ef070f2393c \
     --hash=sha256:6a3d159d5ffa0e3961f353c4b036540996bf8b9697ccc38261c0eac1fd3347a6 \
+    --hash=sha256:6eda5b8b6be91d3f26efb7dc6e5e68ee805bc5617f65a328587b35255f138bf4 \
     --hash=sha256:705b895b781b3e395c067129d8551655642dfe9437273211d5404e87ac752b53 \
     --hash=sha256:708c95f925a43ab9f34625e45dcdadf09ec8a6e7b664a938f2f8d5650f6c090b \
     --hash=sha256:76070a76e9c5ae661e2d9848f216980d8d533e0f8143e6ed462807b242e3c5e8 \
@@ -1804,9 +2023,12 @@ orjson==3.11.8 ; (platform_machine == 'arm64' and sys_platform == 'darwin') or (
     --hash=sha256:97c8f5d3b62380b70c36ffacb2a356b7c6becec86099b177f73851ba095ef623 \
     --hash=sha256:9b48e274f8824567d74e2158199e269597edf00823a1b12b63d48462bbf5123e \
     --hash=sha256:a5c370674ebabe16c6ccac33ff80c62bf8a6e59439f5e9d40c1f5ab8fd2215b7 \
+    --hash=sha256:ea56a955056a6d6c550cf18b3348656a9d9a4f02e2d0c02cabf3c73f1055d506 \
     --hash=sha256:ebaed4cef74a045b83e23537b52ef19a367c7e3f536751e355a2a394f8648559 \
     --hash=sha256:ed193ce51d77a3830cad399a529cd4ef029968761f43ddc549e1bc62b40d88f8 \
-    --hash=sha256:f30491bc4f862aa15744b9738517454f1e46e56c972a2be87d70d727d5b2a8f8
+    --hash=sha256:f30491bc4f862aa15744b9738517454f1e46e56c972a2be87d70d727d5b2a8f8 \
+    --hash=sha256:f89b6d0b3a8d81e1929d3ab3d92bbc225688bd80a770c49432543928fe09ac55 \
+    --hash=sha256:ff51f9d657d1afb6f410cb435792ce4e1fe427aab23d2fcd727a2876e21d4cb6
     # via
     #   langgraph-sdk
     #   langsmith
@@ -1814,19 +2036,25 @@ orjson==3.11.8 ; (platform_machine == 'arm64' and sys_platform == 'darwin') or (
     #   pymilvus
 ormsgpack==1.12.2 ; (platform_machine == 'arm64' and sys_platform == 'darwin') or (platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux') \
     --hash=sha256:0b39e629fd2e1c5b2f46f99778450b59454d1f901bc507963168985e79f09c5d \
+    --hash=sha256:29a9f17a3dac6054c0dce7925e0f4995c727f7c41859adf9b5572180f640d172 \
     --hash=sha256:34d5b28b3570e9fed9a5a76528fc7230c3c76333bc214798958e58e9b79cc18a \
+    --hash=sha256:3708693412c28f3538fb5a65da93787b6bbab3484f6bc6e935bfb77a62400ae5 \
     --hash=sha256:39c1bd2092880e413902910388be8715f70b9f15f20779d44e673033a6146f2d \
     --hash=sha256:43013a3f3e2e902e1d05e72c0f1aeb5bedbb8e09240b51e26792a3c89267e181 \
     --hash=sha256:50b7249244382209877deedeee838aef1542f3d0fc28b8fe71ca9d7e1896a0d7 \
     --hash=sha256:58d379d72b6c5e964851c77cfedfb386e474adee4fd39791c2c5d9efb53505cc \
+    --hash=sha256:5af04800d844451cf102a59c74a841324868d3f1625c296a06cc655c542a6685 \
     --hash=sha256:5ea60cb5f210b1cfbad8c002948d73447508e629ec375acb82910e3efa8ff355 \
     --hash=sha256:7a29d09b64b9694b588ff2f80e9826bdceb3a2b91523c5beae1fab27d5c940e7 \
     --hash=sha256:7c8b1667a72cbba74f0ae7ecf3105a5e01304620ed14528b2cb4320679d2869b \
     --hash=sha256:8463a3fc5f09832e67bdb0e2fda6d518dc4281b133166146a67f54c08496442e \
     --hash=sha256:944a2233640273bee67521795a73cf1e959538e0dfb7ac635505010455e53b33 \
+    --hash=sha256:958dcb270d30a7cb633a45ee62b9444433fa571a752d2ca484efdac07480876e \
     --hash=sha256:bd5f4bf04c37888e864f08e740c5a573c4017f6fd6e99fa944c5c935fabf2dd9 \
     --hash=sha256:c6a4c34ddef109647c769d69be65fa1de7a6022b02ad45546a69b3216573eb4a \
     --hash=sha256:cec70477d4371cd524534cd16472d8b9cc187e0e3043a8790545a9a9b296c258 \
+    --hash=sha256:df6961442140193e517303d0b5d7bc2e20e69a879c2d774316125350c4a76b92 \
+    --hash=sha256:eddffb77eff0bad4e67547d67a130604e7e2dfbb7b0cde0796045be4090f35c6 \
     --hash=sha256:f3601f19afdbea273ed70b06495e5794606a8b690a568d6c996a90d7255e51c1 \
     --hash=sha256:fcd55e5f6ba0dbce624942adf9f152062135f991a0126064889f68eb850de0dd
     # via langgraph-checkpoint
@@ -1991,31 +2219,51 @@ prompt-toolkit==3.0.52 ; (platform_machine == 'arm64' and sys_platform == 'darwi
     #   nemo-platform-sdk
     #   nemoguardrails
 propcache==0.4.1 ; (platform_machine == 'arm64' and sys_platform == 'darwin') or (platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux') \
+    --hash=sha256:0013cb6f8dde4b2a2f66903b8ba740bdfe378c943c4377a200551ceb27f379e4 \
     --hash=sha256:031dce78b9dc099f4c29785d9cf5577a3faf9ebf74ecbd3c856a7b92768c3df3 \
     --hash=sha256:15932ab57837c3368b024473a525e25d316d8353016e7cc0e5ba9eb343fbb1cf \
+    --hash=sha256:17612831fda0138059cc5546f4d12a2aacfb9e47068c06af35c400ba58ba7393 \
+    --hash=sha256:2bb07ffd7eaad486576430c89f9b215f9e4be68c4866a96e97db9e97fead85dc \
     --hash=sha256:333ddb9031d2704a301ee3e506dc46b1fe5f294ec198ed6435ad5b6a085facfe \
     --hash=sha256:35c3277624a080cc6ec6f847cbbbb5b49affa3598c4535a0a4682a697aaa5c75 \
     --hash=sha256:3d902a36df4e5989763425a8ab9e98cd8ad5c52c823b34ee7ef307fd50582566 \
+    --hash=sha256:405aac25c6394ef275dee4c709be43745d36674b223ba4eb7144bf4d691b7367 \
+    --hash=sha256:41a89040cb10bd345b3c1a873b2bf36413d48da1def52f268a055f7398514874 \
     --hash=sha256:43eedf29202c08550aac1d14e0ee619b0430aaef78f85864c1a892294fbc28cf \
     --hash=sha256:4d3df5fa7e36b3225954fba85589da77a0fe6a53e3976de39caf04a0db4c36f1 \
+    --hash=sha256:4d7af63f9f93fe593afbf104c21b3b15868efb2c21d07d8732c0c4287e66b6a6 \
+    --hash=sha256:501d20b891688eb8e7aa903021f0b72d5a55db40ffaab27edefd1027caaafa61 \
     --hash=sha256:564d9f0d4d9509e1a870c920a89b2fec951b44bf5ba7d537a9e7c1ccec2c18af \
+    --hash=sha256:580e97762b950f993ae618e167e7be9256b8353c2dcd8b99ec100eb50f5286aa \
     --hash=sha256:60a8fda9644b7dfd5dece8c61d8a85e271cb958075bfc4e01083c148b61a7caf \
+    --hash=sha256:678ae89ebc632c5c204c794f8dab2837c5f159aeb59e6ed0539500400577298c \
+    --hash=sha256:67fad6162281e80e882fb3ec355398cf72864a54069d060321f6cd0ade95fe85 \
     --hash=sha256:6918ecbd897443087a3b7cd978d56546a812517dcaaca51b49526720571fa93e \
     --hash=sha256:6f8b465489f927b0df505cbe26ffbeed4d6d8a2bbc61ce90eb074ff129ef0ab1 \
     --hash=sha256:92d1935ee1f8d7442da9c0c4fa7ac20d07e94064184811b685f5c4fada64553b \
     --hash=sha256:981333cb2f4c1896a12f4ab92a9cc8f09ea664e9b7dbdc4eff74627af3a11c0f \
     --hash=sha256:9a0bd56e5b100aef69bd8562b74b46254e7c8812918d3baa700c8a8009b0af66 \
     --hash=sha256:a78372c932c90ee474559c5ddfffd718238e8673c340dc21fe45c5b8b54559a0 \
+    --hash=sha256:a9695397f85973bb40427dedddf70d8dc4a44b22f1650dd4af9eedf443d45165 \
+    --hash=sha256:ab08df6c9a035bee56e31af99be621526bd237bea9f32def431c656b29e41778 \
+    --hash=sha256:af223b406d6d000830c6f65f1e6431783fc3f713ba3e6cc8c024d5ee96170a4b \
     --hash=sha256:af2a6052aeb6cf17d3e46ee169099044fd8224cbaf75c76a2ef596e8163e2237 \
+    --hash=sha256:c07fda85708bc48578467e85099645167a955ba093be0a2dcba962195676e859 \
     --hash=sha256:c0ef0aaafc66fbd87842a3fe3902fd889825646bc21149eafe47be6072725835 \
     --hash=sha256:cae65ad55793da34db5f54e4029b89d3b9b9490d8abe1b4c7ab5d4b8ec7ebf74 \
+    --hash=sha256:cfc27c945f422e8b5071b6e93169679e4eb5bf73bbcbf1ba3ae3a83d2f78ebd9 \
     --hash=sha256:d472aeb4fbf9865e0c6d622d7f4d54a4e101a89715d8904282bb5f9a2f476c3f \
     --hash=sha256:e153e9cd40cc8945138822807139367f256f89c6810c2634a4f6902b52d3b4e2 \
+    --hash=sha256:e35b88984e7fa64aacecea39236cee32dd9bd8c55f57ba8a75cf2399553f9bd7 \
+    --hash=sha256:e9b0d8d0845bbc4cfcdcbcdbf5086886bc8157aa963c31c777ceff7846c77757 \
     --hash=sha256:ec17c65562a827bba85e3872ead335f95405ea1674860d96483a02f5c698fa72 \
+    --hash=sha256:ee17f18d2498f2673e432faaa71698032b0127ebf23ae5974eeaf806c279df24 \
     --hash=sha256:f048da1b4f243fc44f205dfd320933a951b8d89e0afd4c7cacc762a8b9165207 \
+    --hash=sha256:f10207adf04d08bec185bae14d9606a1444715bc99180f9331c9c02093e1959e \
     --hash=sha256:f48107a8c637e80362555f37ecf49abe20370e557cc4ab374f04ec4423c97c3d \
     --hash=sha256:f95393b4d66bfae908c3ca8d169d5f79cd65636ae15b5e7a4f6e67af675adb0e \
     --hash=sha256:fc38cba02d1acba4e2869eef1a57a43dfbd3d49a59bf90dda7444ec2be6a5570 \
+    --hash=sha256:fd0858c20f078a32cf55f7e81473d96dcf3b93fd2ccdb3d40fdf54b8573df3af \
     --hash=sha256:fd6f30fdcf9ae2a70abd34da54f18da086160e4d7d9251f81f3da0ff84fc5a48
     # via
     #   aiohttp
@@ -2024,6 +2272,7 @@ protobuf==6.33.6 ; (platform_machine == 'arm64' and sys_platform == 'darwin') or
     --hash=sha256:77179e006c476e69bf8e8ce866640091ec42e1beb80b213c3900006ecfba6901 \
     --hash=sha256:9720e6961b251bde64edfdab7d500725a2af5280f3f4c87e57c0208376aa8c3a \
     --hash=sha256:a6768d25248312c297558af96a9f9c929e8c4cee0659cb07e780731095f38135 \
+    --hash=sha256:c96c37eec15086b79762ed265d59ab204dabc53056e3443e702d2681f4b39ce3 \
     --hash=sha256:e2afbae9b8e1825e3529f88d514754e094278bb95eadc0e199751cdd9a2e82a2 \
     --hash=sha256:e9db7e292e0ab79dd108d7f1a94fe31601ce1ee3f7b79e0692043423020b0593
     # via
@@ -2049,20 +2298,32 @@ psutil==7.2.2 ; (platform_machine == 'arm64' and sys_platform == 'darwin') or (p
     #   ngcsdk
     #   opentelemetry-instrumentation-system-metrics
 psycopg2-binary==2.9.11 ; (platform_machine == 'arm64' and sys_platform == 'darwin') or (platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux') \
+    --hash=sha256:00ce1830d971f43b667abe4a56e42c1e2d594b32da4802e44a73bacacb25535f \
     --hash=sha256:04195548662fa544626c8ea0f06561eb6203f1984ba5b4562764fbeb4c3d14b1 \
     --hash=sha256:2c226ef95eb2250974bf6fa7a842082b31f68385c4f3268370e3f3870e7859ee \
     --hash=sha256:2e164359396576a3cc701ba8af4751ae68a07235d7a380c631184a611220d9a4 \
+    --hash=sha256:31b32c457a6025e74d233957cc9736742ac5a6cb196c6b68499f6bb51390bd6a \
     --hash=sha256:366df99e710a2acd90efed3764bb1e28df6c675d33a7fb40df9b7281694432ee \
     --hash=sha256:5c6ff3335ce08c75afaed19e08699e8aacf95d4a260b495a4a8545244fe2ceb3 \
     --hash=sha256:62b6d93d7c0b61a1dd6197d208ab613eb7dcfdcca0a49c42ceb082257991de9d \
     --hash=sha256:763c93ef1df3da6d1a90f86ea7f3f806dc06b21c198fa87c3c25504abec9404a \
+    --hash=sha256:84011ba3109e06ac412f95399b704d3d6950e386b7994475b231cf61eec2fc1f \
     --hash=sha256:8c55b385daa2f92cb64b12ec4536c66954ac53654c7f15a203578da4e78105c0 \
+    --hash=sha256:a1cf393f1cdaf6a9b57c0a719a1068ba1069f022a59b8b1fe44b006745b59757 \
+    --hash=sha256:a311f1edc9967723d3511ea7d2708e2c3592e3405677bf53d5c7246753591fbb \
     --hash=sha256:ab8905b5dcb05bf3fb22e0cf90e10f469563486ffb6a96569e51f897c750a76a \
+    --hash=sha256:b31e90fdd0f968c2de3b26ab014314fe814225b6c324f770952f7d38abf17e3c \
     --hash=sha256:b6aed9e096bf63f9e75edf2581aa9a7e7186d97ab5c177aa6c87797cd591236c \
     --hash=sha256:ba34475ceb08cccbdd98f6b46916917ae6eeb92b5ae111df10b544c3a4621dc4 \
+    --hash=sha256:bf940cd7e7fec19181fdbc29d76911741153d51cab52e5c21165f3262125685e \
+    --hash=sha256:c0377174bf1dd416993d16edc15357f6eb17ac998244cca19bc67cdc0e2e5766 \
     --hash=sha256:cffe9d7697ae7456649617e8bb8d7a45afb71cd13f7ab22af3e5c61f04840908 \
+    --hash=sha256:d526864e0f67f74937a8fce859bd56c979f5e2ec57ca7c627f5f1071ef7fee60 \
+    --hash=sha256:d57c9c387660b8893093459738b6abddbb30a7eab058b77b0d0d1c7d521ddfd7 \
     --hash=sha256:ebb415404821b6d1c47353ebe9c8645967a5235e6d88f914147e7fd411419e6f \
+    --hash=sha256:edcb3aeb11cb4bf13a2af3c53a15b3d612edeb6409047ea0b5d6a21a9d744b34 \
     --hash=sha256:ef7a6beb4beaa62f88592ccc65df20328029d721db309cb3250b0aae0fa146c3 \
+    --hash=sha256:f07c9c4a5093258a03b28fab9b4f151aa376989e7f35f855088234e656ee6a94 \
     --hash=sha256:f090b7ddd13ca842ebfe301cd587a76a4cf0913b1e429eb92c1be5dbeb1a19bc \
     --hash=sha256:fa0f693d3c68ae925966f0b14b8edda71696608039f4ed61b1fe9ffa468d16db
     # via nmp-entities
@@ -2071,20 +2332,26 @@ py-key-value-aio==0.4.4 ; (platform_machine == 'arm64' and sys_platform == 'darw
     --hash=sha256:e3012e6243ed7cc09bb05457bd4d03b1ba5c2b1ca8700096b3927db79ffbbe55
     # via fastmcp
 py-rust-stemmers==0.1.5 ; (platform_machine == 'arm64' and sys_platform == 'darwin') or (platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux') \
+    --hash=sha256:07b3b8582313ef8a7f544acf2c887f27c3dd48c5ddca028fa0f498de7380e24f \
+    --hash=sha256:0ae0540453843bc36937abb54fdbc0d5d60b51ef47aa9667afd05af9248e09eb \
     --hash=sha256:191ea8bf922c984631ffa20bf02ef0ad7eec0465baeaed3852779e8f97c7e7a3 \
     --hash=sha256:1c3593d895453fa06bf70a7b76d6f00d06def0f91fc253fe4260920650c5e078 \
     --hash=sha256:1f9efc4da5e734bdd00612e7506de3d0c9b7abc4b89d192742a0569d0d1fe749 \
     --hash=sha256:31ff4fb9417cec35907c18a6463e3d5a4941a5aa8401f77fbb4156b3ada69e3f \
+    --hash=sha256:35d32f6e7bdf6fd90e981765e32293a8be74def807147dea9fdc1f65d6ce382f \
     --hash=sha256:4d62410ada44a01e02974b85d45d82f4b4c511aae9121e5f3c1ba1d0bea9126b \
     --hash=sha256:4e308fc7687901f0c73603203869908f3156fa9c17c4ba010a7fcc98a7a1c5f2 \
+    --hash=sha256:541d4b5aa911381e3d37ec483abb6a2cf2351b4f16d5e8d77f9aa2722956662a \
     --hash=sha256:5845709d48afc8b29e248f42f92431155a3d8df9ba30418301c49c6072b181b0 \
     --hash=sha256:804944eeb5c5559443d81f30c34d6e83c6292d72423f299e42f9d71b9d240941 \
     --hash=sha256:85944262c248ea30444155638c9e148a3adc61fe51cf9a3705b4055b564ec95d \
     --hash=sha256:910d87d39ba75da1fe3d65df88b926b4b454ada8d73893cbd36e258a8a648158 \
     --hash=sha256:96ccc7fd042ffc3f7f082f2223bb7082ed1423aa6b43d5d89ab23e321936c045 \
     --hash=sha256:a231dc6f0b2a5f12a080dfc7abd9e6a4ea0909290b10fd0a4620e5a0f52c3d17 \
+    --hash=sha256:a979c3f4ff7ad94a0d4cf566ca7bfecebb59e66488cc158e64485cf0c9a7879f \
     --hash=sha256:b28ef729a4c83c7d9418be3c23c0372493fcccc67e86783ff04596ef8a208cdf \
     --hash=sha256:c52c5c326de78c70cfc71813fa56818d1bd4894264820d037d2be0e805b477bd \
+    --hash=sha256:cc2cc8d2b36bc05b8b06506199ac63d437360ae38caefd98cd19e479d35afd42 \
     --hash=sha256:d8f374c0f26ef35fb87212686add8dff394bcd9a1364f14ce40fe11504e25e30 \
     --hash=sha256:e48bfd5e3ce9d223bfb9e634dc1425cf93ee57eef6f56aa9a7120ada3990d4be \
     --hash=sha256:e9c310cfb5c2470d7c7c8a0484725965e7cab8b1237e106a0863d5741da3e1f7 \
@@ -2159,6 +2426,7 @@ pydantic==2.12.5 ; (platform_machine == 'arm64' and sys_platform == 'darwin') or
     #   nmp-auth
     #   nmp-automodel
     #   nmp-common
+    #   nmp-customization-common
     #   nmp-entities
     #   nmp-files
     #   nmp-inference-gateway
@@ -2179,27 +2447,40 @@ pydantic==2.12.5 ; (platform_machine == 'arm64' and sys_platform == 'darwin') or
 pydantic-core==2.41.5 ; (platform_machine == 'arm64' and sys_platform == 'darwin') or (platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux') \
     --hash=sha256:01a3d0ab748ee531f4ea6c3e48ad9dac84ddba4b0d82291f87248f2f9de8d740 \
     --hash=sha256:03b77d184b9eb40240ae9fd676ca364ce1085f203e1b1256f8ab9984dca80a84 \
+    --hash=sha256:03ca43e12fab6023fc79d28ca6b39b05f794ad08ec2feccc59a339b02f2b3d33 \
     --hash=sha256:070259a8818988b9a84a449a2a7337c7f430a22acc0859c6b110aa7212a6d9c0 \
     --hash=sha256:08daa51ea16ad373ffd5e7606252cc32f07bc72b28284b6bc9c6df804816476e \
     --hash=sha256:0cbaad15cb0c90aa221d43c00e77bb33c93e8d36e0bf74760cd00e732d10a6a0 \
     --hash=sha256:112e305c3314f40c93998e567879e887a3160bb8689ef3d2c04b6cc62c33ac34 \
     --hash=sha256:266fb4cbf5e3cbd0b53669a6d1b039c45e3ce651fd5442eff4d07c2cc8d66808 \
+    --hash=sha256:34a64bc3441dc1213096a20fe27e8e128bd3ff89921706e83c0b1ac971276594 \
     --hash=sha256:378bec5c66998815d224c9ca994f1e14c0c21cb95d2f52b6021cc0b2a58f2a5a \
     --hash=sha256:4009935984bd36bd2c774e13f9a09563ce8de4abaa7226f5108262fa3e637284 \
     --hash=sha256:406bf18d345822d6c21366031003612b9c77b3e29ffdb0f612367352aab7d586 \
+    --hash=sha256:482c982f814460eabe1d3bb0adfdc583387bd4691ef00b90575ca0d2b6fe2294 \
     --hash=sha256:58133647260ea01e4d0500089a8c4f07bd7aa6ce109682b1426394988d8aaacc \
     --hash=sha256:634e8609e89ceecea15e2d61bc9ac3718caaaa71963717bf3c8f38bfde64242c \
+    --hash=sha256:6561e94ba9dacc9c61bce40e2d6bdc3bfaa0259d3ff36ace3b1e6901936d2e3e \
+    --hash=sha256:65840751b72fbfd82c3c640cff9284545342a4f1eb1586ad0636955b261b0b05 \
+    --hash=sha256:6cb58b9c66f7e4179a2d5e0f849c48eff5c1fca560994d6eb6543abf955a149e \
     --hash=sha256:76d0819de158cd855d1cbb8fcafdf6f5cf1eb8e470abe056d5d161106e38062b \
     --hash=sha256:7f3bf998340c6d4b0c9a2f02d6a400e51f123b59565d74dc60d252ce888c260b \
+    --hash=sha256:88942d3a3dff3afc8288c21e565e476fc278902ae4d6d134f1eeda118cc830b1 \
     --hash=sha256:915c3d10f81bec3a74fbd4faebe8391013ba61e5a1a8d48c4455b923bdda7858 \
     --hash=sha256:93e8740d7503eb008aa2df04d3b9735f845d43ae845e6dcd2be0b55a2da43cd2 \
+    --hash=sha256:97aeba56665b4c3235a0e52b2c2f5ae9cd071b8a8310ad27bddb3f7fb30e9aa2 \
+    --hash=sha256:a668ce24de96165bb239160b3d854943128f4334822900534f2fe947930e5770 \
     --hash=sha256:aabf5777b5c8ca26f7824cb4a120a740c9588ed58df9b2d196ce92fba42ff8dc \
     --hash=sha256:bfea2a5f0b4d8d43adf9d7b8bf019fb46fdd10a2e5cde477fbcb9d1fa08c68e1 \
     --hash=sha256:c007fe8a43d43b3969e8469004e9845944f1a80e6acd47c150856bb87f230c56 \
     --hash=sha256:c23e27686783f60290e36827f9c626e63154b82b116d7fe9adba1fda36da706c \
     --hash=sha256:c9e19dd6e28fdcaa5a1de679aec4141f691023916427ef9bae8584f9c2fb3b0e \
+    --hash=sha256:dc799088c08fa04e43144b164feb0c13f9a0bc40503f8df3e9fde58a3c0c101e \
+    --hash=sha256:e536c98a7626a98feb2d3eaf75944ef6f3dbee447e1f841eae16f2f0a72d8ddc \
+    --hash=sha256:e7b576130c69225432866fe2f4a469a85a54ade141d96fd396dffcf607b558f8 \
     --hash=sha256:e96cea19e34778f8d59fe40775a7a574d95816eb150850a85a7a4c8f4b94ac69 \
     --hash=sha256:eceb81a8d74f9267ef4081e246ffd6d129da5d87e37a77c9bde550cb04870c1c \
+    --hash=sha256:ed2e99c456e3fadd05c991f8f437ef902e00eedf34320ba2b0842bd1c3ca3a75 \
     --hash=sha256:f14f8f046c14563f8eb3f45f499cc658ab8d10072961e07225e507adb700e93f \
     --hash=sha256:f15489ba13d61f670dcc96772e733aad1a6f9c429cc27574c6cdaed82d0146ad \
     --hash=sha256:f31d95a179f8d64d90f6831d71fa93290893a33148d890ba15de25642c5d075b
@@ -2225,6 +2506,7 @@ pydantic-settings==2.8.1 ; (platform_machine == 'arm64' and sys_platform == 'dar
     #   nmp-auth
     #   nmp-automodel
     #   nmp-common
+    #   nmp-customization-common
     #   nmp-entities
     #   nmp-files
     #   nmp-guardrails
@@ -2325,9 +2607,12 @@ pyyaml==6.0.3 ; (platform_machine == 'arm64' and sys_platform == 'darwin') or (p
     --hash=sha256:2283a07e2c21a2aa78d9c4442724ec1eb15f5e42a723b99cb3d822d48f5f7ad1 \
     --hash=sha256:37503bfbfc9d2c40b344d06b2199cf0e96e97957ab1c1b546fd4f87e53e5d3e4 \
     --hash=sha256:41715c910c881bc081f1e8872880d3c650acf13dfa8214bad49ed4cede7c34ea \
+    --hash=sha256:5fdec68f91a0c6739b380c83b951e2c72ac0197ace422360e6d5a959d8d97b2c \
     --hash=sha256:652cb6edd41e718550aad172851962662ff2681490a8a711af6a4d288dd96824 \
+    --hash=sha256:850774a7879607d3a6f50d36d04f00ee69e7fc816450e5f7e58d7f17f1ae5c00 \
     --hash=sha256:8dc52c23056b9ddd46818a57b78404882310fb473d63f17b07d5c40421e47f8e \
     --hash=sha256:9149cad251584d5fb4981be1ecde53a1ca46c891a79788c0df828d2f166bda28 \
+    --hash=sha256:a33284e20b78bd4a18c8c2282d549d10bc8408a2a7ff57653c0cf0b9be0afce5 \
     --hash=sha256:b8bb0864c5a28024fac8a632c443c87c5aa6f215c0b126c449ae1a150412f31d \
     --hash=sha256:ba1cc08a7ccde2d2ec775841541641e4548226580ab850948cbfda66a1befcdc \
     --hash=sha256:d76623373421df22fb4cf8817020cbb7ef15c725b9d5e45f17e189bfc384190f \
@@ -2379,28 +2664,52 @@ referencing==0.36.2 ; (platform_machine == 'arm64' and sys_platform == 'darwin')
     #   jsonschema-path
     #   jsonschema-specifications
 regex==2026.5.9 ; (platform_machine == 'arm64' and sys_platform == 'darwin') or (platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux') \
+    --hash=sha256:01f0f5f55f4b64dacec85dc116d3c05fd23ad3ff037bbc73a2085775953c2611 \
     --hash=sha256:01f28d868834624c934b8d2e0aa1c8341337e37831f4a012f18a5afcba4cbaf3 \
+    --hash=sha256:0f9eede6a5cbdc02d4978090186390936e1776a7d1359b21e41014c609880bcf \
     --hash=sha256:1268eddd8486dc561d08eee1156e40aa3a8fe10f4bdec8fa653b455fcbffd12c \
+    --hash=sha256:205109e96b3cf5adf8f4cd62bedde9487feb282b9497a3535451e5a24cd706a0 \
     --hash=sha256:2a661a7d270a61f7cf460caee8b9fa2d5ef9e5c681234bcb9e0fe14f488e7dfc \
     --hash=sha256:2acfb48634f64996b57f90f39afa692ff362162722581921fe92239a59960f3c \
     --hash=sha256:2efa205e6d98b24d1f3ab395c11aa15cdf10935bca283d0285e0499c284fba21 \
+    --hash=sha256:39617fb0cde9c0e6306dc70e3bfc096f3da793219879f7ae7aa341a69fbdcf6d \
+    --hash=sha256:3dd4a3ff360dfb836fecdb93a4598f9d6e2ac81e3e397125145c6221bf58cf4c \
     --hash=sha256:4ebe8f0b5ec5a5024dc4a4c59f444c4e9afc5f2abdbb8962065b75d27fb971f9 \
     --hash=sha256:4eeb011098fcb77af513dcef521a3dbecbf8849b1e38940759d293b7a93f5026 \
+    --hash=sha256:508f56a89ba9cb26e4168cbc37dbd60a28d82430a9e18ad1d25fe0883c314ca2 \
+    --hash=sha256:57e8915c7986aa33d25e4d3629cef711cd2863f2961b10409f0c04cb8b7d9020 \
     --hash=sha256:57eeeb05db7979413dec5438f2db21d7ecbba787cde7a711df1a6f6df672aa06 \
     --hash=sha256:6441cc660d76107934a09c22167200839a0e89604a6297f78a974e66e931d2c0 \
     --hash=sha256:728d8bfd28a8845c8b6bc5dc7ce010453d206396786c0765c2740cb65f37791e \
+    --hash=sha256:7e30b874d341fac767d7df5a0870540541c2c054b80cfaac116e8d367a8a7ff2 \
+    --hash=sha256:7e87577720152d2caae19fe2baaf1f8d5ca12091e9e229f03915c37d1e4b9178 \
+    --hash=sha256:8e76e8161ad00694cfce6767d5dea860c6391ac5b83e5c3a39661e696f11fc7e \
     --hash=sha256:8f3af7a4903c5c04a11a196a5aa75cdd7dd3f8508132f9fb3259d9f5908e3b88 \
+    --hash=sha256:91328f1c23d47595ca3ef0a7557fa129c5a23404b775c770697d2f35b33e0107 \
+    --hash=sha256:93a7860539414dddaefba2b40f8771765ae17949d4c7182b876ce429e11a8309 \
+    --hash=sha256:97cf3bc1b7d7d2306772ec07366c80d9df00ff79e79cea32898883a646d2fae2 \
     --hash=sha256:992604d02e6d9c6d786c24a706a71ecffe1020fc1ef264044474cd81fa2c3919 \
     --hash=sha256:a8234aa23ec39894bfe4a3f1b85616a7032481964a13ac6fc9f10de4f6fca270 \
     --hash=sha256:a8820737949116ffff55fe18f9fc644530063ba6ebfcb8314239416e78f1347c \
     --hash=sha256:aa0fbdbac82cb3e4450d0ccde7d7a35607f4cb2dd9fba4b8b69bfaf8c9fa6aed \
     --hash=sha256:b6d189041f15691cfa2b6c4290448ec221244d225b3f5fe9e7771b34ffcdf6e2 \
     --hash=sha256:b96350aa424e79d4fd6b567b344dcbe2b2d6bfc48dfe7717587e1fa6d43da6ff \
+    --hash=sha256:c8b9b9d294cfea3cd19c718ade7cc93492b2c4991abd9a68d0b3477ae6d8e100 \
+    --hash=sha256:c9411dd64ca95477225734a93dfc8583b51916b8d5942f99d6cac21e09965451 \
     --hash=sha256:ccf5249114cc3e772ecdd88a98a86eca0fd74c61ce32a94743758c083fc05d48 \
+    --hash=sha256:cd2846168eb9ee3c513902bc8225409cb1caab31d04728b145171fa1625d9621 \
+    --hash=sha256:d29eebfc9525db68cad3c97eedd7f754fa265aa5cd0cf4f863b2421e1b48fc9f \
     --hash=sha256:d626b84406444b165fc0ba981604edea39f0588ff1f92baa23fe50799ea9afdb \
+    --hash=sha256:d659eee77986549c9ea45b861c7567e44d6287c3dc9a4565478853f7b9fe2ff6 \
+    --hash=sha256:daff2bdbaf1d23e52fdff7c0b7bc2048b68f978df6a4d107ac981f94caef2e66 \
     --hash=sha256:dd2810d22146b6d838acc5ec15602cb6b47920aa4e33015df3868eedfd20bab8 \
+    --hash=sha256:ddda5340e6c01a293027dd46232fa79eaff1b48058ce7a98f572b6445b088041 \
+    --hash=sha256:debb893095e944091c16e641a6e33c1b0f4cb61ab945ec5afbf53ce7068834d8 \
     --hash=sha256:dfbe4579b9f08036aa7d101d1835437a20783574ac66327e6b29b4018a138081 \
+    --hash=sha256:e82db382b44d0111b22601c509c89f64434816c9e0eef9d1989cda8cc6ff1c04 \
+    --hash=sha256:ea9c8ecfa1b73c73b626534d6626e5340d429630943672b8480724f44e84b962 \
     --hash=sha256:ef31cbfe458e21c6122ba8150ff060e0c7789ed0d26eb423f25472584920b555 \
+    --hash=sha256:f079e50a0d3cc3cd5091fa9ff45869a2e6b2cd35895731edafb0327901a8d86d \
     --hash=sha256:f7a7c26137296beba7784de6eba69c6a93a63ccebc385e4962fe67e267a91225 \
     --hash=sha256:fd03c4f0e33280d15cae17159b899245d6b7c53d21def19b263b39655061f5ce \
     --hash=sha256:fd190e88a895a8901325fad284a3f74ea52b1da8525b76cc811fa9b1edf0ce2b
@@ -2500,57 +2809,99 @@ rignore==0.7.6 ; (platform_machine == 'arm64' and sys_platform == 'darwin') or (
     --hash=sha256:297e500c15766e196f68aaaa70e8b6db85fa23fdc075b880d8231fdfba738cd7 \
     --hash=sha256:392dcabfecbe176c9ebbcb40d85a5e86a5989559c4f988c2741da7daf1b5be25 \
     --hash=sha256:3efdcf1dd84d45f3e2bd2f93303d9be103888f56dfa7c3349b5bf4f0657ec696 \
+    --hash=sha256:53fb28882d2538cb2d231972146c4927a9d9455e62b209f85d634408c4103538 \
+    --hash=sha256:5719ea14ea2b652c0c0894be5dfde954e1853a80dea27dd2fbaa749618d837f5 \
+    --hash=sha256:5991e46ab9b4868334c9e372ab0892b0150f3f586ff2b1e314272caeb38aaedb \
+    --hash=sha256:62020dbb89a1dd4b84ab3d60547b3b2eb2723641d5fb198463643f71eaaed57d \
+    --hash=sha256:65cece3b36e5b0826d946494734c0e6aaf5a0337e18ff55b071438efe13d559e \
+    --hash=sha256:684014e42e4341ab3ea23a203551857fcc03a7f8ae96ca3aefb824663f55db32 \
     --hash=sha256:6e01cad2b0b92f6b1993f29fc01f23f2d78caf4bf93b11096d28e9d578eb08ce \
     --hash=sha256:77356ebb01ba13f8a425c3d30fcad40e57719c0e37670d022d560884a30e4767 \
+    --hash=sha256:7bbcdc52b5bf9f054b34ce4af5269df5d863d9c2456243338bc193c28022bd7b \
+    --hash=sha256:87409f7eeb1103d6b77f3472a3a0d9a5953e3ae804a55080bdcb0120ee43995b \
     --hash=sha256:90f0a00ce0c866c275bf888271f1dc0d2140f29b82fcf33cdbda1e1a6af01010 \
+    --hash=sha256:a04a3b73b75ddc12c9c9b21efcdaab33ca3832941d6f1d67bffd860941cd448a \
     --hash=sha256:aaf938530dcc0b47c4cfa52807aa2e5bfd5ca6d57a621125fe293098692f6345 \
+    --hash=sha256:b34acd532769d5a6f153a52a98dcb81615c949ab11697ce26b2eb776af2e174d \
+    --hash=sha256:b5fd5ab3840b8c16851d327ed06e9b8be6459702a53e5ab1fc4073b684b3789e \
     --hash=sha256:b9e624f6be6116ea682e76c5feb71ea91255c67c86cb75befe774365b2931961 \
+    --hash=sha256:ba5524f5178deca4d7695e936604ebc742acb8958f9395776e1fcb8133f8257a \
     --hash=sha256:bda49950d405aa8d0ebe26af807c4e662dd281d926530f03f29690a2e07d649a \
+    --hash=sha256:c081f17290d8a2b96052b79207622aa635686ea39d502b976836384ede3d303c \
     --hash=sha256:c1ad295537041dc2ed4b540fb1a3906bd9ede6ccdad3fe79770cd89e04e3c73c \
+    --hash=sha256:ced2a248352636a5c77504cb755dc02c2eef9a820a44d3f33061ce1bb8a7f2d2 \
     --hash=sha256:d24321efac92140b7ec910ac7c53ab0f0c86a41133d2bb4b0e6a7c94967f44dd \
+    --hash=sha256:d7e4bb66c13cd7602dc8931822c02dfbbd5252015c750ac5d6152b186f0a8be0 \
     --hash=sha256:d8955b57e42f2a5434670d5aa7b75eaf6e74602ccd8955dddf7045379cd762fb \
-    --hash=sha256:ee4a18b82cbbc648e4aac1510066682fe62beb5dc88e2c67c53a83954e541360
+    --hash=sha256:ee4a18b82cbbc648e4aac1510066682fe62beb5dc88e2c67c53a83954e541360 \
+    --hash=sha256:f782dbd3a65a5ac85adfff69e5c6b101285ef3f845c3a3cae56a54bebf9fe116
     # via fastapi-cloud-cli
 rouge-score==0.1.2 ; (platform_machine == 'arm64' and sys_platform == 'darwin') or (platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux') \
     --hash=sha256:c7d4da2683e68c9abf0135ef915d63a46643666f848e558a1b9f7ead17ff0f04
     # via nemo-evaluator-sdk
 rpds-py==0.30.0 ; (platform_machine == 'arm64' and sys_platform == 'darwin') or (platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux') \
+    --hash=sha256:07ae8a593e1c3c6b82ca3292efbe73c30b61332fd612e05abee07c79359f292f \
+    --hash=sha256:0a59119fc6e3f460315fe9d08149f8102aa322299deaa5cab5b40092345c2136 \
+    --hash=sha256:12f90dd7557b6bd57f40abe7747e81e0c0b119bef015ea7726e69fe550e394a4 \
+    --hash=sha256:1f3587eb9b17f3789ad50824084fa6f81921bbf9a795826570bda82cb3ed91f2 \
+    --hash=sha256:250fa00e9543ac9b97ac258bd37367ff5256666122c2d0f2bc97577c60a1818c \
     --hash=sha256:2771c6c15973347f50fece41fc447c054b7ac2ae0502388ce3b6738cd366e3d4 \
+    --hash=sha256:2e6ecb5a5bcacf59c3f912155044479af1d0b6681280048b338b28e364aca1f6 \
     --hash=sha256:33f559f3104504506a44bb666b93a33f5d33133765b0c216a5bf2f1e1503af89 \
     --hash=sha256:3896fa1be39912cf0757753826bc8bdc8ca331a28a7c4ae46b7a21280b06bb85 \
+    --hash=sha256:39c02563fc592411c2c61d26b6c5fe1e51eaa44a75aa2c8735ca88b0d9599daa \
     --hash=sha256:3adbb8179ce342d235c31ab8ec511e66c73faa27a47e076ccc92421add53e2bb \
     --hash=sha256:422c3cb9856d80b09d30d2eb255d0754b23e090034e1deb4083f8004bd0761e4 \
     --hash=sha256:47f236970bccb2233267d89173d3ad2703cd36a0e2a6e92d0560d333871a3d23 \
+    --hash=sha256:47f9a91efc418b54fb8190a6b4aa7813a23fb79c51f4bb84e418f5476c38b8db \
     --hash=sha256:4c5f36a861bc4b7da6516dbdf302c55313afa09b81931e8280361a4f6c9a2d27 \
     --hash=sha256:51a1234d8febafdfd33a42d97da7a43f5dcb120c1060e352a3fbc0c6d36e2083 \
     --hash=sha256:58edca431fb9b29950807e301826586e5bbf24163677732429770a697ffe6738 \
     --hash=sha256:5d4c2aa7c50ad4728a094ebd5eb46c452e9cb7edbfdb18f9e1221f597a73e1e7 \
+    --hash=sha256:692bef75a5525db97318e8cd061542b5a79812d711ea03dbc1f6f8dbb0c5f0d2 \
     --hash=sha256:6abc8880d9d036ecaafe709079969f56e876fcf107f7a8e9920ba6d5a3878d05 \
     --hash=sha256:73c67f2db7bc334e518d097c6d1e6fed021bbc9b7d678d6cc433478365d1d5f5 \
+    --hash=sha256:76fec018282b4ead0364022e3c54b60bf368b9d926877957a8624b58419169b7 \
     --hash=sha256:7cee9c752c0364588353e627da8a7e808a66873672bcb5f52890c33fd965b394 \
     --hash=sha256:9027da1ce107104c50c81383cae773ef5c24d296dd11c99e2629dbd7967a20c6 \
     --hash=sha256:922e10f31f303c7c920da8981051ff6d8c1a56207dbdf330d9047f6d30b70e5e \
     --hash=sha256:945dccface01af02675628334f7cf49c2af4c1c904748efc5cf7bbdf0b579f95 \
+    --hash=sha256:946fe926af6e44f3697abbc305ea168c2c31d3e3ef1058cf68f379bf0335a78d \
+    --hash=sha256:9854cf4f488b3d57b9aaeb105f06d78e5529d3145b1e4a41750167e8c213c6d3 \
+    --hash=sha256:993914b8e560023bc0a8bf742c5f303551992dcb85e247b1e5c7f4a7d145bda5 \
+    --hash=sha256:99b47d6ad9a6da00bec6aabe5a6279ecd3c06a329d4aa4771034a21e335c3a97 \
+    --hash=sha256:9cf69cdda1f5968a30a359aba2f7f9aa648a9ce4b580d6826437f2b291cfc86e \
+    --hash=sha256:a1010ed9524c73b94d15919ca4d41d8780980e1765babf85f9a2f90d247153dd \
     --hash=sha256:ac37f9f516c51e5753f27dfdef11a88330f04de2d564be3991384b2f3535d02e \
     --hash=sha256:c77afbd5f5250bf27bf516c7c4a016813eb2d3e116139aed0096940c5982da94 \
     --hash=sha256:ca28829ae5f5d569bb62a79512c842a03a12576375d5ece7d2cadf8abe96ec28 \
     --hash=sha256:cdc62c8286ba9bf7f47befdcea13ea0e26bf294bda99758fd90535cbaf408000 \
+    --hash=sha256:d948b135c4693daff7bc2dcfc4ec57237a29bd37e60c2fabf5aff2bbacf3e2f1 \
     --hash=sha256:d9a0ca5da0386dee0655b4ccdf46119df60e0f10da268d04fe7cc87886872ba7 \
     --hash=sha256:dc4f992dfe1e2bc3ebc7444f6c7051b4bc13cd8e33e43511e8ffd13bf407010d \
     --hash=sha256:dd8ff7cf90014af0c0f787eea34794ebf6415242ee1d6fa91eaba725cc441e84 \
+    --hash=sha256:dea5b552272a944763b34394d04577cf0f9bd013207bc32323b5a89a53cf9c2f \
     --hash=sha256:dff13836529b921e22f15cb099751209a60009731a68519630a24d61f0b1b30a \
     --hash=sha256:e7536cd91353c5273434b4e003cbda89034d67e7710eab8761fd918ec6c69cf8 \
-    --hash=sha256:ee6af14263f25eedc3bb918a3c04245106a42dfd4f5c2285ea6f997b1fc3f89a
+    --hash=sha256:eb2c4071ab598733724c08221091e8d80e89064cd472819285a9ab0f24bcedb9 \
+    --hash=sha256:ee6af14263f25eedc3bb918a3c04245106a42dfd4f5c2285ea6f997b1fc3f89a \
+    --hash=sha256:f8d1736cfb49381ba528cd5baa46f82fdc65c06e843dab24dd70b63d09121b3f
     # via
     #   jsonschema
     #   referencing
 ruff==0.15.7 ; (platform_machine == 'arm64' and sys_platform == 'darwin') or (platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux') \
     --hash=sha256:04f1ae61fc20fe0b148617c324d9d009b5f63412c0b16474f3d5f1a1a665f7ac \
     --hash=sha256:112c1fa316a558bb34319282c1200a8bf0495f1b735aeb78bfcb2991e6087580 \
+    --hash=sha256:1852ce241d2bc89e5dc823e03cff4ce73d816b5c6cdadd27dbfe7b03217d2a12 \
+    --hash=sha256:4806d8e09ef5e84eb19ba833d0442f7e300b23fe3f0981cae159a248a10f0036 \
     --hash=sha256:5f3e4b221fb4bd293f79912fc5e93a9063ebd6d0dcbd528f91b89172a9b8436c \
     --hash=sha256:6b39329b60eba44156d138275323cc726bbfbddcec3063da57caa8a8b1d50adf \
     --hash=sha256:7fbc2448094262552146cbe1b9643a92f66559d3761f1ad0656d4991491af49e \
-    --hash=sha256:dce0896488562f09a27b9c91b1f58a097457143931f3c4d519690dea54e624c5
+    --hash=sha256:87768c151808505f2bfc93ae44e5f9e7c8518943e5074f76ac21558ef5627c85 \
+    --hash=sha256:a81cc5b6910fb7dfc7c32d20652e50fa05963f6e13ead3c5915c41ac5d16668e \
+    --hash=sha256:b15e48602c9c1d9bdc504b472e90b90c97dc7d46c7028011ae67f3861ceba7b4 \
+    --hash=sha256:dce0896488562f09a27b9c91b1f58a097457143931f3c4d519690dea54e624c5 \
+    --hash=sha256:e0d19644f801849229db8345180a71bee5407b429dd217f853ec515e968a6912
     # via data-designer-engine
 s3transfer==0.14.0 ; (platform_machine == 'arm64' and sys_platform == 'darwin') or (platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux') \
     --hash=sha256:ea3b790c7077558ed1f02a3072fb3cb992bbbd253392f4b6e9e8976941c7d456 \
@@ -2563,11 +2914,16 @@ sacrebleu==2.6.0 ; (platform_machine == 'arm64' and sys_platform == 'darwin') or
     #   nemo-evaluator-sdk
     #   nemoplatform
 safetensors==0.8.0 ; (platform_machine == 'arm64' and sys_platform == 'darwin') or (platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux') \
+    --hash=sha256:040070828e36dc8e122178bbbd5830ff9e97920affb84cbe0f46442497bed358 \
+    --hash=sha256:4124502b78f03534117c848f87a39b8f31e577b15eff423bf8bfb95f2a8c30d0 \
+    --hash=sha256:4a95ae2b05d7726d751da4ebf626a2ca782b706e101bd894c95bc2450b1cffcc \
     --hash=sha256:7a46e5ff292c356d6991e60942ba7f79817682d3a2cef0702136448cb9c4d235 \
+    --hash=sha256:7bc0a787ba8a35be368ee3574edfa2b1ad389eebd0a72e482ae275490e3f6c98 \
     --hash=sha256:87eec7ffed2b809f05a398a8becb7d013f19f7837cd15d9748580d6cf30dbaf4 \
     --hash=sha256:8e080062fcde23be189565e1c3305d16751a218ecf9412c8601e64204eb6f846 \
     --hash=sha256:c80201d22cbf405b80647a60ada77bba06c8fba2da2743ba1e89cdcc39a81f25 \
     --hash=sha256:fabaf3e0f18a6618d9b36560682562157f77c2b71fcffc7b432be2baed9d753d \
+    --hash=sha256:fcdd41ec4628fee5799f807c73c353629130fbd942aa23d83c623dd6c9d52d78 \
     --hash=sha256:fd6f3f93c9a0a7cc2788ee63fb763353d4bd2e89b0751bc78fcf7dda00bea774
     # via transformers
 scikit-network==0.33.5 ; (platform_machine == 'arm64' and sys_platform == 'darwin') or (platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux') \
@@ -2758,6 +3114,7 @@ tenacity==9.1.4 ; (platform_machine == 'arm64' and sys_platform == 'darwin') or
     #   langchain-community
     #   langchain-core
     #   nmp-automodel
+    #   nmp-customization-common
     #   nmp-models
     #   nmp-unsloth
 tiktoken==0.12.0 ; (platform_machine == 'arm64' and sys_platform == 'darwin') or (platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux') \
@@ -2789,12 +3146,16 @@ tiktoken==0.12.0 ; (platform_machine == 'arm64' and sys_platform == 'darwin') or
     #   nemo-anonymizer
     #   ragas
 tokenizers==0.22.2 ; (platform_machine == 'arm64' and sys_platform == 'darwin') or (platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux') \
+    --hash=sha256:1c774b1276f71e1ef716e5486f21e76333464f47bece56bbd554485982a9e03e \
     --hash=sha256:1e418a55456beedca4621dbab65a318981467a2b188e982a23e117f115ce5001 \
     --hash=sha256:2249487018adec45d6e3554c71d46eb39fa8ea67156c640f7513eb26f318cec7 \
+    --hash=sha256:25b85325d0815e86e0bac263506dd114578953b7b53d7de09a6485e4a160a7dd \
     --hash=sha256:29c30b83d8dcd061078b05ae0cb94d3c710555fbb44861139f9f83dcca3dc3e4 \
     --hash=sha256:369cc9fc8cc10cb24143873a0d95438bb8ee257bb80c71989e3ee290e8d72c67 \
+    --hash=sha256:37ae80a28c1d3265bb1f22464c856bd23c02a05bb211e56d0c5301a435be6c1a \
     --hash=sha256:38337540fbbddff8e999d59970f3c6f35a82de10053206a7562f1ea02d046fa5 \
-    --hash=sha256:473b83b915e547aa366d1eee11806deaf419e17be16310ac0a14077f1e28f917
+    --hash=sha256:473b83b915e547aa366d1eee11806deaf419e17be16310ac0a14077f1e28f917 \
+    --hash=sha256:df6c4265b289083bf710dff49bc51ef252f9d5be33a45ee2bed151114a56207b
     # via
     #   fastembed
     #   langchain-huggingface
@@ -2948,15 +3309,20 @@ urllib3==2.7.0 ; (platform_machine == 'arm64' and sys_platform == 'darwin') or (
     #   requests
     #   sentry-sdk
 uuid-utils==0.14.1 ; (platform_machine == 'arm64' and sys_platform == 'darwin') or (platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux') \
+    --hash=sha256:0972488e3f9b449e83f006ead5a0e0a33ad4a13e4462e865b7c286ab7d7566a3 \
     --hash=sha256:0b5d2ad28063d422ccc2c28d46471d47b61a58de885d35113a8f18cb547e25bf \
+    --hash=sha256:50fffc2827348c1e48972eed3d1c698959e63f9d030aa5dd82ba451113158a62 \
+    --hash=sha256:60e0854a90d67f4b0cc6e54773deb8be618f4c9bad98d3326f081423b5d14fae \
     --hash=sha256:93a3b5dc798a54a1feb693f2d1cb4cf08258c32ff05ae4929b5f0a2ca624a4f0 \
     --hash=sha256:9bfc95f64af80ccf129c604fb6b8ca66c6f256451e32bc4570f760e4309c9b69 \
     --hash=sha256:b197cd5424cf89fb019ca7f53641d05bfe34b1879614bed111c9c313b5574cd8 \
     --hash=sha256:b56b0cacd81583834820588378e432b0696186683b813058b707aedc1e16c4b1 \
+    --hash=sha256:bb3cf14de789097320a3c56bfdfdd51b1225d11d67298afbedee7e84e3837c96 \
     --hash=sha256:bec8f8ef627af86abf8298e7ec50926627e29b34fa907fcfbedb45aaa72bca43 \
     --hash=sha256:c1dbe718765f70f5b7f9b7f66b6a937802941b1cc56bcf642ce0274169741e01 \
     --hash=sha256:c915d53f22945e55fe0d3d3b0b87fd965a57f5fd15666fd92d6593a73b1dd297 \
-    --hash=sha256:ce6743ba194de3910b5feb1a62590cd2587e33a73ab6af8a01b642ceb5055862
+    --hash=sha256:ce6743ba194de3910b5feb1a62590cd2587e33a73ab6af8a01b642ceb5055862 \
+    --hash=sha256:da2234387b45fde40b0fedfee64a0ba591caeea9c48c7698ab6e2d85c7991533
     # via
     #   langchain-core
     #   langsmith
@@ -3019,19 +3385,25 @@ wasmtime==43.0.0 ; (platform_machine == 'arm64' and sys_platform == 'darwin') or
     # via nmp-auth
 watchdog==6.0.0 ; (platform_machine == 'arm64' and sys_platform == 'darwin') or (platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux') \
     --hash=sha256:20ffe5b202af80ab4266dcd3e91aae72bf2da48c0d33bdb15c66658e685e94e2 \
+    --hash=sha256:212ac9b8bf1161dc91bd09c048048a95ca3a4c4f5e5d4a7d1b1a7d5752a7f96f \
+    --hash=sha256:2cce7cfc2008eb51feb6aab51251fd79b85d9894e98ba847408f662b3395ca3c \
     --hash=sha256:490ab2ef84f11129844c23fb14ecf30ef3d8a6abafd3754a6f75ca1e6654136c \
     --hash=sha256:6eb11feb5a0d452ee41f824e271ca311a09e250441c262ca2fd7ebcf2461a06c \
     --hash=sha256:6f10cb2d5902447c7d0da897e2c6768bca89174d0c6e1e30abec5421af97a5b0 \
     --hash=sha256:7607498efa04a3542ae3e05e64da8202e58159aa1fa4acddf7678d34a35d4f13 \
+    --hash=sha256:9041567ee8953024c83343288ccc458fd0a2d811d6a0fd68c4c22609e3490379 \
     --hash=sha256:9ddf7c82fda3ae8e24decda1338ede66e1c99883db93711d8fb941eaa2d8c282 \
     --hash=sha256:a175f755fc2279e0b7312c0035d52e27211a5bc39719dd529625b1930917345b \
     --hash=sha256:afd0fe1b2270917c5e23c2a65ce50c2a4abb63daafb0d419fde368e272a76b7c \
-    --hash=sha256:bdd4e6f14b8b18c334febb9c4425a878a2ac20efd1e0b231978e7b150f92a948
+    --hash=sha256:bdd4e6f14b8b18c334febb9c4425a878a2ac20efd1e0b231978e7b150f92a948 \
+    --hash=sha256:e3df4cbb9a450c6d49318f6d14f4bbc80d763fa587ba46ec86f99f9e6876bb26
     # via nemoguardrails
 watchfiles==1.1.1 ; (platform_machine == 'arm64' and sys_platform == 'darwin') or (platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux') \
     --hash=sha256:14e0b1fe858430fc0251737ef3824c54027bedb8c37c38114488b8e131cf8219 \
     --hash=sha256:1db5d7ae38ff20153d542460752ff397fcf5c96090c1230803713cf3147a6803 \
     --hash=sha256:28475ddbde92df1874b6c5c8aaeb24ad5be47a11f87cde5a28ef3835932e3e94 \
+    --hash=sha256:2edc3553362b1c38d9f06242416a5d8e9fe235c204a4072e988ce2e5bb1f69f6 \
+    --hash=sha256:319b27255aacd9923b8a276bb14d21a5f7ff82564c744235fc5eae58d95422ae \
     --hash=sha256:36193ed342f5b9842edd3532729a2ad55c4160ffcfa3700e0d54be496b70dd43 \
     --hash=sha256:399600947b170270e80134ac854e21b3ccdefa11a9529a3decc1327088180f10 \
     --hash=sha256:3a476189be23c3686bc2f4321dd501cb329c0a0469e77b7b534ee10129ae6374 \
@@ -3040,18 +3412,28 @@ watchfiles==1.1.1 ; (platform_machine == 'arm64' and sys_platform == 'darwin') o
     --hash=sha256:3f7eb7da0eb23aa2ba036d4f616d46906013a68caf61b7fdbe42fc8b25132e77 \
     --hash=sha256:421e29339983e1bebc281fab40d812742268ad057db4aee8c4d2bce0af43b741 \
     --hash=sha256:5f3bde70f157f84ece3765b42b4a52c6ac1a50334903c6eaf765362f6ccca88a \
+    --hash=sha256:5f3f58818dc0b07f7d9aa7fe9eb1037aecb9700e63e1f6acfed13e9fef648f5d \
     --hash=sha256:5fac835b4ab3c6487b5dbad78c4b3724e26bcc468e886f8ba8cc4306f68f6701 \
     --hash=sha256:6e43d39a741e972bab5d8100b5cdacf69db64e34eb19b6e9af162bccf63c5cc6 \
+    --hash=sha256:743185e7372b7bc7c389e1badcc606931a827112fbbd37f14c537320fca08620 \
     --hash=sha256:831a62658609f0e5c64178211c942ace999517f5770fe9436be4c2faeba0c0ef \
     --hash=sha256:8526e8f916bb5b9a0a777c8317c23ce65de259422bba5b31325a6fa6029d33af \
     --hash=sha256:89eef07eee5e9d1fda06e38822ad167a044153457e6fd997f8a858ab7564a336 \
+    --hash=sha256:9bb9f66367023ae783551042d31b1d7fd422e8289eedd91f26754a66f44d5cff \
     --hash=sha256:a173cb5c16c4f40ab19cecf48a534c409f7ea983ab8fed0741304a1c0a31b3f2 \
     --hash=sha256:aebfd0861a83e6c3d1110b78ad54704486555246e542be3e2bb94195eabb2606 \
+    --hash=sha256:afaeff7696e0ad9f02cbb8f56365ff4686ab205fcf9c4c5b6fdfaaa16549dd04 \
     --hash=sha256:bd404be08018c37350f0d6e34676bd1e2889990117a2b90070b3007f172d0610 \
+    --hash=sha256:bfb5862016acc9b869bb57284e6cb35fdf8e22fe59f7548858e2f971d045f150 \
     --hash=sha256:c22c776292a23bfc7237a98f791b9ad3144b02116ff10d820829ce62dff46d0b \
     --hash=sha256:c755367e51db90e75b19454b680903631d41f9e3607fbd941d296a020c2d752d \
+    --hash=sha256:cb467c999c2eff23a6417e58d75e5828716f42ed8289fe6b77a7e5a91036ca70 \
     --hash=sha256:ce19e06cbda693e9e7686358af9cd6f5d61312ab8b00488bc36f5aabbaf77e24 \
     --hash=sha256:ce70f96a46b894b36eba678f153f052967a0d06d5b5a19b336ab0dbbd029f73e \
+    --hash=sha256:dcc5c24523771db3a294c77d94771abcfcb82a0e0ee8efd910c37c59ec1b31bb \
+    --hash=sha256:f27db948078f3823a6bb3b465180db8ebecf26dd5dae6f6180bd87383b6b4428 \
+    --hash=sha256:f537afb3276d12814082a2e9b242bdcf416c2e8fd9f799a737990a1dbe906e5b \
+    --hash=sha256:f8979280bdafff686ba5e4d8f97840f929a87ed9cdf133cbbd42f7766774d2aa \
     --hash=sha256:f9a2ae5c91cecc9edd47e041a930490c31c3afb1f5e6d71de3dc671bfaca02bf
     # via
     #   fastmcp
@@ -3129,28 +3511,44 @@ xdg-base-dirs==6.0.2 ; (platform_machine == 'arm64' and sys_platform == 'darwin'
     --hash=sha256:950504e14d27cf3c9cb37744680a43bf0ac42efefc4ef4acf98dc736cab2bced
     # via garak-api
 xxhash==3.6.0 ; (platform_machine == 'arm64' and sys_platform == 'darwin') or (platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux') \
+    --hash=sha256:08d45aef063a4531b785cd72de4887766d01dc8f362a515693df349fdb825e0c \
+    --hash=sha256:0e4edbfc7d420925b0dd5e792478ed393d6e75ff8fc219a6546fb446b6a417b1 \
     --hash=sha256:297b7fbf86c82c550e12e8fb71968b3f033d27b874276ba3624ea868c11165a8 \
+    --hash=sha256:2aa5ee3444c25b69813663c9f8067dcfaa2e126dc55e8dddf40f4d1c25d7effa \
     --hash=sha256:2b6821e94346f96db75abaa6e255706fb06ebd530899ed76d32cd99f20dc52fa \
+    --hash=sha256:3ed0df1b11a79856df5ffcab572cbd6b9627034c1c748c5566fa79df9048a7c5 \
+    --hash=sha256:40c391dd3cd041ebc3ffe6f2c862f402e306eb571422e0aa918d8070ba31da11 \
     --hash=sha256:418daf3db71e1413cfe211c2f9a528456936645c17f46b5204705581a45390ae \
     --hash=sha256:42c36dd7dbad2f5238950c377fcbf6811b1cdb1c444fab447960030cea60504d \
     --hash=sha256:49e03e6fe2cac4a1bc64952dd250cf0dbc5ef4ebb7b8d96bce82e2de163c82a2 \
+    --hash=sha256:4b54219177f6c6674d5378bd862c6aedf64725f70dd29c472eaae154df1a2e89 \
+    --hash=sha256:4ccbff013972390b51a18ef1255ef5ac125c92dc9143b2d1909f59abc765540e \
     --hash=sha256:51312c768403d8540487dbbfb557454cfc55589bbde6424456951f7fcd4facb3 \
     --hash=sha256:568a6d743219e717b07b4e03b0a828ce593833e498c3b64752e0f5df6bfe84db \
     --hash=sha256:5f059d9faeacd49c0215d66f4056e1326c80503f51a1532ca336a385edadd033 \
+    --hash=sha256:6812c25fe0d6c36a46ccb002f40f27ac903bf18af9f6dd8f9669cb4d176ab18f \
+    --hash=sha256:6f2580ffab1a8b68ef2b901cde7e55fa8da5e4be0977c68f78fc80f3c143de42 \
     --hash=sha256:78e7f2f4c521c30ad5e786fdd6bae89d47a32672a80195467b5de0480aa97b1f \
     --hash=sha256:794fe9145fe60191c6532fa95063765529770edcdd67b3d537793e8004cabbfd \
+    --hash=sha256:7d14a6cfaf03b1b6f5f9790f76880601ccc7896aff7ab9cd8978a939c1eb7e0d \
     --hash=sha256:7d8b8aaa30fca4f16f0c84a5c8d7ddee0e25250ec2796c973775373257dde8f1 \
+    --hash=sha256:843b52f6d88071f87eba1631b684fcb4b2068cd2180a0224122fe4ef011a9374 \
     --hash=sha256:858dc935963a33bc33490128edc1c12b0c14d9c7ebaa4e387a7869ecc4f3e263 \
     --hash=sha256:881b47fc47e051b37d94d13e7455131054b56749b91b508b0907eb07900d1c13 \
     --hash=sha256:8b29ee68625ab37b04c0b40c3fafdf24d2f75ccd778333cfb698f65f6c463f62 \
+    --hash=sha256:929142361a48ee07f09121fe9e96a84950e8d4df3bb298ca5d88061969f34d7b \
     --hash=sha256:93f107c673bccf0d592cdba077dedaf52fe7f42dcd7676eba1f6d6f0c3efffd2 \
     --hash=sha256:b7b2df81a23f8cb99656378e72501b2cb41b1827c0f5a86f87d6b06b69f9f204 \
+    --hash=sha256:ba284920194615cb8edf73bf52236ce2e1664ccd4a38fdb543506413529cc546 \
     --hash=sha256:bd17fede52a17a4f9a7bc4472a5867cb0b160deeb431795c0e4abe158bc784e9 \
     --hash=sha256:c6dc31591899f5e5666f04cc2e529e69b4072827085c1ef15294d91a004bc1bd \
+    --hash=sha256:d706dca2d24d834a4661619dcacf51a75c16d65985718d6a7d73c1eeeb903ddf \
     --hash=sha256:dea26ae1eb293db089798d3973a5fc928a18fdd97cc8801226fae705b02b14b0 \
+    --hash=sha256:f01375c0e55395b814a679b3eea205db7919ac2af213f4a6682e01220e5fe292 \
     --hash=sha256:f0162a78b13a0d7617b2845b90c763339d1f1d82bb04a4b07f4ab535cc5e05d6 \
     --hash=sha256:f205badabde7aafd1a31e8ca2a3e5a763107a71c397c4481d6a804eb5063d8bd \
     --hash=sha256:f22927652cba98c44639ffdc7aaf35828dccf679b10b31c4ad72a5b530a18eb7 \
+    --hash=sha256:f7f99123f0e1194fa59cc69ad46dbae2e07becec5df50a0509a808f90a0f03f0 \
     --hash=sha256:fba27a198363a7ef87f8c0f6b171ec36b674fe9053742c58dd7e3201c1ab30ee
     # via
     #   datasets
@@ -3170,32 +3568,64 @@ yara-python==4.5.1 ; (platform_machine == 'arm64' and sys_platform == 'darwin')
     --hash=sha256:f533848781f0e46e44eda77055eae4ec934cf56c1f473e787704f1a348e90094
     # via nmp-guardrails
 yarl==1.23.0 ; (platform_machine == 'arm64' and sys_platform == 'darwin') or (platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux') \
+    --hash=sha256:03214408cfa590df47728b84c679ae4ef00be2428e11630277be0727eba2d7cc \
+    --hash=sha256:0e40111274f340d32ebcc0a5668d54d2b552a6cca84c9475859d364b380e3222 \
+    --hash=sha256:115136c4a426f9da976187d238e84139ff6b51a20839aa6e3720cd1026d768de \
     --hash=sha256:13a563739ae600a631c36ce096615fe307f131344588b0bc0daec108cdb47b25 \
     --hash=sha256:16c6994ac35c3e74fb0ae93323bf8b9c2a9088d55946109489667c510a7d010e \
     --hash=sha256:170e26584b060879e29fac213e4228ef063f39128723807a312e5c7fec28eff2 \
     --hash=sha256:1932b6b8bba8d0160a9d1078aae5838a66039e8832d41d2992daa9a3a08f7860 \
+    --hash=sha256:1c3a3598a832590c5a3ce56ab5576361b5688c12cb1d39429cf5dba30b510760 \
+    --hash=sha256:1dc702e42d0684f42d6519c8d581e49c96cefaaab16691f03566d30658ee8788 \
     --hash=sha256:2569b67d616eab450d262ca7cb9f9e19d2f718c70a8b88712859359d0ab17035 \
     --hash=sha256:34b6cf500e61c90f305094911f9acc9c86da1a05a7a3f5be9f68817043f486e4 \
+    --hash=sha256:39004f0ad156da43e86aa71f44e033de68a44e5a31fc53507b36dd253970054a \
     --hash=sha256:3ceb13c5c858d01321b5d9bb65e4cf37a92169ea470b70fec6f236b2c9dd7e34 \
+    --hash=sha256:4764a6a7588561a9aef92f65bda2c4fb58fe7c675c0883862e6df97559de0bfb \
+    --hash=sha256:4966242ec68afc74c122f8459abd597afd7d8a60dc93d695c1334c5fd25f762f \
+    --hash=sha256:4a59ba56f340334766f3a4442e0efd0af895fae9e2b204741ef885c446b3a1a8 \
+    --hash=sha256:4c41e021bc6d7affb3364dc1e1e5fa9582b470f283748784bd6ea0558f87f42c \
     --hash=sha256:5023346c4ee7992febc0068e7593de5fa2bf611848c08404b35ebbb76b1b0512 \
     --hash=sha256:531ef597132086b6cf96faa7c6c1dcd0361dd5f1694e5cc30375907b9b7d3ea9 \
+    --hash=sha256:53ad387048f6f09a8969631e4de3f1bf70c50e93545d64af4f751b2498755072 \
     --hash=sha256:53b1ea6ca88ebd4420379c330aea57e258408dd0df9af0992e5de2078dc9f5d5 \
     --hash=sha256:578110dd426f0d209d1509244e6d4a3f1a3e9077655d98c5f22583d63252a08a \
+    --hash=sha256:609d3614d78d74ebe35f54953c5bbd2ac647a7ddb9c30a5d877580f5e86b22f2 \
     --hash=sha256:6b41389c19b07c760c7e427a3462e8ab83c4bb087d127f0e854c706ce1b9215c \
     --hash=sha256:6f0fd84de0c957b2d280143522c4f91a73aada1923caee763e24a2b3fda9f8a5 \
     --hash=sha256:7c6b9461a2a8b47c65eef63bb1c76a4f1c119618ffa99ea79bc5bb1e46c5821b \
+    --hash=sha256:803a3c3ce4acc62eaf01eaca1208dcf0783025ef27572c3336502b9c232005e7 \
+    --hash=sha256:8419ebd326430d1cbb7efb5292330a2cf39114e82df5cc3d83c9a0d5ebeaf2f2 \
     --hash=sha256:877b0738624280e34c55680d6054a307aa94f7d52fa0e3034a9cc6e790871da7 \
+    --hash=sha256:88f9fb0116fbfcefcab70f85cf4b74a2b6ce5d199c41345296f49d974ddb4123 \
+    --hash=sha256:95451e6ce06c3e104556d73b559f5da6c34a069b6b62946d3ad66afcd51642ea \
     --hash=sha256:99c8a9ed30f4164bc4c14b37a90208836cbf50d4ce2a57c71d0f52c7fb4f7598 \
     --hash=sha256:9cbf44c5cb4a7633d078788e1b56387e3d3cf2b8139a3be38040b22d6c3221c8 \
+    --hash=sha256:9ee33b875f0b390564c1fb7bc528abf18c8ee6073b201c6ae8524aca778e2d83 \
+    --hash=sha256:a0e317df055958a0c1e79e5d2aa5a5eaa4a6d05a20d4b0c9c3f48918139c9fc6 \
     --hash=sha256:a2df6afe50dea8ae15fa34c9f824a3ee958d785fd5d089063d960bae1daa0a3f \
     --hash=sha256:a3d2bff8f37f8d0f96c7ec554d16945050d54462d6e95414babaa18bfafc7f51 \
+    --hash=sha256:a8d00f29b42f534cc8aa3931cfe773b13b23e561e10d2b26f27a8d309b0e82a1 \
+    --hash=sha256:aafe5dcfda86c8af00386d7781d4c2181b5011b7be3f2add5e99899ea925df05 \
     --hash=sha256:aecfed0b41aa72b7881712c65cf764e39ce2ec352324f5e0837c7048d9e6daaa \
     --hash=sha256:b2c6b50c7b0464165472b56b42d4c76a7b864597007d9c085e8b63e185cf4a7a \
     --hash=sha256:b35d13d549077713e4414f927cdc388d62e543987c572baee613bf82f11a4b99 \
+    --hash=sha256:b5405bb8f0e783a988172993cfc627e4d9d00432d6bbac65a923041edacf997d \
+    --hash=sha256:be61f6fff406ca40e3b1d84716fde398fc08bc63dd96d15f3a14230a0973ed86 \
+    --hash=sha256:c75eb09e8d55bceb4367e83496ff8ef2bc7ea6960efb38e978e8073ea59ecb67 \
     --hash=sha256:cde9a2ecd91668bcb7f077c4966d8ceddb60af01b52e6e3e2680e4cf00ad1a59 \
+    --hash=sha256:d1009abedb49ae95b136a8904a3f71b342f849ffeced2d3747bf29caeda218c4 \
+    --hash=sha256:d7504f2b476d21653e4d143f44a175f7f751cd41233525312696c76aa3dbb23f \
     --hash=sha256:dc52310451fc7c629e13c4e061cbe2dd01684d91f2f8ee2821b083c58bd72432 \
+    --hash=sha256:e0fd068364a6759bc794459f0a735ab151d11304346332489c7972bacbe9e72b \
     --hash=sha256:e5723c01a56c5028c807c701aa66722916d2747ad737a046853f6c46f4875543 \
-    --hash=sha256:e7b0460976dc75cb87ad9cc1f9899a4b97751e7d4e77ab840fc9b6d377b8fd24
+    --hash=sha256:e7b0460976dc75cb87ad9cc1f9899a4b97751e7d4e77ab840fc9b6d377b8fd24 \
+    --hash=sha256:e9d9a4d06d3481eab79803beb4d9bd6f6a8e781ec078ac70d7ef2dcc29d1bea5 \
+    --hash=sha256:ead11956716a940c1abc816b7df3fa2b84d06eaed8832ca32f5c5e058c65506b \
+    --hash=sha256:f2af5c81a1f124609d5f33507082fc3f739959d4719b56877ab1ee7e7b3d602b \
+    --hash=sha256:f514f6474e04179d3d33175ed3f3e31434d3130d42ec153540d5b157deefd735 \
+    --hash=sha256:fda207c815b253e34f7e1909840fd14299567b1c0eb4908f8c2ce01a41265401 \
+    --hash=sha256:fe8f8f5e70e6dbdfca9882cd9deaac058729bcf323cf7a58660901e55c9c94f6
     # via aiohttp
 zipp==3.23.0 ; (platform_machine == 'arm64' and sys_platform == 'darwin') or (platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux') \
     --hash=sha256:071652d6115ed432f5ce1d34c336c0adfd6a884660d1e9712a256d3d3bd4b14e \
@@ -3203,27 +3633,39 @@ zipp==3.23.0 ; (platform_machine == 'arm64' and sys_platform == 'darwin') or (pl
     # via importlib-metadata
 zstandard==0.25.0 ; (platform_machine == 'arm64' and sys_platform == 'darwin') or (platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux') \
     --hash=sha256:01582723b3ccd6939ab7b3a78622c573799d5d8737b534b86d0e06ac18dbde4a \
+    --hash=sha256:06acb75eebeedb77b69048031282737717a63e71e4ae3f77cc0c3b9508320df6 \
+    --hash=sha256:0be7622c37c183406f3dbf0cba104118eb16a4ea7359eeb5752f0794882fc250 \
     --hash=sha256:10ef2a79ab8e2974e2075fb984e5b9806c64134810fac21576f0668e7ea19f8f \
     --hash=sha256:172de1f06947577d3a3005416977cce6168f2261284c02080e7ad0185faeced3 \
     --hash=sha256:1869da9571d5e94a85a5e8d57e4e8807b175c9e4a6294e3b66fa4efb074d90f6 \
+    --hash=sha256:22a086cff1b6ceca18a8dd6096ec631e430e93a8e70a9ca5efa7561a00f826fa \
     --hash=sha256:3a39c94ad7866160a4a46d772e43311a743c316942037671beb264e395bdd611 \
     --hash=sha256:3c83b0188c852a47cd13ef3bf9209fb0a77fa5374958b8c53aaa699398c6bd7b \
+    --hash=sha256:457ed498fc58cdc12fc48f7950e02740d4f7ae9493dd4ab2168a47c93c31298e \
     --hash=sha256:474d2596a2dbc241a556e965fb76002c1ce655445e4e3bf38e5477d413165ffa \
     --hash=sha256:4f187a0bb61b35119d1926aee039524d1f93aaf38a9916b8c4b78ac8514a0aaf \
     --hash=sha256:5a56ba0db2d244117ed744dfa8f6f5b366e14148e00de44723413b2f3938a902 \
+    --hash=sha256:5f1ad7bf88535edcf30038f6919abe087f606f62c00a87d7e33e7fc57cb69fcc \
+    --hash=sha256:5f5e4c2a23ca271c218ac025bd7d635597048b366d6f31f420aaeb715239fc98 \
+    --hash=sha256:6a573a35693e03cf1d67799fd01b50ff578515a8aeadd4595d2a7fa9f3ec002a \
     --hash=sha256:6c0e5a65158a7946e7a7affa6418878ef97ab66636f13353b8502d7ea03c8097 \
     --hash=sha256:6dffecc361d079bb48d7caef5d673c88c8988d3d33fb74ab95b7ee6da42652ea \
+    --hash=sha256:7149623bba7fdf7e7f24312953bcf73cae103db8cae49f8154dd1eadc8a29ecb \
     --hash=sha256:72d35d7aa0bba323965da807a462b0966c91608ef3a48ba761678cb20ce5d8b7 \
     --hash=sha256:7713e1179d162cf5c7906da876ec2ccb9c3a9dcbdffef0cc7f70c3667a205f0b \
     --hash=sha256:8e735494da3db08694d26480f1493ad2cf86e99bdd53e8e9771b2752a5c0246a \
     --hash=sha256:913cbd31a400febff93b564a23e17c3ed2d56c064006f54efec210d586171c00 \
     --hash=sha256:9300d02ea7c6506f00e627e287e0492a5eb0371ec1670ae852fefffa6164b072 \
+    --hash=sha256:98750a309eb2f020da61e727de7d7ba3c57c97cf6213f6f6277bb7fb42a8e065 \
+    --hash=sha256:99c0c846e6e61718715a3c9437ccc625de26593fea60189567f0118dc9db7512 \
     --hash=sha256:a1a4ae2dec3993a32247995bdfe367fc3266da832d82f8438c8570f989753de1 \
     --hash=sha256:a3f79487c687b1fc69f19e487cd949bf3aae653d181dfb5fde3bf6d18894706f \
     --hash=sha256:aaf21ba8fb76d102b696781bddaa0954b782536446083ae3fdaa6f16b25a1c4b \
     --hash=sha256:bfc4e20784722098822e3eee42b8e576b379ed72cca4a7cb856ae733e62192ea \
     --hash=sha256:bfd06b1c5584b657a2892a6014c2f4c20e0db0208c159148fa78c65f7e0b0277 \
-    --hash=sha256:f373da2c1757bb7f1acaf09369cdc1d51d84131e50d5fa9863982fd626466313
+    --hash=sha256:f27662e4f7dbf9f9c12391cb37b4c4c3cb90ffbd3b1fb9284dadbbb8935fa708 \
+    --hash=sha256:f373da2c1757bb7f1acaf09369cdc1d51d84131e50d5fa9863982fd626466313 \
+    --hash=sha256:fd7a5004eb1980d3cefe26b2685bcb0b17989901a70a1040d1ac86f1d898c551
     # via
     #   clickhouse-connect
     #   langsmith

From fa0d9909080017626af483672289e2a9e3e49f23 Mon Sep 17 00:00:00 2001
From: Sam Oluwalana <soluwalana@nvidia.com>
Date: Tue, 16 Jun 2026 16:25:11 -0600
Subject: [PATCH 3/3] Fix non-existent env var, fix nits

Signed-off-by: Sam Oluwalana <soluwalana@nvidia.com>
---
 .../skills/nemo-customizer/SKILL.md           | 67 +++++++++++++------
 .../references/dataset-formats.md             |  2 +-
 .../references/eval_helpers.py                | 38 ++++++-----
 .../references/hyperparameters.md             | 14 ++--
 .../references/integrations-setup.md          |  2 +-
 .../references/post-training-eval.md          | 32 ++++++---
 .../references/troubleshooting.md             | 14 ++--
 .../scripts/poll_customization_job.sh         |  2 +-
 .../tests/test_eval_helpers.py                | 14 ++++
 9 files changed, 124 insertions(+), 61 deletions(-)

diff --git a/plugins/nemo-customizer/src/nemo_customizer/skills/nemo-customizer/SKILL.md b/plugins/nemo-customizer/src/nemo_customizer/skills/nemo-customizer/SKILL.md
index 745bc56c10..0aaf6ac106 100644
--- a/plugins/nemo-customizer/src/nemo_customizer/skills/nemo-customizer/SKILL.md
+++ b/plugins/nemo-customizer/src/nemo_customizer/skills/nemo-customizer/SKILL.md
@@ -111,9 +111,9 @@ Training never runs inside the `nemo` CLI process. After `submit`, the platform'
 ## Gotchas
 
 - Resolve the CLI per **Pre-flight — CLI resolution** before any `nemo …` command; run from the **nemo-platform** git root, not a plugin subfolder.
-- Set `NEMO_BASE_URL` (or `NMP_BASE_URL`) only when the user gives a platform URL; default `http://127.0.0.1:8080` (same as `http://localhost:8080`). Track whether the user **overrode** the base URL — see **Platform unreachable** below.
+- Set `NMP_BASE_URL` only when the user gives a platform URL; default `http://127.0.0.1:8080` (same as `http://localhost:8080`). The `nemo` CLI reads this env var (see SDK `NMP_BASE_URL`). Track whether the user **overrode** the base URL — see **Platform unreachable** below.
 - **Platform unreachable** — if any platform API call fails with a connection error (`Connection error`, timeout, refused):
-  - **User gave a custom URL** (e.g. `$NMP_BASE_URL`) or you exported a non-default `NMP_BASE_URL` / `NEMO_BASE_URL`: stop and tell the user the platform is not reachable at that address. Do **not** offer to start local services.
+  - **User gave a custom URL** or you exported a non-default `NMP_BASE_URL`: stop and tell the user the platform is not reachable at that address. Do **not** offer to start local services.
   - **Default URL only** (no user override): **ask** whether to start the platform locally. If they agree, from the **nemo-platform** git root run in the **background**:
 
     ```bash
@@ -139,10 +139,10 @@ Training never runs inside the `nemo` CLI process. After `submit`, the platform'
 - **Do not use local `docker info`** to pick automodel vs unsloth. Run `nemo jobs list-execution-profiles -f json` against the user's platform (login first only if auth is enabled — see **Authentication**; see `references/troubleshooting.md`). Default output is a table — **`-f json` is required** for scripting; parse **stdout only** (do not pipe `2>&1` into `json.load`).
 - **Do not merge stderr into stdout when parsing JSON** — `submit`, `explain`, and `-f json` commands write **JSON on stdout**; harmless warnings like `Configuration file not found, using defaults` go to **stderr**. Piping with **`2>&1`** before `json.load` raises `JSONDecodeError` even when submit **succeeded** — a common cause of **duplicate jobs** when the agent re-submits after a parse error. Parse stdout only; redirect stderr if needed (`2>/dev/null`). See `references/troubleshooting.md` § **Parsing CLI JSON**.
 - For submit/image/plugin errors (both backends), read `references/troubleshooting.md`. Unsloth needs the `nmp-unsloth-training` container image on the **platform host's** Docker daemon (see `docker/unsloth/README.md`).
-- **Missing training image on a remote platform** — if the user gave a non-localhost `NMP_BASE_URL` / `NEMO_BASE_URL` and the job errors with `Failed to pull image`, `manifest unknown`, or missing `nmp-unsloth-training` / automodel training image: **do not** run `docker build`, `docker pull`, or `docker buildx bake` on the agent machine. Report with **Report to user** (use **Output adapter fileset (planned):** on error), then append on-target build steps from `references/troubleshooting.md` § **Missing training images**.
+- **Missing training image on a remote platform** — if the user gave a non-localhost `NMP_BASE_URL` and the job errors with `Failed to pull image`, `manifest unknown`, or missing `nmp-unsloth-training` / automodel training image: **do not** run `docker build`, `docker pull`, or `docker buildx bake` on the agent machine. Report with **Report to user** (use **Output adapter fileset (planned):** on error), then append on-target build steps from `references/troubleshooting.md` § **Missing training images**.
 - **Gated HuggingFace models** (Llama, Gemma, …) — confirm `hf-token` + fileset `token_secret` before submit; download fails with `Failed to access upstream storage` / 502 when missing. See **HuggingFace token (gated models)** and `references/troubleshooting.md` § **Gated HuggingFace models**.
 - **Post-training eval format** — use the same CHAT `messages` JSONL as training. **Do not** flatten rows to `prompt`/`expected` for the evaluator. Send `messages[:-1]` at inference (exclude final assistant label); score against `messages[-1].content`. See `references/post-training-eval.md` and `references/eval_helpers.py`.
-- **LoRA adapters load automatically for eval** — when a job completes, the adapter is registered on the model entity and hot-reloaded on any **READY** deployment with `lora_enabled: true`. **Do not** update deployments or providers before eval. **Do** route LoRA eval through the **provider** gateway (`/provider/<name>/-/v1` with `model: default--<adapter>`); the model-entity path (`/model/<entity>/-/v1`) always hits the base model. See `references/post-training-eval.md` § **Request routing (base vs LoRA)**.
+- **LoRA adapters load automatically for eval** — when a LoRA job completes (`save_method: lora`), the adapter is registered on the base model entity and hot-reloaded on any **READY** deployment with `lora_enabled: true`. **Do not** create or update deployments before LoRA eval. **Full SFT** (`finetuning_type: all_weights`) and **merged checkpoints** (`merged_16bit` / `merged_4bit`) register a new **model** entity at `output.name` — **deploy that entity for inference** before chat or eval; full weights are not hot-reloaded onto the base deployment. For LoRA eval, route through the **provider** gateway (`/provider/<name>/-/v1` with `model: default--<adapter>`); the model-entity path (`/model/<entity>/-/v1`) always hits the base model. See `references/post-training-eval.md` § **Request routing (base vs LoRA)**.
 
 ## Workflow
 
@@ -150,7 +150,7 @@ Common steps then **branch by plugin pick**:
 
 ```text
 - [ ] Resolve CLI (Pre-flight — CLI resolution); cd nemo-platform
-- [ ] export NEMO_BASE_URL (if user provided endpoint); note whether base URL is user-overridden
+- [ ] export NMP_BASE_URL (if user provided endpoint); note whether base URL is user-overridden
 - [ ] nemo auth status — skip login if auth disabled; if auth enabled and unsigned JWT allowed, `nemo auth login --unsigned-token --email <…>`; if OIDC, `nemo auth login`
 - [ ] nemo jobs list-execution-profiles -f json — apply Plugin pick rules above (retry login on 401/403)
 - [ ] On connection error: default URL → ask to start platform (see Platform unreachable); custom URL → report unreachable and stop
@@ -164,14 +164,14 @@ Common steps then **branch by plugin pick**:
 - [ ] nemo customization automodel submit /tmp/job.json --workspace default
 - [ ] Poll until top-level terminal (`poll_customization_job.sh`; default 15s interval, or 30–60s manual polls)
 - [ ] Report using output template below
-- [ ] Optional: compare base vs adapter on validation — `references/eval_helpers.py …` (CHAT format; adapters hot-reload automatically; see `references/post-training-eval.md`)
+- [ ] Optional: compare base vs adapter on validation — `references/eval_helpers.py …` (LoRA only; CHAT format; adapters hot-reload automatically; see `references/post-training-eval.md`)
 
 # unsloth branch (submit → Docker GPU job)
 - [ ] Write /tmp/job.json using the UnslothJobInput shape (see Fast path — unsloth)
 - [ ] nemo customization unsloth submit /tmp/job.json --workspace default [--profile <gpu-profile>]
 - [ ] Poll until top-level terminal (`poll_customization_job.sh unsloth-<job-id>`; default 15s interval)
 - [ ] Report using output template below
-- [ ] Optional: compare base vs adapter on validation — `references/eval_helpers.py …` (CHAT format; adapters hot-reload automatically; see `references/post-training-eval.md`)
+- [ ] Optional: compare base vs adapter on validation — `references/eval_helpers.py …` (LoRA only; CHAT format; adapters hot-reload automatically; see `references/post-training-eval.md`)
 ```
 
 ## Fast path — automodel
@@ -181,7 +181,7 @@ Substitute `<hf-repo>`, `<hf-dataset>`, `<model-entity>`, `<weights-fileset>`, `
 **Setup**
 
 ```bash
-export NEMO_BASE_URL=http://127.0.0.1:8080   # user override only
+export NMP_BASE_URL=http://127.0.0.1:8080   # user override only
 cd /path/to/nemo-platform
 nemo auth status   # skip login if auth disabled; if enabled + unsigned JWT allowed → login --unsigned-token --email admin@example.com
 nemo jobs list-execution-profiles -f json   # platform GPU profiles → automodel; set training.execution_profile if needed
@@ -403,7 +403,7 @@ Pick the path by whether the **base model fits in ~48 GB on one GPU** (LoRA or f
 | 4B–8B | 1 | 2 | `5e-6` |
 | >8B | 1 | 1 | lower LR or use TP / shorter seq |
 
-Output type is **model** (full checkpoint), not adapter. Expect much longer runs than LoRA at the same batch.
+Output type is **model** (full checkpoint), not adapter. Expect much longer runs than LoRA at the same batch. **Inference:** deploy `default/<output.name>` as a new model entity — full SFT does not hot-reload onto the base model's LoRA deployment.
 
 ### `max_seq_length` scaling
 
@@ -462,7 +462,7 @@ There is no `parallelism` block, no TP / PP / DP, no GBS divisibility math. Mult
 
 `load_in_4bit: true` (default) keeps base weights in 4-bit, which is what makes the "smaller per-device batch on bigger models" rule milder than vanilla HF. If you raise `per_device_train_batch_size` and hit OOM (exit 137) or training crashes (exit 1), halve `per_device_train_batch_size` first and double `gradient_accumulation_steps` to keep the effective batch the same.
 
-**Save method.** Default `output.save_method: "lora"` (adapter only — small, fast, deploy-friendly). Use `"merged_16bit"` if the user wants a full-weight checkpoint to deploy without an adapter loader; `"merged_4bit"` only when storage is tight (lossy). Merged methods require `training.finetuning_type: "lora"`.
+**Save method.** Default `output.save_method: "lora"` (adapter only — small, fast, hot-reloads on LoRA-enabled deployments). Use `"merged_16bit"` if the user wants a full-weight checkpoint to deploy as a standalone model entity; `"merged_4bit"` only when storage is tight (lossy). Merged methods require `training.finetuning_type: "lora"`. Merged and full SFT outputs must be **deployed for inference** — they do not hot-reload onto the base adapter deployment.
 
 **Tuning loop (unsloth):**
 
@@ -517,7 +517,7 @@ After polling reaches a **terminal** status (`completed`, `error`, or `cancelled
 
 | Status | Notes |
 |--------|-------|
-| `completed` | Brief success summary (e.g. adapter registered on model entity). When `metrics.train_loss` has ≥2 entries, add a loss-drop sentence: *Loss dropped from \<first value, 1 dp\> at step 1 to \<last value, 3 dp\> at step \<N\>; validation loss was \<val or n/a\>.* Always append **Using the adapter** with discovered provider name and concrete gateway URLs (see below). |
+| `completed` | Brief success summary. LoRA (`save_method: lora`): adapter registered on base model entity. Full SFT / merged checkpoint: new model entity at `output.name`. When `metrics.train_loss` has ≥2 entries, add a loss-drop sentence: *Loss dropped from \<first value, 1 dp\> at step 1 to \<last value, 3 dp\> at step \<N\>; validation loss was \<val or n/a\>.* Append **Using the adapter** (LoRA) or **Using the fine-tuned model** (full SFT / merged) with discovered provider name and concrete gateway URLs (see below). |
 | `error` | Quote `error_details.message` or the failing step; note setup that succeeded before the failure (auth, dataset upload, submit). |
 | `cancelled` | Cancellation reason if available. |
 
@@ -584,17 +584,26 @@ After polling reaches a **terminal** status (`completed`, `error`, or `cancelled
 | Output save method | lora |
 ```
 
-**Using the adapter (`completed` only)** — after **Training configuration**, run these discovery commands (parse stdout only; do not pipe `2>&1` into JSON parsers):
+**Using the output (`completed` only)** — after **Training configuration**, branch on output type:
+
+| Output | When | Report section |
+|--------|------|----------------|
+| LoRA adapter | `save_method: lora` (default) | **Using the adapter** — below |
+| Full model | `finetuning_type: all_weights`, or `save_method: merged_16bit` / `merged_4bit` | **Using the fine-tuned model** — below |
+
+### Using the adapter (LoRA / `save_method: lora`)
+
+Run these discovery commands (parse stdout only; do not pipe `2>&1` into JSON parsers):
 
 1. `nemo models get <model-entity> --workspace default` — confirm `<output.name>` appears under `adapters` with `enabled: true`.
-2. `nemo inference providers list --workspace default -f json` — pick a **READY** provider whose `served_models` includes `default/<model-entity>` (base or LoRA composite). Record its `name` as `<provider>` (often matches the deployment name).
+2. `nemo inference providers list --workspace default -f json` — pick a **READY** provider whose `served_models` includes `default/<model-entity>` (base entity). Record its `name` as `<provider>` (often matches the deployment name).
 
-On a deployment with `lora_enabled: true`, the adapter is **hot-reloaded automatically** — no deployment update or provider reconfiguration is required before inference or post-training eval. Append this section with **concrete URLs and provider name** from discovery:
+On a deployment with `lora_enabled: true`, the adapter is **hot-reloaded automatically** — no new deployment, deployment update, or provider reconfiguration is required before inference or post-training eval. Append this section with **concrete URLs and provider name** from discovery:
 
 ```markdown
 ### Using the adapter
 
-The adapter `<output.name>` is registered on `default/<model-entity>`. Weights are hot-reloaded on LoRA-enabled deployments — no deployment or provider update is required after training.
+The adapter `<output.name>` is registered on `default/<model-entity>`. Weights are hot-reloaded on LoRA-enabled deployments serving the **base** entity — no new deployment or provider update is required after training.
 
 #### Request routing (base vs LoRA)
 
@@ -619,7 +628,7 @@ Match training context at inference — send **`messages[:-1]`** (all turns exce
 #### Example — LoRA adapter via provider
 
 \`\`\`bash
-export NMP_BASE_URL=<platform-url>   # omit when using default localhost; NEMO_BASE_URL also works
+export NMP_BASE_URL=<platform-url>   # omit when using default localhost
 nemo inference gateway provider post v1/chat/completions <provider> --workspace default \\
   --body '{
     "model": "default--<output.name>",
@@ -658,10 +667,30 @@ uv run python plugins/nemo-customizer/src/nemo_customizer/skills/nemo-customizer
   --split validation.jsonl
 \`\`\`
 
-Uses CHAT `messages` rows unchanged from the training fileset (`messages[:-1]` at inference). Repeat `--adapter` for multi-adapter compare. `--provider` is optional when a READY provider is auto-discovered. Set `NMP_BASE_URL` (or pass `--base-url`) when the platform is not localhost — the helper reads `$NMP_BASE_URL` / `$NEMO_BASE_URL` by default.
+Uses CHAT `messages` rows unchanged from the training fileset (`messages[:-1]` at inference). Repeat `--adapter` for multi-adapter compare. `--provider` is optional when a READY provider is auto-discovered. Set `NMP_BASE_URL` (or pass `--base-url`) when the platform is not localhost. LoRA only — full SFT / merged outputs need a deployed model entity (see **Using the fine-tuned model**).
+```
+
+### Using the fine-tuned model (full SFT / merged checkpoint)
+
+When `finetuning_type: all_weights` or `save_method` is `merged_16bit` / `merged_4bit`, the job registers a **model** entity at `output.name` with full fine-tuned weights. **Deploy that entity before inference or eval** — full checkpoints are not hot-reloaded onto the base model's LoRA deployment.
+
+1. `nemo models get <output.name> --workspace default` — confirm the fine-tuned model entity exists.
+2. Create or update an inference deployment / provider that serves `default/<output.name>` (same workflow as deploying any model entity).
+3. Append this section with the **READY** provider or deployment name and concrete gateway URL.
+
+```markdown
+### Using the fine-tuned model
+
+Fine-tuned weights are on model entity `default/<output.name>`. Unlike LoRA adapters, full checkpoints **require a new inference deployment** (or provider update) before chat or eval.
+
+| Target | Gateway path | OpenAI base URL | Request `"model"` field |
+|--------|--------------|-----------------|-------------------------|
+| Fine-tuned model | model-entity | `$NMP_BASE_URL/apis/inference-gateway/v2/workspaces/default/model/<output.name>/-/v1` | `default/<output.name>` |
+
+Use the same chat settings as LoRA inference (`messages[:-1]`, `max_tokens`, `temperature`, `enable_thinking` as appropriate). Post-training eval: run generation eval against this model-entity URL (not `eval_helpers.py --adapter`, which is LoRA-specific).
 ```
 
-Use the user's platform URL in `NMP_BASE_URL` when they overrode it; omit the export line for default `http://127.0.0.1:8080`. Substitute `<provider>`, concrete URLs, and entity/adapter names with values from discovery — do not leave generic placeholders in the user-facing report. Do **not** tell the user to update the deployment or add the adapter to a provider before calling it — registration on the model entity is sufficient.
+Use the user's platform URL in `NMP_BASE_URL` when they overrode it; omit the export line for default `http://127.0.0.1:8080`. Substitute `<provider>`, concrete URLs, and entity names with values from discovery — do not leave generic placeholders in the user-facing report. For **LoRA**, do **not** tell the user to update the deployment before calling the adapter — registration on the base model entity is sufficient. For **full SFT / merged**, tell the user they must deploy `<output.name>` before inference.
 
 **Save report to `/tmp`** — unless the user opts out, write the full Markdown report (header, **Training configuration**, **Using the adapter** when `completed`, and **Resources created** when a slug or new filesets were used) to `/tmp/fine-tune-result-<slug-or-job-suffix>.md`. Use the random slug from the run when one was assigned; otherwise use the job id suffix (e.g. `a925b07ff678`).
 
@@ -669,7 +698,7 @@ Use the user's platform URL in `NMP_BASE_URL` when they overrode it; omit the ex
 
 | Error type | Append |
 |------------|--------|
-| Missing training image + user-overridden `NEMO_BASE_URL` / `NMP_BASE_URL` | `references/troubleshooting.md` § **Missing training images** — on-target build steps, env vars, re-submit commands. **Do not** `docker build` locally for a remote platform. |
+| Missing training image + user-overridden `NMP_BASE_URL` | `references/troubleshooting.md` § **Missing training images** — on-target build steps, env vars, re-submit commands. **Do not** `docker build` locally for a remote platform. |
 | Download fails / `Failed to access upstream storage` / 502 on gated HF model | `references/troubleshooting.md` § **Gated HuggingFace models** — create/update `hf-token`, add `token_secret` to fileset, confirm HF license, re-submit. |
 | W&B not syncing / no `[launcher]` secret lines / `WandbCallback requires wandb` / wandb 401 | `references/troubleshooting.md` § **W&B / integrations not working** (jobs-launcher build, secret update, unsloth image). Setup: `references/integrations-setup.md`. |
 
diff --git a/plugins/nemo-customizer/src/nemo_customizer/skills/nemo-customizer/references/dataset-formats.md b/plugins/nemo-customizer/src/nemo_customizer/skills/nemo-customizer/references/dataset-formats.md
index 4677f68474..d1d026656f 100644
--- a/plugins/nemo-customizer/src/nemo_customizer/skills/nemo-customizer/references/dataset-formats.md
+++ b/plugins/nemo-customizer/src/nemo_customizer/skills/nemo-customizer/references/dataset-formats.md
@@ -63,6 +63,6 @@ Eval rows must use the **same CHAT `messages` shape** as training. Do not flatte
 |----------------|--------------|------------------------|------------------|
 | `messages` (single- or multi-turn) | Same fileset split (`validation.jsonl`) | `messages[:-1]` — exclude final assistant label — see `post-training-eval.md` | `{{ item.messages[-1].content }}` |
 
-LoRA inference and eval use the **provider** gateway (`/provider/<name>/-/v1`, `model: default--<adapter>`). Base uses the **model-entity** path. See `post-training-eval.md` § **Request routing** and the **Using the adapter** section in `SKILL.md`.
+LoRA inference and eval use the **provider** gateway of the deployment serving the **base** entity (`/provider/<name>/-/v1`, `model: default--<adapter>`). The base model itself uses the model-entity path. Full SFT / merged checkpoints use the **output** model entity's own model-entity URL — deploy that entity first. See `post-training-eval.md` and the **Using the adapter** / **Using the fine-tuned model** sections in `SKILL.md`.
 
 Shared helpers and compare CLI: `references/eval_helpers.py`. Full workflow: `references/post-training-eval.md`.
diff --git a/plugins/nemo-customizer/src/nemo_customizer/skills/nemo-customizer/references/eval_helpers.py b/plugins/nemo-customizer/src/nemo_customizer/skills/nemo-customizer/references/eval_helpers.py
index 4bb2280d8f..1c866d4fcd 100644
--- a/plugins/nemo-customizer/src/nemo_customizer/skills/nemo-customizer/references/eval_helpers.py
+++ b/plugins/nemo-customizer/src/nemo_customizer/skills/nemo-customizer/references/eval_helpers.py
@@ -3,11 +3,16 @@
 
 """Post-training evaluation helpers — keep eval dataset shape aligned with CHAT training JSONL.
 
-LoRA adapters registered on the model entity are hot-reloaded automatically on
-deployments with ``lora_enabled: true`` — no deployment update before eval.
+**LoRA** (``output.save_method: lora``): adapters registered on the base model entity
+are hot-reloaded on deployments with ``lora_enabled: true`` — no deployment update or
+new inference deployment before eval.
 
-Run from the nemo-platform git root (reads ``$NMP_BASE_URL`` / ``$NEMO_BASE_URL`` when
-``--base-url`` is omitted)::
+**Full SFT** (``finetuning_type: all_weights``) or **merged LoRA checkpoints**
+(``save_method: merged_16bit`` / ``merged_4bit``): the job registers a new **model**
+entity at ``output.name``. Deploy that entity for inference before chat or eval — full
+weights are not hot-reloaded onto the base model's deployment.
+
+Run from the nemo-platform git root (reads ``$NMP_BASE_URL`` when ``--base-url`` is omitted)::
 
     export NMP_BASE_URL=http://127.0.0.1:8080
     uv run python plugins/nemo-customizer/src/nemo_customizer/skills/nemo-customizer/references/eval_helpers.py \\
@@ -355,17 +360,18 @@ def build_platform_model_target(
     from nemo_evaluator_sdk.enums import ModelFormat
     from nemo_evaluator_sdk.values.models import Model
 
-    if adapter_name:
-        resolved_provider = provider_name or find_ready_provider_for_model_entity(
-            base_url=base_url,
-            workspace=workspace,
-            model_entity=model_entity,
+    resolved_provider = provider_name or find_ready_provider_for_model_entity(
+        base_url=base_url,
+        workspace=workspace,
+        model_entity=model_entity,
+    )
+    if not resolved_provider:
+        raise ValueError(
+            f"No READY inference provider serves {workspace}/{model_entity}. "
+            "Deploy the base model (with lora_enabled: true for LoRA eval) or pass --provider <name>."
         )
-        if not resolved_provider:
-            raise ValueError(
-                f"No READY inference provider serves {workspace}/{model_entity}. "
-                "Deploy the base model with lora_enabled: true or pass --provider <name>."
-            )
+
+    if adapter_name:
         return Model(
             url=provider_gateway_url(
                 base_url=base_url,
@@ -658,7 +664,7 @@ def build_eval_payload(
 
 def default_base_url() -> str:
     """Platform URL from env or localhost default."""
-    return os.environ.get("NMP_BASE_URL") or os.environ.get("NEMO_BASE_URL") or "http://127.0.0.1:8080"
+    return os.environ.get("NMP_BASE_URL") or "http://127.0.0.1:8080"
 
 
 def _parse_args() -> argparse.Namespace:
@@ -666,7 +672,7 @@ def _parse_args() -> argparse.Namespace:
     parser.add_argument(
         "--base-url",
         default=default_base_url(),
-        help="Platform URL (default: $NMP_BASE_URL, $NEMO_BASE_URL, or http://127.0.0.1:8080)",
+        help="Platform URL (default: $NMP_BASE_URL or http://127.0.0.1:8080)",
     )
     parser.add_argument("--workspace", default="default")
     parser.add_argument("--model-entity", required=True)
diff --git a/plugins/nemo-customizer/src/nemo_customizer/skills/nemo-customizer/references/hyperparameters.md b/plugins/nemo-customizer/src/nemo_customizer/skills/nemo-customizer/references/hyperparameters.md
index e9c89849c8..ed735e3c88 100644
--- a/plugins/nemo-customizer/src/nemo_customizer/skills/nemo-customizer/references/hyperparameters.md
+++ b/plugins/nemo-customizer/src/nemo_customizer/skills/nemo-customizer/references/hyperparameters.md
@@ -556,7 +556,7 @@ See **Integrations (automodel + unsloth)** above.
 |-------|---------|-------|
 | `name` | auto-derived from `<model-entity>-<dataset>-<hex12>` | The output model entity / fileset name. |
 | `description` | `null` | Free-form description carried onto the entity and fileset. |
-| `save_method` | `"lora"` | `"lora"` (adapter — small, deploy via NIM/vLLM with adapter loader), `"merged_16bit"` (merged checkpoint, deploy without adapter), `"merged_4bit"` (lossy, storage-tight). `merged_*` requires `training.finetuning_type: "lora"`. |
+| `save_method` | `"lora"` | `"lora"` (adapter — hot-reloads on base LoRA deployment; no new inference deploy), `"merged_16bit"` (merged checkpoint — **deploy** `output.name` as model entity), `"merged_4bit"` (lossy, storage-tight; deploy like merged). `merged_*` requires `training.finetuning_type: "lora"`. |
 
 After `to_spec`, the canonical `OutputResponse` also carries `type` (`"adapter"` for `save_method: "lora"`, `"model"` otherwise) and `fileset` (defaults to `name`); both are derived — submitter doesn't set them.
 
@@ -588,12 +588,12 @@ Drop `rank` before lowering batch when OOM. Higher `alpha/rank` ratios amplify a
 
 ### Save-method picker
 
-| User wants | `save_method` |
-|------------|---------------|
-| Smallest artefact, deploy via adapter loader (default NIM / vLLM) | `lora` |
-| Full-weight checkpoint to deploy without an adapter | `merged_16bit` |
-| Disk-tight merged checkpoint (lossy) | `merged_4bit` |
-| Full SFT (no LoRA) | `lora` is invalid here; output is always a full model — leave `save_method` at default and ignore the merged options |
+| User wants | `save_method` | Inference after training |
+|------------|---------------|--------------------------|
+| Smallest artefact; hot-reload on base LoRA deployment | `lora` | No new deploy — adapter loads on existing `lora_enabled` deployment |
+| Full-weight checkpoint as standalone model | `merged_16bit` | **Deploy** `output.name` as new model entity |
+| Disk-tight merged checkpoint (lossy) | `merged_4bit` | **Deploy** `output.name` as new model entity |
+| Full SFT (no LoRA) | `lora` is invalid; output is always a full model | **Deploy** `output.name` as new model entity |
 
 `merged_*` require `training.finetuning_type: "lora"`. The schema validator surfaces a clear error if violated.
 
diff --git a/plugins/nemo-customizer/src/nemo_customizer/skills/nemo-customizer/references/integrations-setup.md b/plugins/nemo-customizer/src/nemo_customizer/skills/nemo-customizer/references/integrations-setup.md
index 9927d26c17..bbad950c37 100644
--- a/plugins/nemo-customizer/src/nemo_customizer/skills/nemo-customizer/references/integrations-setup.md
+++ b/plugins/nemo-customizer/src/nemo_customizer/skills/nemo-customizer/references/integrations-setup.md
@@ -96,7 +96,7 @@ Job JSON references the secret by name:
 Store the API key in the **platform** secret store. A local `wandb login` cache on your laptop is **not** used by training containers.
 
 ```bash
-export NEMO_BASE_URL=http://<platform-host>:8080   # omit when using default localhost
+export NMP_BASE_URL=http://<platform-host>:8080   # omit when using default localhost
 cd /path/to/nemo-platform
 
 # Create (first time)
diff --git a/plugins/nemo-customizer/src/nemo_customizer/skills/nemo-customizer/references/post-training-eval.md b/plugins/nemo-customizer/src/nemo_customizer/skills/nemo-customizer/references/post-training-eval.md
index 2e843f1732..4fb787ad82 100644
--- a/plugins/nemo-customizer/src/nemo_customizer/skills/nemo-customizer/references/post-training-eval.md
+++ b/plugins/nemo-customizer/src/nemo_customizer/skills/nemo-customizer/references/post-training-eval.md
@@ -50,16 +50,32 @@ For thinking-enabled eval, set `reasoning=ReasoningParams(end_token="``")` **and
 
 ## Inference after customization (wrap-up)
 
-Include this in the **Using the adapter** section of every completed customization report. Agents must discover `<provider>` from `nemo inference providers list --workspace default -f json` and fill concrete URLs — do not leave placeholders.
+Include this in the completed-job report. Agents must discover `<provider>` from `nemo inference providers list --workspace default -f json` and fill concrete URLs — do not leave placeholders.
 
-### LoRA adapters load automatically
+### LoRA adapters: no new deployment
 
-After a customization job reaches **`completed`**, the platform registers the adapter on the base **model entity**. On a deployment with **`lora_enabled: true`**, enabled adapters are **hot-reloaded automatically** (adapter sidecar → vLLM). **Do not** update the deployment, re-create providers, or add the adapter to a `served_models` list before post-training eval — run eval as soon as the job completes.
+Applies when the job used **`finetuning_type: lora`** and **`output.save_method: lora`** (adapter output).
+
+After a customization job reaches **`completed`**, the platform registers the adapter on the base **model entity**. On a deployment with **`lora_enabled: true`**, enabled adapters are **hot-reloaded automatically** (adapter sidecar → vLLM). **Do not** create a new inference deployment, update the deployment, re-create providers, or add the adapter to a `served_models` list before post-training eval — run eval as soon as the job completes.
 
 | Prerequisite (one-time) | Per-adapter step after training |
 |-------------------------|----------------------------------|
-| A **READY** inference deployment for the base model entity with `lora_enabled: true` | Confirm adapter appears under `nemo models get <model-entity>` → `adapters` |
-| Gateway reachable at the model-entity URL below | Target the adapter by name in the eval request (see table) |
+| A **READY** inference deployment for the **base** model entity with `lora_enabled: true` | Confirm adapter appears under `nemo models get <model-entity>` → `adapters` |
+| Gateway reachable at the provider URL below | Target the adapter by name in the eval request (see table) |
+
+### Full SFT / merged checkpoints: deploy the output model
+
+Applies when the job used **`finetuning_type: all_weights`** (full-weight SFT) or **`output.save_method: merged_16bit` / `merged_4bit`** (merged LoRA checkpoint). Output `type` is **`model`**, not `adapter`.
+
+The fine-tuned weights live on a **new model entity** at `output.name` (`default/<output.name>`). **You must deploy that entity for inference** — create a new inference deployment or add it to a provider's `served_models` before chat or eval. Full checkpoints are **not** hot-reloaded onto the base model's LoRA deployment.
+
+| Step | Action |
+|------|--------|
+| Confirm registration | `nemo models get <output.name> --workspace default` — entity exists with fine-tuned weights |
+| Deploy for inference | Create or update an inference deployment / provider that serves `default/<output.name>` |
+| Inference / eval route | **Model-entity** URL on `<output.name>` with `model: default/<output.name>` (not the base entity) |
+
+Post-training eval for full models: compare against the base entity on its deployment, or eval the fine-tuned entity directly. `eval_helpers.py` is LoRA-oriented (`--adapter`), so for full models run generation eval against the **output** model entity's gateway URL.
 
 ### Request routing (base vs LoRA)
 
@@ -70,9 +86,7 @@ The model-entity proxy path **always** resolves to the base VirtualModel. Settin
 | Base entity | **Model entity** | `$NMP_BASE_URL/apis/inference-gateway/v2/workspaces/default/model/<model-entity>/-/v1` | `default/<model-entity>` |
 | LoRA adapter | **Provider** | `$NMP_BASE_URL/apis/inference-gateway/v2/workspaces/default/provider/<provider>/-/v1` | `default--<adapter-name>` |
 
-(`NEMO_BASE_URL` is an alias for `NMP_BASE_URL`.)
-
-`eval_helpers.py` auto-discovers a READY provider that serves the base entity (or pass `--provider <name>`). Adapter weights still hot-reload on the deployment — no provider update per adapter.
+`eval_helpers.py` auto-discovers a READY provider that serves the base entity (or pass `--provider <name>`). LoRA adapter weights hot-reload on that deployment — no provider update per adapter. (Full SFT / merged outputs need a separate deployment — see above.)
 
 Optional sanity checks:
 
@@ -104,7 +118,7 @@ Resolve adapter names from completed job specs instead of guessing:
 import os
 from eval_helpers import list_completed_job_adapters, compare_adapters, build_eval_payload
 
-base_url = os.environ.get("NMP_BASE_URL") or os.environ.get("NEMO_BASE_URL") or "http://127.0.0.1:8080"
+base_url = os.environ.get("NMP_BASE_URL") or "http://127.0.0.1:8080"
 
 jobs = list_completed_job_adapters(
     base_url=base_url,
diff --git a/plugins/nemo-customizer/src/nemo_customizer/skills/nemo-customizer/references/troubleshooting.md b/plugins/nemo-customizer/src/nemo_customizer/skills/nemo-customizer/references/troubleshooting.md
index b0f799f91f..88d8c47d11 100644
--- a/plugins/nemo-customizer/src/nemo_customizer/skills/nemo-customizer/references/troubleshooting.md
+++ b/plugins/nemo-customizer/src/nemo_customizer/skills/nemo-customizer/references/troubleshooting.md
@@ -20,7 +20,7 @@ Any `nemo …` call may fail with `Connection error`, timeout, or connection ref
 
 | Situation | Action |
 |-----------|--------|
-| User gave a platform host/URL (e.g. `10.0.0.51:8080`) or you set `NEMO_BASE_URL` / `NMP_BASE_URL` to something other than `http://127.0.0.1:8080` or `http://localhost:8080` | Report that the platform is not reachable at that address. Ask them to confirm the host is up and the URL is correct. **Do not** start local services. |
+| User gave a platform host/URL (e.g. `10.0.0.51:8080`) or you set `NMP_BASE_URL` to something other than `http://127.0.0.1:8080` or `http://localhost:8080` | Report that the platform is not reachable at that address. Ask them to confirm the host is up and the URL is correct. **Do not** start local services. |
 | Default URL only — no user override | **Ask** whether to start the platform locally. If they agree, from the **nemo-platform** git root run in the **background**, then poll until healthy and retry the failed command: |
 
 ```bash
@@ -45,7 +45,7 @@ If the user already has a listener on `:8080` but health fails, see **nemo-statu
 
 ## Backend choice (automodel vs unsloth)
 
-**Do not** run `docker info` on the agent machine. The platform often runs elsewhere (`NEMO_BASE_URL`). Ask the **connected platform** what executors it exposes.
+**Do not** run `docker info` on the agent machine. The platform often runs elsewhere (`NMP_BASE_URL`). Ask the **connected platform** what executors it exposes.
 
 List profiles (login first only if auth is enabled — see **Authentication** in `SKILL.md`):
 
@@ -64,7 +64,7 @@ Each entry has `provider`, `profile` (name), and `backend` (e.g. `docker`, `kube
 | Response includes **`provider`: `gpu` or `gpu_distributed`** | **`automodel`** (default) |
 | No GPU profiles (only `subprocess` and/or CPU `provider`) | Report that GPU customization is unavailable |
 
-Both backends are **`submit`-only**. After submit, the platform's **Docker executor** runs GPU container steps on the daemon attached to the connected platform host (`platform.runtime: docker`). Training does not run in the CLI shell — query execution profiles on the platform (`NEMO_BASE_URL`), not GPU availability in the agent's terminal.
+Both backends are **`submit`-only**. After submit, the platform's **Docker executor** runs GPU container steps on the daemon attached to the connected platform host (`platform.runtime: docker`). Training does not run in the CLI shell — query execution profiles on the platform (`NMP_BASE_URL`), not GPU availability in the agent's terminal.
 
 ### Pick execution profile
 
@@ -175,13 +175,13 @@ After secret + fileset are wired, re-submit the same job JSON (use a fresh `outp
 
 ## Missing training images
 
-Job errors like `Failed to pull image … nmp-unsloth-training:… Not Found`, `manifest unknown`, or a missing automodel training image mean the **connected platform's Docker daemon** (the one that runs GPU job steps) does not have the image. With the default `NEMO_BASE_URL` / `NMP_BASE_URL` (`127.0.0.1:8080` / `localhost:8080`), that daemon is usually on the same machine as the agent; with a user-overridden URL (e.g. `10.0.0.51:8080`), it is on the remote target host instead.
+Job errors like `Failed to pull image … nmp-unsloth-training:… Not Found`, `manifest unknown`, or a missing automodel training image mean the **connected platform's Docker daemon** (the one that runs GPU job steps) does not have the image. With the default `NMP_BASE_URL` (`127.0.0.1:8080` / `localhost:8080`), that daemon is usually on the same machine as the agent; with a user-overridden URL (e.g. `10.0.0.51:8080`), it is on the remote target host instead.
 
 **Did the user override the base URL?** (same rule as **Platform unreachable** — track this from the start of the workflow.)
 
 | Situation | Action |
 |-----------|--------|
-| **Remote platform** — user gave a host/URL (e.g. `10.0.0.51:8080`) or you set `NEMO_BASE_URL` / `NMP_BASE_URL` to something other than `http://127.0.0.1:8080` or `http://localhost:8080` | **Do not** run `docker build`, `docker pull`, or `docker buildx bake` on the agent machine — that only affects the agent's local daemon, not the remote platform. Tell the user they must build or load the image **on the target host** (the machine whose Docker daemon runs the GPU job steps). Report with **Report to user** in `SKILL.md`, then append **Report follow-up — missing image (remote platform)** below. Stop; do not retry submit until the user confirms the image is available on the target. |
+| **Remote platform** — user gave a host/URL (e.g. `10.0.0.51:8080`) or you set `NMP_BASE_URL` to something other than `http://127.0.0.1:8080` or `http://localhost:8080` | **Do not** run `docker build`, `docker pull`, or `docker buildx bake` on the agent machine — that only affects the agent's local daemon, not the remote platform. Tell the user they must build or load the image **on the target host** (the machine whose Docker daemon runs the GPU job steps). Report with **Report to user** in `SKILL.md`, then append **Report follow-up — missing image (remote platform)** below. Stop; do not retry submit until the user confirms the image is available on the target. |
 | **Local platform** — default URL only (`127.0.0.1:8080` / `localhost:8080`) | Build or pull on **that same host** where `nemo services run` and Docker share a daemon. See build commands below and `docker/unsloth/README.md` (unsloth) or automodel docker docs. Set env vars **before** starting/restarting the platform. |
 
 Image env vars are read when the platform starts (not per job):
@@ -227,7 +227,7 @@ When submit or poll returns a missing-image error and the base URL is **user-ove
 **Re-submit after the image is available:**
 
 ```bash
-export NEMO_BASE_URL=<user's platform URL>
+export NMP_BASE_URL=<user's platform URL>
 cd /path/to/nemo-platform
 nemo customization <plugin> submit /tmp/job.json --workspace default [--profile <gpu-profile>]
 ```
@@ -285,7 +285,7 @@ Set `jobs.executors.docker.launcher_tool_path` in `~/.nemo/config.yaml` to the *
 | `Unsloth does not support local run` | Used `run` instead of `submit` | `nemo customization unsloth submit <job.json> -w <workspace>` |
 | `Unsloth training requires platform.runtime: docker` | Platform not configured for Docker GPU jobs | Start platform with Docker runtime and a GPU execution profile |
 | Unknown execution profile | Default `gpu` profile missing or wrong | Re-list profiles; pass `--profile <exact-name>` on submit |
-| Missing `nmp-unsloth-training` image / `Failed to pull image` / `manifest unknown` | Image not on the **platform host's** Docker daemon | **Remote platform** (`NEMO_BASE_URL` not localhost): tell user to build on the target — **do not** `docker build` locally. **Local platform**: build on same host; see **Missing training images** above and `docker/unsloth/README.md` |
+| Missing `nmp-unsloth-training` image / `Failed to pull image` / `manifest unknown` | Image not on the **platform host's** Docker daemon | **Remote platform** (`NMP_BASE_URL` not localhost): tell user to build on the target — **do not** `docker build` locally. **Local platform**: build on same host; see **Missing training images** above and `docker/unsloth/README.md` |
 | `torch.cuda.is_available()` False in training step logs | GPU not exposed to the container step | Confirm the execution profile is GPU-backed; check platform Docker GPU setup |
 | Job stuck in `active` after training step completes | Upload / model-entity steps still running | Keep polling top-level status (same as automodel) |
 
diff --git a/plugins/nemo-customizer/src/nemo_customizer/skills/nemo-customizer/scripts/poll_customization_job.sh b/plugins/nemo-customizer/src/nemo_customizer/skills/nemo-customizer/scripts/poll_customization_job.sh
index 302ff02325..52d030ce81 100755
--- a/plugins/nemo-customizer/src/nemo_customizer/skills/nemo-customizer/scripts/poll_customization_job.sh
+++ b/plugins/nemo-customizer/src/nemo_customizer/skills/nemo-customizer/scripts/poll_customization_job.sh
@@ -1,7 +1,7 @@
 #!/usr/bin/env bash
 # Poll customization job until top-level status is terminal.
 # Usage: poll_customization_job.sh <plugin>-<job-id> [interval_seconds]
-# Requires: NEMO_BASE_URL or NMP_BASE_URL; run from nemo-platform root.
+# Requires: NMP_BASE_URL; run from nemo-platform root.
 # Resolves `nemo` on PATH, else `uv run nemo` (see SKILL.md Pre-flight).
 # Exit 0 on completed; exit 1 on error, cancelled, or get-status failure.
 
diff --git a/plugins/nemo-customizer/tests/test_eval_helpers.py b/plugins/nemo-customizer/tests/test_eval_helpers.py
index 3ed1dfeec5..5dd81b6a4f 100644
--- a/plugins/nemo-customizer/tests/test_eval_helpers.py
+++ b/plugins/nemo-customizer/tests/test_eval_helpers.py
@@ -69,11 +69,25 @@ def test_build_platform_model_target_routes_base_via_model_entity() -> None:
         base_url="http://10.0.0.51:8080",
         workspace="default",
         model_entity="qwen3-1.7b",
+        provider_name="my-provider",
     )
     assert "/model/qwen3-1.7b/-/v1" in target.url
+    assert "/provider/" not in target.url
     assert target.name == "default/qwen3-1.7b"
 
 
+def test_build_platform_model_target_requires_ready_provider_for_base(
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    monkeypatch.setattr(eval_helpers, "find_ready_provider_for_model_entity", lambda **kwargs: None)
+    with pytest.raises(ValueError, match="No READY inference provider"):
+        eval_helpers.build_platform_model_target(
+            base_url="http://10.0.0.51:8080",
+            workspace="default",
+            model_entity="qwen3-1.7b",
+        )
+
+
 def test_gateway_path_from_url() -> None:
     assert eval_helpers.gateway_path_from_url("http://x/provider/p/-/v1") == "provider"
     assert eval_helpers.gateway_path_from_url("http://x/model/m/-/v1") == "model-entity"