diff --git a/.github/actions/docker-build/action.yml b/.github/actions/docker-build/action.yml
index c3637b853bc6..5c3299fc3877 100644
--- a/.github/actions/docker-build/action.yml
+++ b/.github/actions/docker-build/action.yml
@@ -80,7 +80,9 @@ runs:
         ECR_HOSTNAME: ${{ inputs.aws_account_id }}.dkr.ecr.${{ inputs.aws_default_region }}.amazonaws.com
       run: |
         sudo apt-get update && sudo apt-get install -y git build-essential protobuf-compiler libclang-dev
-        curl https://sh.rustup.rs -sSf | sh -s -- -y --default-toolchain stable
+        curl --retry 5 --retry-delay 2 --fail --show-error -sSL -o /tmp/rustup-init.sh https://sh.rustup.rs
+        sh /tmp/rustup-init.sh -y --default-toolchain stable
+        rm -f /tmp/rustup-init.sh
         . "$HOME/.cargo/env"
         echo "$HOME/.cargo/bin" >> "$GITHUB_PATH"
         cargo install cbindgen
diff --git a/.github/actions/install-vcluster-cli/action.yml b/.github/actions/install-vcluster-cli/action.yml
index 572d979a153a..b3e127b36e24 100644
--- a/.github/actions/install-vcluster-cli/action.yml
+++ b/.github/actions/install-vcluster-cli/action.yml
@@ -23,10 +23,14 @@ runs:
             aarch64) VCLUSTER_ARCH="arm64" ;;
             *)       echo "Unsupported architecture: ${ARCH}"; exit 1 ;;
           esac
-          curl -sL -o /tmp/vcluster \
+          TMP_BIN="$(mktemp -p /tmp vcluster.XXXXXX)"
+          curl --retry 5 --retry-delay 2 \
+            --connect-timeout 10 --max-time 120 \
+            --fail --show-error -sL \
+            -o "${TMP_BIN}" \
             "https://github.com/loft-sh/vcluster/releases/download/${{ inputs.vcluster_version }}/vcluster-linux-${VCLUSTER_ARCH}"
-          sudo mv /tmp/vcluster /usr/local/bin/vcluster
-          sudo chmod +x /usr/local/bin/vcluster
+          sudo install -m 0755 "${TMP_BIN}" /usr/local/bin/vcluster
+          rm -f "${TMP_BIN}"
           vcluster version
         fi
         echo "::endgroup::"
diff --git a/.github/workflows/nightly-ci.yml b/.github/workflows/nightly-ci.yml
index d92bbe88d71c..d671a7c98618 100644
--- a/.github/workflows/nightly-ci.yml
+++ b/.github/workflows/nightly-ci.yml
@@ -258,7 +258,7 @@ jobs:
         PROTOC_VER="30.2"
         PROTOC_ZIP="protoc-${PROTOC_VER}-linux-x86_64.zip"
         PROTOC_SHA256="327e9397c6fb3ea2a542513a3221334c6f76f7aa524a7d2561142b67b312a01f"
-        curl -fsSLO "$PB_REL/download/v${PROTOC_VER}/${PROTOC_ZIP}"
+        curl --retry 5 --retry-delay 2 -fsSLO "$PB_REL/download/v${PROTOC_VER}/${PROTOC_ZIP}"
         echo "${PROTOC_SHA256}  ${PROTOC_ZIP}" | sha256sum -c -
         unzip "${PROTOC_ZIP}" -d $HOME/.local
         rm "${PROTOC_ZIP}"
diff --git a/.github/workflows/pre-merge.yml b/.github/workflows/pre-merge.yml
index 3693b06728a2..7e96b753e350 100644
--- a/.github/workflows/pre-merge.yml
+++ b/.github/workflows/pre-merge.yml
@@ -81,7 +81,7 @@ jobs:
         PROTOC_VER="30.2"
         PROTOC_ZIP="protoc-${PROTOC_VER}-linux-x86_64.zip"
         PROTOC_SHA256="327e9397c6fb3ea2a542513a3221334c6f76f7aa524a7d2561142b67b312a01f"
-        curl -fsSLO "$PB_REL/download/v${PROTOC_VER}/${PROTOC_ZIP}"
+        curl --retry 5 --retry-delay 2 -fsSLO "$PB_REL/download/v${PROTOC_VER}/${PROTOC_ZIP}"
         echo "${PROTOC_SHA256}  ${PROTOC_ZIP}" | sha256sum -c -
         unzip "${PROTOC_ZIP}" -d $HOME/.local
         rm "${PROTOC_ZIP}"
@@ -140,7 +140,7 @@ jobs:
         PROTOC_VER="30.2"
         PROTOC_ZIP="protoc-${PROTOC_VER}-linux-x86_64.zip"
         PROTOC_SHA256="327e9397c6fb3ea2a542513a3221334c6f76f7aa524a7d2561142b67b312a01f"
-        curl -fsSLO "$PB_REL/download/v${PROTOC_VER}/${PROTOC_ZIP}"
+        curl --retry 5 --retry-delay 2 -fsSLO "$PB_REL/download/v${PROTOC_VER}/${PROTOC_ZIP}"
         echo "${PROTOC_SHA256}  ${PROTOC_ZIP}" | sha256sum -c -
         unzip "${PROTOC_ZIP}" -d $HOME/.local
         rm "${PROTOC_ZIP}"
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index 24dd4547445f..cc145b4c037b 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -176,7 +176,10 @@ jobs:
         env:
           CRANE_VERSION: v0.20.2
         run: |
-          curl -sL "https://github.com/google/go-containerregistry/releases/download/${CRANE_VERSION}/go-containerregistry_Linux_x86_64.tar.gz" \
+          curl --retry 5 --retry-delay 2 \
+            --connect-timeout 10 --max-time 120 \
+            --fail --show-error -sL \
+            "https://github.com/google/go-containerregistry/releases/download/${CRANE_VERSION}/go-containerregistry_Linux_x86_64.tar.gz" \
             | tar -xzf - crane
           sudo mv crane /usr/local/bin/
           crane version
@@ -440,7 +443,10 @@ jobs:
         env:
           CRANE_VERSION: v0.20.2
         run: |
-          curl -sL "https://github.com/google/go-containerregistry/releases/download/${CRANE_VERSION}/go-containerregistry_Linux_x86_64.tar.gz" \
+          curl --retry 5 --retry-delay 2 \
+            --connect-timeout 10 --max-time 120 \
+            --fail --show-error -sL \
+            "https://github.com/google/go-containerregistry/releases/download/${CRANE_VERSION}/go-containerregistry_Linux_x86_64.tar.gz" \
             | tar -xzf - crane
           sudo mv crane /usr/local/bin/
           crane version
diff --git a/components/src/dynamo/planner/config/planner_config.py b/components/src/dynamo/planner/config/planner_config.py
index 93e074bcb6fd..307757381d45 100644
--- a/components/src/dynamo/planner/config/planner_config.py
+++ b/components/src/dynamo/planner/config/planner_config.py
@@ -52,7 +52,8 @@ class PlannerConfig(BaseModel):
         "kubernetes", "virtual", "global-planner"
     ] = SLAPlannerDefaults.environment
     namespace: str = Field(
-        default_factory=lambda: os.environ.get("DYN_NAMESPACE", "dynamo")
+        default_factory=lambda: os.environ.get("DYN_NAMESPACE", "dynamo"),
+        exclude=True,
     )
     backend: Literal["vllm", "sglang", "trtllm", "mocker"] = SLAPlannerDefaults.backend
     mode: Literal["disagg", "prefill", "decode", "agg"] = SLAPlannerDefaults.mode
@@ -109,7 +110,8 @@ class PlannerConfig(BaseModel):
         default_factory=lambda: os.environ.get(
             "PROMETHEUS_ENDPOINT",
             "http://prometheus-kube-prometheus-prometheus.monitoring.svc.cluster.local:9090",
-        )
+        ),
+        exclude=True,
     )
     metric_reporting_prometheus_port: int = Field(
         default_factory=lambda: int(os.environ.get("PLANNER_PROMETHEUS_PORT", 0))
diff --git a/components/src/dynamo/sglang/request_handlers/handler_base.py b/components/src/dynamo/sglang/request_handlers/handler_base.py
index 8276113e0fc1..56f4c50ebfa7 100644
--- a/components/src/dynamo/sglang/request_handlers/handler_base.py
+++ b/components/src/dynamo/sglang/request_handlers/handler_base.py
@@ -24,6 +24,10 @@
 )
 
 import sglang as sgl
+from sglang.srt.managers.io_struct import (
+    DestroyWeightsUpdateGroupReqInput,
+    InitWeightsUpdateGroupReqInput,
+)
 
 from dynamo._core import Context
 from dynamo.common.utils.input_params import InputParamManager
@@ -733,6 +737,12 @@ def _priority_kwargs(self, priority: Any) -> Dict[str, Any]:
             return {"priority": normalized}
         return {}
 
+    def _weight_update_unsupported_response(self) -> dict:
+        """Return the failure payload used when this worker has no engine."""
+        reason = "weight update control not supported on this worker"
+        payload = {"success": False, "message": reason}
+        return payload
+
     async def release_memory_occupation(self, body: dict) -> dict:
         """Release GPU memory occupation and unregister from discovery.
 
@@ -857,6 +867,30 @@ async def update_weights_from_disk(self, body: dict) -> dict:
             "num_paused_requests": num_paused_requests,
         }
 
+    async def init_weights_update_group(self, body: dict) -> dict:
+        """Initialize distributed weight-update NCCL group on the worker.
+
+        ``body`` supplies the ``InitWeightsUpdateGroupReqInput`` keyword fields.
+        """
+        if self.engine is None:
+            return self._weight_update_unsupported_response()
+        request = InitWeightsUpdateGroupReqInput(**body)
+        manager = self.engine.tokenizer_manager
+        ok, detail = await manager.init_weights_update_group(request, None)
+        return {"success": ok, "message": detail}
+
+    async def destroy_weights_update_group(self, body: dict) -> dict:
+        """Destroy distributed weight-update NCCL group on the worker.
+
+        ``body`` supplies the ``DestroyWeightsUpdateGroupReqInput`` keyword fields.
+        """
+        if self.engine is None:
+            return self._weight_update_unsupported_response()
+        request = DestroyWeightsUpdateGroupReqInput(**body)
+        manager = self.engine.tokenizer_manager
+        ok, detail = await manager.destroy_weights_update_group(request, None)
+        return {"success": ok, "message": detail}
+
     async def update_weights_from_tensor(self, body: dict) -> dict:
         """Update model weights from tensors without restarting the server."""
         from sglang.srt.managers.io_struct import UpdateWeightsFromTensorReqInput
@@ -980,6 +1014,15 @@ async def session_control(self, request, context=None):
             result = {"status": "error", "message": f"Unknown action: {action}"}
         yield result
 
+    async def get_weight_version(self, body: dict) -> dict:
+        """Return ``server_args.weight_version``; ``body`` is accepted but unused."""
+        engine = self.engine
+        if engine is None:
+            return self._weight_update_unsupported_response()
+
+        server_args = engine.tokenizer_manager.server_args
+        return {"weight_version": server_args.weight_version}
+
     def register_engine_routes(self, runtime: DistributedRuntime) -> None:
         """Register all engine routes for this handler.
 
@@ -994,6 +1037,12 @@ def register_engine_routes(self, runtime: DistributedRuntime) -> None:
         runtime.register_engine_route(
             "resume_memory_occupation", self.resume_memory_occupation
         )
+        runtime.register_engine_route(
+            "init_weights_update_group", self.init_weights_update_group
+        )
+        runtime.register_engine_route(
+            "destroy_weights_update_group", self.destroy_weights_update_group
+        )
         runtime.register_engine_route(
             "update_weights_from_disk", self.update_weights_from_disk
         )
@@ -1009,6 +1058,7 @@ def register_engine_routes(self, runtime: DistributedRuntime) -> None:
         runtime.register_engine_route(
             "update_weight_version", self.update_weight_version
         )
+        runtime.register_engine_route("get_weight_version", self.get_weight_version)
         if getattr(self.config, "dynamo_args", None) and getattr(
             self.config.dynamo_args, "enable_rl", False
         ):
diff --git a/components/src/dynamo/sglang/tests/test_sglang_handler_base.py b/components/src/dynamo/sglang/tests/test_sglang_handler_base.py
new file mode 100644
index 000000000000..2b62698284b7
--- /dev/null
+++ b/components/src/dynamo/sglang/tests/test_sglang_handler_base.py
@@ -0,0 +1,72 @@
+# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+import types
+
+import pytest
+
+from dynamo.sglang.request_handlers.handler_base import BaseWorkerHandler
+
+pytestmark = [
+    pytest.mark.unit,
+    pytest.mark.sglang,
+    pytest.mark.gpu_0,
+    pytest.mark.pre_merge,
+]
+
+
+class _DummyRuntime:  # stand-in runtime that stores registered routes in a dict
+    def __init__(self):
+        self.routes = {}  # route name -> handler passed to register_engine_route
+
+    def register_engine_route(self, name, handler):
+        self.routes[name] = handler  # a later registration replaces an earlier one
+
+
+class _DummyWorkerHandler(BaseWorkerHandler):  # test subclass supplying generate()
+    async def generate(self, request, context):
+        if False:  # never taken; the yield only makes this an async generator
+            yield request, context
+
+
+def test_register_engine_routes_includes_weight_update_routes():
+    """Weight-update routes are registered; config is stubbed as __init__ is skipped."""
+    handler = _DummyWorkerHandler.__new__(_DummyWorkerHandler)
+    handler.config = types.SimpleNamespace()  # no dynamo_args -> RL branch skipped
+    runtime = _DummyRuntime()
+    handler.register_engine_routes(runtime)
+    assert "init_weights_update_group" in runtime.routes
+    assert "destroy_weights_update_group" in runtime.routes
+    assert "get_weight_version" in runtime.routes
+
+
+@pytest.mark.asyncio
+async def test_get_weight_version_reads_active_version_from_server_args():
+    """The route echoes ``server_args.weight_version`` from the attached engine."""
+    # __init__ is bypassed; only the attribute chain the route reads is stubbed.
+    server_args = types.SimpleNamespace(weight_version=17)
+    manager = types.SimpleNamespace(server_args=server_args)
+    worker = _DummyWorkerHandler.__new__(_DummyWorkerHandler)
+    worker.engine = types.SimpleNamespace(tokenizer_manager=manager)
+
+    payload = await worker.get_weight_version({})
+
+    assert payload == {"weight_version": 17}
+
+
+@pytest.mark.asyncio
+async def test_weight_update_routes_return_unsupported_without_engine():
+    """Every weight-update route reports the same failure when engine is None."""
+    worker = _DummyWorkerHandler.__new__(_DummyWorkerHandler)
+    worker.engine = None
+    unsupported = {
+        "success": False,
+        "message": "weight update control not supported on this worker",
+    }
+    replies = [
+        await worker.init_weights_update_group({}),
+        await worker.destroy_weights_update_group({}),
+        await worker.get_weight_version({}),
+    ]
+
+    assert replies == [unsupported] * 3
diff --git a/deploy/operator/internal/consts/consts.go b/deploy/operator/internal/consts/consts.go
index 2d8b67ca533b..a1b9222bb98c 100644
--- a/deploy/operator/internal/consts/consts.go
+++ b/deploy/operator/internal/consts/consts.go
@@ -27,6 +27,8 @@ const (
 	DynamoNixlPort     = 19090
 	DynamoNixlPortName = "nixl"
 
+	DynamoFPMBasePort = 20380
+
 	MpiRunSshPort = 2222
 
 	// Default security context values
diff --git a/deploy/operator/internal/controller/dynamocomponentdeployment_controller_test.go b/deploy/operator/internal/controller/dynamocomponentdeployment_controller_test.go
index 9216c3c453da..4aaf920cf286 100644
--- a/deploy/operator/internal/controller/dynamocomponentdeployment_controller_test.go
+++ b/deploy/operator/internal/controller/dynamocomponentdeployment_controller_test.go
@@ -773,6 +773,7 @@ func TestDynamoComponentDeploymentReconciler_generateLeaderWorkerSet(t *testing.
 											{Name: "CONTAINER_NAME", Value: commonconsts.MainContainerName},
 											{Name: commonconsts.DynamoComponentEnvVar, Value: commonconsts.ComponentTypeWorker},
 											{Name: commonconsts.DynamoDiscoveryBackendEnvVar, Value: "kubernetes"},
+											{Name: "DYN_FORWARDPASS_METRIC_PORT", Value: "20380"},
 											{Name: "DYN_HEALTH_CHECK_ENABLED", Value: "false"},
 											{Name: commonconsts.DynamoNamespaceEnvVar, Value: "default-test-lws-deploy"},
 											{Name: "DYN_PARENT_DGD_K8S_NAME", Value: "test-lws-deploy"},
@@ -916,6 +917,7 @@ func TestDynamoComponentDeploymentReconciler_generateLeaderWorkerSet(t *testing.
 											{Name: "CONTAINER_NAME", Value: commonconsts.MainContainerName},
 											{Name: commonconsts.DynamoComponentEnvVar, Value: commonconsts.ComponentTypeWorker},
 											{Name: commonconsts.DynamoDiscoveryBackendEnvVar, Value: "kubernetes"},
+											{Name: "DYN_FORWARDPASS_METRIC_PORT", Value: "20380"},
 											{Name: "DYN_HEALTH_CHECK_ENABLED", Value: "false"},
 											{Name: commonconsts.DynamoNamespaceEnvVar, Value: "default-test-lws-deploy"},
 											{Name: "DYN_PARENT_DGD_K8S_NAME", Value: "test-lws-deploy"},
diff --git a/deploy/operator/internal/controller/dynamographdeploymentrequest_controller.go b/deploy/operator/internal/controller/dynamographdeploymentrequest_controller.go
index 59c0a8982b4a..eb70054264a0 100644
--- a/deploy/operator/internal/controller/dynamographdeploymentrequest_controller.go
+++ b/deploy/operator/internal/controller/dynamographdeploymentrequest_controller.go
@@ -1307,6 +1307,12 @@ func (r *DynamoGraphDeploymentRequestReconciler) createProfilingJob(ctx context.
 				Value: string(dgdr.UID),
 			},
 		}
+		if r.Config.Infrastructure.PrometheusEndpoint != "" {
+			profilerEnv = append(profilerEnv, corev1.EnvVar{
+				Name:  "PROMETHEUS_ENDPOINT",
+				Value: r.Config.Infrastructure.PrometheusEndpoint,
+			})
+		}
 
 		// Build volume mounts
 		volumeMounts := []corev1.VolumeMount{
diff --git a/deploy/operator/internal/dynamo/component_worker.go b/deploy/operator/internal/dynamo/component_worker.go
index 38d9d0c45503..be19b61c3863 100644
--- a/deploy/operator/internal/dynamo/component_worker.go
+++ b/deploy/operator/internal/dynamo/component_worker.go
@@ -107,6 +107,10 @@ func (w *WorkerDefaults) GetBaseContainer(context ComponentContext) (corev1.Cont
 			Name:  "NIXL_TELEMETRY_PROMETHEUS_PORT",
 			Value: fmt.Sprintf("%d", commonconsts.DynamoNixlPort),
 		},
+		{
+			Name:  "DYN_FORWARDPASS_METRIC_PORT",
+			Value: fmt.Sprintf("%d", commonconsts.DynamoFPMBasePort),
+		},
 	}...)
 
 	if context.WorkerHashSuffix != "" {
diff --git a/deploy/operator/internal/dynamo/graph_test.go b/deploy/operator/internal/dynamo/graph_test.go
index a6ab18bfc825..f5cab1e18a0d 100644
--- a/deploy/operator/internal/dynamo/graph_test.go
+++ b/deploy/operator/internal/dynamo/graph_test.go
@@ -2160,6 +2160,10 @@ func TestGenerateGrovePodCliqueSet(t *testing.T) {
 														Name:  "NIXL_TELEMETRY_PROMETHEUS_PORT",
 														Value: "19090",
 													},
+													{
+														Name:  "DYN_FORWARDPASS_METRIC_PORT",
+														Value: "20380",
+													},
 													{
 														Name:  "DYN_PARENT_DGD_K8S_NAME",
 														Value: "test-dynamo-graph-deployment",
@@ -2374,6 +2378,10 @@ func TestGenerateGrovePodCliqueSet(t *testing.T) {
 														Name:  "NIXL_TELEMETRY_PROMETHEUS_PORT",
 														Value: "19090",
 													},
+													{
+														Name:  "DYN_FORWARDPASS_METRIC_PORT",
+														Value: "20380",
+													},
 													{
 														Name:  "DYN_PARENT_DGD_K8S_NAME",
 														Value: "test-dynamo-graph-deployment",
@@ -3187,6 +3195,10 @@ func TestGenerateGrovePodCliqueSet(t *testing.T) {
 														Name:  "NIXL_TELEMETRY_PROMETHEUS_PORT",
 														Value: "19090",
 													},
+													{
+														Name:  "DYN_FORWARDPASS_METRIC_PORT",
+														Value: "20380",
+													},
 													{
 														Name:  "DYN_PARENT_DGD_K8S_NAME",
 														Value: "test-dynamo-graph-deployment",
@@ -3388,6 +3400,10 @@ func TestGenerateGrovePodCliqueSet(t *testing.T) {
 														Name:  "NIXL_TELEMETRY_PROMETHEUS_PORT",
 														Value: "19090",
 													},
+													{
+														Name:  "DYN_FORWARDPASS_METRIC_PORT",
+														Value: "20380",
+													},
 													{
 														Name:  "DYN_PARENT_DGD_K8S_NAME",
 														Value: "test-dynamo-graph-deployment",
@@ -5635,6 +5651,7 @@ func TestGenerateBasePodSpec_Worker(t *testing.T) {
 							{Name: "CONTAINER_NAME", Value: commonconsts.MainContainerName},
 							{Name: commonconsts.DynamoComponentEnvVar, Value: "worker"},
 							{Name: commonconsts.DynamoDiscoveryBackendEnvVar, Value: "kubernetes"},
+							{Name: "DYN_FORWARDPASS_METRIC_PORT", Value: "20380"},
 							{Name: "DYN_HEALTH_CHECK_ENABLED", Value: "false"},
 							{Name: commonconsts.DynamoNamespaceEnvVar, Value: "default-test-deployment"},
 							{Name: "DYN_PARENT_DGD_K8S_NAME", Value: "test-deployment"},
diff --git a/recipes/README.md b/recipes/README.md
index 0783a8c87377..500aad85fa01 100644
--- a/recipes/README.md
+++ b/recipes/README.md
@@ -68,6 +68,7 @@ These recipes are under active development and may require additional setup step
 | Model | Framework | Mode | GPUs | Deployment | Notes |
 |-------|-----------|------|------|------------|-------|
 | **[GLM-5-NVFP4](glm-5-nvfp4/sglang/disagg/)** | SGLang | Disagg Prefill/Decode | 20x GB200 | ✅ | NVFP4, EAGLE speculative decoding, TP16 decode + TP4 prefill. Requires [custom container build](glm-5-nvfp4/). |
+| **[Nemotron-3-Nano-Omni-NVFP4](nemotron-3-nano-omni/vllm/agg/)** | vLLM | Aggregated | 1x GPU | ✅ | Multimodal text/image/video/audio serving. Requires [custom container build](nemotron-3-nano-omni/). |
 | **[nvidia/Kimi-K2.5-NVFP4](kimi-k2.5/trtllm/agg/nvidia/)** | TensorRT-LLM | Aggregated | 8x B200 | ✅ | Text only — MoE model, TP8×EP8, reasoning + tool calling. Vision input not yet functional. |
 | **[DeepSeek-V4-Flash](deepseek-v4/deepseek-v4-flash/vllm/agg/)** | vLLM | Aggregated | 4x B200 | ✅ | Text only — MoE model (284B / 13B active), DP=4 + EP, FP8 KV cache, reasoning + tool calling. Requires [custom container build](deepseek-v4/container/). |
 | **[DeepSeek-V4-Flash](deepseek-v4/deepseek-v4-flash/sglang/agg/)** | SGLang | Aggregated | 4x B200 | ✅ | Text only — MoE model (284B / 13B active), TP=4, MXFP4 MoE via FlashInfer, EAGLE MTP (3 steps / 4 draft tokens), reasoning + tool calling. Prebuilt image available; optional [custom container build](deepseek-v4/container/). |
diff --git a/recipes/nemotron-3-nano-omni/Dockerfile b/recipes/nemotron-3-nano-omni/Dockerfile
new file mode 100644
index 000000000000..1d13afe628ff
--- /dev/null
+++ b/recipes/nemotron-3-nano-omni/Dockerfile
@@ -0,0 +1,60 @@
+# syntax=docker/dockerfile:1.10.0
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Layer the ai-dynamo wheel (and the omni multimodal extras) onto an upstream
+# vLLM image. Dynamo is installed from https://pypi.nvidia.com/ai-dynamo/ —
+# no source build, no Rust toolchain. Match DYNAMO_VERSION to a nightly that
+# targets the same vLLM minor as BASE_IMAGE (see the wheel's METADATA for the
+# pinned vllm version).
+#
+# Build:
+#   docker build -t <registry>/nemotron-omni-vllm:latest \
+#                -f recipes/nemotron-3-nano-omni/Dockerfile \
+#                recipes/nemotron-3-nano-omni
+#
+# Override defaults with --build-arg, e.g.:
+#   --build-arg BASE_IMAGE=vllm/vllm-openai:v0.20.0
+#   --build-arg DYNAMO_VERSION=1.2.0.dev20260427
+
+ARG BASE_IMAGE="vllm/vllm-openai:v0.20.0"
+ARG DYNAMO_VERSION="1.2.0.dev20260427"
+
+FROM ${BASE_IMAGE}
+USER root
+
+ARG DYNAMO_VERSION
+
+# ai-dynamo Python package (dynamo.frontend, dynamo.vllm, ...) installed with
+# --no-deps so that:
+#   - the base image's vLLM 0.20 / torch / cuda stack is preserved (otherwise
+#     pip would try to re-install vllm[flashinfer,otel,runai]==0.20.0 which is
+#     declared by ai-dynamo[vllm]), and
+#   - we skip `nixl` (KV transport, only used for disagg) and `ray` (multi-node
+#     orchestration), neither of which an aggregated single-GPU recipe needs.
+RUN pip install --no-cache-dir --no-deps \
+      --extra-index-url https://pypi.nvidia.com \
+      ai-dynamo==${DYNAMO_VERSION}
+
+# ai-dynamo's declared core dependencies, plus the Rust _core extension wheel
+# (`ai-dynamo-runtime`) that the Python package imports natively. `kubernetes`,
+# `pydantic`, and `pydantic-settings` are version-constrained because the
+# latest releases pip would otherwise pick (kubernetes 35.x, pydantic 2.13.x)
+# fall outside ai-dynamo's declared compatibility ranges.
+RUN pip install --no-cache-dir \
+      --extra-index-url https://pypi.nvidia.com \
+      ai-dynamo-runtime==${DYNAMO_VERSION} \
+      "kubernetes<33.0.0,>=32.0.1" \
+      "pydantic<2.13" "pydantic-settings<2.13.0" \
+      msgpack msgspec prometheus-client pyzmq transformers
+
+# ai-dynamo[vllm] extras minus nixl + ray (skipped on purpose, see above), plus
+# the multimodal-video Python deps (av/ftfy/nvtx/sentencepiece) that the Nemotron
+# Nano Omni model needs at runtime but which aren't pulled in by any extra.
+RUN pip install --no-cache-dir \
+      blake3 librosa soundfile uvloop \
+      av ftfy nvtx sentencepiece
+
+# vllm/vllm-openai's default ENTRYPOINT runs `vllm serve`; reset it so the
+# image behaves as a plain dynamo runtime image.
+ENTRYPOINT ["/bin/bash"]
diff --git a/recipes/nemotron-3-nano-omni/README.md b/recipes/nemotron-3-nano-omni/README.md
new file mode 100644
index 000000000000..e4cbfe6031a1
--- /dev/null
+++ b/recipes/nemotron-3-nano-omni/README.md
@@ -0,0 +1,185 @@
+<!--
+SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+SPDX-License-Identifier: Apache-2.0
+-->
+
+# Nemotron 3 Nano Omni NVFP4
+
+Serves [nvidia/Nemotron-3-Nano-Omni-30B-A3B-Reasoning-NVFP4](https://huggingface.co/nvidia/Nemotron-3-Nano-Omni-30B-A3B-Reasoning-NVFP4)
+using vLLM with an aggregated Dynamo deployment.
+
+This recipe builds a custom container that layers the `ai-dynamo` wheel
+(from <https://pypi.nvidia.com/ai-dynamo/>) onto an upstream vLLM image — no
+source build, no Rust toolchain.
+
+## Topology
+
+| Role | Replicas | GPUs/replica | Notes |
+|------|----------|--------------|-------|
+| Frontend | 1 | 0 | Dynamo frontend with prefix-hash KV routing |
+| vLLM worker | 1 | 1 | Text, image, video, and audio inputs |
+
+## Prerequisites
+
+- A Kubernetes cluster with the [Dynamo Operator](../../docs/kubernetes/README.md) installed
+- One NVIDIA GPU per worker replica
+- Shared PVC storage for the Hugging Face model cache
+- Hugging Face access to `nvidia/Nemotron-3-Nano-Omni-30B-A3B-Reasoning-NVFP4`
+
+## Step 1: Build the Container
+
+```bash
+docker build \
+  -t <your-registry>/nemotron-omni-vllm:latest \
+  -f recipes/nemotron-3-nano-omni/Dockerfile \
+  recipes/nemotron-3-nano-omni
+docker push <your-registry>/nemotron-omni-vllm:latest
+```
+
+Useful build args:
+
+- `BASE_IMAGE=<image>` — pin to a different vLLM base (default `vllm/vllm-openai:v0.20.0`).
+- `DYNAMO_VERSION=<version>` — pin to a specific `ai-dynamo` release or nightly from <https://pypi.nvidia.com/ai-dynamo/>. Default tracks the latest tested nightly. Make sure the chosen wheel's `vllm` dependency matches `BASE_IMAGE`.
+
+## Step 2: Download the Model
+
+Create the PVC, Hugging Face token secret, and download the model weights:
+
+```bash
+export NAMESPACE=<your-namespace>
+
+# Create the namespace if it does not already exist.
+kubectl create namespace ${NAMESPACE} --dry-run=client -o yaml | kubectl apply -f -
+
+# First edit storageClassName in model-cache.yaml for your cluster.
+kubectl apply -f recipes/nemotron-3-nano-omni/model-cache/model-cache.yaml -n ${NAMESPACE}
+
+kubectl create secret generic hf-token-secret \
+  --from-literal=HF_TOKEN=<your-hf-token> \
+  -n ${NAMESPACE}
+
+kubectl apply -f recipes/nemotron-3-nano-omni/model-cache/model-download.yaml -n ${NAMESPACE}
+kubectl wait --for=condition=complete job/model-download -n ${NAMESPACE} --timeout=3600s
+```
+
+## Step 3: Deploy
+
+Edit `vllm/agg/deploy.yaml` and replace the `<your-registry>` placeholder in both services:
+
+- `<your-registry>/nemotron-omni-vllm:latest` - your built container image
+
+If your registry is private, add the appropriate `imagePullSecrets` to the
+deployment.
+
+```bash
+kubectl apply -f recipes/nemotron-3-nano-omni/vllm/agg/deploy.yaml -n ${NAMESPACE}
+```
+
+Monitor startup:
+
+```bash
+kubectl get pods -n ${NAMESPACE} -l nvidia.com/dynamo-graph-deployment-name=nemotron-omni-vllm-agg -w
+```
+
+## Step 4: Test
+
+```bash
+kubectl port-forward svc/nemotron-omni-vllm-agg-frontend 8000:8000 -n ${NAMESPACE}
+```
+
+In another terminal, send a minimal text request:
+
+```bash
+curl http://localhost:8000/v1/chat/completions \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "nvidia/Nemotron-3-Nano-Omni-30B-A3B-Reasoning-NVFP4",
+    "messages": [{"role": "user", "content": "Hello!"}],
+    "max_tokens": 128
+  }'
+```
+
+To exercise the multimodal path, attach an image:
+
+```bash
+curl http://localhost:8000/v1/chat/completions \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "nvidia/Nemotron-3-Nano-Omni-30B-A3B-Reasoning-NVFP4",
+    "messages": [{
+      "role": "user",
+      "content": [
+        {"type": "image_url", "image_url": {"url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/inpaint.png"}},
+        {"type": "text", "text": "Describe what is in this image."}
+      ]
+    }],
+    "max_tokens": 256
+  }'
+```
+
+…or an audio clip:
+
+```bash
+curl http://localhost:8000/v1/chat/completions \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "nvidia/Nemotron-3-Nano-Omni-30B-A3B-Reasoning-NVFP4",
+    "messages": [{
+      "role": "user",
+      "content": [
+        {"type": "audio_url", "audio_url": {"url": "https://raw.githubusercontent.com/yuekaizhang/Triton-ASR-Client/main/datasets/mini_en/wav/1221-135766-0002.wav"}},
+        {"type": "text", "text": "Transcribe this audio clip."}
+      ]
+    }],
+    "max_tokens": 256
+  }'
+```
+
+## Key Configuration Notes
+
+- `--enable-multimodal` enables image, video, and audio inputs.
+- `--media-io-kwargs '{"video": {"num_frames": 512, "fps": 1}}'` samples long
+  videos at one frame per second, capped at 512 frames.
+- `--dyn-tool-call-parser nemotron_nano` and
+  `--dyn-reasoning-parser nemotron_nano` enable Nemotron Nano tool-call and
+  reasoning parsing.
+- The frontend uses `--router-mode kv --no-kv-events`, which approximates
+  KV-aware routing with prefix hashing without requiring backend KV events.
+
+## Optional: Run without NATS
+
+The Dynamo runtime defaults to NATS for the event plane and connects to a
+NATS server if `NATS_SERVER` is set in the environment (the operator
+auto-injects this on most clusters). On clusters without NATS — or where
+you'd rather avoid the dependency — you can run with only the TCP request
+plane and the ZMQ event plane. Add this to both `Frontend` and `VllmWorker`:
+
+```yaml
+mainContainer:
+  env:
+    - name: DYN_EVENT_PLANE
+      value: zmq
+  command: ["/bin/bash", "-lc"]
+  args:
+    # Operator-injected NATS_SERVER takes effect even when set to ""; we have
+    # to actually unset it before the runtime reads env.
+    - >-
+      unset NATS_SERVER &&
+      exec python3 -m dynamo.frontend ...   # or dynamo.vllm
+```
+
+The request plane defaults to TCP already, so no further flags are needed.
+
+## File Layout
+
+```text
+recipes/nemotron-3-nano-omni/
+  README.md
+  Dockerfile
+  model-cache/
+    model-cache.yaml
+    model-download.yaml
+  vllm/
+    agg/
+      deploy.yaml
+```
diff --git a/recipes/nemotron-3-nano-omni/model-cache/model-cache.yaml b/recipes/nemotron-3-nano-omni/model-cache/model-cache.yaml
new file mode 100644
index 000000000000..5d6e2b6e998b
--- /dev/null
+++ b/recipes/nemotron-3-nano-omni/model-cache/model-cache.yaml
@@ -0,0 +1,13 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+apiVersion: v1
+kind: PersistentVolumeClaim
+metadata:
+  name: model-cache  # claim name referenced by the download Job and the deployment
+spec:
+  accessModes:
+    - ReadWriteMany  # the chosen storage class must support this access mode
+  resources:
+    requests:
+      storage: 250Gi
+  storageClassName: "your-storage-class-name"  # placeholder: replace before applying
diff --git a/recipes/nemotron-3-nano-omni/model-cache/model-download.yaml b/recipes/nemotron-3-nano-omni/model-cache/model-download.yaml
new file mode 100644
index 000000000000..6e34cf65512b
--- /dev/null
+++ b/recipes/nemotron-3-nano-omni/model-cache/model-download.yaml
@@ -0,0 +1,48 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+apiVersion: batch/v1
+kind: Job
+metadata:
+  name: model-download
+spec:
+  backoffLimit: 3  # retry budget for failed pods before the Job is marked failed
+  completions: 1
+  parallelism: 1
+  template:
+    metadata:
+      labels:
+        app: model-download
+    spec:
+      restartPolicy: Never  # failures are retried by the Job, not restarted in place
+      containers:
+        - name: model-download
+          image: python:3.10-slim
+          securityContext:
+            allowPrivilegeEscalation: false
+            capabilities:
+              drop: ["ALL"]
+            seccompProfile:
+              type: RuntimeDefault
+          command: ["sh", "-c"]
+          envFrom:
+            - secretRef:
+                name: hf-token-secret  # secret keys become env vars (e.g. HF_TOKEN)
+          env:
+            - name: MODEL_NAME
+              value: nvidia/Nemotron-3-Nano-Omni-30B-A3B-Reasoning-NVFP4
+            - name: HF_HOME
+              value: /model-store  # same path as the model-cache volumeMount below
+            - name: HF_XET_HIGH_PERFORMANCE
+              value: "1"  # quoted so the env value stays a string
+          args:
+            - |
+              set -eux
+              pip install --no-cache-dir huggingface_hub==1.11.0
+              hf download "$MODEL_NAME"
+          volumeMounts:
+            - name: model-cache
+              mountPath: /model-store
+      volumes:
+        - name: model-cache
+          persistentVolumeClaim:
+            claimName: model-cache  # PVC defined in model-cache.yaml
diff --git a/recipes/nemotron-3-nano-omni/vllm/agg/deploy.yaml b/recipes/nemotron-3-nano-omni/vllm/agg/deploy.yaml
new file mode 100644
index 000000000000..1c03c392f811
--- /dev/null
+++ b/recipes/nemotron-3-nano-omni/vllm/agg/deploy.yaml
@@ -0,0 +1,102 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+# Nemotron Nano Omni aggregated vLLM deployment.
+#
+# Prerequisites:
+#   - Dynamo Platform installed
+#   - Model weights downloaded into the model-cache PVC
+#   - Container built from recipes/nemotron-3-nano-omni/Dockerfile
+#   - HF_TOKEN secret created:
+#       kubectl create secret generic hf-token-secret \
+#         --from-literal=HF_TOKEN=<your-token> -n <namespace>
+#
+# Replace image references before applying:
+#   <your-registry>/nemotron-omni-vllm:latest
+apiVersion: nvidia.com/v1alpha1
+kind: DynamoGraphDeployment
+metadata:
+  name: nemotron-omni-vllm-agg
+spec:
+  backendFramework: vllm
+  pvcs:
+    - name: model-cache
+      create: false
+  services:
+    Frontend:
+      componentType: frontend
+      envFromSecret: hf-token-secret
+      replicas: 1
+      volumeMounts:
+        - name: model-cache
+          mountPoint: /model-store
+      extraPodSpec:
+        mainContainer:
+          image: <your-registry>/nemotron-omni-vllm:latest  # placeholder: replace
+          imagePullPolicy: IfNotPresent
+          startupProbe:
+            httpGet:
+              path: /health
+              port: 8000  # must match --http-port in args below
+            periodSeconds: 10
+            timeoutSeconds: 10  # per-probe HTTP timeout; was 1800, far above the period
+            failureThreshold: 60  # 60 probes x 10 s period = up to 10 min to start
+          env:
+            - name: HF_HOME
+              value: /model-store
+          command:
+            - /bin/bash
+            - -lc
+          args:
+            - >-
+              exec python3 -m dynamo.frontend
+              --router-mode kv
+              --no-kv-events
+              --http-port 8000
+
+    VllmWorker:
+      componentType: worker
+      envFromSecret: hf-token-secret
+      replicas: 1
+      resources:
+        limits:
+          gpu: "1"  # quoted so the count stays a string
+        requests:
+          gpu: "1"
+      volumeMounts:
+        - name: model-cache
+          mountPoint: /model-store
+      sharedMemory:
+        size: 16Gi
+      extraPodSpec:
+        mainContainer:
+          image: <your-registry>/nemotron-omni-vllm:latest  # placeholder: replace
+          imagePullPolicy: IfNotPresent
+          startupProbe:
+            httpGet:
+              path: /health
+              port: 9090
+            periodSeconds: 10
+            timeoutSeconds: 10
+            failureThreshold: 120  # 120 probes x 10 s period = up to 20 min to start
+          env:
+            - name: HF_HOME
+              value: /model-store  # same path as the model-cache mountPoint above
+            # Match the --media-io-kwargs num_frames so dynamo's multimodal
+            # preprocessor and vLLM agree on the video frame ceiling.
+            - name: DYN_MM_VIDEO_NUM_FRAMES
+              value: "512"
+          command:
+            - /bin/bash
+            - -lc
+          args:
+            - >-
+              exec python3 -m dynamo.vllm
+              --model nvidia/Nemotron-3-Nano-Omni-30B-A3B-Reasoning-NVFP4
+              --served-model-name nvidia/Nemotron-3-Nano-Omni-30B-A3B-Reasoning-NVFP4
+              --max-model-len 131072
+              --enable-multimodal
+              --media-io-kwargs '{"video": {"num_frames": 512, "fps": 1}}'
+              --trust-remote-code
+              --video-pruning-rate 0.5
+              --dyn-tool-call-parser nemotron_nano
+              --dyn-reasoning-parser nemotron_nano