diff --git a/.agents/skills/nemoclaw-user-configure-inference/SKILL.md b/.agents/skills/nemoclaw-user-configure-inference/SKILL.md index af6255ceda..cb884fdf20 100644 --- a/.agents/skills/nemoclaw-user-configure-inference/SKILL.md +++ b/.agents/skills/nemoclaw-user-configure-inference/SKILL.md @@ -1,6 +1,6 @@ --- name: "nemoclaw-user-configure-inference" -description: "Connects NemoClaw to a local inference server. Use when setting up Ollama, vLLM, TensorRT-LLM, NIM, or any OpenAI-compatible local model server with NemoClaw. Trigger keywords - nemoclaw local inference, ollama nemoclaw, vllm nemoclaw, local model server, openai compatible endpoint, switch nemoclaw inference model, change inference runtime, nemoclaw additional model, nemoclaw sub-agent model, openclaw sub-agent, agents.list, sessions_spawn, vlm-demo, nemoclaw inference options, nemoclaw onboarding providers, nemoclaw inference routing, nemoclaw tool calling, ollama tool calls, vllm tool-call-parser, raw json in tui." +description: "Connects NemoClaw to a local inference server. Use when setting up Ollama, vLLM, TensorRT-LLM, NIM, or any OpenAI-compatible local model server with NemoClaw. Trigger keywords - nemoclaw local inference, ollama nemoclaw, vllm nemoclaw, local model server, openai compatible endpoint, switch nemoclaw inference model, change inference runtime, nemoclaw additional model, nemoclaw sub-agent model, openclaw sub-agent, agents.list, sessions_spawn, vlm-demo, nemoclaw dgx spark local inference, nemoclaw dgx station vllm, nemoclaw spark ollama, nemoclaw cdi gpu setup, nemoclaw inference options, nemoclaw onboarding providers, nemoclaw inference routing, nemoclaw tool calling, ollama tool calls, vllm tool-call-parser, raw json in tui." 
license: "Apache-2.0" --- @@ -453,11 +453,13 @@ If the provider itself needs to change (for example, switching from vLLM to a cl - **Load [references/switch-inference-providers.md](references/switch-inference-providers.md)** when switching inference providers, changing the model runtime, or reconfiguring inference routing. Changes the active inference model without restarting the sandbox. - **Load [references/set-up-sub-agent.md](references/set-up-sub-agent.md)** when users ask how to add a second model, configure a sub-agent model, use Omni for vision tasks, configure agents.list, or use sessions_spawn in NemoClaw. Shows the NemoClaw-specific file paths and update flow for adding an auxiliary OpenClaw sub-agent model. +- **Load [references/dgx-spark-station-local-inference.md](references/dgx-spark-station-local-inference.md)** when preparing DGX hardware, choosing Ollama or managed vLLM, checking GPU/CDI prerequisites, verifying the OpenShell gateway and local inference route, or troubleshooting CoreDNS, k3s image pull, CDI, or port 3000 conflicts. Guides DGX Spark and DGX Station users through end-to-end local inference setup with NemoClaw. - **Load [references/inference-options.md](references/inference-options.md)** when explaining which providers are available, what the onboard wizard presents, or how inference routing works. Lists all inference providers offered during NemoClaw onboarding. - **[references/tool-calling-reliability.md](references/tool-calling-reliability.md)** — Explains Ollama tool-call leak symptoms, when vLLM with a tool-call parser is recommended, and how to repoint NemoClaw to a parser-aware local endpoint. ## Related Skills +- [Set Up DGX Spark or DGX Station Local Inference](references/dgx-spark-station-local-inference.md) for an end-to-end DGX hardware walkthrough. - [Inference Options](references/inference-options.md) for the full list of providers available during onboarding. 
- [Tool-Calling Reliability](references/tool-calling-reliability.md) for diagnosing raw JSON tool-call output with local models. - [Switch Inference Models](references/switch-inference-providers.md) for runtime model switching. diff --git a/.agents/skills/nemoclaw-user-configure-inference/references/dgx-spark-station-local-inference.md b/.agents/skills/nemoclaw-user-configure-inference/references/dgx-spark-station-local-inference.md new file mode 100644 index 0000000000..66679d2226 --- /dev/null +++ b/.agents/skills/nemoclaw-user-configure-inference/references/dgx-spark-station-local-inference.md @@ -0,0 +1,159 @@ + + +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +# Set Up DGX Spark or DGX Station Local Inference + +Use this guide when you want NemoClaw to run with local inference on DGX Spark or DGX Station. +It pulls together the host checks, provider choice, onboarding flow, and the common Spark-specific failure modes that are otherwise spread across the quickstart, local inference, and troubleshooting pages. + +## Prerequisites + +Before onboarding, verify the host basics: + +- Docker is installed and running. +- Node.js 22.16 or later and npm 10 or later are available. +- The NVIDIA driver and container toolkit are installed. +- `nvidia-smi` works on the host. +- Port `3000` is free, or you are ready to choose a different dashboard port. + +Run: + +```bash +docker info +nvidia-smi +node --version +npm --version +``` + +DGX Spark and recent Docker installations can require NVIDIA Container Device Interface (CDI) specs for GPU passthrough. +NemoClaw checks and repairs the common missing-CDI case during install, but you can pre-generate the spec when needed: + +```bash +sudo nvidia-ctk cdi generate --output=/etc/cdi/nvidia.yaml +``` + +If this command is unavailable, install or repair the NVIDIA Container Toolkit before onboarding. 
+ +## Choose a Local Inference Path + +DGX Spark and DGX Station have two common local-inference paths. + +| Path | Best for | Notes | +|---|---|---| +| Managed vLLM | Tool-heavy agents, stronger tool-call reliability, larger GPU-backed models | Offered by default on DGX Spark and DGX Station. Uses `Qwen/Qwen3.6-27B-FP8` unless you override the registry slug. | +| Ollama | Simpler local chat, existing Ollama model libraries, quick experiments | Convenient, but some model/template combinations can emit tool calls as plain text. Use vLLM when tool-call reliability matters. | + +For managed vLLM, the first run pulls the container image and model weights into local caches. +Plan for a long first run on fresh systems. + +For Ollama, make sure only one daemon owns port `11434`. +If another runtime is already using that port, stop it or move one service before onboarding. + +## Run Onboarding + +Start the standard onboard wizard: + +```bash +nemoclaw onboard +``` + +On DGX Spark and DGX Station, the interactive wizard prompts for the provider and policy choices after the third-party software notice. +Choose the local-inference path and review the suggested policy defaults before NemoClaw creates the sandbox. + +If you prefer to choose manually: + +1. Select the local provider you want: **Local vLLM** or **Local Ollama**. +2. For managed vLLM, accept the default model or set `NEMOCLAW_VLLM_MODEL` before running onboarding. +3. For Ollama, choose an installed model or a starter model that fits available memory. +4. Let NemoClaw validate the local endpoint before it creates the sandbox. 
+ +For non-interactive managed vLLM setup on DGX Spark or DGX Station: + +```bash +NEMOCLAW_PROVIDER=install-vllm nemoclaw onboard --non-interactive --yes --yes-i-accept-third-party-software +``` + +To choose a supported managed-vLLM model: + +```bash +NEMOCLAW_PROVIDER=install-vllm \ +NEMOCLAW_VLLM_MODEL=qwen3.6-27b \ +nemoclaw onboard --non-interactive --yes --yes-i-accept-third-party-software +``` + +Supported managed-vLLM slugs are listed in [Use a Local Inference Server](../SKILL.md#override-the-managed-vllm-model). + +## Verify the Setup + +After onboarding completes, check the sandbox and local inference route: + +```bash +nemoclaw status +nemoclaw doctor +``` + +Healthy output should show: + +- The sandbox is running. +- The dashboard is reachable. +- The selected inference provider is healthy. +- For Ollama, the authenticated proxy health line is healthy when the proxy token is available. + +Open the TUI: + +```bash +nemoclaw connect +openclaw tui +``` + +Ask for a small tool-using action. +If you see raw JSON tool calls printed as chat text, switch to vLLM with a parser-aware model path and review [Tool-Calling Reliability](tool-calling-reliability.md). + +## Common DGX Spark and Station Fixes + +### CoreDNS CrashLoop + +If CoreDNS in the embedded k3s cluster crashes shortly after setup, run the CoreDNS fix script referenced by the troubleshooting guide, then recreate the sandbox. +The issue is usually a resolver path that points at `127.0.0.11`, which does not route inside the gateway container. + +### k3s Image Pull or Upload Takes Too Long + +Fresh systems may spend several minutes pulling images, uploading layers to the OpenShell gateway, or loading model weights. 
+If readiness times out while the host is still doing real work, raise both the local-inference and sandbox readiness timeouts: + +```bash +export NEMOCLAW_LOCAL_INFERENCE_TIMEOUT=300 +export NEMOCLAW_SANDBOX_READY_TIMEOUT=600 +nemoclaw onboard +``` + +### CDI GPU Errors + +If gateway startup reports `unresolvable CDI devices nvidia.com/gpu=all`, regenerate CDI specs and rerun onboarding: + +```bash +sudo nvidia-ctk cdi generate --output=/etc/cdi/nvidia.yaml +nemoclaw onboard +``` + +If the error persists, repair the NVIDIA Container Toolkit installation and verify that `docker info` reports the expected CDI spec directories. + +### Port 3000 Conflict + +Some Spark systems already run services on port `3000`. +Set a different dashboard port before onboarding: + +```bash +export NEMOCLAW_DASHBOARD_PORT=18789 +nemoclaw onboard +``` + +Use a free port that does not overlap the configured gateway, vLLM, Ollama, or Ollama proxy ports. + +## Next Steps + +- [Use a Local Inference Server](../SKILL.md) for full Ollama, vLLM, NIM, and compatible-endpoint details. +- [Tool-Calling Reliability](tool-calling-reliability.md) for choosing between Ollama and parser-aware vLLM. +- Troubleshooting (use the `nemoclaw-user-reference` skill) for deeper DGX Spark failure-mode guidance. diff --git a/.agents/skills/nemoclaw-user-configure-inference/references/inference-options.md b/.agents/skills/nemoclaw-user-configure-inference/references/inference-options.md index 5242cff46c..1a7e7a3a03 100644 --- a/.agents/skills/nemoclaw-user-configure-inference/references/inference-options.md +++ b/.agents/skills/nemoclaw-user-configure-inference/references/inference-options.md @@ -74,7 +74,46 @@ When you select it, NemoClaw starts the router proxy on the host, waits for its The sandbox does not call the router port directly. The router model pool lives in `nemoclaw-blueprint/router/pool-config.yaml`. +Edit that file to define which models the router can choose from. 
The default pool routes between NVIDIA-hosted Nemotron models and uses the `tolerance` value to choose the lowest-cost model whose predicted quality stays within the configured threshold. + +```yaml +routing: + method: prefill + checkpoint: llm-router/checkpoints/prefill_router_qwen08b.pt + tolerance: 0.20 + encoder: Qwen/Qwen3.5-0.8B + +models: + - name: nano + litellm_model: "openai/nvidia/nvidia/Nemotron-3-Nano-30B-A3B" + cost_per_m_input_tokens: 0.05 + api_base: "https://inference-api.nvidia.com" + + - name: super + litellm_model: "openai/nvidia/nvidia/nemotron-3-super-v3" + cost_per_m_input_tokens: 0.10 + api_base: "https://inference-api.nvidia.com" +``` + +The `tolerance` parameter controls the accuracy-cost tradeoff. + +| Value | Behavior | +|-------|----------| +| `0.0` | Always pick the most accurate model. | +| `0.20` | Pick the lowest-cost model whose predicted quality is within 20 percentage points of the best (default). | +| `1.0` | Always pick the cheapest model. | + +The router runs on the host, not inside the sandbox. + +```text +Sandbox (agent) ──> OpenShell Gateway (L7 proxy) ──> Model Router (:4000) ──> NVIDIA API + └── PrefillRouter selects model +``` + +Credentials flow through the OpenShell provider system. +The sandbox never sees raw API keys. 
+ To use the router in scripted setup, set: ```console diff --git a/.agents/skills/nemoclaw-user-configure-security/references/best-practices.md b/.agents/skills/nemoclaw-user-configure-security/references/best-practices.md index 440571e4d2..31f3101f50 100644 --- a/.agents/skills/nemoclaw-user-configure-security/references/best-practices.md +++ b/.agents/skills/nemoclaw-user-configure-security/references/best-practices.md @@ -184,6 +184,15 @@ For sensitive workloads, use a reviewed host-side immutability workflow after in - **DAC permissions (default).** The sandbox user owns `/sandbox/.openclaw` with mode `2770` (setgid `sandbox:sandbox`) and `openclaw.json` with mode `660`, so the agent and its group can read and write config directly. A reviewed host-side immutability workflow should compare the intended ownership and mode with the live sandbox filesystem before treating the config tree as locked. - **Config integrity hash.** The image includes a SHA256 hash of `openclaw.json`. In the default mutable state, `.config-hash` is sandbox-owned and is not a tamper-proof trust anchor, so startup does not fail closed on that hash. When the hash is root-owned and read-only, startup enforces it and refuses to start if the hash does not match. +- **Content seal under shields up.** + When `nemoclaw shields up` runs against a clean lock, it captures a SHA-256 seal of `openclaw.json` and any other locked files into the host-side shields state file. + On sealed sandboxes, every `shields status` call recomputes the hash inside the sandbox and surfaces drift on any mismatch, so a host-root tamper that flips perms back to `444 root:root` after rewriting the file is still flagged. + Sandboxes locked before this seal landed have no recorded hash; perm-only verification cannot prove their bytes match the image-original, so the seal is **not** a retroactive proof of integrity for legacy state. 
+ The same applies to partial seals, where the locked file set grew after the existing seal was captured (some entries sealed, some missing). + By default, `shields up` refuses to seal in either case and asks you to rebuild the sandbox first for a known-good baseline. + `shields status` on a legacy lockdown surfaces `UP (UNSEALED — content integrity unknown for legacy lockdown)` and exits with status 2 so scripts treat it as a failure until the operator seals an explicit baseline. + If you explicitly trust the current bytes, opt in via `NEMOCLAW_SHIELDS_ACCEPT_LEGACY_BASELINE=1`, which captures a seal over the current files and is acknowledged in a log line. + Once a sandbox is sealed, `shields up` refuses to re-seal a tampered baseline; restore the original file or rebuild the sandbox before re-running. - **Gateway token environment.** The gateway exports `OPENCLAW_GATEWAY_TOKEN` and writes it to `/tmp/nemoclaw-proxy-env.sh` for interactive sandbox sessions. Keep this in mind when deciding whether a workload should run with mutable config or an immutable config posture. | Aspect | Detail | diff --git a/.agents/skills/nemoclaw-user-get-started/SKILL.md b/.agents/skills/nemoclaw-user-get-started/SKILL.md index bab2b2d0e2..abe6798008 100644 --- a/.agents/skills/nemoclaw-user-get-started/SKILL.md +++ b/.agents/skills/nemoclaw-user-get-started/SKILL.md @@ -74,6 +74,7 @@ $ curl -fsSL https://www.nvidia.com/nemoclaw.sh | bash On DGX Spark, DGX Station, and Windows WSL, an interactive installer offers express install after you accept the third-party software notice. Express install switches onboarding to non-interactive mode, allows `sudo` password prompts for required host changes, and selects the managed local inference path for that platform. 
Unless `NEMOCLAW_POLICY_TIER` is set, it applies sandbox policy in `suggested` mode with the `balanced` tier by default, using the base sandbox policy plus supported package, model, web-search, and local-inference presets. +On DGX Spark, express install uses `my-spark-assistant` as the sandbox name unless `NEMOCLAW_SANDBOX_NAME` is already set. On WSL, express install selects the Windows-host Ollama setup path. Set `NEMOCLAW_NO_EXPRESS=1` to skip the express prompt, or set `NEMOCLAW_PROVIDER` before launching the installer when you want to choose a provider yourself. diff --git a/.agents/skills/nemoclaw-user-get-started/references/prerequisites.md b/.agents/skills/nemoclaw-user-get-started/references/prerequisites.md index 525945dfaa..0c957055e1 100644 --- a/.agents/skills/nemoclaw-user-get-started/references/prerequisites.md +++ b/.agents/skills/nemoclaw-user-get-started/references/prerequisites.md @@ -60,7 +60,7 @@ The table is generated from [`ci/platform-matrix.json`](https://github.com/NVIDI |----|-------------------|--------|-------| | Linux | Docker | Tested | Primary tested path. | | macOS (Apple Silicon) | Colima, Docker Desktop | Tested with limitations | Install Xcode Command Line Tools (`xcode-select --install`) and start the runtime before running the installer. | -| DGX Spark | Docker | Tested | Use the standard installer and `nemoclaw onboard`. For an end-to-end walkthrough with local Ollama inference, see the [NVIDIA Spark playbook](https://build.nvidia.com/spark/nemoclaw). | +| DGX Spark | Docker | Tested | Use the standard installer and `nemoclaw onboard`. For local inference, see Set Up DGX Spark or DGX Station Local Inference (use the `nemoclaw-user-configure-inference` skill). | | Windows WSL2 | Docker Desktop (WSL backend) | Tested with limitations | Requires WSL2 with Docker Desktop backend. 
| ## Next Steps diff --git a/.agents/skills/nemoclaw-user-get-started/references/quickstart-hermes.md b/.agents/skills/nemoclaw-user-get-started/references/quickstart-hermes.md index 6ee5e06319..361ddc502b 100644 --- a/.agents/skills/nemoclaw-user-get-started/references/quickstart-hermes.md +++ b/.agents/skills/nemoclaw-user-get-started/references/quickstart-hermes.md @@ -5,11 +5,6 @@ Use NemoHermes when you want NemoClaw to create an OpenShell sandbox that runs Hermes instead of the default OpenClaw agent. The `nemohermes` command is an alias for `nemoclaw` with the Hermes agent pre-selected. -**Experimental Feature:** - -The Hermes agent option is experimental. -Interfaces, defaults, and supported features may change without notice, and it is not recommended for production use. - Review the [Prerequisites](prerequisites.md) before starting. Docker must be installed, running, and reachable from the current shell before Hermes onboarding can build the sandbox image. On Linux, the installer can install Docker, start the service, and add your user to the `docker` group. diff --git a/.agents/skills/nemoclaw-user-manage-sandboxes/SKILL.md b/.agents/skills/nemoclaw-user-manage-sandboxes/SKILL.md index 766db03f54..ff538fa0eb 100644 --- a/.agents/skills/nemoclaw-user-manage-sandboxes/SKILL.md +++ b/.agents/skills/nemoclaw-user-manage-sandboxes/SKILL.md @@ -187,7 +187,7 @@ Re-run the installer. Before it onboards anything, the installer calls `nemoclaw backup-all` (use the `nemoclaw-user-reference` skill) automatically, storing a snapshot of each running sandbox in `~/.nemoclaw/rebuild-backups/` as a safety net. If your existing gateway is from OpenShell earlier than `0.0.37`, the installer prompts before it runs the new automatic gateway upgrade path. The automatic path is offered only when the existing `nemoclaw` CLI supports `backup-all`; older installs must preserve sandbox state manually before retiring the gateway. 
-For unattended installs, set `NEMOCLAW_ACCEPT_EXPERIMENTAL_OPENSHELL_UPGRADE=1`, or manually run `nemoclaw backup-all` and `openshell gateway destroy -g nemoclaw || openshell gateway destroy` before rerunning the installer as `curl -fsSL https://www.nvidia.com/nemoclaw.sh | NEMOCLAW_OPENSHELL_UPGRADE_PREPARED=1 bash`. +For unattended installs, set `NEMOCLAW_ACCEPT_EXPERIMENTAL_OPENSHELL_UPGRADE=1`, or manually run `nemoclaw backup-all`, then `openshell gateway remove nemoclaw || openshell gateway destroy -g nemoclaw || openshell gateway destroy` (both verbs are tried so the right one runs on either OpenShell release), and, if a privileged host gateway remains, `sudo pkill -f openshell-gateway`, before rerunning the installer as `curl -fsSL https://www.nvidia.com/nemoclaw.sh | NEMOCLAW_OPENSHELL_UPGRADE_PREPARED=1 bash`. ```console $ curl -fsSL https://www.nvidia.com/nemoclaw.sh | bash @@ -255,6 +255,14 @@ nemoclaw uninstall | `--keep-openshell` | Leave OpenShell binaries installed. | | `--delete-models` | Also remove NemoClaw-pulled Ollama models. | +**Note:** + +`nemoclaw uninstall` preserves `~/.nemoclaw/rebuild-backups/` (host-side snapshots that `nemoclaw snapshot create` and `nemoclaw backup-all` write), `~/.nemoclaw/backups/` (workspace backups that `scripts/backup-workspace.sh` writes), and `~/.nemoclaw/sandboxes.json` (the sandbox registry) by default. +Uninstall removes every other entry under `~/.nemoclaw/`. +Interactive runs prompt before they remove the preserved entries; the default answer keeps them. +For non-interactive runs (`--yes`, `NEMOCLAW_NON_INTERACTIVE=1`, or a non-TTY shell), set `NEMOCLAW_UNINSTALL_DESTROY_USER_DATA=1` to acknowledge data loss and remove the preserved entries as well. +See `nemoclaw uninstall` (use the `nemoclaw-user-reference` skill) for the full preservation contract. 
+ `nemoclaw uninstall` runs the version-pinned `uninstall.sh` that shipped with your installed CLI, so it does not fetch anything over the network at uninstall time. If the `nemoclaw` CLI is missing or broken, fall back to the hosted script: diff --git a/.agents/skills/nemoclaw-user-manage-sandboxes/references/messaging-channels.md b/.agents/skills/nemoclaw-user-manage-sandboxes/references/messaging-channels.md index 8ca516df1f..00073ec0ce 100644 --- a/.agents/skills/nemoclaw-user-manage-sandboxes/references/messaging-channels.md +++ b/.agents/skills/nemoclaw-user-manage-sandboxes/references/messaging-channels.md @@ -41,7 +41,9 @@ Telegram uses a bot token from [BotFather](https://t.me/BotFather). Open Telegram, send `/newbot` to [@BotFather](https://t.me/BotFather), follow the prompts, and copy the token. For Telegram group chats, disable privacy mode before testing group replies: in @BotFather, run `/setprivacy`, choose the bot, then choose **Disable**. After changing privacy mode, remove the bot from each Telegram group and add it back so Telegram applies the new delivery setting to that group. -`TELEGRAM_ALLOWED_IDS` is a comma-separated list of Telegram user IDs for DM access. +`TELEGRAM_ALLOWED_IDS` is a comma-separated list of Telegram user or private-chat IDs for DM access. +For compatibility with older QA scripts, NemoClaw also treats `TELEGRAM_AUTHORIZED_CHAT_IDS` and `TELEGRAM_CHAT_ID` as aliases, but new automation should use `TELEGRAM_ALLOWED_IDS`. +Keep these aliases until QA automation and public repro templates have stopped exporting them for at least one full release. Group chats stay open by default so rebuilt sandboxes do not silently drop Telegram group messages because of an empty group allowlist. Set `TELEGRAM_REQUIRE_MENTION=1` to make the bot reply in Telegram groups only when users mention it. Pairing and `TELEGRAM_ALLOWED_IDS` still govern direct messages. 
@@ -158,6 +160,8 @@ If applying the preset fails, NemoClaw warns and tells you to re-apply manually Choose the rebuild so the running sandbox image picks up the new channel. For Telegram, Discord, and Slack, `channels add` also checks the rebuilt runtime for the selected bridge and reports startup, credential, or missing-plugin warnings before returning. If you need optional channel settings such as `TELEGRAM_ALLOWED_IDS`, `TELEGRAM_REQUIRE_MENTION`, `DISCORD_SERVER_ID`, `DISCORD_USER_ID`, `DISCORD_REQUIRE_MENTION`, `SLACK_ALLOWED_USERS`, or `SLACK_ALLOWED_CHANNELS`, export them before the rebuild starts. +Telegram Bot API `sendMessage` calls prove only outbound delivery from the bot; to test inbound agent replies, send a message from the Telegram client as an allowed user. +For a repeatable live Telegram reply check, run `test/e2e/test-messaging-providers.sh` with `TELEGRAM_BOT_TOKEN_REAL`, `TELEGRAM_AUTHORIZED_CHAT_IDS` or `TELEGRAM_CHAT_ID`, and `NEMOCLAW_TELEGRAM_INBOUND_REPLY_E2E=1`. If you defer the rebuild, apply the change later: ```console diff --git a/.agents/skills/nemoclaw-user-overview/SKILL.md b/.agents/skills/nemoclaw-user-overview/SKILL.md index 01ed184ec2..ca2512599d 100644 --- a/.agents/skills/nemoclaw-user-overview/SKILL.md +++ b/.agents/skills/nemoclaw-user-overview/SKILL.md @@ -1,6 +1,6 @@ --- name: "nemoclaw-user-overview" -description: "Explains what NemoClaw covers: onboarding, lifecycle management, and OpenClaw operations within OpenShell containers, plus capabilities and why it exists. Use when users ask what NemoClaw is or what the project provides. For ecosystem placement or OpenShell-only paths, use the Ecosystem page; for internal mechanics, use How It Works. 
Trigger keywords - nemoclaw overview, openclaw always-on assistants, nvidia openshell, nvidia nemotron, nemoclaw ecosystem, openclaw openshell, nemoclaw vs openshell, sandboxed openclaw, how nemoclaw works, nemoclaw sandbox lifecycle blueprint, nemoclaw release notes, nemoclaw changelog." +description: "Explains what NemoClaw covers: onboarding, lifecycle management, and agent operations within OpenShell containers, plus capabilities and why it exists. Use when users ask what NemoClaw is or what the project provides. For ecosystem placement or OpenShell-only paths, use the Ecosystem page; for internal mechanics, use How It Works. Trigger keywords - nemoclaw overview, openclaw always-on assistants, hermes agent, nvidia openshell, nvidia nemotron, nemoclaw ecosystem, openclaw openshell, nemoclaw vs openshell, sandboxed openclaw, how nemoclaw works, nemoclaw sandbox lifecycle blueprint, nemoclaw release notes, nemoclaw changelog." license: "Apache-2.0" --- @@ -11,7 +11,7 @@ license: "Apache-2.0" ## References -- **Load [references/overview.md](references/overview.md)** when users ask what NemoClaw is or what the project provides. For ecosystem placement or OpenShell-only paths, use the Ecosystem page; for internal mechanics, use How It Works. Explains what NemoClaw covers: onboarding, lifecycle management, and OpenClaw operations within OpenShell containers, plus capabilities and why it exists. +- **Load [references/overview.md](references/overview.md)** when users ask what NemoClaw is or what the project provides. For ecosystem placement or OpenShell-only paths, use the Ecosystem page; for internal mechanics, use How It Works. Explains what NemoClaw covers: onboarding, lifecycle management, and agent operations within OpenShell containers, plus capabilities and why it exists. - **Load [references/ecosystem.md](references/ecosystem.md)** when users ask about the relationship between OpenClaw, OpenShell, and NemoClaw, or when to use NemoClaw versus OpenShell. 
Explains how OpenClaw, OpenShell, and NemoClaw form the ecosystem, NemoClaw's position in the stack, what NemoClaw adds beyond the community sandbox, and when to prefer NemoClaw versus integrating OpenShell and OpenClaw directly. - **Load [references/how-it-works.md](references/how-it-works.md)** for sandbox lifecycle and architecture mechanics; not for product definition (Overview) or multi-project placement (Ecosystem). Describes how NemoClaw works internally: CLI, plugin, blueprint runner, OpenShell orchestration, inference routing, and protection layers. - **Load [references/release-notes.md](references/release-notes.md)** when users ask about recent changes, the release cadence, or where to track versioned assets on GitHub. Includes the NemoClaw release notes. diff --git a/.agents/skills/nemoclaw-user-overview/references/how-it-works.md b/.agents/skills/nemoclaw-user-overview/references/how-it-works.md index b0f9f4a240..40f6c0f986 100644 --- a/.agents/skills/nemoclaw-user-overview/references/how-it-works.md +++ b/.agents/skills/nemoclaw-user-overview/references/how-it-works.md @@ -2,9 +2,9 @@ # NemoClaw Architecture Overview -This page explains how NemoClaw runs OpenClaw inside an OpenShell sandbox and how the gateway connects the agent to inference, integrations, and policy. +This page explains how NemoClaw runs supported agents inside an OpenShell sandbox and how the gateway connects the agent to inference, integrations, and policy. -NemoClaw does not replace OpenClaw or OpenShell. +NemoClaw does not replace OpenShell or your chosen agent runtime. It packages them into a repeatable setup with a host CLI, a versioned blueprint, default policies, inference setup, plugin configuration, and state helpers. You can use that setup directly or adapt it for your own OpenShell integration. @@ -23,7 +23,7 @@ The diagram has the following components: | Users and operators | Start from the CLI, installer, dashboard, or an end-user channel. 
| | NemoClaw control | Collects configuration, runs onboarding, prepares the blueprint, and asks OpenShell to create or update resources. | | OpenShell gateway | Owns sandbox lifecycle, networking, policy enforcement, inference routing, and integration egress. | -| NemoClaw sandbox | Runs OpenClaw with the NemoClaw plugin, the selected blueprint contents, and supporting tools. | +| NemoClaw sandbox | Runs the onboarded agent with the selected blueprint contents and supporting tools. OpenClaw sandboxes also load the NemoClaw plugin. | | Inference | Receives model requests through the gateway, using NVIDIA endpoints, NIM, or compatible APIs. | | Integrations | Reach messaging services, MCP servers, GitHub, package indexes, or model hubs through gateway-managed egress. | | State and artifacts | Store configuration, credentials, logs, workspace files, policies, and transcripts outside the running agent process. | @@ -64,7 +64,7 @@ This separation keeps the sandbox plugin small while allowing host orchestration ## Sandbox Creation -When you run `nemoclaw onboard`, NemoClaw creates an OpenShell sandbox that runs OpenClaw in an isolated container. +When you run `nemoclaw onboard`, NemoClaw creates an OpenShell sandbox that runs your selected agent in an isolated container. The host CLI and blueprint runner orchestrate this process through the OpenShell CLI: 1. NemoClaw resolves the blueprint, checks version compatibility, and verifies the digest. @@ -99,6 +99,6 @@ For details on the baseline rules, refer to Network Policies (use the `nemoclaw- ## Next Steps - Read [Ecosystem](ecosystem.md) for stack-level relationships and NemoClaw versus OpenShell-only paths. -- Follow the Quickstart (use the `nemoclaw-user-get-started` skill) to launch your first sandbox. +- Follow the Quickstart with OpenClaw (use the `nemoclaw-user-get-started` skill) or Quickstart with Hermes (use the `nemoclaw-user-get-started` skill) to launch your first sandbox. 
- Refer to the Architecture (use the `nemoclaw-user-reference` skill) for the full technical structure, including file layouts and the blueprint lifecycle. - Refer to Inference Options (use the `nemoclaw-user-configure-inference` skill) for detailed provider configuration. diff --git a/.agents/skills/nemoclaw-user-overview/references/overview.md b/.agents/skills/nemoclaw-user-overview/references/overview.md index 39c387a537..e85d6a4b7d 100644 --- a/.agents/skills/nemoclaw-user-overview/references/overview.md +++ b/.agents/skills/nemoclaw-user-overview/references/overview.md @@ -2,17 +2,18 @@ # Overview of NVIDIA NemoClaw -NVIDIA NemoClaw is an open-source reference stack that simplifies running [OpenClaw](https://openclaw.ai) always-on assistants more safely. -NemoClaw provides onboarding, lifecycle management, and OpenClaw operations within OpenShell containers. -It incorporates policy-based privacy and security guardrails, giving you control over your agents’ behavior and data handling. -This enables self-evolving claws to run more safely in clouds, on prem, RTX PCs and DGX Spark. +NVIDIA NemoClaw is an open-source reference stack for running always-on AI agents more safely inside OpenShell containers. +NemoClaw provides onboarding, lifecycle management, and agent operations for supported runtimes in OpenShell sandboxes. +It incorporates policy-based privacy and security guardrails, giving you control over your agents' behavior and data handling. +This enables self-evolving agents to run more safely in clouds, on prem, RTX PCs, and DGX Spark. NemoClaw pairs hosted models on inference providers or local endpoints with a hardened sandbox, routed inference, and declarative egress policy so deployment stays safer and more repeatable. -The sandbox runtime comes from [NVIDIA OpenShell](https://github.com/NVIDIA/OpenShell); NemoClaw adds the blueprint, `nemoclaw` CLI, onboarding, and related tooling as the reference way to run OpenClaw there. 
+The sandbox runtime comes from [NVIDIA OpenShell](https://github.com/NVIDIA/OpenShell). +NemoClaw adds the blueprint, `nemoclaw` CLI, onboarding, and related tooling as the reference way to run supported agents there. | Capability | Description | |-------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------| -| Sandbox OpenClaw | Creates an OpenShell sandbox pre-configured for OpenClaw, with filesystem and network policies applied from the first boot. | +| Sandbox supported agents | Creates an OpenShell sandbox pre-configured for your selected agent, with filesystem and network policies applied from the first boot. | | Route inference | Configures OpenShell inference routing so agent traffic goes to the provider and model you chose during onboarding (NVIDIA Endpoints, OpenAI, Anthropic, Gemini, compatible endpoints, local Ollama, and others). The agent uses `inference.local` inside the sandbox; credentials stay on the host. | | Manage the lifecycle | Handles blueprint versioning, digest verification, and sandbox setup. | @@ -50,7 +51,7 @@ You can use NemoClaw for various use cases including the following. | Use Case | Description | |---------------------------|----------------------------------------------------------------------------------------------| -| Always-on assistant | Run an OpenClaw assistant with controlled network access and operator-approved egress. | +| Always-on assistant | Run a sandboxed agent with controlled network access and operator-approved egress. | | Sandboxed testing | Test agent behavior in a locked-down environment before granting broader permissions. | | Remote GPU deployment | Deploy a sandboxed agent to a remote GPU instance for persistent operation. 
| @@ -60,6 +61,7 @@ Navigate to the following topics to learn more about NemoClaw and how to install - [Architecture Overview](how-it-works.md) to understand how NemoClaw works. - [Ecosystem](ecosystem.md) to understand how OpenClaw, OpenShell, and NemoClaw relate in the wider stack, and when to use NemoClaw versus OpenShell. -- Quickstart (use the `nemoclaw-user-get-started` skill) to install NemoClaw and run your first sandboxed agent. +- Quickstart with OpenClaw (use the `nemoclaw-user-get-started` skill) to install NemoClaw and run your first OpenClaw sandbox. +- Quickstart with Hermes (use the `nemoclaw-user-get-started` skill) to install NemoClaw and run a Hermes sandbox. - Agent Skills (use the `nemoclaw-user-agent-skills` skill) to load NemoClaw guidance into an AI coding assistant. - Inference Options (use the `nemoclaw-user-configure-inference` skill) to check the inference providers that NemoClaw supports and how inference routing works. diff --git a/.agents/skills/nemoclaw-user-reference/references/commands.md b/.agents/skills/nemoclaw-user-reference/references/commands.md index 8fb96cfeb1..e0b13380d4 100644 --- a/.agents/skills/nemoclaw-user-reference/references/commands.md +++ b/.agents/skills/nemoclaw-user-reference/references/commands.md @@ -85,7 +85,7 @@ $ NEMOCLAW_SINGLE_SESSION=1 curl -fsSL https://www.nvidia.com/nemoclaw.sh | bash When existing sandboxes were created with OpenShell earlier than `0.0.37`, the installer prompts before running the new automatic gateway upgrade path. For scripted installs, set `NEMOCLAW_ACCEPT_EXPERIMENTAL_OPENSHELL_UPGRADE=1` to allow the installer to back up registered sandbox state, retire the old gateway, install the current supported OpenShell release, and restore state during onboarding. The automatic path is disabled if the existing `nemoclaw` CLI does not advertise `backup-all`; preserve sandbox state manually before retiring the old gateway in that case. 
-To perform those steps manually, run `nemoclaw backup-all`, retire the old gateway with `openshell gateway destroy -g nemoclaw || openshell gateway destroy`, then rerun the installer as `curl -fsSL https://www.nvidia.com/nemoclaw.sh | NEMOCLAW_OPENSHELL_UPGRADE_PREPARED=1 bash`. +To perform those steps manually, run `nemoclaw backup-all`, retire the old gateway registration with `openshell gateway remove nemoclaw || openshell gateway destroy -g nemoclaw || openshell gateway destroy` (both verbs are tried so the right one runs on either OpenShell release), stop any remaining privileged host gateway with `sudo pkill -f openshell-gateway`, then rerun the installer as `curl -fsSL https://www.nvidia.com/nemoclaw.sh | NEMOCLAW_OPENSHELL_UPGRADE_PREPARED=1 bash`. The wizard prompts for a provider first, then collects the provider credential if needed. Supported non-experimental choices include NVIDIA Endpoints, OpenAI, Anthropic, Google Gemini, and compatible OpenAI or Anthropic endpoints. @@ -277,7 +277,9 @@ $ nemoclaw onboard --from ./Dockerfile.custom ### GPU passthrough -When `nemoclaw onboard` detects an NVIDIA GPU on the host (`nvidia-smi` succeeds), it enables OpenShell GPU passthrough at both the gateway and sandbox level by default. +When `nemoclaw onboard` detects an NVIDIA GPU on the host, it enables OpenShell GPU passthrough at both the gateway and sandbox level by default. +Detection proceeds along two paths. 
The `nvidia-smi`-based paths (the primary `--query-gpu=name,memory.total,memory.free` probe and the unified-memory `--query-gpu=name` fallback) require `nvidia-smi` to succeed and, on hosts whose firmware does not classify as a known NVIDIA platform (DGX Spark, DGX Station, Jetson, or Tegra), additionally require that the GPU name does not match the placeholder family observed on the Windows-on-ARM WSL2 nvidia-smi shim (`JMJWOA-Generic-*`) and that either the host is not ARM64 Linux (the observed shim is Windows-on-ARM only) or the NVIDIA kernel driver is bound (`/proc/driver/nvidia/` present), so that placeholder shims on non-NVIDIA hardware are not mistaken for real GPUs. +Jetson/Tegra hosts that ship without `nvidia-smi` continue to be detected via the devicetree firmware fallback (`/sys/firmware/devicetree/base/model`) or the Tegra device-node fallback (`/dev/nvhost-gpu`, `/dev/nvhost-ctrl-gpu`, `/dev/nvhost-ctrl`, or `/dev/nvmap`); both bypass the trust-tier gate above. Use `--no-gpu` to opt out when you want host-side inference providers only and do not need direct GPU access inside the sandbox. Use `--gpu` to require GPU passthrough and fail fast if an NVIDIA GPU is not detected. Use `--sandbox-gpu` or `--no-sandbox-gpu` to control only direct NVIDIA GPU access inside the sandbox. @@ -287,7 +289,9 @@ If the patch fails, onboarding keeps diagnostics and prints a manual cleanup com Prerequisites: -- NVIDIA GPU drivers installed and working (`nvidia-smi` must succeed). +- Ensure NVIDIA GPU drivers are installed and working. + - On generic NVIDIA hosts, `nvidia-smi` must succeed. + - On Jetson/Tegra hosts shipping without `nvidia-smi`, the devicetree firmware fallback substitutes. - NVIDIA Container Toolkit configured for Docker. When GPU passthrough is enabled and a gateway already exists without it, onboarding first checks whether replacing the CPU-only gateway is safe. 
@@ -384,6 +388,18 @@ $ nemoclaw my-assistant recover Show sandbox status, health, and inference configuration. +Pass `--json` to emit a structured per-sandbox report instead of the text renderer. +The JSON output includes at least `schemaVersion`, `name`, `found`, `model`, `provider`, `phase`, `gatewayState`, `inferenceHealth`, `rpcIssue`, `hostGpuDetected`, `sandboxGpuEnabled`, `sandboxGpuMode`, `sandboxGpuDevice`, `openshellDriver`, `openshellVersion`, and `policies`. +`openshellDriver` and `openshellVersion` are always strings (falling back to `"unknown"` when the registry has no value), so consumers can rely on `typeof` checks. +The command exits non-zero when the sandbox is missing locally, the gateway state is not `present`, or the gateway reports a schema/protobuf mismatch (mirrored as `rpcIssue`). +The alias form `nemoclaw status --json` requires the sandbox to be registered locally; the canonical form `nemoclaw sandbox status --json` is the one to use from automation that may run against an unknown sandbox name, since it still emits a JSON document with `found: false` instead of a text error. + +```console +$ nemoclaw my-assistant status +$ nemoclaw my-assistant status --json +$ nemoclaw sandbox status my-assistant --json +``` + The command probes every inference provider and reports one of three states on the `Inference` line: | State | Meaning | @@ -750,6 +766,21 @@ $ nemoclaw my-assistant channels start telegram |------|-------------| | `--dry-run` | Report the channel that would be re-enabled without updating the registry or rebuilding | +### `nemoclaw channels status` + +Run channel-specific runtime diagnostics. For WhatsApp the command probes the sandbox to separately report pairing/session state, the Noise WebSocket connection, inbound event delivery, and policy/config coverage; a paired channel with no observed inbound delivery exits non-zero with verdict `idle` so an unhealthy bridge cannot pass as healthy. 
+ +```bash +nemoclaw my-assistant channels status --channel whatsapp +``` + +| Flag | Description | +|------|-------------| +| `--channel ` | Channel to inspect; defaults to `whatsapp` when registered | +| `--json` | Emit the diagnostic report as JSON (exit non-zero when the verdict is not `healthy` or `unknown`) | + +The probe is bounded by an in-sandbox `openshell sandbox exec` with a hard timeout, captures only short matched bridge log signals (e.g. `connection.open`, `401 unauthorized`, `qr expired`), and never forwards message bodies to the host diagnostic output. + ### `nemoclaw skill install ` Deploy a skill directory to a running sandbox. @@ -777,6 +808,9 @@ Upgrade a sandbox to the current agent version while preserving workspace state. The command backs up workspace state, destroys the old sandbox (including its host-side Docker image), recreates it with the current image via `onboard --resume`, and restores workspace state into the new sandbox. Credentials are stripped from backups before storage. Policy presets applied to the old sandbox are reapplied to the new one so your egress rules survive the rebuild. +The recorded sandbox GPU mode is preserved across rebuild. +A sandbox onboarded with an explicit GPU opt-out (stored as `sandboxGpuMode: "0"`, plus legacy registry entries that only record `gpuEnabled: false`) is recreated with the same opt-out, so the inner `onboard --resume` skips the Docker CDI GPU preflight on hosts without an NVIDIA GPU. +Auto-mode sandboxes remain auto. 
```console $ nemoclaw my-assistant rebuild [--yes|-y|--force] [--verbose|-v] @@ -1187,6 +1221,30 @@ On Linux, uninstall removes `~/.local/state/nemoclaw`, which contains Docker-dri $ nemoclaw uninstall [--yes] [--keep-openshell] [--delete-models] [--gateway ] ``` +#### User-data preservation under `~/.nemoclaw/` + +To avoid destroying host-side user data, uninstall preserves the following entries under `~/.nemoclaw/` by default: + +| Entry | What it holds | +|---|---| +| `rebuild-backups/` | Host-side snapshots that `nemoclaw snapshot create` and `nemoclaw backup-all` write. `nemoclaw snapshot restore` reads them back after you reinstall. | +| `backups/` | Host-side workspace backups that `scripts/backup-workspace.sh` writes (see Backup and Restore (use the `nemoclaw-user-manage-sandboxes` skill)). | +| `sandboxes.json` | Host-side sandbox registry. NemoClaw uses it to map sandbox names back to their persistence directories when you reinstall. | + +Uninstall removes every other entry under `~/.nemoclaw/` (gateway source, runtime state, the Ollama auth proxy PID file, etc.). + +Decision matrix: + +| Context | Behavior | +|---|---| +| Interactive TTY, preserved entries present, no env override | Prompts `Also remove them? [y/N]`. Default `N` keeps the entries. | +| Interactive TTY, user answers `y` | Removes everything under `~/.nemoclaw/` (the previous full-removal behavior). | +| Non-interactive (`--yes`, `NEMOCLAW_NON_INTERACTIVE=1`, or non-TTY shell) | Preserves the entries and prints a one-line notice. | +| Any context with `NEMOCLAW_UNINSTALL_DESTROY_USER_DATA=1` | Skips the prompt and removes everything under `~/.nemoclaw/`. | + +The preserved entries survive uninstall as inert files on disk. +You must reinstall NemoClaw and re-onboard the sandbox before `nemoclaw snapshot restore` can use them. + #### `nemoclaw uninstall` vs.
the hosted `uninstall.sh` Both forms execute the same `uninstall.sh` with the same flags, but differ in where the script comes from and how much they trust the network. @@ -1374,6 +1432,7 @@ These flags change defaults for commands that manage existing sandboxes. |----------|--------|--------| | `NEMOCLAW_CLEANUP_GATEWAY` | `1`, `true`, or `yes` to enable; `0`, `false`, or `no` to disable | Sets the default for whether `nemoclaw destroy` removes the shared gateway when destroying the last sandbox. Command-line `--cleanup-gateway` and `--no-cleanup-gateway` still take precedence. | | `NEMOCLAW_DISABLE_INFERENCE_ROUTE_REPAIR` | `1` to enable | Skips the automatic DNS-proxy repair for stale `inference.local` routes during `nemoclaw connect` and `nemoclaw connect --probe-only`. Use only as a troubleshooting escape hatch. | +| `NEMOCLAW_SHIELDS_ACCEPT_LEGACY_BASELINE` | `1` to opt in | Applies in two cases: (1) sandboxes that were locked before the SHA-256 content seal landed (no `fileHashes` in shields state), and (2) partial seals where the locked file set grew after the existing seal was captured (some entries sealed, some missing). In both cases the existing on-disk bytes for the unsealed files have no independently verified baseline. By default, `shields up` refuses to capture a seal and asks you to rebuild the sandbox for a known-good baseline. Set this to `1` to accept the current bytes as the trusted baseline and let the seal be captured anyway. Once captured, subsequent `shields status` runs detect any future drift. 
| ## NemoHermes Alias diff --git a/.agents/skills/nemoclaw-user-reference/references/troubleshooting.md b/.agents/skills/nemoclaw-user-reference/references/troubleshooting.md index c321003a8b..8e0de82b16 100644 --- a/.agents/skills/nemoclaw-user-reference/references/troubleshooting.md +++ b/.agents/skills/nemoclaw-user-reference/references/troubleshooting.md @@ -918,6 +918,14 @@ New Telegram bots default to privacy mode enabled, which prevents group messages In @BotFather, run `/setprivacy`, choose the bot, and choose **Disable**. Then remove the bot from the affected group and add it back; Telegram applies the privacy-mode change to group delivery only after the bot rejoins. +For Telegram direct messages, make sure the rebuilt sandbox has a DM allowlist. +Set `TELEGRAM_ALLOWED_IDS` before rebuild; `TELEGRAM_AUTHORIZED_CHAT_IDS` and `TELEGRAM_CHAT_ID` are accepted as compatibility aliases. +Keep the aliases until QA automation and public repro templates have stopped exporting them for at least one full release. +Bot API `sendMessage` sends from the bot to a chat, so it only proves outbound Telegram API access. +To prove inbound agent routing, send a message from the Telegram client as an allowed user and then watch the gateway log for the agent turn and outbound reply. +For a reproducible live check that also exercises an alias, run `test/e2e/test-messaging-providers.sh` with `TELEGRAM_BOT_TOKEN_REAL`, either `TELEGRAM_AUTHORIZED_CHAT_IDS` or `TELEGRAM_CHAT_ID`, and `NEMOCLAW_TELEGRAM_INBOUND_REPLY_E2E=1`; when prompted, send a fresh direct message from that Telegram client. +The check waits for `[telegram] [default] inbound update received` and `[telegram] [default] outbound sendMessage attempted` in `/tmp/gateway.log`. + To diagnose, open a shell in the sandbox and inspect the gateway log: ```console @@ -1156,7 +1164,8 @@ Use `--follow` to stream logs in real time while debugging. 
## DGX Spark -For an end-to-end Ollama walkthrough on DGX Spark, refer to the [NVIDIA Spark playbook](https://build.nvidia.com/spark/nemoclaw). +For an end-to-end NemoClaw walkthrough on DGX Spark or DGX Station, start with Set Up DGX Spark or DGX Station Local Inference (use the `nemoclaw-user-configure-inference` skill). +The NVIDIA Spark playbook remains a useful companion reference for Spark-specific examples. ### CoreDNS CrashLoop after onboarding @@ -1166,11 +1175,12 @@ Run `fix-coredns.sh` to point CoreDNS at the container gateway IP instead, then ### `k3s` cannot find a freshly built image After building a new sandbox image, `k3s` inside the gateway container sometimes fails to pull it even though the image exists on the host. -Destroy and restart the gateway, then re-run setup. +Remove the gateway registration, stop any leftover host gateway process, then re-run setup. ```console -$ openshell gateway destroy -$ openshell gateway start +$ openshell gateway remove nemoclaw +$ sudo pkill -f openshell-gateway +$ nemoclaw onboard --resume ``` ### GPU passthrough on Spark diff --git a/docs/get-started/prerequisites.mdx b/docs/get-started/prerequisites.mdx index fc577f2e0f..34f867e076 100644 --- a/docs/get-started/prerequisites.mdx +++ b/docs/get-started/prerequisites.mdx @@ -68,7 +68,7 @@ The table is generated from [`ci/platform-matrix.json`](https://github.com/NVIDI |----|-------------------|--------|-------| | Linux | Docker | Tested | Primary tested path. | | macOS (Apple Silicon) | Colima, Docker Desktop | Tested with limitations | Install Xcode Command Line Tools (`xcode-select --install`) and start the runtime before running the installer. | -| DGX Spark | Docker | Tested | Use the standard installer and `nemoclaw onboard`. For an end-to-end walkthrough with local Ollama inference, see the [NVIDIA Spark playbook](https://build.nvidia.com/spark/nemoclaw). | +| DGX Spark | Docker | Tested | Use the standard installer and `nemoclaw onboard`. 
For local inference, see [Set Up DGX Spark or DGX Station Local Inference](/inference/dgx-spark-station-local-inference). | | Windows WSL2 | Docker Desktop (WSL backend) | Tested with limitations | Requires WSL2 with Docker Desktop backend. | {/* platform-matrix:end */} diff --git a/docs/index.yml b/docs/index.yml index 4925942034..cbe7b8a3c2 100644 --- a/docs/index.yml +++ b/docs/index.yml @@ -46,6 +46,9 @@ navigation: - page: "Use Local Inference" path: inference/use-local-inference.mdx slug: use-local-inference + - page: "DGX Local Inference" + path: inference/dgx-spark-station-local-inference.mdx + slug: dgx-spark-station-local-inference - page: "Tool-Calling Reliability" path: inference/tool-calling-reliability.mdx slug: tool-calling-reliability diff --git a/docs/inference/dgx-spark-station-local-inference.mdx b/docs/inference/dgx-spark-station-local-inference.mdx new file mode 100644 index 0000000000..8936cb9f0d --- /dev/null +++ b/docs/inference/dgx-spark-station-local-inference.mdx @@ -0,0 +1,171 @@ +--- +title: "Set Up DGX Spark or DGX Station Local Inference" +sidebar-title: "DGX Local Inference" +description: "Walk through NemoClaw local inference setup on DGX Spark and DGX Station, including GPU/CDI checks, managed vLLM, Ollama, health verification, and common Spark-specific failures." +description-agent: "Guides DGX Spark and DGX Station users through end-to-end local inference setup with NemoClaw. Use when preparing DGX hardware, choosing Ollama or managed vLLM, checking GPU/CDI prerequisites, verifying the OpenShell gateway and local inference route, or troubleshooting CoreDNS, k3s image pull, CDI, or port 3000 conflicts." 
+keywords: ["nemoclaw dgx spark local inference", "nemoclaw dgx station vllm", "nemoclaw spark ollama", "nemoclaw cdi gpu setup"] +topics: ["local inference", "DGX Spark", "DGX Station", "onboarding"] +tags: ["DGX Spark", "DGX Station", "vLLM", "Ollama", "CDI"] +content: + type: "how_to" +difficulty: "intermediate" +audience: "NemoClaw users running local inference on DGX Spark or DGX Station" +status: "published" +--- +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +# Set Up DGX Spark or DGX Station Local Inference + +Use this guide when you want NemoClaw to run with local inference on DGX Spark or DGX Station. +It pulls together the host checks, provider choice, onboarding flow, and the common Spark-specific failure modes that are otherwise spread across the quickstart, local inference, and troubleshooting pages. + +## Prerequisites + +Before onboarding, verify the host basics: + +- Docker is installed and running. +- Node.js 22.16 or later and npm 10 or later are available. +- The NVIDIA driver and container toolkit are installed. +- `nvidia-smi` works on the host. +- Port `3000` is free, or you are ready to choose a different dashboard port. + +Run: + +```bash +docker info +nvidia-smi +node --version +npm --version +``` + +DGX Spark and recent Docker installations can require NVIDIA Container Device Interface (CDI) specs for GPU passthrough. +NemoClaw checks and repairs the common missing-CDI case during install, but you can pre-generate the spec when needed: + +```bash +sudo nvidia-ctk cdi generate --output=/etc/cdi/nvidia.yaml +``` + +If this command is unavailable, install or repair the NVIDIA Container Toolkit before onboarding. + +## Choose a Local Inference Path + +DGX Spark and DGX Station have two common local-inference paths. 
+ +| Path | Best for | Notes | +|---|---|---| +| Managed vLLM | Tool-heavy agents, stronger tool-call reliability, larger GPU-backed models | Offered by default on DGX Spark and DGX Station. Uses `Qwen/Qwen3.6-27B-FP8` unless you override the registry slug. | +| Ollama | Simpler local chat, existing Ollama model libraries, quick experiments | Convenient, but some model/template combinations can emit tool calls as plain text. Use vLLM when tool-call reliability matters. | + +For managed vLLM, the first run pulls the container image and model weights into local caches. +Plan for a long first run on fresh systems. + +For Ollama, make sure only one daemon owns port `11434`. +If another runtime is already using that port, stop it or move one service before onboarding. + +## Run Onboarding + +Start the standard onboard wizard: + +```bash +nemoclaw onboard +``` + +On DGX Spark and DGX Station, the interactive wizard prompts for the provider and policy choices after the third-party software notice. +Choose the local-inference path and review the suggested policy defaults before NemoClaw creates the sandbox. + +If you prefer to choose manually: + +1. Select the local provider you want: **Local vLLM** or **Local Ollama**. +2. For managed vLLM, accept the default model or set `NEMOCLAW_VLLM_MODEL` before running onboarding. +3. For Ollama, choose an installed model or a starter model that fits available memory. +4. Let NemoClaw validate the local endpoint before it creates the sandbox. 
+ +For non-interactive managed vLLM setup on DGX Spark or DGX Station: + +```bash +NEMOCLAW_PROVIDER=install-vllm nemoclaw onboard --non-interactive --yes --yes-i-accept-third-party-software +``` + +To choose a supported managed-vLLM model: + +```bash +NEMOCLAW_PROVIDER=install-vllm \ +NEMOCLAW_VLLM_MODEL=qwen3.6-27b \ +nemoclaw onboard --non-interactive --yes --yes-i-accept-third-party-software +``` + +Supported managed-vLLM slugs are listed in [Use a Local Inference Server](/inference/use-local-inference#override-the-managed-vllm-model). + +## Verify the Setup + +After onboarding completes, check the sandbox and local inference route: + +```bash +nemoclaw status +nemoclaw doctor +``` + +Healthy output should show: + +- The sandbox is running. +- The dashboard is reachable. +- The selected inference provider is healthy. +- For Ollama, the authenticated proxy health line is healthy when the proxy token is available. + +Open the TUI: + +```bash +nemoclaw connect +openclaw tui +``` + +Ask for a small tool-using action. +If you see raw JSON tool calls printed as chat text, switch to vLLM with a parser-aware model path and review [Tool-Calling Reliability](/inference/tool-calling-reliability). + +## Common DGX Spark and Station Fixes + +### CoreDNS CrashLoop + +If CoreDNS in the embedded k3s cluster crashes shortly after setup, run the CoreDNS fix script referenced by the troubleshooting guide, then recreate the sandbox. +The issue is usually a resolver path that points at `127.0.0.11`, which does not route inside the gateway container. + +### k3s Image Pull or Upload Takes Too Long + +Fresh systems may spend several minutes pulling images, uploading layers to the OpenShell gateway, or loading model weights. 
+If readiness times out while the host is still doing real work, raise both local inference and sandbox readiness budgets: + +```bash +export NEMOCLAW_LOCAL_INFERENCE_TIMEOUT=300 +export NEMOCLAW_SANDBOX_READY_TIMEOUT=600 +nemoclaw onboard +``` + +### CDI GPU Errors + +If gateway startup reports `unresolvable CDI devices nvidia.com/gpu=all`, regenerate CDI specs and rerun onboarding: + +```bash +sudo nvidia-ctk cdi generate --output=/etc/cdi/nvidia.yaml +nemoclaw onboard +``` + +If the error persists, repair the NVIDIA Container Toolkit installation and verify that `docker info` reports the expected CDI spec directories. + +### Port 3000 Conflict + +Some Spark systems already run services on port `3000`. +Set a different dashboard port before onboarding: + +```bash +export NEMOCLAW_DASHBOARD_PORT=18789 +nemoclaw onboard +``` + +Use a free port that does not overlap the configured gateway, vLLM, Ollama, or Ollama proxy ports. + +## Next Steps + +- [Use a Local Inference Server](/inference/use-local-inference) for full Ollama, vLLM, NIM, and compatible-endpoint details. +- [Tool-Calling Reliability](/inference/tool-calling-reliability) for choosing between Ollama and parser-aware vLLM. +- [Troubleshooting](/reference/troubleshooting#dgx-spark) for deeper DGX Spark failure-mode guidance. diff --git a/docs/inference/use-local-inference.mdx b/docs/inference/use-local-inference.mdx index 76c00c4a05..7ac04ab847 100644 --- a/docs/inference/use-local-inference.mdx +++ b/docs/inference/use-local-inference.mdx @@ -450,6 +450,7 @@ If the provider itself needs to change (for example, switching from vLLM to a cl ## Next Steps +- [Set Up DGX Spark or DGX Station Local Inference](/inference/dgx-spark-station-local-inference) for an end-to-end DGX hardware walkthrough. - [Inference Options](/inference/inference-options) for the full list of providers available during onboarding. 
- [Tool-Calling Reliability](/inference/tool-calling-reliability) for diagnosing raw JSON tool-call output with local models. - [Switch Inference Models](/inference/switch-inference-providers) for runtime model switching. diff --git a/docs/reference/troubleshooting.mdx b/docs/reference/troubleshooting.mdx index b89ebeac30..5224cc0700 100644 --- a/docs/reference/troubleshooting.mdx +++ b/docs/reference/troubleshooting.mdx @@ -1176,7 +1176,8 @@ Use `--follow` to stream logs in real time while debugging. ## DGX Spark -For an end-to-end Ollama walkthrough on DGX Spark, refer to the [NVIDIA Spark playbook](https://build.nvidia.com/spark/nemoclaw). +For an end-to-end NemoClaw walkthrough on DGX Spark or DGX Station, start with [Set Up DGX Spark or DGX Station Local Inference](/inference/dgx-spark-station-local-inference). +The NVIDIA Spark playbook remains a useful companion reference for Spark-specific examples. ### CoreDNS CrashLoop after onboarding diff --git a/test/dgx-local-inference-doc-copy.test.ts b/test/dgx-local-inference-doc-copy.test.ts new file mode 100644 index 0000000000..3184557f49 --- /dev/null +++ b/test/dgx-local-inference-doc-copy.test.ts @@ -0,0 +1,71 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
// SPDX-License-Identifier: Apache-2.0

import fs from "node:fs";
import path from "node:path";
import { fileURLToPath } from "node:url";
import { describe, expect, it } from "vitest";

const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);
const repoRoot = path.resolve(__dirname, "..");
const dgxLocalInferenceDoc = path.join(
  repoRoot,
  "docs",
  "inference",
  "dgx-spark-station-local-inference.mdx",
);

/** One triple-backtick fenced code block found in a Markdown/MDX document. */
type FencedBlock = {
  /** First word after the opening backticks; empty string when the fence has none. */
  language: string;
  /** 1-based line number of the opening fence line. */
  line: number;
  /** Content lines between the fences, excluding the fence lines themselves. */
  lines: string[];
};

/**
 * Collects every triple-backtick fenced code block from a Markdown/MDX document.
 *
 * Only fences that start at column 0 are recognized. On an opening fence the
 * first whitespace-delimited word after the backticks is recorded as the
 * language; any text after it (for example a `title="..."` attribute) is
 * ignored. While a block is open, the next fence line closes it.
 *
 * @param markdown - Full document text; both LF and CRLF line endings are accepted.
 * @returns The blocks in document order. A fence that is still open at the end
 *   of the document is returned as well, so its content is not silently skipped.
 */
function collectFencedBlocks(markdown: string): FencedBlock[] {
  // Language word, then optional trailing info-string text that is discarded.
  const fencePattern = /^```(\S*)(?:\s.*)?$/;
  const blocks: FencedBlock[] = [];
  let current: FencedBlock | null = null;

  for (const [index, line] of markdown.split(/\r?\n/).entries()) {
    const fence = fencePattern.exec(line);

    if (!fence) {
      // Ordinary line: keep it only while inside an open block.
      current?.lines.push(line);
      continue;
    }

    if (current) {
      // Any fence line seen while a block is open terminates that block.
      blocks.push(current);
      current = null;
      continue;
    }

    current = {
      language: fence[1] ?? "",
      line: index + 1,
      lines: [],
    };
  }

  // An unterminated fence still contains commands a reader may copy, so
  // report it instead of dropping it.
  if (current) blocks.push(current);

  return blocks;
}

describe("DGX local inference docs copyable commands", () => {
  it("uses bash command blocks without prompt prefixes", () => {
    const markdown = fs.readFileSync(dgxLocalInferenceDoc, "utf8");
    const blocks = collectFencedBlocks(markdown);
    // block.line is the opening fence, so the first content line is block.line + 1.
    const promptLines = blocks.flatMap((block) =>
      block.lines
        .map((line, offset) => ({ line, lineNumber: block.line + offset + 1 }))
        .filter(({ line }) => /^\s*\$ /.test(line))
        .map(
          ({ line, lineNumber }) =>
            `${path.relative(repoRoot, dgxLocalInferenceDoc)}:${lineNumber}: ${line}`,
        ),
    );
    const languages = new Set(blocks.map((block) => block.language));

    expect(promptLines).toEqual([]);
    expect(languages).toEqual(new Set(["bash"]));
  });
});