From 50ec400174d1876c27badb9a935b8131b97c5ff2 Mon Sep 17 00:00:00 2001 From: Alex Lewontin Date: Wed, 20 May 2026 19:58:00 -0400 Subject: [PATCH 1/5] feat(bootstrap): add system gateway dir for installer-provided defaults Adds OPENSHELL_SYSTEM_GATEWAY_DIR, a read-only gateway registry that installers (snap, deb, systemd units) can seed with deployment-provided gateways. load_active_gateway and load_gateway_metadata fall back to the system dir when no per-user entry exists; list_gateways merges both, with per-user entries shadowing system entries on name collision. Signed-off-by: Alex Lewontin Originally-authored-by: Mark Shuttleworth --- architecture/gateway.md | 12 + crates/openshell-bootstrap/src/lib.rs | 9 +- crates/openshell-bootstrap/src/metadata.rs | 295 +++++++++++++++++++-- crates/openshell-bootstrap/src/paths.rs | 51 ++++ crates/openshell-cli/src/run.rs | 27 +- docs/sandboxes/manage-gateways.mdx | 2 + 6 files changed, 358 insertions(+), 38 deletions(-) diff --git a/architecture/gateway.md b/architecture/gateway.md index 01f377a2d..eae020b56 100644 --- a/architecture/gateway.md +++ b/architecture/gateway.md @@ -358,6 +358,18 @@ Driver-specific values that are not part of the inheritance allowlist (e.g. Podman `socket_path`, VM `vcpus`) only come from the driver's own table. +### Installer-seeded gateway registry + +The CLI reads its active-gateway and per-gateway metadata from +`$XDG_CONFIG_HOME/openshell/`. Installers (snap, deb, systemd units) that +want to surface a deployment-provided gateway without requiring the user to +register it set `OPENSHELL_SYSTEM_GATEWAY_DIR` to a read-only registry with +the same layout (`//metadata.json` plus an optional top-level +`active_gateway` file). The CLI falls back to this directory when no per-user +entry exists; per-user entries shadow system entries on name collision. System +entries are read-only from the CLI, so `gateway remove` rejects a pure system +entry instead of pretending to delete installer-owned state. 
+ ## Operational Constraints - Gateway TLS and client certificate distribution are deployment concerns owned diff --git a/crates/openshell-bootstrap/src/lib.rs b/crates/openshell-bootstrap/src/lib.rs index 8845f0392..c7fc0a21f 100644 --- a/crates/openshell-bootstrap/src/lib.rs +++ b/crates/openshell-bootstrap/src/lib.rs @@ -21,8 +21,9 @@ use std::sync::Mutex; pub(crate) static XDG_TEST_LOCK: Mutex<()> = Mutex::new(()); pub use crate::metadata::{ - GatewayMetadata, clear_active_gateway, clear_last_sandbox_if_matches, - extract_host_from_ssh_destination, get_gateway_metadata, list_gateways, load_active_gateway, - load_gateway_metadata, load_last_sandbox, remove_gateway_metadata, resolve_ssh_hostname, - save_active_gateway, save_last_sandbox, store_gateway_metadata, + GatewayMetadata, GatewayMetadataSource, clear_active_gateway, clear_last_sandbox_if_matches, + extract_host_from_ssh_destination, gateway_metadata_source, get_gateway_metadata, + list_gateways, load_active_gateway, load_gateway_metadata, load_last_sandbox, + remove_gateway_metadata, resolve_ssh_hostname, save_active_gateway, save_last_sandbox, + store_gateway_metadata, }; diff --git a/crates/openshell-bootstrap/src/metadata.rs b/crates/openshell-bootstrap/src/metadata.rs index 108a99b8a..57f32251a 100644 --- a/crates/openshell-bootstrap/src/metadata.rs +++ b/crates/openshell-bootstrap/src/metadata.rs @@ -1,7 +1,10 @@ // SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
// SPDX-License-Identifier: Apache-2.0 -use crate::paths::{active_gateway_path, gateways_dir, last_sandbox_path}; +use crate::paths::{ + active_gateway_path, gateways_dir, last_sandbox_path, system_active_gateway_path, + system_gateways_dir, +}; use miette::{IntoDiagnostic, Result, WrapErr}; use openshell_core::paths::ensure_parent_dir_restricted; use serde::{Deserialize, Serialize}; @@ -70,6 +73,15 @@ pub struct GatewayMetadata { pub vm_driver_state_dir: Option, } +/// Storage layer that provides a gateway metadata record. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum GatewayMetadataSource { + /// Per-user metadata under `$XDG_CONFIG_HOME/openshell/gateways`. + User, + /// Installer-provided metadata under `OPENSHELL_SYSTEM_GATEWAY_DIR`. + System, +} + fn stored_metadata_path(name: &str) -> Result { Ok(gateways_dir()?.join(name).join("metadata.json")) } @@ -148,8 +160,37 @@ pub fn store_gateway_metadata(name: &str, metadata: &GatewayMetadata) -> Result< Ok(()) } +/// Return where a gateway metadata record would be loaded from. 
+pub fn gateway_metadata_source(name: &str) -> Result> { + let primary = stored_metadata_path(name)?; + if primary.exists() { + return Ok(Some(GatewayMetadataSource::User)); + } + + let system = system_gateways_dir().map(|d| d.join(name).join("metadata.json")); + if system.as_ref().is_some_and(|p| p.exists()) { + return Ok(Some(GatewayMetadataSource::System)); + } + + Ok(None) +} + pub fn load_gateway_metadata(name: &str) -> Result { - let path = stored_metadata_path(name)?; + let primary = stored_metadata_path(name)?; + let system = system_gateways_dir().map(|d| d.join(name).join("metadata.json")); + let path = if primary.exists() { + primary + } else if let Some(p) = system.as_ref().filter(|p| p.exists()) { + p.clone() + } else { + return Err(miette::miette!( + "no metadata found for gateway '{name}' (looked in {} and {})", + primary.display(), + system + .as_ref() + .map_or_else(|| "".into(), |p| p.display().to_string()), + )); + }; let contents = std::fs::read_to_string(&path) .into_diagnostic() .wrap_err_with(|| format!("failed to read metadata from {}", path.display()))?; @@ -175,12 +216,19 @@ pub fn save_active_gateway(name: &str) -> Result<()> { /// Load the active gateway name from persistent storage. /// -/// Returns `None` if no active gateway has been set. +/// Returns `None` if no active gateway has been set. Falls back to the +/// system-level active gateway file when no per-user selection exists, so +/// installer-provided defaults can take effect on a fresh system. 
pub fn load_active_gateway() -> Option { - let path = active_gateway_path().ok()?; - let contents = std::fs::read_to_string(&path).ok()?; - let name = contents.trim().to_string(); - if name.is_empty() { None } else { Some(name) } + let read = |path: PathBuf| { + let contents = std::fs::read_to_string(&path).ok()?; + let name = contents.trim().to_string(); + (!name.is_empty()).then_some(name) + }; + active_gateway_path() + .ok() + .and_then(read) + .or_else(|| system_active_gateway_path().and_then(read)) } /// Save the last-used sandbox name for a gateway to persistent storage. @@ -218,29 +266,43 @@ pub fn clear_last_sandbox_if_matches(gateway: &str, sandbox: &str) { /// List all gateways that have stored metadata. /// -/// Scans `$XDG_CONFIG_HOME/openshell/gateways/` for subdirectories containing -/// `metadata.json` and returns the parsed metadata for each. +/// Scans `$XDG_CONFIG_HOME/openshell/gateways/` and, when set, the +/// `OPENSHELL_SYSTEM_GATEWAY_DIR` directory. Per-user entries shadow +/// system entries on name collision. 
pub fn list_gateways() -> Result> { - let dir = gateways_dir()?; - if !dir.exists() { - return Ok(Vec::new()); - } - let mut gateways = Vec::new(); - let entries = std::fs::read_dir(&dir) - .into_diagnostic() - .wrap_err_with(|| format!("failed to read directory {}", dir.display()))?; - - for entry in entries { - let entry = entry.into_diagnostic()?; - let path = entry.path(); - // Only consider directories that contain a metadata.json file - if path.is_dir() { - let gateway_name = entry.file_name().to_string_lossy().to_string(); - if let Ok(metadata) = load_gateway_metadata(&gateway_name) { + let mut seen: std::collections::HashSet = std::collections::HashSet::new(); + + let mut scan = |dir: PathBuf| -> Result<()> { + if !dir.exists() { + return Ok(()); + } + let entries = std::fs::read_dir(&dir) + .into_diagnostic() + .wrap_err_with(|| format!("failed to read directory {}", dir.display()))?; + for entry in entries { + let entry = entry.into_diagnostic()?; + let path = entry.path(); + if !path.is_dir() { + continue; + } + let name = entry.file_name().to_string_lossy().to_string(); + if seen.contains(&name) { + continue; + } + if let Ok(contents) = std::fs::read_to_string(path.join("metadata.json")) + && let Ok(metadata) = serde_json::from_str::(&contents) + { + seen.insert(name); gateways.push(metadata); } } + Ok(()) + }; + + scan(gateways_dir()?)?; + if let Some(system) = system_gateways_dir() { + scan(system)?; } // Sort by name for stable output @@ -437,4 +499,187 @@ mod tests { ); }); } + + // ── system gateway dir fallback ─────────────────────────────────── + + /// Helper: hold the shared XDG test lock, point `XDG_CONFIG_HOME` at + /// `user` and `OPENSHELL_SYSTEM_GATEWAY_DIR` at `system`, run `f`, then + /// restore both env vars. 
+ #[allow(unsafe_code)] + fn with_tmp_xdg_and_system( + user: &std::path::Path, + system: &std::path::Path, + f: F, + ) { + let _guard = crate::XDG_TEST_LOCK + .lock() + .unwrap_or_else(std::sync::PoisonError::into_inner); + let orig_xdg = std::env::var("XDG_CONFIG_HOME").ok(); + let orig_sys = std::env::var(crate::paths::SYSTEM_GATEWAY_DIR_ENV).ok(); + unsafe { + std::env::set_var("XDG_CONFIG_HOME", user); + std::env::set_var(crate::paths::SYSTEM_GATEWAY_DIR_ENV, system); + } + crate::paths::reset_system_gateways_dir_cache(); + f(); + unsafe { + match orig_xdg { + Some(v) => std::env::set_var("XDG_CONFIG_HOME", v), + None => std::env::remove_var("XDG_CONFIG_HOME"), + } + match orig_sys { + Some(v) => std::env::set_var(crate::paths::SYSTEM_GATEWAY_DIR_ENV, v), + None => std::env::remove_var(crate::paths::SYSTEM_GATEWAY_DIR_ENV), + } + } + crate::paths::reset_system_gateways_dir_cache(); + } + + /// Write a `//metadata.json` file for the given endpoint. + fn write_system_metadata(dir: &std::path::Path, name: &str, endpoint: &str) { + let gw_dir = dir.join(name); + std::fs::create_dir_all(&gw_dir).unwrap(); + let meta = GatewayMetadata { + name: name.to_string(), + gateway_endpoint: endpoint.to_string(), + ..Default::default() + }; + std::fs::write( + gw_dir.join("metadata.json"), + serde_json::to_string(&meta).unwrap(), + ) + .unwrap(); + } + + #[test] + fn load_active_gateway_falls_back_to_system_dir() { + let user = tempfile::tempdir().unwrap(); + let system = tempfile::tempdir().unwrap(); + with_tmp_xdg_and_system(user.path(), system.path(), || { + std::fs::write(system.path().join("active_gateway"), "from-system").unwrap(); + assert_eq!(load_active_gateway(), Some("from-system".to_string())); + }); + } + + #[test] + fn load_active_gateway_prefers_user_over_system() { + let user = tempfile::tempdir().unwrap(); + let system = tempfile::tempdir().unwrap(); + with_tmp_xdg_and_system(user.path(), system.path(), || { + save_active_gateway("from-user").unwrap(); + 
std::fs::write(system.path().join("active_gateway"), "from-system").unwrap(); + assert_eq!(load_active_gateway(), Some("from-user".to_string())); + }); + } + + #[test] + fn load_gateway_metadata_falls_back_to_system_dir() { + let user = tempfile::tempdir().unwrap(); + let system = tempfile::tempdir().unwrap(); + with_tmp_xdg_and_system(user.path(), system.path(), || { + write_system_metadata(system.path(), "sys-gw", "unix:///tmp/sys.sock"); + let meta = load_gateway_metadata("sys-gw").unwrap(); + assert_eq!(meta.name, "sys-gw"); + assert_eq!(meta.gateway_endpoint, "unix:///tmp/sys.sock"); + }); + } + + #[test] + fn gateway_metadata_source_reports_user_system_and_missing() { + let user = tempfile::tempdir().unwrap(); + let system = tempfile::tempdir().unwrap(); + with_tmp_xdg_and_system(user.path(), system.path(), || { + write_system_metadata(system.path(), "sys-gw", "unix:///tmp/sys.sock"); + assert_eq!( + gateway_metadata_source("sys-gw").unwrap(), + Some(GatewayMetadataSource::System) + ); + + let user_meta = GatewayMetadata { + name: "user-gw".to_string(), + gateway_endpoint: "https://user-endpoint".to_string(), + ..Default::default() + }; + store_gateway_metadata("user-gw", &user_meta).unwrap(); + assert_eq!( + gateway_metadata_source("user-gw").unwrap(), + Some(GatewayMetadataSource::User) + ); + + assert_eq!(gateway_metadata_source("missing").unwrap(), None); + }); + } + + #[test] + fn load_gateway_metadata_error_mentions_both_search_paths() { + let user = tempfile::tempdir().unwrap(); + let system = tempfile::tempdir().unwrap(); + with_tmp_xdg_and_system(user.path(), system.path(), || { + let err = load_gateway_metadata("missing").unwrap_err(); + let msg = err.to_string(); + assert!(msg.contains("missing"), "expected name in error: {msg}"); + assert!( + msg.contains(user.path().to_str().unwrap()), + "expected user path in error: {msg}" + ); + assert!( + msg.contains(system.path().to_str().unwrap()), + "expected system path in error: {msg}" + ); + }); + } + + 
#[test] + fn load_gateway_metadata_prefers_user_over_system() { + let user = tempfile::tempdir().unwrap(); + let system = tempfile::tempdir().unwrap(); + with_tmp_xdg_and_system(user.path(), system.path(), || { + let user_meta = GatewayMetadata { + name: "shared".to_string(), + gateway_endpoint: "https://user-endpoint".to_string(), + ..Default::default() + }; + store_gateway_metadata("shared", &user_meta).unwrap(); + write_system_metadata(system.path(), "shared", "https://system-endpoint"); + let meta = load_gateway_metadata("shared").unwrap(); + assert_eq!(meta.gateway_endpoint, "https://user-endpoint"); + }); + } + + #[test] + fn list_gateways_merges_user_and_system() { + let user = tempfile::tempdir().unwrap(); + let system = tempfile::tempdir().unwrap(); + with_tmp_xdg_and_system(user.path(), system.path(), || { + let user_meta = GatewayMetadata { + name: "alpha".to_string(), + gateway_endpoint: "https://alpha".to_string(), + ..Default::default() + }; + store_gateway_metadata("alpha", &user_meta).unwrap(); + write_system_metadata(system.path(), "beta", "https://beta"); + let gateways = list_gateways().unwrap(); + assert_eq!(gateways.len(), 2); + assert_eq!(gateways[0].name, "alpha"); + assert_eq!(gateways[1].name, "beta"); + }); + } + + #[test] + fn list_gateways_user_shadows_system_on_collision() { + let user = tempfile::tempdir().unwrap(); + let system = tempfile::tempdir().unwrap(); + with_tmp_xdg_and_system(user.path(), system.path(), || { + let user_meta = GatewayMetadata { + name: "local-vm".to_string(), + gateway_endpoint: "https://user-override".to_string(), + ..Default::default() + }; + store_gateway_metadata("local-vm", &user_meta).unwrap(); + write_system_metadata(system.path(), "local-vm", "unix:///tmp/sys.sock"); + let gateways = list_gateways().unwrap(); + assert_eq!(gateways.len(), 1); + assert_eq!(gateways[0].gateway_endpoint, "https://user-override"); + }); + } } diff --git a/crates/openshell-bootstrap/src/paths.rs 
b/crates/openshell-bootstrap/src/paths.rs index cd3cb7693..acea95a18 100644 --- a/crates/openshell-bootstrap/src/paths.rs +++ b/crates/openshell-bootstrap/src/paths.rs @@ -4,6 +4,18 @@ use miette::Result; use openshell_core::paths::xdg_config_dir; use std::path::PathBuf; +use std::sync::RwLock; + +/// Env var pointing at a system-level gateway registry directory. +/// +/// Set by installers (snap, deb, systemd unit, dev wrappers) that want +/// to surface deployment-provided gateways without requiring the user to +/// register them. The directory has the same layout as the per-user XDG +/// gateways directory: `//metadata.json` plus an optional +/// top-level `active_gateway` file. CLI behaviour treats it as read-only; +/// all writes go to the per-user XDG location, which shadows system +/// entries on name collision. +pub const SYSTEM_GATEWAY_DIR_ENV: &str = "OPENSHELL_SYSTEM_GATEWAY_DIR"; /// Path to the file that stores the active gateway name. /// @@ -19,6 +31,45 @@ pub fn gateways_dir() -> Result { Ok(xdg_config_dir()?.join("openshell").join("gateways")) } +/// Cached resolution of `OPENSHELL_SYSTEM_GATEWAY_DIR`. +enum CachedSystemDir { + Uninit, + Cached(Option), +} + +static CACHED_SYSTEM_GATEWAYS_DIR: RwLock = RwLock::new(CachedSystemDir::Uninit); + +/// Optional system-level gateway directory provided by an installer. +/// +/// `OPENSHELL_SYSTEM_GATEWAY_DIR` is read on the first call and cached for +/// the lifetime of the process so all callers observe a consistent value +/// even if the environment is mutated mid-run. 
+pub fn system_gateways_dir() -> Option { + if let CachedSystemDir::Cached(value) = &*CACHED_SYSTEM_GATEWAYS_DIR.read().unwrap() { + return value.clone(); + } + let mut guard = CACHED_SYSTEM_GATEWAYS_DIR.write().unwrap(); + if let CachedSystemDir::Cached(value) = &*guard { + return value.clone(); + } + let value = std::env::var_os(SYSTEM_GATEWAY_DIR_ENV).map(PathBuf::from); + *guard = CachedSystemDir::Cached(value.clone()); + value +} + +/// Test-only: clear the cached `system_gateways_dir` value so the next call +/// re-reads the environment. Required because the cache outlives any single +/// test in the same process. +#[cfg(test)] +pub fn reset_system_gateways_dir_cache() { + *CACHED_SYSTEM_GATEWAYS_DIR.write().unwrap() = CachedSystemDir::Uninit; +} + +/// Optional system-level "active gateway" file (top-level file inside the system gateways dir). +pub fn system_active_gateway_path() -> Option { + system_gateways_dir().map(|d| d.join("active_gateway")) +} + /// Path to the file that stores the last-used sandbox name for a gateway. 
/// /// Location: `$XDG_CONFIG_HOME/openshell/gateways//last_sandbox` diff --git a/crates/openshell-cli/src/run.rs b/crates/openshell-cli/src/run.rs index b92be199e..61588281a 100644 --- a/crates/openshell-cli/src/run.rs +++ b/crates/openshell-cli/src/run.rs @@ -19,10 +19,10 @@ use hyper_util::{client::legacy::Client, rt::TokioExecutor}; use indicatif::{MultiProgress, ProgressBar, ProgressStyle}; use miette::{IntoDiagnostic, Result, WrapErr, miette}; use openshell_bootstrap::{ - GatewayMetadata, clear_active_gateway, clear_last_sandbox_if_matches, - extract_host_from_ssh_destination, get_gateway_metadata, list_gateways, load_active_gateway, - remove_gateway_metadata, resolve_ssh_hostname, save_active_gateway, save_last_sandbox, - store_gateway_metadata, + GatewayMetadata, GatewayMetadataSource, clear_active_gateway, clear_last_sandbox_if_matches, + extract_host_from_ssh_destination, gateway_metadata_source, get_gateway_metadata, + list_gateways, load_active_gateway, remove_gateway_metadata, resolve_ssh_hostname, + save_active_gateway, save_last_sandbox, store_gateway_metadata, }; use openshell_core::progress::{ PROGRESS_ACTIVE_DETAIL_KEY, PROGRESS_ACTIVE_STEP_KEY, PROGRESS_COMPLETE_LABEL_KEY, @@ -1458,11 +1458,20 @@ fn remove_gateway_registration(name: &str) { /// Remove a local gateway registration without touching the gateway service. pub fn gateway_remove(name: &str) -> Result<()> { - if get_gateway_metadata(name).is_none() { - return Err(miette::miette!( - "No gateway metadata found for '{name}'.\n\ - List available gateways: openshell gateway select" - )); + match gateway_metadata_source(name)? { + Some(GatewayMetadataSource::User) => {} + Some(GatewayMetadataSource::System) => { + return Err(miette::miette!( + "Gateway registration '{name}' is installed by the system and cannot be removed from user config.\n\ + Register a per-user gateway with the same name to override it, or select another gateway." 
+ )); + } + None => { + return Err(miette::miette!( + "No gateway metadata found for '{name}'.\n\ + List available gateways: openshell gateway select" + )); + } } remove_gateway_registration(name); diff --git a/docs/sandboxes/manage-gateways.mdx b/docs/sandboxes/manage-gateways.mdx index 6cfa39121..d97d8eef3 100644 --- a/docs/sandboxes/manage-gateways.mdx +++ b/docs/sandboxes/manage-gateways.mdx @@ -82,6 +82,8 @@ One gateway is always the active gateway. All CLI commands target it by default. The active gateway is the persisted default. The `-g` flag and the `OPENSHELL_GATEWAY` environment variable override it when commands resolve a gateway. If `OPENSHELL_GATEWAY` is set to a different gateway, `openshell gateway select ` still saves the new default and warns that the current shell continues to use the environment value until you unset or update it. +Installers can seed read-only gateway entries for package-managed local services, such as a snap-provided `local-vm` gateway. These entries appear in `openshell gateway list` and can be selected like user registrations. `openshell gateway remove` removes only per-user registrations. Register a per-user gateway with the same name when you need to shadow an installer-provided default. 
+ List all registered gateways: ```shell From c37cb8f9ddcb05409191126dabd3da78b96d5617 Mon Sep 17 00:00:00 2001 From: Alex Lewontin Date: Fri, 22 May 2026 14:07:19 -0400 Subject: [PATCH 2/5] chore(snap): move snapcraft.yaml to snap/ Signed-off-by: Alex Lewontin --- deploy/snap/README.md | 4 ++-- snapcraft.yaml => snap/snapcraft.yaml | 0 2 files changed, 2 insertions(+), 2 deletions(-) rename snapcraft.yaml => snap/snapcraft.yaml (100%) diff --git a/deploy/snap/README.md b/deploy/snap/README.md index 419aacaa4..c61fd0557 100644 --- a/deploy/snap/README.md +++ b/deploy/snap/README.md @@ -1,6 +1,6 @@ # Building a snap package -OpenShell snap packages are defined by the root `snapcraft.yaml` and built with +OpenShell snap packages are defined by `snap/snapcraft.yaml` and built with Snapcraft from source. The helper task under `tasks/` still stages the same payload from pre-built @@ -15,7 +15,7 @@ binaries when you want to inspect the snap root or produce local artifacts. ## Build with Snapcraft -Build the snap from source with the root manifest: +Build the snap from source with the project manifest: ```shell snapcraft pack diff --git a/snapcraft.yaml b/snap/snapcraft.yaml similarity index 100% rename from snapcraft.yaml rename to snap/snapcraft.yaml From 0206478f5406e060419a3cfe835fc969540c7ccc Mon Sep 17 00:00:00 2001 From: Alex Lewontin Date: Fri, 22 May 2026 14:07:19 -0400 Subject: [PATCH 3/5] feat(snap): add default local-vm gateway on install Signed-off-by: Alex Lewontin --- architecture/gateway.md | 5 +- deploy/snap/README.md | 97 +++++++++++------------ deploy/snap/bin/openshell-gateway-wrapper | 27 ++++++- deploy/snap/meta/snap.yaml.in | 7 +- docs/about/installation.mdx | 6 ++ python/openshell/release_formula_test.py | 51 ++++++++++-- snap/hooks/configure | 5 ++ snap/hooks/install | 23 ++++++ snap/snapcraft.yaml | 35 +++++++- 9 files changed, 185 insertions(+), 71 deletions(-) create mode 100755 snap/hooks/configure create mode 100755 snap/hooks/install diff 
--git a/architecture/gateway.md b/architecture/gateway.md index eae020b56..9ef93c53f 100644 --- a/architecture/gateway.md +++ b/architecture/gateway.md @@ -368,7 +368,10 @@ the same layout (`//metadata.json` plus an optional top-level `active_gateway` file). The CLI falls back to this directory when no per-user entry exists; per-user entries shadow system entries on name collision. System entries are read-only from the CLI, so `gateway remove` rejects a pure system -entry instead of pretending to delete installer-owned state. +entry instead of pretending to delete installer-owned state. The snap gateway +service keeps its service-owned state and runtime files in `$SNAP_COMMON`, +while the snap CLI uses `~/.config/openshell/` for user-managed registrations +via the `dot-config-openshell` personal-files interface. ## Operational Constraints diff --git a/deploy/snap/README.md b/deploy/snap/README.md index c61fd0557..b9b88532c 100644 --- a/deploy/snap/README.md +++ b/deploy/snap/README.md @@ -11,7 +11,7 @@ binaries when you want to inspect the snap root or produce local artifacts. - Linux on `amd64` or `arm64` - `snap` from `snapd` - `snapcraft` -- Docker from the Docker snap (`sudo snap install docker`) +- KVM access for the VM driver — `/dev/kvm` reachable from your user ## Build with Snapcraft @@ -22,8 +22,8 @@ snapcraft pack ``` The manifest builds the Rust binaries inside Snapcraft, installs the CLI, -gateway, and sandbox supervisor into the snap, and keeps the same runtime -environment as the current deployment logic. +gateway, sandbox supervisor, and VM driver into the snap, and keeps the same +runtime environment as the current deployment logic. ## Staged helper flow @@ -45,7 +45,8 @@ mise run build:rust:snap ``` This convenience target builds the CLI with `bundled-z3`, the gateway, and -`openshell-sandbox` for the Docker driver to bind-mount into sandbox containers. +`openshell-sandbox` for the supervisor binary the VM driver injects into +sandbox guests. 
## Pack the snap @@ -88,7 +89,7 @@ The snap exposes the CLI: - `openshell` -It also defines a system service with packaged Docker driver settings. +It also defines a system service. - `openshell.gateway` @@ -96,10 +97,14 @@ The gateway service uses `refresh-mode: endure` so snap refreshes do not restart it while sandboxes are active. Restart the service manually when you are ready to move the gateway to the refreshed snap revision. -`openshell-sandbox` is staged next to `openshell-gateway` as the Docker -supervisor binary. The gateway app starts through a small wrapper that sets -Snap-specific defaults and reads `$SNAP_COMMON/gateway.toml` when that file -exists. The service stores its gateway database under `$SNAP_COMMON`. +The gateway app starts through a small wrapper that pins snap-specific +defaults: an on-disk SQLite database, a loopback HTTP listener at +`http://127.0.0.1:17670`, plaintext (no TLS), trusted-local user access for +that loopback endpoint, and the `vm` compute driver. The wrapper keeps the +service's XDG state and runtime directories under `$SNAP_COMMON` so the daemon +never depends on inherited host paths such as `/run/user/`. Before the +gateway starts, it also ensures the local sandbox JWT bundle exists under snap +state so sandbox supervisors can authenticate back to the gateway. ## Interfaces @@ -113,65 +118,53 @@ The `openshell` CLI app plugs: The `openshell.gateway` service plugs: - `docker` +- `kvm` - `log-observe` - `network` - `network-bind` - `ssh-keys` - `system-observe` -## Start a Docker gateway from the snap +## Connecting after install -The snapped gateway talks to Docker through the Docker snap's -`docker:docker-daemon` slot. The snap declares `default-provider: docker` on -its Docker plug so snapd can install the Docker snap when OpenShell is -installed. 
Connect the interface before using the Docker driver: +On first install, the snap's install hook seeds a system-level gateway entry +named `local-vm` pointing at the snap-managed loopback HTTP endpoint, and +marks it active. The CLI discovers this through `OPENSHELL_SYSTEM_GATEWAY_DIR`, +so a fresh snap is usable without any manual `openshell gateway add`. ```shell -sudo snap connect openshell:docker docker:docker-daemon -sudo snap connect openshell:log-observe -sudo snap connect openshell:system-observe -sudo snap connect openshell:ssh-keys -``` - -The gateway uses Docker's default Unix socket location. The Docker snap exposes -that socket through the connected `docker` interface, so no `DOCKER_HOST` -override is required. The OpenShell snap still requires the Docker snap because -it relies on the `docker:docker-daemon` slot; it does not work with Docker -installed from a Debian package or Docker's upstream packages. - -The service runs the gateway with Snap-specific environment defaults: - -```shell -OPENSHELL_DISABLE_TLS=true \ -OPENSHELL_DB_URL="sqlite:$SNAP_COMMON/gateway.db?mode=rwc" \ -openshell.gateway +openshell status +openshell sandbox create --name demo +openshell sandbox connect demo ``` -This stores the gateway SQLite database at -`/var/snap/openshell/common/gateway.db`. Create -`/var/snap/openshell/common/gateway.toml` when you need to override gateway or -Docker driver settings. +`openshell gateway list` will show the `local-vm` entry. Per-user gateway +registrations (made with `openshell gateway add`) shadow the system entry on +name collision, so an operator wanting a different default does not need to +remove anything. -## Connect with the OpenShell CLI +## Using user-managed gateway registrations -Register the snap-run gateway as a local plaintext gateway: +The snap declares a `dot-config-openshell` personal-files interface for +`~/.config/openshell`, and the CLI runs with `XDG_CONFIG_HOME` pointed at that +real home-directory config root. 
That keeps user-managed registrations and +imported mTLS bundles in the same location as other package formats, including +flows like the Kubernetes guide that write client TLS material into +`~/.config/openshell/gateways/<name>/mtls/`. -```shell -openshell gateway add http://127.0.0.1:17670 --local --name snap-docker -openshell gateway select snap-docker -openshell status -``` +## Connecting Docker (optional) -Then use normal sandbox commands: +The snap also declares the Docker interface. Connecting it lets the gateway +talk to a host Docker daemon if you want to switch the compute driver from +`vm` to `docker`: ```shell -openshell sandbox create --name demo -openshell sandbox connect demo +sudo snap connect openshell:docker docker:docker-daemon +sudo snap set openshell drivers=docker ``` -To avoid changing the default gateway, pass the gateway name per command: - -```shell -openshell --gateway snap-docker status -openshell --gateway snap-docker sandbox create --name demo -``` +The Docker snap exposes the Docker daemon through the connected `docker` +interface, so no `DOCKER_HOST` override is required. The OpenShell snap requires the +Docker snap because it relies on the `docker:docker-daemon` slot; it does not +work with Docker installed from a Debian package or Docker's upstream +packages. diff --git a/deploy/snap/bin/openshell-gateway-wrapper b/deploy/snap/bin/openshell-gateway-wrapper index cfba8db36..733747b05 100755 --- a/deploy/snap/bin/openshell-gateway-wrapper +++ b/deploy/snap/bin/openshell-gateway-wrapper @@ -4,12 +4,33 @@ set -eu -CANONICAL_CONFIG_FILE="${SNAP_COMMON}/gateway.toml" +# Snap daemons may inherit host XDG paths from systemd; keep gateway-owned +# state and runtime files under snap-managed storage instead. 
+export XDG_DATA_HOME="${SNAP_COMMON}" +export PATH="${SNAP}/usr/sbin:${SNAP}/usr/bin:${SNAP}/sbin:${SNAP}/bin:${PATH}" +export XDG_STATE_HOME="${SNAP_COMMON}/state" +export XDG_RUNTIME_DIR="${SNAP_COMMON}/run" +mkdir -p "${XDG_STATE_HOME}" "${XDG_RUNTIME_DIR}" +export OPENSHELL_GATEWAY_CONFIG="${OPENSHELL_GATEWAY_CONFIG:-${SNAP_COMMON}/gateway.toml}" export OPENSHELL_DB_URL="${OPENSHELL_DB_URL:-sqlite:${SNAP_COMMON}/gateway.db?mode=rwc}" +export OPENSHELL_BIND_ADDRESS="${OPENSHELL_BIND_ADDRESS:-127.0.0.1}" +export OPENSHELL_SERVER_PORT="${OPENSHELL_SERVER_PORT:-17670}" export OPENSHELL_DISABLE_TLS="${OPENSHELL_DISABLE_TLS:-true}" +DRIVERS="$(snapctl get drivers 2>/dev/null || true)" +export OPENSHELL_DRIVERS="${OPENSHELL_DRIVERS:-${DRIVERS:-vm}}" -if [ -z "${OPENSHELL_GATEWAY_CONFIG:-}" ] && [ -f "$CANONICAL_CONFIG_FILE" ]; then - exec "${SNAP}/bin/openshell-gateway" --config "$CANONICAL_CONFIG_FILE" "$@" +if [ ! -f "${OPENSHELL_GATEWAY_CONFIG}" ]; then + printf '%s\n' \ + '[openshell]' \ + 'version = 1' \ + '' \ + '[openshell.gateway.auth]' \ + 'allow_unauthenticated_users = true' \ + > "${OPENSHELL_GATEWAY_CONFIG}" fi +"${SNAP}/bin/openshell-gateway" generate-certs \ + --output-dir "${XDG_STATE_HOME}/openshell/tls" \ + --server-san host.openshell.internal + exec "${SNAP}/bin/openshell-gateway" "$@" diff --git a/deploy/snap/meta/snap.yaml.in b/deploy/snap/meta/snap.yaml.in index 920dd9141..4adbe1062 100644 --- a/deploy/snap/meta/snap.yaml.in +++ b/deploy/snap/meta/snap.yaml.in @@ -25,6 +25,8 @@ architectures: apps: openshell: command: bin/openshell + environment: + OPENSHELL_SYSTEM_GATEWAY_DIR: "$SNAP_COMMON/system-gateways" plugs: - home - network @@ -34,11 +36,6 @@ apps: command: bin/openshell-gateway-wrapper daemon: simple refresh-mode: endure - environment: - XDG_DATA_HOME: "$SNAP_COMMON" - # Used for creating and locating certain sockets. 
- XDG_RUNTIME_DIR: "$SNAP_COMMON" - plugs: - docker - log-observe diff --git a/docs/about/installation.mdx b/docs/about/installation.mdx index cd9973f13..382120032 100644 --- a/docs/about/installation.mdx +++ b/docs/about/installation.mdx @@ -75,6 +75,12 @@ To keep the user service running after logout, enable linger: sudo loginctl enable-linger $USER ``` +## Snap + +The snap package installs the `openshell` CLI and an `openshell.gateway` service. On first install, the snap seeds a read-only `local-vm` gateway entry that points at the snap-managed `http://127.0.0.1:17670` endpoint and marks it active. Run `openshell status` after install to verify the CLI can reach the local gateway. + +The gateway service stores optional overrides in `$SNAP_COMMON/gateway.toml`, usually `/var/snap/openshell/common/gateway.toml`, keeps its runtime and generated state under `$SNAP_COMMON`, and uses the VM compute driver by default. The CLI uses `~/.config/openshell/` for user-managed gateway registrations and mTLS bundles via the `dot-config-openshell` personal-files interface. + ## Kubernetes Kubernetes deployments use the OpenShell Helm chart. For step-by-step installation, refer to [Kubernetes Setup](/kubernetes/setup). For chart values and packaging details, refer to the [Helm chart README](https://github.com/NVIDIA/OpenShell/blob/main/deploy/helm/openshell/README.md). 
diff --git a/python/openshell/release_formula_test.py b/python/openshell/release_formula_test.py index 81bf89fab..df4ea4ba8 100644 --- a/python/openshell/release_formula_test.py +++ b/python/openshell/release_formula_test.py @@ -94,25 +94,62 @@ def test_generate_homebrew_formula_uses_tagged_macos_driver_asset_without_defaul assert "brew services restart openshell" in formula -def test_snap_wrapper_uses_optional_gateway_config_without_generating_toml() -> None: +def test_snap_wrapper_configures_gateway_via_env_vars() -> None: repo_root = Path(__file__).resolve().parents[2] wrapper = (repo_root / "deploy/snap/bin/openshell-gateway-wrapper").read_text( encoding="utf-8" ) assert "init-gateway-config.sh" not in wrapper + assert "--config" not in wrapper + assert "CANONICAL_CONFIG_FILE" not in wrapper + + expected_exports = [ + 'export XDG_DATA_HOME="${SNAP_COMMON}"', + 'export PATH="${SNAP}/usr/sbin:${SNAP}/usr/bin:${SNAP}/sbin:${SNAP}/bin:${PATH}"', + 'export XDG_STATE_HOME="${SNAP_COMMON}/state"', + 'export XDG_RUNTIME_DIR="${SNAP_COMMON}/run"', + 'export OPENSHELL_GATEWAY_CONFIG="${OPENSHELL_GATEWAY_CONFIG:-${SNAP_COMMON}/gateway.toml}"', + 'export OPENSHELL_DB_URL="${OPENSHELL_DB_URL:-sqlite:${SNAP_COMMON}/gateway.db?mode=rwc}"', + 'export OPENSHELL_BIND_ADDRESS="${OPENSHELL_BIND_ADDRESS:-127.0.0.1}"', + 'export OPENSHELL_SERVER_PORT="${OPENSHELL_SERVER_PORT:-17670}"', + 'export OPENSHELL_DISABLE_TLS="${OPENSHELL_DISABLE_TLS:-true}"', + 'export OPENSHELL_DRIVERS="${OPENSHELL_DRIVERS:-${DRIVERS:-vm}}"', + ] + for export in expected_exports: + assert export in wrapper, f"missing export: {export}" + assert 'mkdir -p "${XDG_STATE_HOME}" "${XDG_RUNTIME_DIR}"' in wrapper + assert 'DRIVERS="$(snapctl get drivers 2>/dev/null || true)"' in wrapper + assert "allow_unauthenticated_users = true" in wrapper assert ( - 'export OPENSHELL_DB_URL="${OPENSHELL_DB_URL:-sqlite:${SNAP_COMMON}/gateway.db?mode=rwc}"' - in wrapper - ) - assert 'export 
OPENSHELL_DISABLE_TLS="${OPENSHELL_DISABLE_TLS:-true}"' in wrapper - assert ( - 'exec "${SNAP}/bin/openshell-gateway" --config "$CANONICAL_CONFIG_FILE" "$@"' + 'generate-certs \\\n --output-dir "${XDG_STATE_HOME}/openshell/tls" \\\n --server-san host.openshell.internal' in wrapper ) + assert 'exec "${SNAP}/bin/openshell-gateway" "$@"' in wrapper +def test_snap_install_hook_seeds_localhost_gateway_metadata() -> None: + repo_root = Path(__file__).resolve().parents[2] + install_hook = (repo_root / "snap/hooks/install").read_text(encoding="utf-8") + + assert '"gateway_endpoint": "http://127.0.0.1:17670"' in install_hook + assert '"auth_mode": "plaintext"' in install_hook + + +def test_snap_cli_sets_system_gateway_dir_via_app_env() -> None: + repo_root = Path(__file__).resolve().parents[2] + snapcraft = (repo_root / "snap/snapcraft.yaml").read_text(encoding="utf-8") + + assert "command: bin/openshell" in snapcraft + assert 'XDG_CONFIG_HOME: "$SNAP_REAL_HOME/.config"' in snapcraft + assert 'OPENSHELL_SYSTEM_GATEWAY_DIR: "$SNAP_COMMON/system-gateways"' in snapcraft + assert "interface: personal-files" in snapcraft + assert "- $HOME/.config/openshell" in snapcraft + assert "- dot-config-openshell" in snapcraft + assert "- openssh-client" in snapcraft + + def test_rpm_spec_uses_gateway_defaults_without_config_helper() -> None: repo_root = Path(__file__).resolve().parents[2] spec = (repo_root / "openshell.spec").read_text(encoding="utf-8") diff --git a/snap/hooks/configure b/snap/hooks/configure new file mode 100755 index 000000000..044d6aa49 --- /dev/null +++ b/snap/hooks/configure @@ -0,0 +1,5 @@ +#!/bin/sh +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+# SPDX-License-Identifier: Apache-2.0 + +set -eu diff --git a/snap/hooks/install b/snap/hooks/install new file mode 100755 index 000000000..1519e4e31 --- /dev/null +++ b/snap/hooks/install @@ -0,0 +1,23 @@ +#!/bin/sh +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +set -eu + +SYSTEM_GATEWAY_DIR="${SNAP_COMMON}/system-gateways" +DEFAULT_GATEWAY="local-vm" + +if [ ! -f "${SYSTEM_GATEWAY_DIR}/active_gateway" ]; then + mkdir -p "$SYSTEM_GATEWAY_DIR" + printf '%s' "$DEFAULT_GATEWAY" > "${SYSTEM_GATEWAY_DIR}/active_gateway" + chmod 644 "${SYSTEM_GATEWAY_DIR}/active_gateway" +fi +chmod 755 "$SYSTEM_GATEWAY_DIR" + +if [ ! -f "${SYSTEM_GATEWAY_DIR}/${DEFAULT_GATEWAY}/metadata.json" ]; then + mkdir -p "${SYSTEM_GATEWAY_DIR}/${DEFAULT_GATEWAY}" + printf '{\n "name": "%s",\n "gateway_endpoint": "http://127.0.0.1:17670",\n "is_remote": false,\n "gateway_port": 0,\n "auth_mode": "plaintext"\n}\n' \ + "$DEFAULT_GATEWAY" > "${SYSTEM_GATEWAY_DIR}/${DEFAULT_GATEWAY}/metadata.json" + chmod 644 "${SYSTEM_GATEWAY_DIR}/${DEFAULT_GATEWAY}/metadata.json" +fi +chmod 755 "${SYSTEM_GATEWAY_DIR}/${DEFAULT_GATEWAY}" diff --git a/snap/snapcraft.yaml b/snap/snapcraft.yaml index 6257ca851..af24bf5c2 100644 --- a/snap/snapcraft.yaml +++ b/snap/snapcraft.yaml @@ -28,10 +28,20 @@ platforms: build-on: [arm64] build-for: [arm64] +plugs: + dot-config-openshell: + interface: personal-files + write: + - $HOME/.config/openshell + apps: openshell: command: bin/openshell + environment: + XDG_CONFIG_HOME: "$SNAP_REAL_HOME/.config" + OPENSHELL_SYSTEM_GATEWAY_DIR: "$SNAP_COMMON/system-gateways" plugs: + - dot-config-openshell - home - network - ssh-keys @@ -40,11 +50,9 @@ apps: command: bin/openshell-gateway-wrapper daemon: simple refresh-mode: endure - environment: - XDG_DATA_HOME: "$SNAP_COMMON" - XDG_RUNTIME_DIR: "$SNAP_COMMON" plugs: - docker + - kvm - log-observe - network - network-bind @@ -61,21 +69,40 @@ 
parts: - ca-certificates - clang - cmake + - bc + - bison + - cpio + - curl + - flex - git - libclang-dev + - jq + - libcap-ng-dev - libssl-dev + - libelf-dev - libz3-dev - pkg-config - python3 + - python3-pyelftools + - zstd + stage-packages: + - e2fsprogs + - iproute2 + - nftables override-pull: | craftctl default craftctl set version="$(python3 "$CRAFT_PROJECT_DIR/tasks/scripts/release.py" get-version --snap)" override-build: | set -euo pipefail + export OPENSHELL_VM_RUNTIME_COMPRESSED_DIR="$CRAFT_PART_BUILD/target/vm-runtime-compressed" + FROM_SOURCE=1 "$CRAFT_PART_BUILD/tasks/scripts/vm/vm-setup.sh" + "$CRAFT_PART_BUILD/tasks/scripts/vm/build-supervisor-bundle.sh" + cargo build --release --locked -p openshell-cli --features bundled-z3 cargo build --release --locked -p openshell-server --bin openshell-gateway cargo build --release --locked -p openshell-sandbox --bin openshell-sandbox + cargo build --release --locked -p openshell-driver-vm --bin openshell-driver-vm install -D -m 0755 "$CRAFT_PART_BUILD/target/release/openshell" \ "$CRAFT_PART_INSTALL/bin/openshell" @@ -83,6 +110,8 @@ parts: "$CRAFT_PART_INSTALL/bin/openshell-gateway" install -D -m 0755 "$CRAFT_PART_BUILD/target/release/openshell-sandbox" \ "$CRAFT_PART_INSTALL/bin/openshell-sandbox" + install -D -m 0755 "$CRAFT_PART_BUILD/target/release/openshell-driver-vm" \ + "$CRAFT_PART_INSTALL/libexec/openshell/openshell-driver-vm" install -D -m 0755 "$CRAFT_PROJECT_DIR/deploy/snap/bin/openshell-gateway-wrapper" \ "$CRAFT_PART_INSTALL/bin/openshell-gateway-wrapper" install -D -m 0644 "$CRAFT_PROJECT_DIR/LICENSE" \ From 8173552bfe89ee4c3deb32bab36f2c0e82b7c836 Mon Sep 17 00:00:00 2001 From: Alex Lewontin Date: Tue, 26 May 2026 11:08:42 -0400 Subject: [PATCH 4/5] fix(driver-vm): handle prepared image rootfs failures Custom `--from` VM images were failing in the guest-prep path with stale or incompatible prepared rootfs handling. 
Observed failures: - `EXT4-fs (vdc): write access unavailable, cannot proceed (try mounting with noload)` - `mount: /image-cache: cannot mount /dev/vdc read-only` - `FATAL: umoci unpack did not produce rootfs directory` - `ProcessExited: VM process exited with status 0` hid the guest-side cause Mount prepared ext4 disks with `ro,noload`, accept both umoci unpack layouts, bump the rootfs cache layout versions so old prepared disks are rebuilt, and include the tail of rootfs-console.log in ProcessExited errors. Signed-off-by: Alex Lewontin --- .../scripts/openshell-vm-sandbox-init.sh | 4 +- crates/openshell-driver-vm/src/driver.rs | 73 +++++++++++++++- crates/openshell-driver-vm/src/rootfs.rs | 15 ++++ e2e/rust/Cargo.toml | 4 + e2e/rust/tests/vm_custom_image.rs | 83 +++++++++++++++++++ 5 files changed, 173 insertions(+), 6 deletions(-) create mode 100644 e2e/rust/tests/vm_custom_image.rs diff --git a/crates/openshell-driver-vm/scripts/openshell-vm-sandbox-init.sh b/crates/openshell-driver-vm/scripts/openshell-vm-sandbox-init.sh index 8725984f9..916839c3d 100644 --- a/crates/openshell-driver-vm/scripts/openshell-vm-sandbox-init.sh +++ b/crates/openshell-driver-vm/scripts/openshell-vm-sandbox-init.sh @@ -165,7 +165,7 @@ prepare_guest_image_rootfs() { ts "FATAL: umoci not found in VM bootstrap image" exit 1 fi - /opt/openshell/bin/umoci raw unpack \ + /opt/openshell/bin/umoci unpack \ --image "$payload_dir/oci:openshell" \ "$partial_root" if [ ! 
-d "$partial_root/rootfs" ]; then @@ -252,7 +252,7 @@ setup_overlay_root() { local lower_root="/lower" if [ -b /dev/vdc ]; then - mount -t ext4 -o ro /dev/vdc /image-cache + mount -t ext4 -o ro,noload /dev/vdc /image-cache if [ -d /image-cache/image-rootfs ]; then lower_root="/image-cache/image-rootfs" ts "using prepared image rootfs lowerdir" diff --git a/crates/openshell-driver-vm/src/driver.rs b/crates/openshell-driver-vm/src/driver.rs index 405bf226d..83909a703 100644 --- a/crates/openshell-driver-vm/src/driver.rs +++ b/crates/openshell-driver-vm/src/driver.rs @@ -2328,11 +2328,19 @@ impl VmDriver { } }; + let console_output = { + let registry = self.registry.lock().await; + registry + .get(&sandbox_id) + .map(|record| record.state_dir.join("rootfs-console.log")) + }; + if let Some(status) = exit_status { - let message = status.code().map_or_else( - || "VM process exited".to_string(), - |code| format!("VM process exited with status {code}"), - ); + let console_excerpt = match console_output { + Some(path) => read_vm_console_excerpt(&path).await, + None => None, + }; + let message = vm_process_exit_message(status.code(), console_excerpt.as_deref()); if let Some(snapshot) = self .set_snapshot_condition( &sandbox_id, @@ -2347,6 +2355,7 @@ impl VmDriver { sandbox_id.clone(), platform_event("vm", "Warning", "ProcessExited", message), ); + let has_gpu = { let registry = self.registry.lock().await; registry @@ -4321,6 +4330,32 @@ fn platform_event(source: &str, event_type: &str, reason: &str, message: String) event } +async fn read_vm_console_excerpt(path: &Path) -> Option { + let console = tokio::fs::read_to_string(path).await.ok()?; + let mut lines: Vec<&str> = console + .lines() + .map(str::trim) + .filter(|line| !line.is_empty()) + .collect(); + if lines.is_empty() { + return None; + } + let keep_from = lines.len().saturating_sub(8); + lines.drain(..keep_from); + Some(lines.join("\n")) +} + +fn vm_process_exit_message(code: Option, console_excerpt: Option<&str>) 
-> String { + match (code, console_excerpt) { + (Some(code), Some(console)) => { + format!("VM process exited with status {code}. Last console output:\n{console}") + } + (Some(code), None) => format!("VM process exited with status {code}"), + (None, Some(console)) => format!("VM process exited. Last console output:\n{console}"), + (None, None) => "VM process exited".to_string(), + } +} + fn attach_vm_progress_metadata(event: &mut PlatformEvent) { if event.source != "vm" { return; @@ -4442,6 +4477,36 @@ mod tests { use std::time::{SystemTime, UNIX_EPOCH}; use tonic::Code; + #[test] + fn vm_process_exit_message_includes_console_excerpt() { + let message = vm_process_exit_message(Some(0), Some("line one\nline two")); + assert!(message.contains("VM process exited with status 0")); + assert!(message.contains("line one\nline two")); + } + + #[tokio::test] + async fn read_vm_console_excerpt_returns_tail_lines() { + let base = unique_temp_dir(); + std::fs::create_dir_all(&base).unwrap(); + let console = base.join("rootfs-console.log"); + fs::write( + &console, + [ + "line 1", "line 2", "line 3", "line 4", "line 5", "line 6", "line 7", "line 8", + "line 9", "line 10", + ] + .join("\n"), + ) + .unwrap(); + + let excerpt = read_vm_console_excerpt(&console).await.unwrap(); + assert_eq!( + excerpt, + "line 3\nline 4\nline 5\nline 6\nline 7\nline 8\nline 9\nline 10" + ); + + let _ = std::fs::remove_dir_all(base); + } #[test] fn vm_pulling_layer_event_adds_progress_detail_metadata() { let mut event = platform_event( diff --git a/crates/openshell-driver-vm/src/rootfs.rs b/crates/openshell-driver-vm/src/rootfs.rs index 904ed8cd3..3a6c1ea51 100644 --- a/crates/openshell-driver-vm/src/rootfs.rs +++ b/crates/openshell-driver-vm/src/rootfs.rs @@ -946,6 +946,21 @@ mod tests { assert!(rootfs.join("lower").is_dir()); assert!(rootfs.join("overlay").is_dir()); assert!(rootfs.join("newroot").is_dir()); + assert!( + fs::read_to_string(rootfs.join("srv/openshell-vm-sandbox-init.sh")) + 
.expect("read init script") + .contains("mount -t ext4 -o ro,noload /dev/vdc /image-cache") + ); + assert!( + fs::read_to_string(rootfs.join("srv/openshell-vm-sandbox-init.sh")) + .expect("read init script") + .contains("umoci unpack") + ); + assert!( + fs::read_to_string(rootfs.join("srv/openshell-vm-sandbox-init.sh")) + .expect("read init script") + .contains("if [ ! -d \"$partial_root/rootfs\" ]; then") + ); assert!( fs::read_dir(rootfs.join("sandbox")) .expect("read sandbox") diff --git a/e2e/rust/Cargo.toml b/e2e/rust/Cargo.toml index 26957baab..31cd721b8 100644 --- a/e2e/rust/Cargo.toml +++ b/e2e/rust/Cargo.toml @@ -55,6 +55,10 @@ required-features = ["e2e-podman"] name = "vm_gateway_resume" path = "tests/vm_gateway_resume.rs" required-features = ["e2e-vm"] +[[test]] +name = "vm_custom_image" +path = "tests/vm_custom_image.rs" +required-features = ["e2e-vm"] [[test]] name = "readyz_health" diff --git a/e2e/rust/tests/vm_custom_image.rs b/e2e/rust/tests/vm_custom_image.rs new file mode 100644 index 000000000..cb0c29570 --- /dev/null +++ b/e2e/rust/tests/vm_custom_image.rs @@ -0,0 +1,83 @@ +// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +#![cfg(feature = "e2e-vm")] + +//! E2E test: build a custom container image locally, then launch it through the +//! standalone VM gateway. +//! +//! Prerequisites: +//! - A running VM-backed openshell gateway (`mise run e2e:vm` or +//! `e2e/rust/e2e-vm.sh`) +//! - Docker daemon running locally (the CLI builds Dockerfiles into the local +//! Docker daemon before handing the resulting image to the gateway) +//! 
- The `openshell` binary (built automatically from the workspace) + +use std::io::Write; + +use openshell_e2e::harness::output::strip_ansi; +use openshell_e2e::harness::sandbox::SandboxGuard; + +const DOCKERFILE_CONTENT: &str = r#"FROM public.ecr.aws/docker/library/python:3.13-slim + +# iproute2 is required for sandbox network namespace isolation. +RUN apt-get update && apt-get install -y --no-install-recommends iproute2 \ + && rm -rf /var/lib/apt/lists/* + +# Create the sandbox user/group so the supervisor can switch to it. +# Use a high UID range to avoid conflicts with host users when running without +# user namespace remapping (UID in container = UID on host). +RUN groupadd -g 1000660000 sandbox && \ + useradd -m -u 1000660000 -g sandbox sandbox + +# Write a marker file so we can verify this is our custom image. +# Place under /etc (Landlock baseline read-only path) so the sandbox +# can read it when filesystem restrictions are properly enforced. +RUN echo "vm-custom-image-e2e-marker" > /etc/marker.txt + +CMD ["sleep", "infinity"] +"#; + +const MARKER: &str = "vm-custom-image-e2e-marker"; + +#[tokio::test] +async fn sandbox_from_custom_dockerfile_on_vm_gateway() { + if std::env::var("OPENSHELL_E2E_DRIVER").as_deref() != Ok("vm") { + eprintln!("Skipping VM custom image test: e2e driver is not vm"); + return; + } + if std::env::var_os("DOCKER_HOST").is_none() + && !std::path::Path::new("/var/run/docker.sock").exists() + { + eprintln!("Skipping VM custom image test: /var/run/docker.sock not found"); + return; + } + + let tmpdir = tempfile::tempdir().expect("create tmpdir"); + let dockerfile_path = tmpdir.path().join("Dockerfile"); + { + let mut dockerfile = std::fs::File::create(&dockerfile_path).expect("create Dockerfile"); + dockerfile + .write_all(DOCKERFILE_CONTENT.as_bytes()) + .expect("write Dockerfile"); + } + + let dockerfile_str = dockerfile_path.to_str().expect("Dockerfile path is UTF-8"); + let mut sandbox = SandboxGuard::create(&[ + "--from", + 
dockerfile_str, + "--", + "cat", + "/etc/marker.txt", + ]) + .await + .expect("sandbox create from Dockerfile on VM gateway"); + + let clean_output = strip_ansi(&sandbox.create_output); + assert!( + clean_output.contains(MARKER), + "expected marker '{MARKER}' in VM sandbox output:\n{clean_output}" + ); + + sandbox.cleanup().await; +} From 9b13ce496fe0431f1f0f6c83d1867498ebf50db7 Mon Sep 17 00:00:00 2001 From: Alex Lewontin Date: Tue, 26 May 2026 12:14:10 -0400 Subject: [PATCH 5/5] fix(snap): use bundled ssh client under strict confinement Strict snap sandbox connect/create shells were still trying to exec the host OpenSSH binary. Observed failure: - `apparmor="DENIED" operation="exec" class="file" profile="snap.openshell.openshell" name="/usr/bin/ssh" requested_mask="x" denied_mask="x"` Bundle `openssh-client` in the snap so the CLI uses the bundled binary under strict confinement. Signed-off-by: Alex Lewontin --- snap/snapcraft.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/snap/snapcraft.yaml b/snap/snapcraft.yaml index af24bf5c2..cb68da460 100644 --- a/snap/snapcraft.yaml +++ b/snap/snapcraft.yaml @@ -89,6 +89,7 @@ parts: - e2fsprogs - iproute2 - nftables + - openssh-client override-pull: | craftctl default craftctl set version="$(python3 "$CRAFT_PROJECT_DIR/tasks/scripts/release.py" get-version --snap)"