From 12c699d0b95cefe336f47ed755d97dd0d563cf1d Mon Sep 17 00:00:00 2001 From: st-gr <38470677+st-gr@users.noreply.github.com> Date: Wed, 27 May 2026 18:42:39 -0700 Subject: [PATCH 1/3] feat(core): add External(PathBuf) variant to ComputeDriverKind Carries the UDS path supplied by --compute-driver-socket. Drops Copy from the enum derive (PathBuf is not Copy); existing callers use Clone or owned values. FromStr accepts 'external:<path>' and rejects bare 'external' with a message pointing at the CLI flag. Signed-off-by: st-gr <38470677+st-gr@users.noreply.github.com> --- crates/openshell-core/src/config.rs | 107 ++++++++++++++++++++- crates/openshell-server/src/cli.rs | 2 +- crates/openshell-server/src/config_file.rs | 4 + crates/openshell-server/src/lib.rs | 12 +-- 4 files changed, 113 insertions(+), 12 deletions(-) diff --git a/crates/openshell-core/src/config.rs b/crates/openshell-core/src/config.rs index 98562c8a6..315e9e517 100644 --- a/crates/openshell-core/src/config.rs +++ b/crates/openshell-core/src/config.rs @@ -40,30 +40,38 @@ pub const DEFAULT_SUPERVISOR_IMAGE: &str = "ghcr.io/nvidia/openshell/supervisor: pub const CDI_GPU_DEVICE_ALL: &str = "nvidia.com/gpu=all"; /// Compute backends the gateway can orchestrate sandboxes through. -#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] #[serde(rename_all = "snake_case")] pub enum ComputeDriverKind { Kubernetes, Vm, Docker, Podman, + /// Out-of-process compute driver speaking the gRPC compute_driver.proto contract over a Unix domain socket. The path is supplied by --compute-driver-socket or OPENSHELL_COMPUTE_DRIVER_SOCKET. 
+ External(PathBuf), } impl ComputeDriverKind { #[must_use] - pub const fn as_str(self) -> &'static str { + pub fn as_str(&self) -> &'static str { match self { Self::Kubernetes => "kubernetes", Self::Vm => "vm", Self::Docker => "docker", Self::Podman => "podman", + Self::External(_) => "external", } } } impl fmt::Display for ComputeDriverKind { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.write_str(self.as_str()) + match self { + Self::Kubernetes | Self::Vm | Self::Docker | Self::Podman => { + f.write_str(self.as_str()) + } + Self::External(path) => write!(f, "external:{}", path.display()), + } } } @@ -71,13 +79,31 @@ impl FromStr for ComputeDriverKind { type Err = String; fn from_str(value: &str) -> Result { - match value.trim().to_ascii_lowercase().as_str() { + let trimmed = value.trim(); + let lower = trimmed.to_ascii_lowercase(); + if let Some(suffix_lower) = lower.strip_prefix("external:") { + // Use the case-preserving suffix for the path. + let suffix = &trimmed[trimmed.len() - suffix_lower.len()..]; + if suffix.is_empty() { + return Err( + "compute driver 'external:' requires a non-empty socket path \ + (e.g. 'external:/var/run/openshell-driver.sock')" + .to_string(), + ); + } + return Ok(Self::External(PathBuf::from(suffix))); + } + match lower.as_str() { "kubernetes" => Ok(Self::Kubernetes), "vm" => Ok(Self::Vm), "docker" => Ok(Self::Docker), "podman" => Ok(Self::Podman), + "external" => Err( + "compute driver 'external' requires a socket path: 'external:/path/to/driver.sock' (or set --compute-driver-socket)" + .to_string(), + ), other => Err(format!( - "unsupported compute driver '{other}'. expected one of: kubernetes, vm, docker, podman" + "unsupported compute driver '{other}'. 
expected one of: kubernetes, vm, docker, podman, external:" )), } } @@ -628,6 +654,42 @@ mod tests { assert!(err.contains("unsupported compute driver 'firecracker'")); } + #[test] + fn compute_driver_kind_external_displays_with_path() { + let kind = ComputeDriverKind::External(PathBuf::from("/x/y")); + assert_eq!(kind.to_string(), "external:/x/y"); + } + + #[test] + fn compute_driver_kind_parses_external_with_socket_path() { + let parsed: ComputeDriverKind = + "external:/var/run/openshell-driver.sock".parse().unwrap(); + match parsed { + ComputeDriverKind::External(path) => { + assert_eq!(path, PathBuf::from("/var/run/openshell-driver.sock")); + } + other => panic!("expected External(_), got {other:?}"), + } + } + + #[test] + fn compute_driver_kind_rejects_bare_external_without_path() { + let err = "external".parse::().unwrap_err(); + assert!( + err.contains("requires a socket path"), + "missing socket-path hint in error: {err}" + ); + } + + #[test] + fn compute_driver_kind_unknown_error_lists_external_in_supported() { + let err = "unknown".parse::().unwrap_err(); + assert!( + err.contains("external:"), + "expected supported list to mention external:, got: {err}" + ); + } + #[test] fn config_defaults_to_loopback_bind_address() { let expected: SocketAddr = "127.0.0.1:17670".parse().expect("valid address"); @@ -754,4 +816,39 @@ mod tests { } } } + + #[test] + fn compute_driver_kind_display_roundtrips_through_from_str() { + use std::path::PathBuf; + for kind in [ + ComputeDriverKind::Kubernetes, + ComputeDriverKind::Vm, + ComputeDriverKind::Docker, + ComputeDriverKind::Podman, + ComputeDriverKind::External(PathBuf::from("/var/run/openshell-driver.sock")), + ] { + let s = kind.to_string(); + let parsed: ComputeDriverKind = s.parse().expect("round-trip parse"); + assert_eq!(parsed, kind, "round-trip mismatch for {s}"); + } + } + + #[test] + fn compute_driver_kind_rejects_external_with_empty_path() { + let err = "external:".parse::().unwrap_err(); + 
assert!(err.contains("non-empty socket path"), "unexpected error: {err}"); + } + + #[test] + fn compute_driver_kind_external_is_case_insensitive_on_prefix() { + let parsed: ComputeDriverKind = "External:/var/run/openshell-driver.sock" + .parse() + .expect("case-insensitive prefix should be accepted"); + match parsed { + ComputeDriverKind::External(p) => { + assert_eq!(p, PathBuf::from("/var/run/openshell-driver.sock")); + } + other => panic!("expected External, got {other:?}"), + } + } } diff --git a/crates/openshell-server/src/cli.rs b/crates/openshell-server/src/cli.rs index b8d345f9e..f02bc3ed7 100644 --- a/crates/openshell-server/src/cli.rs +++ b/crates/openshell-server/src/cli.rs @@ -613,7 +613,7 @@ fn merge_file_into_args(args: &mut RunArgs, file: &GatewayFileSection, matches: fn effective_single_driver(args: &RunArgs) -> Option { match args.drivers.as_slice() { [] => openshell_core::config::detect_driver(), - [driver] => Some(*driver), + [driver] => Some(driver.clone()), _ => None, } } diff --git a/crates/openshell-server/src/config_file.rs b/crates/openshell-server/src/config_file.rs index 7d7c99cc3..59eb219ca 100644 --- a/crates/openshell-server/src/config_file.rs +++ b/crates/openshell-server/src/config_file.rs @@ -285,6 +285,10 @@ fn inheritable_keys(driver: ComputeDriverKind) -> &'static [&'static str] { "guest_tls_cert", "guest_tls_key", ], + // The external driver is configured via the --compute-driver-socket + // CLI flag, not a TOML driver table, so no gateway-section keys are + // inheritable. 
+ ComputeDriverKind::External(_) => &[], } } diff --git a/crates/openshell-server/src/lib.rs b/crates/openshell-server/src/lib.rs index 1b20ba069..4f1147f09 100644 --- a/crates/openshell-server/src/lib.rs +++ b/crates/openshell-server/src/lib.rs @@ -762,6 +762,11 @@ async fn build_compute_runtime( .await .map_err(|e| Error::execution(format!("failed to create compute runtime: {e}"))) } + ComputeDriverKind::External(_) => Err(Error::config( + "external compute driver dispatch is not yet wired; \ + tonic UDS client lands in plan task 2a.4 \ + (st-gr/openshell-driver-kyma docs/superpowers/plans/2026-05-27-phase2a-gateway-fork.md)", + )), } } @@ -853,12 +858,7 @@ fn configured_compute_driver(config: &Config) -> Result { set --drivers or OPENSHELL_DRIVERS to kubernetes, podman, docker, or vm", )), }, - [ - driver @ (ComputeDriverKind::Kubernetes - | ComputeDriverKind::Vm - | ComputeDriverKind::Docker - | ComputeDriverKind::Podman), - ] => Ok(*driver), + [driver] => Ok(driver.clone()), drivers => Err(Error::config(format!( "multiple compute drivers are not supported yet; configured drivers: {}", drivers From 0dbe42a3914b62911f1031fe8baaded22ec335de Mon Sep 17 00:00:00 2001 From: st-gr <38470677+st-gr@users.noreply.github.com> Date: Wed, 27 May 2026 19:09:25 -0700 Subject: [PATCH 2/3] feat(server): add --compute-driver-socket CLI flag Adds an opt-in `--compute-driver-socket=PATH` flag (env `OPENSHELL_COMPUTE_DRIVER_SOCKET`) on the gateway's `RunArgs`. When set, the gateway pins `ComputeDriverKind::External(<path>)` and skips both the `--drivers` list and the auto-detection probe. This lets out-of-tree driver binaries (Kyma, custom backends) connect to a stock gateway without a rebuild. `effective_single_driver` and the `Config.compute_drivers` payload both honour the new flag so pre-runtime checks and the runtime factory dispatch agree on the configured driver. The companion dispatch arm in `lib.rs::build_compute_runtime` is wired in the follow-up commit. 
Co-Authored-By: Claude Opus 4.7 (1M context) Signed-off-by: st-gr <38470677+st-gr@users.noreply.github.com> --- crates/openshell-server/src/cli.rs | 100 ++++++++++++++++++++++++++++- 1 file changed, 99 insertions(+), 1 deletion(-) diff --git a/crates/openshell-server/src/cli.rs b/crates/openshell-server/src/cli.rs index f02bc3ed7..f89c5d66d 100644 --- a/crates/openshell-server/src/cli.rs +++ b/crates/openshell-server/src/cli.rs @@ -111,6 +111,16 @@ struct RunArgs { )] drivers: Vec, + /// Path to a Unix domain socket served by an external compute driver + /// implementing `compute_driver.proto`. + /// + /// When set, the gateway uses `ComputeDriverKind::External()` and + /// skips both the `--drivers` list and the auto-detection probe. This + /// lets out-of-tree driver binaries (Kyma, custom backends) connect to + /// an already-running gateway without rebuilding it. + #[arg(long, env = "OPENSHELL_COMPUTE_DRIVER_SOCKET")] + compute_driver_socket: Option, + /// Disable TLS entirely — listen on plaintext HTTP. /// Use this when the gateway sits behind a reverse proxy or tunnel /// (e.g. Cloudflare Tunnel) that terminates TLS at the edge. @@ -350,9 +360,18 @@ async fn run_from_args(mut args: RunArgs, matches: ArgMatches) -> Result<()> { config = config.with_metrics_bind_address(addr); } + // The --compute-driver-socket flag pins an external driver and overrides + // the --drivers list. `effective_single_driver` already mirrors this for + // pre-runtime checks; do the same here so `configured_compute_driver` + // sees the External entry when it inspects `config.compute_drivers`. 
+ let configured_drivers = if let Some(socket) = args.compute_driver_socket.clone() { + vec![ComputeDriverKind::External(socket)] + } else { + args.drivers.clone() + }; config = config .with_database_url(db_url) - .with_compute_drivers(args.drivers.clone()) + .with_compute_drivers(configured_drivers) .with_server_sans(args.server_sans.clone()) .with_loopback_service_http(args.enable_loopback_service_http); @@ -611,6 +630,11 @@ fn merge_file_into_args(args: &mut RunArgs, file: &GatewayFileSection, matches: } fn effective_single_driver(args: &RunArgs) -> Option { + // The --compute-driver-socket flag pins an out-of-tree driver and + // therefore wins over both the explicit --drivers list and auto-detection. + if let Some(socket) = args.compute_driver_socket.clone() { + return Some(ComputeDriverKind::External(socket)); + } match args.drivers.as_slice() { [] => openshell_core::config::detect_driver(), [driver] => Some(driver.clone()), @@ -1428,6 +1452,80 @@ enable_loopback_service_http = false ); } + #[test] + fn compute_driver_socket_flag_yields_external_driver() { + // The CLI flag pins ComputeDriverKind::External() so that + // out-of-tree drivers (Kyma, custom backends) can be wired without + // recompiling the gateway. 
+ let _lock = ENV_LOCK + .lock() + .unwrap_or_else(std::sync::PoisonError::into_inner); + let _g1 = EnvVarGuard::remove("OPENSHELL_COMPUTE_DRIVER_SOCKET"); + let _g2 = EnvVarGuard::remove("OPENSHELL_DRIVERS"); + + let (args, _) = parse_with_args(&[ + "openshell-gateway", + "--db-url", + "sqlite::memory:", + "--compute-driver-socket", + "/tmp/openshell-driver.sock", + ]); + + match super::effective_single_driver(&args) { + Some(super::ComputeDriverKind::External(p)) => { + assert_eq!(p, std::path::PathBuf::from("/tmp/openshell-driver.sock")); + } + other => panic!("expected External, got {other:?}"), + } + } + + #[test] + fn compute_driver_socket_flag_overrides_drivers_list() { + // Even when --drivers is set, --compute-driver-socket pins the + // external driver. This avoids forcing operators to wipe a + // gateway-wide --drivers list to add an out-of-tree driver. + let _lock = ENV_LOCK + .lock() + .unwrap_or_else(std::sync::PoisonError::into_inner); + let _g1 = EnvVarGuard::remove("OPENSHELL_COMPUTE_DRIVER_SOCKET"); + let _g2 = EnvVarGuard::remove("OPENSHELL_DRIVERS"); + + let (args, _) = parse_with_args(&[ + "openshell-gateway", + "--db-url", + "sqlite::memory:", + "--drivers", + "docker", + "--compute-driver-socket", + "/tmp/x.sock", + ]); + + match super::effective_single_driver(&args) { + Some(super::ComputeDriverKind::External(p)) => { + assert_eq!(p, std::path::PathBuf::from("/tmp/x.sock")); + } + other => panic!("expected External, got {other:?}"), + } + } + + #[test] + fn compute_driver_socket_reads_from_env_var() { + let _lock = ENV_LOCK + .lock() + .unwrap_or_else(std::sync::PoisonError::into_inner); + let _g1 = EnvVarGuard::set("OPENSHELL_COMPUTE_DRIVER_SOCKET", "/run/external.sock"); + let _g2 = EnvVarGuard::remove("OPENSHELL_DRIVERS"); + + let (args, _) = parse_with_args(&["openshell-gateway", "--db-url", "sqlite::memory:"]); + + match super::effective_single_driver(&args) { + Some(super::ComputeDriverKind::External(p)) => { + assert_eq!(p, 
std::path::PathBuf::from("/run/external.sock")); + } + other => panic!("expected External, got {other:?}"), + } + } + #[test] fn driver_inherits_shared_image_from_gateway_section() { // [openshell.gateway].default_image inherits into the K8s driver From 8eabccb5199b431d7284dc2cc92e8f1a62c85a2f Mon Sep 17 00:00:00 2001 From: st-gr <38470677+st-gr@users.noreply.github.com> Date: Wed, 27 May 2026 19:09:44 -0700 Subject: [PATCH 3/3] feat(server): wire ComputeDriverKind::External via tonic UDS channel `build_compute_runtime` now connects a tonic `Channel` to the configured Unix domain socket using `hyper-util`'s `TokioIo` connector and wraps it in `RemoteComputeDriver` -- the same proxy used by the VM driver. Replaces the placeholder `External(_) => Err(...)` arm. Adds two helpers in `compute/mod.rs`: * `connect_external_compute_driver(socket_path)` -- a small tonic-Endpoint + tower::service_fn + UnixStream connector, parallel to the one in `compute::vm` but with no VM-specific logging or capability probing. Out-of-tree drivers manage their own readiness; the gateway just dials. * `ComputeRuntime::new_remote_external(channel, ...)` -- mirrors `new_remote_vm` but takes no `ManagedDriverProcess`. The external driver's lifecycle is the operator's responsibility (systemd unit, sidecar container, etc.). Smoke-tested: `--compute-driver-socket /tmp/nonexistent.sock` now starts the gateway, logs "Connecting to external compute driver", and fails with a clear "failed to connect to external compute driver socket '<path>': transport error" message that points at the new arm. 
Co-Authored-By: Claude Opus 4.7 (1M context) Signed-off-by: st-gr <38470677+st-gr@users.noreply.github.com> --- crates/openshell-server/src/compute/mod.rs | 70 ++++++++++++++++++++++ crates/openshell-server/src/lib.rs | 22 +++++-- 2 files changed, 87 insertions(+), 5 deletions(-) diff --git a/crates/openshell-server/src/compute/mod.rs b/crates/openshell-server/src/compute/mod.rs index 98dc3fd63..42274cf22 100644 --- a/crates/openshell-server/src/compute/mod.rs +++ b/crates/openshell-server/src/compute/mod.rs @@ -216,6 +216,44 @@ impl ComputeDriver for RemoteComputeDriver { } } +/// Build a tonic [`Channel`] connected to a Unix domain socket served by an +/// external compute driver. Used by the `External(PathBuf)` dispatch arm in +/// `lib.rs::build_compute_runtime`. The dummy authority `http://[::]:50051` +/// matches the connector convention used by the VM driver — tonic ignores it +/// once a custom service connector is supplied. +#[cfg(unix)] +pub(crate) async fn connect_external_compute_driver( + socket_path: std::path::PathBuf, +) -> Result { + use hyper_util::rt::TokioIo; + use tokio::net::UnixStream; + use tonic::transport::Endpoint; + use tower::service_fn; + + let display_path = socket_path.clone(); + Endpoint::from_static("http://[::]:50051") + .connect_with_connector(service_fn(move |_: tonic::transport::Uri| { + let socket_path = socket_path.clone(); + async move { UnixStream::connect(socket_path).await.map(TokioIo::new) } + })) + .await + .map_err(|e| { + openshell_core::Error::execution(format!( + "failed to connect to external compute driver socket '{}': {e}", + display_path.display() + )) + }) +} + +#[cfg(not(unix))] +pub(crate) async fn connect_external_compute_driver( + _socket_path: std::path::PathBuf, +) -> Result { + Err(openshell_core::Error::config( + "the external compute driver requires unix domain socket support", + )) +} + #[derive(Clone)] pub struct ComputeRuntime { driver: SharedComputeDriver, @@ -373,6 +411,38 @@ impl ComputeRuntime { 
.await } + /// Build a `ComputeRuntime` over a tonic `Channel` connected to an + /// already-running external compute driver process. + /// + /// Unlike [`new_remote_vm`], this constructor does not own a child + /// process — the external driver's lifecycle is the operator's + /// responsibility (systemd unit, sidecar container, etc.). The + /// underlying `RemoteComputeDriver` proxy is identical. + pub(crate) async fn new_remote_external( + channel: Channel, + store: Arc, + sandbox_index: SandboxIndex, + sandbox_watch_bus: SandboxWatchBus, + tracing_log_bus: TracingLogBus, + supervisor_sessions: Arc, + ) -> Result { + let driver: SharedComputeDriver = Arc::new(RemoteComputeDriver::new(channel)); + Self::from_driver( + driver, + None, + None, + None, + store, + sandbox_index, + sandbox_watch_bus, + tracing_log_bus, + supervisor_sessions, + true, + Vec::new(), + ) + .await + } + pub async fn new_podman( config: PodmanComputeConfig, store: Arc, diff --git a/crates/openshell-server/src/lib.rs b/crates/openshell-server/src/lib.rs index 4f1147f09..b3cc247d4 100644 --- a/crates/openshell-server/src/lib.rs +++ b/crates/openshell-server/src/lib.rs @@ -762,11 +762,23 @@ async fn build_compute_runtime( .await .map_err(|e| Error::execution(format!("failed to create compute runtime: {e}"))) } - ComputeDriverKind::External(_) => Err(Error::config( - "external compute driver dispatch is not yet wired; \ - tonic UDS client lands in plan task 2a.4 \ - (st-gr/openshell-driver-kyma docs/superpowers/plans/2026-05-27-phase2a-gateway-fork.md)", - )), + ComputeDriverKind::External(socket) => { + info!( + socket = %socket.display(), + "Connecting to external compute driver over Unix domain socket" + ); + let channel = compute::connect_external_compute_driver(socket.clone()).await?; + ComputeRuntime::new_remote_external( + channel, + store, + sandbox_index, + sandbox_watch_bus, + tracing_log_bus, + supervisor_sessions, + ) + .await + .map_err(|e| Error::execution(format!("failed to create 
compute runtime: {e}"))) + } } }