diff --git a/crates/openshell-core/src/config.rs b/crates/openshell-core/src/config.rs index 98562c8a6..315e9e517 100644 --- a/crates/openshell-core/src/config.rs +++ b/crates/openshell-core/src/config.rs @@ -40,30 +40,38 @@ pub const DEFAULT_SUPERVISOR_IMAGE: &str = "ghcr.io/nvidia/openshell/supervisor: pub const CDI_GPU_DEVICE_ALL: &str = "nvidia.com/gpu=all"; /// Compute backends the gateway can orchestrate sandboxes through. -#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] #[serde(rename_all = "snake_case")] pub enum ComputeDriverKind { Kubernetes, Vm, Docker, Podman, + /// Out-of-process compute driver speaking the gRPC compute_driver.proto contract over a Unix domain socket. The path is supplied by --compute-driver-socket or OPENSHELL_COMPUTE_DRIVER_SOCKET. + External(PathBuf), } impl ComputeDriverKind { #[must_use] - pub const fn as_str(self) -> &'static str { + pub fn as_str(&self) -> &'static str { match self { Self::Kubernetes => "kubernetes", Self::Vm => "vm", Self::Docker => "docker", Self::Podman => "podman", + Self::External(_) => "external", } } } impl fmt::Display for ComputeDriverKind { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.write_str(self.as_str()) + match self { + Self::Kubernetes | Self::Vm | Self::Docker | Self::Podman => { + f.write_str(self.as_str()) + } + Self::External(path) => write!(f, "external:{}", path.display()), + } } } @@ -71,13 +79,31 @@ impl FromStr for ComputeDriverKind { type Err = String; fn from_str(value: &str) -> Result { - match value.trim().to_ascii_lowercase().as_str() { + let trimmed = value.trim(); + let lower = trimmed.to_ascii_lowercase(); + if let Some(suffix_lower) = lower.strip_prefix("external:") { + // Use the case-preserving suffix for the path. + let suffix = &trimmed[trimmed.len() - suffix_lower.len()..]; + if suffix.is_empty() { + return Err( + "compute driver 'external:' requires a non-empty socket path \ + (e.g. 'external:/var/run/openshell-driver.sock')" + .to_string(), + ); + } + return Ok(Self::External(PathBuf::from(suffix))); + } + match lower.as_str() { "kubernetes" => Ok(Self::Kubernetes), "vm" => Ok(Self::Vm), "docker" => Ok(Self::Docker), "podman" => Ok(Self::Podman), + "external" => Err( + "compute driver 'external' requires a socket path: 'external:/path/to/driver.sock' (or set --compute-driver-socket)" + .to_string(), + ), other => Err(format!( - "unsupported compute driver '{other}'. expected one of: kubernetes, vm, docker, podman" + "unsupported compute driver '{other}'. expected one of: kubernetes, vm, docker, podman, external:" )), } } @@ -628,6 +654,42 @@ mod tests { assert!(err.contains("unsupported compute driver 'firecracker'")); } + #[test] + fn compute_driver_kind_external_displays_with_path() { + let kind = ComputeDriverKind::External(PathBuf::from("/x/y")); + assert_eq!(kind.to_string(), "external:/x/y"); + } + + #[test] + fn compute_driver_kind_parses_external_with_socket_path() { + let parsed: ComputeDriverKind = + "external:/var/run/openshell-driver.sock".parse().unwrap(); + match parsed { + ComputeDriverKind::External(path) => { + assert_eq!(path, PathBuf::from("/var/run/openshell-driver.sock")); + } + other => panic!("expected External(_), got {other:?}"), + } + } + + #[test] + fn compute_driver_kind_rejects_bare_external_without_path() { + let err = "external".parse::().unwrap_err(); + assert!( + err.contains("requires a socket path"), + "missing socket-path hint in error: {err}" + ); + } + + #[test] + fn compute_driver_kind_unknown_error_lists_external_in_supported() { + let err = "unknown".parse::().unwrap_err(); + assert!( + err.contains("external:"), + "expected supported list to mention external:, got: {err}" + ); + } + #[test] fn config_defaults_to_loopback_bind_address() { let expected: SocketAddr = "127.0.0.1:17670".parse().expect("valid address"); @@ -754,4 +816,39 @@ mod tests { } } } + + #[test] + fn compute_driver_kind_display_roundtrips_through_from_str() { + use std::path::PathBuf; + for kind in [ + ComputeDriverKind::Kubernetes, + ComputeDriverKind::Vm, + ComputeDriverKind::Docker, + ComputeDriverKind::Podman, + ComputeDriverKind::External(PathBuf::from("/var/run/openshell-driver.sock")), + ] { + let s = kind.to_string(); + let parsed: ComputeDriverKind = s.parse().expect("round-trip parse"); + assert_eq!(parsed, kind, "round-trip mismatch for {s}"); + } + } + + #[test] + fn compute_driver_kind_rejects_external_with_empty_path() { + let err = "external:".parse::().unwrap_err(); + assert!(err.contains("non-empty socket path"), "unexpected error: {err}"); + } + + #[test] + fn compute_driver_kind_external_is_case_insensitive_on_prefix() { + let parsed: ComputeDriverKind = "External:/var/run/openshell-driver.sock" + .parse() + .expect("case-insensitive prefix should be accepted"); + match parsed { + ComputeDriverKind::External(p) => { + assert_eq!(p, PathBuf::from("/var/run/openshell-driver.sock")); + } + other => panic!("expected External, got {other:?}"), + } + } } diff --git a/crates/openshell-server/src/cli.rs b/crates/openshell-server/src/cli.rs index b8d345f9e..f89c5d66d 100644 --- a/crates/openshell-server/src/cli.rs +++ b/crates/openshell-server/src/cli.rs @@ -111,6 +111,16 @@ struct RunArgs { )] drivers: Vec, + /// Path to a Unix domain socket served by an external compute driver + /// implementing `compute_driver.proto`. + /// + /// When set, the gateway uses `ComputeDriverKind::External()` and + /// skips both the `--drivers` list and the auto-detection probe. This + /// lets out-of-tree driver binaries (Kyma, custom backends) connect to + /// an already-running gateway without rebuilding it. + #[arg(long, env = "OPENSHELL_COMPUTE_DRIVER_SOCKET")] + compute_driver_socket: Option, + /// Disable TLS entirely — listen on plaintext HTTP. /// Use this when the gateway sits behind a reverse proxy or tunnel /// (e.g. Cloudflare Tunnel) that terminates TLS at the edge. @@ -350,9 +360,18 @@ async fn run_from_args(mut args: RunArgs, matches: ArgMatches) -> Result<()> { config = config.with_metrics_bind_address(addr); } + // The --compute-driver-socket flag pins an external driver and overrides + // the --drivers list. `effective_single_driver` already mirrors this for + // pre-runtime checks; do the same here so `configured_compute_driver` + // sees the External entry when it inspects `config.compute_drivers`. + let configured_drivers = if let Some(socket) = args.compute_driver_socket.clone() { + vec![ComputeDriverKind::External(socket)] + } else { + args.drivers.clone() + }; config = config .with_database_url(db_url) - .with_compute_drivers(args.drivers.clone()) + .with_compute_drivers(configured_drivers) .with_server_sans(args.server_sans.clone()) .with_loopback_service_http(args.enable_loopback_service_http); @@ -611,9 +630,14 @@ fn merge_file_into_args(args: &mut RunArgs, file: &GatewayFileSection, matches: } fn effective_single_driver(args: &RunArgs) -> Option { + // The --compute-driver-socket flag pins an out-of-tree driver and + // therefore wins over both the explicit --drivers list and auto-detection. + if let Some(socket) = args.compute_driver_socket.clone() { + return Some(ComputeDriverKind::External(socket)); + } match args.drivers.as_slice() { [] => openshell_core::config::detect_driver(), - [driver] => Some(*driver), + [driver] => Some(driver.clone()), _ => None, } } @@ -1428,6 +1452,80 @@ enable_loopback_service_http = false ); } + #[test] + fn compute_driver_socket_flag_yields_external_driver() { + // The CLI flag pins ComputeDriverKind::External() so that + // out-of-tree drivers (Kyma, custom backends) can be wired without + // recompiling the gateway. + let _lock = ENV_LOCK + .lock() + .unwrap_or_else(std::sync::PoisonError::into_inner); + let _g1 = EnvVarGuard::remove("OPENSHELL_COMPUTE_DRIVER_SOCKET"); + let _g2 = EnvVarGuard::remove("OPENSHELL_DRIVERS"); + + let (args, _) = parse_with_args(&[ + "openshell-gateway", + "--db-url", + "sqlite::memory:", + "--compute-driver-socket", + "/tmp/openshell-driver.sock", + ]); + + match super::effective_single_driver(&args) { + Some(super::ComputeDriverKind::External(p)) => { + assert_eq!(p, std::path::PathBuf::from("/tmp/openshell-driver.sock")); + } + other => panic!("expected External, got {other:?}"), + } + } + + #[test] + fn compute_driver_socket_flag_overrides_drivers_list() { + // Even when --drivers is set, --compute-driver-socket pins the + // external driver. This avoids forcing operators to wipe a + // gateway-wide --drivers list to add an out-of-tree driver. + let _lock = ENV_LOCK + .lock() + .unwrap_or_else(std::sync::PoisonError::into_inner); + let _g1 = EnvVarGuard::remove("OPENSHELL_COMPUTE_DRIVER_SOCKET"); + let _g2 = EnvVarGuard::remove("OPENSHELL_DRIVERS"); + + let (args, _) = parse_with_args(&[ + "openshell-gateway", + "--db-url", + "sqlite::memory:", + "--drivers", + "docker", + "--compute-driver-socket", + "/tmp/x.sock", + ]); + + match super::effective_single_driver(&args) { + Some(super::ComputeDriverKind::External(p)) => { + assert_eq!(p, std::path::PathBuf::from("/tmp/x.sock")); + } + other => panic!("expected External, got {other:?}"), + } + } + + #[test] + fn compute_driver_socket_reads_from_env_var() { + let _lock = ENV_LOCK + .lock() + .unwrap_or_else(std::sync::PoisonError::into_inner); + let _g1 = EnvVarGuard::set("OPENSHELL_COMPUTE_DRIVER_SOCKET", "/run/external.sock"); + let _g2 = EnvVarGuard::remove("OPENSHELL_DRIVERS"); + + let (args, _) = parse_with_args(&["openshell-gateway", "--db-url", "sqlite::memory:"]); + + match super::effective_single_driver(&args) { + Some(super::ComputeDriverKind::External(p)) => { + assert_eq!(p, std::path::PathBuf::from("/run/external.sock")); + } + other => panic!("expected External, got {other:?}"), + } + } + #[test] fn driver_inherits_shared_image_from_gateway_section() { // [openshell.gateway].default_image inherits into the K8s driver diff --git a/crates/openshell-server/src/compute/mod.rs b/crates/openshell-server/src/compute/mod.rs index 98dc3fd63..42274cf22 100644 --- a/crates/openshell-server/src/compute/mod.rs +++ b/crates/openshell-server/src/compute/mod.rs @@ -216,6 +216,44 @@ impl ComputeDriver for RemoteComputeDriver { } } +/// Build a tonic [`Channel`] connected to a Unix domain socket served by an +/// external compute driver. Used by the `External(PathBuf)` dispatch arm in +/// `lib.rs::build_compute_runtime`. The dummy authority `http://[::]:50051` +/// matches the connector convention used by the VM driver — tonic ignores it +/// once a custom service connector is supplied. +#[cfg(unix)] +pub(crate) async fn connect_external_compute_driver( + socket_path: std::path::PathBuf, +) -> Result { + use hyper_util::rt::TokioIo; + use tokio::net::UnixStream; + use tonic::transport::Endpoint; + use tower::service_fn; + + let display_path = socket_path.clone(); + Endpoint::from_static("http://[::]:50051") + .connect_with_connector(service_fn(move |_: tonic::transport::Uri| { + let socket_path = socket_path.clone(); + async move { UnixStream::connect(socket_path).await.map(TokioIo::new) } + })) + .await + .map_err(|e| { + openshell_core::Error::execution(format!( + "failed to connect to external compute driver socket '{}': {e}", + display_path.display() + )) + }) +} + +#[cfg(not(unix))] +pub(crate) async fn connect_external_compute_driver( + _socket_path: std::path::PathBuf, +) -> Result { + Err(openshell_core::Error::config( + "the external compute driver requires unix domain socket support", + )) +} + #[derive(Clone)] pub struct ComputeRuntime { driver: SharedComputeDriver, @@ -373,6 +411,38 @@ impl ComputeRuntime { .await } + /// Build a `ComputeRuntime` over a tonic `Channel` connected to an + /// already-running external compute driver process. + /// + /// Unlike [`new_remote_vm`], this constructor does not own a child + /// process — the external driver's lifecycle is the operator's + /// responsibility (systemd unit, sidecar container, etc.). The + /// underlying `RemoteComputeDriver` proxy is identical. + pub(crate) async fn new_remote_external( + channel: Channel, + store: Arc, + sandbox_index: SandboxIndex, + sandbox_watch_bus: SandboxWatchBus, + tracing_log_bus: TracingLogBus, + supervisor_sessions: Arc, + ) -> Result { + let driver: SharedComputeDriver = Arc::new(RemoteComputeDriver::new(channel)); + Self::from_driver( + driver, + None, + None, + None, + store, + sandbox_index, + sandbox_watch_bus, + tracing_log_bus, + supervisor_sessions, + true, + Vec::new(), + ) + .await + } + pub async fn new_podman( config: PodmanComputeConfig, store: Arc, diff --git a/crates/openshell-server/src/config_file.rs b/crates/openshell-server/src/config_file.rs index 7d7c99cc3..59eb219ca 100644 --- a/crates/openshell-server/src/config_file.rs +++ b/crates/openshell-server/src/config_file.rs @@ -285,6 +285,10 @@ fn inheritable_keys(driver: ComputeDriverKind) -> &'static [&'static str] { "guest_tls_cert", "guest_tls_key", ], + // The external driver is configured via the --compute-driver-socket + // CLI flag, not a TOML driver table, so no gateway-section keys are + // inheritable. + ComputeDriverKind::External(_) => &[], } } diff --git a/crates/openshell-server/src/lib.rs b/crates/openshell-server/src/lib.rs index 1b20ba069..b3cc247d4 100644 --- a/crates/openshell-server/src/lib.rs +++ b/crates/openshell-server/src/lib.rs @@ -762,6 +762,23 @@ async fn build_compute_runtime( .await .map_err(|e| Error::execution(format!("failed to create compute runtime: {e}"))) } + ComputeDriverKind::External(socket) => { + info!( + socket = %socket.display(), + "Connecting to external compute driver over Unix domain socket" + ); + let channel = compute::connect_external_compute_driver(socket.clone()).await?; + ComputeRuntime::new_remote_external( + channel, + store, + sandbox_index, + sandbox_watch_bus, + tracing_log_bus, + supervisor_sessions, + ) + .await + .map_err(|e| Error::execution(format!("failed to create compute runtime: {e}"))) + } } } @@ -853,12 +870,7 @@ fn configured_compute_driver(config: &Config) -> Result { set --drivers or OPENSHELL_DRIVERS to kubernetes, podman, docker, or vm", )), }, - [ - driver @ (ComputeDriverKind::Kubernetes - | ComputeDriverKind::Vm - | ComputeDriverKind::Docker - | ComputeDriverKind::Podman), - ] => Ok(*driver), + [driver] => Ok(driver.clone()), drivers => Err(Error::config(format!( "multiple compute drivers are not supported yet; configured drivers: {}", drivers