diff --git a/architecture/gateway.md b/architecture/gateway.md index a7e8f1a00..ebdbbfe16 100644 --- a/architecture/gateway.md +++ b/architecture/gateway.md @@ -37,6 +37,10 @@ health, metrics, or tunnel routes. The plaintext service router also rejects browser requests whose Fetch Metadata, Origin, or Referer headers indicate a cross-origin or sibling-subdomain request. +Operators can configure a gateway-wide gRPC request rate limit. The limit is +applied only to gRPC API traffic after protocol multiplexing; health, metrics, +and local sandbox-service HTTP routes are not rate limited by this control. + Supported auth modes: | Mode | Use | diff --git a/architecture/sandbox.md b/architecture/sandbox.md index 71dd35227..673a1e3ad 100644 --- a/architecture/sandbox.md +++ b/architecture/sandbox.md @@ -40,10 +40,30 @@ OpenShell uses overlapping controls rather than a single sandbox primitive: | Seccomp | Blocks dangerous syscalls, including raw socket paths that bypass the proxy. | | Network namespace | Forces ordinary agent egress through the local CONNECT proxy. | | Policy proxy | Evaluates destination, binary identity, TLS/L7 rules, SSRF checks, and inference interception. | +| Cgroup limits | Caps per-sandbox CPU, memory, and supported PID limits to prevent runaway resource consumption. | The supervisor may enrich baseline filesystem allowances for runtime-required paths, such as proxy support files or GPU device paths when a GPU is present. +### Cgroup Resource Defaults + +The gateway overlays `template.resources.limits.{cpu,memory}` on every +`CreateSandbox` request that omits the field. User-supplied values are +preserved verbatim; the defaults are persisted, so subsequent `GetSandbox` +calls observe the effective limits. The defaults live on the public Struct and +propagate through typed driver resource fields. + +| Dimension | Default | Driver support | +|---|---|---| +| CPU | `"2"` | Kubernetes, Docker, Podman (defense-in-depth fallback). VM ignores. | +| Memory | `"4Gi"` | Kubernetes, Docker, Podman (defense-in-depth fallback). VM ignores. | + +Operators tune the values in `[openshell.gateway]` via +`default_sandbox_cpu_limit` and `default_sandbox_memory_limit`. Setting `"0"` +(or an empty value) disables the corresponding default — the sandbox runs +without a gateway-imposed bound on that dimension. Omitting a key uses the +built-in default above. Negative values are rejected during config load. + ## Network and Inference All ordinary agent egress is routed through the sandbox proxy. The proxy diff --git a/crates/openshell-core/src/config.rs b/crates/openshell-core/src/config.rs index 98562c8a6..ec8868b82 100644 --- a/crates/openshell-core/src/config.rs +++ b/crates/openshell-core/src/config.rs @@ -11,6 +11,7 @@ use std::os::unix::fs::FileTypeExt; use std::path::{Path, PathBuf}; use std::process::Command; use std::str::FromStr; +use std::time::Duration; // ── Public default constants ──────────────────────────────────────────── // @@ -39,6 +40,14 @@ pub const DEFAULT_SUPERVISOR_IMAGE: &str = "ghcr.io/nvidia/openshell/supervisor: /// CDI device identifier for requesting all NVIDIA GPUs. pub const CDI_GPU_DEVICE_ALL: &str = "nvidia.com/gpu=all"; +/// Default per-sandbox CPU limit applied when the user omits +/// `template.resources.limits.cpu`. Uses Kubernetes-style quantity strings. +pub const DEFAULT_SANDBOX_CPU_LIMIT: &str = "2"; + +/// Default per-sandbox memory limit applied when the user omits +/// `template.resources.limits.memory`. Uses Kubernetes-style quantity strings. +pub const DEFAULT_SANDBOX_MEMORY_LIMIT: &str = "4Gi"; + /// Compute backends the gateway can orchestrate sandboxes through. #[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] #[serde(rename_all = "snake_case")] @@ -238,9 +247,57 @@ pub struct Config { #[serde(default = "default_ssh_session_ttl_secs")] pub ssh_session_ttl_secs: u64, + /// Maximum gRPC requests allowed per rate-limit window. + /// + /// When paired with [`Self::grpc_rate_limit_window_secs`], positive values + /// enable gateway-wide gRPC request rate limiting. `None` or `0` disables + /// the limit. + #[serde(default)] + pub grpc_rate_limit_requests: Option, + + /// gRPC rate-limit window length in seconds. + /// + /// When paired with [`Self::grpc_rate_limit_requests`], positive values + /// enable gateway-wide gRPC request rate limiting. `None` or `0` disables + /// the limit. + #[serde(default)] + pub grpc_rate_limit_window_secs: Option, + /// Browser-facing sandbox service routing configuration. #[serde(default)] pub service_routing: ServiceRoutingConfig, + + /// Default CPU limit overlaid onto `template.resources.limits.cpu` when + /// a `CreateSandbox` request omits the field. + /// + /// - `Some(value)`: a Kubernetes-style quantity string (e.g. `"2"`, + /// `"500m"`) injected into the sandbox template before persistence. + /// User-supplied `limits.cpu` always wins (overlay semantics). + /// - `None`: gateway opts out of imposing a CPU bound for sandboxes + /// that omit the field. The container runs without a CPU cgroup cap + /// unless a driver-level fallback applies. + /// + /// The TOML loader resolves `default_sandbox_cpu_limit = "0"`, `""`, or + /// whitespace-only values to `None`. Negative quantities are rejected + /// rather than silently treated as an opt-out. An absent key falls back to + /// [`DEFAULT_SANDBOX_CPU_LIMIT`]. See + /// `openshell_server::config_file::resolve_sandbox_quantity_default` + /// for the full resolution table. + /// + /// [`Config::new`] seeds this with `Some(DEFAULT_SANDBOX_CPU_LIMIT)` so + /// embedded callers inherit the secure default automatically. + #[serde(default)] + pub default_sandbox_cpu_limit: Option, + + /// Default memory limit overlaid onto `template.resources.limits.memory` + /// when a `CreateSandbox` request omits the field. Same + /// `Some`/`None` semantics as [`Self::default_sandbox_cpu_limit`]. + /// + /// TOML opt-out: `default_sandbox_memory_limit = "0"`, `""`, or + /// whitespace-only -> `None`. Negative quantities are rejected. An absent + /// key falls back to [`DEFAULT_SANDBOX_MEMORY_LIMIT`]. + #[serde(default)] + pub default_sandbox_memory_limit: Option, } /// Browser-facing sandbox service routing configuration. @@ -416,7 +473,11 @@ impl Config { database_url: String::new(), compute_drivers: vec![], ssh_session_ttl_secs: default_ssh_session_ttl_secs(), + grpc_rate_limit_requests: None, + grpc_rate_limit_window_secs: None, service_routing: ServiceRoutingConfig::default(), + default_sandbox_cpu_limit: Some(DEFAULT_SANDBOX_CPU_LIMIT.to_string()), + default_sandbox_memory_limit: Some(DEFAULT_SANDBOX_MEMORY_LIMIT.to_string()), } } @@ -483,6 +544,56 @@ impl Config { self } + /// Set the gateway-wide gRPC request rate limit. + #[must_use] + pub const fn with_grpc_rate_limit( + mut self, + requests: Option, + window_secs: Option, + ) -> Self { + self.grpc_rate_limit_requests = requests; + self.grpc_rate_limit_window_secs = window_secs; + self + } + + /// Return the effective gRPC rate limit, if fully configured and enabled. + #[must_use] + pub fn grpc_rate_limit(&self) -> Option<(u64, Duration)> { + let requests = self.grpc_rate_limit_requests?; + let window_secs = self.grpc_rate_limit_window_secs?; + if requests == 0 || window_secs == 0 { + None + } else { + Some((requests, Duration::from_secs(window_secs))) + } + } + + /// Override the default sandbox CPU limit. + /// + /// - `Some(value)`: applied as the gateway-wide default when a + /// `CreateSandbox` request omits `template.resources.limits.cpu`. + /// - `None`: gateway-wide opt-out — sandboxes that omit the field run + /// without a CPU cap from the gateway. Equivalent to setting + /// `default_sandbox_cpu_limit = "0"` (or `""`) in the gateway TOML. + /// + /// See [`Self::default_sandbox_cpu_limit`] for the overlay semantics. + #[must_use] + pub fn with_default_sandbox_cpu_limit(mut self, value: Option) -> Self { + self.default_sandbox_cpu_limit = value; + self + } + + /// Override the default sandbox memory limit. + /// + /// `None` opts out of the gateway-wide memory default, mirroring + /// `default_sandbox_memory_limit = "0"` in TOML. See + /// [`Self::default_sandbox_memory_limit`]. + #[must_use] + pub fn with_default_sandbox_memory_limit(mut self, value: Option) -> Self { + self.default_sandbox_memory_limit = value; + self + } + /// Set the OIDC configuration for JWT-based authentication. #[must_use] pub fn with_oidc(mut self, oidc: OidcConfig) -> Self { @@ -601,6 +712,7 @@ mod tests { #[cfg(unix)] use std::os::unix::net::UnixListener; use std::path::PathBuf; + use std::time::Duration; #[test] fn compute_driver_kind_parses_supported_values() { @@ -646,6 +758,29 @@ mod tests { assert!(!cfg.auth.allow_unauthenticated_users); } + #[test] + fn grpc_rate_limit_requires_positive_pair() { + assert!(Config::new(None).grpc_rate_limit().is_none()); + assert!( + Config::new(None) + .with_grpc_rate_limit(Some(10), None) + .grpc_rate_limit() + .is_none() + ); + assert!( + Config::new(None) + .with_grpc_rate_limit(Some(0), Some(60)) + .grpc_rate_limit() + .is_none() + ); + assert_eq!( + Config::new(None) + .with_grpc_rate_limit(Some(10), Some(60)) + .grpc_rate_limit(), + Some((10, Duration::from_secs(60))) + ); + } + #[test] fn service_routing_allows_loopback_plaintext_http_by_default() { let cfg = Config::new(None); diff --git a/crates/openshell-server/src/cli.rs b/crates/openshell-server/src/cli.rs index b8d345f9e..bf96c5b6c 100644 --- a/crates/openshell-server/src/cli.rs +++ b/crates/openshell-server/src/cli.rs @@ -175,6 +175,14 @@ struct RunArgs { #[arg(long, env = "OPENSHELL_OIDC_SCOPES_CLAIM", default_value = "")] oidc_scopes_claim: String, + /// Maximum gRPC requests allowed per rate-limit window. Set to 0 to disable. + #[arg(long, env = "OPENSHELL_GRPC_RATE_LIMIT_REQUESTS")] + grpc_rate_limit_requests: Option, + + /// gRPC rate-limit window length in seconds. Set to 0 to disable. + #[arg(long, env = "OPENSHELL_GRPC_RATE_LIMIT_WINDOW_SECONDS")] + grpc_rate_limit_window_seconds: Option, + /// Subject Alternative Names configured on the gateway server certificate. /// Wildcard DNS SANs also enable sandbox service URLs under that domain. #[arg( @@ -353,8 +361,16 @@ async fn run_from_args(mut args: RunArgs, matches: ArgMatches) -> Result<()> { config = config .with_database_url(db_url) .with_compute_drivers(args.drivers.clone()) + .with_grpc_rate_limit( + args.grpc_rate_limit_requests, + args.grpc_rate_limit_window_seconds, + ) .with_server_sans(args.server_sans.clone()) .with_loopback_service_http(args.enable_loopback_service_http); + validate_grpc_rate_limit_args( + args.grpc_rate_limit_requests, + args.grpc_rate_limit_window_seconds, + )?; if let Some(ttl) = file .as_ref() @@ -363,6 +379,26 @@ async fn run_from_args(mut args: RunArgs, matches: ArgMatches) -> Result<()> { config = config.with_ssh_session_ttl_secs(ttl); } + // Sandbox resource defaults. The gateway always applies these unless the + // operator explicitly opts out by setting `"0"` in `[openshell.gateway]`. + { + let cpu = config_file::resolve_sandbox_quantity_default( + "default_sandbox_cpu_limit", + file_gateway.and_then(|g| g.default_sandbox_cpu_limit.as_deref()), + openshell_core::config::DEFAULT_SANDBOX_CPU_LIMIT, + ) + .map_err(|e| miette::miette!("{e}"))?; + let memory = config_file::resolve_sandbox_quantity_default( + "default_sandbox_memory_limit", + file_gateway.and_then(|g| g.default_sandbox_memory_limit.as_deref()), + openshell_core::config::DEFAULT_SANDBOX_MEMORY_LIMIT, + ) + .map_err(|e| miette::miette!("{e}"))?; + config = config + .with_default_sandbox_cpu_limit(cpu) + .with_default_sandbox_memory_limit(memory); + } + if let Some(issuer) = args.oidc_issuer.clone() { config = config.with_oidc(openshell_core::OidcConfig { issuer, @@ -608,6 +644,37 @@ fn merge_file_into_args(args: &mut RunArgs, file: &GatewayFileSection, matches: args.oidc_scopes_claim.clone_from(&oidc.scopes_claim); } } + if let Some(requests) = file.grpc_rate_limit_requests + && args.grpc_rate_limit_requests.is_none() + && arg_defaulted(matches, "grpc_rate_limit_requests") + { + args.grpc_rate_limit_requests = Some(requests); + } + if let Some(window) = file.grpc_rate_limit_window_seconds + && args.grpc_rate_limit_window_seconds.is_none() + && arg_defaulted(matches, "grpc_rate_limit_window_seconds") + { + args.grpc_rate_limit_window_seconds = Some(window); + } +} + +fn validate_grpc_rate_limit_args(requests: Option, window_seconds: Option) -> Result<()> { + let disabled = matches!(requests, Some(0)) || matches!(window_seconds, Some(0)); + if disabled { + return Ok(()); + } + if matches!( + (requests, window_seconds), + (Some(requests), None) if requests > 0 + ) || matches!( + (requests, window_seconds), + (None, Some(window_seconds)) if window_seconds > 0 + ) { + return Err(miette::miette!( + "gRPC rate limiting requires both --grpc-rate-limit-requests and --grpc-rate-limit-window-seconds to be positive; set either value to 0 to disable" + )); + } + Ok(()) } fn effective_single_driver(args: &RunArgs) -> Option { @@ -889,6 +956,41 @@ mod tests { assert!(cli.run.enable_mtls_auth); } + #[test] + fn command_parses_grpc_rate_limit_flags() { + let _lock = ENV_LOCK + .lock() + .unwrap_or_else(std::sync::PoisonError::into_inner); + let _g1 = EnvVarGuard::remove("OPENSHELL_GRPC_RATE_LIMIT_REQUESTS"); + let _g2 = EnvVarGuard::remove("OPENSHELL_GRPC_RATE_LIMIT_WINDOW_SECONDS"); + + let cli = Cli::try_parse_from([ + "openshell-gateway", + "--db-url", + "sqlite::memory:", + "--grpc-rate-limit-requests", + "120", + "--grpc-rate-limit-window-seconds", + "60", + ]) + .unwrap(); + + assert_eq!(cli.run.grpc_rate_limit_requests, Some(120)); + assert_eq!(cli.run.grpc_rate_limit_window_seconds, Some(60)); + } + + #[test] + fn validate_grpc_rate_limit_args_requires_positive_pair() { + assert!(super::validate_grpc_rate_limit_args(None, None).is_ok()); + assert!(super::validate_grpc_rate_limit_args(Some(0), None).is_ok()); + assert!(super::validate_grpc_rate_limit_args(None, Some(0)).is_ok()); + assert!(super::validate_grpc_rate_limit_args(Some(0), Some(60)).is_ok()); + assert!(super::validate_grpc_rate_limit_args(Some(120), Some(0)).is_ok()); + assert!(super::validate_grpc_rate_limit_args(Some(120), Some(60)).is_ok()); + assert!(super::validate_grpc_rate_limit_args(Some(120), None).is_err()); + assert!(super::validate_grpc_rate_limit_args(None, Some(60)).is_err()); + } + #[test] fn command_rejects_removed_driver_flags() { let err = command() @@ -1287,6 +1389,45 @@ audience = "openshell-cli" assert_eq!(args.oidc_audience, "openshell-cli"); } + #[test] + fn file_grpc_rate_limit_populates_args_when_cli_omits() { + let (mut args, matches) = + parse_with_args(&["openshell-gateway", "--db-url", "sqlite::memory:"]); + let file = config_file_from_toml( + r" +[openshell.gateway] +grpc_rate_limit_requests = 100 +grpc_rate_limit_window_seconds = 30 +", + ); + merge_file_into_args(&mut args, &file.openshell.gateway, &matches); + + assert_eq!(args.grpc_rate_limit_requests, Some(100)); + assert_eq!(args.grpc_rate_limit_window_seconds, Some(30)); + } + + #[test] + fn cli_grpc_rate_limit_overrides_file_value() { + let (mut args, matches) = parse_with_args(&[ + "openshell-gateway", + "--db-url", + "sqlite::memory:", + "--grpc-rate-limit-requests", + "20", + ]); + let file = config_file_from_toml( + r" +[openshell.gateway] +grpc_rate_limit_requests = 100 +grpc_rate_limit_window_seconds = 30 +", + ); + merge_file_into_args(&mut args, &file.openshell.gateway, &matches); + + assert_eq!(args.grpc_rate_limit_requests, Some(20)); + assert_eq!(args.grpc_rate_limit_window_seconds, Some(30)); + } + #[test] fn aux_listener_preserves_file_ip_against_public_bind() { use std::net::SocketAddr; diff --git a/crates/openshell-server/src/compute/mod.rs b/crates/openshell-server/src/compute/mod.rs index 98dc3fd63..7e2236989 100644 --- a/crates/openshell-server/src/compute/mod.rs +++ b/crates/openshell-server/src/compute/mod.rs @@ -1362,7 +1362,9 @@ fn build_platform_config(template: &SandboxTemplate) -> Option, #[serde(default)] pub ssh_session_ttl_secs: Option, + #[serde(default)] + pub grpc_rate_limit_requests: Option, + #[serde(default)] + pub grpc_rate_limit_window_seconds: Option, + + // ── Sandbox resource defaults ─────────────────────────────── + // + // Applied at `CreateSandbox` time as an overlay onto + // `template.resources.limits.{cpu,memory}`. User-supplied values are + // preserved; absent fields are filled from these defaults. The value `"0"` + // disables the corresponding default — consistent with the Linux/cgroup + // convention where 0 means "no bound". + #[serde(default)] + pub default_sandbox_cpu_limit: Option, + #[serde(default)] + pub default_sandbox_memory_limit: Option, // ── Service routing ────────────────────────────────────────────────── /// Subject Alternative Names configured on the gateway server certificate. @@ -182,6 +198,24 @@ pub enum ConfigFileError { env: &'static str, cli: &'static str, }, + #[error("invalid gateway config file '{}': {source}", path.display())] + InvalidSandboxDefault { + path: PathBuf, + #[source] + source: SandboxDefaultError, + }, +} + +#[derive(Debug, thiserror::Error, Clone, PartialEq, Eq)] +pub enum SandboxDefaultError { + #[error( + "`{field}` has invalid negative value `{value}`; use {disable_value} to disable the default" + )] + Negative { + field: &'static str, + value: String, + disable_value: &'static str, + }, } /// Load and validate a TOML config file. @@ -214,10 +248,36 @@ pub fn load(path: &Path) -> Result { cli: "--db-url", }); } + validate_sandbox_defaults(path, &file.openshell.gateway)?; Ok(file) } +fn validate_sandbox_defaults( + path: &Path, + gateway: &GatewayFileSection, +) -> Result<(), ConfigFileError> { + resolve_sandbox_quantity_default( + "default_sandbox_cpu_limit", + gateway.default_sandbox_cpu_limit.as_deref(), + openshell_core::config::DEFAULT_SANDBOX_CPU_LIMIT, + ) + .map_err(|source| ConfigFileError::InvalidSandboxDefault { + path: path.to_path_buf(), + source, + })?; + resolve_sandbox_quantity_default( + "default_sandbox_memory_limit", + gateway.default_sandbox_memory_limit.as_deref(), + openshell_core::config::DEFAULT_SANDBOX_MEMORY_LIMIT, + ) + .map_err(|source| ConfigFileError::InvalidSandboxDefault { + path: path.to_path_buf(), + source, + })?; + Ok(()) +} + /// Build the merged TOML table for `driver` by overlaying inheritable /// `[openshell.gateway]` defaults onto `[openshell.drivers.]`. /// @@ -313,6 +373,44 @@ fn path_value(p: &Path) -> toml::Value { toml::Value::String(p.display().to_string()) } +/// Resolve a TOML-supplied CPU or memory default into the effective runtime +/// value used by `apply_sandbox_template_defaults`. +/// +/// | TOML input | Result | Meaning | +/// |---|---|---| +/// | key absent (`None`) | `Some(core_default)` | Use the built-in constant. | +/// | `"0"` (after trim) | `None` | Operator opt-out; no default applied. | +/// | empty / whitespace-only | `None` | Operator opt-out; no default applied. | +/// | leading `-` (e.g. `"-1"`, `"-500m"`) | `Err` | Invalid Kubernetes quantity rejected. | +/// | any other string | `Some(value)` | Passed through verbatim; the driver validates as a Kubernetes quantity at sandbox creation. | +/// +/// Rejecting negative quantities matches the Kubernetes resource model (which +/// only accepts non-negative quantities) and prevents a typo like `"-1"` from +/// silently disabling the cgroup default. +pub fn resolve_sandbox_quantity_default( + field: &'static str, + toml_value: Option<&str>, + core_default: &'static str, +) -> Result, SandboxDefaultError> { + toml_value.map_or_else( + || Ok(Some(core_default.to_string())), + |v| { + let trimmed = v.trim(); + if trimmed.starts_with('-') { + Err(SandboxDefaultError::Negative { + field, + value: trimmed.to_string(), + disable_value: r#"`"0"` or `""`"#, + }) + } else if trimmed.is_empty() || trimmed == "0" { + Ok(None) + } else { + Ok(Some(trimmed.to_string())) + } + }, + ) +} + #[cfg(test)] mod tests { use super::*; @@ -348,6 +446,8 @@ health_bind_address = "0.0.0.0:8081" log_level = "info" compute_drivers = ["kubernetes"] sandbox_namespace = "agents" +grpc_rate_limit_requests = 120 +grpc_rate_limit_window_seconds = 60 default_image = "ghcr.io/nvidia/openshell/sandbox:latest" supervisor_image = "ghcr.io/nvidia/openshell/supervisor:latest" client_tls_secret_name = "openshell-sandbox-tls" @@ -374,6 +474,8 @@ grpc_endpoint = "https://openshell-gateway.agents.svc:8080" gw.default_image.as_deref(), Some("ghcr.io/nvidia/openshell/sandbox:latest") ); + assert_eq!(gw.grpc_rate_limit_requests, Some(120)); + assert_eq!(gw.grpc_rate_limit_window_seconds, Some(60)); assert!(gw.tls.is_some()); assert!(gw.oidc.is_some()); assert!(file.openshell.drivers.contains_key("kubernetes")); @@ -588,4 +690,181 @@ version = 2 driver selection when Docker is also installed" ); } + + // ---- resolve_sandbox_quantity_default ---- + + #[test] + fn resolve_sandbox_quantity_default_uses_core_when_absent() { + assert_eq!( + resolve_sandbox_quantity_default("default_sandbox_cpu_limit", None, "2").unwrap(), + Some("2".to_string()) + ); + } + + #[test] + fn resolve_sandbox_quantity_default_zero_string_disables() { + assert_eq!( + resolve_sandbox_quantity_default("default_sandbox_cpu_limit", Some("0"), "2").unwrap(), + None + ); + } + + #[test] + fn resolve_sandbox_quantity_default_empty_string_disables() { + assert_eq!( + resolve_sandbox_quantity_default("default_sandbox_memory_limit", Some(""), "4Gi") + .unwrap(), + None + ); + assert_eq!( + resolve_sandbox_quantity_default("default_sandbox_memory_limit", Some(" "), "4Gi") + .unwrap(), + None + ); + } + + #[test] + fn resolve_sandbox_quantity_default_uses_supplied_value() { + assert_eq!( + resolve_sandbox_quantity_default("default_sandbox_memory_limit", Some("8Gi"), "4Gi") + .unwrap(), + Some("8Gi".to_string()) + ); + } + + #[test] + fn resolve_sandbox_quantity_default_negative_rejects() { + let err = resolve_sandbox_quantity_default("default_sandbox_cpu_limit", Some("-1"), "2") + .unwrap_err(); + assert!(matches!(err, SandboxDefaultError::Negative { .. })); + assert!( + resolve_sandbox_quantity_default("default_sandbox_cpu_limit", Some("-500m"), "2") + .is_err() + ); + assert!( + resolve_sandbox_quantity_default("default_sandbox_memory_limit", Some("-2Gi"), "4Gi") + .is_err() + ); + assert!( + resolve_sandbox_quantity_default("default_sandbox_cpu_limit", Some(" -1 "), "2") + .is_err() + ); + } + + #[test] + fn resolve_sandbox_quantity_default_trims_whitespace() { + assert_eq!( + resolve_sandbox_quantity_default("default_sandbox_memory_limit", Some(" 8Gi "), "4Gi") + .unwrap(), + Some("8Gi".to_string()) + ); + } + + /// End-to-end: a TOML file that opts out of both sandbox defaults + /// (cpu/memory = `"0"`) must resolve to `None` for each field. Guards + /// against a refactor that silently drops the sentinel handling between the + /// parser and the runtime `Config`. + #[test] + fn zero_sentinels_in_toml_disable_sandbox_defaults_end_to_end() { + let toml = r#" +[openshell] +version = 1 + +[openshell.gateway] +default_sandbox_cpu_limit = "0" +default_sandbox_memory_limit = "0" +"#; + let tmp = write_tmp(toml); + let file = load(tmp.path()).expect("valid file parses"); + let gw = &file.openshell.gateway; + + // Parser preserves the raw TOML values. + assert_eq!(gw.default_sandbox_cpu_limit.as_deref(), Some("0")); + assert_eq!(gw.default_sandbox_memory_limit.as_deref(), Some("0")); + + // Resolution collapses the sentinels into `None` so the gateway + // skips injection in `apply_sandbox_template_defaults`. + assert_eq!( + resolve_sandbox_quantity_default( + "default_sandbox_cpu_limit", + gw.default_sandbox_cpu_limit.as_deref(), + "2" + ) + .unwrap(), + None, + r#"`default_sandbox_cpu_limit = "0"` must disable the CPU default"# + ); + assert_eq!( + resolve_sandbox_quantity_default( + "default_sandbox_memory_limit", + gw.default_sandbox_memory_limit.as_deref(), + "4Gi" + ) + .unwrap(), + None, + r#"`default_sandbox_memory_limit = "0"` must disable the memory default"# + ); + } + + /// End-to-end: a TOML file with negative sandbox-default values must be + /// rejected. Guards against malformed config silently bypassing the cgroup + /// defaults via a downstream parser that maps `-1` to "unlimited". + #[test] + fn negative_sentinels_in_toml_reject_sandbox_defaults_end_to_end() { + let toml = r#" +[openshell] +version = 1 + +[openshell.gateway] +default_sandbox_cpu_limit = "-1" +default_sandbox_memory_limit = "-500m" +"#; + let tmp = write_tmp(toml); + let err = load(tmp.path()).expect_err("negative sandbox defaults must be rejected"); + assert!(matches!( + err, + ConfigFileError::InvalidSandboxDefault { + source: SandboxDefaultError::Negative { .. }, + .. + } + )); + } + + /// End-to-end: a TOML file with no sandbox-default keys must fall back + /// to the canonical constants in `openshell-core`. + #[test] + fn absent_keys_in_toml_use_core_constants_end_to_end() { + let toml = r#" +[openshell] +version = 1 + +[openshell.gateway] +log_level = "info" +"#; + let tmp = write_tmp(toml); + let file = load(tmp.path()).expect("valid file parses"); + let gw = &file.openshell.gateway; + + assert!(gw.default_sandbox_cpu_limit.is_none()); + assert!(gw.default_sandbox_memory_limit.is_none()); + + assert_eq!( + resolve_sandbox_quantity_default( + "default_sandbox_cpu_limit", + gw.default_sandbox_cpu_limit.as_deref(), + "2" + ) + .unwrap(), + Some("2".to_string()) + ); + assert_eq!( + resolve_sandbox_quantity_default( + "default_sandbox_memory_limit", + gw.default_sandbox_memory_limit.as_deref(), + "4Gi" + ) + .unwrap(), + Some("4Gi".to_string()) + ); + } } diff --git a/crates/openshell-server/src/grpc/sandbox.rs b/crates/openshell-server/src/grpc/sandbox.rs index 1855972d7..ee1431132 100644 --- a/crates/openshell-server/src/grpc/sandbox.rs +++ b/crates/openshell-server/src/grpc/sandbox.rs @@ -53,6 +53,66 @@ const TCP_FORWARD_CHUNK_SIZE: usize = 64 * 1024; // Sandbox lifecycle handlers // --------------------------------------------------------------------------- +/// Overlay gateway-wide cgroup defaults onto `template.resources.limits`. +/// +/// For CPU and memory, the function: +/// +/// 1. Skips the field entirely when the corresponding default is `None` +/// (operator opt-out via `"0"` in `[openshell.gateway]`). +/// 2. Leaves user-supplied values untouched (overlay semantics — never +/// overwrites an existing `limits.`). +/// 3. Inserts the configured default otherwise. +/// +/// The defaults live on the public `template.resources` Struct, so CPU/memory +/// flow through `extract_typed_resources` to drivers that support them. +/// Persisting the post-overlay template makes the applied defaults observable +/// via `GetSandbox`. +/// +/// All sandboxes receive CPU and memory caps unless the operator explicitly +/// disables them. PID caps are not overlaid today. +fn apply_sandbox_template_defaults( + template: &mut SandboxTemplate, + cpu_limit: Option<&str>, + memory_limit: Option<&str>, +) { + use prost_types::{Struct, Value, value::Kind}; + + if cpu_limit.is_none() && memory_limit.is_none() { + return; + } + + let resources = template.resources.get_or_insert_with(Struct::default); + let limits_value = resources + .fields + .entry("limits".to_string()) + .or_insert_with(|| Value { + kind: Some(Kind::StructValue(Struct::default())), + }); + + // If the caller put something other than a Struct at `limits` we leave + // it alone — the driver layer will surface the type error. + let Some(Kind::StructValue(limits)) = limits_value.kind.as_mut() else { + return; + }; + + if let Some(value) = cpu_limit { + limits + .fields + .entry("cpu".to_string()) + .or_insert_with(|| Value { + kind: Some(Kind::StringValue(value.to_string())), + }); + } + if let Some(value) = memory_limit { + limits + .fields + .entry("memory".to_string()) + .or_insert_with(|| Value { + kind: Some(Kind::StringValue(value.to_string())), + }); + } +} + pub(super) async fn handle_create_sandbox( state: &Arc, request: Request, @@ -91,6 +151,16 @@ pub(super) async fn handle_create_sandbox( template.image = state.compute.default_image().to_string(); } + // Overlay gateway-wide cgroup defaults onto the template's + // resource limits. User-supplied values win; absent fields are filled + // from the runtime `Config`. Persisting after the overlay makes the + // applied defaults visible to subsequent `GetSandbox` calls. + apply_sandbox_template_defaults( + template, + state.config.default_sandbox_cpu_limit.as_deref(), + state.config.default_sandbox_memory_limit.as_deref(), + ); + // Ensure process identity defaults to "sandbox" when missing or // empty, then validate policy safety before persisting. if let Some(ref mut policy) = spec.policy { @@ -1852,7 +1922,14 @@ async fn run_exec_with_russh( #[cfg(test)] mod tests { use super::*; + use crate::compute::new_test_runtime; use crate::grpc::test_support::test_server_state; + use crate::persistence::Store; + use crate::sandbox_index::SandboxIndex; + use crate::sandbox_watch::SandboxWatchBus; + use crate::supervisor_session::SupervisorSessionRegistry; + use crate::tracing_bus::TracingLogBus; + use openshell_core::Config; use openshell_core::proto::datamodel::v1::ObjectMeta; use std::collections::HashMap; @@ -3129,4 +3206,267 @@ mod tests { initial_version + 1 ); } + + // ---- apply_sandbox_template_defaults ---- + + fn template_with_limits(fields: &[(&str, prost_types::Value)]) -> SandboxTemplate { + use prost_types::{Struct, Value, value::Kind}; + let mut limits = Struct::default(); + for (k, v) in fields { + limits.fields.insert((*k).to_string(), v.clone()); + } + let mut resources = Struct::default(); + resources.fields.insert( + "limits".to_string(), + Value { + kind: Some(Kind::StructValue(limits)), + }, + ); + SandboxTemplate { + resources: Some(resources), + ..Default::default() + } + } + + fn string_value(s: &str) -> prost_types::Value { + prost_types::Value { + kind: Some(prost_types::value::Kind::StringValue(s.to_string())), + } + } + + fn number_value(n: f64) -> prost_types::Value { + prost_types::Value { + kind: Some(prost_types::value::Kind::NumberValue(n)), + } + } + + fn limits(template: &SandboxTemplate) -> &prost_types::Struct { + let res = template.resources.as_ref().expect("resources missing"); + match res.fields.get("limits").and_then(|v| v.kind.as_ref()) { + Some(prost_types::value::Kind::StructValue(s)) => s, + other => panic!("expected limits struct, got {other:?}"), + } + } + + fn limit_string(template: &SandboxTemplate, key: &str) -> Option { + match limits(template).fields.get(key)?.kind.as_ref()? { + prost_types::value::Kind::StringValue(s) => Some(s.clone()), + _ => None, + } + } + + fn limit_number(template: &SandboxTemplate, key: &str) -> Option { + match limits(template).fields.get(key)?.kind.as_ref()? { + prost_types::value::Kind::NumberValue(n) => Some(*n), + _ => None, + } + } + + #[test] + fn apply_sandbox_template_defaults_when_resources_is_none() { + let mut template = SandboxTemplate::default(); + apply_sandbox_template_defaults(&mut template, Some("2"), Some("4Gi")); + assert_eq!(limit_string(&template, "cpu").as_deref(), Some("2")); + assert_eq!(limit_string(&template, "memory").as_deref(), Some("4Gi")); + let l = limits(&template); + assert!(!l.fields.contains_key("pids")); + } + + #[test] + fn apply_sandbox_template_defaults_preserves_user_cpu_overlay_others() { + let mut template = template_with_limits(&[("cpu", string_value("1"))]); + apply_sandbox_template_defaults(&mut template, Some("2"), Some("4Gi")); + // User-supplied CPU is preserved. + assert_eq!(limit_string(&template, "cpu").as_deref(), Some("1")); + // Memory is overlaid from the defaults. + assert_eq!(limit_string(&template, "memory").as_deref(), Some("4Gi")); + let l = limits(&template); + assert!(!l.fields.contains_key("pids")); + } + + #[test] + fn apply_sandbox_template_defaults_noop_when_all_set() { + let mut template = template_with_limits(&[ + ("cpu", string_value("1")), + ("memory", string_value("1Gi")), + ("pids", number_value(64.0)), + ]); + apply_sandbox_template_defaults(&mut template, Some("2"), Some("4Gi")); + assert_eq!(limit_string(&template, "cpu").as_deref(), Some("1")); + assert_eq!(limit_string(&template, "memory").as_deref(), Some("1Gi")); + assert_eq!(limit_number(&template, "pids"), Some(64.0)); + } + + #[test] + fn apply_sandbox_template_defaults_skips_when_limits_is_non_struct() { + // A caller put a string in place of the limits struct — leave it + // alone; downstream validation will surface the type mismatch. + use prost_types::{Struct, Value, value::Kind}; + let mut resources = Struct::default(); + resources.fields.insert( + "limits".to_string(), + Value { + kind: Some(Kind::StringValue("bogus".to_string())), + }, + ); + let mut template = SandboxTemplate { + resources: Some(resources), + ..Default::default() + }; + apply_sandbox_template_defaults(&mut template, Some("2"), Some("4Gi")); + let res = template.resources.as_ref().unwrap(); + match res.fields.get("limits").and_then(|v| v.kind.as_ref()) { + Some(Kind::StringValue(s)) => assert_eq!(s, "bogus"), + other => panic!("expected unchanged string, got {other:?}"), + } + } + + #[test] + fn apply_sandbox_template_defaults_skips_when_all_config_disabled() { + let mut template = SandboxTemplate::default(); + apply_sandbox_template_defaults(&mut template, None, None); + assert!(template.resources.is_none()); + } + + #[test] + fn apply_sandbox_template_defaults_partial_config_only_injects_provided() { + let mut template = SandboxTemplate::default(); + // CPU disabled, memory enabled. + apply_sandbox_template_defaults(&mut template, None, Some("4Gi")); + let l = limits(&template); + assert!(!l.fields.contains_key("cpu")); + assert_eq!(limit_string(&template, "memory").as_deref(), Some("4Gi")); + assert!(!l.fields.contains_key("pids")); + } + + // ---- handle_create_sandbox integration ---- + + #[tokio::test] + async fn handle_create_sandbox_persists_default_resource_limits() { + use openshell_core::proto::{CreateSandboxRequest, SandboxSpec}; + let state = test_server_state().await; + + let response = handle_create_sandbox( + &state, + Request::new(CreateSandboxRequest { + name: "default-limits".to_string(), + spec: Some(SandboxSpec { + log_level: "info".to_string(), + policy: Some(openshell_core::proto::SandboxPolicy::default()), + ..Default::default() + }), + labels: HashMap::new(), + }), + ) + .await + .expect("create sandbox") + .into_inner(); + + let template = response + .sandbox + .expect("sandbox in response") + .spec + .expect("spec") + .template + .expect("template"); + assert_eq!(limit_string(&template, "cpu").as_deref(), Some("2")); + assert_eq!(limit_string(&template, "memory").as_deref(), Some("4Gi")); + let l = limits(&template); + assert!( + !l.fields.contains_key("pids"), + "Kubernetes does not enforce template.resources.limits.pids; got {:?}", + l.fields.get("pids") + ); + } + + #[tokio::test] + async fn handle_create_sandbox_preserves_user_resource_limits() { + use openshell_core::proto::{CreateSandboxRequest, SandboxSpec}; + let state = test_server_state().await; + + // User supplies a partial `limits.cpu` only. The gateway must keep + // it, then fill memory from the defaults. + let user_template = template_with_limits(&[("cpu", string_value("500m"))]); + + let response = handle_create_sandbox( + &state, + Request::new(CreateSandboxRequest { + name: "user-cpu".to_string(), + spec: Some(SandboxSpec { + log_level: "info".to_string(), + policy: Some(openshell_core::proto::SandboxPolicy::default()), + template: Some(user_template), + ..Default::default() + }), + labels: HashMap::new(), + }), + ) + .await + .expect("create sandbox") + .into_inner(); + + let template = response + .sandbox + .expect("sandbox in response") + .spec + .expect("spec") + .template + .expect("template"); + assert_eq!(limit_string(&template, "cpu").as_deref(), Some("500m")); + assert_eq!(limit_string(&template, "memory").as_deref(), Some("4Gi")); + let l = limits(&template); + assert!(!l.fields.contains_key("pids")); + } + + #[tokio::test] + async fn handle_create_sandbox_skips_defaults_when_disabled() { + use openshell_core::proto::{CreateSandboxRequest, SandboxSpec}; + let store = Arc::new(Store::connect("sqlite::memory:").await.unwrap()); + let compute = new_test_runtime(store.clone()).await; + // Build a Config with all sandbox defaults disabled (admin opt-out). + let config = Config::new(None) + .with_database_url("sqlite::memory:") + .with_default_sandbox_cpu_limit(None) + .with_default_sandbox_memory_limit(None); + let state = Arc::new(ServerState::new( + config, + store, + compute, + SandboxIndex::new(), + SandboxWatchBus::new(), + TracingLogBus::new(), + Arc::new(SupervisorSessionRegistry::new()), + None, + )); + + let response = handle_create_sandbox( + &state, + Request::new(CreateSandboxRequest { + name: "no-defaults".to_string(), + spec: Some(SandboxSpec { + log_level: "info".to_string(), + policy: Some(openshell_core::proto::SandboxPolicy::default()), + ..Default::default() + }), + labels: HashMap::new(), + }), + ) + .await + .expect("create sandbox") + .into_inner(); + + let template = response + .sandbox + .expect("sandbox in response") + .spec + .expect("spec") + .template + .expect("template"); + // With all defaults disabled, the gateway must not touch resources. + assert!( + template.resources.is_none(), + "expected no resources when defaults are disabled, got {:?}", + template.resources + ); + } } diff --git a/crates/openshell-server/src/lib.rs b/crates/openshell-server/src/lib.rs index b7e145bde..36b19e017 100644 --- a/crates/openshell-server/src/lib.rs +++ b/crates/openshell-server/src/lib.rs @@ -127,6 +127,9 @@ pub struct ServerState { /// `IssueSandboxToken` bootstrap path. Only present when the gateway /// runs in-cluster. pub k8s_sa_authenticator: Option>, + + /// Gateway-wide gRPC request rate limiter shared by every multiplex path. + pub(crate) grpc_rate_limiter: Option, } fn is_benign_tls_handshake_failure(error: &std::io::Error) -> bool { @@ -159,6 +162,7 @@ impl ServerState { supervisor_sessions: Arc, oidc_cache: Option>, ) -> Self { + let grpc_rate_limiter = multiplex::GrpcRateLimiter::from_config(&config); Self { config, store, @@ -174,6 +178,7 @@ impl ServerState { sandbox_jwt_issuer: None, sandbox_jwt_authenticator: None, k8s_sa_authenticator: None, + grpc_rate_limiter, } } } @@ -219,7 +224,10 @@ pub async fn run_server( let sandbox_index = SandboxIndex::new(); let sandbox_watch_bus = SandboxWatchBus::new(); let supervisor_sessions = Arc::new(supervisor_session::SupervisorSessionRegistry::new()); + let driver = configured_compute_driver(&config)?; + let config = config.with_compute_drivers([driver]); let compute = build_compute_runtime( + driver, &config, &vm_config, &docker_config, @@ -683,6 +691,7 @@ async fn terminate_signal() { // that must be passed through, so the count is justified. #[allow(clippy::too_many_arguments)] async fn build_compute_runtime( + driver: ComputeDriverKind, config: &Config, vm_config: &VmComputeConfig, docker_config: &DockerComputeConfig, @@ -693,7 +702,6 @@ async fn build_compute_runtime( tracing_log_bus: TracingLogBus, supervisor_sessions: Arc, ) -> Result { - let driver = configured_compute_driver(config)?; info!(driver = %driver, "Using compute driver"); match driver { diff --git a/crates/openshell-server/src/multiplex.rs b/crates/openshell-server/src/multiplex.rs index 4fcb3993a..3c4f89d7f 100644 --- a/crates/openshell-server/src/multiplex.rs +++ b/crates/openshell-server/src/multiplex.rs @@ -17,12 +17,13 @@ use hyper_util::{ service::TowerToHyperService, }; use metrics::{counter, histogram}; +use openshell_core::Config; use openshell_core::proto::{ inference_server::InferenceServer, open_shell_server::OpenShellServer, }; use std::future::Future; use std::pin::Pin; -use std::sync::Arc; +use std::sync::{Arc, Mutex}; use std::task::{Context, Poll}; use std::time::{Duration, Instant}; use tokio::io::{AsyncRead, AsyncWrite}; @@ -174,6 +175,8 @@ impl MultiplexService { self.state.config.mtls_auth.enabled, self.state.config.auth.allow_unauthenticated_users, ); + let grpc_service = + GrpcRateLimitService::new(grpc_service, self.state.grpc_rate_limiter.clone()); let http_service = http_router(self.state.clone()); let grpc_service = request_id_middleware!(grpc_service); @@ -211,6 +214,92 @@ impl MultiplexService { } } +#[derive(Clone, Debug)] +pub struct GrpcRateLimiter { + requests: u64, + window: Duration, + state: Arc>, +} + +#[derive(Debug)] +struct GrpcRateLimitState { + window_started: Instant, + remaining: u64, +} + +impl GrpcRateLimiter { + pub fn from_config(config: &Config) -> Option { + let (requests, window) = config.grpc_rate_limit()?; + Some(Self { + requests, + window, + state: Arc::new(Mutex::new(GrpcRateLimitState { + window_started: Instant::now(), + remaining: requests, + })), + }) + } + + fn allow(&self) -> bool { + let now = Instant::now(); + let mut state = self + .state + .lock() + .unwrap_or_else(std::sync::PoisonError::into_inner); + if now.duration_since(state.window_started) >= self.window { + state.window_started = now; + state.remaining = self.requests; + } + if state.remaining == 0 { + false + } else { + state.remaining -= 1; + true + } + } +} + +#[derive(Clone)] +struct GrpcRateLimitService { + inner: S, + limiter: Option, +} + +impl GrpcRateLimitService { + fn new(inner: S, limiter: Option) -> Self { + Self { inner, limiter } + } +} + +impl tower::Service> for GrpcRateLimitService +where + S: tower::Service, Response = Response>, + S::Future: Send + 'static, + B: Send + 'static, +{ + type Response = S::Response; + type Error = S::Error; + type Future = Pin> + Send>>; + + fn poll_ready(&mut self, cx: &mut Context<'_>) -> Poll> { + self.inner.poll_ready(cx) + } + + fn call(&mut self, req: Request) -> Self::Future { + if self + .limiter + .as_ref() + .is_some_and(|limiter| !limiter.allow()) + { + let response = + tonic::Status::resource_exhausted("gRPC rate limit exceeded").into_http(); + return Box::pin(async move { Ok(response) }); + } + let future = self.inner.call(req); + Box::pin(future) + } +} + /// Combined gRPC service that routes between `OpenShell` and Inference services /// based on the request path prefix. #[derive(Clone)] @@ -649,6 +738,8 @@ mod tests { use bytes::Bytes; use http_body_util::Empty; use std::sync::Mutex; + use std::sync::atomic::{AtomicUsize, Ordering}; + use tower::Service; #[test] fn uuid_request_id_generates_valid_uuid() { @@ -788,6 +879,164 @@ mod tests { assert_eq!(request_id.to_str().unwrap(), "grpc-corr-id"); } + #[derive(Clone)] + struct CountingGrpcService { + calls: Arc, + } + + impl Service> for CountingGrpcService { + type Response = Response; + type Error = std::convert::Infallible; + type Future = std::future::Ready>; + + fn poll_ready(&mut self, _cx: &mut Context<'_>) -> Poll> { + Poll::Ready(Ok(())) + } + + fn call(&mut self, _req: Request<()>) -> Self::Future { + self.calls.fetch_add(1, Ordering::Relaxed); + std::future::ready(Ok(Response::new(tonic::body::empty_body()))) + } + } + + #[tokio::test] + async fn grpc_rate_limit_returns_resource_exhausted_after_limit() { + let config = Config::new(None).with_grpc_rate_limit(Some(1), Some(60)); + let limiter = GrpcRateLimiter::from_config(&config); + let calls = Arc::new(AtomicUsize::new(0)); + let mut service = GrpcRateLimitService::new( + CountingGrpcService { + calls: calls.clone(), + }, + limiter, + ); + + let first = service + .ready() + .await + .unwrap() + .call(Request::new(())) + .await + .unwrap(); + assert_eq!(grpc_status_from_response(&first), "0"); + + let second = service + .ready() + .await + .unwrap() + .call(Request::new(())) + .await + .unwrap(); + assert_eq!(grpc_status_from_response(&second), "8"); + assert_eq!(calls.load(Ordering::Relaxed), 1); + } + + #[tokio::test] + async fn grpc_rate_limit_disabled_passes_requests_through() { + let config = Config::new(None).with_grpc_rate_limit(Some(0), Some(60)); + let limiter = GrpcRateLimiter::from_config(&config); + let calls = Arc::new(AtomicUsize::new(0)); + let mut service = GrpcRateLimitService::new( + CountingGrpcService { + calls: calls.clone(), + }, + limiter, + ); + + for _ in 0..3 { + let response = service + .ready() + .await + .unwrap() + .call(Request::new(())) + .await + .unwrap(); + assert_eq!(grpc_status_from_response(&response), "0"); + } + assert_eq!(calls.load(Ordering::Relaxed), 3); + } + + #[tokio::test] + async fn grpc_rate_limit_resets_after_window() { + let config = Config::new(None).with_grpc_rate_limit(Some(1), Some(60)); + let limiter = GrpcRateLimiter::from_config(&config).expect("limiter should be enabled"); + let calls = Arc::new(AtomicUsize::new(0)); + let mut service = GrpcRateLimitService::new( + CountingGrpcService { + calls: calls.clone(), + }, + Some(limiter.clone()), + ); + + let first = service + .ready() + .await + .unwrap() + .call(Request::new(())) + .await + .unwrap(); + assert_eq!(grpc_status_from_response(&first), "0"); + + { + let mut state = limiter + .state + .lock() + .unwrap_or_else(std::sync::PoisonError::into_inner); + state.window_started = state + .window_started + .checked_sub(Duration::from_secs(61)) + .expect("test window rewind should be valid"); + } + + let second = service + .ready() + .await + .unwrap() + .call(Request::new(())) + .await + .unwrap(); + assert_eq!(grpc_status_from_response(&second), "0"); + assert_eq!(calls.load(Ordering::Relaxed), 2); + } + + #[tokio::test] + async fn grpc_rate_limit_state_is_shared_across_service_clones() { + let config = Config::new(None).with_grpc_rate_limit(Some(1), Some(60)); + let limiter = GrpcRateLimiter::from_config(&config); + let calls = Arc::new(AtomicUsize::new(0)); + let mut first_service = GrpcRateLimitService::new( + CountingGrpcService { + calls: calls.clone(), + }, + limiter.clone(), + ); + let mut second_service = GrpcRateLimitService::new( + CountingGrpcService { + calls: calls.clone(), + }, + limiter, + ); + + let first = first_service + .ready() + .await + .unwrap() + .call(Request::new(())) + .await + .unwrap(); + assert_eq!(grpc_status_from_response(&first), "0"); + + let second = second_service + .ready() + .await + .unwrap() + .call(Request::new(())) + .await + .unwrap(); + assert_eq!(grpc_status_from_response(&second), "8"); + assert_eq!(calls.load(Ordering::Relaxed), 1); + } + #[derive(Clone)] struct TraceBuf(Arc>>); diff --git a/docs/reference/gateway-config.mdx b/docs/reference/gateway-config.mdx index d0f4ef32b..656c4bb2f 100644 --- a/docs/reference/gateway-config.mdx +++ b/docs/reference/gateway-config.mdx @@ -87,6 +87,25 @@ default_image = "ghcr.io/nvidia/openshell/sandbox:latest" supervisor_image = "ghcr.io/nvidia/openshell/supervisor:latest" client_tls_secret_name = "openshell-client-tls" +# Per-sandbox CPU/memory cgroup defaults. Overlaid on +# `template.resources.limits` at CreateSandbox time when the request omits the +# field. Use Kubernetes-style quantity strings. +# +# Opt-out values: +# * `"0"`, `""`, or whitespace-only. +# An opt-out value means the gateway applies no default on that dimension; +# the sandbox runs without a gateway-imposed bound. Omitting a key entirely +# uses the built-in default ("2", "4Gi"). +# +# Negative values are rejected at config load time. +default_sandbox_cpu_limit = "2" +default_sandbox_memory_limit = "4Gi" + +# Optional gRPC rate limit. Both values must be positive to enable the limit. +# Set either value to 0, or omit both, to disable rate limiting. +grpc_rate_limit_requests = 120 +grpc_rate_limit_window_seconds = 60 + # Gateway listener TLS (distinct from the per-driver guest_tls_*). [openshell.gateway.tls] cert_path = "/etc/openshell/certs/gateway.pem"