diff --git a/architecture/gateway.md b/architecture/gateway.md
index a7e8f1a00..ebdbbfe16 100644
--- a/architecture/gateway.md
+++ b/architecture/gateway.md
@@ -37,6 +37,10 @@ health, metrics, or tunnel routes. The plaintext service router also rejects
 browser requests whose Fetch Metadata, Origin, or Referer headers indicate a
 cross-origin or sibling-subdomain request.
 
+Operators can configure a gateway-wide gRPC request rate limit. The limit is
+applied only to gRPC API traffic after protocol multiplexing; health, metrics,
+and local sandbox-service HTTP routes are not rate limited by this control.
+
 Supported auth modes:
 
 | Mode | Use |
diff --git a/architecture/sandbox.md b/architecture/sandbox.md
index 71dd35227..673a1e3ad 100644
--- a/architecture/sandbox.md
+++ b/architecture/sandbox.md
@@ -40,10 +40,30 @@ OpenShell uses overlapping controls rather than a single sandbox primitive:
 | Seccomp | Blocks dangerous syscalls, including raw socket paths that bypass the proxy. |
 | Network namespace | Forces ordinary agent egress through the local CONNECT proxy. |
 | Policy proxy | Evaluates destination, binary identity, TLS/L7 rules, SSRF checks, and inference interception. |
+| Cgroup limits | Caps per-sandbox CPU, memory, and (where the driver supports it) PID usage to prevent runaway resource consumption. |
 
 The supervisor may enrich baseline filesystem allowances for runtime-required
 paths, such as proxy support files or GPU device paths when a GPU is present.
 
+### Cgroup Resource Defaults
+
+The gateway fills in `template.resources.limits.{cpu,memory}` with configured
+defaults on any `CreateSandbox` request that omits them. User-supplied values
+are preserved verbatim; the applied defaults are persisted, so subsequent
+`GetSandbox` calls observe the effective limits. The defaults live on the public
+`template.resources` Struct and propagate through typed driver resource fields.
+
+| Dimension | Default | Driver support |
+|---|---|---|
+| CPU | `"2"` | Kubernetes, Docker, Podman (defense-in-depth fallback). VM ignores. |
+| Memory | `"4Gi"` | Kubernetes, Docker, Podman (defense-in-depth fallback). VM ignores. |
+
+Operators tune the values in `[openshell.gateway]` via
+`default_sandbox_cpu_limit` and `default_sandbox_memory_limit`. Setting `"0"`
+(or an empty or whitespace-only string) disables the corresponding default — the
+sandbox runs without a gateway-imposed bound on that dimension. Omitting a key
+uses the built-in default above. Negative values are rejected during config load.
+
 ## Network and Inference
 
 All ordinary agent egress is routed through the sandbox proxy. The proxy
diff --git a/crates/openshell-core/src/config.rs b/crates/openshell-core/src/config.rs
index 98562c8a6..ec8868b82 100644
--- a/crates/openshell-core/src/config.rs
+++ b/crates/openshell-core/src/config.rs
@@ -11,6 +11,7 @@ use std::os::unix::fs::FileTypeExt;
 use std::path::{Path, PathBuf};
 use std::process::Command;
 use std::str::FromStr;
+use std::time::Duration;
 
 // ── Public default constants ────────────────────────────────────────────
 //
@@ -39,6 +40,14 @@ pub const DEFAULT_SUPERVISOR_IMAGE: &str = "ghcr.io/nvidia/openshell/supervisor:
 /// CDI device identifier for requesting all NVIDIA GPUs.
 pub const CDI_GPU_DEVICE_ALL: &str = "nvidia.com/gpu=all";
 
+/// Default per-sandbox CPU limit applied when the user omits
+/// `template.resources.limits.cpu`. Uses Kubernetes-style quantity strings.
+pub const DEFAULT_SANDBOX_CPU_LIMIT: &str = "2";
+
+/// Default per-sandbox memory limit applied when the user omits
+/// `template.resources.limits.memory`. Uses Kubernetes-style quantity strings.
+pub const DEFAULT_SANDBOX_MEMORY_LIMIT: &str = "4Gi";
+
 /// Compute backends the gateway can orchestrate sandboxes through.
 #[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
 #[serde(rename_all = "snake_case")]
@@ -238,9 +247,57 @@ pub struct Config {
     #[serde(default = "default_ssh_session_ttl_secs")]
     pub ssh_session_ttl_secs: u64,
 
+    /// Maximum gRPC requests allowed per rate-limit window.
+    ///
+    /// When paired with [`Self::grpc_rate_limit_window_secs`], positive values
+    /// enable gateway-wide gRPC request rate limiting. `None` or `0` disables
+    /// the limit.
+    #[serde(default)]
+    pub grpc_rate_limit_requests: Option<u64>,
+
+    /// gRPC rate-limit window length in seconds.
+    ///
+    /// When paired with [`Self::grpc_rate_limit_requests`], positive values
+    /// enable gateway-wide gRPC request rate limiting. `None` or `0` disables
+    /// the limit.
+    #[serde(default)]
+    pub grpc_rate_limit_window_secs: Option<u64>,
+
     /// Browser-facing sandbox service routing configuration.
     #[serde(default)]
     pub service_routing: ServiceRoutingConfig,
+
+    /// Default CPU limit overlaid onto `template.resources.limits.cpu` when
+    /// a `CreateSandbox` request omits the field.
+    ///
+    /// - `Some(value)`: a Kubernetes-style quantity string (e.g. `"2"`,
+    ///   `"500m"`) injected into the sandbox template before persistence.
+    ///   User-supplied `limits.cpu` always wins (overlay semantics).
+    /// - `None`: gateway opts out of imposing a CPU bound for sandboxes
+    ///   that omit the field. The container runs without a CPU cgroup cap
+    ///   unless a driver-level fallback applies.
+    ///
+    /// The TOML loader resolves `default_sandbox_cpu_limit = "0"`, `""`, or
+    /// whitespace-only values to `None`. Negative quantities are rejected
+    /// rather than silently treated as an opt-out. An absent key falls back to
+    /// [`DEFAULT_SANDBOX_CPU_LIMIT`]. See
+    /// `openshell_server::config_file::resolve_sandbox_quantity_default`
+    /// for the full resolution table.
+    ///
+    /// [`Config::new`] seeds this with `Some(DEFAULT_SANDBOX_CPU_LIMIT)` so
+    /// embedded callers inherit the secure default automatically.
+    #[serde(default)]
+    pub default_sandbox_cpu_limit: Option<String>,
+
+    /// Default memory limit overlaid onto `template.resources.limits.memory`
+    /// when a `CreateSandbox` request omits the field. Same
+    /// `Some`/`None` semantics as [`Self::default_sandbox_cpu_limit`].
+    ///
+    /// TOML opt-out: `default_sandbox_memory_limit = "0"`, `""`, or
+    /// whitespace-only -> `None`. Negative quantities are rejected. An absent
+    /// key falls back to [`DEFAULT_SANDBOX_MEMORY_LIMIT`].
+    #[serde(default)]
+    pub default_sandbox_memory_limit: Option<String>,
 }
 
 /// Browser-facing sandbox service routing configuration.
@@ -416,7 +473,11 @@ impl Config {
             database_url: String::new(),
             compute_drivers: vec![],
             ssh_session_ttl_secs: default_ssh_session_ttl_secs(),
+            grpc_rate_limit_requests: None,
+            grpc_rate_limit_window_secs: None,
             service_routing: ServiceRoutingConfig::default(),
+            default_sandbox_cpu_limit: Some(DEFAULT_SANDBOX_CPU_LIMIT.to_string()),
+            default_sandbox_memory_limit: Some(DEFAULT_SANDBOX_MEMORY_LIMIT.to_string()),
         }
     }
 
@@ -483,6 +544,56 @@ impl Config {
         self
     }
 
+    /// Set the gateway-wide gRPC rate limit; `None` or `0` for either value disables it.
+    #[must_use]
+    pub const fn with_grpc_rate_limit(
+        mut self,
+        requests: Option<u64>,
+        window_secs: Option<u64>,
+    ) -> Self {
+        self.grpc_rate_limit_requests = requests;
+        self.grpc_rate_limit_window_secs = window_secs;
+        self
+    }
+
+    /// Resolve the gRPC rate limit that is actually in force, if any.
+    #[must_use]
+    pub fn grpc_rate_limit(&self) -> Option<(u64, Duration)> {
+        match (self.grpc_rate_limit_requests, self.grpc_rate_limit_window_secs) {
+            (Some(requests), Some(window_secs)) if requests > 0 && window_secs > 0 => {
+                Some((requests, Duration::from_secs(window_secs)))
+            }
+            // A missing or zero component leaves the limiter disabled.
+            _ => None,
+        }
+    }
+
+    /// Override the default sandbox CPU limit.
+    ///
+    /// - `Some(value)`: applied as the gateway-wide default when a
+    ///   `CreateSandbox` request omits `template.resources.limits.cpu`.
+    /// - `None`: gateway-wide opt-out — sandboxes that omit the field run
+    ///   without a CPU cap from the gateway. Equivalent to setting
+    ///   `default_sandbox_cpu_limit = "0"` (or `""`) in the gateway TOML.
+    ///
+    /// See [`Self::default_sandbox_cpu_limit`] for the overlay semantics.
+    #[must_use]
+    pub fn with_default_sandbox_cpu_limit(mut self, value: Option<String>) -> Self {
+        self.default_sandbox_cpu_limit = value;
+        self
+    }
+
+    /// Override the default sandbox memory limit.
+    ///
+    /// `None` opts out of the gateway-wide memory default, mirroring
+    /// `default_sandbox_memory_limit = "0"` in TOML. See
+    /// [`Self::default_sandbox_memory_limit`].
+    #[must_use]
+    pub fn with_default_sandbox_memory_limit(mut self, value: Option<String>) -> Self {
+        self.default_sandbox_memory_limit = value;
+        self
+    }
+
     /// Set the OIDC configuration for JWT-based authentication.
     #[must_use]
     pub fn with_oidc(mut self, oidc: OidcConfig) -> Self {
@@ -601,6 +712,7 @@ mod tests {
     #[cfg(unix)]
     use std::os::unix::net::UnixListener;
     use std::path::PathBuf;
+    use std::time::Duration;
 
     #[test]
     fn compute_driver_kind_parses_supported_values() {
@@ -646,6 +758,29 @@ mod tests {
         assert!(!cfg.auth.allow_unauthenticated_users);
     }
 
+    #[test]
+    fn grpc_rate_limit_requires_positive_pair() {
+        assert!(Config::new(None).grpc_rate_limit().is_none());
+        assert!(
+            Config::new(None)
+                .with_grpc_rate_limit(Some(10), None)
+                .grpc_rate_limit()
+                .is_none()
+        );
+        assert!(
+            Config::new(None)
+                .with_grpc_rate_limit(Some(0), Some(60))
+                .grpc_rate_limit()
+                .is_none()
+        );
+        assert_eq!(
+            Config::new(None)
+                .with_grpc_rate_limit(Some(10), Some(60))
+                .grpc_rate_limit(),
+            Some((10, Duration::from_secs(60)))
+        );
+    }
+
     #[test]
     fn service_routing_allows_loopback_plaintext_http_by_default() {
         let cfg = Config::new(None);
diff --git a/crates/openshell-server/src/cli.rs b/crates/openshell-server/src/cli.rs
index b8d345f9e..bf96c5b6c 100644
--- a/crates/openshell-server/src/cli.rs
+++ b/crates/openshell-server/src/cli.rs
@@ -175,6 +175,14 @@ struct RunArgs {
     #[arg(long, env = "OPENSHELL_OIDC_SCOPES_CLAIM", default_value = "")]
     oidc_scopes_claim: String,
 
+    /// Maximum gRPC requests allowed per rate-limit window. Set to 0 to disable.
+    #[arg(long, env = "OPENSHELL_GRPC_RATE_LIMIT_REQUESTS")]
+    grpc_rate_limit_requests: Option<u64>,
+
+    /// gRPC rate-limit window length in seconds. Set to 0 to disable.
+    #[arg(long, env = "OPENSHELL_GRPC_RATE_LIMIT_WINDOW_SECONDS")]
+    grpc_rate_limit_window_seconds: Option<u64>,
+
     /// Subject Alternative Names configured on the gateway server certificate.
     /// Wildcard DNS SANs also enable sandbox service URLs under that domain.
     #[arg(
@@ -353,8 +361,16 @@ async fn run_from_args(mut args: RunArgs, matches: ArgMatches) -> Result<()> {
     config = config
         .with_database_url(db_url)
         .with_compute_drivers(args.drivers.clone())
+        .with_grpc_rate_limit(
+            args.grpc_rate_limit_requests,
+            args.grpc_rate_limit_window_seconds,
+        )
         .with_server_sans(args.server_sans.clone())
         .with_loopback_service_http(args.enable_loopback_service_http);
+    validate_grpc_rate_limit_args(
+        args.grpc_rate_limit_requests,
+        args.grpc_rate_limit_window_seconds,
+    )?;
 
     if let Some(ttl) = file
         .as_ref()
@@ -363,6 +379,26 @@ async fn run_from_args(mut args: RunArgs, matches: ArgMatches) -> Result<()> {
         config = config.with_ssh_session_ttl_secs(ttl);
     }
 
+    // Sandbox resource defaults. The gateway always applies these unless the
+    // operator explicitly opts out with `"0"` or `""` in `[openshell.gateway]`.
+    {
+        let cpu = config_file::resolve_sandbox_quantity_default(
+            "default_sandbox_cpu_limit",
+            file_gateway.and_then(|g| g.default_sandbox_cpu_limit.as_deref()),
+            openshell_core::config::DEFAULT_SANDBOX_CPU_LIMIT,
+        )
+        .map_err(|e| miette::miette!("{e}"))?;
+        let memory = config_file::resolve_sandbox_quantity_default(
+            "default_sandbox_memory_limit",
+            file_gateway.and_then(|g| g.default_sandbox_memory_limit.as_deref()),
+            openshell_core::config::DEFAULT_SANDBOX_MEMORY_LIMIT,
+        )
+        .map_err(|e| miette::miette!("{e}"))?;
+        config = config
+            .with_default_sandbox_cpu_limit(cpu)
+            .with_default_sandbox_memory_limit(memory);
+    }
+
     if let Some(issuer) = args.oidc_issuer.clone() {
         config = config.with_oidc(openshell_core::OidcConfig {
             issuer,
@@ -608,6 +644,37 @@ fn merge_file_into_args(args: &mut RunArgs, file: &GatewayFileSection, matches:
             args.oidc_scopes_claim.clone_from(&oidc.scopes_claim);
         }
     }
+    if let Some(requests) = file.grpc_rate_limit_requests
+        && args.grpc_rate_limit_requests.is_none()
+        && arg_defaulted(matches, "grpc_rate_limit_requests")
+    {
+        args.grpc_rate_limit_requests = Some(requests);
+    }
+    if let Some(window) = file.grpc_rate_limit_window_seconds
+        && args.grpc_rate_limit_window_seconds.is_none()
+        && arg_defaulted(matches, "grpc_rate_limit_window_seconds")
+    {
+        args.grpc_rate_limit_window_seconds = Some(window);
+    }
+}
+
+/// Reject a half-configured gRPC rate limit.
+///
+/// Either value being `0` disables limiting; otherwise both must be supplied.
+fn validate_grpc_rate_limit_args(requests: Option<u64>, window_seconds: Option<u64>) -> Result<()> {
+    // An explicit zero on either side switches rate limiting off, regardless
+    // of what (if anything) the other value is.
+    if requests == Some(0) || window_seconds == Some(0) {
+        return Ok(());
+    }
+    // Past the zero check every supplied value is positive, so the only
+    // invalid shape left is a lone value without its partner.
+    match (requests, window_seconds) {
+        (Some(_), None) | (None, Some(_)) => Err(miette::miette!(
+            "gRPC rate limiting requires both --grpc-rate-limit-requests and --grpc-rate-limit-window-seconds to be positive; set either value to 0 to disable"
+        )),
+        (None, None) | (Some(_), Some(_)) => Ok(()),
+    }
 }
 
 fn effective_single_driver(args: &RunArgs) -> Option<ComputeDriverKind> {
@@ -889,6 +956,41 @@ mod tests {
         assert!(cli.run.enable_mtls_auth);
     }
 
+    #[test]
+    fn command_parses_grpc_rate_limit_flags() {
+        let _lock = ENV_LOCK
+            .lock()
+            .unwrap_or_else(std::sync::PoisonError::into_inner);
+        let _g1 = EnvVarGuard::remove("OPENSHELL_GRPC_RATE_LIMIT_REQUESTS");
+        let _g2 = EnvVarGuard::remove("OPENSHELL_GRPC_RATE_LIMIT_WINDOW_SECONDS");
+
+        let cli = Cli::try_parse_from([
+            "openshell-gateway",
+            "--db-url",
+            "sqlite::memory:",
+            "--grpc-rate-limit-requests",
+            "120",
+            "--grpc-rate-limit-window-seconds",
+            "60",
+        ])
+        .unwrap();
+
+        assert_eq!(cli.run.grpc_rate_limit_requests, Some(120));
+        assert_eq!(cli.run.grpc_rate_limit_window_seconds, Some(60));
+    }
+
+    #[test]
+    fn validate_grpc_rate_limit_args_requires_positive_pair() {
+        assert!(super::validate_grpc_rate_limit_args(None, None).is_ok());
+        assert!(super::validate_grpc_rate_limit_args(Some(0), None).is_ok());
+        assert!(super::validate_grpc_rate_limit_args(None, Some(0)).is_ok());
+        assert!(super::validate_grpc_rate_limit_args(Some(0), Some(60)).is_ok());
+        assert!(super::validate_grpc_rate_limit_args(Some(120), Some(0)).is_ok());
+        assert!(super::validate_grpc_rate_limit_args(Some(120), Some(60)).is_ok());
+        assert!(super::validate_grpc_rate_limit_args(Some(120), None).is_err());
+        assert!(super::validate_grpc_rate_limit_args(None, Some(60)).is_err());
+    }
+
     #[test]
     fn command_rejects_removed_driver_flags() {
         let err = command()
@@ -1287,6 +1389,45 @@ audience = "openshell-cli"
         assert_eq!(args.oidc_audience, "openshell-cli");
     }
 
+    #[test]
+    fn file_grpc_rate_limit_populates_args_when_cli_omits() {
+        let (mut args, matches) =
+            parse_with_args(&["openshell-gateway", "--db-url", "sqlite::memory:"]);
+        let file = config_file_from_toml(
+            r"
+[openshell.gateway]
+grpc_rate_limit_requests = 100
+grpc_rate_limit_window_seconds = 30
+",
+        );
+        merge_file_into_args(&mut args, &file.openshell.gateway, &matches);
+
+        assert_eq!(args.grpc_rate_limit_requests, Some(100));
+        assert_eq!(args.grpc_rate_limit_window_seconds, Some(30));
+    }
+
+    #[test]
+    fn cli_grpc_rate_limit_overrides_file_value() {
+        let (mut args, matches) = parse_with_args(&[
+            "openshell-gateway",
+            "--db-url",
+            "sqlite::memory:",
+            "--grpc-rate-limit-requests",
+            "20",
+        ]);
+        let file = config_file_from_toml(
+            r"
+[openshell.gateway]
+grpc_rate_limit_requests = 100
+grpc_rate_limit_window_seconds = 30
+",
+        );
+        merge_file_into_args(&mut args, &file.openshell.gateway, &matches);
+
+        assert_eq!(args.grpc_rate_limit_requests, Some(20));
+        assert_eq!(args.grpc_rate_limit_window_seconds, Some(30));
+    }
+
     #[test]
     fn aux_listener_preserves_file_ip_against_public_bind() {
         use std::net::SocketAddr;
diff --git a/crates/openshell-server/src/compute/mod.rs b/crates/openshell-server/src/compute/mod.rs
index 98dc3fd63..7e2236989 100644
--- a/crates/openshell-server/src/compute/mod.rs
+++ b/crates/openshell-server/src/compute/mod.rs
@@ -1362,7 +1362,9 @@ fn build_platform_config(template: &SandboxTemplate) -> Option<prost_types::Stru
 
     // Pass through any resource fields that do not map to the typed
     // DriverResourceRequirements so platform-specific drivers can still see
-    // custom resources such as GPU limits.
+    // custom resources such as GPU limits. `pids` has no typed driver field
+    // today and is not a valid Kubernetes container resource, so ignore it
+    // rather than turning it into platform_config that strict drivers reject.
     if let Some(res) = build_platform_resources_config(&template.resources) {
         fields.insert(
             "resources_raw".to_string(),
@@ -1404,7 +1406,7 @@ fn build_platform_resources_config(
             .filter_map(|(resource_name, resource_value)| {
                 let is_typed_quantity = matches!(resource_name.as_str(), "cpu" | "memory")
                     && matches!(resource_value.kind.as_ref(), Some(Kind::StringValue(_)));
-                if is_typed_quantity {
+                if is_typed_quantity || resource_name == "pids" {
                     None
                 } else {
                     Some((resource_name.clone(), resource_value.clone()))
@@ -2097,7 +2099,11 @@ mod tests {
                 fields: [
                     (
                         "limits",
-                        struct_value([("cpu", string_value("2")), ("memory", string_value("1Gi"))]),
+                        struct_value([
+                            ("cpu", string_value("2")),
+                            ("memory", string_value("1Gi")),
+                            ("pids", number_value(256.0)),
+                        ]),
                     ),
                     (
                         "requests",
@@ -2127,6 +2133,7 @@ mod tests {
                         struct_value([
                             ("cpu", string_value("2")),
                             ("memory", string_value("1Gi")),
+                            ("pids", number_value(256.0)),
                             ("nvidia.com/gpu", string_value("1")),
                         ]),
                     ),
@@ -2169,6 +2176,7 @@ mod tests {
             .unwrap();
         assert!(!limits.fields.contains_key("cpu"));
         assert!(!limits.fields.contains_key("memory"));
+        assert!(!limits.fields.contains_key("pids"));
         assert_eq!(
             limits
                 .fields
diff --git a/crates/openshell-server/src/config_file.rs b/crates/openshell-server/src/config_file.rs
index 7d7c99cc3..a17960ff4 100644
--- a/crates/openshell-server/src/config_file.rs
+++ b/crates/openshell-server/src/config_file.rs
@@ -94,6 +94,22 @@ pub struct GatewayFileSection {
     pub sandbox_namespace: Option<String>,
     #[serde(default)]
     pub ssh_session_ttl_secs: Option<u64>,
+    #[serde(default)]
+    pub grpc_rate_limit_requests: Option<u64>,
+    #[serde(default)]
+    pub grpc_rate_limit_window_seconds: Option<u64>,
+
+    // ── Sandbox resource defaults ───────────────────────────────
+    //
+    // Applied at `CreateSandbox` time as an overlay onto
+    // `template.resources.limits.{cpu,memory}`. User-supplied values are
+    // preserved; absent fields are filled from these defaults. `"0"` or an empty
+    // string disables the corresponding default, mirroring container-runtime
+    // conventions (e.g. Docker) where a limit of 0 means "no bound".
+    #[serde(default)]
+    pub default_sandbox_cpu_limit: Option<String>,
+    #[serde(default)]
+    pub default_sandbox_memory_limit: Option<String>,
 
     // ── Service routing ──────────────────────────────────────────────────
     /// Subject Alternative Names configured on the gateway server certificate.
@@ -182,6 +198,24 @@ pub enum ConfigFileError {
         env: &'static str,
         cli: &'static str,
     },
+    #[error("invalid gateway config file '{}': {source}", path.display())]
+    InvalidSandboxDefault {
+        path: PathBuf,
+        #[source]
+        source: SandboxDefaultError,
+    },
+}
+
+#[derive(Debug, thiserror::Error, Clone, PartialEq, Eq)]
+pub enum SandboxDefaultError {
+    #[error(
+        "`{field}` has invalid negative value `{value}`; use {disable_value} to disable the default"
+    )]
+    Negative {
+        field: &'static str,
+        value: String,
+        disable_value: &'static str,
+    },
 }
 
 /// Load and validate a TOML config file.
@@ -214,10 +248,36 @@ pub fn load(path: &Path) -> Result<ConfigFile, ConfigFileError> {
             cli: "--db-url",
         });
     }
+    validate_sandbox_defaults(path, &file.openshell.gateway)?;
 
     Ok(file)
 }
 
+fn validate_sandbox_defaults(
+    path: &Path,
+    gateway: &GatewayFileSection,
+) -> Result<(), ConfigFileError> {
+    resolve_sandbox_quantity_default(
+        "default_sandbox_cpu_limit",
+        gateway.default_sandbox_cpu_limit.as_deref(),
+        openshell_core::config::DEFAULT_SANDBOX_CPU_LIMIT,
+    )
+    .map_err(|source| ConfigFileError::InvalidSandboxDefault {
+        path: path.to_path_buf(),
+        source,
+    })?;
+    resolve_sandbox_quantity_default(
+        "default_sandbox_memory_limit",
+        gateway.default_sandbox_memory_limit.as_deref(),
+        openshell_core::config::DEFAULT_SANDBOX_MEMORY_LIMIT,
+    )
+    .map_err(|source| ConfigFileError::InvalidSandboxDefault {
+        path: path.to_path_buf(),
+        source,
+    })?;
+    Ok(())
+}
+
 /// Build the merged TOML table for `driver` by overlaying inheritable
 /// `[openshell.gateway]` defaults onto `[openshell.drivers.<name>]`.
 ///
@@ -313,6 +373,44 @@ fn path_value(p: &Path) -> toml::Value {
     toml::Value::String(p.display().to_string())
 }
 
+/// Turn the raw TOML value of a sandbox CPU/memory default into the effective
+/// setting consumed by `apply_sandbox_template_defaults`.
+///
+/// | TOML input | Result | Meaning |
+/// |---|---|---|
+/// | key absent (`None`) | `Some(core_default)` | Fall back to the built-in constant. |
+/// | `"0"` (after trim) | `None` | Operator opt-out; no default applied. |
+/// | empty / whitespace-only | `None` | Operator opt-out; no default applied. |
+/// | leading `-` after trim (e.g. `"-1"`, `" -500m "`) | `Err` | Negative quantity rejected. |
+/// | any other string | `Some(trimmed)` | Surrounding whitespace removed, otherwise unchanged; no further validation happens here. |
+///
+/// Negative values are refused instead of being treated as an opt-out, so a
+/// typo such as `"-1"` cannot silently disable the cgroup default. `field` is
+/// only used to name the offending key in the returned error.
+pub fn resolve_sandbox_quantity_default(
+    field: &'static str,
+    toml_value: Option<&str>,
+    core_default: &'static str,
+) -> Result<Option<String>, SandboxDefaultError> {
+    let Some(raw) = toml_value else {
+        return Ok(Some(core_default.to_string()));
+    };
+    let quantity = raw.trim();
+    if quantity.starts_with('-') {
+        return Err(SandboxDefaultError::Negative {
+            field,
+            value: quantity.to_string(),
+            disable_value: r#"`"0"` or `""`"#,
+        });
+    }
+    // Empty and `"0"` are the operator's explicit opt-out sentinels.
+    if quantity.is_empty() || quantity == "0" {
+        Ok(None)
+    } else {
+        Ok(Some(quantity.to_string()))
+    }
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
@@ -348,6 +446,8 @@ health_bind_address = "0.0.0.0:8081"
 log_level = "info"
 compute_drivers = ["kubernetes"]
 sandbox_namespace = "agents"
+grpc_rate_limit_requests = 120
+grpc_rate_limit_window_seconds = 60
 default_image = "ghcr.io/nvidia/openshell/sandbox:latest"
 supervisor_image = "ghcr.io/nvidia/openshell/supervisor:latest"
 client_tls_secret_name = "openshell-sandbox-tls"
@@ -374,6 +474,8 @@ grpc_endpoint = "https://openshell-gateway.agents.svc:8080"
             gw.default_image.as_deref(),
             Some("ghcr.io/nvidia/openshell/sandbox:latest")
         );
+        assert_eq!(gw.grpc_rate_limit_requests, Some(120));
+        assert_eq!(gw.grpc_rate_limit_window_seconds, Some(60));
         assert!(gw.tls.is_some());
         assert!(gw.oidc.is_some());
         assert!(file.openshell.drivers.contains_key("kubernetes"));
@@ -588,4 +690,181 @@ version = 2
              driver selection when Docker is also installed"
         );
     }
+
+    // ---- resolve_sandbox_quantity_default ----
+
+    #[test]
+    fn resolve_sandbox_quantity_default_uses_core_when_absent() {
+        assert_eq!(
+            resolve_sandbox_quantity_default("default_sandbox_cpu_limit", None, "2").unwrap(),
+            Some("2".to_string())
+        );
+    }
+
+    #[test]
+    fn resolve_sandbox_quantity_default_zero_string_disables() {
+        assert_eq!(
+            resolve_sandbox_quantity_default("default_sandbox_cpu_limit", Some("0"), "2").unwrap(),
+            None
+        );
+    }
+
+    #[test]
+    fn resolve_sandbox_quantity_default_empty_string_disables() {
+        assert_eq!(
+            resolve_sandbox_quantity_default("default_sandbox_memory_limit", Some(""), "4Gi")
+                .unwrap(),
+            None
+        );
+        assert_eq!(
+            resolve_sandbox_quantity_default("default_sandbox_memory_limit", Some("  "), "4Gi")
+                .unwrap(),
+            None
+        );
+    }
+
+    #[test]
+    fn resolve_sandbox_quantity_default_uses_supplied_value() {
+        assert_eq!(
+            resolve_sandbox_quantity_default("default_sandbox_memory_limit", Some("8Gi"), "4Gi")
+                .unwrap(),
+            Some("8Gi".to_string())
+        );
+    }
+
+    #[test]
+    fn resolve_sandbox_quantity_default_negative_rejects() {
+        let err = resolve_sandbox_quantity_default("default_sandbox_cpu_limit", Some("-1"), "2")
+            .unwrap_err();
+        assert!(matches!(err, SandboxDefaultError::Negative { .. }));
+        assert!(
+            resolve_sandbox_quantity_default("default_sandbox_cpu_limit", Some("-500m"), "2")
+                .is_err()
+        );
+        assert!(
+            resolve_sandbox_quantity_default("default_sandbox_memory_limit", Some("-2Gi"), "4Gi")
+                .is_err()
+        );
+        assert!(
+            resolve_sandbox_quantity_default("default_sandbox_cpu_limit", Some(" -1 "), "2")
+                .is_err()
+        );
+    }
+
+    #[test]
+    fn resolve_sandbox_quantity_default_trims_whitespace() {
+        assert_eq!(
+            resolve_sandbox_quantity_default("default_sandbox_memory_limit", Some(" 8Gi "), "4Gi")
+                .unwrap(),
+            Some("8Gi".to_string())
+        );
+    }
+
+    /// End-to-end: a TOML file that opts out of both sandbox defaults
+    /// (cpu/memory = `"0"`) must resolve to `None` for each field. Guards
+    /// against a refactor that silently drops the sentinel handling between the
+    /// parser and the runtime `Config`.
+    #[test]
+    fn zero_sentinels_in_toml_disable_sandbox_defaults_end_to_end() {
+        let toml = r#"
+[openshell]
+version = 1
+
+[openshell.gateway]
+default_sandbox_cpu_limit    = "0"
+default_sandbox_memory_limit = "0"
+"#;
+        let tmp = write_tmp(toml);
+        let file = load(tmp.path()).expect("valid file parses");
+        let gw = &file.openshell.gateway;
+
+        // Parser preserves the raw TOML values.
+        assert_eq!(gw.default_sandbox_cpu_limit.as_deref(), Some("0"));
+        assert_eq!(gw.default_sandbox_memory_limit.as_deref(), Some("0"));
+
+        // Resolution collapses the sentinels into `None` so the gateway
+        // skips injection in `apply_sandbox_template_defaults`.
+        assert_eq!(
+            resolve_sandbox_quantity_default(
+                "default_sandbox_cpu_limit",
+                gw.default_sandbox_cpu_limit.as_deref(),
+                "2"
+            )
+            .unwrap(),
+            None,
+            r#"`default_sandbox_cpu_limit = "0"` must disable the CPU default"#
+        );
+        assert_eq!(
+            resolve_sandbox_quantity_default(
+                "default_sandbox_memory_limit",
+                gw.default_sandbox_memory_limit.as_deref(),
+                "4Gi"
+            )
+            .unwrap(),
+            None,
+            r#"`default_sandbox_memory_limit = "0"` must disable the memory default"#
+        );
+    }
+
+    /// End-to-end: a TOML file with negative sandbox-default values must be
+    /// rejected. Guards against malformed config silently bypassing the cgroup
+    /// defaults via a downstream parser that maps `-1` to "unlimited".
+    #[test]
+    fn negative_sentinels_in_toml_reject_sandbox_defaults_end_to_end() {
+        let toml = r#"
+[openshell]
+version = 1
+
+[openshell.gateway]
+default_sandbox_cpu_limit    = "-1"
+default_sandbox_memory_limit = "-500m"
+"#;
+        let tmp = write_tmp(toml);
+        let err = load(tmp.path()).expect_err("negative sandbox defaults must be rejected");
+        assert!(matches!(
+            err,
+            ConfigFileError::InvalidSandboxDefault {
+                source: SandboxDefaultError::Negative { .. },
+                ..
+            }
+        ));
+    }
+
+    /// End-to-end: a TOML file with no sandbox-default keys must fall back
+    /// to the canonical constants in `openshell-core`.
+    #[test]
+    fn absent_keys_in_toml_use_core_constants_end_to_end() {
+        let toml = r#"
+[openshell]
+version = 1
+
+[openshell.gateway]
+log_level = "info"
+"#;
+        let tmp = write_tmp(toml);
+        let file = load(tmp.path()).expect("valid file parses");
+        let gw = &file.openshell.gateway;
+
+        assert!(gw.default_sandbox_cpu_limit.is_none());
+        assert!(gw.default_sandbox_memory_limit.is_none());
+
+        assert_eq!(
+            resolve_sandbox_quantity_default(
+                "default_sandbox_cpu_limit",
+                gw.default_sandbox_cpu_limit.as_deref(),
+                "2"
+            )
+            .unwrap(),
+            Some("2".to_string())
+        );
+        assert_eq!(
+            resolve_sandbox_quantity_default(
+                "default_sandbox_memory_limit",
+                gw.default_sandbox_memory_limit.as_deref(),
+                "4Gi"
+            )
+            .unwrap(),
+            Some("4Gi".to_string())
+        );
+    }
 }
diff --git a/crates/openshell-server/src/grpc/sandbox.rs b/crates/openshell-server/src/grpc/sandbox.rs
index 1855972d7..ee1431132 100644
--- a/crates/openshell-server/src/grpc/sandbox.rs
+++ b/crates/openshell-server/src/grpc/sandbox.rs
@@ -53,6 +53,66 @@ const TCP_FORWARD_CHUNK_SIZE: usize = 64 * 1024;
 // Sandbox lifecycle handlers
 // ---------------------------------------------------------------------------
 
+/// Overlay gateway-wide cgroup defaults onto `template.resources.limits`.
+///
+/// For CPU and memory, the function:
+///
+/// 1. Skips the field entirely when the corresponding default is `None`
+///    (operator opt-out via `"0"` or a blank value in `[openshell.gateway]`).
+/// 2. Leaves user-supplied values untouched (overlay semantics — never
+///    overwrites an existing `limits.<field>`).
+/// 3. Inserts the configured default otherwise.
+///
+/// The defaults live on the public `template.resources` Struct, so CPU/memory
+/// flow through `extract_typed_resources` to drivers that support them.
+/// Persisting the post-overlay template makes the applied defaults observable
+/// via `GetSandbox`.
+///
+/// All sandboxes receive CPU and memory caps unless the operator explicitly
+/// disables them. PID caps are not overlaid today.
+fn apply_sandbox_template_defaults(
+    template: &mut SandboxTemplate,
+    cpu_limit: Option<&str>,
+    memory_limit: Option<&str>,
+) {
+    use prost_types::{Struct, Value, value::Kind};
+
+    if cpu_limit.is_none() && memory_limit.is_none() {
+        return;
+    }
+
+    let resources = template.resources.get_or_insert_with(Struct::default);
+    let limits_value = resources
+        .fields
+        .entry("limits".to_string())
+        .or_insert_with(|| Value {
+            kind: Some(Kind::StructValue(Struct::default())),
+        });
+
+    // If the caller put something other than a Struct at `limits` we leave
+    // it alone — the driver layer will surface the type error.
+    let Some(Kind::StructValue(limits)) = limits_value.kind.as_mut() else {
+        return;
+    };
+
+    if let Some(value) = cpu_limit {
+        limits
+            .fields
+            .entry("cpu".to_string())
+            .or_insert_with(|| Value {
+                kind: Some(Kind::StringValue(value.to_string())),
+            });
+    }
+    if let Some(value) = memory_limit {
+        limits
+            .fields
+            .entry("memory".to_string())
+            .or_insert_with(|| Value {
+                kind: Some(Kind::StringValue(value.to_string())),
+            });
+    }
+}
+
 pub(super) async fn handle_create_sandbox(
     state: &Arc<ServerState>,
     request: Request<CreateSandboxRequest>,
@@ -91,6 +151,16 @@ pub(super) async fn handle_create_sandbox(
         template.image = state.compute.default_image().to_string();
     }
 
+    // Overlay gateway-wide cgroup defaults onto the template's
+    // resource limits. User-supplied values win; absent fields are filled
+    // from the runtime `Config`. Persisting after the overlay makes the
+    // applied defaults visible to subsequent `GetSandbox` calls.
+    apply_sandbox_template_defaults(
+        template,
+        state.config.default_sandbox_cpu_limit.as_deref(),
+        state.config.default_sandbox_memory_limit.as_deref(),
+    );
+
     // Ensure process identity defaults to "sandbox" when missing or
     // empty, then validate policy safety before persisting.
     if let Some(ref mut policy) = spec.policy {
@@ -1852,7 +1922,14 @@ async fn run_exec_with_russh(
 #[cfg(test)]
 mod tests {
     use super::*;
+    use crate::compute::new_test_runtime;
     use crate::grpc::test_support::test_server_state;
+    use crate::persistence::Store;
+    use crate::sandbox_index::SandboxIndex;
+    use crate::sandbox_watch::SandboxWatchBus;
+    use crate::supervisor_session::SupervisorSessionRegistry;
+    use crate::tracing_bus::TracingLogBus;
+    use openshell_core::Config;
     use openshell_core::proto::datamodel::v1::ObjectMeta;
     use std::collections::HashMap;
 
@@ -3129,4 +3206,273 @@ mod tests {
             initial_version + 1
         );
     }
+
+    // ---- apply_sandbox_template_defaults ----
+
+    /// Builds a `SandboxTemplate` whose `resources.limits` struct holds `fields`.
+    fn template_with_limits(fields: &[(&str, prost_types::Value)]) -> SandboxTemplate {
+        use prost_types::{Struct, Value, value::Kind};
+        let mut limits = Struct::default();
+        for (k, v) in fields {
+            limits.fields.insert((*k).to_string(), v.clone());
+        }
+        let mut resources = Struct::default();
+        resources.fields.insert(
+            "limits".to_string(),
+            Value {
+                kind: Some(Kind::StructValue(limits)),
+            },
+        );
+        SandboxTemplate {
+            resources: Some(resources),
+            ..Default::default()
+        }
+    }
+
+    /// Wraps `s` in a protobuf `Value` of kind `StringValue`.
+    fn string_value(s: &str) -> prost_types::Value {
+        prost_types::Value {
+            kind: Some(prost_types::value::Kind::StringValue(s.to_string())),
+        }
+    }
+
+    /// Wraps `n` in a protobuf `Value` of kind `NumberValue`.
+    fn number_value(n: f64) -> prost_types::Value {
+        prost_types::Value {
+            kind: Some(prost_types::value::Kind::NumberValue(n)),
+        }
+    }
+
+    /// Returns the `resources.limits` struct; panics if it is missing or not a struct.
+    fn limits(template: &SandboxTemplate) -> &prost_types::Struct {
+        let res = template.resources.as_ref().expect("resources missing");
+        match res.fields.get("limits").and_then(|v| v.kind.as_ref()) {
+            Some(prost_types::value::Kind::StructValue(s)) => s,
+            other => panic!("expected limits struct, got {other:?}"),
+        }
+    }
+
+    /// Reads `limits.<key>` as a string; `None` when absent or not a string value.
+    fn limit_string(template: &SandboxTemplate, key: &str) -> Option<String> {
+        match limits(template).fields.get(key)?.kind.as_ref()? {
+            prost_types::value::Kind::StringValue(s) => Some(s.clone()),
+            _ => None,
+        }
+    }
+
+    /// Reads `limits.<key>` as a number; `None` when absent or not a number value.
+    fn limit_number(template: &SandboxTemplate, key: &str) -> Option<f64> {
+        match limits(template).fields.get(key)?.kind.as_ref()? {
+            prost_types::value::Kind::NumberValue(n) => Some(*n),
+            _ => None,
+        }
+    }
+
+    #[test]
+    fn apply_sandbox_template_defaults_when_resources_is_none() {
+        let mut template = SandboxTemplate::default();
+        apply_sandbox_template_defaults(&mut template, Some("2"), Some("4Gi"));
+        assert_eq!(limit_string(&template, "cpu").as_deref(), Some("2"));
+        assert_eq!(limit_string(&template, "memory").as_deref(), Some("4Gi"));
+        let l = limits(&template);
+        assert!(!l.fields.contains_key("pids"));
+    }
+
+    #[test]
+    fn apply_sandbox_template_defaults_preserves_user_cpu_overlay_others() {
+        let mut template = template_with_limits(&[("cpu", string_value("1"))]);
+        apply_sandbox_template_defaults(&mut template, Some("2"), Some("4Gi"));
+        // User-supplied CPU is preserved.
+        assert_eq!(limit_string(&template, "cpu").as_deref(), Some("1"));
+        // Memory is overlaid from the defaults.
+        assert_eq!(limit_string(&template, "memory").as_deref(), Some("4Gi"));
+        let l = limits(&template);
+        assert!(!l.fields.contains_key("pids"));
+    }
+
+    #[test]
+    fn apply_sandbox_template_defaults_noop_when_all_set() {
+        let mut template = template_with_limits(&[
+            ("cpu", string_value("1")),
+            ("memory", string_value("1Gi")),
+            ("pids", number_value(64.0)),
+        ]);
+        apply_sandbox_template_defaults(&mut template, Some("2"), Some("4Gi"));
+        assert_eq!(limit_string(&template, "cpu").as_deref(), Some("1"));
+        assert_eq!(limit_string(&template, "memory").as_deref(), Some("1Gi"));
+        assert_eq!(limit_number(&template, "pids"), Some(64.0));
+    }
+
+    #[test]
+    fn apply_sandbox_template_defaults_skips_when_limits_is_non_struct() {
+        // A caller put a string in place of the limits struct — leave it
+        // alone; downstream validation will surface the type mismatch.
+        use prost_types::{Struct, Value, value::Kind};
+        let mut resources = Struct::default();
+        resources.fields.insert(
+            "limits".to_string(),
+            Value {
+                kind: Some(Kind::StringValue("bogus".to_string())),
+            },
+        );
+        let mut template = SandboxTemplate {
+            resources: Some(resources),
+            ..Default::default()
+        };
+        apply_sandbox_template_defaults(&mut template, Some("2"), Some("4Gi"));
+        let res = template.resources.as_ref().unwrap();
+        match res.fields.get("limits").and_then(|v| v.kind.as_ref()) {
+            Some(Kind::StringValue(s)) => assert_eq!(s, "bogus"),
+            other => panic!("expected unchanged string, got {other:?}"),
+        }
+    }
+
+    #[test]
+    fn apply_sandbox_template_defaults_skips_when_all_config_disabled() {
+        let mut template = SandboxTemplate::default();
+        apply_sandbox_template_defaults(&mut template, None, None);
+        assert!(template.resources.is_none());
+    }
+
+    #[test]
+    fn apply_sandbox_template_defaults_partial_config_only_injects_provided() {
+        let mut template = SandboxTemplate::default();
+        // CPU disabled, memory enabled.
+        apply_sandbox_template_defaults(&mut template, None, Some("4Gi"));
+        let l = limits(&template);
+        assert!(!l.fields.contains_key("cpu"));
+        assert_eq!(limit_string(&template, "memory").as_deref(), Some("4Gi"));
+        assert!(!l.fields.contains_key("pids"));
+    }
+
+    // ---- handle_create_sandbox integration ----
+
+    #[tokio::test]
+    async fn handle_create_sandbox_persists_default_resource_limits() {
+        use openshell_core::proto::{CreateSandboxRequest, SandboxSpec};
+        let state = test_server_state().await;
+
+        let response = handle_create_sandbox(
+            &state,
+            Request::new(CreateSandboxRequest {
+                name: "default-limits".to_string(),
+                spec: Some(SandboxSpec {
+                    log_level: "info".to_string(),
+                    policy: Some(openshell_core::proto::SandboxPolicy::default()),
+                    ..Default::default()
+                }),
+                labels: HashMap::new(),
+            }),
+        )
+        .await
+        .expect("create sandbox")
+        .into_inner();
+
+        let template = response
+            .sandbox
+            .expect("sandbox in response")
+            .spec
+            .expect("spec")
+            .template
+            .expect("template");
+        assert_eq!(limit_string(&template, "cpu").as_deref(), Some("2"));
+        assert_eq!(limit_string(&template, "memory").as_deref(), Some("4Gi"));
+        let l = limits(&template);
+        assert!(
+            !l.fields.contains_key("pids"),
+            "Kubernetes does not enforce template.resources.limits.pids; got {:?}",
+            l.fields.get("pids")
+        );
+    }
+
+    #[tokio::test]
+    async fn handle_create_sandbox_preserves_user_resource_limits() {
+        use openshell_core::proto::{CreateSandboxRequest, SandboxSpec};
+        let state = test_server_state().await;
+
+        // User supplies a partial `limits.cpu` only. The gateway must keep
+        // it, then fill memory from the defaults.
+        let user_template = template_with_limits(&[("cpu", string_value("500m"))]);
+
+        let response = handle_create_sandbox(
+            &state,
+            Request::new(CreateSandboxRequest {
+                name: "user-cpu".to_string(),
+                spec: Some(SandboxSpec {
+                    log_level: "info".to_string(),
+                    policy: Some(openshell_core::proto::SandboxPolicy::default()),
+                    template: Some(user_template),
+                    ..Default::default()
+                }),
+                labels: HashMap::new(),
+            }),
+        )
+        .await
+        .expect("create sandbox")
+        .into_inner();
+
+        let template = response
+            .sandbox
+            .expect("sandbox in response")
+            .spec
+            .expect("spec")
+            .template
+            .expect("template");
+        assert_eq!(limit_string(&template, "cpu").as_deref(), Some("500m"));
+        assert_eq!(limit_string(&template, "memory").as_deref(), Some("4Gi"));
+        let l = limits(&template);
+        assert!(!l.fields.contains_key("pids"));
+    }
+
+    #[tokio::test]
+    async fn handle_create_sandbox_skips_defaults_when_disabled() {
+        use openshell_core::proto::{CreateSandboxRequest, SandboxSpec};
+        let store = Arc::new(Store::connect("sqlite::memory:").await.unwrap());
+        let compute = new_test_runtime(store.clone()).await;
+        // Build a Config with all sandbox defaults disabled (admin opt-out).
+        let config = Config::new(None)
+            .with_database_url("sqlite::memory:")
+            .with_default_sandbox_cpu_limit(None)
+            .with_default_sandbox_memory_limit(None);
+        let state = Arc::new(ServerState::new(
+            config,
+            store,
+            compute,
+            SandboxIndex::new(),
+            SandboxWatchBus::new(),
+            TracingLogBus::new(),
+            Arc::new(SupervisorSessionRegistry::new()),
+            None,
+        ));
+
+        let response = handle_create_sandbox(
+            &state,
+            Request::new(CreateSandboxRequest {
+                name: "no-defaults".to_string(),
+                spec: Some(SandboxSpec {
+                    log_level: "info".to_string(),
+                    policy: Some(openshell_core::proto::SandboxPolicy::default()),
+                    ..Default::default()
+                }),
+                labels: HashMap::new(),
+            }),
+        )
+        .await
+        .expect("create sandbox")
+        .into_inner();
+
+        let template = response
+            .sandbox
+            .expect("sandbox in response")
+            .spec
+            .expect("spec")
+            .template
+            .expect("template");
+        // With all defaults disabled, the gateway must not touch resources.
+        assert!(
+            template.resources.is_none(),
+            "expected no resources when defaults are disabled, got {:?}",
+            template.resources
+        );
+    }
 }
diff --git a/crates/openshell-server/src/lib.rs b/crates/openshell-server/src/lib.rs
index b7e145bde..36b19e017 100644
--- a/crates/openshell-server/src/lib.rs
+++ b/crates/openshell-server/src/lib.rs
@@ -127,6 +127,9 @@ pub struct ServerState {
     /// `IssueSandboxToken` bootstrap path. Only present when the gateway
     /// runs in-cluster.
     pub k8s_sa_authenticator: Option<Arc<auth::k8s_sa::K8sServiceAccountAuthenticator>>,
+
+    /// Gateway-wide gRPC request rate limiter shared by every multiplex path.
+    pub(crate) grpc_rate_limiter: Option<multiplex::GrpcRateLimiter>,
 }
 
 fn is_benign_tls_handshake_failure(error: &std::io::Error) -> bool {
@@ -159,6 +162,7 @@ impl ServerState {
         supervisor_sessions: Arc<supervisor_session::SupervisorSessionRegistry>,
         oidc_cache: Option<Arc<auth::oidc::JwksCache>>,
     ) -> Self {
+        let grpc_rate_limiter = multiplex::GrpcRateLimiter::from_config(&config);
         Self {
             config,
             store,
@@ -174,6 +178,7 @@ impl ServerState {
             sandbox_jwt_issuer: None,
             sandbox_jwt_authenticator: None,
             k8s_sa_authenticator: None,
+            grpc_rate_limiter,
         }
     }
 }
@@ -219,7 +224,10 @@ pub async fn run_server(
     let sandbox_index = SandboxIndex::new();
     let sandbox_watch_bus = SandboxWatchBus::new();
     let supervisor_sessions = Arc::new(supervisor_session::SupervisorSessionRegistry::new());
+    let driver = configured_compute_driver(&config)?;
+    let config = config.with_compute_drivers([driver]);
     let compute = build_compute_runtime(
+        driver,
         &config,
         &vm_config,
         &docker_config,
@@ -683,6 +691,7 @@ async fn terminate_signal() {
 // that must be passed through, so the count is justified.
 #[allow(clippy::too_many_arguments)]
 async fn build_compute_runtime(
+    driver: ComputeDriverKind,
     config: &Config,
     vm_config: &VmComputeConfig,
     docker_config: &DockerComputeConfig,
@@ -693,7 +702,6 @@ async fn build_compute_runtime(
     tracing_log_bus: TracingLogBus,
     supervisor_sessions: Arc<supervisor_session::SupervisorSessionRegistry>,
 ) -> Result<ComputeRuntime> {
-    let driver = configured_compute_driver(config)?;
     info!(driver = %driver, "Using compute driver");
 
     match driver {
diff --git a/crates/openshell-server/src/multiplex.rs b/crates/openshell-server/src/multiplex.rs
index 4fcb3993a..3c4f89d7f 100644
--- a/crates/openshell-server/src/multiplex.rs
+++ b/crates/openshell-server/src/multiplex.rs
@@ -17,12 +17,13 @@ use hyper_util::{
     service::TowerToHyperService,
 };
 use metrics::{counter, histogram};
+use openshell_core::Config;
 use openshell_core::proto::{
     inference_server::InferenceServer, open_shell_server::OpenShellServer,
 };
 use std::future::Future;
 use std::pin::Pin;
-use std::sync::Arc;
+use std::sync::{Arc, Mutex};
 use std::task::{Context, Poll};
 use std::time::{Duration, Instant};
 use tokio::io::{AsyncRead, AsyncWrite};
@@ -174,6 +175,8 @@ impl MultiplexService {
             self.state.config.mtls_auth.enabled,
             self.state.config.auth.allow_unauthenticated_users,
         );
+        let grpc_service =
+            GrpcRateLimitService::new(grpc_service, self.state.grpc_rate_limiter.clone());
         let http_service = http_router(self.state.clone());
 
         let grpc_service = request_id_middleware!(grpc_service);
@@ -211,6 +214,120 @@ impl MultiplexService {
     }
 }
 
+/// Gateway-wide fixed-window limiter for gRPC requests.
+///
+/// The window state lives behind an `Arc<Mutex<..>>`, so every clone of the
+/// limiter draws from the same request budget.
+#[derive(Clone, Debug)]
+pub struct GrpcRateLimiter {
+    /// Requests admitted per window.
+    requests: u64,
+    /// Length of one accounting window.
+    window: Duration,
+    /// Start of the current window and its remaining budget.
+    state: Arc<Mutex<GrpcRateLimitState>>,
+}
+
+/// Mutable bookkeeping for the window currently in effect.
+#[derive(Debug)]
+struct GrpcRateLimitState {
+    /// Instant at which the current window began.
+    window_started: Instant,
+    /// Requests still admissible before the window rolls over.
+    remaining: u64,
+}
+
+impl GrpcRateLimiter {
+    /// Builds a limiter from the gateway configuration.
+    ///
+    /// Returns `None` when `Config::grpc_rate_limit` yields no limit.
+    pub fn from_config(config: &Config) -> Option<Self> {
+        let (requests, window) = config.grpc_rate_limit()?;
+        Some(Self {
+            requests,
+            window,
+            state: Arc::new(Mutex::new(GrpcRateLimitState {
+                window_started: Instant::now(),
+                remaining: requests,
+            })),
+        })
+    }
+
+    /// Takes one request from the current window's budget.
+    ///
+    /// Returns `false` once the budget is spent; the budget is refilled the
+    /// first time this is called after the window has elapsed.
+    fn allow(&self) -> bool {
+        let now = Instant::now();
+        // A poisoned mutex still guards usable counters, so recover the
+        // guard rather than propagate another caller's panic.
+        let mut state = self
+            .state
+            .lock()
+            .unwrap_or_else(std::sync::PoisonError::into_inner);
+        // `now` is sampled before the lock is taken, so a concurrent caller
+        // may already have advanced `window_started` past it. Saturate
+        // explicitly: `duration_since` is documented as free to panic on
+        // that ordering in future Rust versions.
+        if now.saturating_duration_since(state.window_started) >= self.window {
+            state.window_started = now;
+            state.remaining = self.requests;
+        }
+        if state.remaining == 0 {
+            false
+        } else {
+            state.remaining -= 1;
+            true
+        }
+    }
+}
+
+/// Service wrapper that applies an optional [`GrpcRateLimiter`] before
+/// delegating to the wrapped gRPC service.
+#[derive(Clone)]
+struct GrpcRateLimitService<S> {
+    inner: S,
+    limiter: Option<GrpcRateLimiter>,
+}
+
+impl<S> GrpcRateLimitService<S> {
+    /// Wraps `inner`; a `None` limiter lets every request through.
+    fn new(inner: S, limiter: Option<GrpcRateLimiter>) -> Self {
+        Self { inner, limiter }
+    }
+}
+
+impl<S, B> tower::Service<Request<B>> for GrpcRateLimitService<S>
+where
+    S: tower::Service<Request<B>, Response = Response<tonic::body::BoxBody>>,
+    S::Future: Send + 'static,
+    B: Send + 'static,
+{
+    type Response = S::Response;
+    type Error = S::Error;
+    type Future = Pin<Box<dyn Future<Output = Result<Self::Response, Self::Error>> + Send>>;
+
+    fn poll_ready(&mut self, cx: &mut Context<'_>) -> Poll<Result<(), Self::Error>> {
+        self.inner.poll_ready(cx)
+    }
+
+    fn call(&mut self, req: Request<B>) -> Self::Future {
+        if self
+            .limiter
+            .as_ref()
+            .is_some_and(|limiter| !limiter.allow())
+        {
+            // Over the limit: answer with gRPC `RESOURCE_EXHAUSTED` as a
+            // normal response and never invoke the inner service.
+            let response =
+                tonic::Status::resource_exhausted("gRPC rate limit exceeded").into_http();
+            return Box::pin(async move { Ok(response) });
+        }
+        let future = self.inner.call(req);
+        Box::pin(future)
+    }
+}
+
 /// Combined gRPC service that routes between `OpenShell` and Inference services
 /// based on the request path prefix.
 #[derive(Clone)]
@@ -649,6 +738,8 @@ mod tests {
     use bytes::Bytes;
     use http_body_util::Empty;
     use std::sync::Mutex;
+    use std::sync::atomic::{AtomicUsize, Ordering};
+    use tower::Service;
 
     #[test]
     fn uuid_request_id_generates_valid_uuid() {
@@ -788,6 +879,164 @@ mod tests {
         assert_eq!(request_id.to_str().unwrap(), "grpc-corr-id");
     }
 
+    #[derive(Clone)]
+    struct CountingGrpcService {
+        calls: Arc<AtomicUsize>,
+    }
+
+    impl Service<Request<()>> for CountingGrpcService {
+        type Response = Response<tonic::body::BoxBody>;
+        type Error = std::convert::Infallible;
+        type Future = std::future::Ready<Result<Self::Response, Self::Error>>;
+
+        fn poll_ready(&mut self, _cx: &mut Context<'_>) -> Poll<Result<(), Self::Error>> {
+            Poll::Ready(Ok(()))
+        }
+
+        fn call(&mut self, _req: Request<()>) -> Self::Future {
+            self.calls.fetch_add(1, Ordering::Relaxed);
+            std::future::ready(Ok(Response::new(tonic::body::empty_body())))
+        }
+    }
+
+    #[tokio::test]
+    async fn grpc_rate_limit_returns_resource_exhausted_after_limit() {
+        let config = Config::new(None).with_grpc_rate_limit(Some(1), Some(60));
+        let limiter = GrpcRateLimiter::from_config(&config);
+        let calls = Arc::new(AtomicUsize::new(0));
+        let mut service = GrpcRateLimitService::new(
+            CountingGrpcService {
+                calls: calls.clone(),
+            },
+            limiter,
+        );
+
+        let first = service
+            .ready()
+            .await
+            .unwrap()
+            .call(Request::new(()))
+            .await
+            .unwrap();
+        assert_eq!(grpc_status_from_response(&first), "0");
+
+        let second = service
+            .ready()
+            .await
+            .unwrap()
+            .call(Request::new(()))
+            .await
+            .unwrap();
+        assert_eq!(grpc_status_from_response(&second), "8");
+        assert_eq!(calls.load(Ordering::Relaxed), 1);
+    }
+
+    #[tokio::test]
+    async fn grpc_rate_limit_disabled_passes_requests_through() {
+        let config = Config::new(None).with_grpc_rate_limit(Some(0), Some(60));
+        let limiter = GrpcRateLimiter::from_config(&config);
+        let calls = Arc::new(AtomicUsize::new(0));
+        let mut service = GrpcRateLimitService::new(
+            CountingGrpcService {
+                calls: calls.clone(),
+            },
+            limiter,
+        );
+
+        for _ in 0..3 {
+            let response = service
+                .ready()
+                .await
+                .unwrap()
+                .call(Request::new(()))
+                .await
+                .unwrap();
+            assert_eq!(grpc_status_from_response(&response), "0");
+        }
+        assert_eq!(calls.load(Ordering::Relaxed), 3);
+    }
+
+    #[tokio::test]
+    async fn grpc_rate_limit_resets_after_window() {
+        let config = Config::new(None).with_grpc_rate_limit(Some(1), Some(60));
+        let limiter = GrpcRateLimiter::from_config(&config).expect("limiter should be enabled");
+        let calls = Arc::new(AtomicUsize::new(0));
+        let mut service = GrpcRateLimitService::new(
+            CountingGrpcService {
+                calls: calls.clone(),
+            },
+            Some(limiter.clone()),
+        );
+
+        let first = service
+            .ready()
+            .await
+            .unwrap()
+            .call(Request::new(()))
+            .await
+            .unwrap();
+        assert_eq!(grpc_status_from_response(&first), "0");
+
+        {
+            let mut state = limiter
+                .state
+                .lock()
+                .unwrap_or_else(std::sync::PoisonError::into_inner);
+            state.window_started = state
+                .window_started
+                .checked_sub(Duration::from_secs(61))
+                .expect("test window rewind should be valid");
+        }
+
+        let second = service
+            .ready()
+            .await
+            .unwrap()
+            .call(Request::new(()))
+            .await
+            .unwrap();
+        assert_eq!(grpc_status_from_response(&second), "0");
+        assert_eq!(calls.load(Ordering::Relaxed), 2);
+    }
+
+    #[tokio::test]
+    async fn grpc_rate_limit_state_is_shared_across_service_clones() {
+        let config = Config::new(None).with_grpc_rate_limit(Some(1), Some(60));
+        let limiter = GrpcRateLimiter::from_config(&config);
+        let calls = Arc::new(AtomicUsize::new(0));
+        let mut first_service = GrpcRateLimitService::new(
+            CountingGrpcService {
+                calls: calls.clone(),
+            },
+            limiter.clone(),
+        );
+        let mut second_service = GrpcRateLimitService::new(
+            CountingGrpcService {
+                calls: calls.clone(),
+            },
+            limiter,
+        );
+
+        let first = first_service
+            .ready()
+            .await
+            .unwrap()
+            .call(Request::new(()))
+            .await
+            .unwrap();
+        assert_eq!(grpc_status_from_response(&first), "0");
+
+        let second = second_service
+            .ready()
+            .await
+            .unwrap()
+            .call(Request::new(()))
+            .await
+            .unwrap();
+        assert_eq!(grpc_status_from_response(&second), "8");
+        assert_eq!(calls.load(Ordering::Relaxed), 1);
+    }
+
     #[derive(Clone)]
     struct TraceBuf(Arc<Mutex<Vec<u8>>>);
 
diff --git a/docs/reference/gateway-config.mdx b/docs/reference/gateway-config.mdx
index d0f4ef32b..656c4bb2f 100644
--- a/docs/reference/gateway-config.mdx
+++ b/docs/reference/gateway-config.mdx
@@ -87,6 +87,25 @@ default_image          = "ghcr.io/nvidia/openshell/sandbox:latest"
 supervisor_image       = "ghcr.io/nvidia/openshell/supervisor:latest"
 client_tls_secret_name = "openshell-client-tls"
 
+# Per-sandbox CPU/memory cgroup defaults. Overlaid on
+# `template.resources.limits` at CreateSandbox time when the request omits the
+# field. Use Kubernetes-style quantity strings.
+#
+# Opt-out values:
+#   * `"0"`, `""`, or whitespace-only.
+# An opt-out value means the gateway applies no default on that dimension;
+# the sandbox runs without a gateway-imposed bound. Omitting a key entirely
+# uses the built-in default ("2", "4Gi").
+#
+# Negative values are rejected at config load time.
+default_sandbox_cpu_limit    = "2"
+default_sandbox_memory_limit = "4Gi"
+
+# Optional gRPC rate limit. Both values must be positive to enable the limit.
+# Set either value to 0, or omit both, to disable rate limiting.
+grpc_rate_limit_requests       = 120
+grpc_rate_limit_window_seconds = 60
+
 # Gateway listener TLS (distinct from the per-driver guest_tls_*).
 [openshell.gateway.tls]
 cert_path             = "/etc/openshell/certs/gateway.pem"