diff --git a/crates/openshell-bootstrap/src/docker.rs b/crates/openshell-bootstrap/src/docker.rs index cc63aacce..0ee84e6c8 100644 --- a/crates/openshell-bootstrap/src/docker.rs +++ b/crates/openshell-bootstrap/src/docker.rs @@ -596,6 +596,34 @@ pub async fn ensure_container( } } + // On Tegra platforms (Jetson) the NVIDIA container toolkit and CDI spec + // generation reads host-file injection config from + // /etc/nvidia-container-runtime/host-files-for-container.d on the host. + // Without this bind mount, the device plugin inside k3s cannot discover + // Tegra GPU devices and fails with "CDI options are only supported on + // NVML-based systems". + // + // We detect Tegra by querying the Docker daemon's kernel version (which + // works for both local and remote/SSH deploys) rather than checking the + // local filesystem. + if !device_ids.is_empty() { + let info = docker.info().await.into_diagnostic()?; + let is_tegra = info + .kernel_version + .as_deref() + .map_or(false, |k| k.contains("tegra")); + if is_tegra { + const HOST_FILES_DIR: &str = "/etc/nvidia-container-runtime/host-files-for-container.d"; + tracing::info!( + kernel_version = info.kernel_version.as_deref().unwrap_or("unknown"), + "Detected Tegra platform, bind-mounting {HOST_FILES_DIR} for CDI spec generation" + ); + let mut binds = host_config.binds.take().unwrap_or_default(); + binds.push(format!("{HOST_FILES_DIR}:{HOST_FILES_DIR}:ro")); + host_config.binds = Some(binds); + } + } + let mut cmd = vec![ "server".to_string(), "--disable=traefik".to_string(), diff --git a/crates/openshell-sandbox/src/process.rs b/crates/openshell-sandbox/src/process.rs index b93d125ab..94d18b5af 100644 --- a/crates/openshell-sandbox/src/process.rs +++ b/crates/openshell-sandbox/src/process.rs @@ -414,7 +414,22 @@ pub fn drop_privileges(policy: &SandboxPolicy) -> Result<()> { target_os = "redox" )))] { + let cdi_gids = snapshot_cdi_gids(); nix::unistd::initgroups(user_cstr.as_c_str(), group.gid).into_diagnostic()?; + 
if !cdi_gids.is_empty() { + let mut merged: Vec<nix::unistd::Gid> = + nix::unistd::getgroups().unwrap_or_default(); + for gid in &cdi_gids { + if !merged.contains(gid) { + merged.push(*gid); + } + } + tracing::info!( + gids = ?cdi_gids.iter().map(|g| g.as_raw()).collect::<Vec<_>>(), + "Preserving CDI-injected supplementary GIDs across initgroups" + ); + nix::unistd::setgroups(&merged).into_diagnostic()?; + } } } @@ -458,6 +473,30 @@ pub fn drop_privileges(policy: &SandboxPolicy) -> Result<()> { Ok(()) } +/// Snapshot supplementary GIDs injected by the container runtime (e.g. via CDI +/// `additionalGids`) before `initgroups` replaces them. +/// +/// Only captures GIDs when GPU devices are present — on non-GPU sandboxes the +/// runtime won't inject device-access GIDs so there is nothing to preserve. +/// GID 0 (root) is always excluded to avoid inadvertent privilege retention. +#[cfg(not(any( + target_os = "macos", + target_os = "ios", + target_os = "haiku", + target_os = "redox" +)))] +fn snapshot_cdi_gids() -> Vec<nix::unistd::Gid> { + if !std::path::Path::new("/dev/nvidiactl").exists() { + return Vec::new(); + } + let root_gid = nix::unistd::Gid::from_raw(0); + nix::unistd::getgroups() + .unwrap_or_default() + .into_iter() + .filter(|&g| g != root_gid) + .collect() +} + +/// Process exit status. 
#[derive(Debug, Clone, Copy)] pub struct ProcessStatus { diff --git a/deploy/docker/Dockerfile.images b/deploy/docker/Dockerfile.images index af17b9b0a..31db828e7 100644 --- a/deploy/docker/Dockerfile.images +++ b/deploy/docker/Dockerfile.images @@ -19,7 +19,7 @@ ARG K3S_VERSION=v1.35.2-k3s1 ARG K3S_DIGEST=sha256:c3184157c3048112bab0c3e17405991da486cb3413511eba23f7650efd70776b ARG K9S_VERSION=v0.50.18 ARG HELM_VERSION=v3.17.3 -ARG NVIDIA_CONTAINER_TOOLKIT_VERSION=1.18.2-1 +ARG NVIDIA_CONTAINER_TOOLKIT_VERSION=1.19.0-1 # --------------------------------------------------------------------------- # Shared Rust build stages diff --git a/deploy/kube/gpu-manifests/nvidia-device-plugin-helmchart.yaml b/deploy/kube/gpu-manifests/nvidia-device-plugin-helmchart.yaml index 1cb0ca70a..737efdb54 100644 --- a/deploy/kube/gpu-manifests/nvidia-device-plugin-helmchart.yaml +++ b/deploy/kube/gpu-manifests/nvidia-device-plugin-helmchart.yaml @@ -31,6 +31,9 @@ spec: targetNamespace: nvidia-device-plugin createNamespace: true valuesContent: |- + image: + repository: ghcr.io/nvidia/k8s-device-plugin + tag: "2ab68c16" runtimeClassName: nvidia deviceListStrategy: cdi-cri deviceIDStrategy: index diff --git a/e2e/python/test_sandbox_gpu.py b/e2e/python/test_sandbox_gpu.py index 510b3d92d..472d0e38c 100644 --- a/e2e/python/test_sandbox_gpu.py +++ b/e2e/python/test_sandbox_gpu.py @@ -20,11 +20,13 @@ def test_gpu_sandbox_reports_available_gpu( sandbox: Callable[..., Sandbox], gpu_sandbox_spec: datamodel_pb2.SandboxSpec, ) -> None: + nvidia_smi_args = ["--query-gpu=name", "--format=csv,noheader"] with sandbox(spec=gpu_sandbox_spec, delete_on_exit=True) as sb: - result = sb.exec( - ["nvidia-smi", "--query-gpu=name", "--format=csv,noheader"], - timeout_seconds=30, - ) + result = sb.exec(["nvidia-smi", *nvidia_smi_args], timeout_seconds=30) + if result.exit_code != 0: + # On some platforms (e.g. Tegra/Jetson) nvidia-smi lives in + # /usr/sbin rather than /usr/bin and may not be on PATH. 
+ result = sb.exec(["/usr/sbin/nvidia-smi", *nvidia_smi_args], timeout_seconds=30) assert result.exit_code == 0, result.stderr assert result.stdout.strip()