Skip to content

Commit f05cdf0

Browse files
committed
fix(sandbox): add /proc and /dev/urandom to Landlock baselines for GPU and proxy mode
After the Landlock per-path fix (#677), missing paths no longer silently disable the entire ruleset. This exposed two gaps: Proxy baseline: - /proc and /dev/urandom are needed by virtually every process (already in restrictive_default_policy but missing from the enrichment baseline). GPU baseline: - /proc must be read-write because CUDA writes to /proc/<pid>/task/<tid>/comm during cuInit() to set thread names. Using /proc (not /proc/self/task) because Landlock rules bind to inodes and child processes have different procfs inodes than the parent shell. Also skips chown for virtual filesystem paths (/proc, /sys) in prepare_filesystem since they are kernel-managed and may contain symlinks (e.g. /proc/self) that trigger the symlink safety check.
1 parent 5f7b8e5 commit f05cdf0

File tree

1 file changed

+51
-17
lines changed
  • crates/openshell-sandbox/src

1 file changed

+51
-17
lines changed

crates/openshell-sandbox/src/lib.rs

Lines changed: 51 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -877,8 +877,22 @@ pub(crate) fn spawn_route_refresh(
877877

878878
/// Minimum read-only paths required for a proxy-mode sandbox child process to
879879
/// function: dynamic linker, shared libraries, DNS resolution, CA certs,
880-
/// Python venv, and openshell logs.
881-
const PROXY_BASELINE_READ_ONLY: &[&str] = &["/usr", "/lib", "/etc", "/app", "/var/log"];
880+
/// Python venv, openshell logs, process info, and random bytes.
881+
///
882+
/// `/proc` and `/dev/urandom` are included here for the same reasons they
883+
/// appear in `restrictive_default_policy()`: virtually every process needs
884+
/// them. Before the Landlock per-path fix (#677) these were effectively free
885+
/// because a missing path silently disabled the entire ruleset; now they must
886+
/// be explicit.
887+
const PROXY_BASELINE_READ_ONLY: &[&str] = &[
888+
"/usr",
889+
"/lib",
890+
"/etc",
891+
"/app",
892+
"/var/log",
893+
"/proc",
894+
"/dev/urandom",
895+
];
882896

883897
/// Minimum read-write paths required for a proxy-mode sandbox child process:
884898
/// user working directory and temporary files.
@@ -899,7 +913,16 @@ const GPU_BASELINE_READ_ONLY_FIXED: &[&str] = &["/run/nvidia-persistenced"];
899913
/// `/dev/nvidia-modeset`: control and UVM devices injected by CDI.
900914
/// Landlock READ_FILE/WRITE_FILE restricts open(2) on device files even
901915
/// when DAC permissions would otherwise allow it. Device nodes need
902-
/// read-write because NVML opens them with O_RDWR.
916+
/// read-write because NVML/CUDA opens them with O_RDWR.
917+
///
918+
/// `/proc`: CUDA writes to `/proc/<pid>/task/<tid>/comm` during `cuInit()`
919+
/// to set thread names. Without write access, `cuInit()` returns error 304
920+
/// (`cudaErrorOperatingSystem`). We must use `/proc` (not `/proc/self/task`)
921+
/// because Landlock rules bind to inodes: `/proc/self/task` in the pre_exec
922+
/// hook resolves to the shell's PID, but child processes (python, etc.)
923+
/// have different PIDs and thus different procfs inodes. Security impact
924+
/// is limited — writable proc entries (`oom_score_adj`, etc.) are already
925+
/// kernel-restricted for non-root users; Landlock is defense-in-depth.
903926
///
904927
/// Per-GPU device files (`/dev/nvidia0`, `/dev/nvidia1`, …) are enumerated
905928
/// at runtime by `gpu_baseline_read_write_paths()` since the count varies.
@@ -908,6 +931,7 @@ const GPU_BASELINE_READ_WRITE_FIXED: &[&str] = &[
908931
"/dev/nvidia-uvm",
909932
"/dev/nvidia-uvm-tools",
910933
"/dev/nvidia-modeset",
934+
"/proc",
911935
];
912936

913937
/// Returns true if GPU devices are present in the container.
@@ -1380,22 +1404,32 @@ fn prepare_filesystem(policy: &SandboxPolicy) -> Result<()> {
13801404
// The TOCTOU window between lstat and chown is not exploitable because
13811405
// no untrusted process is running yet (the child has not been forked).
13821406
for path in &policy.filesystem.read_write {
1383-
// Check for symlinks before touching the path. Character/block devices
1384-
// (e.g. /dev/null) are legitimate read_write entries and must be allowed.
1385-
if let Ok(meta) = std::fs::symlink_metadata(path) {
1386-
if meta.file_type().is_symlink() {
1387-
return Err(miette::miette!(
1388-
"read_write path '{}' is a symlink — refusing to chown (potential privilege escalation)",
1389-
path.display()
1390-
));
1391-
}
1407+
// Virtual filesystems (/proc, /sys) are kernel-managed — skip chown
1408+
// and mkdir. These paths are added to the Landlock ruleset only;
1409+
// ownership changes are meaningless and may fail (symlinks like
1410+
// /proc/self, permission errors on sysfs nodes).
1411+
let is_virtual_fs = path.starts_with("/proc") || path.starts_with("/sys");
1412+
1413+
if is_virtual_fs {
1414+
debug!(path = %path.display(), "Skipping chown for virtual filesystem path");
13921415
} else {
1393-
debug!(path = %path.display(), "Creating read_write directory");
1394-
std::fs::create_dir_all(path).into_diagnostic()?;
1395-
}
1416+
// Check for symlinks before touching the path. Character/block devices
1417+
// (e.g. /dev/null) are legitimate read_write entries and must be allowed.
1418+
if let Ok(meta) = std::fs::symlink_metadata(path) {
1419+
if meta.file_type().is_symlink() {
1420+
return Err(miette::miette!(
1421+
"read_write path '{}' is a symlink — refusing to chown (potential privilege escalation)",
1422+
path.display()
1423+
));
1424+
}
1425+
} else {
1426+
debug!(path = %path.display(), "Creating read_write directory");
1427+
std::fs::create_dir_all(path).into_diagnostic()?;
1428+
}
13961429

1397-
debug!(path = %path.display(), ?uid, ?gid, "Setting ownership on read_write directory");
1398-
chown(path, uid, gid).into_diagnostic()?;
1430+
debug!(path = %path.display(), ?uid, ?gid, "Setting ownership on read_write directory");
1431+
chown(path, uid, gid).into_diagnostic()?;
1432+
}
13991433
}
14001434

14011435
Ok(())

0 commit comments

Comments
 (0)