@@ -877,8 +877,22 @@ pub(crate) fn spawn_route_refresh(
877877
878878/// Minimum read-only paths required for a proxy-mode sandbox child process to
879879/// function: dynamic linker, shared libraries, DNS resolution, CA certs,
880- /// Python venv, and openshell logs.
881- const PROXY_BASELINE_READ_ONLY : & [ & str ] = & [ "/usr" , "/lib" , "/etc" , "/app" , "/var/log" ] ;
880+ /// Python venv, openshell logs, process info, and random bytes.
881+ ///
882+ /// `/proc` and `/dev/urandom` are included here for the same reasons they
883+ /// appear in `restrictive_default_policy()`: virtually every process needs
884+ /// them. Before the Landlock per-path fix (#677) these were effectively free
885+ /// because a missing path silently disabled the entire ruleset; now they must
886+ /// be explicit.
887+ const PROXY_BASELINE_READ_ONLY : & [ & str ] = & [
888+ "/usr" ,
889+ "/lib" ,
890+ "/etc" ,
891+ "/app" ,
892+ "/var/log" ,
893+ "/proc" ,
894+ "/dev/urandom" ,
895+ ] ;
882896
883897/// Minimum read-write paths required for a proxy-mode sandbox child process:
884898/// user working directory and temporary files.
@@ -899,7 +913,16 @@ const GPU_BASELINE_READ_ONLY_FIXED: &[&str] = &["/run/nvidia-persistenced"];
899913/// `/dev/nvidia-modeset`: control and UVM devices injected by CDI.
900914/// Landlock READ_FILE/WRITE_FILE restricts open(2) on device files even
901915/// when DAC permissions would otherwise allow it. Device nodes need
902- /// read-write because NVML opens them with O_RDWR.
916+ /// read-write because NVML/CUDA opens them with O_RDWR.
917+ ///
918+ /// `/proc`: CUDA writes to `/proc/<pid>/task/<tid>/comm` during `cuInit()`
919+ /// to set thread names. Without write access, `cuInit()` returns error 304
920+ /// (`cudaErrorOperatingSystem`). We must use `/proc` (not `/proc/self/task`)
921+ /// because Landlock rules bind to inodes: `/proc/self/task` in the pre_exec
922+ /// hook resolves to the shell's PID, but child processes (python, etc.)
923+ /// have different PIDs and thus different procfs inodes. Security impact
924+ /// is limited — writable proc entries (`oom_score_adj`, etc.) are already
925+ /// kernel-restricted for non-root users; Landlock is defense-in-depth.
903926///
904927/// Per-GPU device files (`/dev/nvidia0`, `/dev/nvidia1`, …) are enumerated
905928/// at runtime by `gpu_baseline_read_write_paths()` since the count varies.
@@ -908,6 +931,7 @@ const GPU_BASELINE_READ_WRITE_FIXED: &[&str] = &[
908931 "/dev/nvidia-uvm" ,
909932 "/dev/nvidia-uvm-tools" ,
910933 "/dev/nvidia-modeset" ,
934+ "/proc" ,
911935] ;
912936
913937/// Returns true if GPU devices are present in the container.
@@ -1380,22 +1404,32 @@ fn prepare_filesystem(policy: &SandboxPolicy) -> Result<()> {
13801404 // The TOCTOU window between lstat and chown is not exploitable because
13811405 // no untrusted process is running yet (the child has not been forked).
13821406 for path in & policy. filesystem . read_write {
1383- // Check for symlinks before touching the path. Character/block devices
1384- // (e.g. /dev/null) are legitimate read_write entries and must be allowed.
1385- if let Ok ( meta) = std:: fs:: symlink_metadata ( path) {
1386- if meta. file_type ( ) . is_symlink ( ) {
1387- return Err ( miette:: miette!(
1388- "read_write path '{}' is a symlink — refusing to chown (potential privilege escalation)" ,
1389- path. display( )
1390- ) ) ;
1391- }
1407+ // Virtual filesystems (/proc, /sys) are kernel-managed — skip chown
1408+ // and mkdir. These paths are added to the Landlock ruleset only;
1409+ // ownership changes are meaningless and may fail (symlinks like
1410+ // /proc/self, permission errors on sysfs nodes).
1411+ let is_virtual_fs = path. starts_with ( "/proc" ) || path. starts_with ( "/sys" ) ;
1412+
1413+ if is_virtual_fs {
1414+ debug ! ( path = %path. display( ) , "Skipping chown for virtual filesystem path" ) ;
13921415 } else {
1393- debug ! ( path = %path. display( ) , "Creating read_write directory" ) ;
1394- std:: fs:: create_dir_all ( path) . into_diagnostic ( ) ?;
1395- }
1416+ // Check for symlinks before touching the path. Character/block devices
1417+ // (e.g. /dev/null) are legitimate read_write entries and must be allowed.
1418+ if let Ok ( meta) = std:: fs:: symlink_metadata ( path) {
1419+ if meta. file_type ( ) . is_symlink ( ) {
1420+ return Err ( miette:: miette!(
1421+ "read_write path '{}' is a symlink — refusing to chown (potential privilege escalation)" ,
1422+ path. display( )
1423+ ) ) ;
1424+ }
1425+ } else {
1426+ debug ! ( path = %path. display( ) , "Creating read_write directory" ) ;
1427+ std:: fs:: create_dir_all ( path) . into_diagnostic ( ) ?;
1428+ }
13961429
1397- debug ! ( path = %path. display( ) , ?uid, ?gid, "Setting ownership on read_write directory" ) ;
1398- chown ( path, uid, gid) . into_diagnostic ( ) ?;
1430+ debug ! ( path = %path. display( ) , ?uid, ?gid, "Setting ownership on read_write directory" ) ;
1431+ chown ( path, uid, gid) . into_diagnostic ( ) ?;
1432+ }
13991433 }
14001434
14011435 Ok ( ( ) )
0 commit comments