From a6124b130064d559b9a74422e1745c7a939179dc Mon Sep 17 00:00:00 2001 From: Nathan Flurry Date: Sat, 27 Jun 2026 17:48:25 -0700 Subject: [PATCH] =?UTF-8?q?feat(python):=20complete=20the=20Pyodide=20FS?= =?UTF-8?q?=20backend=20=E2=80=94=20symlink/readlink=20+=20setattr->host?= =?UTF-8?q?=20(chmod/chown/utimes)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../assets/runners/python-runner.mjs | 74 +++++++- crates/execution/src/python.rs | 39 ++++ crates/sidecar/src/execution.rs | 14 +- crates/sidecar/src/filesystem.rs | 42 +++++ crates/sidecar/tests/python.rs | 176 ++++++++++++++++++ crates/sidecar/tests/service.rs | 24 +++ 6 files changed, 361 insertions(+), 8 deletions(-) diff --git a/crates/execution/assets/runners/python-runner.mjs b/crates/execution/assets/runners/python-runner.mjs index f28f821d..4af1292e 100644 --- a/crates/execution/assets/runners/python-runner.mjs +++ b/crates/execution/assets/runners/python-runner.mjs @@ -596,6 +596,16 @@ function createPythonBridgeRpcBridge() { fsRenameSync(path, destination) { requestSync('fsRename', { path, destination }); }, + fsSymlinkSync(target, path) { + requestSync('fsSymlink', { target, path }); + }, + fsReadlinkSync(path) { + const result = requestSync('fsReadlink', { path }); + return result.target ?? ''; + }, + fsSetattrSync(path, attr) { + requestSync('fsSetattr', { path, ...attr }); + }, httpRequestSync(url, method = 'GET', headersJson = '{}', bodyBase64 = null) { let headers; try { @@ -781,6 +791,16 @@ function createPythonFdRpcBridge() { fsRenameSync(path, destination) { requestSync('fsRename', { path, destination }); }, + fsSymlinkSync(target, path) { + requestSync('fsSymlink', { target, path }); + }, + fsReadlinkSync(path) { + const result = requestSync('fsReadlink', { path }); + return result.target ?? ''; + }, + fsSetattrSync(path, attr) { + requestSync('fsSetattr', { path, ...attr }); + }, httpRequestSync(url, method = 'GET', headersJson = '{}', bodyBase64 = null) { let headers; try { @@ -1465,6 +1485,7 @@ function installPythonWorkspaceFs(pyodide, bridge) { const memfsDirStreamOps = MEMFS.ops_table.dir.stream; const memfsFileNodeOps = MEMFS.ops_table.file.node; const memfsFileStreamOps = MEMFS.ops_table.file.stream; + const memfsLinkNodeOps = MEMFS.ops_table.link.node; const workspaceDirStreamOps = memfsDirStreamOps; function joinGuestPath(parentPath, name) { @@ -1529,6 +1550,8 @@ function installPythonWorkspaceFs(pyodide, bridge) { if (FS.isDir(mode)) { node.node_ops = workspaceDirNodeOps; node.stream_ops = workspaceDirStreamOps; + } else if (FS.isLink(mode)) { + node.node_ops = workspaceLinkNodeOps; } else if (FS.isFile(mode)) { node.node_ops = workspaceFileNodeOps; node.stream_ops = workspaceFileStreamOps; @@ -1630,6 +1653,46 @@ function installPythonWorkspaceFs(pyodide, bridge) { }; } + function toEpochMs(value) { + if (value == null) return null; + if (typeof value === 'number') return value; + if (typeof value.getTime === 'function') return value.getTime(); + return null; + } + + // Propagate chmod/chown/utimes from an Emscripten `setattr` into the host VFS. + // (size/truncate is handled via the dirty-write path, not here.) + function propagateSetattrToHost(node, attr) { + if (!attr) return; + const payload = {}; + if (attr.mode != null) payload.mode = attr.mode & 0o7777; + if (attr.uid != null) payload.uid = attr.uid; + if (attr.gid != null) payload.gid = attr.gid; + const atimeMs = toEpochMs(attr.atime ?? attr.timestamp); + const mtimeMs = toEpochMs(attr.mtime ?? attr.timestamp); + if (atimeMs != null && mtimeMs != null) { + payload.atimeMs = Math.trunc(atimeMs); + payload.mtimeMs = Math.trunc(mtimeMs); + } + if (Object.keys(payload).length === 0) return; + withFsErrors(() => bridge.fsSetattrSync(nodeGuestPath(node), payload)); + } + + const workspaceLinkNodeOps = { + // A symlink node reports itself (lstat semantics), not its target — so use + // the in-memory link mode rather than a host stat (which follows the link). + getattr(node) { + return makeStat(node, null); + }, + setattr(node, attr) { + memfsLinkNodeOps.setattr(node, attr); + propagateSetattrToHost(node, attr); + }, + readlink(node) { + return withFsErrors(() => bridge.fsReadlinkSync(nodeGuestPath(node))); + }, + }; + const workspaceFileNodeOps = { getattr(node) { const stat = node.agentOSDirty @@ -1646,6 +1709,7 @@ function installPythonWorkspaceFs(pyodide, bridge) { node.agentOSDirty = true; node.agentOSLoaded = true; } + propagateSetattrToHost(node, attr); }, }; @@ -1690,6 +1754,7 @@ function installPythonWorkspaceFs(pyodide, bridge) { }, setattr(node, attr) { memfsDirNodeOps.setattr(node, attr); + propagateSetattrToHost(node, attr); }, lookup(parent, name) { syncDirectory(parent); @@ -1749,8 +1814,13 @@ function installPythonWorkspaceFs(pyodide, bridge) { syncDirectory(node); return memfsDirNodeOps.readdir(node); }, - symlink() { - throw new FS.ErrnoError(ERRNO_CODES.ENOSYS); + symlink(parent, newName, oldPath) { + const guestPath = joinGuestPath(nodeGuestPath(parent), newName); + withFsErrors(() => bridge.fsSymlinkSync(oldPath, guestPath)); + const node = createWorkspaceNode(parent, newName, 0o120777, 0, guestPath); + node.link = oldPath; + node.usedBytes = oldPath.length; + return node; }, }; diff --git a/crates/execution/src/python.rs b/crates/execution/src/python.rs index 45fb6413..1dd10945 100644 --- a/crates/execution/src/python.rs +++ b/crates/execution/src/python.rs @@ -53,6 +53,9 @@ pub enum PythonVfsRpcMethod { Unlink, Rmdir, Rename, + Symlink, + ReadLink, + Setattr, HttpRequest, DnsLookup, SubprocessRun, @@ -69,6 +72,9 @@ impl PythonVfsRpcMethod { "fsUnlink" => Some(Self::Unlink), "fsRmdir" => Some(Self::Rmdir), "fsRename" => Some(Self::Rename), + "fsSymlink" => Some(Self::Symlink), + "fsReadlink" => Some(Self::ReadLink), + "fsSetattr" => Some(Self::Setattr), "httpRequest" => Some(Self::HttpRequest), "dnsLookup" => Some(Self::DnsLookup), "subprocessRun" => Some(Self::SubprocessRun), @@ -84,6 +90,14 @@ pub struct PythonVfsRpcRequest { pub path: String, /// Second path for `Rename` (the destination); `None` for other methods. pub destination: Option, + /// Symlink target (the path the link points at), for `Symlink`. + pub target: Option, + /// `Setattr` metadata fields (each applied only when present). + pub mode: Option, + pub uid: Option, + pub gid: Option, + pub atime_ms: Option, + pub mtime_ms: Option, pub content_base64: Option, pub recursive: bool, pub url: Option, @@ -136,6 +150,9 @@ pub enum PythonVfsRpcResponsePayload { stderr: String, max_buffer_exceeded: bool, }, + SymlinkTarget { + target: String, + }, } #[derive(Debug, Deserialize)] @@ -147,6 +164,19 @@ struct PythonVfsBridgeRequestWire { #[serde(default)] destination: Option, #[serde(default)] + target: Option, + // JS numbers cross the bridge as f64; accept that and narrow below. + #[serde(default)] + mode: Option, + #[serde(default)] + uid: Option, + #[serde(default)] + gid: Option, + #[serde(default, rename = "atimeMs")] + atime_ms: Option, + #[serde(default, rename = "mtimeMs")] + mtime_ms: Option, + #[serde(default)] content_base64: Option, #[serde(default)] recursive: bool, @@ -477,6 +507,9 @@ impl PythonExecution { "stderr": stderr, "maxBufferExceeded": max_buffer_exceeded, }), + PythonVfsRpcResponsePayload::SymlinkTarget { target } => json!({ + "target": target, + }), }; self.inner @@ -1187,6 +1220,12 @@ fn parse_python_bridge_sync_rpc_request( method, path: wire.path, destination: wire.destination, + target: wire.target, + mode: wire.mode.map(|value| value as u32), + uid: wire.uid.map(|value| value as u32), + gid: wire.gid.map(|value| value as u32), + atime_ms: wire.atime_ms.map(|value| value as u64), + mtime_ms: wire.mtime_ms.map(|value| value as u64), content_base64: wire.content_base64, recursive: wire.recursive, url: wire.url, diff --git a/crates/sidecar/src/execution.rs b/crates/sidecar/src/execution.rs index 86cebb72..487c4baf 100644 --- a/crates/sidecar/src/execution.rs +++ b/crates/sidecar/src/execution.rs @@ -17,10 +17,9 @@ use crate::protocol::{ OwnershipScope, ProcessExitedEvent, ProcessKilledResponse, ProcessOutputEvent, ProcessSnapshotEntry, ProcessSnapshotResponse, ProcessSnapshotStatus, ProcessStartedResponse, PtyResizedResponse, RequestFrame, ResizePtyRequest, ResponseFrame, ResponsePayload, - SidecarRequestPayload, SignalDispositionAction, - SignalHandlerRegistration, SignalStateResponse, SocketStateEntry, StdinClosedResponse, - StdinWrittenResponse, StreamChannel, VmFetchRequest, VmFetchResponse, WasmPermissionTier, - WriteStdinRequest, ZombieTimerCountResponse, + SidecarRequestPayload, SignalDispositionAction, SignalHandlerRegistration, SignalStateResponse, + SocketStateEntry, StdinClosedResponse, StdinWrittenResponse, StreamChannel, VmFetchRequest, + VmFetchResponse, WasmPermissionTier, WriteStdinRequest, ZombieTimerCountResponse, }; use crate::service::{ audit_fields, dirname, emit_security_audit_event, emit_structured_event, javascript_error, @@ -4762,7 +4761,10 @@ where | PythonVfsRpcMethod::Mkdir | PythonVfsRpcMethod::Unlink | PythonVfsRpcMethod::Rmdir - | PythonVfsRpcMethod::Rename => { + | PythonVfsRpcMethod::Rename + | PythonVfsRpcMethod::Symlink + | PythonVfsRpcMethod::ReadLink + | PythonVfsRpcMethod::Setattr => { filesystem_handle_python_vfs_rpc_request(self, vm_id, process_id, request) } PythonVfsRpcMethod::HttpRequest => { @@ -16810,7 +16812,7 @@ fn install_kernel_stdin_pipe(kernel: &mut SidecarKernel, pid: u32) -> Result) -> Option<(u16, u16)> { let cols = env diff --git a/crates/sidecar/src/filesystem.rs b/crates/sidecar/src/filesystem.rs index 3fe0361b..0ea92de4 100644 --- a/crates/sidecar/src/filesystem.rs +++ b/crates/sidecar/src/filesystem.rs @@ -818,6 +818,48 @@ where Err(error) => Err(error), } } + // Kernel-direct (no shadow mirror): guest Python writes/creates + // land only in the kernel VFS, so mirroring create/modify ops into + // the host-side shadow would leave empty stubs that a later + // shadow->kernel sync resurrects over real content. (Delete/rename + // still mirror — to *remove* stale wire-written shadow entries.) + PythonVfsRpcMethod::Symlink => { + let target = request.target.clone().ok_or_else(|| { + SidecarError::InvalidState(format!( + "python VFS fsSymlink for {} requires a target", + path + )) + })?; + vm.kernel + .symlink(&target, &path) + .map(|()| PythonVfsRpcResponsePayload::Empty) + .map_err(kernel_error) + } + PythonVfsRpcMethod::ReadLink => vm + .kernel + .read_link(&path) + .map(|target| PythonVfsRpcResponsePayload::SymlinkTarget { target }) + .map_err(kernel_error), + // `setattr` carries any of mode/uid+gid/atime+mtime; apply each + // present field to the host VFS. + PythonVfsRpcMethod::Setattr => { + (|| -> Result { + if let Some(mode) = request.mode { + vm.kernel.chmod(&path, mode).map_err(kernel_error)?; + } + if let (Some(uid), Some(gid)) = (request.uid, request.gid) { + vm.kernel.chown(&path, uid, gid).map_err(kernel_error)?; + } + if let (Some(atime_ms), Some(mtime_ms)) = + (request.atime_ms, request.mtime_ms) + { + vm.kernel + .utimes(&path, atime_ms, mtime_ms) + .map_err(kernel_error)?; + } + Ok(PythonVfsRpcResponsePayload::Empty) + })() + } PythonVfsRpcMethod::HttpRequest | PythonVfsRpcMethod::DnsLookup | PythonVfsRpcMethod::SubprocessRun => { diff --git a/crates/sidecar/tests/python.rs b/crates/sidecar/tests/python.rs index ab05430d..2736f4b0 100644 --- a/crates/sidecar/tests/python.rs +++ b/crates/sidecar/tests/python.rs @@ -625,6 +625,77 @@ fn guest_exists( response.exists.expect("guest filesystem exists flag") } +fn guest_readlink( + sidecar: &mut secure_exec_sidecar::NativeSidecar, + request_id: RequestId, + connection_id: &str, + session_id: &str, + vm_id: &str, + path: &str, +) -> String { + let response = guest_filesystem_call( + sidecar, + request_id, + connection_id, + session_id, + vm_id, + GuestFilesystemCallRequest { + operation: GuestFilesystemOperation::ReadLink, + path: path.to_owned(), + destination_path: None, + target: None, + content: None, + encoding: None, + recursive: false, + mode: None, + uid: None, + gid: None, + atime_ms: None, + mtime_ms: None, + len: None, + offset: None, + }, + ); + + assert_eq!(response.operation, GuestFilesystemOperation::ReadLink); + response.target.expect("guest filesystem readlink target") +} + +fn guest_stat_mode( + sidecar: &mut secure_exec_sidecar::NativeSidecar, + request_id: RequestId, + connection_id: &str, + session_id: &str, + vm_id: &str, + path: &str, +) -> u32 { + let response = guest_filesystem_call( + sidecar, + request_id, + connection_id, + session_id, + vm_id, + GuestFilesystemCallRequest { + operation: GuestFilesystemOperation::Stat, + path: path.to_owned(), + destination_path: None, + target: None, + content: None, + encoding: None, + recursive: false, + mode: None, + uid: None, + gid: None, + atime_ms: None, + mtime_ms: None, + len: None, + offset: None, + }, + ); + + response.stat.expect("guest filesystem stat").mode +} + fn write_process_stdin( sidecar: &mut secure_exec_sidecar::NativeSidecar, request_id: RequestId, @@ -1338,6 +1409,111 @@ print(json.dumps(results)) ); } +#[test] +fn python_runtime_supports_symlink_readlink_and_metadata() { + assert_node_available(); + + let mut sidecar = new_sidecar("python-fs-hooks"); + let cwd = temp_dir("python-fs-hooks-cwd"); + let connection_id = authenticate_wire(&mut sidecar, "conn-python"); + let session_id = open_session_wire(&mut sidecar, 2, &connection_id); + let (vm_id, _) = create_vm_wire( + &mut sidecar, + 3, + &connection_id, + &session_id, + GuestRuntimeKind::Python, + &cwd, + ); + + bootstrap_root_filesystem( + &mut sidecar, + 4, + &connection_id, + &session_id, + &vm_id, + vec![root_dir("/workspace")], + ); + + execute_inline_python( + &mut sidecar, + 5, + &connection_id, + &session_id, + &vm_id, + "proc-python-fs-hooks", + r#" +import json +import os + +result = {} + +with open("/workspace/file.txt", "w", encoding="utf-8") as handle: + handle.write("data") + +# symlink + readlink +os.symlink("file.txt", "/workspace/link.txt") +result["readlink"] = os.readlink("/workspace/link.txt") +result["islink"] = os.path.islink("/workspace/link.txt") + +# chmod (setattr -> host) +os.chmod("/workspace/file.txt", 0o640) +result["mode"] = os.stat("/workspace/file.txt").st_mode & 0o777 + +# utimes (setattr -> host) — just exercise the hook +os.utime("/workspace/file.txt", (1700000000, 1710000000)) + +print(json.dumps(result)) +"#, + ); + + let (stdout, stderr, exit_code) = collect_process_output( + &mut sidecar, + &connection_id, + &session_id, + &vm_id, + "proc-python-fs-hooks", + ); + + assert_eq!(exit_code, 0, "stdout: {stdout}\nstderr: {stderr}"); + assert!(stderr.is_empty(), "unexpected stderr: {stderr}"); + + let parsed: Value = serde_json::from_str(stdout.trim()).expect("parse fs-hooks JSON"); + assert_eq!( + parsed["readlink"], "file.txt", + "os.readlink should return the target" + ); + assert_eq!(parsed["islink"], true, "os.path.islink should be true"); + assert_eq!(parsed["mode"], 0o640, "os.chmod should be reflected"); + + // Cross-check the host kernel VFS. + let host_target = guest_readlink( + &mut sidecar, + 6, + &connection_id, + &session_id, + &vm_id, + "/workspace/link.txt", + ); + assert_eq!( + host_target, "file.txt", + "host VFS should resolve the symlink" + ); + let host_mode = guest_stat_mode( + &mut sidecar, + 7, + &connection_id, + &session_id, + &vm_id, + "/workspace/file.txt", + ); + assert_eq!( + host_mode & 0o777, + 0o640, + "host VFS should reflect the chmod" + ); +} + fn workspace_files_are_shared_between_javascript_and_python_runtimes() { assert_node_available(); diff --git a/crates/sidecar/tests/service.rs b/crates/sidecar/tests/service.rs index ca640684..c73fb8f9 100644 --- a/crates/sidecar/tests/service.rs +++ b/crates/sidecar/tests/service.rs @@ -5587,6 +5587,12 @@ ykAheWCsAteSEWVc0w==\n\ method: PythonVfsRpcMethod::Mkdir, path: String::from("/tmp/stale-python-rpc"), destination: None, + target: None, + mode: None, + uid: None, + gid: None, + atime_ms: None, + mtime_ms: None, content_base64: None, recursive: false, url: None, @@ -5624,6 +5630,12 @@ ykAheWCsAteSEWVc0w==\n\ method: PythonVfsRpcMethod::Mkdir, path: String::from("/tmp/stale-python-rpc"), destination: None, + target: None, + mode: None, + uid: None, + gid: None, + atime_ms: None, + mtime_ms: None, content_base64: None, recursive: false, url: None, @@ -10116,6 +10128,12 @@ export async function loadPyodide() { method: PythonVfsRpcMethod::Mkdir, path: String::from("/workspace"), destination: None, + target: None, + mode: None, + uid: None, + gid: None, + atime_ms: None, + mtime_ms: None, content_base64: None, recursive: false, url: None, @@ -10143,6 +10161,12 @@ export async function loadPyodide() { method: PythonVfsRpcMethod::Write, path: String::from("/workspace/note.txt"), destination: None, + target: None, + mode: None, + uid: None, + gid: None, + atime_ms: None, + mtime_ms: None, content_base64: Some(String::from("aGVsbG8gZnJvbSBzaWRlY2FyIHJwYw==")), recursive: false, url: None,