diff --git a/crates/bashkit-monty-worker/Cargo.toml b/crates/bashkit-monty-worker/Cargo.toml deleted file mode 100644 index 7881b7a4..00000000 --- a/crates/bashkit-monty-worker/Cargo.toml +++ /dev/null @@ -1,25 +0,0 @@ -# Isolated subprocess worker for running Monty (embedded Python) outside the host process. -# If Monty segfaults (e.g., parser stack overflow), only this process dies. -# The host bashkit process catches the child exit and returns a shell error. - -[package] -name = "bashkit-monty-worker" -version.workspace = true -edition.workspace = true -license.workspace = true -authors.workspace = true -repository.workspace = true -description = "Subprocess worker for crash-isolated Monty (Python) execution in Bashkit" - -[lib] -name = "bashkit_monty_worker" -path = "src/lib.rs" - -[[bin]] -name = "bashkit-monty-worker" -path = "src/main.rs" - -[dependencies] -monty = { git = "https://github.com/pydantic/monty", version = "0.0.4" } -serde = { workspace = true } -serde_json = { workspace = true } diff --git a/crates/bashkit-monty-worker/src/lib.rs b/crates/bashkit-monty-worker/src/lib.rs deleted file mode 100644 index 5b21d554..00000000 --- a/crates/bashkit-monty-worker/src/lib.rs +++ /dev/null @@ -1,201 +0,0 @@ -// IPC protocol for bashkit <-> monty-worker subprocess communication. -// JSON lines over stdin/stdout. Worker stays synchronous (no tokio). -// -// EXPERIMENTAL: Monty is an early-stage interpreter with known crash bugs. -// This worker exists specifically to isolate those crashes from the host. -// -// Flow: -// Parent -> Worker: Init { code, filename, limits } -// Worker -> Parent: OsCall { function, args, kwargs } | Complete | Error -// Parent -> Worker: OsResponse { result } -// ... repeat until Complete or Error or worker crash ... -// -// If the worker segfaults, the parent sees broken pipe / child exit with signal. 
- -use monty::{ExcType, MontyObject, OsFunction}; -use serde::{Deserialize, Serialize}; - -/// Parent -> Worker messages (JSON lines on worker's stdin). -#[derive(Debug, Serialize, Deserialize)] -#[serde(tag = "type")] -pub enum WorkerRequest { - /// Start executing Python code with given limits. - #[serde(rename = "init")] - Init { - code: String, - filename: String, - limits: WireLimits, - }, - /// Response to a previous OsCall from the worker. - #[serde(rename = "os_response")] - OsResponse { result: WireExternalResult }, -} - -/// Worker -> Parent messages (JSON lines on worker's stdout). -#[derive(Debug, Serialize, Deserialize)] -#[serde(tag = "type")] -pub enum WorkerResponse { - /// Execution paused: needs a VFS operation from the parent. - #[serde(rename = "os_call")] - OsCall { - function: OsFunction, - args: Vec, - kwargs: Vec<(MontyObject, MontyObject)>, - }, - /// Execution completed successfully. - #[serde(rename = "complete")] - Complete { result: MontyObject, output: String }, - /// Execution failed with a Python exception. - #[serde(rename = "error")] - Error { exception: String, output: String }, -} - -/// Resource limits sent from parent to worker. -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct WireLimits { - pub max_allocations: usize, - pub max_duration_secs: f64, - pub max_memory: usize, - pub max_recursion: usize, -} - -/// Wire-safe version of monty's ExternalResult (which doesn't derive Serialize). -#[derive(Debug, Serialize, Deserialize)] -#[serde(tag = "status")] -pub enum WireExternalResult { - #[serde(rename = "ok")] - Return { value: MontyObject }, - #[serde(rename = "error")] - Error { - exc_type: ExcType, - message: Option, - }, -} - -/// Read one JSON line from a reader. Returns None on EOF. 
-pub fn read_message( - reader: &mut impl std::io::BufRead, -) -> Result, String> { - let mut line = String::new(); - match reader.read_line(&mut line) { - Ok(0) => Ok(None), // EOF - Ok(_) => serde_json::from_str(&line).map(Some).map_err(|e| { - format!( - "protocol error: {e}: {:?}", - if line.len() > 200 { - &line[..200] - } else { - &line - } - ) - }), - Err(e) => Err(format!("read error: {e}")), - } -} - -/// Write one JSON line to a writer. -pub fn write_message( - writer: &mut impl std::io::Write, - msg: &T, -) -> Result<(), String> { - serde_json::to_writer(&mut *writer, msg).map_err(|e| format!("serialize error: {e}"))?; - writer - .write_all(b"\n") - .map_err(|e| format!("write error: {e}"))?; - writer.flush().map_err(|e| format!("flush error: {e}")) -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn roundtrip_worker_request_init() { - let req = WorkerRequest::Init { - code: "print('hi')".into(), - filename: "".into(), - limits: WireLimits { - max_allocations: 1_000_000, - max_duration_secs: 30.0, - max_memory: 64 * 1024 * 1024, - max_recursion: 200, - }, - }; - let json = serde_json::to_string(&req).unwrap(); - let back: WorkerRequest = serde_json::from_str(&json).unwrap(); - match back { - WorkerRequest::Init { code, filename, .. } => { - assert_eq!(code, "print('hi')"); - assert_eq!(filename, ""); - } - _ => panic!("wrong variant"), - } - } - - #[test] - fn roundtrip_worker_response_os_call() { - let resp = WorkerResponse::OsCall { - function: OsFunction::ReadText, - args: vec![MontyObject::Path("/tmp/f.txt".into())], - kwargs: vec![], - }; - let json = serde_json::to_string(&resp).unwrap(); - let back: WorkerResponse = serde_json::from_str(&json).unwrap(); - match back { - WorkerResponse::OsCall { function, args, .. 
} => { - assert_eq!(function, OsFunction::ReadText); - assert_eq!(args.len(), 1); - } - _ => panic!("wrong variant"), - } - } - - #[test] - fn roundtrip_wire_external_result() { - let ok = WireExternalResult::Return { - value: MontyObject::String("content".into()), - }; - let json = serde_json::to_string(&ok).unwrap(); - let back: WireExternalResult = serde_json::from_str(&json).unwrap(); - match back { - WireExternalResult::Return { value } => { - assert_eq!(value, MontyObject::String("content".into())); - } - _ => panic!("wrong variant"), - } - - let err = WireExternalResult::Error { - exc_type: ExcType::FileNotFoundError, - message: Some("not found".into()), - }; - let json = serde_json::to_string(&err).unwrap(); - let back: WireExternalResult = serde_json::from_str(&json).unwrap(); - match back { - WireExternalResult::Error { exc_type, message } => { - assert_eq!(exc_type, ExcType::FileNotFoundError); - assert_eq!(message.as_deref(), Some("not found")); - } - _ => panic!("wrong variant"), - } - } - - #[test] - fn read_write_message_roundtrip() { - let msg = WorkerResponse::Complete { - result: MontyObject::Int(42), - output: "42\n".into(), - }; - let mut buf = Vec::new(); - write_message(&mut buf, &msg).unwrap(); - - let mut cursor = std::io::Cursor::new(buf); - let back: Option = read_message(&mut cursor).unwrap(); - match back.unwrap() { - WorkerResponse::Complete { result, output } => { - assert_eq!(result, MontyObject::Int(42)); - assert_eq!(output, "42\n"); - } - _ => panic!("wrong variant"), - } - } -} diff --git a/crates/bashkit-monty-worker/src/main.rs b/crates/bashkit-monty-worker/src/main.rs deleted file mode 100644 index e1945c12..00000000 --- a/crates/bashkit-monty-worker/src/main.rs +++ /dev/null @@ -1,188 +0,0 @@ -// Monty worker: runs in a subprocess, communicates with parent via JSON lines. -// If this process segfaults (e.g., monty parser stack overflow), the parent -// catches the child exit and returns a shell error instead of crashing. 
-// -// EXPERIMENTAL: Monty is early-stage; this subprocess boundary is the primary -// defense against its known and unknown crash/security bugs. - -use bashkit_monty_worker::{ - read_message, write_message, WireExternalResult, WireLimits, WorkerRequest, WorkerResponse, -}; -use monty::{ - CollectStringPrint, ExcType, ExternalResult, LimitedTracker, MontyException, MontyRun, - ResourceLimits, RunProgress, -}; -use std::io::{self, BufRead, Write}; -use std::time::Duration; - -fn main() { - let stdin = io::stdin(); - let stdout = io::stdout(); - let mut reader = stdin.lock(); - let mut writer = stdout.lock(); - - // Read init message - let init = match read_message::(&mut reader) { - Ok(Some(WorkerRequest::Init { - code, - filename, - limits, - })) => (code, filename, limits), - Ok(Some(_)) => { - send_error(&mut writer, "expected Init message", ""); - std::process::exit(1); - } - Ok(None) => std::process::exit(0), // EOF, parent closed pipe - Err(e) => { - eprintln!("monty-worker: {e}"); - std::process::exit(1); - } - }; - - let (code, filename, limits) = init; - - if let Err(e) = run(&code, &filename, &limits, &mut reader, &mut writer) { - eprintln!("monty-worker: {e}"); - std::process::exit(1); - } -} - -fn run( - code: &str, - filename: &str, - limits: &WireLimits, - reader: &mut impl BufRead, - writer: &mut impl Write, -) -> Result<(), String> { - // Strip shebang if present - let code = if code.starts_with("#!") { - match code.find('\n') { - Some(pos) => &code[pos + 1..], - None => "", - } - } else { - code - }; - - // Parse - let runner = match MontyRun::new(code.to_owned(), filename, vec![], vec![]) { - Ok(r) => r, - Err(e) => { - send_error(writer, &format!("{e}"), ""); - return Ok(()); - } - }; - - // Set up resource limits - let rl = ResourceLimits::new() - .max_allocations(limits.max_allocations) - .max_duration(Duration::from_secs_f64(limits.max_duration_secs)) - .max_memory(limits.max_memory) - .max_recursion_depth(Some(limits.max_recursion)); - - let 
tracker = LimitedTracker::new(rl); - let mut printer = CollectStringPrint::new(); - - // Start execution - let mut progress = match runner.start(vec![], tracker, &mut printer) { - Ok(p) => p, - Err(e) => { - let output = printer.into_output(); - send_error(writer, &format!("{e}"), &output); - return Ok(()); - } - }; - - // Event loop: handle pauses for OsCalls - loop { - match progress { - RunProgress::OsCall { - function, - args, - kwargs, - state, - .. - } => { - // Ask parent for VFS operation - write_message( - writer, - &WorkerResponse::OsCall { - function, - args: args.clone(), - kwargs: kwargs.clone(), - }, - )?; - - // Read parent's response - let wire_result = match read_message::(reader)? { - Some(WorkerRequest::OsResponse { result }) => result, - Some(_) => { - send_error(writer, "expected OsResponse message", ""); - return Ok(()); - } - None => return Err("parent closed pipe during OsCall".into()), - }; - - // Convert wire result to monty ExternalResult - let ext_result = wire_to_external(wire_result); - - match state.run(ext_result, &mut printer) { - Ok(next) => progress = next, - Err(e) => { - let output = printer.into_output(); - send_error(writer, &format!("{e}"), &output); - return Ok(()); - } - } - } - RunProgress::FunctionCall { state, .. 
} => { - // No external functions in virtual mode - let err = MontyException::new( - ExcType::RuntimeError, - Some("external function not available in virtual mode".into()), - ); - match state.run(ExternalResult::Error(err), &mut printer) { - Ok(next) => progress = next, - Err(e) => { - let output = printer.into_output(); - send_error(writer, &format!("{e}"), &output); - return Ok(()); - } - } - } - RunProgress::ResolveFutures(_) => { - let output = printer.into_output(); - send_error( - writer, - "RuntimeError: async operations not supported in virtual mode", - &output, - ); - return Ok(()); - } - RunProgress::Complete(result) => { - let output = printer.into_output(); - write_message(writer, &WorkerResponse::Complete { result, output })?; - return Ok(()); - } - } - } -} - -fn wire_to_external(wire: WireExternalResult) -> ExternalResult { - match wire { - WireExternalResult::Return { value } => ExternalResult::Return(value), - WireExternalResult::Error { exc_type, message } => { - ExternalResult::Error(MontyException::new(exc_type, message)) - } - } -} - -fn send_error(writer: &mut impl Write, exception: &str, output: &str) { - let _ = write_message( - writer, - &WorkerResponse::Error { - exception: format!("{exception}\n"), - output: output.to_string(), - }, - ); -} diff --git a/crates/bashkit/Cargo.toml b/crates/bashkit/Cargo.toml index 01b63ce1..12f527fa 100644 --- a/crates/bashkit/Cargo.toml +++ b/crates/bashkit/Cargo.toml @@ -59,6 +59,10 @@ base64 = { workspace = true, optional = true } # Logging/tracing (optional) tracing = { workspace = true, optional = true } + +# Embedded Python interpreter (optional) +monty = { git = "https://github.com/pydantic/monty", version = "0.0.4", optional = true } + [features] default = [] http_client = ["reqwest", "base64"] @@ -74,13 +78,8 @@ logging = ["tracing"] # Usage: cargo build --features git git = [] # Enable python/python3 builtins via embedded Monty interpreter -# Requires monty git dependency (not on crates.io) — 
feature unavailable from registry -# To use locally: uncomment and add monty dep -# python = ["dep:monty"] - -[lints.rust.unexpected_cfgs] -level = "warn" -check-cfg = ['cfg(feature, values("python"))'] +# Monty is a git dep (not yet on crates.io) — feature unavailable from registry +python = ["dep:monty"] [dev-dependencies] tokio-test = { workspace = true } diff --git a/crates/bashkit/docs/python.md b/crates/bashkit/docs/python.md index 7ce00577..866efb93 100644 --- a/crates/bashkit/docs/python.md +++ b/crates/bashkit/docs/python.md @@ -1,9 +1,8 @@ # Embedded Python (Monty) > **Experimental.** Monty is an early-stage Python interpreter that may have -> undiscovered crash or security bugs. BashKit mitigates crashes via subprocess -> isolation, but the integration should be treated as experimental. Use with -> caution when processing untrusted input. +> undiscovered crash or security bugs. Resource limits are enforced by Monty's +> runtime. The integration should be treated as experimental. Bashkit embeds the [Monty](https://github.com/pydantic/monty) Python interpreter, a pure-Rust implementation of Python 3.12. Python runs entirely in-memory with diff --git a/crates/bashkit/docs/threat-model.md b/crates/bashkit/docs/threat-model.md index cfa9603c..ff73fcc8 100644 --- a/crates/bashkit/docs/threat-model.md +++ b/crates/bashkit/docs/threat-model.md @@ -314,30 +314,11 @@ attacks: 5. **Parser fuel** (`max_parser_operations`, default 100K): Independent of depth, limits total parser work to prevent CPU exhaustion. -## Python Subprocess Isolation (TM-PY-022 to TM-PY-026) - -> **Experimental.** The Monty Python integration is experimental. Monty is an -> early-stage interpreter with known crash-level bugs (e.g., parser segfaults). -> Subprocess isolation mitigates host crashes, but undiscovered vulnerabilities -> may exist. Treat the Python feature as less mature than the rest of BashKit's -> security boundary. 
- -When using `PythonIsolation::Subprocess`, the Monty interpreter runs in a separate -child process (`bashkit-monty-worker`). This provides crash isolation — if the -interpreter segfaults, only the worker process dies. The host continues running -normally. - -| Threat | Mitigation | -|--------|------------| -| Parser segfault kills host (TM-PY-022) | Worker runs in child process | -| Worker binary spoofing (TM-PY-023) | Caller responsibility — secure env/PATH | -| Worker hang blocks parent (TM-PY-024) | IPC timeout (max_duration + 5s) | -| Worker leaks host env vars (TM-PY-025) | `env_clear()` on worker process | -| Worker sends oversized response (TM-PY-026) | IPC line size capped at 16 MB | - -**Caller Responsibility (TM-PY-023):** The `BASHKIT_MONTY_WORKER` environment variable -controls which binary is spawned as the worker process. Do not let untrusted input -control this variable or the system PATH. +## Python Direct Integration + +Monty runs directly in the host process. Resource limits (memory, allocations, +time, recursion) are enforced by Monty's own runtime. All VFS operations are +bridged through the host process — Python code never touches the real filesystem. ## Security Testing diff --git a/crates/bashkit/src/builtins/mod.rs b/crates/bashkit/src/builtins/mod.rs index 98700ac3..0865714f 100644 --- a/crates/bashkit/src/builtins/mod.rs +++ b/crates/bashkit/src/builtins/mod.rs @@ -112,7 +112,7 @@ pub use wc::Wc; pub use git::Git; #[cfg(feature = "python")] -pub use python::{Python, PythonIsolation, PythonLimits}; +pub use python::{Python, PythonLimits}; use async_trait::async_trait; use std::collections::HashMap; diff --git a/crates/bashkit/src/builtins/python.rs b/crates/bashkit/src/builtins/python.rs index 63768c57..c358eb00 100644 --- a/crates/bashkit/src/builtins/python.rs +++ b/crates/bashkit/src/builtins/python.rs @@ -1,11 +1,10 @@ //! python/python3 builtin via embedded Monty interpreter (pydantic/monty) //! -//! # Experimental +//! 
# Direct Integration //! -//! **This integration is experimental.** Monty is an early-stage Python interpreter -//! that may have undiscovered crash or security bugs in its parser or VM. -//! Subprocess isolation mitigates host crashes, but undiscovered issues may bypass -//! BashKit's security boundary. Use with caution when processing untrusted input. +//! Monty runs directly in the host process. No subprocess, no IPC. +//! Resource limits (memory, allocations, time, recursion) are enforced +//! by Monty's own runtime, not by process isolation. //! //! # Overview //! @@ -13,12 +12,6 @@ //! Python `pathlib.Path` operations are bridged to BashKit's virtual filesystem //! via Monty's OsCall pause/resume mechanism. No real filesystem or network access. //! -//! Two execution modes: -//! - **InProcess**: Monty runs in the host process (fast, but parser segfaults crash host) -//! - **Subprocess**: Monty runs in `bashkit-monty-worker` child process (crash-isolated) -//! -//! Default is `Auto`: use subprocess if the worker binary is found, else fall back to in-process. -//! //! Supports: `python -c "code"`, `python script.py`, stdin piping. use async_trait::async_trait; @@ -26,67 +19,10 @@ use monty::{ dir_stat, file_stat, symlink_stat, CollectStringPrint, ExcType, ExternalResult, LimitedTracker, MontyException, MontyObject, MontyRun, OsFunction, ResourceLimits, RunProgress, }; - -// IPC wire types for bashkit <-> monty-worker subprocess communication. -// Duplicated from bashkit-monty-worker crate to avoid a cargo dependency on an -// unpublished binary crate. Both sides must agree on the JSON wire format. - -/// Parent -> Worker messages (JSON lines on worker's stdin). 
-#[derive(Debug, serde::Serialize, serde::Deserialize)] -#[serde(tag = "type")] -enum WorkerRequest { - #[serde(rename = "init")] - Init { - code: String, - filename: String, - limits: WireLimits, - }, - #[serde(rename = "os_response")] - OsResponse { result: WireExternalResult }, -} - -/// Worker -> Parent messages (JSON lines on worker's stdout). -#[derive(Debug, serde::Serialize, serde::Deserialize)] -#[serde(tag = "type")] -enum WorkerResponse { - #[serde(rename = "os_call")] - OsCall { - function: OsFunction, - args: Vec, - kwargs: Vec<(MontyObject, MontyObject)>, - }, - #[serde(rename = "complete")] - Complete { result: MontyObject, output: String }, - #[serde(rename = "error")] - Error { exception: String, output: String }, -} - -/// Resource limits sent from parent to worker. -#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] -struct WireLimits { - max_allocations: usize, - max_duration_secs: f64, - max_memory: usize, - max_recursion: usize, -} - -/// Wire-safe version of monty's ExternalResult (which doesn't derive Serialize). -#[derive(Debug, serde::Serialize, serde::Deserialize)] -#[serde(tag = "status")] -enum WireExternalResult { - #[serde(rename = "ok")] - Return { value: MontyObject }, - #[serde(rename = "error")] - Error { - exc_type: ExcType, - message: Option, - }, -} use std::collections::HashMap; use std::path::{Path, PathBuf}; use std::sync::Arc; use std::time::Duration; -use tokio::io::{AsyncBufReadExt, AsyncWriteExt, BufReader}; use super::{resolve_path, Builtin, Context}; use crate::error::Result; @@ -99,30 +35,8 @@ const DEFAULT_MAX_DURATION: Duration = Duration::from_secs(30); const DEFAULT_MAX_MEMORY: usize = 64 * 1024 * 1024; // 64 MB const DEFAULT_MAX_RECURSION: usize = 200; -/// How to run the Monty interpreter. -/// -/// **Experimental:** Monty has known parser crash bugs. `Subprocess` mode is -/// strongly recommended for untrusted input. -/// -/// - `InProcess`: fast, but a parser/VM segfault kills the host. 
-/// - `Subprocess`: crash-isolated via `bashkit-monty-worker` child process. -/// - `Auto` (default): subprocess if worker binary found, else in-process. -#[derive(Debug, Clone, Default)] -pub enum PythonIsolation { - /// Run monty in the host process (no crash isolation). - InProcess, - /// Run monty in a subprocess (crash-isolated). Fails if worker binary not found. - Subprocess, - /// Try subprocess, fall back to in-process if worker binary is missing. - #[default] - Auto, -} - /// Resource limits for the embedded Python (Monty) interpreter. /// -/// **Experimental:** The Monty integration is experimental and may have -/// undiscovered security issues. See module-level docs for details. -/// /// Use the builder pattern to customize, or `Default` for the standard virtual execution limits: /// - 1,000,000 allocations /// - 30 second timeout @@ -150,8 +64,6 @@ pub struct PythonLimits { pub max_memory: usize, /// Maximum recursion depth (default: 200). pub max_recursion: usize, - /// Execution isolation mode (default: Auto). - pub isolation: PythonIsolation, } impl Default for PythonLimits { @@ -161,7 +73,6 @@ impl Default for PythonLimits { max_duration: DEFAULT_MAX_DURATION, max_memory: DEFAULT_MAX_MEMORY, max_recursion: DEFAULT_MAX_RECURSION, - isolation: PythonIsolation::default(), } } } @@ -194,20 +105,10 @@ impl PythonLimits { self.max_recursion = depth; self } - - /// Set isolation mode. - #[must_use] - pub fn isolation(mut self, mode: PythonIsolation) -> Self { - self.isolation = mode; - self - } } /// The python/python3 builtin command. /// -/// **Experimental:** Monty is an early-stage interpreter with known crash bugs. -/// Subprocess isolation is recommended for untrusted input. See module docs. -/// /// Executes Python code using the embedded Monty interpreter (pydantic/monty). /// Python `pathlib.Path` operations are bridged to BashKit's VFS — files /// created by bash (`cat > file`) are readable from Python, and vice versa. 
@@ -367,287 +268,11 @@ impl Builtin for Python { } } -/// Execute Python code, dispatching to subprocess or in-process based on isolation mode. -async fn run_python( - code: &str, - filename: &str, - fs: Arc, - cwd: &Path, - env: &HashMap, - py_limits: &PythonLimits, -) -> Result { - match &py_limits.isolation { - PythonIsolation::Subprocess => match find_worker_binary() { - Some(bin) => run_python_subprocess(code, filename, fs, cwd, env, py_limits, &bin).await, - None => Ok(ExecResult::err( - "python3: subprocess mode requested but bashkit-monty-worker not found\n" - .to_string(), - 1, - )), - }, - PythonIsolation::InProcess => { - run_python_in_process(code, filename, fs, cwd, env, py_limits).await - } - PythonIsolation::Auto => match find_worker_binary() { - Some(bin) if bin.exists() => { - run_python_subprocess(code, filename, fs, cwd, env, py_limits, &bin).await - } - _ => run_python_in_process(code, filename, fs, cwd, env, py_limits).await, - }, - } -} - -/// Execute Python code in a subprocess for crash isolation. -/// -/// THREAT[TM-PY-022]: If the worker segfaults (e.g., monty parser or VM crash), -/// we get a child-exit-with-signal instead of crashing the host. -/// -/// THREAT[TM-PY-025]: Worker environment is cleared to prevent host env var leakage. -/// Only minimal vars needed for operation are passed through. -/// -/// THREAT[TM-PY-024]: IPC reads are wrapped in a timeout derived from the Python -/// execution limit (max_duration + 5s grace period) to prevent worker hangs from -/// blocking the parent indefinitely. -async fn run_python_subprocess( - code: &str, - filename: &str, - fs: Arc, - cwd: &Path, - env: &HashMap, - py_limits: &PythonLimits, - worker_bin: &Path, -) -> Result { - use tokio::process::Command; - - // THREAT[TM-PY-025]: Clear worker environment to prevent host env var leakage. - // The worker communicates purely via IPC; it doesn't need host env vars. 
- let mut child = match Command::new(worker_bin) - .env_clear() - .stdin(std::process::Stdio::piped()) - .stdout(std::process::Stdio::piped()) - .stderr(std::process::Stdio::piped()) - .spawn() - { - Ok(c) => c, - Err(e) => { - return Ok(ExecResult::err( - format!("python3: failed to start worker: {e}\n"), - 1, - )); - } - }; - - // Safe: we set Stdio::piped() above, so stdin/stdout are always Some - let mut child_stdin = child.stdin.take().expect("child stdin piped"); - let child_stdout = child.stdout.take().expect("child stdout piped"); - let mut reader = BufReader::new(child_stdout); - - // Send init request - let init = WorkerRequest::Init { - code: code.to_string(), - filename: filename.to_string(), - limits: WireLimits { - max_allocations: py_limits.max_allocations, - max_duration_secs: py_limits.max_duration.as_secs_f64(), - max_memory: py_limits.max_memory, - max_recursion: py_limits.max_recursion, - }, - }; - write_ipc_message(&mut child_stdin, &init).await?; - - // THREAT[TM-PY-024]: IPC timeout = execution limit + 5s grace for init/cleanup. - // Prevents a hanging worker from blocking the parent forever. - let ipc_timeout = py_limits.max_duration + Duration::from_secs(5); - - // THREAT[TM-PY-026]: Max IPC line size to prevent worker from OOM-ing the parent. - // 16 MB should be generous for any legitimate VFS response. 
- const MAX_IPC_LINE_BYTES: usize = 16 * 1024 * 1024; - - // IPC loop: handle OsCall requests from the worker - loop { - let mut line = String::new(); - let read_result = tokio::time::timeout(ipc_timeout, reader.read_line(&mut line)).await; - - match read_result { - Ok(Ok(0)) => { - // EOF — worker exited (possibly crashed) - let status = child.wait().await?; - return Ok(worker_crash_result(status)); - } - Ok(Ok(_)) => { - // THREAT[TM-PY-026]: Reject oversized IPC lines - if line.len() > MAX_IPC_LINE_BYTES { - let _ = child.kill().await; - return Ok(ExecResult::err( - "python3: worker response too large\n".to_string(), - 1, - )); - } - } - Ok(Err(e)) => { - let _ = child.kill().await; - return Ok(ExecResult::err( - format!("python3: worker communication error: {e}\n"), - 1, - )); - } - Err(_) => { - // THREAT[TM-PY-024]: Timeout — kill the worker - let _ = child.kill().await; - return Ok(ExecResult::err( - "python3: worker timed out\n".to_string(), - 1, - )); - } - } - - let response: WorkerResponse = match serde_json::from_str(&line) { - Ok(r) => r, - Err(e) => { - let _ = child.kill().await; - return Ok(ExecResult::err( - format!("python3: worker protocol error: {e}\n"), - 1, - )); - } - }; - - match response { - WorkerResponse::OsCall { - function, - args, - kwargs, - } => { - // Bridge VFS operation - let ext_result = handle_os_call(function, &args, &kwargs, &fs, cwd, env).await; - - // Convert ExternalResult to wire format - let wire_result = external_to_wire(ext_result); - let resp = WorkerRequest::OsResponse { - result: wire_result, - }; - write_ipc_message(&mut child_stdin, &resp).await?; - } - WorkerResponse::Complete { result, output } => { - let _ = child.wait().await; - let mut out = output; - // REPL behavior: display non-None result if no print output - if !matches!(result, MontyObject::None) && out.is_empty() { - out = format!("{}\n", result.py_repr()); - } - return Ok(ExecResult::ok(out)); - } - WorkerResponse::Error { exception, output } => { - 
let _ = child.wait().await; - let mut result = ExecResult::err(exception, 1); - if !output.is_empty() { - result.stdout = output; - } - return Ok(result); - } - } - } -} - -/// Convert monty ExternalResult (not serializable) to wire format. -fn external_to_wire(ext: ExternalResult) -> WireExternalResult { - match ext { - ExternalResult::Return(obj) => WireExternalResult::Return { value: obj }, - ExternalResult::Error(exc) => WireExternalResult::Error { - exc_type: exc.exc_type(), - message: exc.message().map(|s| s.to_string()), - }, - ExternalResult::Future => WireExternalResult::Error { - exc_type: ExcType::RuntimeError, - message: Some("async not supported".into()), - }, - } -} - -/// Write a JSON line to the worker's stdin. -async fn write_ipc_message( - writer: &mut tokio::process::ChildStdin, - msg: &T, -) -> Result<()> { - let mut buf = serde_json::to_vec(msg) - .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e.to_string()))?; - buf.push(b'\n'); - writer.write_all(&buf).await?; - writer.flush().await?; - Ok(()) -} - -/// Build an ExecResult for a worker that crashed (segfault, abort, etc.). -fn worker_crash_result(status: std::process::ExitStatus) -> ExecResult { - #[cfg(unix)] - { - use std::os::unix::process::ExitStatusExt; - if let Some(signal) = status.signal() { - let sig_name = match signal { - 11 => "SIGSEGV", - 6 => "SIGABRT", - 9 => "SIGKILL", - _ => "signal", - }; - return ExecResult::err( - format!("python3: interpreter crashed ({sig_name})\n"), - 128 + signal, - ); - } - } - let code = status.code().unwrap_or(1); - ExecResult::err( - format!("python3: interpreter exited unexpectedly (code {code})\n"), - code, - ) -} - -/// Find the `bashkit-monty-worker` binary. -/// -/// Search order: -/// 1. `BASHKIT_MONTY_WORKER` env var (explicit override — always trusted, even if missing) -/// 2. Adjacent to current executable (cargo puts workspace bins together) -/// 3. PATH lookup -pub fn find_worker_binary() -> Option { - // 1. 
Env override — always trust if set (caller chose this explicitly) - if let Ok(path) = std::env::var("BASHKIT_MONTY_WORKER") { - if !path.is_empty() { - return Some(PathBuf::from(path)); - } - } - - // 2. Adjacent to current executable (also check parent — test binaries - // live in target/debug/deps/ while workspace bins are in target/debug/) - if let Ok(exe) = std::env::current_exe() { - for dir in exe.ancestors().skip(1).take(2) { - let worker = dir.join("bashkit-monty-worker"); - if worker.exists() { - return Some(worker); - } - } - } - - // 3. Check PATH via which - if let Ok(output) = std::process::Command::new("which") - .arg("bashkit-monty-worker") - .output() - { - if output.status.success() { - let path = String::from_utf8_lossy(&output.stdout).trim().to_string(); - if !path.is_empty() { - return Some(PathBuf::from(path)); - } - } - } - - None -} - -/// Execute Python code via Monty with resource limits and VFS bridging (in-process). +/// Execute Python code via Monty with resource limits and VFS bridging. /// /// Uses Monty's start/resume API: execution pauses at filesystem operations /// (OsCall), we bridge them to BashKit's VFS, then resume. 
-async fn run_python_in_process( +async fn run_python( code: &str, filename: &str, fs: Arc, @@ -1093,7 +718,7 @@ mod tests { Python::new().execute(ctx).await.unwrap() } - // --- Basic functionality tests (existing) --- + // --- Basic functionality tests --- #[tokio::test] async fn test_version() { diff --git a/crates/bashkit/src/lib.rs b/crates/bashkit/src/lib.rs index 1f63345a..d3034131 100644 --- a/crates/bashkit/src/lib.rs +++ b/crates/bashkit/src/lib.rs @@ -395,7 +395,7 @@ pub use network::HttpClient; pub use git::GitClient; #[cfg(feature = "python")] -pub use builtins::{PythonIsolation, PythonLimits}; +pub use builtins::PythonLimits; /// Logging utilities module /// @@ -844,9 +844,8 @@ impl BashBuilder { /// Enable embedded Python (`python`/`python3` builtins) via Monty interpreter /// with default resource limits. /// - /// **Experimental:** The Monty Python integration is experimental and has known - /// crash-level bugs in its parser. Subprocess isolation (the default `Auto` mode) - /// mitigates host crashes, but undiscovered security issues may exist. + /// Monty runs directly in the host process with resource limits enforced + /// by Monty's runtime (memory, allocations, time, recursion). /// /// Requires the `python` feature flag. Python `pathlib.Path` operations are /// bridged to the virtual filesystem. @@ -863,7 +862,7 @@ impl BashBuilder { /// Enable embedded Python with custom resource limits. /// - /// **Experimental:** See [`BashBuilder::python`] for caveats. + /// See [`BashBuilder::python`] for details. /// /// # Example /// diff --git a/crates/bashkit/tests/python_integration_tests.rs b/crates/bashkit/tests/python_integration_tests.rs new file mode 100644 index 00000000..a6d6c22d --- /dev/null +++ b/crates/bashkit/tests/python_integration_tests.rs @@ -0,0 +1,1430 @@ +// Integration tests for embedded Monty (Python) direct integration. 
+// +// Tests the full Bash → Python pipeline: argument parsing, code execution, +// VFS bridging, resource limits, error handling, and interop with bash features +// (pipelines, command substitution, conditionals). + +#![cfg(feature = "python")] + +use bashkit::{Bash, PythonLimits}; +use std::time::Duration; + +/// Helper: create Bash with python builtins using default limits. +fn bash_python() -> Bash { + Bash::builder().python().build() +} + +/// Helper: create Bash with custom python limits. +fn bash_python_limits(limits: PythonLimits) -> Bash { + Bash::builder().python_with_limits(limits).build() +} + +// ============================================================================= +// 1. BASIC EXECUTION +// ============================================================================= + +mod basic_execution { + use super::*; + + #[tokio::test] + async fn print_hello() { + let mut bash = bash_python(); + let r = bash.exec("python3 -c \"print('hello')\"").await.unwrap(); + assert_eq!(r.exit_code, 0); + assert_eq!(r.stdout, "hello\n"); + } + + #[tokio::test] + async fn expression_result() { + let mut bash = bash_python(); + let r = bash.exec("python3 -c \"2 + 3\"").await.unwrap(); + assert_eq!(r.exit_code, 0); + assert_eq!(r.stdout, "5\n"); + } + + #[tokio::test] + async fn multiline_script() { + let mut bash = bash_python(); + let r = bash + .exec("python3 -c \"x = 10\ny = 20\nprint(x + y)\"") + .await + .unwrap(); + assert_eq!(r.exit_code, 0); + assert_eq!(r.stdout, "30\n"); + } + + #[tokio::test] + async fn version_flag() { + let mut bash = bash_python(); + let r = bash.exec("python3 --version").await.unwrap(); + assert_eq!(r.exit_code, 0); + assert!(r.stdout.contains("Python 3.12.0")); + } + + #[tokio::test] + async fn version_flag_short() { + let mut bash = bash_python(); + let r = bash.exec("python3 -V").await.unwrap(); + assert_eq!(r.exit_code, 0); + assert!(r.stdout.contains("Python 3.12.0")); + } + + #[tokio::test] + async fn help_flag() { + let mut bash 
= bash_python(); + let r = bash.exec("python3 --help").await.unwrap(); + assert_eq!(r.exit_code, 0); + assert!(r.stdout.contains("usage:")); + } + + #[tokio::test] + async fn python_alias_works() { + // Both `python` and `python3` should work + let mut bash = bash_python(); + let r = bash + .exec("python -c \"print('via python')\"") + .await + .unwrap(); + assert_eq!(r.exit_code, 0); + assert_eq!(r.stdout, "via python\n"); + } + + #[tokio::test] + async fn none_expression_no_output() { + // None result should produce no output + let mut bash = bash_python(); + let r = bash.exec("python3 -c \"x = 42\"").await.unwrap(); + assert_eq!(r.exit_code, 0); + assert_eq!(r.stdout, ""); + } + + #[tokio::test] + async fn string_expression_result() { + let mut bash = bash_python(); + let r = bash.exec("python3 -c \"'hello'\"").await.unwrap(); + assert_eq!(r.exit_code, 0); + assert_eq!(r.stdout, "'hello'\n"); + } +} + +// ============================================================================= +// 2. 
INPUT MODES +// ============================================================================= + +mod input_modes { + use super::*; + + #[tokio::test] + async fn script_file_from_vfs() { + let mut bash = bash_python(); + bash.exec("cat > /tmp/script.py << 'EOF'\nprint('from file')\nEOF") + .await + .unwrap(); + let r = bash.exec("python3 /tmp/script.py").await.unwrap(); + assert_eq!(r.exit_code, 0); + assert_eq!(r.stdout, "from file\n"); + } + + #[tokio::test] + async fn stdin_pipe() { + let mut bash = bash_python(); + let r = bash + .exec("echo \"print('piped')\" | python3") + .await + .unwrap(); + assert_eq!(r.exit_code, 0); + assert_eq!(r.stdout, "piped\n"); + } + + #[tokio::test] + async fn stdin_dash_flag() { + let mut bash = bash_python(); + let r = bash + .exec("echo \"print('dash')\" | python3 -") + .await + .unwrap(); + assert_eq!(r.exit_code, 0); + assert_eq!(r.stdout, "dash\n"); + } + + #[tokio::test] + async fn shebang_stripped_from_file() { + let mut bash = bash_python(); + bash.exec( + "cat > /tmp/shebang.py << 'EOF'\n#!/usr/bin/env python3\nprint('shebang ok')\nEOF", + ) + .await + .unwrap(); + let r = bash.exec("python3 /tmp/shebang.py").await.unwrap(); + assert_eq!(r.exit_code, 0); + assert_eq!(r.stdout, "shebang ok\n"); + } + + #[tokio::test] + async fn missing_file_error() { + let mut bash = bash_python(); + let r = bash.exec("python3 /no/such/script.py").await.unwrap(); + assert_eq!(r.exit_code, 2); + assert!(r.stderr.contains("can't open file")); + } + + #[tokio::test] + async fn missing_c_arg() { + let mut bash = bash_python(); + let r = bash.exec("python3 -c").await.unwrap(); + assert_eq!(r.exit_code, 2); + assert!(r.stderr.contains("requires argument")); + } + + #[tokio::test] + async fn unknown_option() { + let mut bash = bash_python(); + let r = bash.exec("python3 -z").await.unwrap(); + assert_eq!(r.exit_code, 2); + assert!(r.stderr.contains("unknown option")); + } +} + +// 
============================================================================= +// 3. DATA TYPES AND OPERATIONS +// ============================================================================= + +mod data_types { + use super::*; + + #[tokio::test] + async fn list_operations() { + let mut bash = bash_python(); + let r = bash + .exec("python3 -c \"lst = [1, 2, 3]\nlst.append(4)\nprint(lst)\"") + .await + .unwrap(); + assert_eq!(r.exit_code, 0); + assert_eq!(r.stdout, "[1, 2, 3, 4]\n"); + } + + #[tokio::test] + async fn dict_operations() { + let mut bash = bash_python(); + let r = bash + .exec("python3 -c \"d = dict()\nd['a'] = 1\nd['b'] = 2\nprint(d['a'])\nprint(len(d))\"") + .await + .unwrap(); + assert_eq!(r.exit_code, 0); + assert_eq!(r.stdout, "1\n2\n"); + } + + #[tokio::test] + async fn tuple_operations() { + let mut bash = bash_python(); + let r = bash + .exec("python3 -c \"t = (1, 'two', 3.0)\nprint(t[1])\nprint(len(t))\"") + .await + .unwrap(); + assert_eq!(r.exit_code, 0); + assert_eq!(r.stdout, "two\n3\n"); + } + + #[tokio::test] + async fn set_operations() { + let mut bash = bash_python(); + let r = bash + .exec("python3 -c \"s = set([1, 2, 3, 2, 1])\nprint(len(s))\"") + .await + .unwrap(); + assert_eq!(r.exit_code, 0); + assert_eq!(r.stdout, "3\n"); + } + + #[tokio::test] + async fn string_methods() { + let mut bash = bash_python(); + let r = bash + .exec("python3 -c \"s = 'hello world'\nprint(s.upper())\nprint(s.split())\"") + .await + .unwrap(); + assert_eq!(r.exit_code, 0); + assert_eq!(r.stdout, "HELLO WORLD\n['hello', 'world']\n"); + } + + #[tokio::test] + async fn fstring_formatting() { + let mut bash = bash_python(); + let r = bash + .exec("python3 -c \"name = 'world'\nx = 42\nprint(f'hello {name}, x={x}')\"") + .await + .unwrap(); + assert_eq!(r.exit_code, 0); + assert_eq!(r.stdout, "hello world, x=42\n"); + } + + #[tokio::test] + async fn list_comprehension() { + let mut bash = bash_python(); + let r = bash + .exec("python3 -c \"print([x**2 for x 
in range(5)])\"") + .await + .unwrap(); + assert_eq!(r.exit_code, 0); + assert_eq!(r.stdout, "[0, 1, 4, 9, 16]\n"); + } + + #[tokio::test] + async fn dict_comprehension() { + let mut bash = bash_python(); + let r = bash + .exec("python3 -c \"d = {str(i): i*i for i in range(3)}\nprint(d)\"") + .await + .unwrap(); + assert_eq!(r.exit_code, 0); + assert!(r.stdout.contains("'0': 0")); + assert!(r.stdout.contains("'1': 1")); + assert!(r.stdout.contains("'2': 4")); + } + + #[tokio::test] + async fn boolean_operations() { + let mut bash = bash_python(); + let r = bash + .exec("python3 -c \"print(True and False)\nprint(True or False)\nprint(not True)\"") + .await + .unwrap(); + assert_eq!(r.exit_code, 0); + assert_eq!(r.stdout, "False\nTrue\nFalse\n"); + } + + #[tokio::test] + async fn none_value() { + let mut bash = bash_python(); + let r = bash + .exec("python3 -c \"x = None\nprint(x is None)\"") + .await + .unwrap(); + assert_eq!(r.exit_code, 0); + assert_eq!(r.stdout, "True\n"); + } + + #[tokio::test] + async fn integer_arithmetic() { + let mut bash = bash_python(); + let r = bash + .exec("python3 -c \"print(2 ** 10)\nprint(17 // 3)\nprint(17 % 3)\"") + .await + .unwrap(); + assert_eq!(r.exit_code, 0); + assert_eq!(r.stdout, "1024\n5\n2\n"); + } + + #[tokio::test] + async fn float_arithmetic() { + let mut bash = bash_python(); + let r = bash + .exec("python3 -c \"print(round(3.14159, 2))\nprint(abs(-42.5))\"") + .await + .unwrap(); + assert_eq!(r.exit_code, 0); + assert_eq!(r.stdout, "3.14\n42.5\n"); + } + + #[tokio::test] + async fn string_slicing() { + let mut bash = bash_python(); + let r = bash + .exec("python3 -c \"s = 'abcdefgh'\nprint(s[2:5])\nprint(s[::-1])\"") + .await + .unwrap(); + assert_eq!(r.exit_code, 0); + assert_eq!(r.stdout, "cde\nhgfedcba\n"); + } +} + +// ============================================================================= +// 4. 
CONTROL FLOW +// ============================================================================= + +mod control_flow { + use super::*; + + #[tokio::test] + async fn if_elif_else() { + let mut bash = bash_python(); + let r = bash + .exec("python3 -c \"x = 5\nif x > 10:\n print('big')\nelif x > 3:\n print('medium')\nelse:\n print('small')\"") + .await + .unwrap(); + assert_eq!(r.exit_code, 0); + assert_eq!(r.stdout, "medium\n"); + } + + #[tokio::test] + async fn for_loop_range() { + let mut bash = bash_python(); + let r = bash + .exec("python3 -c \"total = 0\nfor i in range(5):\n total += i\nprint(total)\"") + .await + .unwrap(); + assert_eq!(r.exit_code, 0); + assert_eq!(r.stdout, "10\n"); + } + + #[tokio::test] + async fn for_loop_list() { + let mut bash = bash_python(); + let r = bash + .exec("python3 -c \"for item in ['a', 'b', 'c']:\n print(item)\"") + .await + .unwrap(); + assert_eq!(r.exit_code, 0); + assert_eq!(r.stdout, "a\nb\nc\n"); + } + + #[tokio::test] + async fn while_loop() { + let mut bash = bash_python(); + let r = bash + .exec("python3 -c \"i = 0\nwhile i < 3:\n print(i)\n i += 1\"") + .await + .unwrap(); + assert_eq!(r.exit_code, 0); + assert_eq!(r.stdout, "0\n1\n2\n"); + } + + #[tokio::test] + async fn break_in_loop() { + let mut bash = bash_python(); + let r = bash + .exec("python3 -c \"for i in range(10):\n if i == 3:\n break\n print(i)\"") + .await + .unwrap(); + assert_eq!(r.exit_code, 0); + assert_eq!(r.stdout, "0\n1\n2\n"); + } + + #[tokio::test] + async fn continue_in_loop() { + let mut bash = bash_python(); + let r = bash + .exec("python3 -c \"for i in range(5):\n if i % 2 == 0:\n continue\n print(i)\"") + .await + .unwrap(); + assert_eq!(r.exit_code, 0); + assert_eq!(r.stdout, "1\n3\n"); + } +} + +// ============================================================================= +// 5. 
FUNCTIONS +// ============================================================================= + +mod functions { + use super::*; + + #[tokio::test] + async fn basic_function() { + let mut bash = bash_python(); + let r = bash + .exec("python3 -c \"def greet(name):\n return f'hello {name}'\nprint(greet('world'))\"") + .await + .unwrap(); + assert_eq!(r.exit_code, 0); + assert_eq!(r.stdout, "hello world\n"); + } + + #[tokio::test] + async fn default_args() { + let mut bash = bash_python(); + let r = bash + .exec("python3 -c \"def add(a, b=10):\n return a + b\nprint(add(5))\nprint(add(5, 20))\"") + .await + .unwrap(); + assert_eq!(r.exit_code, 0); + assert_eq!(r.stdout, "15\n25\n"); + } + + #[tokio::test] + async fn recursive_function() { + let mut bash = bash_python(); + let r = bash + .exec("python3 -c \"def factorial(n):\n if n <= 1:\n return 1\n return n * factorial(n - 1)\nprint(factorial(10))\"") + .await + .unwrap(); + assert_eq!(r.exit_code, 0); + assert_eq!(r.stdout, "3628800\n"); + } + + #[tokio::test] + async fn lambda_expression() { + let mut bash = bash_python(); + let r = bash + .exec("python3 -c \"double = lambda x: x * 2\nprint(double(21))\"") + .await + .unwrap(); + assert_eq!(r.exit_code, 0); + assert_eq!(r.stdout, "42\n"); + } + + #[tokio::test] + async fn nested_function() { + let mut bash = bash_python(); + let r = bash + .exec("python3 -c \"def outer():\n x = 10\n def inner():\n return x + 5\n return inner()\nprint(outer())\"") + .await + .unwrap(); + assert_eq!(r.exit_code, 0); + assert_eq!(r.stdout, "15\n"); + } +} + +// ============================================================================= +// 6. 
EXCEPTION HANDLING +// ============================================================================= + +mod exception_handling { + use super::*; + + #[tokio::test] + async fn try_except_basic() { + let mut bash = bash_python(); + let r = bash + .exec("python3 -c \"try:\n 1/0\nexcept ZeroDivisionError:\n print('caught')\"") + .await + .unwrap(); + assert_eq!(r.exit_code, 0); + assert_eq!(r.stdout, "caught\n"); + } + + #[tokio::test] + async fn try_except_finally() { + let mut bash = bash_python(); + let r = bash + .exec("python3 -c \"try:\n x = 1\nexcept:\n print('error')\nfinally:\n print('done')\"") + .await + .unwrap(); + assert_eq!(r.exit_code, 0); + assert_eq!(r.stdout, "done\n"); + } + + #[tokio::test] + async fn try_except_as() { + let mut bash = bash_python(); + let r = bash + .exec("python3 -c \"try:\n int('abc')\nexcept ValueError as e:\n print('got ValueError')\"") + .await + .unwrap(); + assert_eq!(r.exit_code, 0); + assert_eq!(r.stdout, "got ValueError\n"); + } + + #[tokio::test] + async fn raise_exception() { + let mut bash = bash_python(); + let r = bash + .exec("python3 -c \"raise ValueError('test error')\"") + .await + .unwrap(); + assert_eq!(r.exit_code, 1); + assert!(r.stderr.contains("ValueError")); + } + + #[tokio::test] + async fn nested_try_except() { + let mut bash = bash_python(); + let r = bash + .exec("python3 -c \"try:\n try:\n 1/0\n except ZeroDivisionError:\n raise ValueError('chained')\nexcept ValueError as e:\n print('caught:', e)\"") + .await + .unwrap(); + assert_eq!(r.exit_code, 0); + assert!(r.stdout.contains("caught:")); + } +} + +// ============================================================================= +// 7. 
ERROR HANDLING +// ============================================================================= + +mod error_handling { + use super::*; + + #[tokio::test] + async fn syntax_error() { + let mut bash = bash_python(); + let r = bash.exec("python3 -c \"def\"").await.unwrap(); + assert_eq!(r.exit_code, 1); + assert!(r.stderr.contains("SyntaxError") || r.stderr.contains("Error")); + } + + #[tokio::test] + async fn zero_division() { + let mut bash = bash_python(); + let r = bash.exec("python3 -c \"1/0\"").await.unwrap(); + assert_eq!(r.exit_code, 1); + assert!(r.stderr.contains("ZeroDivisionError")); + } + + #[tokio::test] + async fn name_error() { + let mut bash = bash_python(); + let r = bash + .exec("python3 -c \"print(undefined_var)\"") + .await + .unwrap(); + assert_eq!(r.exit_code, 1); + assert!(r.stderr.contains("NameError")); + } + + #[tokio::test] + async fn type_error() { + let mut bash = bash_python(); + let r = bash.exec("python3 -c \"1 + 'a'\"").await.unwrap(); + assert_eq!(r.exit_code, 1); + assert!(r.stderr.contains("TypeError")); + } + + #[tokio::test] + async fn index_error() { + let mut bash = bash_python(); + let r = bash + .exec("python3 -c \"lst = [1, 2]\nprint(lst[10])\"") + .await + .unwrap(); + assert_eq!(r.exit_code, 1); + assert!(r.stderr.contains("IndexError")); + } + + #[tokio::test] + async fn key_error() { + let mut bash = bash_python(); + let r = bash + .exec("python3 -c \"d = dict()\nprint(d['missing'])\"") + .await + .unwrap(); + assert_eq!(r.exit_code, 1); + assert!(r.stderr.contains("KeyError")); + } + + #[tokio::test] + async fn output_before_error_preserved() { + let mut bash = bash_python(); + let r = bash + .exec("python3 -c \"print('before')\n1/0\"") + .await + .unwrap(); + assert_eq!(r.exit_code, 1); + assert_eq!(r.stdout, "before\n"); + assert!(r.stderr.contains("ZeroDivisionError")); + } + + #[tokio::test] + async fn multiple_prints_before_error() { + let mut bash = bash_python(); + let r = bash + .exec("python3 -c 
\"print('one')\nprint('two')\n1/0\"") + .await + .unwrap(); + assert_eq!(r.exit_code, 1); + assert!(r.stdout.contains("one")); + assert!(r.stdout.contains("two")); + assert!(r.stderr.contains("ZeroDivisionError")); + } +} + +// ============================================================================= +// 8. VFS BRIDGING +// ============================================================================= + +mod vfs_bridging { + use super::*; + + #[tokio::test] + async fn bash_writes_python_reads() { + let mut bash = bash_python(); + bash.exec("echo -n 'hello from bash' > /tmp/test.txt") + .await + .unwrap(); + let r = bash + .exec( + "python3 -c \"from pathlib import Path\nprint(Path('/tmp/test.txt').read_text())\"", + ) + .await + .unwrap(); + assert_eq!(r.exit_code, 0); + assert_eq!(r.stdout, "hello from bash\n"); + } + + #[tokio::test] + async fn python_writes_bash_reads() { + let mut bash = bash_python(); + bash.exec("python3 -c \"from pathlib import Path\nPath('/tmp/py_out.txt').write_text('from python')\"") + .await + .unwrap(); + let r = bash.exec("cat /tmp/py_out.txt").await.unwrap(); + assert_eq!(r.exit_code, 0); + assert_eq!(r.stdout, "from python"); + } + + #[tokio::test] + async fn python_writes_python_reads() { + let mut bash = bash_python(); + let r = bash + .exec("python3 -c \"from pathlib import Path\nPath('/tmp/rw.txt').write_text('roundtrip')\nprint(Path('/tmp/rw.txt').read_text())\"") + .await + .unwrap(); + assert_eq!(r.exit_code, 0); + assert_eq!(r.stdout, "roundtrip\n"); + } + + #[tokio::test] + async fn path_exists() { + let mut bash = bash_python(); + bash.exec("echo 'data' > /tmp/exists.txt").await.unwrap(); + let r = bash + .exec("python3 -c \"from pathlib import Path\nprint(Path('/tmp/exists.txt').exists())\nprint(Path('/tmp/nope.txt').exists())\"") + .await + .unwrap(); + assert_eq!(r.exit_code, 0); + assert_eq!(r.stdout, "True\nFalse\n"); + } + + #[tokio::test] + async fn path_is_file_is_dir() { + let mut bash = bash_python(); + 
bash.exec("mkdir -p /data && echo 'x' > /data/f.txt") + .await + .unwrap(); + let r = bash + .exec("python3 -c \"from pathlib import Path\nprint(Path('/data/f.txt').is_file())\nprint(Path('/data').is_dir())\"") + .await + .unwrap(); + assert_eq!(r.exit_code, 0); + assert_eq!(r.stdout, "True\nTrue\n"); + } + + #[tokio::test] + async fn mkdir_and_verify() { + let mut bash = bash_python(); + let r = bash + .exec("python3 -c \"from pathlib import Path\nPath('/tmp/newdir').mkdir()\nprint(Path('/tmp/newdir').is_dir())\"") + .await + .unwrap(); + assert_eq!(r.exit_code, 0); + assert_eq!(r.stdout, "True\n"); + } + + #[tokio::test] + async fn mkdir_parents() { + let mut bash = bash_python(); + let r = bash + .exec("python3 -c \"from pathlib import Path\nPath('/tmp/a/b/c').mkdir(parents=True)\nprint(Path('/tmp/a/b/c').is_dir())\"") + .await + .unwrap(); + assert_eq!(r.exit_code, 0); + assert_eq!(r.stdout, "True\n"); + } + + #[tokio::test] + async fn mkdir_parents_exist_ok() { + let mut bash = bash_python(); + // mkdir(parents=True, exist_ok=True) should always succeed + let r = bash + .exec("python3 -c \"from pathlib import Path\nPath('/tmp/deep/nested/dir').mkdir(parents=True, exist_ok=True)\nprint(Path('/tmp/deep/nested/dir').is_dir())\"") + .await + .unwrap(); + assert_eq!(r.exit_code, 0); + assert_eq!(r.stdout, "True\n"); + // Calling again should also succeed + let r2 = bash + .exec("python3 -c \"from pathlib import Path\nPath('/tmp/deep/nested/dir').mkdir(parents=True, exist_ok=True)\nprint('ok')\"") + .await + .unwrap(); + assert_eq!(r2.exit_code, 0); + assert_eq!(r2.stdout, "ok\n"); + } + + #[tokio::test] + async fn iterdir() { + let mut bash = bash_python(); + bash.exec("mkdir -p /list && echo a > /list/one.txt && echo b > /list/two.txt") + .await + .unwrap(); + let r = bash + .exec("python3 -c \"from pathlib import Path\nfor p in Path('/list').iterdir():\n print(p.name)\"") + .await + .unwrap(); + assert_eq!(r.exit_code, 0); + assert!(r.stdout.contains("one.txt")); 
+ assert!(r.stdout.contains("two.txt")); + } + + #[tokio::test] + async fn stat_file_size() { + let mut bash = bash_python(); + bash.exec("echo -n '12345' > /tmp/sized.txt").await.unwrap(); + let r = bash + .exec("python3 -c \"from pathlib import Path\ninfo = Path('/tmp/sized.txt').stat()\nprint(info.st_size)\"") + .await + .unwrap(); + assert_eq!(r.exit_code, 0); + assert_eq!(r.stdout, "5\n"); + } + + #[tokio::test] + async fn unlink_file() { + let mut bash = bash_python(); + bash.exec("echo 'x' > /tmp/to_delete.txt").await.unwrap(); + let r = bash + .exec("python3 -c \"from pathlib import Path\nPath('/tmp/to_delete.txt').unlink()\nprint(Path('/tmp/to_delete.txt').exists())\"") + .await + .unwrap(); + assert_eq!(r.exit_code, 0); + assert_eq!(r.stdout, "False\n"); + } + + #[tokio::test] + async fn rename_file() { + let mut bash = bash_python(); + bash.exec("echo 'data' > /tmp/old_name.txt").await.unwrap(); + let r = bash + .exec("python3 -c \"from pathlib import Path\nPath('/tmp/old_name.txt').rename('/tmp/new_name.txt')\nprint(Path('/tmp/new_name.txt').exists())\nprint(Path('/tmp/old_name.txt').exists())\"") + .await + .unwrap(); + assert_eq!(r.exit_code, 0); + assert_eq!(r.stdout, "True\nFalse\n"); + } + + #[tokio::test] + async fn read_not_found_exception() { + let mut bash = bash_python(); + let r = bash + .exec("python3 -c \"from pathlib import Path\ntry:\n Path('/no/such/file').read_text()\nexcept FileNotFoundError:\n print('caught FileNotFoundError')\"") + .await + .unwrap(); + assert_eq!(r.exit_code, 0); + assert_eq!(r.stdout, "caught FileNotFoundError\n"); + } + + #[tokio::test] + async fn write_bytes() { + let mut bash = bash_python(); + let r = bash + .exec("python3 -c \"from pathlib import Path\nPath('/tmp/bin.dat').write_bytes(b'\\x00\\x01\\x02')\ndata = Path('/tmp/bin.dat').read_bytes()\nprint(len(data))\"") + .await + .unwrap(); + assert_eq!(r.exit_code, 0); + assert_eq!(r.stdout, "3\n"); + } + + #[tokio::test] + async fn 
relative_path_resolves_to_cwd() { + let mut bash = bash_python(); + // Ensure cwd exists in VFS, then write a file there + bash.exec("mkdir -p /home/user && echo -n 'relative' > /home/user/rel.txt") + .await + .unwrap(); + let r = bash + .exec("python3 -c \"from pathlib import Path\nprint(Path('rel.txt').read_text())\"") + .await + .unwrap(); + assert_eq!(r.exit_code, 0); + assert_eq!(r.stdout, "relative\n"); + } + + #[tokio::test] + async fn path_resolve() { + let mut bash = bash_python(); + let r = bash + .exec("python3 -c \"from pathlib import Path\nprint(Path('/tmp/../tmp/file.txt').resolve())\"") + .await + .unwrap(); + assert_eq!(r.exit_code, 0); + // Should resolve to absolute path + assert!(r.stdout.contains("tmp")); + } +} + +// ============================================================================= +// 9. ENVIRONMENT ACCESS +// ============================================================================= + +mod environment { + use super::*; + + #[tokio::test] + async fn getenv_existing() { + let mut bash = Bash::builder().python().env("MY_VAR", "test_value").build(); + let r = bash + .exec("python3 -c \"import os\nprint(os.getenv('MY_VAR'))\"") + .await + .unwrap(); + assert_eq!(r.exit_code, 0); + assert_eq!(r.stdout, "test_value\n"); + } + + #[tokio::test] + async fn getenv_missing_with_default() { + let mut bash = bash_python(); + let r = bash + .exec("python3 -c \"import os\nprint(os.getenv('NONEXISTENT', 'fallback'))\"") + .await + .unwrap(); + assert_eq!(r.exit_code, 0); + assert_eq!(r.stdout, "fallback\n"); + } + + #[tokio::test] + async fn getenv_missing_returns_none() { + let mut bash = bash_python(); + let r = bash + .exec("python3 -c \"import os\nprint(os.getenv('NONEXISTENT'))\"") + .await + .unwrap(); + assert_eq!(r.exit_code, 0); + assert_eq!(r.stdout, "None\n"); + } + + #[tokio::test] + async fn environ_dict() { + let mut bash = Bash::builder() + .python() + .env("FOO", "bar") + .env("BAZ", "qux") + .build(); + let r = bash + 
.exec("python3 -c \"import os\nenv = os.environ\nprint('FOO' in env)\nprint(env.get('FOO'))\"") + .await + .unwrap(); + assert_eq!(r.exit_code, 0); + assert!(r.stdout.contains("True")); + assert!(r.stdout.contains("bar")); + } + + #[tokio::test] + async fn builder_env_visible_to_python() { + // Use builder .env() to set env vars visible to Python + let mut bash = Bash::builder().python().env("GREETING", "hello").build(); + let r = bash + .exec("python3 -c \"import os\nprint(os.getenv('GREETING'))\"") + .await + .unwrap(); + assert_eq!(r.exit_code, 0); + assert_eq!(r.stdout, "hello\n"); + } +} + +// ============================================================================= +// 10. RESOURCE LIMITS +// ============================================================================= + +mod resource_limits { + use super::*; + + #[tokio::test] + async fn recursion_limit() { + let mut bash = bash_python(); + let r = bash.exec("python3 -c \"def r(): r()\nr()\"").await.unwrap(); + assert_eq!(r.exit_code, 1); + assert!(r.stderr.contains("RecursionError") || r.stderr.contains("recursion")); + } + + #[tokio::test] + async fn memory_limit() { + let limits = PythonLimits::default().max_memory(1024); + let mut bash = bash_python_limits(limits); + let r = bash + .exec("python3 -c \"x = list(range(100000))\"") + .await + .unwrap(); + assert_ne!(r.exit_code, 0, "Tight memory limit should cause failure"); + } + + #[tokio::test] + async fn custom_recursion_limit() { + let limits = PythonLimits::default().max_recursion(5); + let mut bash = bash_python_limits(limits); + let r = bash + .exec("python3 -c \"def deep(n):\n if n <= 0:\n return 0\n return deep(n-1) + 1\nprint(deep(100))\"") + .await + .unwrap(); + assert_ne!(r.exit_code, 0, "Should hit recursion limit with depth=5"); + } + + #[tokio::test] + async fn generous_limits_succeed() { + let limits = PythonLimits::default() + .max_allocations(10_000_000) + .max_memory(128 * 1024 * 1024); + let mut bash = bash_python_limits(limits); + 
let r = bash + .exec("python3 -c \"print(sum(range(1000)))\"") + .await + .unwrap(); + assert_eq!(r.exit_code, 0); + assert_eq!(r.stdout, "499500\n"); + } + + #[tokio::test] + async fn timeout_limit() { + let limits = PythonLimits::default().max_duration(Duration::from_millis(100)); + let mut bash = bash_python_limits(limits); + let r = bash.exec("python3 -c \"while True: pass\"").await.unwrap(); + assert_ne!(r.exit_code, 0, "Infinite loop should be killed by timeout"); + } +} + +// ============================================================================= +// 11. BASH INTEROP (PIPELINES, SUBST, CONDITIONALS) +// ============================================================================= + +mod bash_interop { + use super::*; + + #[tokio::test] + async fn python_in_pipeline() { + let mut bash = bash_python(); + let r = bash + .exec("python3 -c \"for i in range(5):\n print(f'item-{i}')\" | grep 'item-3'") + .await + .unwrap(); + assert_eq!(r.exit_code, 0); + assert_eq!(r.stdout.trim(), "item-3"); + } + + #[tokio::test] + async fn command_substitution() { + let mut bash = bash_python(); + let r = bash + .exec("result=$(python3 -c \"print(6 * 7)\")\necho \"result: $result\"") + .await + .unwrap(); + assert_eq!(r.exit_code, 0); + assert_eq!(r.stdout, "result: 42\n"); + } + + #[tokio::test] + async fn conditional_success() { + let mut bash = bash_python(); + let r = bash + .exec("if python3 -c \"print('ok')\"; then echo 'success'; else echo 'failure'; fi") + .await + .unwrap(); + assert_eq!(r.exit_code, 0); + assert!(r.stdout.contains("success")); + } + + #[tokio::test] + async fn conditional_failure() { + let mut bash = bash_python(); + let r = bash + .exec("if python3 -c \"1/0\" 2>/dev/null; then echo 'success'; else echo 'failure'; fi") + .await + .unwrap(); + assert_eq!(r.exit_code, 0); + assert!(r.stdout.contains("failure")); + } + + #[tokio::test] + async fn variable_in_python_code() { + let mut bash = bash_python(); + bash.exec("NAME=world").await.unwrap(); + 
let r = bash + .exec("python3 -c \"print('hello $NAME')\"") + .await + .unwrap(); + assert_eq!(r.exit_code, 0); + assert_eq!(r.stdout, "hello world\n"); + } + + #[tokio::test] + async fn python_exit_code_propagates() { + let mut bash = bash_python(); + let r = bash + .exec("python3 -c \"raise SystemExit(42)\" 2>/dev/null; echo $?") + .await + .unwrap(); + // The exit code should propagate (may be 1 for exception, not 42, depending on Monty) + assert!(r.stdout.contains("1") || r.stdout.contains("42")); + } + + #[tokio::test] + async fn multiple_python_calls() { + let mut bash = bash_python(); + let r = bash + .exec("python3 -c \"print('first')\"\npython3 -c \"print('second')\"") + .await + .unwrap(); + assert_eq!(r.exit_code, 0); + assert!(r.stdout.contains("first")); + assert!(r.stdout.contains("second")); + } + + #[tokio::test] + async fn python_vfs_shared_with_bash() { + // Write from Python, process with bash pipeline + let mut bash = bash_python(); + bash.exec("python3 -c \"from pathlib import Path\nPath('/tmp/numbers.txt').write_text('1\\n2\\n3\\n4\\n5\\n')\"") + .await + .unwrap(); + let r = bash.exec("wc -l < /tmp/numbers.txt").await.unwrap(); + assert_eq!(r.exit_code, 0); + assert_eq!(r.stdout.trim(), "5"); + } +} + +// ============================================================================= +// 12. 
BUILTIN FUNCTIONS +// ============================================================================= + +mod builtins { + use super::*; + + #[tokio::test] + async fn len_function() { + let mut bash = bash_python(); + let r = bash + .exec("python3 -c \"print(len([1,2,3]))\nprint(len('hello'))\"") + .await + .unwrap(); + assert_eq!(r.exit_code, 0); + assert_eq!(r.stdout, "3\n5\n"); + } + + #[tokio::test] + async fn range_enumerate_zip() { + let mut bash = bash_python(); + let r = bash + .exec("python3 -c \"for i, v in enumerate(['a','b','c']):\n print(f'{i}:{v}')\"") + .await + .unwrap(); + assert_eq!(r.exit_code, 0); + assert_eq!(r.stdout, "0:a\n1:b\n2:c\n"); + } + + #[tokio::test] + async fn map_filter() { + let mut bash = bash_python(); + let r = bash + .exec("python3 -c \"nums = list(range(6))\nevens = [x for x in nums if x % 2 == 0]\nprint(evens)\"") + .await + .unwrap(); + assert_eq!(r.exit_code, 0); + assert_eq!(r.stdout, "[0, 2, 4]\n"); + } + + #[tokio::test] + async fn sorted_reversed() { + let mut bash = bash_python(); + let r = bash + .exec("python3 -c \"print(sorted([3,1,4,1,5]))\nprint(list(reversed([1,2,3])))\"") + .await + .unwrap(); + assert_eq!(r.exit_code, 0); + assert_eq!(r.stdout, "[1, 1, 3, 4, 5]\n[3, 2, 1]\n"); + } + + #[tokio::test] + async fn min_max_sum() { + let mut bash = bash_python(); + let r = bash + .exec("python3 -c \"nums = [10, 20, 30, 40]\nprint(min(nums))\nprint(max(nums))\nprint(sum(nums))\"") + .await + .unwrap(); + assert_eq!(r.exit_code, 0); + assert_eq!(r.stdout, "10\n40\n100\n"); + } + + #[tokio::test] + async fn type_conversions() { + let mut bash = bash_python(); + let r = bash + .exec("python3 -c \"print(int('42'))\nprint(float('3.14'))\nprint(str(100))\nprint(bool(0))\nprint(bool(1))\"") + .await + .unwrap(); + assert_eq!(r.exit_code, 0); + assert_eq!(r.stdout, "42\n3.14\n100\nFalse\nTrue\n"); + } + + #[tokio::test] + async fn isinstance_check() { + let mut bash = bash_python(); + let r = bash + .exec("python3 -c 
\"print(isinstance(42, int))\nprint(isinstance('hi', str))\nprint(isinstance(42, str))\"") + .await + .unwrap(); + assert_eq!(r.exit_code, 0); + assert_eq!(r.stdout, "True\nTrue\nFalse\n"); + } + + #[tokio::test] + async fn all_any() { + let mut bash = bash_python(); + let r = bash + .exec("python3 -c \"print(all([True, True, True]))\nprint(all([True, False, True]))\nprint(any([False, False, True]))\"") + .await + .unwrap(); + assert_eq!(r.exit_code, 0); + assert_eq!(r.stdout, "True\nFalse\nTrue\n"); + } + + #[tokio::test] + async fn abs_round() { + let mut bash = bash_python(); + let r = bash + .exec("python3 -c \"print(abs(-42))\nprint(round(3.14159, 2))\"") + .await + .unwrap(); + assert_eq!(r.exit_code, 0); + assert_eq!(r.stdout, "42\n3.14\n"); + } + + #[tokio::test] + async fn zip_function() { + let mut bash = bash_python(); + let r = bash + .exec("python3 -c \"pairs = list(zip([1,2,3], ['a','b','c']))\nprint(pairs)\"") + .await + .unwrap(); + assert_eq!(r.exit_code, 0); + assert_eq!(r.stdout, "[(1, 'a'), (2, 'b'), (3, 'c')]\n"); + } +} + +// ============================================================================= +// 13. 
SECURITY +// ============================================================================= + +mod security { + use super::*; + + #[tokio::test] + async fn no_real_filesystem_access() { + let mut bash = bash_python(); + let r = bash + .exec("python3 -c \"from pathlib import Path\ntry:\n Path('/etc/passwd').read_text()\n print('LEAKED')\nexcept FileNotFoundError:\n print('safe')\"") + .await + .unwrap(); + assert_eq!(r.exit_code, 0); + assert!(r.stdout.contains("safe")); + assert!(!r.stdout.contains("LEAKED")); + } + + #[tokio::test] + async fn no_os_system() { + let mut bash = bash_python(); + let r = bash + .exec("python3 -c \"import os\nos.system('echo hacked')\"") + .await + .unwrap(); + assert_ne!(r.exit_code, 0); + assert!(!r.stdout.contains("hacked")); + } + + #[tokio::test] + async fn no_subprocess_module() { + let mut bash = bash_python(); + let r = bash.exec("python3 -c \"import subprocess\"").await.unwrap(); + assert_ne!(r.exit_code, 0); + } + + #[tokio::test] + async fn path_traversal_blocked() { + let mut bash = bash_python(); + let r = bash + .exec("python3 -c \"from pathlib import Path\ntry:\n Path('/tmp/../../../etc/passwd').read_text()\n print('ESCAPED')\nexcept FileNotFoundError:\n print('blocked')\"") + .await + .unwrap(); + assert!(!r.stdout.contains("ESCAPED")); + } + + #[tokio::test] + async fn env_vars_not_leaked_from_host() { + // Host env vars should NOT be visible to Python — only sandbox env + let mut bash = bash_python(); + let r = bash + .exec( + "python3 -c \"import os\nresult = os.getenv('PATH', 'not_found')\nprint(result)\"", + ) + .await + .unwrap(); + assert_eq!(r.exit_code, 0); + // PATH is not set in the sandbox env by default + assert_eq!(r.stdout, "not_found\n"); + } +} + +// ============================================================================= +// 14. 
EDGE CASES +// ============================================================================= + +mod edge_cases { + use super::*; + + #[tokio::test] + async fn empty_print() { + let mut bash = bash_python(); + let r = bash.exec("python3 -c \"print()\"").await.unwrap(); + assert_eq!(r.exit_code, 0); + assert_eq!(r.stdout, "\n"); + } + + #[tokio::test] + async fn multiple_print_args() { + let mut bash = bash_python(); + let r = bash.exec("python3 -c \"print(1, 2, 3)\"").await.unwrap(); + assert_eq!(r.exit_code, 0); + assert_eq!(r.stdout, "1 2 3\n"); + } + + #[tokio::test] + async fn print_with_sep() { + let mut bash = bash_python(); + let r = bash + .exec("python3 -c \"print(1, 2, 3, sep='-')\"") + .await + .unwrap(); + assert_eq!(r.exit_code, 0); + assert_eq!(r.stdout, "1-2-3\n"); + } + + #[tokio::test] + async fn print_with_end() { + let mut bash = bash_python(); + let r = bash + .exec("python3 -c \"print('a', end='')\nprint('b', end='')\nprint('c')\"") + .await + .unwrap(); + assert_eq!(r.exit_code, 0); + assert_eq!(r.stdout, "abc\n"); + } + + #[tokio::test] + async fn large_output() { + let mut bash = bash_python(); + let r = bash + .exec("python3 -c \"for i in range(100):\n print(f'line {i}')\"") + .await + .unwrap(); + assert_eq!(r.exit_code, 0); + let lines: Vec<&str> = r.stdout.lines().collect(); + assert_eq!(lines.len(), 100); + } + + #[tokio::test] + async fn unicode_output() { + let mut bash = bash_python(); + let r = bash.exec("python3 -c \"print('hello')\"").await.unwrap(); + assert_eq!(r.exit_code, 0); + assert!(r.stdout.contains("hello")); + } + + #[tokio::test] + async fn multiline_string() { + let mut bash = bash_python(); + let r = bash + .exec("python3 -c \"s = '''line1\nline2\nline3'''\nprint(s)\"") + .await + .unwrap(); + assert_eq!(r.exit_code, 0); + assert!(r.stdout.contains("line1")); + assert!(r.stdout.contains("line2")); + assert!(r.stdout.contains("line3")); + } + + #[tokio::test] + async fn unpacking() { + let mut bash = bash_python(); + 
let r = bash + .exec("python3 -c \"a, b, c = 1, 2, 3\nprint(a, b, c)\"") + .await + .unwrap(); + assert_eq!(r.exit_code, 0); + assert_eq!(r.stdout, "1 2 3\n"); + } + + #[tokio::test] + async fn ternary_expression() { + let mut bash = bash_python(); + let r = bash + .exec("python3 -c \"x = 5\nresult = 'big' if x > 10 else 'small'\nprint(result)\"") + .await + .unwrap(); + assert_eq!(r.exit_code, 0); + assert_eq!(r.stdout, "small\n"); + } + + #[tokio::test] + async fn walrus_operator() { + let mut bash = bash_python(); + let r = bash + .exec("python3 -c \"if (n := 10) > 5:\n print(f'n is {n}')\"") + .await + .unwrap(); + assert_eq!(r.exit_code, 0); + assert_eq!(r.stdout, "n is 10\n"); + } +} + +// ============================================================================= +// 15. COMPLEX SCRIPTS +// ============================================================================= + +mod complex_scripts { + use super::*; + + #[tokio::test] + async fn fibonacci() { + let mut bash = bash_python(); + let r = bash + .exec("python3 -c \"def fib(n):\n if n <= 1:\n return n\n return fib(n-1) + fib(n-2)\nprint(fib(10))\"") + .await + .unwrap(); + assert_eq!(r.exit_code, 0); + assert_eq!(r.stdout, "55\n"); + } + + #[tokio::test] + async fn data_processing() { + let mut bash = bash_python(); + let r = bash + .exec("python3 -c \"scores = [95, 87, 92, 78, 96]\ntotal = sum(scores)\navg = total / len(scores)\nprint(f'avg={avg}')\"") + .await + .unwrap(); + assert_eq!(r.exit_code, 0); + assert_eq!(r.stdout, "avg=89.6\n"); + } + + #[tokio::test] + async fn vfs_multifile_workflow() { + let mut bash = bash_python(); + // Write config from bash, process in python, read result from bash + bash.exec("mkdir -p /app && echo 'key=value' > /app/config.txt") + .await + .unwrap(); + bash.exec("python3 -c \"from pathlib import Path\ncfg = Path('/app/config.txt').read_text()\nk, v = cfg.strip().split('=')\nPath('/app/result.txt').write_text(f'{k.upper()}={v.upper()}')\"") + .await + .unwrap(); + 
let r = bash.exec("cat /app/result.txt").await.unwrap(); + assert_eq!(r.exit_code, 0); + assert_eq!(r.stdout, "KEY=VALUE"); + } + + #[tokio::test] + async fn generator_expression() { + let mut bash = bash_python(); + let r = bash + .exec("python3 -c \"total = sum(x**2 for x in range(10))\nprint(total)\"") + .await + .unwrap(); + assert_eq!(r.exit_code, 0); + assert_eq!(r.stdout, "285\n"); + } + + #[tokio::test] + async fn star_unpacking() { + let mut bash = bash_python(); + let r = bash + .exec("python3 -c \"first, *rest = [1, 2, 3, 4, 5]\nprint(first)\nprint(rest)\"") + .await + .unwrap(); + assert_eq!(r.exit_code, 0); + assert_eq!(r.stdout, "1\n[2, 3, 4, 5]\n"); + } +} diff --git a/crates/bashkit/tests/python_subprocess_tests.rs b/crates/bashkit/tests/python_subprocess_tests.rs deleted file mode 100644 index f609e610..00000000 --- a/crates/bashkit/tests/python_subprocess_tests.rs +++ /dev/null @@ -1,236 +0,0 @@ -// Integration tests for monty subprocess isolation (crash protection). -// These tests verify that Python execution works correctly when routed -// through the bashkit-monty-worker child process. -// -// The worker binary must be built first: `cargo build -p bashkit-monty-worker` -// (cargo builds all workspace bins into the same target dir, so -// find_worker_binary() locates it adjacent to the test binary.) - -#![cfg(feature = "python")] - -use bashkit::{Bash, PythonIsolation, PythonLimits}; -use serial_test::serial; - -/// Helper: create Bash with python in subprocess mode. -/// Relies on find_worker_binary() discovering the worker adjacent to the test exe. -/// Clears BASHKIT_MONTY_WORKER to avoid interference from env-mutating tests. 
-fn bash_subprocess() -> Bash { - std::env::remove_var("BASHKIT_MONTY_WORKER"); - Bash::builder() - .python_with_limits(PythonLimits::default().isolation(PythonIsolation::Subprocess)) - .build() -} - -// --------------------------------------------------------------------------- -// Basic functionality via subprocess -// --------------------------------------------------------------------------- - -#[tokio::test] -async fn subprocess_print() { - let mut bash = bash_subprocess(); - let r = bash.exec("python3 -c \"print('hello')\"").await.unwrap(); - assert_eq!(r.exit_code, 0); - assert_eq!(r.stdout, "hello\n"); -} - -#[tokio::test] -async fn subprocess_expression() { - let mut bash = bash_subprocess(); - let r = bash.exec("python3 -c \"2 + 3\"").await.unwrap(); - assert_eq!(r.exit_code, 0); - assert_eq!(r.stdout, "5\n"); -} - -#[tokio::test] -async fn subprocess_multiline() { - let mut bash = bash_subprocess(); - let r = bash - .exec("python3 -c \"x = 10\ny = 20\nprint(x + y)\"") - .await - .unwrap(); - assert_eq!(r.exit_code, 0); - assert_eq!(r.stdout, "30\n"); -} - -#[tokio::test] -async fn subprocess_syntax_error() { - let mut bash = bash_subprocess(); - let r = bash.exec("python3 -c \"def\"").await.unwrap(); - assert_eq!(r.exit_code, 1); - assert!( - r.stderr.contains("SyntaxError") || r.stderr.contains("Error"), - "stderr: {}", - r.stderr - ); -} - -#[tokio::test] -async fn subprocess_runtime_error() { - let mut bash = bash_subprocess(); - let r = bash.exec("python3 -c \"1/0\"").await.unwrap(); - assert_eq!(r.exit_code, 1); - assert!( - r.stderr.contains("ZeroDivisionError"), - "stderr: {}", - r.stderr - ); -} - -#[tokio::test] -async fn subprocess_output_before_error() { - let mut bash = bash_subprocess(); - let r = bash - .exec("python3 -c \"print('before')\n1/0\"") - .await - .unwrap(); - assert_eq!(r.exit_code, 1); - assert_eq!(r.stdout, "before\n"); - assert!(r.stderr.contains("ZeroDivisionError")); -} - -// 
--------------------------------------------------------------------------- -// VFS bridging over IPC -// --------------------------------------------------------------------------- - -#[tokio::test] -async fn subprocess_vfs_read_write() { - let mut bash = bash_subprocess(); - bash.exec("echo -n 'hello from bash' > /tmp/test.txt") - .await - .unwrap(); - let r = bash - .exec("python3 -c \"from pathlib import Path\nprint(Path('/tmp/test.txt').read_text())\"") - .await - .unwrap(); - assert_eq!(r.exit_code, 0); - assert_eq!(r.stdout, "hello from bash\n"); -} - -#[tokio::test] -async fn subprocess_vfs_write_then_read() { - let mut bash = bash_subprocess(); - let r = bash - .exec( - "python3 -c \"from pathlib import Path\nPath('/tmp/out.txt').write_text('from python')\nprint(Path('/tmp/out.txt').read_text())\"", - ) - .await - .unwrap(); - assert_eq!(r.exit_code, 0); - assert_eq!(r.stdout, "from python\n"); -} - -#[tokio::test] -async fn subprocess_vfs_file_not_found() { - let mut bash = bash_subprocess(); - let r = bash - .exec("python3 -c \"from pathlib import Path\ntry:\n Path('/no/such/file').read_text()\nexcept FileNotFoundError as e:\n print('caught:', e)\"") - .await - .unwrap(); - assert_eq!(r.exit_code, 0); - assert!(r.stdout.contains("caught:"), "stdout: {}", r.stdout); -} - -#[tokio::test] -async fn subprocess_vfs_mkdir_iterdir() { - let mut bash = bash_subprocess(); - // Create /tmp first so mkdir /tmp/sub succeeds (VFS starts empty) - bash.exec("mkdir -p /tmp/sub").await.unwrap(); - let r = bash - .exec("python3 -c \"from pathlib import Path\nPath('/tmp/sub/a.txt').write_text('a')\nPath('/tmp/sub/b.txt').write_text('b')\nfor p in Path('/tmp/sub').iterdir():\n print(p.name)\"") - .await - .unwrap(); - assert_eq!(r.exit_code, 0, "stderr: {}", r.stderr); - assert!(r.stdout.contains("a.txt")); - assert!(r.stdout.contains("b.txt")); -} - -// --------------------------------------------------------------------------- -// Crash isolation (the whole point) -// 
--------------------------------------------------------------------------- - -#[tokio::test] -#[serial] -async fn subprocess_worker_crash_via_false_binary() { - // Use /bin/false as the worker — it exits immediately with code 1. - // This tests the "worker exited unexpectedly" path. - std::env::set_var("BASHKIT_MONTY_WORKER", "/bin/false"); - let mut bash = Bash::builder() - .python_with_limits(PythonLimits::default().isolation(PythonIsolation::Subprocess)) - .build(); - - let r = bash.exec("python3 -c \"print('hi')\"").await.unwrap(); - assert_ne!(r.exit_code, 0); - assert!( - r.stderr.contains("crashed") - || r.stderr.contains("exited unexpectedly") - || r.stderr.contains("error"), - "Expected crash/error message, got stderr: {}", - r.stderr - ); - - std::env::remove_var("BASHKIT_MONTY_WORKER"); -} - -// --------------------------------------------------------------------------- -// Resource limits via subprocess -// --------------------------------------------------------------------------- - -#[tokio::test] -async fn subprocess_recursion_limit() { - let mut bash = bash_subprocess(); - let r = bash.exec("python3 -c \"def r(): r()\nr()\"").await.unwrap(); - assert_ne!(r.exit_code, 0); - assert!( - r.stderr.contains("RecursionError") || r.stderr.contains("recursion"), - "stderr: {}", - r.stderr - ); -} - -// --------------------------------------------------------------------------- -// Auto mode fallback -// --------------------------------------------------------------------------- - -#[tokio::test] -#[serial] -async fn auto_mode_falls_back_to_in_process() { - // Point at a nonexistent worker, Auto mode should fall back to in-process - std::env::set_var("BASHKIT_MONTY_WORKER", "/nonexistent/worker"); - let mut bash = Bash::builder() - .python_with_limits(PythonLimits::default().isolation(PythonIsolation::Auto)) - .build(); - - let r = bash.exec("python3 -c \"print('fallback')\"").await.unwrap(); - // Auto falls back to in-process, which should succeed - 
assert_eq!(r.exit_code, 0); - assert_eq!(r.stdout, "fallback\n"); - - std::env::remove_var("BASHKIT_MONTY_WORKER"); -} - -#[tokio::test] -#[serial] -async fn subprocess_mode_fails_when_worker_missing() { - std::env::set_var("BASHKIT_MONTY_WORKER", "/nonexistent/worker"); - let mut bash = Bash::builder() - .python_with_limits(PythonLimits::default().isolation(PythonIsolation::Subprocess)) - .build(); - - let r = bash.exec("python3 -c \"print('hi')\"").await.unwrap(); - assert_ne!(r.exit_code, 0); - assert!( - r.stderr.contains("not found") || r.stderr.contains("No such file"), - "stderr: {}", - r.stderr - ); - - std::env::remove_var("BASHKIT_MONTY_WORKER"); -} - -#[tokio::test] -async fn subprocess_version() { - let mut bash = bash_subprocess(); - let r = bash.exec("python3 --version").await.unwrap(); - assert_eq!(r.exit_code, 0); - assert!(r.stdout.contains("Python 3.12.0")); -} diff --git a/crates/bashkit/tests/threat_model_tests.rs b/crates/bashkit/tests/threat_model_tests.rs index b52107bf..525b66f2 100644 --- a/crates/bashkit/tests/threat_model_tests.rs +++ b/crates/bashkit/tests/threat_model_tests.rs @@ -869,14 +869,12 @@ mod edge_cases { #[cfg(feature = "python")] mod python_security { use super::*; - use bashkit::{PythonIsolation, PythonLimits}; + use bashkit::PythonLimits; - /// Helper: create Bash with python builtins registered (in-process mode). - /// Uses InProcess explicitly to avoid interference from env-var-mutating - /// subprocess isolation tests that set BASHKIT_MONTY_WORKER. + /// Helper: create Bash with python builtins registered. fn bash_with_python() -> Bash { Bash::builder() - .python_with_limits(PythonLimits::default().isolation(PythonIsolation::InProcess)) + .python_with_limits(PythonLimits::default()) .build() } @@ -1208,216 +1206,10 @@ mod python_security { } } -// ============================================================================= -// 7b. 
PYTHON SUBPROCESS ISOLATION SECURITY TESTS (TM-PY-022 to TM-PY-026) -// -// These tests verify the security properties of the subprocess isolation mode -// for the Monty interpreter, covering crash isolation, env var leakage, -// IPC timeout, worker spoofing, and line size limits. -// ============================================================================= - -#[cfg(feature = "python")] -mod python_subprocess_security { - use bashkit::{Bash, PythonIsolation, PythonLimits}; - use serial_test::serial; - - /// Helper: create Bash with subprocess isolation. - fn bash_subprocess() -> Bash { - std::env::remove_var("BASHKIT_MONTY_WORKER"); - Bash::builder() - .python_with_limits(PythonLimits::default().isolation(PythonIsolation::Subprocess)) - .build() - } - - /// TM-PY-022: Worker crash does not crash the host process. - /// We use /bin/false as a fake worker that exits immediately. - #[tokio::test] - #[serial] - async fn threat_python_subprocess_crash_isolation() { - std::env::set_var("BASHKIT_MONTY_WORKER", "/bin/false"); - let mut bash = Bash::builder() - .python_with_limits(PythonLimits::default().isolation(PythonIsolation::Subprocess)) - .build(); - - let r = bash.exec("python3 -c \"print('hi')\"").await.unwrap(); - // Host is still alive (this code is running!) but Python failed - assert_ne!(r.exit_code, 0); - assert!( - r.stderr.contains("crashed") - || r.stderr.contains("exited unexpectedly") - || r.stderr.contains("error"), - "Expected crash message, got: {}", - r.stderr - ); - - std::env::remove_var("BASHKIT_MONTY_WORKER"); - } - - /// TM-PY-023: Spoofed worker binary (pointing to echo) should not give - /// VFS access or produce valid Python output. 
- #[tokio::test] - #[serial] - async fn threat_python_subprocess_worker_spoofing() { - // Point at /bin/echo — it will print its args and exit 0, - // but it won't speak the IPC protocol - std::env::set_var("BASHKIT_MONTY_WORKER", "/bin/echo"); - let mut bash = Bash::builder() - .python_with_limits(PythonLimits::default().isolation(PythonIsolation::Subprocess)) - .build(); - - let r = bash.exec("python3 -c \"print('hacked')\"").await.unwrap(); - // Should fail — /bin/echo doesn't speak IPC protocol - assert_ne!(r.exit_code, 0, "Spoofed worker should not succeed"); - assert!( - !r.stdout.contains("hacked"), - "Spoofed worker must not produce valid Python output" - ); - - std::env::remove_var("BASHKIT_MONTY_WORKER"); - } - - /// TM-PY-024: IPC timeout kills a hanging worker. - /// We use a tiny timeout to verify the timeout path works. - #[tokio::test] - #[serial] - async fn threat_python_subprocess_ipc_timeout() { - // Use 'sleep' as the worker — it ignores stdin and hangs - std::env::set_var("BASHKIT_MONTY_WORKER", "/bin/sleep"); - let mut bash = Bash::builder() - .python_with_limits( - PythonLimits::default() - .isolation(PythonIsolation::Subprocess) - // Very short timeout so the test runs fast - .max_duration(std::time::Duration::from_millis(500)), - ) - .build(); - - let start = std::time::Instant::now(); - let r = bash.exec("python3 -c \"print('hi')\"").await.unwrap(); - let elapsed = start.elapsed(); - - assert_ne!(r.exit_code, 0, "Hanging worker should fail"); - // Verify timeout actually fired (should be ~0.5s + 5s grace = ~5.5s max) - assert!( - elapsed < std::time::Duration::from_secs(30), - "Should have timed out, not waited 30s (took {:?})", - elapsed - ); - - std::env::remove_var("BASHKIT_MONTY_WORKER"); - } - - /// TM-PY-025: Worker process does not have access to host env vars. - /// We set a secret env var and verify the worker Python cannot read it - /// via os.getenv (which goes through the IPC bridge, not the host env). 
- #[tokio::test] - #[serial] - async fn threat_python_subprocess_env_isolation() { - // Set a secret in the host process - std::env::set_var("SECRET_API_KEY", "sk-super-secret-12345"); - let mut bash = bash_subprocess(); - - // Try to read the secret via Python os.getenv — should not find it - // because the Bash sandbox env doesn't include it - let r = bash - .exec("python3 -c \"import os\nresult = os.getenv('SECRET_API_KEY', 'not_found')\nprint(result)\"") - .await - .unwrap(); - - // The env var is not in the Bash sandbox's env (we didn't add it), - // so os.getenv should return the default - assert_eq!(r.exit_code, 0); - assert!( - r.stdout.contains("not_found"), - "Worker should not have access to host SECRET_API_KEY, got: {}", - r.stdout - ); - assert!( - !r.stdout.contains("sk-super-secret"), - "Must not leak host secret" - ); - - std::env::remove_var("SECRET_API_KEY"); - } - - /// TM-PY-015 via subprocess: VFS reads only from virtual filesystem, not host. - #[tokio::test] - #[serial] - async fn threat_python_subprocess_vfs_no_real_fs() { - let mut bash = bash_subprocess(); - - let r = bash - .exec( - "python3 -c \"from pathlib import Path\ntry:\n Path('/etc/passwd').read_text()\n print('LEAKED')\nexcept FileNotFoundError:\n print('safe')\"", - ) - .await - .unwrap(); - assert_eq!(r.exit_code, 0); - assert!( - r.stdout.contains("safe"), - "Should not access real filesystem" - ); - assert!( - !r.stdout.contains("LEAKED"), - "Must not leak real filesystem via subprocess" - ); - } - - /// TM-PY-017 via subprocess: Path traversal blocked in subprocess mode. 
- #[tokio::test] - #[serial] - async fn threat_python_subprocess_path_traversal() { - let mut bash = bash_subprocess(); - - let r = bash - .exec( - "python3 -c \"from pathlib import Path\ntry:\n Path('/tmp/../../../etc/passwd').read_text()\n print('ESCAPED')\nexcept FileNotFoundError:\n print('blocked')\"", - ) - .await - .unwrap(); - assert!( - !r.stdout.contains("ESCAPED"), - "Path traversal must not escape VFS in subprocess mode" - ); - } - - /// Subprocess mode should fail gracefully when worker binary doesn't exist. - #[tokio::test] - #[serial] - async fn threat_python_subprocess_missing_worker() { - std::env::set_var("BASHKIT_MONTY_WORKER", "/nonexistent/worker/binary"); - let mut bash = Bash::builder() - .python_with_limits(PythonLimits::default().isolation(PythonIsolation::Subprocess)) - .build(); - - let r = bash.exec("python3 -c \"print('hi')\"").await.unwrap(); - assert_ne!(r.exit_code, 0); - assert!( - r.stderr.contains("not found") || r.stderr.contains("No such file"), - "Should report worker not found, got: {}", - r.stderr - ); - - std::env::remove_var("BASHKIT_MONTY_WORKER"); - } - - /// TM-PY-004 via subprocess: No shell escape from Python in subprocess mode. - #[tokio::test] - #[serial] - async fn threat_python_subprocess_no_shell_escape() { - let mut bash = bash_subprocess(); - - let r = bash - .exec("python3 -c \"import os\nos.system('echo hacked')\"") - .await - .unwrap(); - assert_ne!(r.exit_code, 0, "os.system should not work in subprocess"); - assert!( - !r.stdout.contains("hacked"), - "Must not execute shell commands" - ); - } -} +// NOTE: Subprocess isolation tests (TM-PY-022 to TM-PY-026) were removed +// when the worker subprocess architecture was replaced with direct Monty +// integration. Resource limits and VFS isolation are now enforced directly +// by Monty's runtime within the host process. // ============================================================================= // 8. 
NESTING DEPTH SECURITY TESTS diff --git a/specs/006-threat-model.md b/specs/006-threat-model.md index 14edef96..5d32658b 100644 --- a/specs/006-threat-model.md +++ b/specs/006-threat-model.md @@ -898,10 +898,7 @@ This section maps former vulnerability IDs to the new threat ID scheme and track | Log value redaction | TM-LOG-001 to TM-LOG-004 | `logging.rs` | Yes | | Log injection prevention | TM-LOG-005, TM-LOG-006 | `logging.rs` | Yes | | Log value truncation | TM-LOG-007, TM-LOG-008 | `logging.rs` | Yes | -| Python subprocess isolation | TM-PY-022 | `builtins/python.rs` | Yes | -| Worker env clearing | TM-PY-025 | `builtins/python.rs` | Yes | -| IPC timeout | TM-PY-024 | `builtins/python.rs` | Yes | -| IPC line size limit | TM-PY-026 | `builtins/python.rs` | Yes | +| Python resource limits | TM-PY-001 to TM-PY-003 | `builtins/python.rs` | Yes | --- @@ -943,9 +940,6 @@ FsLimits::new() | Use network allowlist | TM-INF-010, TM-NET-* | Default denies all network access | | Sanitize output | TM-INJ-008 | Filter terminal escapes if displaying output | | Set appropriate limits | TM-DOS-* | Tune limits for your use case | -| Isolate tenants | TM-ISO-001 to TM-ISO-003 | Use separate Bash instances per tenant | -| Keep log redaction enabled | TM-LOG-001 to TM-LOG-004 | Don't disable redaction in production | -| Secure worker binary path | TM-PY-023 | Don't let untrusted input control BASHKIT_MONTY_WORKER or PATH | --- @@ -1072,8 +1066,8 @@ The following components are fuzz-tested for robustness: ## Python / Monty Security (TM-PY) > **Experimental.** Monty is an early-stage Python interpreter that may have -> undiscovered crash or security bugs. Subprocess isolation mitigates host -> crashes, but this integration should be treated as experimental. +> undiscovered crash or security bugs. Resource limits are enforced by Monty's +> runtime. This integration should be treated as experimental. BashKit embeds the Monty Python interpreter (pydantic/monty) with VFS bridging. 
Python `pathlib.Path` operations are bridged to BashKit's virtual filesystem via @@ -1106,11 +1100,7 @@ events that BashKit intercepts and dispatches to the VFS. | TM-PY-019 | Crash on missing file | Medium | FileNotFoundError raised, not panic | `threat_python_vfs_error_handling` | | TM-PY-020 | Network access from Python | Critical | Monty has no socket/network module | `threat_python_vfs_no_network` | | TM-PY-021 | VFS mkdir escape | Medium | mkdir operates only in VFS | `threat_python_vfs_mkdir_sandboxed` | -| TM-PY-022 | Parser/VM crash kills host | Critical | Parser depth limit (since 0.0.4) prevents parser crashes; subprocess isolation catches remaining VM crashes | `subprocess_worker_crash_via_false_binary` | -| TM-PY-023 | Worker binary spoofing via env var / PATH | Critical | Caller responsibility (like TM-INF-001); document risk | `threat_python_subprocess_worker_spoofing` | -| TM-PY-024 | Worker hang blocks parent (no IPC timeout) | High | IPC reads wrapped in `tokio::time::timeout` (max_duration + 5s) | `threat_python_subprocess_ipc_timeout` | -| TM-PY-025 | Worker inherits host environment | High | `env_clear()` on worker Command; env vars passed only via IPC | `threat_python_subprocess_env_isolation` | -| TM-PY-026 | Unbounded IPC response causes parent OOM | High | IPC line size capped at 16 MB | `threat_python_subprocess_ipc_line_limit` | +| TM-PY-022 | Parser/VM crash kills host | Critical | Parser depth limit (since 0.0.4) prevents parser crashes; Monty runs in-process with resource limits, so a remaining VM crash (e.g. a segfault) is not isolated from the host | — (removed: subprocess tests no longer applicable) | ### VFS Bridge Security Properties @@ -1125,34 +1115,12 @@ events that BashKit intercepts and dispatches to the VFS. 5. **Resource isolation**: Monty's own limits (time, memory, allocations, recursion) are enforced independently of BashKit's shell limits. 
-### Subprocess Isolation (Crash Protection) +### Direct Integration -When `PythonIsolation::Subprocess` (or `Auto` with worker available), Monty runs -in a child process (`bashkit-monty-worker`). This isolates the host from parser -segfaults and other fatal crashes. - -**IPC Architecture:** -``` -Parent (bashkit) Child (bashkit-monty-worker) - │ │ - │── Init {code, limits} ──────────────>│ - │ │── Parse + execute - │<── OsCall {function, args} ─────────│ (pauses at VFS op) - │── OsResponse {result} ──────────────>│ - │ │── Resume execution - │<── Complete {result, output} ────────│ -``` - -**Security properties:** -1. Worker crashes (SIGSEGV, SIGABRT) → parent gets child exit status, not crash -2. Worker env cleared (TM-PY-025): no host env var leakage -3. IPC timeout (TM-PY-024): worker hang → parent kills after max_duration + 5s -4. IPC line limit (TM-PY-026): max 16 MB per JSON line -5. VFS operations bridged through parent — worker never touches real filesystem - -**Caller Responsibility (TM-PY-023):** The `BASHKIT_MONTY_WORKER` env var or -PATH ordering controls which binary is spawned. Callers must ensure these are -not attacker-controlled. This is analogous to TM-INF-001 (env var sanitization). +Monty runs directly in the host process. Resource limits (memory, allocations, +time, recursion) are enforced by Monty's own runtime, not by process isolation. +All VFS operations are bridged in-process — Python code never touches the real +filesystem. ### Supported OsCall Operations diff --git a/specs/011-python-builtin.md b/specs/011-python-builtin.md index 949b5def..72649988 100644 --- a/specs/011-python-builtin.md +++ b/specs/011-python-builtin.md @@ -1,9 +1,9 @@ # 011: Python Builtin (Monty) > **Experimental.** Monty is an early-stage Python interpreter that may have -> undiscovered crash or security bugs. Subprocess isolation mitigates host -> crashes, but do not rely on it for untrusted-input safety without -> additional hardening. 
+> undiscovered crash or security bugs. Resource limits are enforced by Monty's +> runtime. Do not rely on them for untrusted-input safety without additional +> hardening. ## Status Implemented (experimental) @@ -175,54 +175,25 @@ Monty pauses execution at filesystem operations, yields an `OsCall` event with the operation type and arguments, BashKit bridges it to the VFS, and resumes execution with the result (or a Python exception). -### Subprocess Isolation (Crash Protection) +### Direct Integration -Monty runs in-process by default. Since Monty 0.0.4, the parser enforces a -nesting depth limit (200 in release, 35 in debug) to prevent stack overflow -from deeply nested expressions. However, undiscovered VM bugs could still -cause a segfault, which `catch_unwind` cannot catch (it's an OS signal, not -a Rust panic). - -To mitigate this, BashKit can run Monty in a separate `bashkit-monty-worker` -subprocess. If the worker segfaults, the parent catches the child exit and -returns a shell error (exit code 139 for SIGSEGV) instead of crashing. - -**Configuration:** +Monty runs directly in the host process. Resource limits (memory, allocations, +time, recursion) are enforced by Monty's own runtime. Since Monty 0.0.4, the +parser enforces a nesting depth limit (200 in release, 35 in debug) to prevent +stack overflow from deeply nested expressions. 
```rust -use bashkit::{Bash, PythonIsolation, PythonLimits}; +use bashkit::{Bash, PythonLimits}; -// Auto (default): use subprocess if worker found, else in-process +// Default limits let bash = Bash::builder().python().build(); -// Force subprocess mode (fails if worker binary missing) -let bash = Bash::builder() - .python_with_limits( - PythonLimits::default().isolation(PythonIsolation::Subprocess) - ) - .build(); - -// Force in-process mode (no crash isolation) +// Custom limits let bash = Bash::builder() - .python_with_limits( - PythonLimits::default().isolation(PythonIsolation::InProcess) - ) + .python_with_limits(PythonLimits::default().max_duration(std::time::Duration::from_secs(5))) .build(); ``` -**Worker binary discovery:** `BASHKIT_MONTY_WORKER` env var → adjacent to -current executable → PATH lookup. - -**IPC protocol:** JSON lines over stdin/stdout. The worker pauses at each -`OsCall` (VFS operation), sends it to the parent, the parent bridges it to -the VFS and responds. The protocol is defined in `bashkit-monty-worker` crate. - -``` -Parent → Worker: Init { code, filename, limits } -Worker → Parent: OsCall { function, args, kwargs } | Complete | Error -Parent → Worker: OsResponse { result } -``` - ### Security See `specs/006-threat-model.md` section "Python / Monty Security (TM-PY)" diff --git a/supply-chain/config.toml b/supply-chain/config.toml index c74fd0dd..c57e578d 100644 --- a/supply-chain/config.toml +++ b/supply-chain/config.toml @@ -1901,3 +1901,7 @@ criteria = "safe-to-deploy" [[exemptions.zmij]] version = "1.0.20" criteria = "safe-to-deploy" + +[[exemptions.zmij]] +version = "1.0.21" +criteria = "safe-to-deploy"