From 74cdff76ce14835235a1983ba9ac9bb5fc90555b Mon Sep 17 00:00:00 2001 From: Nathan McDougall Date: Mon, 25 May 2026 17:51:20 +1200 Subject: [PATCH] fix: replace unsafe byte indexing with upfront span validation Direct byte indexing (source[span.0..span.1] and source[..span.0]) panics if yamlpath returns a span misaligned to UTF-8 boundaries, crashing the Python process. Replace with upfront is_char_boundary + bounds checks that return a recoverable error, then use direct indexing safely. Applied consistently to all 5 sites in document.rs: parse_value, apply_insert_at, and apply_complex_replace. Closes #44 --- src/document.rs | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/src/document.rs b/src/document.rs index ad23139..777acf9 100644 --- a/src/document.rs +++ b/src/document.rs @@ -94,6 +94,15 @@ impl PyDocument { match self.inner.query_exact(&r) { Ok(Some(feature)) => { let span = feature.location.byte_span; + // Note: span.0 <= span.1 is guaranteed by tree-sitter node construction. + if span.1 > source.len() + || !source.is_char_boundary(span.0) + || !source.is_char_boundary(span.1) + { + return Err(PyErr::new::( + "Feature span is out of bounds or not aligned to UTF-8 character boundaries", + )); + } let raw = &source[span.0..span.1]; // Calculate the column offset (in bytes) of the value // start relative to the beginning of its line, so we can @@ -285,6 +294,12 @@ fn apply_insert_at( .ok_or_else(|| format!("insert_at: item at index {resolved} not found"))?; let item_start = item_feature.location.byte_span.0; + if item_start > source.len() || !source.is_char_boundary(item_start) { + return Err( + "Feature span is out of bounds or not aligned to UTF-8 character boundaries" + .to_string(), + ); + } let line_start = source[..item_start] .rfind('\n') .map(|nl| nl + 1) @@ -343,6 +358,16 @@ fn apply_complex_replace( .query_pretty(route) .map_err(|e| format!("Query failed: {e}"))?; + let span = feature.location.byte_span; + // Note: span.0 <= span.1 is guaranteed by tree-sitter node construction. + if span.1 > source.len() || !source.is_char_boundary(span.0) || !source.is_char_boundary(span.1) + { + return Err( + "Feature span is out of bounds or not aligned to UTF-8 character boundaries" + .to_string(), + ); + } + let content_with_ws = doc.extract_with_leading_whitespace(&feature); let content = doc.extract(&feature);