diff --git a/src/document.rs b/src/document.rs index 614cc52..d4ad4b6 100644 --- a/src/document.rs +++ b/src/document.rs @@ -7,6 +7,14 @@ use crate::types::{PyFeature, PyFeatureKind, PyLocation, PyRoute}; #[pyclass(name = "Document", module = "yamltrip._core")] pub struct PyDocument { inner: yamlpath::Document, + source_hash: u64, +} + +fn hash_source(source: &str) -> u64 { + use std::hash::{Hash, Hasher}; + let mut hasher = std::collections::hash_map::DefaultHasher::new(); + source.hash(&mut hasher); + hasher.finish() } #[pymethods] @@ -16,7 +24,10 @@ impl PyDocument { let doc = yamlpath::Document::new(source).map_err(|e| { PyErr::new::(format!("Failed to parse YAML: {e}")) })?; - Ok(Self { inner: doc }) + Ok(Self { + source_hash: hash_source(doc.source()), + inner: doc, + }) } fn source(&self) -> &str { @@ -31,7 +42,7 @@ impl PyDocument { fn query_exact(&self, route: &PyRoute) -> PyResult> { let r = route.to_yamlpath_route(); match self.inner.query_exact(&r) { - Ok(Some(feature)) => Ok(Some(convert_feature(&feature))), + Ok(Some(feature)) => Ok(Some(convert_feature(&feature, self.source_hash))), Ok(None) => Ok(None), Err(e) => Err(PyErr::new::(format!( "Query failed: {e}" @@ -42,7 +53,7 @@ impl PyDocument { fn query_pretty(&self, route: &PyRoute) -> PyResult { let r = route.to_yamlpath_route(); match self.inner.query_pretty(&r) { - Ok(feature) => Ok(convert_feature(&feature)), + Ok(feature) => Ok(convert_feature(&feature, self.source_hash)), Err(e) => Err(PyErr::new::(format!( "Query failed: {e}" ))), @@ -50,6 +61,11 @@ impl PyDocument { } fn extract(&self, feature: &PyFeature) -> PyResult { + if feature.source_hash != self.source_hash { + return Err(PyErr::new::( + "Feature does not belong to this document", + )); + } let source = self.inner.source(); let start = feature.location.start; let end = feature.location.end; @@ -151,11 +167,14 @@ impl PyDocument { PyErr::new::(format!("Patch failed: {e}")) })?; - Ok(Self { inner: result }) + Ok(Self { + source_hash: hash_source(result.source()), + inner: result, + }) } } -fn convert_feature(feature: &yamlpath::Feature<'_>) -> PyFeature { +fn convert_feature(feature: &yamlpath::Feature<'_>, source_hash: u64) -> PyFeature { PyFeature { location: PyLocation { start: feature.location.byte_span.0, @@ -167,5 +186,6 @@ fn convert_feature(feature: &yamlpath::Feature<'_>) -> PyFeature { }), kind: PyFeatureKind::from(feature.kind()), is_multiline: feature.is_multiline(), + source_hash, } } diff --git a/src/types.rs b/src/types.rs index 35401a7..0661aca 100644 --- a/src/types.rs +++ b/src/types.rs @@ -164,6 +164,7 @@ pub struct PyFeature { pub kind: PyFeatureKind, #[pyo3(get)] pub is_multiline: bool, + pub source_hash: u64, } #[pymethods] diff --git a/tests/test_core_document.py b/tests/test_core_document.py index 6b430bd..5680b34 100644 --- a/tests/test_core_document.py +++ b/tests/test_core_document.py @@ -60,9 +60,17 @@ def test_sequence_query(self): assert doc.extract(feature) == "a" -class TestExtractCrossDocumentUTF8: - """Using a Feature from one document on another can produce byte offsets - that land mid-UTF-8 codepoint. This must raise ValueError, not crash.""" +class TestExtractCrossDocument: + """Using a Feature from one document on another must raise ValueError.""" + + def test_extract_cross_document_rejected(self): + doc_a = Document("x: y") + feature = doc_a.query_exact(Route(["x"])) + assert feature is not None + + doc_b = Document("a: b") + with pytest.raises(ValueError, match="does not belong"): + doc_b.extract(feature) def test_extract_mid_utf8_raises_not_panics(self): # "x: y" — scalar "y" is at byte offset 3..4 @@ -76,8 +84,8 @@ def test_extract_mid_utf8_raises_not_panics(self): # continuation byte, not a char boundary. doc_b = Document("\U0001f389: z") - # Should raise a clean ValueError, not a Rust panic / PanicException. - with pytest.raises(ValueError, match="UTF-8"): + # Should raise ValueError — now caught by source_hash check first. + with pytest.raises(ValueError, match="does not belong"): doc_b.extract(feature)