Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 25 additions & 5 deletions src/document.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,14 @@ use crate::types::{PyFeature, PyFeatureKind, PyLocation, PyRoute};
// Wrapper around a parsed `yamlpath::Document`, exposed to Python as
// `Document` in the `yamltrip._core` module (see the `pyclass` attribute).
#[pyclass(name = "Document", module = "yamltrip._core")]
pub struct PyDocument {
// The underlying parsed YAML document that queries are run against.
inner: yamlpath::Document,
// Fingerprint of the document's source text, computed with `hash_source`
// when the wrapper is constructed. It is copied into every feature produced
// by a query so that `extract` can reject a feature whose fingerprint does
// not match this document's.
source_hash: u64,
}

/// Returns a 64-bit fingerprint of `source`.
///
/// The value is produced by feeding the string through the standard
/// library's `DefaultHasher`, so equal source texts always map to the same
/// fingerprint within a build. It is used as a cheap identity tag for a
/// document's text, not as a cryptographic digest.
fn hash_source(source: &str) -> u64 {
    use std::collections::hash_map::DefaultHasher;
    use std::hash::{BuildHasher, BuildHasherDefault};

    // `BuildHasherDefault` creates the hasher via `DefaultHasher::default()`;
    // `hash_one` then hashes the value and finishes in one step.
    BuildHasherDefault::<DefaultHasher>::default().hash_one(source)
}

#[pymethods]
Expand All @@ -16,7 +24,10 @@ impl PyDocument {
let doc = yamlpath::Document::new(source).map_err(|e| {
PyErr::new::<pyo3::exceptions::PyValueError, _>(format!("Failed to parse YAML: {e}"))
})?;
Ok(Self { inner: doc })
Ok(Self {
source_hash: hash_source(doc.source()),
inner: doc,
})
}

fn source(&self) -> &str {
Expand All @@ -31,7 +42,7 @@ impl PyDocument {
fn query_exact(&self, route: &PyRoute) -> PyResult<Option<PyFeature>> {
let r = route.to_yamlpath_route();
match self.inner.query_exact(&r) {
Ok(Some(feature)) => Ok(Some(convert_feature(&feature))),
Ok(Some(feature)) => Ok(Some(convert_feature(&feature, self.source_hash))),
Ok(None) => Ok(None),
Err(e) => Err(PyErr::new::<pyo3::exceptions::PyKeyError, _>(format!(
"Query failed: {e}"
Expand All @@ -42,14 +53,19 @@ impl PyDocument {
fn query_pretty(&self, route: &PyRoute) -> PyResult<PyFeature> {
let r = route.to_yamlpath_route();
match self.inner.query_pretty(&r) {
Ok(feature) => Ok(convert_feature(&feature)),
Ok(feature) => Ok(convert_feature(&feature, self.source_hash)),
Err(e) => Err(PyErr::new::<pyo3::exceptions::PyKeyError, _>(format!(
"Query failed: {e}"
))),
}
}

fn extract(&self, feature: &PyFeature) -> PyResult<String> {
if feature.source_hash != self.source_hash {
return Err(PyErr::new::<pyo3::exceptions::PyValueError, _>(
"Feature does not belong to this document",
));
}
let source = self.inner.source();
let start = feature.location.start;
let end = feature.location.end;
Expand Down Expand Up @@ -151,11 +167,14 @@ impl PyDocument {
PyErr::new::<pyo3::exceptions::PyRuntimeError, _>(format!("Patch failed: {e}"))
})?;

Ok(Self { inner: result })
Ok(Self {
source_hash: hash_source(result.source()),
inner: result,
})
}
}

fn convert_feature(feature: &yamlpath::Feature<'_>) -> PyFeature {
fn convert_feature(feature: &yamlpath::Feature<'_>, source_hash: u64) -> PyFeature {
PyFeature {
location: PyLocation {
start: feature.location.byte_span.0,
Expand All @@ -167,5 +186,6 @@ fn convert_feature(feature: &yamlpath::Feature<'_>) -> PyFeature {
}),
kind: PyFeatureKind::from(feature.kind()),
is_multiline: feature.is_multiline(),
source_hash,
}
}
1 change: 1 addition & 0 deletions src/types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,7 @@ pub struct PyFeature {
pub kind: PyFeatureKind,
#[pyo3(get)]
pub is_multiline: bool,
pub source_hash: u64,
}

#[pymethods]
Expand Down
18 changes: 13 additions & 5 deletions tests/test_core_document.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,9 +60,17 @@ def test_sequence_query(self):
assert doc.extract(feature) == "a"


class TestExtractCrossDocumentUTF8:
"""Using a Feature from one document on another can produce byte offsets
that land mid-UTF-8 codepoint. This must raise ValueError, not crash."""
class TestExtractCrossDocument:
"""Using a Feature from one document on another must raise ValueError."""

def test_extract_cross_document_rejected(self):
doc_a = Document("x: y")
feature = doc_a.query_exact(Route(["x"]))
assert feature is not None

doc_b = Document("a: b")
with pytest.raises(ValueError, match="does not belong"):
doc_b.extract(feature)

def test_extract_mid_utf8_raises_not_panics(self):
# "x: y" — scalar "y" is at byte offset 3..4
Expand All @@ -76,8 +84,8 @@ def test_extract_mid_utf8_raises_not_panics(self):
# continuation byte, not a char boundary.
doc_b = Document("\U0001f389: z")

# Should raise a clean ValueError, not a Rust panic / PanicException.
with pytest.raises(ValueError, match="UTF-8"):
# Should raise ValueError — now caught by source_hash check first.
with pytest.raises(ValueError, match="does not belong"):
doc_b.extract(feature)


Expand Down
Loading