From 8961ef9af159153df87387b8a72dd8ac3fee1eb4 Mon Sep 17 00:00:00 2001 From: Nathan McDougall Date: Tue, 26 May 2026 11:17:39 +1200 Subject: [PATCH 1/2] refactor: centralise patch-error classification and apply stepdown rule Add `_PatchErrorKind` enum and `_classify_patch_error()` to confine all yamlpatch error-message substrings in one place. Refactor `append`, `insert`, `extend_list`, and `sync` to branch on the enum rather than matching raw strings directly. Add `TestPatchErrorStringPins` to `test_edge_cases.py` to pin each raw yamlpatch substring against a live Rust error so any upstream change to the error messages is caught immediately. Apply stepdown rule throughout `document.py` and `editor.py`: - Move `_apply_patches`/`_from_core` to after `find_index` - Move `_create_at`/`_is_empty_document` to after `upsert` - Swap `prune_remove`/`remove` so the delegating wrapper comes first - Move module-level private helpers to after the `Document` class - Move `Editor.document` property to after all delegation methods --- src/yamltrip/document.py | 261 +++++++++++++++++++++------------------ src/yamltrip/editor.py | 16 +-- tests/test_edge_cases.py | 65 +++++++++- 3 files changed, 214 insertions(+), 128 deletions(-) diff --git a/src/yamltrip/document.py b/src/yamltrip/document.py index c8fc2d5..3847921 100644 --- a/src/yamltrip/document.py +++ b/src/yamltrip/document.py @@ -2,6 +2,7 @@ from __future__ import annotations +import enum from pathlib import Path from typing import TYPE_CHECKING, Any, cast @@ -53,59 +54,6 @@ def _make_route(keys: Sequence[KeyPart]) -> Route: return Route(list(keys)) -def _ensure_str_keys(keys: tuple[KeyPart, ...]) -> tuple[str, ...]: - """Validate all keys are strings for creation operations. - - Raises PatchError if any key is an int (cannot create sequences via upsert). - """ - for k in keys: - if isinstance(k, int): - msg = ( - f"Cannot create intermediate structure with integer key {k}; " - "only string keys can create new mappings" - ) - raise PatchError(msg) - return cast("tuple[str, ...]", keys) - - -def _flow_seq_replacements( - core_doc: CoreDocument, - old_value: Any, - new_value: Any, - path: tuple[KeyPart, ...], -) -> list[Patch]: - """Find flow sequences that need modification and emit targeted replace patches.""" - patches: list[Patch] = [] - - if isinstance(old_value, list) and isinstance(new_value, list): - if old_value != new_value: - route = _make_route(path) - try: - feature = core_doc.query_exact(route) - if feature and feature.kind == FeatureKind.FlowSequence: - patches.append(Patch(route=route, operation=Op.replace(new_value))) - return patches - except (KeyError, ValueError): - pass - # Recurse into shared list elements to find nested flow sequences - for i in range(min(len(old_value), len(new_value))): - sub_patches = _flow_seq_replacements( - core_doc, old_value[i], new_value[i], (*path, i) - ) - patches.extend(sub_patches) - return patches - - if isinstance(old_value, dict) and isinstance(new_value, dict): - for key in new_value: - if key in old_value: - sub_patches = _flow_seq_replacements( - core_doc, old_value[key], new_value[key], (*path, key) - ) - patches.extend(sub_patches) - - return patches - - class Document: """An immutable YAML document. @@ -128,22 +76,6 @@ def __init__(self, source: str) -> None: raise ParseError(str(e)) from None self._source: str = source - @classmethod - def _from_core(cls, core_doc: CoreDocument) -> Document: - """Construct a Document from an already-parsed CoreDocument.""" - obj = object.__new__(cls) - obj._core_doc = core_doc - obj._source = core_doc.source() - return obj - - def _apply_patches(self, patches: list[Patch]) -> Document: - """Apply patches to this document and return a new Document.""" - try: - core_doc = self._core_doc.apply_patches(patches) - except RuntimeError as e: - raise PatchError(str(e)) from None - return Document._from_core(core_doc) - @property def source(self) -> str: """The current YAML source text.""" @@ -267,9 +199,34 @@ def add(self, *keys: KeyPart, key: str, value: Any) -> Document: patch = Patch(route=route, operation=op) return self._apply_patches([patch]) - def _is_empty_document(self) -> bool: - """True if the document has no root data node.""" - return not self._core_doc.query_exists(_make_route(())) + def upsert(self, *keys: KeyPart, value: Any) -> Document: + """Replace if exists, create (with intermediate mappings) if not.""" + if not keys: + if self._is_empty_document(): + msg = ( + "Cannot replace root of an empty document; provide at least one key" + ) + raise PatchError(msg) + route = _make_route(()) + op = Op.replace(value) + patch = Patch(route=route, operation=op) + return self._apply_patches([patch]) + + full_route = _make_route(keys) + if self._core_doc.query_exists(full_route): + return self.replace(*keys, value=value) + + # Find deepest existing ancestor + for depth in range(len(keys) - 1, 0, -1): + ancestor_keys = keys[:depth] + ancestor_route = _make_route(ancestor_keys) + if self._core_doc.query_exists(ancestor_route): + return self._create_at( + ancestor_keys, _ensure_str_keys(keys[depth:]), value + ) + + # No path exists — add at root + return self._create_at((), _ensure_str_keys(keys), value) def _create_at( self, @@ -315,34 +272,13 @@ def _create_at( patch = Patch(route=route, operation=op) return self._apply_patches([patch]) - def upsert(self, *keys: KeyPart, value: Any) -> Document: - """Replace if exists, create (with intermediate mappings) if not.""" - if not keys: - if self._is_empty_document(): - msg = ( - "Cannot replace root of an empty document; provide at least one key" - ) - raise PatchError(msg) - route = _make_route(()) - op = Op.replace(value) - patch = Patch(route=route, operation=op) - return self._apply_patches([patch]) - - full_route = _make_route(keys) - if self._core_doc.query_exists(full_route): - return self.replace(*keys, value=value) - - # Find deepest existing ancestor - for depth in range(len(keys) - 1, 0, -1): - ancestor_keys = keys[:depth] - ancestor_route = _make_route(ancestor_keys) - if self._core_doc.query_exists(ancestor_route): - return self._create_at( - ancestor_keys, _ensure_str_keys(keys[depth:]), value - ) + def _is_empty_document(self) -> bool: + """True if the document has no root data node.""" + return not self._core_doc.query_exists(_make_route(())) - # No path exists — add at root - return self._create_at((), _ensure_str_keys(keys), value) + def prune_remove(self, *keys: KeyPart) -> Document: + """Remove key and prune empty parents.""" + return self.remove(*keys, prune=True) def remove(self, *keys: KeyPart, prune: bool = False) -> Document: """Remove the key/index at path.""" @@ -364,10 +300,6 @@ def remove(self, *keys: KeyPart, prune: bool = False) -> Document: break return doc - def prune_remove(self, *keys: KeyPart) -> Document: - """Remove key and prune empty parents.""" - return self.remove(*keys, prune=True) - def append(self, *keys: KeyPart, value: Any) -> Document: """Append a single item to the sequence at path.""" route = _make_route(keys) @@ -376,15 +308,14 @@ def append(self, *keys: KeyPart, value: Any) -> Document: try: return self._apply_patches([patch]) except PatchError as e: - msg = str(e) - # yamlpatch raises "...flow sequence..." for append on FlowSequence nodes - if "flow sequence" in msg: + kind = _classify_patch_error(e) + if kind == _PatchErrorKind.FLOW_SEQUENCE: current = self[keys] new_list = [*list(current), value] replace_op = Op.replace(new_list) return self._apply_patches([Patch(route=route, operation=replace_op)]) - if "only permitted against sequence" in msg: - raise NodeTypeError(msg) from None + if kind == _PatchErrorKind.NOT_A_SEQUENCE: + raise NodeTypeError(str(e)) from None raise def insert(self, *keys: KeyPart, index: int, value: Any) -> Document: @@ -399,14 +330,13 @@ def insert(self, *keys: KeyPart, index: int, value: Any) -> Document: try: return self._apply_patches([patch]) except PatchError as e: - msg = str(e) - # Rust apply_insert_at raises "expected BlockSequence, got ..." for - # both FlowSequence and non-sequence nodes (Scalar, BlockMapping, etc.) - if "expected BlockSequence" not in msg: + # Rust apply_insert_at raises BLOCK_SEQUENCE_EXPECTED for both + # FlowSequence and non-sequence nodes (Scalar, BlockMapping, etc.) + if _classify_patch_error(e) != _PatchErrorKind.BLOCK_SEQUENCE_EXPECTED: raise current = self[keys] if not isinstance(current, list): - raise NodeTypeError(msg) from None + raise NodeTypeError(str(e)) from None new_list: list[Any] = list(current) new_list.insert(index, value) replace_op = Op.replace(new_list) @@ -421,15 +351,14 @@ def extend_list(self, *keys: KeyPart, values: Sequence[Any]) -> Document: try: return self._apply_patches(patches) except PatchError as e: - msg = str(e) - # yamlpatch raises "...flow sequence..." for append on FlowSequence nodes - if "flow sequence" in msg: + kind = _classify_patch_error(e) + if kind == _PatchErrorKind.FLOW_SEQUENCE: current = self[keys] new_list = [*list(current), *values] replace_op = Op.replace(new_list) return self._apply_patches([Patch(route=route, operation=replace_op)]) - if "only permitted against sequence" in msg: - raise NodeTypeError(msg) from None + if kind == _PatchErrorKind.NOT_A_SEQUENCE: + raise NodeTypeError(str(e)) from None raise def remove_from_list(self, *keys: KeyPart, values: Sequence[Any]) -> Document: @@ -543,7 +472,7 @@ def sync(self, *keys: KeyPart, value: Any) -> Document: try: return doc._apply_patches(patches) except PatchError as e: - if "expected BlockSequence" not in str(e): + if _classify_patch_error(e) != _PatchErrorKind.BLOCK_SEQUENCE_EXPECTED: raise # Fallback: a flow sequence was missed by pre-detection (e.g. due to # list reordering). Replace the entire synced value. @@ -634,3 +563,97 @@ def find_index(self, *keys: KeyPart, where: dict[str, Any]) -> int | None: if all(k in entry and entry[k] == v for k, v in where.items()): return i return None + + def _apply_patches(self, patches: list[Patch]) -> Document: + """Apply patches to this document and return a new Document.""" + try: + core_doc = self._core_doc.apply_patches(patches) + except RuntimeError as e: + raise PatchError(str(e)) from None + return Document._from_core(core_doc) + + @classmethod + def _from_core(cls, core_doc: CoreDocument) -> Document: + """Construct a Document from an already-parsed CoreDocument.""" + obj = object.__new__(cls) + obj._core_doc = core_doc + obj._source = core_doc.source() + return obj + + +def _ensure_str_keys(keys: tuple[KeyPart, ...]) -> tuple[str, ...]: + """Validate all keys are strings for creation operations. + + Raises PatchError if any key is an int (cannot create sequences via upsert). + """ + for k in keys: + if isinstance(k, int): + msg = ( + f"Cannot create intermediate structure with integer key {k}; " + "only string keys can create new mappings" + ) + raise PatchError(msg) + return cast("tuple[str, ...]", keys) + + +def _flow_seq_replacements( + core_doc: CoreDocument, + old_value: Any, + new_value: Any, + path: tuple[KeyPart, ...], +) -> list[Patch]: + """Find flow sequences that need modification and emit targeted replace patches.""" + patches: list[Patch] = [] + + if isinstance(old_value, list) and isinstance(new_value, list): + if old_value != new_value: + route = _make_route(path) + try: + feature = core_doc.query_exact(route) + if feature and feature.kind == FeatureKind.FlowSequence: + patches.append(Patch(route=route, operation=Op.replace(new_value))) + return patches + except (KeyError, ValueError): + pass + # Recurse into shared list elements to find nested flow sequences + for i in range(min(len(old_value), len(new_value))): + sub_patches = _flow_seq_replacements( + core_doc, old_value[i], new_value[i], (*path, i) + ) + patches.extend(sub_patches) + return patches + + if isinstance(old_value, dict) and isinstance(new_value, dict): + for key in new_value: + if key in old_value: + sub_patches = _flow_seq_replacements( + core_doc, old_value[key], new_value[key], (*path, key) + ) + patches.extend(sub_patches) + + return patches + + +class _PatchErrorKind(enum.Enum): + """Classifies a PatchError by its originating yamlpatch error message.""" + + FLOW_SEQUENCE = "flow sequence" + NOT_A_SEQUENCE = "only permitted against sequence" + BLOCK_SEQUENCE_EXPECTED = "expected BlockSequence" + UNKNOWN = "" + + +def _classify_patch_error(err: PatchError) -> _PatchErrorKind: + """Return the kind of a PatchError based on its message string. + + All yamlpatch error-message substrings are confined here so that + callers can branch on the enum rather than matching raw strings. + """ + msg = str(err) + if _PatchErrorKind.FLOW_SEQUENCE.value in msg: + return _PatchErrorKind.FLOW_SEQUENCE + if _PatchErrorKind.NOT_A_SEQUENCE.value in msg: + return _PatchErrorKind.NOT_A_SEQUENCE + if _PatchErrorKind.BLOCK_SEQUENCE_EXPECTED.value in msg: + return _PatchErrorKind.BLOCK_SEQUENCE_EXPECTED + return _PatchErrorKind.UNKNOWN diff --git a/src/yamltrip/editor.py b/src/yamltrip/editor.py index 539f3d5..80bfacd 100644 --- a/src/yamltrip/editor.py +++ b/src/yamltrip/editor.py @@ -79,14 +79,6 @@ def original(self) -> Document: raise RuntimeError(msg) return self._original - @property - def document(self) -> Document: - """The current in-progress document.""" - if self._document is None: - msg = "Editor must be used as a context manager" - raise RuntimeError(msg) - return self._document - @property def root(self) -> Any: """The entire document parsed as a Python object.""" @@ -170,3 +162,11 @@ def query(self, *keys: KeyPart) -> Feature: def extract(self, feature: Feature) -> str: """Extract the raw YAML text for a feature.""" return self.document.extract(feature) + + @property + def document(self) -> Document: + """The current in-progress document.""" + if self._document is None: + msg = "Editor must be used as a context manager" + raise RuntimeError(msg) + return self._document diff --git a/tests/test_edge_cases.py b/tests/test_edge_cases.py index fc64a79..a598b6c 100644 --- a/tests/test_edge_cases.py +++ b/tests/test_edge_cases.py @@ -2,7 +2,13 @@ import pytest -from yamltrip.document import Document +from yamltrip._core import Op, Patch +from yamltrip.document import ( + Document, + _classify_patch_error, + _make_route, + _PatchErrorKind, +) from yamltrip.editor import Editor from yamltrip.errors import PatchError, QueryError @@ -109,3 +115,60 @@ def test_getitem_missing_raises(self): doc = Document("name: foo") with pytest.raises(QueryError): doc["missing"] + + +class TestPatchErrorStringPins: + """Pin the yamlpatch error substrings that _classify_patch_error depends on. + + If yamlpatch changes its error wording these tests fail loudly rather than + causing silent mis-classification at runtime. + """ + + def _raw_error(self, doc: Document, *keys: str, op: Op) -> str: + """Trigger a PatchError through _apply_patches and return its message.""" + route = _make_route(keys) + with pytest.raises(PatchError) as exc_info: + doc._apply_patches([Patch(route=route, operation=op)]) + return str(exc_info.value) + + def test_flow_sequence_substring(self): + """Op.append on a flow-sequence node raises an error containing 'flow sequence'.""" + doc = Document("items: [a, b]\n") + msg = self._raw_error(doc, "items", op=Op.append("c")) + assert _PatchErrorKind.FLOW_SEQUENCE.value in msg + + def test_not_a_sequence_substring(self): + """Op.append on a scalar node raises an error containing 'only permitted against sequence'.""" + doc = Document("name: hello\n") + msg = self._raw_error(doc, "name", op=Op.append("x")) + assert _PatchErrorKind.NOT_A_SEQUENCE.value in msg + + def test_block_sequence_expected_substring(self): + """Op.insert_at on a flow-sequence node raises an error containing 'expected BlockSequence'.""" + doc = Document("items: [a, b]\n") + msg = self._raw_error(doc, "items", op=Op.insert_at(index=0, value="c")) + assert _PatchErrorKind.BLOCK_SEQUENCE_EXPECTED.value in msg + + def test_classify_flow_sequence(self): + doc = Document("items: [a, b]\n") + msg = self._raw_error(doc, "items", op=Op.append("c")) + assert _classify_patch_error(PatchError(msg)) == _PatchErrorKind.FLOW_SEQUENCE + + def test_classify_not_a_sequence(self): + doc = Document("name: hello\n") + msg = self._raw_error(doc, "name", op=Op.append("x")) + assert _classify_patch_error(PatchError(msg)) == _PatchErrorKind.NOT_A_SEQUENCE + + def test_classify_block_sequence_expected(self): + doc = Document("items: [a, b]\n") + msg = self._raw_error(doc, "items", op=Op.insert_at(index=0, value="c")) + assert ( + _classify_patch_error(PatchError(msg)) + == _PatchErrorKind.BLOCK_SEQUENCE_EXPECTED + ) + + def test_classify_unknown(self): + assert ( + _classify_patch_error(PatchError("some unrelated error")) + == _PatchErrorKind.UNKNOWN + ) From 0d3581f81d29b7a5a2ff1b965454089ccb11ece0 Mon Sep 17 00:00:00 2001 From: Nathan McDougall Date: Tue, 26 May 2026 11:52:34 +1200 Subject: [PATCH 2/2] fix: add UNEXPECTED_NODE to _PatchErrorKind and remove last inline match Document.merge() was still branching on the raw string 'unexpected node' inline. Add _PatchErrorKind.UNEXPECTED_NODE = 'unexpected node' and the corresponding check in _classify_patch_error(), then update merge() to use the classifier. Also add two new TestPatchErrorStringPins tests: one pinning the raw substring (triggered by Op.merge_into on a scalar node) and one verifying the classifier round-trips correctly. --- src/yamltrip/document.py | 5 ++++- tests/test_edge_cases.py | 11 +++++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/src/yamltrip/document.py b/src/yamltrip/document.py index 3847921..959b1b2 100644 --- a/src/yamltrip/document.py +++ b/src/yamltrip/document.py @@ -503,7 +503,7 @@ def merge(self, *keys: KeyPart, value: Any) -> Document: try: return self.upsert(*normalized, value=value) except PatchError as e: - if "unexpected node" in str(e): + if _classify_patch_error(e) == _PatchErrorKind.UNEXPECTED_NODE: # Find deepest existing ancestor to report failing = normalized for i in range(len(normalized), 0, -1): @@ -640,6 +640,7 @@ class _PatchErrorKind(enum.Enum): FLOW_SEQUENCE = "flow sequence" NOT_A_SEQUENCE = "only permitted against sequence" BLOCK_SEQUENCE_EXPECTED = "expected BlockSequence" + UNEXPECTED_NODE = "unexpected node" UNKNOWN = "" @@ -656,4 +657,6 @@ def _classify_patch_error(err: PatchError) -> _PatchErrorKind: return _PatchErrorKind.NOT_A_SEQUENCE if _PatchErrorKind.BLOCK_SEQUENCE_EXPECTED.value in msg: return _PatchErrorKind.BLOCK_SEQUENCE_EXPECTED + if _PatchErrorKind.UNEXPECTED_NODE.value in msg: + return _PatchErrorKind.UNEXPECTED_NODE return _PatchErrorKind.UNKNOWN diff --git a/tests/test_edge_cases.py b/tests/test_edge_cases.py index a598b6c..655edd2 100644 --- a/tests/test_edge_cases.py +++ b/tests/test_edge_cases.py @@ -167,6 +167,17 @@ def test_classify_block_sequence_expected(self): == _PatchErrorKind.BLOCK_SEQUENCE_EXPECTED ) + def test_unexpected_node_substring(self): + """Op.merge_into on a scalar node raises an error containing 'unexpected node'.""" + doc = Document("name: scalar\n") + msg = self._raw_error(doc, "name", op=Op.merge_into("nested", {"foo": "bar"})) + assert _PatchErrorKind.UNEXPECTED_NODE.value in msg + + def test_classify_unexpected_node(self): + doc = Document("name: scalar\n") + msg = self._raw_error(doc, "name", op=Op.merge_into("nested", {"foo": "bar"})) + assert _classify_patch_error(PatchError(msg)) == _PatchErrorKind.UNEXPECTED_NODE + def test_classify_unknown(self): assert ( _classify_patch_error(PatchError("some unrelated error"))