diff --git a/doc/specs/2026-05-20-flow-sequence-fallback-design.md b/doc/specs/2026-05-20-flow-sequence-fallback-design.md new file mode 100644 index 0000000..cc3e9e4 --- /dev/null +++ b/doc/specs/2026-05-20-flow-sequence-fallback-design.md @@ -0,0 +1,151 @@ +# Flow Sequence Fallback + +**Date:** 2026-05-20 +**Issue:** #31 + +## Problem + +`insert()`, `append()`, `extend_list()`, and `sync()` raise `PatchError` when the target sequence uses flow syntax (e.g. `repos: [a, b, c]` or `repos: []`). The Rust `apply_insert_at` implementation requires a `BlockSequence` tree-sitter node — flow sequences have different grammar structure and are not supported. + +This forces callers to defensively wrap calls in try/except: + +```python +try: + doc = doc.sync("repos", value=repos_list) +except yamltrip.PatchError: + doc = doc.upsert("repos", value=repos_list) +``` + +The error is surprising because callers expressed high-level intent ("append to this list", "sync this list") and don't expect to care about YAML syntax details. + +## Design + +When a list-mutating operation hits a flow sequence, fall back to a get→mutate→replace strategy in Python. This eliminates the `PatchError` for flow sequences at the cost of losing inline formatting (flow → block conversion), which is acceptable because: + +1. An empty flow sequence (`[]`) has no formatting to preserve. +2. A non-empty single-line flow sequence cannot contain comments; multi-line flow sequences could in theory, but are vanishingly rare in practice. +3. The alternative is crashing. + +### Affected Methods + +| Method | Current Behavior | New Behavior | +|--------|-----------------|--------------| +| `sync()` | `PatchError` when diff produces `insert_at` on flow seq | Catch error, fall back to full `replace` with computed new value | +| `insert()` | `PatchError` always on flow seq | Get current list, insert in Python, `replace` full list | +| `append()` | `PatchError` always on flow seq | Get current list, append in Python, `replace` full list | +| `extend_list()` | `PatchError` always on flow seq | Get current list, extend in Python, `replace` full list | + +### Detection Strategy + +Catch `PatchError` from `_apply_patches` and inspect the message for the flow sequence signature (`"expected BlockSequence"`). If matched, execute the fallback. If not, re-raise. + +This is preferable to pre-checking the node type because: +- It keeps the happy path (block sequences) zero-cost. +- It avoids duplicating Rust-side type checking in Python. +- It naturally handles mixed patches where only some operations hit flow sequences. + +### Implementation: `sync()` + +```python +def sync(self, *keys: KeyPart, value: Any) -> Document: + ... + patches = _compute_patches(old_value, value, normalized) + if not patches: + return self + try: + return self._apply_patches(patches) + except PatchError as e: + if "expected BlockSequence" not in str(e): + raise + # Flow sequence — fall back to replacing the entire value + route = _make_route(normalized) + op = _core.Op.replace(value) + return self._apply_patches([_core.Patch(route=route, operation=op)]) +``` + +### Implementation: `insert()` + +```python +def insert(self, *keys: KeyPart, index: int, value: Any) -> Document: + route = _make_route(keys) + op = _core.Op.insert_at(index=index, value=value) + patch = _core.Patch(route=route, operation=op) + try: + return self._apply_patches([patch]) + except PatchError as e: + if "expected BlockSequence" not in str(e): + raise + current = self[keys] + # Use Python list.insert semantics + new_list = list(current) + new_list.insert(index, value) + replace_op = _core.Op.replace(new_list) + return self._apply_patches([_core.Patch(route=route, operation=replace_op)]) +``` + +### Implementation: `append()` + +```python +def append(self, *keys: KeyPart, value: Any) -> Document: + route = _make_route(keys) + op = _core.Op.append(value) + patch = _core.Patch(route=route, operation=op) + try: + return self._apply_patches([patch]) + except PatchError as e: + if "expected BlockSequence" not in str(e): + raise + current = self[keys] + new_list = list(current) + [value] + replace_op = _core.Op.replace(new_list) + return self._apply_patches([_core.Patch(route=route, operation=replace_op)]) +``` + +### Implementation: `extend_list()` + +```python +def extend_list(self, *keys: KeyPart, values: Sequence[Any]) -> Document: + if not values: + return self + route = _make_route(keys) + patches = [ + _core.Patch(route=route, operation=_core.Op.append(v)) for v in values + ] + try: + return self._apply_patches(patches) + except PatchError as e: + if "expected BlockSequence" not in str(e): + raise + current = self[keys] + new_list = list(current) + list(values) + replace_op = _core.Op.replace(new_list) + return self._apply_patches([_core.Patch(route=route, operation=replace_op)]) +``` + +## Behavioral Notes + +- The fallback always produces block sequence output. `repos: []` becomes: + ```yaml + repos: + - item1 + - item2 + ``` +- If the entire value is being replaced (e.g. `sync` from `[a]` to `[b]`), no fallback is needed — `Op.replace` works on any node type. +- Comment preservation: flow sequences cannot contain comments, so no comments are lost. +- The fallback does NOT apply to `remove()` or `remove_from_list()` — those already work on flow sequences via index paths. + +## Edge Cases + +| Scenario | Behavior | +|----------|----------| +| `sync("key", value=[])` on flow seq `key: [a, b]` | No fallback needed — `_diff_lists` emits a single `replace([])` | +| `sync("key", value=[a, b])` on `key: []` (flow) | Fallback: replace with `[a, b]` as block | +| `insert("key", index=0, value="x")` on `key: [a]` (flow) | Fallback: replace with `["x", "a"]` | +| `append("key", value="x")` on `key: []` (flow) | Fallback: replace with `["x"]` | +| Nested flow inside block: `- [a, b]` | Fallback applies to inner flow sequence if targeted directly | + +## Not In Scope + +- Teaching Rust to natively insert into flow sequences (potential future optimization). +- Converting block sequences to flow sequences. +- Handling flow mappings (`{a: 1, b: 2}`) — not affected since `sync` on mappings uses `add`/`replace`/`remove`, not `insert_at`. diff --git a/doc/specs/2026-05-20-node-type-error-design.md b/doc/specs/2026-05-20-node-type-error-design.md new file mode 100644 index 0000000..615a3c9 --- /dev/null +++ b/doc/specs/2026-05-20-node-type-error-design.md @@ -0,0 +1,116 @@ +# NodeTypeError for Type Mismatch Operations + +**Date:** 2026-05-20 +**Issue:** #31 + +## Problem + +When a caller uses a sequence operation (`append`, `remove_from_list`, etc.) on a node that isn't a sequence, or a mapping operation on a non-mapping, the error raised is a bare `PatchError`. This conflates "the operation is structurally impossible given YAML syntax limitations" with "you called the wrong method for this node type." + +The latter is a caller mistake — semantically a `TypeError` — and should be distinguishable from structural failures. + +## Design + +Introduce `NodeTypeError` as a subclass of both `PatchError` and `TypeError`: + +```python +class NodeTypeError(PatchError, TypeError): + """Raised when a node is not the expected type for the operation.""" +``` + +### Why dual inheritance? + +- Subclassing `PatchError` preserves backward compatibility: existing `except PatchError` handlers still catch it. +- Subclassing `TypeError` communicates the semantic meaning: the caller passed a path to the wrong kind of node. + +### Error Hierarchy After Change + +``` +Exception +├── TypeError +│ └── NodeTypeError (also under PatchError) +└── YAMLTripError + ├── ParseError + ├── QueryError + └── PatchError + ├── KeyExistsError + ├── KeyMissingError + └── NodeTypeError (also under TypeError) +``` + +## Affected Call Sites + +### Python-side checks (already explicit) + +| Method | Current | New | +|--------|---------|-----| +| `remove_from_list()` | `raise PatchError("Value at {keys} is not a list")` | `raise NodeTypeError(...)` | + +### Rust-side errors (detected by message pattern) + +| Error message pattern | Source operation | New behavior | +|----------------------|-----------------|--------------| +| `"Value is not a mapping"` | `add()` on non-mapping | Catch `PatchError`, re-raise as `NodeTypeError` | +| `"Value is not a sequence"` | `append()` on non-sequence | Catch `PatchError`, re-raise as `NodeTypeError` | + +### Detection in `_apply_patches` + +Rather than adding detection logic to `_apply_patches` itself (which is generic), each public method that expects a specific node type handles the re-raise: + +```python +def append(self, *keys: KeyPart, value: Any) -> Document: + route = _make_route(keys) + op = _core.Op.append(value) + patch = _core.Patch(route=route, operation=op) + try: + return self._apply_patches([patch]) + except PatchError as e: + if "expected BlockSequence" not in str(e): + # Not a flow sequence issue — check if it's a type mismatch + if "not a sequence" in str(e): + raise NodeTypeError(str(e)) from None + raise + # flow sequence fallback (from the other spec) + ... +``` + +## Error Messages + +The messages stay descriptive of what went wrong: + +```python +NodeTypeError("Value at ('config',) is not a list") +NodeTypeError("Patch failed: Value is not a sequence") +NodeTypeError("Patch failed: Value is not a mapping") +``` + +## Public API Changes + +### New export + +```python +# yamltrip/__init__.py +from yamltrip.errors import NodeTypeError + +# yamltrip/errors.py +class NodeTypeError(PatchError, TypeError): + """Raised when a node is not the expected type for the operation.""" +``` + +### Stub update + +```python +# yamltrip/_core.pyi — no change needed (error is Python-only) +``` + +## Backward Compatibility + +Fully backward compatible: +- `except PatchError` still catches `NodeTypeError`. +- `except TypeError` now also catches it (new capability). +- `except NodeTypeError` is available for callers who want to distinguish. + +## Not In Scope + +- Checking node types before calling Rust (would require adding Python-side query logic that duplicates Rust). The catch-and-re-raise approach is sufficient. +- Flow mapping type errors — `sync` on mappings uses `add`/`replace`/`remove` which don't have this problem. diff --git a/src/yamltrip/__init__.py b/src/yamltrip/__init__.py index eb9431c..eb80f05 100644 --- a/src/yamltrip/__init__.py +++ b/src/yamltrip/__init__.py @@ -10,6 +10,7 @@ from yamltrip.errors import ( KeyExistsError, KeyMissingError, + NodeTypeError, ParseError, PatchError, QueryError, @@ -46,6 +47,7 @@ def edit(path: str | Path) -> Editor: "KeyExistsError", "KeyMissingError", "Location", + "NodeTypeError", "ParseError", "PatchError", "QueryError", diff --git a/src/yamltrip/document.py b/src/yamltrip/document.py index 1b0a04c..84e24ea 100644 --- a/src/yamltrip/document.py +++ b/src/yamltrip/document.py @@ -9,6 +9,7 @@ from yamltrip.errors import ( KeyExistsError, KeyMissingError, + NodeTypeError, ParseError, PatchError, QueryError, @@ -50,6 +51,46 @@ def _check_no_int_keys_for_creation(keys: Sequence[KeyPart]) -> None: raise PatchError(msg) +def _flow_seq_replacements( + core_doc: _core.Document, + old_value: Any, + new_value: Any, + path: tuple[KeyPart, ...], +) -> list[_core.Patch]: + """Find flow sequences that need modification and emit targeted replace patches.""" + patches: list[_core.Patch] = [] + + if isinstance(old_value, list) and isinstance(new_value, list): + if old_value != new_value: + route = _make_route(path) + try: + feature = core_doc.query_exact(route) + if feature and feature.kind == _core.FeatureKind.FlowSequence: + patches.append( + _core.Patch(route=route, operation=_core.Op.replace(new_value)) + ) + return patches + except (KeyError, ValueError): + pass + # Recurse into shared list elements to find nested flow sequences + for i in range(min(len(old_value), len(new_value))): + sub_patches = _flow_seq_replacements( + core_doc, old_value[i], new_value[i], (*path, i) + ) + patches.extend(sub_patches) + return patches + + if isinstance(old_value, dict) and isinstance(new_value, dict): + for key in new_value: + if key in old_value: + sub_patches = _flow_seq_replacements( + core_doc, old_value[key], new_value[key], (*path, key) + ) + patches.extend(sub_patches) + + return patches + + class Document: """An immutable YAML document. @@ -279,7 +320,21 @@ def append(self, *keys: KeyPart, value: Any) -> Document: route = _make_route(keys) op = _core.Op.append(value) patch = _core.Patch(route=route, operation=op) - return self._apply_patches([patch]) + try: + return self._apply_patches([patch]) + except PatchError as e: + msg = str(e) + # yamlpatch raises "...flow sequence..." for append on FlowSequence nodes + if "flow sequence" in msg: + current = self[keys] + new_list = [*list(current), value] + replace_op = _core.Op.replace(new_list) + return self._apply_patches( + [_core.Patch(route=route, operation=replace_op)] + ) + if "only permitted against sequence" in msg: + raise NodeTypeError(msg) from None + raise def insert(self, *keys: KeyPart, index: int, value: Any) -> Document: """Insert an item at a specific position in the sequence at path. @@ -290,7 +345,21 @@ def insert(self, *keys: KeyPart, index: int, value: Any) -> Document: route = _make_route(keys) op = _core.Op.insert_at(index=index, value=value) patch = _core.Patch(route=route, operation=op) - return self._apply_patches([patch]) + try: + return self._apply_patches([patch]) + except PatchError as e: + msg = str(e) + # Rust apply_insert_at raises "expected BlockSequence, got ..." for + # both FlowSequence and non-sequence nodes (Scalar, BlockMapping, etc.) + if "expected BlockSequence" not in msg: + raise + current = self[keys] + if not isinstance(current, list): + raise NodeTypeError(msg) from None + new_list = list(current) + new_list.insert(index, value) + replace_op = _core.Op.replace(new_list) + return self._apply_patches([_core.Patch(route=route, operation=replace_op)]) def extend_list(self, *keys: KeyPart, values: Sequence[Any]) -> Document: """Append multiple items to the sequence at path.""" @@ -300,14 +369,28 @@ def extend_list(self, *keys: KeyPart, values: Sequence[Any]) -> Document: patches = [ _core.Patch(route=route, operation=_core.Op.append(v)) for v in values ] - return self._apply_patches(patches) + try: + return self._apply_patches(patches) + except PatchError as e: + msg = str(e) + # yamlpatch raises "...flow sequence..." for append on FlowSequence nodes + if "flow sequence" in msg: + current = self[keys] + new_list = [*list(current), *values] + replace_op = _core.Op.replace(new_list) + return self._apply_patches( + [_core.Patch(route=route, operation=replace_op)] + ) + if "only permitted against sequence" in msg: + raise NodeTypeError(msg) from None + raise def remove_from_list(self, *keys: KeyPart, values: Sequence[Any]) -> Document: """Remove all occurrences of given values from the sequence at path.""" current_list = self[keys] if not isinstance(current_list, list): msg = f"Value at {keys} is not a list" - raise PatchError(msg) + raise NodeTypeError(msg) values_list = list(values) indices_to_remove = sorted( @@ -349,7 +432,27 @@ def sync(self, *keys: KeyPart, value: Any) -> Document: except (ValueError, KeyError): return self.upsert(*normalized, value=value) + # Pre-convert any flow sequences that will be modified. + # This targets only the affected leaf paths, preserving sibling formatting. + doc: Document = self + flow_patches = _flow_seq_replacements( + self._core_doc, old_value, value, normalized + ) + if flow_patches: + doc = doc._apply_patches(flow_patches) + # Re-read old_value from the now-converted document + old_value = doc._core_doc.parse_value(_make_route(normalized)) + patches = _compute_patches(old_value, value, normalized) if not patches: - return self - return self._apply_patches(patches) + return doc + try: + return doc._apply_patches(patches) + except PatchError as e: + if "expected BlockSequence" not in str(e): + raise + # Fallback: a flow sequence was missed by pre-detection (e.g. due to + # list reordering). Replace the entire synced value. + route = _make_route(normalized) + op = _core.Op.replace(value) + return self._apply_patches([_core.Patch(route=route, operation=op)]) diff --git a/src/yamltrip/errors.py b/src/yamltrip/errors.py index db49e65..9a6f14b 100644 --- a/src/yamltrip/errors.py +++ b/src/yamltrip/errors.py @@ -23,3 +23,7 @@ class KeyExistsError(PatchError): class KeyMissingError(PatchError): """Raised by replace() when the key doesn't exist.""" + + +class NodeTypeError(PatchError, TypeError): + """Raised when a node is not the expected type for the operation.""" diff --git a/tests/test_document.py b/tests/test_document.py index 84e4ecd..0465c47 100644 --- a/tests/test_document.py +++ b/tests/test_document.py @@ -5,6 +5,7 @@ from yamltrip.errors import ( KeyExistsError, KeyMissingError, + NodeTypeError, PatchError, QueryError, ) @@ -443,6 +444,26 @@ def test_extend_list(self): assert "b" in result assert "c" in result + def test_append_flow_sequence(self): + doc = Document("items: [a, b]\n") + doc2 = doc.append("items", value="c") + assert doc2["items"] == ["a", "b", "c"] + + def test_append_empty_flow_sequence(self): + doc = Document("items: []\n") + doc2 = doc.append("items", value="x") + assert doc2["items"] == ["x"] + + def test_extend_list_flow_sequence(self): + doc = Document("items: [a]\n") + doc2 = doc.extend_list("items", values=["b", "c"]) + assert doc2["items"] == ["a", "b", "c"] + + def test_extend_list_empty_flow_sequence(self): + doc = Document("items: []\n") + doc2 = doc.extend_list("items", values=["a", "b"]) + assert doc2["items"] == ["a", "b"] + def test_remove_from_list(self): doc = Document("items:\n - a\n - b\n - c") doc2 = doc.remove_from_list("items", values=["b"]) @@ -514,12 +535,59 @@ def test_insert_path_not_found(self): with pytest.raises(PatchError): doc.insert("missing", index=0, value="x") - def test_insert_flow_sequence_error(self): + def test_insert_flow_sequence_converts_to_block(self): + doc = Document("items: [a, b, c]\n") + doc2 = doc.insert("items", index=1, value="x") + assert doc2["items"] == ["a", "x", "b", "c"] + + def test_insert_empty_flow_sequence(self): + doc = Document("items: []\n") + doc2 = doc.insert("items", index=0, value="x") + assert doc2["items"] == ["x"] + + def test_insert_flow_sequence_append_semantics(self): + doc = Document("items: [a, b]\n") + doc2 = doc.insert("items", index=99, value="c") + assert doc2["items"] == ["a", "b", "c"] + + def test_insert_flow_sequence_negative_index(self): doc = Document("items: [a, b, c]\n") - with pytest.raises(PatchError, match="BlockSequence"): - doc.insert("items", index=1, value="x") + doc2 = doc.insert("items", index=-1, value="x") + assert doc2["items"] == ["a", "b", "x", "c"] def test_insert_nested_path(self): doc = Document("config:\n items:\n - a\n - b\n") doc2 = doc.insert("config", "items", index=1, value="x") assert doc2["config", "items"] == ["a", "x", "b"] + + +class TestNodeTypeError: + def test_remove_from_list_on_scalar_raises_node_type_error(self): + doc = Document("name: foo\n") + with pytest.raises(NodeTypeError, match="not a list"): + doc.remove_from_list("name", values=["foo"]) + + def test_remove_from_list_node_type_error_is_patch_error(self): + doc = Document("name: foo\n") + with pytest.raises(PatchError): + doc.remove_from_list("name", values=["foo"]) + + def test_append_on_scalar_raises_node_type_error(self): + doc = Document("name: foo\n") + with pytest.raises(NodeTypeError): + doc.append("name", value="bar") + + def test_append_on_mapping_raises_node_type_error(self): + doc = Document("config:\n a: 1\n") + with pytest.raises(NodeTypeError): + doc.append("config", value="x") + + def test_insert_on_scalar_raises_node_type_error(self): + doc = Document("name: foo\n") + with pytest.raises(NodeTypeError): + doc.insert("name", index=0, value="bar") + + def test_extend_list_on_scalar_raises_node_type_error(self): + doc = Document("name: foo\n") + with pytest.raises(NodeTypeError): + doc.extend_list("name", values=["a", "b"]) diff --git a/tests/test_errors.py b/tests/test_errors.py index 1288221..aa81d9b 100644 --- a/tests/test_errors.py +++ b/tests/test_errors.py @@ -3,6 +3,7 @@ from yamltrip.errors import ( KeyExistsError, KeyMissingError, + NodeTypeError, ParseError, PatchError, QueryError, @@ -29,6 +30,10 @@ def test_key_exists_error(self): def test_key_missing_error(self): assert issubclass(KeyMissingError, PatchError) + def test_node_type_error_inherits_patch_error_and_type_error(self): + assert issubclass(NodeTypeError, PatchError) + assert issubclass(NodeTypeError, TypeError) + def test_raise_and_catch_base(self): msg = "key already exists" with pytest.raises(YAMLTripError, match=msg): diff --git a/tests/test_sync.py b/tests/test_sync.py index 2f727ce..fdde31d 100644 --- a/tests/test_sync.py +++ b/tests/test_sync.py @@ -170,6 +170,55 @@ def test_sync_empty_list_removes_all_items(self): assert doc2["items"] == [] +class TestSyncFlowSequence: + def test_sync_empty_flow_sequence_to_nonempty(self): + doc = Document("repos: []\n") + doc2 = doc.sync("repos", value=["a", "b"]) + assert doc2["repos"] == ["a", "b"] + + def test_sync_nonempty_flow_sequence_append(self): + doc = Document("items: [a, b]\n") + doc2 = doc.sync("items", value=["a", "b", "c"]) + assert doc2["items"] == ["a", "b", "c"] + + def test_sync_nonempty_flow_sequence_insert(self): + doc = Document("items: [a, c]\n") + doc2 = doc.sync("items", value=["a", "b", "c"]) + assert doc2["items"] == ["a", "b", "c"] + + def test_sync_flow_sequence_replace_only_no_fallback_needed(self): + doc = Document("items: [a, b]\n") + doc2 = doc.sync("items", value=["x", "y"]) + assert doc2["items"] == ["x", "y"] + + def test_sync_flow_sequence_to_empty(self): + doc = Document("items: [a, b]\n") + doc2 = doc.sync("items", value=[]) + assert doc2["items"] == [] + + def test_sync_flow_sequence_nested_in_mapping(self): + doc = Document("config:\n items: []\n") + doc2 = doc.sync("config", "items", value=["x"]) + assert doc2["config", "items"] == ["x"] + + def test_sync_flow_sequence_preserves_sibling_formatting(self): + doc = Document("config:\n name: foo # important\n items: []\n") + doc2 = doc.sync("config", value={"name": "foo", "items": ["a"]}) + assert doc2["config", "items"] == ["a"] + assert "# important" in doc2.source + + def test_sync_flow_sequence_nested_in_list_of_dicts(self): + doc = Document("items:\n - repos: []\n name: foo\n") + doc2 = doc.sync("items", value=[{"repos": ["new-repo"], "name": "foo"}]) + assert doc2["items", 0, "repos"] == ["new-repo"] + + def test_sync_flow_sequence_nested_in_list_of_dicts_multiple(self): + doc = Document("items:\n - tags: []\n - tags: [a]\n") + doc2 = doc.sync("items", value=[{"tags": ["x"]}, {"tags": ["a", "b"]}]) + assert doc2["items", 0, "tags"] == ["x"] + assert doc2["items", 1, "tags"] == ["a", "b"] + + class TestSyncNullValue: def test_sync_to_none(self): doc = Document("a: 1\n") diff --git a/uv.lock b/uv.lock index b1220b3..089e486 100644 --- a/uv.lock +++ b/uv.lock @@ -816,7 +816,7 @@ wheels = [ [[package]] name = "yamltrip" -version = "0.2.0" +version = "0.4.0" source = { editable = "." } [package.dev-dependencies]