diff --git a/doc/specs/2026-05-22-empty-document-mutations-design.md b/doc/specs/2026-05-22-empty-document-mutations-design.md new file mode 100644 index 0000000..3d825c8 --- /dev/null +++ b/doc/specs/2026-05-22-empty-document-mutations-design.md @@ -0,0 +1,89 @@ +# Support Mutations on Empty Documents + +**Date:** 2026-05-22 +**Issue:** #34 + +## Problem + +Mutation methods that create structure (`upsert`, `add`, `sync`) raise `PatchError` when called on a document with no root data node: + +```python +doc = yamltrip.loads("") +doc.upsert("x", value=1) +# PatchError: YAML query error: syntax node 'stream' is missing named child 'document' +``` + +The underlying yamlpatch `Add` operation requires an existing mapping node at the target route. This forces callers to use a sentinel workaround. + +## Scope + +"Empty document" means any document with no root data node: +- Zero-length source (`""`) +- Whitespace-only source (`" \n"`) +- Comment-only source (`"# header\n"`) + +Integer keys on empty documents still error — consistent with existing behaviour. Only string keys can bootstrap a root mapping. + +## Design decisions + +### Detection + +Check whether the root route resolves to a data node. If it doesn't, the document is "empty" for our purposes. This covers all three cases above uniformly. + +### New Rust capability: `_core.serialize_value()` + +Expose a function that takes a Python value and returns YAML text via `serde_yaml`. This gives the Python layer a direct serialization path without constructing throwaway documents. Uses the existing `py_to_yaml_value` conversion. + +### Bootstrap strategy + +When a mutation targets an empty document and needs to create structure: +1. Build the nested Python dict representing the full key path + value +2. Serialize it to YAML text via `serialize_value` +3. Append it to the existing source content (preserving comments), adding a trailing newline first if the source lacks one +4. 
Re-parse into a new Document + +This is the same cost as any other mutation (every patch application ends with a re-parse). + +### Comment preservation + +When the document is comment-only, the existing source is preserved as a prefix above the new content. + +### Per-method behaviour on empty documents + +| Method | Behaviour on empty doc | +|--------|----------------------| +| `upsert` | Creates root mapping (bootstrap); with no keys, raises `PatchError` (no root to replace) | +| `sync` | Delegates to `upsert` (existing path) | +| `add` | Creates root mapping (bootstrap) | +| `replace` | `KeyMissingError` (correct — nothing to replace) | +| `append` / `insert` / `extend_list` | `PatchError` (correct — no sequence) | +| `remove` | `PatchError` (correct — nothing to remove) | + +## Expected behaviour + +```python +# Basic +Document("").upsert("x", value=1)["x"] == 1 + +# Nested keys +Document("").upsert("a", "b", value="hello")["a", "b"] == "hello" + +# Comment preservation +doc = Document("# header\n").upsert("x", value=1) +doc.source.startswith("# header\n") # True + +# add() works too +Document("").add(key="name", value="foo")["name"] == "foo" + +# Integer keys still error +Document("").upsert(0, value="x") # raises PatchError + +# Complex values +Document("").upsert("items", value=["a", "b", "c"])["items"] == ["a", "b", "c"] +``` + +## Non-goals + +- Creating root sequences via integer keys +- Modifying yamlpatch to handle empty documents internally +- Changing the Rust `apply_patches_impl` flow diff --git a/src/convert.rs b/src/convert.rs index b4576ed..a75354d 100644 --- a/src/convert.rs +++ b/src/convert.rs @@ -105,6 +105,15 @@ pub fn yaml_value_to_py(py: Python<'_>, value: &Value) -> PyResult> { } } +/// Serialize a Python value to a YAML string via serde_yaml. 
+#[pyfunction] +pub fn serialize_value(value: &Bound<'_, PyAny>) -> PyResult { + let val = py_to_yaml_value(value)?; + serde_yaml::to_string(&val).map_err(|e| { + PyErr::new::(format!("Failed to serialize YAML: {e}")) + }) +} + #[cfg(test)] mod tests { use super::*; diff --git a/src/lib.rs b/src/lib.rs index da382f5..9c2c6b5 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -34,4 +34,9 @@ mod _core { fn apply_patches(source: &str, patches: Vec) -> PyResult { ops::apply_patches(source, patches) } + + #[pyfunction] + fn serialize_value(value: &Bound<'_, PyAny>) -> PyResult { + convert::serialize_value(value) + } } diff --git a/src/yamltrip/_core.pyi b/src/yamltrip/_core.pyi index 7730713..7975792 100644 --- a/src/yamltrip/_core.pyi +++ b/src/yamltrip/_core.pyi @@ -12,6 +12,7 @@ __all__ = [ "Patch", "Route", "apply_patches", + "serialize_value", ] @final @@ -121,3 +122,4 @@ class Patch: def operation(self) -> Op: ... def apply_patches(source: str, patches: list[Patch]) -> str: ... +def serialize_value(value: Any) -> str: ... diff --git a/src/yamltrip/document.py b/src/yamltrip/document.py index 84e24ea..3d9a396 100644 --- a/src/yamltrip/document.py +++ b/src/yamltrip/document.py @@ -179,8 +179,8 @@ def __getitem__(self, keys: object) -> Any: def __contains__(self, keys: object) -> bool: """Check whether a path exists in the document. - An empty tuple ``()`` checks that the document root exists (always True - for a successfully parsed document). + An empty tuple ``()`` checks that the document has a root data node. + Returns False for empty or comment-only documents. 
""" normalized = _normalize_keys(keys) route = _make_route(normalized) @@ -241,11 +241,18 @@ def add(self, *keys: KeyPart, key: str, value: Any) -> Document: msg = f"Key already exists: {full_path}" raise KeyExistsError(msg) + if self._is_empty_document(): + return self._create_at((), full_path, value) + route = _make_route(keys) op = _core.Op.add(key, value) patch = _core.Patch(route=route, operation=op) return self._apply_patches([patch]) + def _is_empty_document(self) -> bool: + """True if the document has no root data node.""" + return not self._core_doc.query_exists(_make_route(())) + def _create_at( self, parent_keys: tuple[KeyPart, ...], @@ -254,6 +261,23 @@ def _create_at( ) -> Document: """Create a nested value under parent_keys using child_keys.""" _check_no_int_keys_for_creation(child_keys) + + # Bootstrap root mapping if document has no root data node + if not parent_keys and self._is_empty_document(): + first_key = child_keys[0] + if not isinstance(first_key, str): + msg = f"Expected string key, got {type(first_key).__name__}" + raise TypeError(msg) + nested_value = value + for k in reversed(child_keys[1:]): + nested_value = {k: nested_value} + full_dict = {first_key: nested_value} + yaml_text = _core.serialize_value(full_dict) + prefix = self._source + if prefix and not prefix.endswith("\n"): + prefix += "\n" + return Document(prefix + yaml_text) + first_key = child_keys[0] if not isinstance(first_key, str): msg = f"Expected string key, got {type(first_key).__name__}" @@ -272,6 +296,11 @@ def _create_at( def upsert(self, *keys: KeyPart, value: Any) -> Document: """Replace if exists, create (with intermediate mappings) if not.""" if not keys: + if self._is_empty_document(): + msg = ( + "Cannot replace root of an empty document; provide at least one key" + ) + raise PatchError(msg) route = _make_route(()) op = _core.Op.replace(value) patch = _core.Patch(route=route, operation=op) diff --git a/tests/test_document.py b/tests/test_document.py index 
0465c47..c341ccb 100644 --- a/tests/test_document.py +++ b/tests/test_document.py @@ -591,3 +591,95 @@ def test_extend_list_on_scalar_raises_node_type_error(self): doc = Document("name: foo\n") with pytest.raises(NodeTypeError): doc.extend_list("name", values=["a", "b"]) + + +class TestEmptyDocumentUpsert: + def test_upsert_single_key_on_empty(self): + doc = Document("") + doc2 = doc.upsert("x", value=1) + assert doc2["x"] == 1 + + def test_upsert_nested_keys_on_empty(self): + doc = Document("") + doc2 = doc.upsert("a", "b", value="hello") + assert doc2["a", "b"] == "hello" + + def test_upsert_complex_value_on_empty(self): + doc = Document("") + doc2 = doc.upsert("items", value=["a", "b", "c"]) + assert doc2["items"] == ["a", "b", "c"] + + def test_upsert_dict_value_on_empty(self): + doc = Document("") + doc2 = doc.upsert("config", value={"debug": True, "port": 8080}) + assert doc2["config"] == {"debug": True, "port": 8080} + + def test_upsert_whitespace_only_doc(self): + doc = Document(" \n") + doc2 = doc.upsert("x", value=1) + assert doc2["x"] == 1 + + def test_upsert_comment_only_preserves_comments(self): + doc = Document("# header\n") + doc2 = doc.upsert("x", value=1) + assert doc2.source.startswith("# header\n") + assert doc2["x"] == 1 + + def test_upsert_int_key_on_empty_raises(self): + doc = Document("") + with pytest.raises(PatchError): + doc.upsert(0, value="x") + + +class TestEmptyDocumentAdd: + def test_add_single_key_on_empty(self): + doc = Document("") + doc2 = doc.add(key="name", value="foo") + assert doc2["name"] == "foo" + + def test_add_nested_parent_on_empty(self): + doc = Document("") + doc2 = doc.add("nested", key="x", value=1) + assert doc2["nested", "x"] == 1 + + def test_add_comment_only_preserves_comments(self): + doc = Document("# managed\n") + doc2 = doc.add(key="tool", value="usethis") + assert doc2.source.startswith("# managed\n") + assert doc2["tool"] == "usethis" + + +class TestEmptyDocumentSync: + def 
test_sync_single_key_on_empty(self): + doc = Document("") + doc2 = doc.sync("x", value=1) + assert doc2["x"] == 1 + + def test_sync_dict_value_on_empty(self): + doc = Document("") + doc2 = doc.sync("config", value={"nested": True}) + assert doc2["config"] == {"nested": True} + + +class TestEmptyDocumentErrors: + def test_replace_on_empty_raises_key_missing(self): + doc = Document("") + with pytest.raises(KeyMissingError): + doc.replace("x", value=1) + + def test_append_on_empty_raises_patch_error(self): + doc = Document("") + with pytest.raises(PatchError): + doc.append("items", value="a") + + def test_remove_on_empty_raises_patch_error(self): + doc = Document("") + with pytest.raises(PatchError): + doc.remove("x") + + def test_root_upsert_on_empty_raises_patch_error(self): + doc = Document("") + with pytest.raises( + PatchError, match="Cannot replace root of an empty document" + ): + doc.upsert(value=42)