Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
89 changes: 89 additions & 0 deletions doc/specs/2026-05-22-empty-document-mutations-design.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
# Support Mutations on Empty Documents

**Date:** 2026-05-22
**Issue:** #34

## Problem

Mutation methods that create structure (`upsert`, `add`, `sync`) raise `PatchError` when called on a document with no root data node:

```python
doc = yamltrip.loads("")
doc.upsert("x", value=1)
# PatchError: YAML query error: syntax node 'stream' is missing named child 'document'
```

The underlying yamlpatch `Add` operation requires an existing mapping node at the target route. This forces callers to use a sentinel workaround.

## Scope

"Empty document" means any document with no root data node:
- Zero-length source (`""`)
- Whitespace-only source (`" \n"`)
- Comment-only source (`"# header\n"`)

Integer keys on empty documents still error — consistent with existing behaviour. Only string keys can bootstrap a root mapping.

## Design decisions

### Detection

Check whether the root route resolves to a data node. If it doesn't, the document is "empty" for our purposes. This covers all three cases above uniformly.

### New Rust capability: `_core.serialize_value()`

Expose a function that takes a Python value and returns YAML text via `serde_yaml`. This gives the Python layer a direct serialization path without constructing throwaway documents. It reuses the existing `py_to_yaml_value` conversion, so values are converted exactly as they are for patch operations.

### Bootstrap strategy

When a mutation targets an empty document and needs to create structure:
1. Build the nested Python dict representing the full key path + value
2. Serialize it to YAML text via `serialize_value`
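3. Append the YAML text to the existing source content (preserving comments), first adding a trailing newline to the existing source if it is non-empty and does not already end with one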
4. Re-parse into a new Document

This is the same cost as any other mutation (every patch application ends with a re-parse).

### Comment preservation

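When the document is comment-only (or whitespace-only), the existing source is preserved verbatim as a prefix above the new content; a newline is inserted between the two only if the existing source does not already end with one.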

### Per-method behaviour on empty documents

| Method | Behaviour on empty doc |
|--------|----------------------|
| `upsert` | Creates root mapping (bootstrap); with no keys, raises `PatchError` (no root to replace) |
| `sync` | Delegates to `upsert` (existing path) |
| `add` | Creates root mapping (bootstrap) |
| `replace` | `KeyMissingError` (correct — nothing to replace) |
| `append` / `insert` / `extend_list` | `PatchError` (correct — no sequence) |
| `remove` | `PatchError` (correct — nothing to remove) |

## Expected behaviour

```python
# Basic
Document("").upsert("x", value=1)["x"] == 1

# Nested keys
Document("").upsert("a", "b", value="hello")["a", "b"] == "hello"

# Comment preservation
doc = Document("# header\n").upsert("x", value=1)
doc.source.startswith("# header\n") # True

# add() works too
Document("").add(key="name", value="foo")["name"] == "foo"

# Integer keys still error
Document("").upsert(0, value="x") # raises PatchError

# Complex values
Document("").upsert("items", value=["a", "b", "c"])["items"] == ["a", "b", "c"]
```

## Non-goals

- Creating root sequences via integer keys
- Modifying yamlpatch to handle empty documents internally
- Changing the Rust `apply_patches_impl` flow
9 changes: 9 additions & 0 deletions src/convert.rs
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,15 @@ pub fn yaml_value_to_py(py: Python<'_>, value: &Value) -> PyResult<Py<PyAny>> {
}
}

/// Render a Python value as YAML text using serde_yaml.
///
/// The value is first converted with `py_to_yaml_value`; a failure in that
/// conversion is propagated to the caller unchanged.
///
/// # Errors
///
/// Returns a Python `ValueError` if serde_yaml cannot serialize the
/// converted value.
#[pyfunction]
pub fn serialize_value(value: &Bound<'_, PyAny>) -> PyResult<String> {
    let yaml_value = py_to_yaml_value(value)?;
    match serde_yaml::to_string(&yaml_value) {
        Ok(text) => Ok(text),
        Err(err) => Err(PyErr::new::<pyo3::exceptions::PyValueError, _>(format!(
            "Failed to serialize YAML: {err}"
        ))),
    }
}

#[cfg(test)]
mod tests {
use super::*;
Expand Down
5 changes: 5 additions & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,4 +34,9 @@ mod _core {
fn apply_patches(source: &str, patches: Vec<PyPatch>) -> PyResult<String> {
ops::apply_patches(source, patches)
}

/// Serialize a Python value to YAML text.
///
/// Thin wrapper that forwards to `convert::serialize_value` so the function
/// is registered on the `_core` module.
#[pyfunction]
fn serialize_value(value: &Bound<'_, PyAny>) -> PyResult<String> {
convert::serialize_value(value)
}
}
2 changes: 2 additions & 0 deletions src/yamltrip/_core.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ __all__ = [
"Patch",
"Route",
"apply_patches",
"serialize_value",
]

@final
Expand Down Expand Up @@ -121,3 +122,4 @@ class Patch:
def operation(self) -> Op: ...

def apply_patches(source: str, patches: list[Patch]) -> str: ...
# Serialize a Python value to YAML text (implemented in Rust via serde_yaml).
def serialize_value(value: Any) -> str: ...
33 changes: 31 additions & 2 deletions src/yamltrip/document.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,8 +179,8 @@ def __getitem__(self, keys: object) -> Any:
def __contains__(self, keys: object) -> bool:
"""Check whether a path exists in the document.

An empty tuple ``()`` checks that the document root exists (always True
for a successfully parsed document).
An empty tuple ``()`` checks that the document has a root data node.
Returns False for empty or comment-only documents.
"""
normalized = _normalize_keys(keys)
route = _make_route(normalized)
Expand Down Expand Up @@ -241,11 +241,18 @@ def add(self, *keys: KeyPart, key: str, value: Any) -> Document:
msg = f"Key already exists: {full_path}"
raise KeyExistsError(msg)

if self._is_empty_document():
return self._create_at((), full_path, value)

route = _make_route(keys)
op = _core.Op.add(key, value)
patch = _core.Patch(route=route, operation=op)
return self._apply_patches([patch])

def _is_empty_document(self) -> bool:
    """Report whether the root route fails to resolve to a data node."""
    root_route = _make_route(())
    if self._core_doc.query_exists(root_route):
        return False
    return True
Comment thread
nathanjmcdougall marked this conversation as resolved.

def _create_at(
self,
parent_keys: tuple[KeyPart, ...],
Expand All @@ -254,6 +261,23 @@ def _create_at(
) -> Document:
"""Create a nested value under parent_keys using child_keys."""
_check_no_int_keys_for_creation(child_keys)

# Bootstrap root mapping if document has no root data node
if not parent_keys and self._is_empty_document():
first_key = child_keys[0]
if not isinstance(first_key, str):
msg = f"Expected string key, got {type(first_key).__name__}"
raise TypeError(msg)
nested_value = value
for k in reversed(child_keys[1:]):
nested_value = {k: nested_value}
full_dict = {first_key: nested_value}
yaml_text = _core.serialize_value(full_dict)
prefix = self._source
if prefix and not prefix.endswith("\n"):
prefix += "\n"
return Document(prefix + yaml_text)

first_key = child_keys[0]
if not isinstance(first_key, str):
msg = f"Expected string key, got {type(first_key).__name__}"
Expand All @@ -272,6 +296,11 @@ def _create_at(
def upsert(self, *keys: KeyPart, value: Any) -> Document:
"""Replace if exists, create (with intermediate mappings) if not."""
if not keys:
if self._is_empty_document():
msg = (
"Cannot replace root of an empty document; provide at least one key"
)
raise PatchError(msg)
route = _make_route(())
op = _core.Op.replace(value)
patch = _core.Patch(route=route, operation=op)
Expand Down
92 changes: 92 additions & 0 deletions tests/test_document.py
Original file line number Diff line number Diff line change
Expand Up @@ -591,3 +591,95 @@ def test_extend_list_on_scalar_raises_node_type_error(self):
doc = Document("name: foo\n")
with pytest.raises(NodeTypeError):
doc.extend_list("name", values=["a", "b"])


class TestEmptyDocumentUpsert:
    """upsert() on documents that have no root data node."""

    def test_upsert_single_key_on_empty(self):
        # A single string key bootstraps the root mapping.
        updated = Document("").upsert("x", value=1)
        assert updated["x"] == 1

    def test_upsert_nested_keys_on_empty(self):
        # Intermediate mappings are created for every key in the path.
        updated = Document("").upsert("a", "b", value="hello")
        assert updated["a", "b"] == "hello"

    def test_upsert_complex_value_on_empty(self):
        expected = ["a", "b", "c"]
        updated = Document("").upsert("items", value=["a", "b", "c"])
        assert updated["items"] == expected

    def test_upsert_dict_value_on_empty(self):
        expected = {"debug": True, "port": 8080}
        updated = Document("").upsert("config", value={"debug": True, "port": 8080})
        assert updated["config"] == expected

    def test_upsert_whitespace_only_doc(self):
        blank = Document(" \n")
        updated = blank.upsert("x", value=1)
        assert updated["x"] == 1

    def test_upsert_comment_only_preserves_comments(self):
        commented = Document("# header\n")
        updated = commented.upsert("x", value=1)
        # The original comment must survive as a prefix of the new source.
        assert updated.source.startswith("# header\n")
        assert updated["x"] == 1

    def test_upsert_int_key_on_empty_raises(self):
        # Integer keys cannot bootstrap a root mapping.
        empty = Document("")
        with pytest.raises(PatchError):
            empty.upsert(0, value="x")


class TestEmptyDocumentAdd:
    """add() on documents that have no root data node."""

    def test_add_single_key_on_empty(self):
        created = Document("").add(key="name", value="foo")
        assert created["name"] == "foo"

    def test_add_nested_parent_on_empty(self):
        # The parent mapping "nested" does not exist yet and must be created.
        created = Document("").add("nested", key="x", value=1)
        assert created["nested", "x"] == 1

    def test_add_comment_only_preserves_comments(self):
        commented = Document("# managed\n")
        created = commented.add(key="tool", value="usethis")
        # The original comment must survive as a prefix of the new source.
        assert created.source.startswith("# managed\n")
        assert created["tool"] == "usethis"


class TestEmptyDocumentSync:
    """sync() on documents that have no root data node."""

    def test_sync_single_key_on_empty(self):
        synced = Document("").sync("x", value=1)
        assert synced["x"] == 1

    def test_sync_dict_value_on_empty(self):
        expected = {"nested": True}
        synced = Document("").sync("config", value={"nested": True})
        assert synced["config"] == expected


class TestEmptyDocumentErrors:
    """Mutations that must keep failing on a document with no root data node."""

    def test_replace_on_empty_raises_key_missing(self):
        # Nothing exists to replace.
        empty = Document("")
        with pytest.raises(KeyMissingError):
            empty.replace("x", value=1)

    def test_append_on_empty_raises_patch_error(self):
        # There is no sequence to append to.
        empty = Document("")
        with pytest.raises(PatchError):
            empty.append("items", value="a")

    def test_remove_on_empty_raises_patch_error(self):
        # There is nothing to remove.
        empty = Document("")
        with pytest.raises(PatchError):
            empty.remove("x")

    def test_root_upsert_on_empty_raises_patch_error(self):
        # A key-less upsert is a root replace, which has no target here.
        empty = Document("")
        expected_message = "Cannot replace root of an empty document"
        with pytest.raises(PatchError, match=expected_message):
            empty.upsert(value=42)
Loading