diff --git a/README.md b/README.md index f394d7f..4e506f5 100644 --- a/README.md +++ b/README.md @@ -31,6 +31,10 @@ doc2 = doc.replace("age", value=31) doc3 = doc2.add(key="city", value="Portland") print(doc3.dumps()) +# Complex values (dicts/lists) work too +doc4 = doc3.replace("age", value={"years": 31, "months": 4}) +doc5 = doc4.upsert("hobbies", value=["reading", "hiking"]) + # File-based editing with a context manager with yamltrip.edit("config.yml") as editor: editor.replace("version", value="2.0") @@ -62,8 +66,10 @@ doc["items", 0] # "a" ("items", 0) in doc # True doc.replace("items", 0, value="x") +doc.replace("items", value=["x", "y"]) # dicts and lists accepted doc.add("items", key="c", value=3) doc.upsert("new", "nested", value=True) +doc.upsert("config", value={"debug": True}) # dicts and lists accepted doc.remove("items", 0) doc.prune_remove("a", "b", "c") # remove + prune empty parents doc.append("items", value="c") diff --git a/doc/specs/2026-05-15-complex-value-replace-design.md b/doc/specs/2026-05-15-complex-value-replace-design.md new file mode 100644 index 0000000..895327c --- /dev/null +++ b/doc/specs/2026-05-15-complex-value-replace-design.md @@ -0,0 +1,95 @@ +# Complex Value Support for Replace and Upsert + +**Date:** 2026-05-15 +**Issue:** #18 + +## Problem + +`Document.replace()` and `Document.upsert()` only accept scalar values. Passing a dict or list raises `PatchError: Patch failed: input is not valid YAML` because yamlpatch's `apply_value_replacement` serializes the value inline after the key colon, producing invalid YAML when the serialized value is multi-line. + +`Op.add` and `Op.append` already handle complex values correctly. Only `Op.replace` is broken. + +## Approach + +Intercept `Replace` operations with complex values (Mapping or Sequence) in yamltrip's Rust layer before they reach yamlpatch. Perform direct string surgery on the document source. Scalar `Replace` and all other operation types pass through to yamlpatch unchanged. + +## Change Location + +All Rust changes are in `src/document.rs`. No Python API changes. No yamlpatch changes. No new files. + +A new function `apply_complex_replace` handles the complex case, called from `PyDocument::apply_patches`. + +## Algorithm: `apply_complex_replace` + +Input: a `yamlpath::Document`, a `yamlpath::Route`, and a `serde_yaml::Value` (Mapping or Sequence). + +1. **Locate the feature** — `document.query_pretty(&route)` to get the byte span with surrounding key context. + +2. **Extract content** — `extract_with_leading_whitespace(feature)` to get the full `key: value` text. Split at the first `:` (naive `find(':')`, consistent with yamlpatch's own Replace implementation) into `key_part` (through the colon) and the rest. + +3. **Compute indentation** — Derive base indent from the number of leading spaces on the feature's line. Value content indentation = base indent + 2 spaces. + +4. **Serialize the new value** — `serde_yaml::to_string(&value)`, strip trailing newline, re-indent each line to the computed value indentation. + +5. **Assemble replacement text** — `key:\n indented_value`. Inline comments on the value line are not preserved, consistent with yamlpatch's own Replace behavior for scalar values. + +6. **String surgery** — Replace the byte range from the feature's span (adjusted for leading whitespace) in the document source. + +7. **Re-parse** — `yamlpath::Document::new(patched_source)` to validate. + +### Root-level replace + +If the route is empty, skip key extraction. Replace the entire document content with the serialized value. This matches yamlpatch's existing root-replace behavior. + +## Patch Routing in `apply_patches` + +In `PyDocument::apply_patches`, before building the `Vec`: + +``` +for each patch: + if patch.operation is Replace(value) AND value is Mapping or Sequence: + apply_complex_replace(document, route, value) + re-parse document for subsequent patches + else: + collect into yamlpatch batch +``` + +Patches are applied sequentially. If a complex replace is encountered mid-batch, the preceding yamlpatch batch is flushed first, then the complex replace is applied, then the next batch starts from the updated document. + +## How Upsert Benefits + +`Document.upsert()` delegates to `replace()` when the path exists, and to `_create_at()` (which uses `Op.add`/`Op.merge_into`) when it doesn't. Since `Op.add` and `Op.merge_into` already handle complex values, the only broken path is the `replace` delegation — fixed by this change. No changes to `document.py`. + +## Serialization Style + +No changes to `Op.add` behavior. Complex values in `Replace` use `serde_yaml::to_string()` which produces block style by default (block mappings, block sequences). This matches the pre-commit config use case where block style is expected. + +## Testing + +New tests in `tests/test_document.py`: + +- `replace()` with a dict value +- `replace()` with a list value +- `replace()` with nested dict-in-list-in-dict +- `upsert()` with a dict value (path exists → goes through replace) +- `upsert()` with a dict value (path doesn't exist → goes through add, already works) +- Replacing a complex value with another complex value +- Root-level replace with complex value +- Indentation correctness at various nesting depths (0, 2, 4 spaces) + +## Known Limitations + +- **Quoted keys with colons** — `find_key_colon` uses a naive `str::find(':')`, which will misparse keys like `"host:port": value`. This is consistent with yamlpatch's own Replace implementation. When yamlpatch fixes this upstream, yamltrip should inherit the fix. +- **Inline comments** — Not preserved during complex replace, consistent with yamlpatch's scalar Replace behavior. + +## Scope Boundaries + +**In scope:** +- `Replace` with Mapping/Sequence values +- Indentation handling + +**Out of scope:** +- Changing `Add`/`Append` serialization style +- Multi-document YAML support +- Tagged values +- Flow-style output preference (uses serde_yaml default block style) diff --git a/src/document.rs b/src/document.rs index 614cc52..91b46f5 100644 --- a/src/document.rs +++ b/src/document.rs @@ -137,22 +137,209 @@ impl PyDocument { } /// Apply patches to this document and return a new document. - /// NOTE: Similar patch-application logic exists in ops::apply_patches (returns String). fn apply_patches(&self, patches: Vec) -> PyResult { - let yaml_patches: Vec> = patches + let doc = apply_patches_impl(&self.inner, &patches).map_err(|e| { + PyErr::new::(format!("Patch failed: {e}")) + })?; + Ok(Self { inner: doc }) + } +} + +/// Shared patch-application logic used by both PyDocument::apply_patches and ops::apply_patches. +/// +/// Patches are applied in order. Complex replaces (Mapping/Sequence values) are handled via +/// direct string surgery; all other operations are batched and passed to yamlpatch. +/// +/// Routes are symbolic paths (key names / sequence indices), not byte offsets, so they remain +/// valid across complex replaces that restructure sibling values. This is the same sequential +/// semantics yamlpatch uses internally when applying multiple patches. +pub(crate) fn apply_patches_impl( + doc: &yamlpath::Document, + patches: &[PyPatch], +) -> Result { + let mut current_doc = doc.clone(); + let mut batch: Vec = Vec::new(); + + for (idx, patch) in patches.iter().enumerate() { + let is_complex_replace = matches!( + &patch.operation.inner, + yamlpatch::Op::Replace(v) if matches!(v, serde_yaml::Value::Mapping(_) | serde_yaml::Value::Sequence(_)) + ); + + if is_complex_replace { + // Flush any pending yamlpatch batch first + if !batch.is_empty() { + let yaml_patches: Vec> = batch + .iter() + .map(|&i| yamlpatch::Patch { + route: patches[i].route.to_yamlpath_route(), + operation: patches[i].operation.inner.clone(), + }) + .collect(); + current_doc = yamlpatch::apply_yaml_patches(¤t_doc, &yaml_patches) + .map_err(|e| e.to_string())?; + batch.clear(); + } + + // Apply the complex replace directly. + // NOTE: This re-parses the document for each complex replace (O(N) parses). + // This is consistent with yamlpatch::apply_yaml_patches, which also + // re-parses per patch via apply_single_patch. A bottom-up single-pass + // approach could avoid this, but isn't warranted until profiling shows + // it matters. + let route = patch.route.to_yamlpath_route(); + let value = match &patch.operation.inner { + yamlpatch::Op::Replace(v) => v, + _ => unreachable!(), + }; + current_doc = apply_complex_replace(¤t_doc, &route, value)?; + } else { + batch.push(idx); + } + } + + // Flush remaining batch + if !batch.is_empty() { + let yaml_patches: Vec> = batch .iter() - .map(|p| yamlpatch::Patch { - route: p.route.to_yamlpath_route(), - operation: p.operation.inner.clone(), + .map(|&i| yamlpatch::Patch { + route: patches[i].route.to_yamlpath_route(), + operation: patches[i].operation.inner.clone(), }) .collect(); + current_doc = yamlpatch::apply_yaml_patches(¤t_doc, &yaml_patches) + .map_err(|e| e.to_string())?; + } - let result = yamlpatch::apply_yaml_patches(&self.inner, &yaml_patches).map_err(|e| { - PyErr::new::(format!("Patch failed: {e}")) - })?; + Ok(current_doc) +} + +fn apply_complex_replace( + doc: &yamlpath::Document, + route: &yamlpath::Route<'_>, + value: &serde_yaml::Value, +) -> Result { + let source = doc.source(); + + // Root-level replace: just serialize the entire value + if route.is_empty() { + let serialized = + serde_yaml::to_string(value).map_err(|e| format!("Failed to serialize YAML: {e}"))?; + return yamlpath::Document::new(serialized) + .map_err(|e| format!("Failed to re-parse YAML: {e}")); + } + + // Locate the feature (with key context) + let feature = doc + .query_pretty(route) + .map_err(|e| format!("Query failed: {e}"))?; + + let content_with_ws = doc.extract_with_leading_whitespace(&feature); + let content = doc.extract(&feature); - Ok(Self { inner: result }) + // Calculate the start byte including leading whitespace. + // Safety: ws_len <= byte_span.0 because extract_with_leading_whitespace only + // extends backward to the last newline (or start of document), never beyond byte_span.0. + let ws_len = content_with_ws.len() - content.len(); + let start_byte = feature.location.byte_span.0 - ws_len; + let end_byte = feature.location.byte_span.1; + + // Find the colon separating key from value + let colon_pos = find_key_colon(content_with_ws); + + let key_part = match colon_pos { + Some(pos) => { + let key = &content_with_ws[..pos + 1]; // through the colon + key.to_string() + } + None => { + // No colon found — bare value (e.g. sequence item) + let serialized = serde_yaml::to_string(value) + .map_err(|e| format!("Failed to serialize YAML: {e}"))?; + let trimmed = serialized.trim_end_matches('\n'); + + let line_start = source[..feature.location.byte_span.0] + .rfind('\n') + .map(|nl| nl + 1) + .unwrap_or(0); + let base_indent = feature.location.byte_span.0 - line_start; + let indent_str = " ".repeat(base_indent); + + let indented = indent_block(trimmed, &indent_str); + + let mut result = source.to_string(); + result.replace_range( + feature.location.byte_span.0..feature.location.byte_span.1, + &indented, + ); + if !result.ends_with('\n') { + result.push('\n'); + } + return yamlpath::Document::new(result) + .map_err(|e| format!("Failed to re-parse YAML: {e}")); + } + }; + + // Compute base indentation from the feature's actual position + let feat_start = feature.location.byte_span.0; + let line_start = source[..feat_start] + .rfind('\n') + .map(|nl| nl + 1) + .unwrap_or(0); + let base_indent = feat_start - line_start; + // NOTE: The +2 assumes 2-space indentation. This is consistent with yamlpatch, + // which also hardcodes 2-space indent in Add, Append, MergeInto, and Replace ops. + let value_indent = " ".repeat(base_indent + 2); + + // Serialize the new value in block style + let serialized = + serde_yaml::to_string(value).map_err(|e| format!("Failed to serialize YAML: {e}"))?; + let trimmed = serialized.trim_end_matches('\n'); + + // Re-indent each line of the serialized value + let indented_value = indent_block(trimmed, &value_indent); + + // Assemble: key:\n indented_value + // NOTE: Inline comments on the value line are not preserved, consistent + // with yamlpatch's own Replace behavior for scalar values. + let replacement = format!("{}\n{}", key_part, indented_value); + + // Replace in source + let mut result = source.to_string(); + result.replace_range(start_byte..end_byte, &replacement); + + if !result.ends_with('\n') { + result.push('\n'); + } + + yamlpath::Document::new(result).map_err(|e| format!("Failed to re-parse YAML: {e}")) +} + +/// Find the first colon (key-value separator) in a YAML fragment. +/// +/// Uses a naive `find(':')`, consistent with yamlpatch's own Replace +/// implementation. This means colons inside quoted keys will be +/// misidentified — a known yamlpatch limitation that will be fixed +/// uniformly when yamlpatch addresses it. +fn find_key_colon(content: &str) -> Option { + content.find(':') +} + +fn indent_block(content: &str, indent: &str) -> String { + let mut result = String::new(); + for (i, line) in content.lines().enumerate() { + if i > 0 { + result.push('\n'); + } + // Blank lines are preserved (the \n above) but not indented, + // avoiding trailing whitespace. In practice serde_yaml::to_string() + // never emits blank lines for Mapping/Sequence values. + if !line.trim().is_empty() { + result.push_str(indent); + result.push_str(line); + } } + result } fn convert_feature(feature: &yamlpath::Feature<'_>) -> PyFeature { diff --git a/src/ops.rs b/src/ops.rs index eeef2ad..8499ed7 100644 --- a/src/ops.rs +++ b/src/ops.rs @@ -157,22 +157,13 @@ impl PyPatch { } /// Apply a list of patches to a YAML source string. -/// NOTE: Similar patch-application logic exists in PyDocument::apply_patches (returns Document). #[pyfunction] pub fn apply_patches(source: &str, patches: Vec) -> PyResult { let document = yamlpath::Document::new(source).map_err(|e| { PyErr::new::(format!("Invalid YAML: {e}")) })?; - let yaml_patches: Vec> = patches - .iter() - .map(|p| yamlpatch::Patch { - route: p.route.to_yamlpath_route(), - operation: p.operation.inner.clone(), - }) - .collect(); - - let result = yamlpatch::apply_yaml_patches(&document, &yaml_patches).map_err(|e| { + let result = crate::document::apply_patches_impl(&document, &patches).map_err(|e| { PyErr::new::(format!("Patch failed: {e}")) })?; diff --git a/tests/test_core_ops.py b/tests/test_core_ops.py index 9776d45..089abea 100644 --- a/tests/test_core_ops.py +++ b/tests/test_core_ops.py @@ -49,6 +49,33 @@ def test_append_to_sequence(self): assert "- c" in result assert "- a" in result + def test_replace_with_dict(self): + source = "config:\n key: value\n" + patches = [Patch(route=Route(["config"]), operation=Op.replace({"a": 1}))] + result = apply_patches(source, patches) + assert "a: 1" in result + assert "key: value" not in result + + def test_replace_with_list(self): + source = "repos: []\n" + patches = [ + Patch(route=Route(["repos"]), operation=Op.replace([{"repo": "local"}])) + ] + result = apply_patches(source, patches) + assert "repo: local" in result + + def test_batch_scalar_then_complex_then_scalar(self): + source = "name: foo\nconfig: old\nversion: 1\n" + patches = [ + Patch(route=Route(["name"]), operation=Op.replace("bar")), + Patch(route=Route(["config"]), operation=Op.replace({"a": 1})), + Patch(route=Route(["version"]), operation=Op.replace(2)), + ] + result = apply_patches(source, patches) + assert "name: bar" in result + assert "a: 1" in result + assert "version: 2" in result + def test_preserves_comments(self): source = "# top comment\nname: foo # inline" patches = [Patch(route=Route(["name"]), operation=Op.replace("bar"))] diff --git a/tests/test_document.py b/tests/test_document.py index 8456636..5afa49b 100644 --- a/tests/test_document.py +++ b/tests/test_document.py @@ -188,6 +188,118 @@ def test_replace_missing_raises(self): doc.replace("missing", value="bar") +class TestDocumentReplaceComplex: + def test_replace_with_dict(self): + doc = Document("config:\n key: value\n") + doc2 = doc.replace("config", value={"key": "new", "extra": "field"}) + assert doc2["config"] == {"key": "new", "extra": "field"} + + def test_replace_with_list(self): + doc = Document("repos: []\n") + doc2 = doc.replace( + "repos", value=[{"repo": "local", "hooks": [{"id": "my-hook"}]}] + ) + result = doc2["repos"] + assert len(result) == 1 + assert result[0]["repo"] == "local" + assert result[0]["hooks"] == [{"id": "my-hook"}] + + def test_replace_with_nested_dict_in_list(self): + doc = Document("data:\n - old\n") + doc2 = doc.replace("data", value=[{"a": {"b": [1, 2, 3]}}]) + assert doc2["data"] == [{"a": {"b": [1, 2, 3]}}] + + def test_replace_complex_preserves_other_keys(self): + doc = Document("name: foo\nconfig:\n key: value\nversion: 1\n") + doc2 = doc.replace("config", value={"new_key": "new_val"}) + assert doc2["name"] == "foo" + assert doc2["version"] == 1 + assert doc2["config"] == {"new_key": "new_val"} + + def test_replace_complex_preserves_comments_on_other_keys(self): + doc = Document("name: foo # keep this\nconfig: old\n") + doc2 = doc.replace("config", value={"a": 1}) + assert "# keep this" in doc2.source + + def test_replace_nested_key_with_dict(self): + doc = Document("outer:\n inner: old\n") + doc2 = doc.replace("outer", "inner", value={"a": 1, "b": 2}) + assert doc2["outer", "inner"] == {"a": 1, "b": 2} + + def test_replace_complex_with_complex(self): + doc = Document("config:\n a: 1\n b: 2\n") + doc2 = doc.replace("config", value={"x": 10, "y": 20}) + assert doc2["config"] == {"x": 10, "y": 20} + + def test_replace_scalar_with_list(self): + doc = Document("items: none\n") + doc2 = doc.replace("items", value=["a", "b", "c"]) + assert doc2["items"] == ["a", "b", "c"] + + def test_replace_deeply_nested_with_dict(self): + doc = Document("a:\n b:\n c: old\n") + doc2 = doc.replace("a", "b", "c", value={"deep": "value"}) + assert doc2["a", "b", "c"] == {"deep": "value"} + + def test_replace_comment_relocation(self): + doc = Document("repos: [] # managed by tool\n") + doc2 = doc.replace("repos", value=[{"repo": "local"}]) + assert "# managed by tool" in doc2.source + result = doc2["repos"] + assert result == [{"repo": "local"}] + + def test_replace_root_level_with_dict(self): + doc = Document("key: value\n") + doc2 = doc.replace(value={"new_key": "new_val", "another": 42}) + assert doc2["new_key"] == "new_val" + assert doc2["another"] == 42 + + def test_replace_top_level_key_with_dict(self): + """Indentation depth 0: top-level key gets value indented at 2 spaces.""" + doc = Document("config: old\n") + doc2 = doc.replace("config", value={"a": 1}) + assert doc2["config"] == {"a": 1} + # Value should be indented at 2 spaces (base_indent=0 + 2) + assert " a: 1" in doc2.source + + def test_replace_depth2_key_with_dict(self): + """Indentation depth 2: nested key gets value indented at 4 spaces.""" + doc = Document("outer:\n config: old\n") + doc2 = doc.replace("outer", "config", value={"a": 1}) + assert doc2["outer", "config"] == {"a": 1} + assert " a: 1" in doc2.source + + def test_replace_with_empty_dict(self): + doc = Document("config:\n key: value\n") + doc2 = doc.replace("config", value={}) + assert doc2["config"] == {} + + def test_replace_with_empty_list(self): + doc = Document("items:\n - a\n - b\n") + doc2 = doc.replace("items", value=[]) + assert doc2["items"] == [] + + def test_replace_block_scalar_with_dict(self): + doc = Document("description: |\n This is a\n multi-line string\n") + doc2 = doc.replace("description", value={"summary": "short"}) + assert doc2["description"] == {"summary": "short"} + + def test_replace_folded_scalar_with_list(self): + doc = Document("notes: >\n folded\n text\n") + doc2 = doc.replace("notes", value=["a", "b"]) + assert doc2["notes"] == ["a", "b"] + + def test_replace_flow_mapping_with_dict(self): + doc = Document("config: {a: 1, b: 2}\n") + doc2 = doc.replace("config", value={"x": 10}) + assert doc2["config"] == {"x": 10} + + def test_replace_key_with_hash_in_value(self): + doc = Document("color: '#ff0000'\n") + doc2 = doc.replace("color", value={"r": 255, "g": 0, "b": 0}) + assert doc2["color"] == {"r": 255, "g": 0, "b": 0} + + class TestDocumentAdd: def test_add_key(self): doc = Document("name: foo") @@ -218,6 +330,31 @@ def test_upsert_missing(self): assert doc2["age"] == 30 +class TestDocumentUpsertComplex: + def test_upsert_existing_with_dict(self): + doc = Document("config:\n key: value\n") + doc2 = doc.upsert("config", value={"key": "new", "extra": "field"}) + assert doc2["config"] == {"key": "new", "extra": "field"} + + def test_upsert_existing_with_list(self): + doc = Document("repos: []\n") + doc2 = doc.upsert("repos", value=[{"repo": "local"}]) + assert doc2["repos"] == [{"repo": "local"}] + + def test_upsert_missing_with_dict(self): + """This already works via Op.add — verify it stays working.""" + doc = Document("name: foo\n") + doc2 = doc.upsert("config", value={"a": 1}) + assert doc2["config"] == {"a": 1} + assert doc2["name"] == "foo" + + def test_upsert_missing_with_list(self): + """This already works via Op.add — verify it stays working.""" + doc = Document("name: foo\n") + doc2 = doc.upsert("items", value=["a", "b"]) + assert doc2["items"] == ["a", "b"] + + class TestDocumentRemove: def test_remove_key(self): doc = Document("name: foo\nage: 30")