diff --git a/README.md b/README.md index 43b5804..645d99b 100644 --- a/README.md +++ b/README.md @@ -80,6 +80,7 @@ doc.insert("items", index=1, value="between") # positional insert doc.extend_list("items", values=["d", "e"]) doc.remove_from_list("items", values=["a"]) doc.sync("items", value=["a", "new", "b"]) # minimal diff-and-patch +doc.find_index("repos", where={"id": "x"}) # find in list-of-dicts; returns int | None doc.query("items") # Feature with location info doc.query_pretty("items") # Feature with surrounding context diff --git a/doc/specs/2026-05-22-find-index-design.md b/doc/specs/2026-05-22-find-index-design.md new file mode 100644 index 0000000..ce10c3c --- /dev/null +++ b/doc/specs/2026-05-22-find-index-design.md @@ -0,0 +1,105 @@ +# Document.find_index() — Find Item in List-of-Dicts + +**Date:** 2026-05-22 + +## Problem + +YAML configs frequently use lists of dicts keyed by a distinguishing field: + +```yaml +repos: + - repo: https://github.com/pre-commit/pre-commit-hooks + hooks: [...] + - repo: https://github.com/astral-sh/ruff-pre-commit + hooks: [...] +``` + +Finding an item by field value currently requires manual iteration: + +```python +repos = doc["repos"] +idx = next((i for i, r in enumerate(repos) if r["repo"] == url), None) +doc = doc.replace("repos", idx, "hooks", value=new_hooks) +``` + +This is verbose, error-prone, and repeated across callers. + +## Design + +Add a `find_index` method to `Document` and `Editor` that returns the index of the first list item matching a set of key/value constraints. 
+ +### Signature + +```python +def find_index(self, *keys: KeyPart, where: dict[str, Any]) -> int | None: +``` + +### Semantics + +| Expression | Result | +|------------|--------| +| `doc.find_index("repos", where={"repo": url})` | Index of first item where `item["repo"] == url`, or `None` | +| `doc.find_index("repos", where={"repo": url, "rev": "v1"})` | First item matching *all* pairs (AND semantics) | +| `doc.find_index("repos", where={"repo": "nonexistent"})` | `None` | +| `doc.find_index("steps", where={"uses": "actions/checkout@v4"})` | Works for any list-of-dicts | + +### Behavior + +1. Retrieve the parsed value at `keys` +2. If value is not a list, raise `NodeTypeError` +3. If path doesn't exist, raise `QueryError` +4. Iterate items left-to-right; return index of first item where `item[k] == v` for all `(k, v)` in `where` +5. Items that are not dicts, or that lack any key in `where`, are skipped (no error) +6. Return `None` if no item matches + +### Error Cases + +| Condition | Raised | +|-----------|--------| +| Path doesn't exist | `QueryError` | +| Value at path is not a list | `NodeTypeError` | +| `where` is empty | `ValueError` | + +### Editor Delegation + +```python +class Editor: + def find_index(self, *keys: KeyPart, where: dict[str, Any]) -> int | None: + return self.document.find_index(*keys, where=where) +``` + +## Change Locations + +- `src/yamltrip/document.py` — add `find_index()` method to `Document` +- `src/yamltrip/editor.py` — add `find_index()` method to `Editor` +- `src/yamltrip/_core.pyi` — no changes (Python-only logic) +- No Rust changes required + +## Testing + +New tests: + +- `doc.find_index("repos", where={"repo": url})` → correct index +- `doc.find_index("repos", where={"repo": "missing"})` → `None` +- Multi-key where: `where={"repo": url, "rev": "v1"}` matches only when both match +- First match wins when multiple items match +- Non-dict items in list are skipped +- Path not found → `QueryError` +- Value is a scalar → `NodeTypeError` +- Value is a dict → 
def find_index(self, *keys: KeyPart, where: dict[str, Any]) -> int | None:
    """Locate the first dict in a list whose fields equal every ``where`` pair.

    Values are compared with Python ``==`` against the parsed value looked up
    via ``self[keys]``, so the constraint values must have the same native
    type as the parsed YAML scalar (e.g. ``port: 8080`` is int, not str).

    Args:
        *keys: Path to the list within the document.
        where: Field/value constraints; an item matches only when it
            contains every field and each value compares equal (AND
            semantics).

    Returns:
        The position of the first matching item, or None when nothing matches.

    Raises:
        QueryError: If the path doesn't exist.
        NodeTypeError: If the value at path is not a list.
        ValueError: If where is empty.
    """
    # Reject an empty constraint set before touching the document: with no
    # constraints every dict would match vacuously.
    if not where:
        msg = "where must be a non-empty dict"
        raise ValueError(msg)

    candidates = self[keys]
    if not isinstance(candidates, list):
        msg = f"Value at {keys} is not a list"
        raise NodeTypeError(msg)

    def _satisfies(mapping: dict[str, Any]) -> bool:
        """Return True when *mapping* has every constrained field with an equal value."""
        for field, expected in where.items():
            # An absent field never matches, even when the expected value is None.
            if field not in mapping:
                return False
            if not (mapping[field] == expected):
                return False
        return True

    # Non-dict entries are skipped silently; the scan runs left to right and
    # stops at the first hit.
    hits = (
        position
        for position, candidate in enumerate(candidates)
        if isinstance(candidate, dict)
        and _satisfies(cast("dict[str, Any]", candidate))
    )
    return next(hits, None)
Document("items:\n - id: x\n - id: x\n") + assert doc.find_index("items", where={"id": "x"}) == 0 + + def test_non_dict_items_skipped(self): + doc = Document("items:\n - plain_string\n - id: found\n") + assert doc.find_index("items", where={"id": "found"}) == 1 + + def test_missing_key_does_not_match_none(self): + doc = Document("items:\n - name: alpha\n - id:\n") + assert doc.find_index("items", where={"id": None}) == 1 + + def test_nested_path(self): + doc = Document("ci:\n steps:\n - uses: checkout\n - uses: setup\n") + assert doc.find_index("ci", "steps", where={"uses": "setup"}) == 1 + + def test_path_not_found_raises_query_error(self): + doc = Document("name: foo\n") + with pytest.raises(QueryError): + doc.find_index("missing", where={"k": "v"}) + + def test_value_not_a_list_raises_node_type_error(self): + doc = Document("name: foo\n") + with pytest.raises(NodeTypeError): + doc.find_index("name", where={"k": "v"}) + + def test_value_is_dict_raises_node_type_error(self): + doc = Document("data:\n key: val\n") + with pytest.raises(NodeTypeError): + doc.find_index("data", where={"key": "val"}) + + def test_empty_where_raises_value_error(self): + doc = Document("items:\n - id: x\n") + with pytest.raises(ValueError, match="where"): + doc.find_index("items", where={}) + + def test_integer_key_in_path(self): + doc = Document( + "jobs:\n - steps:\n - uses: checkout\n - uses: build\n" + ) + assert doc.find_index("jobs", 0, "steps", where={"uses": "build"}) == 1 + + def test_root_list(self): + doc = Document("- name: alpha\n- name: beta\n") + assert doc.find_index(where={"name": "beta"}) == 1 + + class TestEmptyDocumentUpsert: def test_upsert_single_key_on_empty(self): doc = Document("") diff --git a/tests/test_editor.py b/tests/test_editor.py index 1b02d0b..37c9ffe 100644 --- a/tests/test_editor.py +++ b/tests/test_editor.py @@ -1,6 +1,7 @@ import pytest from yamltrip.editor import Editor +from yamltrip.errors import NodeTypeError, QueryError @pytest.fixture @@ -162,6 
class TestEditorFindIndex:
    """Exercise ``Editor.find_index`` against YAML files written under ``tmp_path``."""

    @staticmethod
    def _write_yaml(tmp_path, text):
        """Write *text* to ``test.yaml`` inside *tmp_path* and return its path."""
        target = tmp_path / "test.yaml"
        target.write_text(text, encoding="utf-8")
        return target

    def test_finds_match(self, tmp_path):
        source = self._write_yaml(tmp_path, "repos:\n - repo: alpha\n - repo: beta\n")
        with Editor(source) as editor:
            found = editor.find_index("repos", where={"repo": "beta"})
            assert found == 1

    def test_returns_none_when_no_match(self, tmp_path):
        source = self._write_yaml(tmp_path, "repos:\n - repo: alpha\n")
        with Editor(source) as editor:
            found = editor.find_index("repos", where={"repo": "missing"})
            assert found is None

    def test_empty_where_raises_value_error(self, tmp_path):
        source = self._write_yaml(tmp_path, "items:\n - id: x\n")
        with Editor(source) as editor:
            # An empty constraint dict is rejected; the message mentions "where".
            with pytest.raises(ValueError, match="where"):
                editor.find_index("items", where={})

    def test_missing_path_raises_query_error(self, tmp_path):
        source = self._write_yaml(tmp_path, "name: foo\n")
        with Editor(source) as editor:
            with pytest.raises(QueryError):
                editor.find_index("missing", where={"k": "v"})

    def test_non_list_raises_node_type_error(self, tmp_path):
        source = self._write_yaml(tmp_path, "name: foo\n")
        with Editor(source) as editor:
            # "name" resolves to a scalar, not a list.
            with pytest.raises(NodeTypeError):
                editor.find_index("name", where={"k": "v"})