From 7019c1ad39f93b207721c715ba22972159df4c7a Mon Sep 17 00:00:00 2001 From: Benjamin David Date: Sat, 4 Apr 2026 02:27:58 +0100 Subject: [PATCH 1/9] feat(changelog): semantic changelog for ODCS data contracts with text report Co-Authored-By: Claude Sonnet 4.6 (1M context) --- API.md | 13 + CHANGELOG.md | 3 +- CLAUDE.md | 6 + README.md | 28 +- datacontract/api.py | 43 + datacontract/changelog/__init__.py | 3 + datacontract/changelog/changelog.py | 175 +++ datacontract/changelog/normalize.py | 207 ++++ datacontract/cli.py | 13 + datacontract/data_contract.py | 46 + datacontract/model/changelog.py | 30 + datacontract/output/text_changelog_results.py | 90 ++ pyproject.toml | 1 + .../breaking/datacontract-definitions-v1.yaml | 13 - .../breaking/datacontract-definitions-v2.yaml | 36 - .../breaking/datacontract-definitions-v3.yaml | 36 - .../datacontract-fields-array-v1.yaml | 64 - .../datacontract-fields-array-v2.yaml | 64 - .../breaking/datacontract-fields-v1.yaml | 55 - .../breaking/datacontract-fields-v2.yaml | 83 -- .../breaking/datacontract-fields-v3.yaml | 84 -- .../breaking/datacontract-info-v1.yaml | 10 - .../breaking/datacontract-info-v2.yaml | 14 - .../breaking/datacontract-info-v3.yaml | 14 - .../breaking/datacontract-models-v1.yaml | 12 - .../breaking/datacontract-models-v2.yaml | 20 - .../breaking/datacontract-models-v3.yaml | 19 - .../breaking/datacontract-quality-v1.yaml | 10 - .../breaking/datacontract-quality-v2.yaml | 15 - .../breaking/datacontract-quality-v3.yaml | 15 - .../breaking/datacontract-terms-v1.yaml | 10 - .../breaking/datacontract-terms-v2.yaml | 20 - .../breaking/datacontract-terms-v3.yaml | 15 - .../changelog/golden_changelog_text.txt | 62 + .../changelog/helper/generate_golden.py | 54 + .../integration/changelog_integration_v1.yaml | 29 + .../integration/changelog_integration_v2.yaml | 59 + .../changelog/unit/changelog_unit_v1.yaml | 95 ++ .../changelog/unit/changelog_unit_v2.yaml | 106 ++ tests/test_api.py | 37 + tests/test_changelog.py 
| 58 + tests/test_changelog_engine.py | 786 ++++++++++++ tests/test_changelog_normalize.py | 1057 +++++++++++++++++ tests/test_changelog_output_text.py | 171 +++ tests/test_cli.py | 19 + 45 files changed, 3189 insertions(+), 611 deletions(-) create mode 100644 datacontract/changelog/__init__.py create mode 100644 datacontract/changelog/changelog.py create mode 100644 datacontract/changelog/normalize.py create mode 100644 datacontract/model/changelog.py create mode 100644 datacontract/output/text_changelog_results.py delete mode 100644 tests/fixtures/breaking/datacontract-definitions-v1.yaml delete mode 100644 tests/fixtures/breaking/datacontract-definitions-v2.yaml delete mode 100644 tests/fixtures/breaking/datacontract-definitions-v3.yaml delete mode 100644 tests/fixtures/breaking/datacontract-fields-array-v1.yaml delete mode 100644 tests/fixtures/breaking/datacontract-fields-array-v2.yaml delete mode 100644 tests/fixtures/breaking/datacontract-fields-v1.yaml delete mode 100644 tests/fixtures/breaking/datacontract-fields-v2.yaml delete mode 100644 tests/fixtures/breaking/datacontract-fields-v3.yaml delete mode 100644 tests/fixtures/breaking/datacontract-info-v1.yaml delete mode 100644 tests/fixtures/breaking/datacontract-info-v2.yaml delete mode 100644 tests/fixtures/breaking/datacontract-info-v3.yaml delete mode 100644 tests/fixtures/breaking/datacontract-models-v1.yaml delete mode 100644 tests/fixtures/breaking/datacontract-models-v2.yaml delete mode 100644 tests/fixtures/breaking/datacontract-models-v3.yaml delete mode 100644 tests/fixtures/breaking/datacontract-quality-v1.yaml delete mode 100644 tests/fixtures/breaking/datacontract-quality-v2.yaml delete mode 100644 tests/fixtures/breaking/datacontract-quality-v3.yaml delete mode 100644 tests/fixtures/breaking/datacontract-terms-v1.yaml delete mode 100644 tests/fixtures/breaking/datacontract-terms-v2.yaml delete mode 100644 tests/fixtures/breaking/datacontract-terms-v3.yaml create mode 100644 
tests/fixtures/changelog/golden_changelog_text.txt create mode 100644 tests/fixtures/changelog/helper/generate_golden.py create mode 100644 tests/fixtures/changelog/integration/changelog_integration_v1.yaml create mode 100644 tests/fixtures/changelog/integration/changelog_integration_v2.yaml create mode 100644 tests/fixtures/changelog/unit/changelog_unit_v1.yaml create mode 100644 tests/fixtures/changelog/unit/changelog_unit_v2.yaml create mode 100644 tests/test_changelog.py create mode 100644 tests/test_changelog_engine.py create mode 100644 tests/test_changelog_normalize.py create mode 100644 tests/test_changelog_output_text.py diff --git a/API.md b/API.md index f44074632..934e2936f 100644 --- a/API.md +++ b/API.md @@ -103,6 +103,19 @@ curl -X POST "http://localhost:4242/export?format=sql" \ --data-binary @datacontract.yaml ``` +## Changelog Two Data Contracts + +Compare two ODCS data contracts and receive a changelog. POST a JSON body with `v1` (source/before) and `v2` (target/after) as YAML strings. Returns a JSON object with `summary` and `entries`. + +```bash +curl -X POST "http://localhost:4242/changelog" \ + -H "Content-Type: application/json" \ + -d '{ + "v1": "'"$(cat v1.odcs.yaml)"'", + "v2": "'"$(cat v2.odcs.yaml)"'" + }' +``` + ## Try it out You can also use the Swagger UI to execute the commands directly. 
diff --git a/CHANGELOG.md b/CHANGELOG.md index 9fe1aebb1..0407a8ec3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added - Added `ci` command for CI/CD-optimized test runs: multi-file support, GitHub Actions annotations and step summary, Azure DevOps annotations, `--fail-on` flag, `--json` output +- Added `changelog` command and API endpoint (#1118) ### Fixed - Fix SQL export generating multiple PRIMARY KEY constraints for composite keys (#1026) @@ -951,4 +952,4 @@ The Golang version can be found at [cli-go](https://github.com/datacontract/cli- ## [0.1.1] ### Added -- Initial release. +- Initial release. \ No newline at end of file diff --git a/CLAUDE.md b/CLAUDE.md index e2d743f28..d3a96171a 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -89,6 +89,10 @@ datacontract export --format html datacontract.yaml --output datacontract.html # Import from a different format datacontract import --format sql --source my-ddl.sql --dialect postgres --output datacontract.yaml + +# Show a changelog between two data contracts +datacontract changelog datacontract-v1.yaml datacontract-v2.yaml + ``` ## Project Architecture @@ -111,6 +115,8 @@ The Data Contract CLI is an open-source command-line tool for working with data 5. **Linting (`datacontract/lint/`)**: Tools for validating data contract files against schema and best practices. +6. **Changelog (`datacontract/changelog/`)**: Semantic comparison of ODCS data contracts. 
+ ### Extension Pattern The project uses factory patterns for extensibility: diff --git a/README.md b/README.md index c18fda953..bdb0dfecd 100644 --- a/README.md +++ b/README.md @@ -117,6 +117,9 @@ $ datacontract init odcs.yaml # lint the odcs.yaml $ datacontract lint odcs.yaml +# show a changelog between two data contracts +$ datacontract changelog v1.odcs.yaml v2.odcs.yaml + # execute schema and quality checks (define credentials as environment variables) $ datacontract test odcs.yaml @@ -260,6 +263,7 @@ Commands - [init](#init) - [lint](#lint) +- [changelog](#changelog) - [test](#test) - [ci](#ci) - [export](#export) @@ -318,10 +322,32 @@ Commands ``` +### changelog +``` + + Usage: datacontract changelog [OPTIONS] V1 V2 + + Show a changelog between two data contracts. + +╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮ +│ * v1 TEXT The location (path) of the source (before) data contract YAML. [required] │ +│ * v2 TEXT The location (path) of the target (after) data contract YAML. [required] │ +╰──────────────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮ +│ --debug --no-debug Enable debug logging │ +│ --help Show this message and exit. │ +╰──────────────────────────────────────────────────────────────────────────────────────────────────╯ + +``` + +```bash +$ datacontract changelog v1.odcs.yaml v2.odcs.yaml +``` + ### test ``` - Usage: datacontract test [OPTIONS] [LOCATION] + Usage: datacontract test [OPTIONS] [LOCATION] Run schema and quality tests on configured servers. 
diff --git a/datacontract/api.py b/datacontract/api.py index 09a940730..d922b8bd3 100644 --- a/datacontract/api.py +++ b/datacontract/api.py @@ -1,12 +1,16 @@ import logging import os +import tempfile from typing import Annotated, Optional +import pydantic import typer +import yaml from fastapi import Body, Depends, FastAPI, HTTPException, Query, status from fastapi.middleware.cors import CORSMiddleware from fastapi.responses import PlainTextResponse from fastapi.security.api_key import APIKeyHeader +from pydantic import BaseModel from datacontract.data_contract import DataContract, ExportFormat from datacontract.model.run import Run @@ -358,6 +362,45 @@ async def lint( return {"result": lint_result.result, "checks": lint_result.checks} +class ChangelogRequest(BaseModel): + v1: str = DATA_CONTRACT_EXAMPLE_PAYLOAD + v2: str = DATA_CONTRACT_EXAMPLE_PAYLOAD + + +@app.post( + "/changelog", + tags=["changelog"], + summary="Show a changelog between two data contracts.", + description=""" + Compare two ODCS data contract YAMLs and return a changelog. + POST a JSON body with `v1` (source/before) and `v2` (target/after) as YAML strings. 
+ """, +) +async def changelog_endpoint( + body: ChangelogRequest, + api_key: Annotated[str | None, Depends(api_key_header)] = None, +): + check_api_key(api_key) + + with tempfile.NamedTemporaryFile(mode="w", suffix=".yaml", delete=False) as f1: + f1.write(body.v1) + v1_path = f1.name + with tempfile.NamedTemporaryFile(mode="w", suffix=".yaml", delete=False) as f2: + f2.write(body.v2) + v2_path = f2.name + + try: + result = DataContract(data_contract_file=v1_path).changelog(DataContract(data_contract_file=v2_path)) + return {"summary": result.summary, "entries": result.entries} + except yaml.YAMLError as e: + raise HTTPException(status_code=422, detail=f"Invalid YAML: {e}") + except pydantic.ValidationError as e: + raise HTTPException(status_code=422, detail=f"Invalid data contract: {e}") + finally: + os.unlink(v1_path) + os.unlink(v2_path) + + @app.post( "/export", tags=["export"], diff --git a/datacontract/changelog/__init__.py b/datacontract/changelog/__init__.py new file mode 100644 index 000000000..bda5d2938 --- /dev/null +++ b/datacontract/changelog/__init__.py @@ -0,0 +1,3 @@ +from datacontract.changelog.changelog import build_changelog, diff + +__all__ = ["diff", "build_changelog"] diff --git a/datacontract/changelog/changelog.py b/datacontract/changelog/changelog.py new file mode 100644 index 000000000..8a071de7b --- /dev/null +++ b/datacontract/changelog/changelog.py @@ -0,0 +1,175 @@ +""" +changelog — ODCS contract changelog builder +--------------------------------------------- +Provides two public functions: + diff() — normalise two ODCS contract dicts and return a raw DeepDiff dict + build_changelog() — transform a raw DeepDiff dict into structured summary + detail data + ready to be rendered as a changelog report. 
+""" + +import json +import re +from datetime import datetime, timezone + +from deepdiff import DeepDiff + +from datacontract.changelog.normalize import normalize + + +def diff(v1: dict, v2: dict) -> dict: + """Return the DeepDiff result as a plain dict. + + ignore_order=True - dict key ordering is irrelevant + verbose_level=2 - include old/new values, not just paths + """ + n1 = normalize(v1) + n2 = normalize(v2) + result = DeepDiff(n1, n2, ignore_order=True, verbose_level=2) + return json.loads(result.to_json()) + +_CHANGE_TYPE_MAP = { + "dictionary_item_added": "Added", + "dictionary_item_removed": "Removed", + "values_changed": "Changed", + "type_changes": "Changed", + "iterable_item_added": "Added", + "iterable_item_removed": "Removed", +} + + +def build_changelog(diff_result: dict, source_label: str = "v1", target_label: str = "v2") -> dict: + """Produce a JSON-serialisable dict with all data needed to render + the full changelog. + + Both summary.changes and detail.changes share the same shape: + { + "path": str, # dot-separated field path + "changeType": str, # Added | Removed | Changed + "old_value": any, # present for Changed/Removed; absent otherwise + "new_value": any, # present for Changed/Added; absent otherwise + } + + Summary rollup rules (detail always shows full leaf paths): + - Scalar Changed leaf → rolled up to parent (logicalType → field) + - Scalar Added/Removed leaf → rolled up to parent (businessName Added → field Added) + - Mixed Add+Remove on same parent → single entry with changeType Changed + - Dict Added/Removed (whole object) → stays at its own path, not rolled up + - List string item (tag) → rolled up to the tags parent in summary; + in detail the tag value is the final path segment (tags.pii Removed) + """ + + def _expand_to_entries(obj, change_type, base_segs): + entries = [] + for k, v in obj.items(): + segs = base_segs + [k] + if isinstance(v, dict): + entry = {"path": ".".join(segs), "changeType": change_type} + 
entries.append(entry) + entries.extend(_expand_to_entries(v, change_type, segs)) + else: + entry = {"path": ".".join(segs), "changeType": change_type} + if change_type == "Added": + entry["new_value"] = v + else: + entry["old_value"] = v + entries.append(entry) + return entries + + detail_changes = [] + for deepdiff_key, items in diff_result.items(): + change_type = _CHANGE_TYPE_MAP.get(deepdiff_key) + if not change_type: + continue + for raw_path, payload in items.items(): + # match ['key'] or ["key"] + segs = re.findall(r"""(?:\['([^']+)'\]|\["([^"]+)"\])""", raw_path) + segs = [group[0] if group[0] else group[1] for group in segs] + is_iterable = deepdiff_key in ("iterable_item_added", "iterable_item_removed") + if isinstance(payload, dict) and "old_value" in payload: + entry = { + "path": ".".join(segs), + "changeType": change_type, + "old_value": payload["old_value"], + "new_value": payload["new_value"], + } + detail_changes.append(entry) + elif change_type in ("Added", "Removed") and isinstance(payload, dict): + detail_changes.append({"path": ".".join(segs), "changeType": change_type}) + detail_changes.extend(_expand_to_entries(payload, change_type, segs)) + elif is_iterable and isinstance(payload, str): + entry = {"path": ".".join(segs + [payload]), "changeType": change_type} + detail_changes.append(entry) + else: + entry = {"path": ".".join(segs), "changeType": change_type} + if change_type == "Added": + entry["new_value"] = payload + else: + entry["old_value"] = payload + detail_changes.append(entry) + + detail_changes.sort(key=lambda x: x["path"]) + + detail_counts = { + "added": sum(1 for c in detail_changes if c["changeType"] == "Added"), + "removed": sum(1 for c in detail_changes if c["changeType"] == "Removed"), + "changed": sum(1 for c in detail_changes if c["changeType"] == "Changed"), + } + + summary_groups: dict[tuple, dict] = {} + for deepdiff_key, items in diff_result.items(): + change_type = _CHANGE_TYPE_MAP.get(deepdiff_key) + if not 
change_type: + continue + for raw_path, payload in items.items(): + segs = re.findall(r"""(?:\['([^']+)'\]|\["([^"]+)"\])""", raw_path) + segs = [group[0] if group[0] else group[1] for group in segs] + is_iterable = deepdiff_key in ("iterable_item_added", "iterable_item_removed") + is_scalar_change = ( + change_type == "Changed" + and isinstance(payload, dict) + and "old_value" in payload + and not isinstance(payload.get("old_value"), dict) + and not isinstance(payload.get("new_value"), dict) + ) + is_scalar_leaf = ( + change_type in ("Added", "Removed") and not isinstance(payload, dict) and not is_iterable + ) + if is_iterable and isinstance(payload, str): + display_segs = tuple(segs) + elif (is_scalar_change or is_scalar_leaf) and len(segs) > 1: + display_segs = tuple(segs[:-1]) + else: + display_segs = tuple(segs) + if display_segs not in summary_groups: + summary_groups[display_segs] = {"changeType": change_type} + else: + if summary_groups[display_segs]["changeType"] != change_type: + summary_groups[display_segs]["changeType"] = "Changed" + + summary_changes = [] + for segs, data in sorted(summary_groups.items(), key=lambda x: ".".join(x[0])): + summary_changes.append({"path": ".".join(segs), "changeType": data["changeType"]}) + + summary_counts = { + "added": sum(1 for c in summary_changes if c["changeType"] == "Added"), + "removed": sum(1 for c in summary_changes if c["changeType"] == "Removed"), + "changed": sum(1 for c in summary_changes if c["changeType"] == "Changed"), + } + + return { + "source_label": source_label, + "target_label": target_label, + "header": { + "title": "ODCS Data Contract Changelog", + "subtitle": f"{source_label} \u2192 {target_label}", + "generated_at": datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S UTC"), + }, + "summary": { + "counts": summary_counts, + "changes": summary_changes, + }, + "detail": { + "counts": detail_counts, + "changes": detail_changes, + }, + } diff --git a/datacontract/changelog/normalize.py 
b/datacontract/changelog/normalize.py new file mode 100644 index 000000000..e451110e1 --- /dev/null +++ b/datacontract/changelog/normalize.py @@ -0,0 +1,207 @@ +""" +normalize — ODCS contract normalization +---------------------------------------- +Converts named lists in a contract dict to dicts keyed by their natural key, +so DeepDiff can match items semantically rather than by position. + +DeepDiff matches list items by position by default, which produces +incorrect diffs when items are added/removed mid-list. Keying by the +natural key gives stable, semantically correct paths and meaningful +field names in the output: + + schema.orders.properties.order_id.logicalType Changed + rather than + schema[0].properties[1].logicalType Changed + +Example (schema list, inserting "customers" before "orders"): + + Before normalization — schema is a list of dicts: + "schema": [ + {"name": "orders", "physicalType": "table", "properties": [...]}, + {"name": "customers", "physicalType": "view", "properties": [...]}, + ] + + After normalization — schema is a dict keyed by name, with the key field stripped: + "schema": { + "orders": {"physicalType": "table", "properties": {...}}, + "customers": {"physicalType": "view", "properties": {...}}, + } + + Without normalization, DeepDiff matches by position and reports a spurious change: + "values_changed": {"root['schema'][0]['name']": {"old": "orders", "new": "customers"}} + + With normalization, DeepDiff matches by key and reports correctly: + "dictionary_item_added": {"root['schema']['customers']": {...}} + "dictionary_item_removed": {"root['schema']['orders']": {...}} + +# NOTE: Natural keys are hardcoded here because the open-data-contract-standard +# Pydantic models don't yet expose them. 
The planned fix is to add a __natural_key__ +# class var or Field annotation to each model upstream, then replace this table with +# a single reflection-based loop that derives both the list containers and their +# natural keys from the model metadata. + +Current hardcoded natural keys: +schema[] SchemaObject -> .name (required: [name]) +schema[].properties[] SchemaProperty -> .name (required: [name], recursive) +slaProperties[] SLAProperty -> .property +servers[] Server -> .server +servers[].roles[] Role -> .role +servers[].customProperties[] CustomProperty -> .property +support[] SupportItem -> .channel +roles[] Role -> .role +team.members[] TeamMember -> .username +authoritativeDefinitions[] AuthoritativeDefinition -> .url +description.authoritativeDefinitions[] AuthoritativeDefinition -> .url +description.customProperties[] CustomProperty -> .property +""" + +def _normalize_by(items: list[dict], key_field: str) -> dict: + """Key a list of dicts by a named field, omitting the key field from the value. + + Falls back to the list index if the key field is absent on an item. + """ + result = {} + for i, item in enumerate(items): + key = item.get(key_field, f"__pos_{i}__") + result[key] = {k: v for k, v in item.items() if k != key_field} + return result + + +def _normalize_auth_defs(items: list[dict]) -> dict: + """Key authoritativeDefinitions by url with id and positional fallback. + + Unlike _normalize_by, the key (url) is retained in the value dict because + AuthoritativeDefinition has no single required key — url is only inferred, + so stripping it would lose data when the positional fallback fires. + """ + result = {} + for i, item in enumerate(items): + key = item.get("url") or item.get("id") or f"__pos_{i}__" + result[key] = item + return result + + +def _normalize_relationships(items: list[dict], schema_level: bool = True) -> dict: + """Key relationships by a stable composite key. + + Schema-level: from:to composite. Property-level: to only. 
+ Falls back to positional index if key fields are absent. + """ + result = {} + for i, item in enumerate(items): + if schema_level: + from_val = str(item.get("from", "")) + to_val = str(item.get("to", "")) + key = f"{from_val}:{to_val}" if (from_val or to_val) else f"__pos_{i}__" + else: + to_val = item.get("to") + key = str(to_val) if to_val else f"__pos_{i}__" + result[key] = item + return result + + +def _normalize_quality(items: list[dict]) -> dict: + """Key DataQuality items by name (with positional fallback).""" + result = {} + for i, item in enumerate(items): + key = item.get("name") or f"__pos_{i}__" + entry = {k: v for k, v in item.items() if k != "name"} + if "customProperties" in entry and isinstance(entry["customProperties"], list): + entry["customProperties"] = _normalize_by(entry["customProperties"], "property") + if "authoritativeDefinitions" in entry and isinstance(entry["authoritativeDefinitions"], list): + entry["authoritativeDefinitions"] = _normalize_auth_defs(entry["authoritativeDefinitions"]) + result[key] = entry + return result + + +def _normalize_schema_fields(entry: dict, *, schema_level: bool) -> dict: + """Normalize nested list fields shared by SchemaObject and SchemaProperty.""" + if "quality" in entry and isinstance(entry["quality"], list): + entry["quality"] = _normalize_quality(entry["quality"]) + if "customProperties" in entry and isinstance(entry["customProperties"], list): + entry["customProperties"] = _normalize_by(entry["customProperties"], "property") + if "authoritativeDefinitions" in entry and isinstance(entry["authoritativeDefinitions"], list): + entry["authoritativeDefinitions"] = _normalize_auth_defs(entry["authoritativeDefinitions"]) + if "relationships" in entry and isinstance(entry["relationships"], list): + entry["relationships"] = _normalize_relationships(entry["relationships"], schema_level=schema_level) + return entry + + +def _normalize_properties(properties: list[dict]) -> dict: + """Recursively key 
SchemaProperty lists by .name.""" + result = {} + for prop in properties: + key = prop.get("name", prop.get("id", str(prop))) + entry = {k: v for k, v in prop.items() if k != "name"} + if "properties" in entry and isinstance(entry["properties"], list): + entry["properties"] = _normalize_properties(entry["properties"]) + entry = _normalize_schema_fields(entry, schema_level=False) + result[key] = entry + return result + + +def normalize(contract: dict) -> dict: + """Convert named lists to dicts keyed by their natural key field. + + See headers comments for more details. + + """ + out = dict(contract) + + if "schema" in out and isinstance(out["schema"], list): + normalized_schema = {} + for tbl in out["schema"]: + key = tbl.get("name", tbl.get("id", str(tbl))) + entry = {k: v for k, v in tbl.items() if k != "name"} + if "properties" in entry and isinstance(entry["properties"], list): + entry["properties"] = _normalize_properties(entry["properties"]) + entry = _normalize_schema_fields(entry, schema_level=True) + normalized_schema[key] = entry + out["schema"] = normalized_schema + + if "slaProperties" in out and isinstance(out["slaProperties"], list): + out["slaProperties"] = _normalize_by(out["slaProperties"], "property") + + if "servers" in out and isinstance(out["servers"], list): + normalized_servers = {} + for s in out["servers"]: + if not s.get("server"): + continue + key = s["server"] + entry = {k: v for k, v in s.items() if k != "server"} + if "roles" in entry and isinstance(entry["roles"], list): + entry["roles"] = _normalize_by(entry["roles"], "role") + if "customProperties" in entry and isinstance(entry["customProperties"], list): + entry["customProperties"] = _normalize_by(entry["customProperties"], "property") + normalized_servers[key] = entry + out["servers"] = normalized_servers + + if "support" in out and isinstance(out["support"], list): + out["support"] = _normalize_by(out["support"], "channel") + + if "roles" in out and isinstance(out["roles"], list): 
+ out["roles"] = _normalize_by(out["roles"], "role") + + if "customProperties" in out and isinstance(out["customProperties"], list): + out["customProperties"] = _normalize_by(out["customProperties"], "property") + + if "team" in out: + team = out["team"] + if isinstance(team, dict) and "members" in team and isinstance(team["members"], list): + out["team"] = {**team, "members": _normalize_by(team["members"], "username")} + elif isinstance(team, list): + out["team"] = _normalize_by(team, "username") + + if "authoritativeDefinitions" in out and isinstance(out["authoritativeDefinitions"], list): + out["authoritativeDefinitions"] = _normalize_auth_defs(out["authoritativeDefinitions"]) + + if "description" in out and isinstance(out["description"], dict): + desc = out["description"] + normalized_desc = dict(desc) + if "authoritativeDefinitions" in desc and isinstance(desc["authoritativeDefinitions"], list): + normalized_desc["authoritativeDefinitions"] = _normalize_auth_defs(desc["authoritativeDefinitions"]) + if "customProperties" in desc and isinstance(desc["customProperties"], list): + normalized_desc["customProperties"] = _normalize_by(desc["customProperties"], "property") + out["description"] = normalized_desc + + return out diff --git a/datacontract/cli.py b/datacontract/cli.py index fea5f8518..7f671becb 100644 --- a/datacontract/cli.py +++ b/datacontract/cli.py @@ -24,6 +24,7 @@ from datacontract.output.ci_output import write_ci_output, write_ci_summary, write_json_results from datacontract.output.output_format import OutputFormat from datacontract.output.test_results_writer import write_test_result +from datacontract.output.text_changelog_results import write_text_changelog_results console = Console() @@ -127,6 +128,18 @@ def enable_debug_logging(debug: bool): ) +@app.command(name="changelog") +def changelog( + v1: Annotated[str, typer.Argument(help="The location (path) of the source (before) data contract YAML.")], + v2: Annotated[str, typer.Argument(help="The 
location (path) of the target (after) data contract YAML.")], + debug: debug_option = None, +): + """Show a changelog between two data contracts.""" + enable_debug_logging(debug) + result = DataContract(data_contract_file=v1).changelog(DataContract(data_contract_file=v2)) + write_text_changelog_results(result, console) + + @app.command(name="test") def test( location: Annotated[ diff --git a/datacontract/data_contract.py b/datacontract/data_contract.py index a086e1695..d9e032b10 100644 --- a/datacontract/data_contract.py +++ b/datacontract/data_contract.py @@ -1,6 +1,7 @@ import logging import typing +import yaml from open_data_contract_standard.model import OpenDataContractStandard, Team if typing.TYPE_CHECKING: @@ -14,6 +15,7 @@ from datacontract.init.init_template import get_init_template from datacontract.integration.entropy_data import publish_test_results_to_entropy_data from datacontract.lint import resolve +from datacontract.model.changelog import ChangelogEntry, ChangelogResult, ChangelogType from datacontract.model.exceptions import DataContractException from datacontract.model.run import Check, ResultEnum, Run @@ -187,6 +189,50 @@ def export( export_args=kwargs, ) + def _to_odcs_dict(self) -> dict: + """Resolve this data contract to an OpenDataContractStandard dict.""" + if self._data_contract is not None: + contract = self._data_contract + elif self._data_contract_file is not None: + with open(self._data_contract_file, encoding="utf-8") as f: + contract = OpenDataContractStandard.model_validate(yaml.safe_load(f)) + elif self._data_contract_str is not None: + contract = OpenDataContractStandard.model_validate(yaml.safe_load(self._data_contract_str)) + else: + raise DataContractException( + type="changelog", + result=ResultEnum.error, + name="Resolve Data Contract", + reason="No data contract source provided", + engine="datacontract", + ) + return contract.model_dump(exclude_none=True, by_alias=True) + + def changelog(self, other: "DataContract") -> 
ChangelogResult: + """Generate a changelog between this data contract and another, returning a ChangelogResult.""" + from datacontract.changelog.changelog import build_changelog, diff + + v1_label = self._data_contract_file or "" + v2_label = other._data_contract_file or "" + + raw_diff = diff(self._to_odcs_dict(), other._to_odcs_dict()) + changelog = build_changelog(raw_diff, source_label=v1_label, target_label=v2_label) + + result = ChangelogResult(v1=v1_label, v2=v2_label) + for change in changelog["summary"]["changes"]: + result.summary.append(ChangelogEntry( + path=change["path"], + type=ChangelogType(change["changeType"].lower()), + )) + for change in changelog["detail"]["changes"]: + result.entries.append(ChangelogEntry( + path=change["path"], + type=ChangelogType(change["changeType"].lower()), + old_value=str(change["old_value"]) if change.get("old_value") is not None else None, + new_value=str(change["new_value"]) if change.get("new_value") is not None else None, + )) + return result + @classmethod def import_from_source( cls, diff --git a/datacontract/model/changelog.py b/datacontract/model/changelog.py new file mode 100644 index 000000000..dfc2cba20 --- /dev/null +++ b/datacontract/model/changelog.py @@ -0,0 +1,30 @@ +from enum import Enum + +from pydantic import BaseModel + + +class ChangelogType(str, Enum): + added = "added" + removed = "removed" + changed = "changed" + + +class ChangelogEntry(BaseModel): + path: str + type: ChangelogType + old_value: str | None = None + new_value: str | None = None + + +class ChangelogResult(BaseModel): + v1: str + v2: str + summary: list[ChangelogEntry] = [] + entries: list[ChangelogEntry] = [] + + def has_changes(self) -> bool: + return len(self.entries) > 0 + + def pretty(self) -> str: + return self.model_dump_json(indent=2) + diff --git a/datacontract/output/text_changelog_results.py b/datacontract/output/text_changelog_results.py new file mode 100644 index 000000000..9c65d2982 --- /dev/null +++ 
b/datacontract/output/text_changelog_results.py @@ -0,0 +1,90 @@ +import io + +from rich import box +from rich.console import Console +from rich.table import Table + +from datacontract.model.changelog import ChangelogResult, ChangelogType + +_VAL_W = 30 + + +def write_text_changelog_results(result: ChangelogResult, console: Console): + _print_summary(result, console) + _print_table(result, console) + + +def _badges(entries: list) -> str: + removed = sum(1 for e in entries if e.type == ChangelogType.removed) + changed = sum(1 for e in entries if e.type == ChangelogType.changed) + added = sum(1 for e in entries if e.type == ChangelogType.added) + parts = [] + if removed: + parts.append(f"[ {removed} Removed ]") + if changed: + parts.append(f"[ {changed} Changed ]") + if added: + parts.append(f"[ {added} Added ]") + return " ".join(parts) + + +def _print_summary(result: ChangelogResult, console: Console): + if not result.summary: + return + console.print("Summary") + console.print(_badges(result.summary)) + table = Table(box=box.ROUNDED, show_header=True) + table.add_column("Change", no_wrap=True) + table.add_column("Field", no_wrap=True) + for entry in result.summary: + table.add_row(_with_markup(entry.type), entry.path) + buf = io.StringIO() + wide = Console(file=buf, width=300, highlight=False, force_terminal=console.is_terminal, no_color=console.no_color) + wide.print(table) + print(buf.getvalue(), end="") + print("") + + +def _print_table(result: ChangelogResult, console: Console): + console.print("Details") + table = Table(box=box.ROUNDED) + table.add_column("Change", no_wrap=True) + table.add_column("Path", no_wrap=True) + table.add_column("Old Value", max_width=_VAL_W, no_wrap=True) + table.add_column("New Value", max_width=_VAL_W, no_wrap=True) + for entry in result.entries: + table.add_row( + _with_markup(entry.type), + entry.path, + _wrap(entry.old_value or "", _VAL_W), + _wrap(entry.new_value or "", _VAL_W), + ) + buf = io.StringIO() + wide = 
Console(file=buf, width=300, highlight=False, force_terminal=console.is_terminal, no_color=console.no_color) + wide.print(table) + print(buf.getvalue(), end="") + + +def _with_markup(changelog_type: ChangelogType) -> str: + if changelog_type == ChangelogType.added: + return "[green]added[/green]" + if changelog_type == ChangelogType.removed: + return "[red]removed[/red]" + if changelog_type == ChangelogType.changed: + return "[yellow]changed[/yellow]" + return changelog_type.value + + +def _wrap(text: str, max_width: int) -> str: + if len(text) <= max_width: + return text + lines, current = [], "" + for word in text.split(): + if current and len(current) + 1 + len(word) > max_width: + lines.append(current) + current = word + else: + current = (current + " " + word).lstrip() + if current: + lines.append(current) + return "\n".join(lines) diff --git a/pyproject.toml b/pyproject.toml index bddb8d953..4b7372c4d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -32,6 +32,7 @@ dependencies = [ "jinja_partials>=0.2.1,<1.0.0", "datacontract-specification>=1.2.3,<2.0.0", "open-data-contract-standard>=3.1.2,<4.0.0", + "deepdiff>=6.0.0,<9.0.0", ] [project.optional-dependencies] diff --git a/tests/fixtures/breaking/datacontract-definitions-v1.yaml b/tests/fixtures/breaking/datacontract-definitions-v1.yaml deleted file mode 100644 index dcf2470b7..000000000 --- a/tests/fixtures/breaking/datacontract-definitions-v1.yaml +++ /dev/null @@ -1,13 +0,0 @@ -dataContractSpecification: 1.2.1 -id: my-data-contract-id -info: - title: My Data Contract - version: 0.0.1 - my-custom-required-field: hello - -models: - my_table: - type: table - fields: - my_field: - required: false diff --git a/tests/fixtures/breaking/datacontract-definitions-v2.yaml b/tests/fixtures/breaking/datacontract-definitions-v2.yaml deleted file mode 100644 index 06110a335..000000000 --- a/tests/fixtures/breaking/datacontract-definitions-v2.yaml +++ /dev/null @@ -1,36 +0,0 @@ -dataContractSpecification: 1.2.1 -id: 
my-data-contract-id -info: - title: My Data Contract - version: 0.0.1 - my-custom-required-field: hello - -models: - my_table: - type: table - fields: - my_field: - $ref: '#/definitions/my_definition' - -definitions: - my_definition: - name: my_definition - domain: global - title: my_title - description: My Description - type: string - enum: [my_enum] - format: uuid - minLength: 8 - maxLength: 14 - pattern: .* - minimum: 8 - exclusiveMaximum: 8 - maximum: 14 - exclusiveMinimum: 14 - example: my_example - pii: false - classification: internal - tags: [my_tags] - - diff --git a/tests/fixtures/breaking/datacontract-definitions-v3.yaml b/tests/fixtures/breaking/datacontract-definitions-v3.yaml deleted file mode 100644 index 0f096113e..000000000 --- a/tests/fixtures/breaking/datacontract-definitions-v3.yaml +++ /dev/null @@ -1,36 +0,0 @@ -dataContractSpecification: 1.2.1 -id: my-data-contract-id -info: - title: My Data Contract - version: 0.0.1 - my-custom-required-field: hello - -models: - my_table: - type: table - fields: - my_field: - $ref: '#/definitions/my_definition_2' - -definitions: - my_definition_2: - name: my_definition_2 - domain: global - title: my_title_2 - description: My Description 2 - type: integer - enum: [my_enum_2] - format: url - minLength: 10 - maxLength: 20 - pattern: .*.* - minimum: 10 - exclusiveMaximum: 20 - maximum: 20 - exclusiveMinimum: 10 - example: my_example_2 - pii: true - classification: sensitive - tags: [my_tags_2] - - diff --git a/tests/fixtures/breaking/datacontract-fields-array-v1.yaml b/tests/fixtures/breaking/datacontract-fields-array-v1.yaml deleted file mode 100644 index 2334c5435..000000000 --- a/tests/fixtures/breaking/datacontract-fields-array-v1.yaml +++ /dev/null @@ -1,64 +0,0 @@ -dataContractSpecification: 1.2.1 -id: demo-contract-id -info: - title: Test for changes in datacontracts that includes arrays - version: 0.0.1 - description: my desc - owner: owner - contact: - name: john doe - email: john.doe@example.com 
-models: - DataType: - namespace: dp.schemas - description: Description - type: table - fields: - Records: - type: array - required: true - description: Some fields on this complex array structure will change - items: - type: object - fields: - Field1: - type: int - required: false - description: Field1 desc - sample: 1 - pii: false - classification: Unclassified - Discount: - type: record - required: false - fields: - IsAutomatic: - type: boolean - required: true - description: Indicates if the application is automatic - sample: true - pii: false - classification: Unclassified - Conditions: - type: object - required: true - fields: - Min1: - type: double - required: false - description: Minimum test1 - sample: 50.0 - pii: false - classification: Unclassified - Hierarchy: - type: record - required: false - fields: - HasArticles: - type: string - required: true - description: Indicates if articles are included - sample: false - pii: false - classification: Unclassified - \ No newline at end of file diff --git a/tests/fixtures/breaking/datacontract-fields-array-v2.yaml b/tests/fixtures/breaking/datacontract-fields-array-v2.yaml deleted file mode 100644 index 42f582b0c..000000000 --- a/tests/fixtures/breaking/datacontract-fields-array-v2.yaml +++ /dev/null @@ -1,64 +0,0 @@ -dataContractSpecification: 1.2.1 -id: demo-contract-id -info: - title: Test for changes in datacontracts that includes arrays - version: 0.0.1 - description: my desc - owner: owner - contact: - name: john doe - email: john.doe@example.com -models: - DataType: - namespace: dp.schemas - description: Description - type: table - fields: - Records: - type: array - required: true - description: Some fields on this complex array structure will change - items: - type: object - fields: - Field1: - type: int - required: false - description: CHANGING PII (THIS DESCRIPTION THROWS 0 ERRORS) - sample: 1 - pii: true - classification: Unclassified - Discount: - type: record - required: false - fields: - 
IsAutomatic: - type: boolean - required: true - description: Changed classification - sample: true - pii: false - classification: classified - Conditions: - type: object - required: true - fields: - Min1: - type: double - required: false - description: Minimum test1 - sample: 50.0 - pii: false - classification: Unclassified - Hierarchy: - type: record - required: false - fields: - HasArticles: - type: int - required: true - description: changing type from string to int - sample: false - pii: false - classification: Unclassified - \ No newline at end of file diff --git a/tests/fixtures/breaking/datacontract-fields-v1.yaml b/tests/fixtures/breaking/datacontract-fields-v1.yaml deleted file mode 100644 index eef3a512a..000000000 --- a/tests/fixtures/breaking/datacontract-fields-v1.yaml +++ /dev/null @@ -1,55 +0,0 @@ -dataContractSpecification: 1.2.1 -id: my-data-contract-id -info: - title: My Data Contract - version: 0.0.1 - my-custom-required-field: hello - -models: - my_table: - type: table - fields: - field_type: - description: My Description - field_format: - type: string - field_required: - type: string - field_primaryKey: - type: string - field_references: - type: string - field_unique: - type: string - field_description: - type: string - field_pii: - type: string - field_classification: - type: string - field_pattern: - type: string - field_minLength: - type: string - field_maxLength: - type: string - field_minimum: - type: string - field_exclusiveMinimum: - type: string - field_maximum: - type: string - field_exclusiveMaximum: - type: string - field_enum: - type: string - field_tags: - type: string - field_ref: - type: string - field_fields: - fields: - nested_field_1: - type: string - field_custom_key: - type: string diff --git a/tests/fixtures/breaking/datacontract-fields-v2.yaml b/tests/fixtures/breaking/datacontract-fields-v2.yaml deleted file mode 100644 index 1ebf8fce6..000000000 --- a/tests/fixtures/breaking/datacontract-fields-v2.yaml +++ /dev/null @@ 
-1,83 +0,0 @@ -dataContractSpecification: 1.2.1 -id: my-data-contract-id -info: - title: My Data Contract - version: 0.0.1 - my-custom-required-field: hello - -models: - my_table: - type: table - fields: - field_type: - type: string - description: My Description - field_format: - type: string - format: email - field_required: - type: string - required: false - field_primaryKey: - type: string - primaryKey: false - field_references: - type: string - references: my_table.field_type - field_unique: - type: string - unique: false - field_description: - type: string - description: My Description - field_pii: - type: string - pii: true - field_classification: - type: string - classification: sensitive - field_pattern: - type: string - pattern: ^[A-Za-z0-9]{8,14}$ - field_minLength: - type: string - minLength: 8 - field_maxLength: - type: string - maxLength: 14 - field_minimum: - type: string - minimum: 8 - field_exclusiveMinimum: - type: string - exclusiveMinimum: 8 - field_maximum: - type: string - maximum: 14 - field_exclusiveMaximum: - type: string - exclusiveMaximum: 14 - field_enum: - type: string - enum: [one] - field_tags: - type: string - tags: [one] - field_ref: - type: string - $ref: '#/definitions/my_definition' - field_fields: - fields: - nested_field_1: - type: string - new_nested_field: - type: string - new_field: - type: string - field_custom_key: - type: string - custom-key: some value -definitions: - my_definition: - name: my_definition - type: string \ No newline at end of file diff --git a/tests/fixtures/breaking/datacontract-fields-v3.yaml b/tests/fixtures/breaking/datacontract-fields-v3.yaml deleted file mode 100644 index 36187ad08..000000000 --- a/tests/fixtures/breaking/datacontract-fields-v3.yaml +++ /dev/null @@ -1,84 +0,0 @@ -dataContractSpecification: 1.2.1 -id: my-data-contract-id -info: - title: My Data Contract - version: 0.0.1 - my-custom-required-field: hello - -models: - my_table: - type: table - fields: - field_type: - type: integer - 
description: My Description - field_format: - type: string - format: url - field_required: - type: string - required: true - field_primaryKey: - type: string - primaryKey: true - field_references: - type: string - references: my_table.field_format - field_unique: - type: string - unique: true - field_description: - type: string - description: My updated Description - field_pii: - type: string - pii: false - field_classification: - type: string - classification: restricted - field_pattern: - type: string - pattern: ^[A-Za-z0-9]$ - field_minLength: - type: string - minLength: 10 - field_maxLength: - type: string - maxLength: 20 - field_minimum: - type: string - minimum: 10 - field_exclusiveMinimum: - type: string - exclusiveMinimum: 10 - field_maximum: - type: string - maximum: 20 - field_exclusiveMaximum: - type: string - exclusiveMaximum: 20 - field_enum: - type: string - enum: [one, two] - field_tags: - type: string - tags: [one, two] - field_ref: - type: string - $ref: '#/definitions/my_definition_2' - field_fields: - fields: - nested_field_1: - type: integer - new_nested_field: - type: string - new_field: - type: string - field_custom_key: - type: string - custom-key: some other value - -definitions: - my_definition_2: - name: my_definition_2 - type: string \ No newline at end of file diff --git a/tests/fixtures/breaking/datacontract-info-v1.yaml b/tests/fixtures/breaking/datacontract-info-v1.yaml deleted file mode 100644 index 5703faa9a..000000000 --- a/tests/fixtures/breaking/datacontract-info-v1.yaml +++ /dev/null @@ -1,10 +0,0 @@ -dataContractSpecification: 0.9.2 -id: my-data-contract-id -info: - title: My Data Contract - version: 0.0.1 -models: - orders: - fields: - column_1: - type: string diff --git a/tests/fixtures/breaking/datacontract-info-v2.yaml b/tests/fixtures/breaking/datacontract-info-v2.yaml deleted file mode 100644 index d5c85d499..000000000 --- a/tests/fixtures/breaking/datacontract-info-v2.yaml +++ /dev/null @@ -1,14 +0,0 @@ 
-dataContractSpecification: 0.9.2 -id: my-data-contract-id -info: - title: My Data Contract - version: 0.0.1 - owner: Data Team - some-other-key: some information - contact: - email: datateam@work.com -models: - orders: - fields: - column_1: - type: string diff --git a/tests/fixtures/breaking/datacontract-info-v3.yaml b/tests/fixtures/breaking/datacontract-info-v3.yaml deleted file mode 100644 index 06a4a7d48..000000000 --- a/tests/fixtures/breaking/datacontract-info-v3.yaml +++ /dev/null @@ -1,14 +0,0 @@ -dataContractSpecification: 0.9.2 -id: my-data-contract-id -info: - title: My Data Contract - version: 0.0.1 - owner: Another Team - some-other-key: new information - contact: - email: anotherteam@work.com -models: - orders: - fields: - column_1: - type: string diff --git a/tests/fixtures/breaking/datacontract-models-v1.yaml b/tests/fixtures/breaking/datacontract-models-v1.yaml deleted file mode 100644 index ac46a5c7c..000000000 --- a/tests/fixtures/breaking/datacontract-models-v1.yaml +++ /dev/null @@ -1,12 +0,0 @@ -dataContractSpecification: 1.2.1 -id: my-data-contract-id -info: - title: My Data Contract - version: 0.0.1 - my-custom-required-field: hello - -models: - my_table: - fields: - my_field: - description: My Description \ No newline at end of file diff --git a/tests/fixtures/breaking/datacontract-models-v2.yaml b/tests/fixtures/breaking/datacontract-models-v2.yaml deleted file mode 100644 index 11cf57e25..000000000 --- a/tests/fixtures/breaking/datacontract-models-v2.yaml +++ /dev/null @@ -1,20 +0,0 @@ -dataContractSpecification: 1.2.1 -id: my-data-contract-id -info: - title: My Data Contract - version: 0.0.1 - my-custom-required-field: hello - -models: - my_table: - type: table - description: My Model Description - fields: - my_field: - description: My Description - another-key: original value - my_table_2: - fields: - my_field_2: - description: My Description 2 - some-other-key: some value \ No newline at end of file diff --git 
a/tests/fixtures/breaking/datacontract-models-v3.yaml b/tests/fixtures/breaking/datacontract-models-v3.yaml deleted file mode 100644 index dfab651ae..000000000 --- a/tests/fixtures/breaking/datacontract-models-v3.yaml +++ /dev/null @@ -1,19 +0,0 @@ -dataContractSpecification: 1.2.1 -id: my-data-contract-id -info: - title: My Data Contract - version: 0.0.1 - my-custom-required-field: hello - -models: - my_table: - type: object - description: My Updated Model Description - fields: - my_field: - description: My Description - another-key: updated value - my_table_2: - fields: - my_field_2: - description: My Description 2 \ No newline at end of file diff --git a/tests/fixtures/breaking/datacontract-quality-v1.yaml b/tests/fixtures/breaking/datacontract-quality-v1.yaml deleted file mode 100644 index 7baba622f..000000000 --- a/tests/fixtures/breaking/datacontract-quality-v1.yaml +++ /dev/null @@ -1,10 +0,0 @@ -dataContractSpecification: 1.2.1 -id: my-data-contract-id -info: - title: My Data Contract - version: 0.0.1 -models: - orders: - fields: - column_1: - type: string diff --git a/tests/fixtures/breaking/datacontract-quality-v2.yaml b/tests/fixtures/breaking/datacontract-quality-v2.yaml deleted file mode 100644 index 9e8c33c4e..000000000 --- a/tests/fixtures/breaking/datacontract-quality-v2.yaml +++ /dev/null @@ -1,15 +0,0 @@ -dataContractSpecification: 1.2.1 -id: my-data-contract-id -info: - title: My Data Contract - version: 0.0.1 -models: - orders: - fields: - column_1: - type: string -quality: - type: SodaCL - specification: |- - checks for orders: - - freshness(column_1) < 1d diff --git a/tests/fixtures/breaking/datacontract-quality-v3.yaml b/tests/fixtures/breaking/datacontract-quality-v3.yaml deleted file mode 100644 index 4832289a5..000000000 --- a/tests/fixtures/breaking/datacontract-quality-v3.yaml +++ /dev/null @@ -1,15 +0,0 @@ -dataContractSpecification: 1.2.1 -id: my-data-contract-id -info: - title: My Data Contract - version: 0.0.1 -models: - orders: - 
fields: - column_1: - type: string -quality: - type: custom - specification: |- - checks for orders: - - freshness(column_1) < 2d diff --git a/tests/fixtures/breaking/datacontract-terms-v1.yaml b/tests/fixtures/breaking/datacontract-terms-v1.yaml deleted file mode 100644 index 5703faa9a..000000000 --- a/tests/fixtures/breaking/datacontract-terms-v1.yaml +++ /dev/null @@ -1,10 +0,0 @@ -dataContractSpecification: 0.9.2 -id: my-data-contract-id -info: - title: My Data Contract - version: 0.0.1 -models: - orders: - fields: - column_1: - type: string diff --git a/tests/fixtures/breaking/datacontract-terms-v2.yaml b/tests/fixtures/breaking/datacontract-terms-v2.yaml deleted file mode 100644 index 5c6379bb7..000000000 --- a/tests/fixtures/breaking/datacontract-terms-v2.yaml +++ /dev/null @@ -1,20 +0,0 @@ -dataContractSpecification: 0.9.2 -id: my-data-contract-id -info: - title: My Data Contract - version: 0.0.1 -terms: - usage: | - Data can be used for reports, analytics and machine learning use cases. - Order may be linked and joined by other tables - limitations: | - Not suitable for real-time use cases. - Data may not be used to identify individual customers. 
- Max data processing per day: 10 TiB - billing: 5000 USD per month - noticePeriod: P3M -models: - orders: - fields: - column_1: - type: string diff --git a/tests/fixtures/breaking/datacontract-terms-v3.yaml b/tests/fixtures/breaking/datacontract-terms-v3.yaml deleted file mode 100644 index e9d1dcdc5..000000000 --- a/tests/fixtures/breaking/datacontract-terms-v3.yaml +++ /dev/null @@ -1,15 +0,0 @@ -dataContractSpecification: 0.9.2 -id: my-data-contract-id -info: - title: My Data Contract - version: 0.0.1 -terms: - usage: Data can be used for anything - billing: 1000000 GBP per month - noticePeriod: P1Y - someOtherTerms: must abide by policies -models: - orders: - fields: - column_1: - type: string diff --git a/tests/fixtures/changelog/golden_changelog_text.txt b/tests/fixtures/changelog/golden_changelog_text.txt new file mode 100644 index 000000000..2ce690aeb --- /dev/null +++ b/tests/fixtures/changelog/golden_changelog_text.txt @@ -0,0 +1,62 @@ +Summary +[ 1 Removed ] [ 4 Changed ] [ 2 Added ] +╭─────────┬───────────────────────────────────────╮ +│ Change │ Field │ +├─────────┼───────────────────────────────────────┤ +│ added │ schema.customers │ +│ removed │ schema.orders.properties.customer_id │ +│ changed │ schema.orders.properties.order_date │ +│ changed │ schema.orders.properties.order_id │ +│ added │ schema.orders.properties.region │ +│ changed │ schema.orders.properties.total_amount │ +│ changed │ slaProperties.availability │ +╰─────────┴───────────────────────────────────────╯ + +Details +╭─────────┬──────────────────────────────────────────────────────────┬────────────────────────────────┬───────────────────────────────╮ +│ Change │ Path │ Old Value │ New Value │ +├─────────┼──────────────────────────────────────────────────────────┼────────────────────────────────┼───────────────────────────────┤ +│ added │ schema.customers │ │ │ +│ added │ schema.customers.physicalName │ │ customers_tbl │ +│ added │ schema.customers.properties │ │ │ +│ added │ 
schema.customers.properties.country │ │ │ +│ added │ schema.customers.properties.country.logicalType │ │ string │ +│ added │ schema.customers.properties.country.partitionKeyPosition │ │ 1 │ +│ added │ schema.customers.properties.country.partitioned │ │ True │ +│ added │ schema.customers.properties.country.required │ │ False │ +│ added │ schema.customers.properties.created_at │ │ │ +│ added │ schema.customers.properties.created_at.description │ │ Record creation timestamp │ +│ added │ schema.customers.properties.created_at.logicalType │ │ timestamp │ +│ added │ schema.customers.properties.created_at.required │ │ True │ +│ added │ schema.customers.properties.customer_id │ │ │ +│ added │ schema.customers.properties.customer_id.description │ │ Unique order ID │ +│ added │ schema.customers.properties.customer_id.logicalType │ │ string │ +│ added │ schema.customers.properties.customer_id.primaryKey │ │ True │ +│ added │ schema.customers.properties.customer_id.required │ │ True │ +│ added │ schema.customers.properties.date_of_birth │ │ │ +│ added │ schema.customers.properties.date_of_birth.classification │ │ restricted │ +│ added │ schema.customers.properties.date_of_birth.logicalType │ │ date │ +│ added │ schema.customers.properties.date_of_birth.required │ │ False │ +│ added │ schema.customers.properties.email │ │ │ +│ added │ schema.customers.properties.email.classification │ │ confidential │ +│ added │ schema.customers.properties.email.encryptedName │ │ email_encrypt │ +│ added │ schema.customers.properties.email.logicalType │ │ string │ +│ added │ schema.customers.properties.email.required │ │ True │ +│ added │ schema.customers.properties.full_name │ │ │ +│ added │ schema.customers.properties.full_name.businessName │ │ Customer Full Name │ +│ added │ schema.customers.properties.full_name.logicalType │ │ string │ +│ added │ schema.customers.properties.full_name.required │ │ True │ +│ removed │ schema.orders.properties.customer_id │ │ │ +│ removed │ 
schema.orders.properties.customer_id.logicalType │ string │ │ +│ removed │ schema.orders.properties.customer_id.required │ True │ │ +│ changed │ schema.orders.properties.order_date.logicalType │ string │ date │ +│ changed │ schema.orders.properties.order_id.description │ Unique order ID and a rather │ Unique order ID and another │ +│ │ │ lenghty description that │ rather lenghty description │ +│ │ │ should be wrapped in the table │ that should be wrapped in the │ +│ │ │ │ table │ +│ added │ schema.orders.properties.region │ │ │ +│ added │ schema.orders.properties.region.logicalType │ │ string │ +│ added │ schema.orders.properties.region.required │ │ False │ +│ changed │ schema.orders.properties.total_amount.required │ False │ True │ +│ changed │ slaProperties.availability.value │ 99.9% │ 99.5% │ +╰─────────┴──────────────────────────────────────────────────────────┴────────────────────────────────┴───────────────────────────────╯ diff --git a/tests/fixtures/changelog/helper/generate_golden.py b/tests/fixtures/changelog/helper/generate_golden.py new file mode 100644 index 000000000..18ff2036f --- /dev/null +++ b/tests/fixtures/changelog/helper/generate_golden.py @@ -0,0 +1,54 @@ +""" +generate_golden.py — Regenerate changelog golden fixtures +---------------------------------------------------------- +Run this script whenever the changelog text output intentionally changes and the +golden file in tests/fixtures/changelog/ needs to be updated. 
+ +Usage (from the repo root): + python tests/fixtures/changelog/helper/generate_golden.py + +Golden files written: + tests/fixtures/changelog/golden_changelog_text.txt + +After running, review the diff with git and commit if the changes are expected: + git diff tests/fixtures/changelog/ +""" + +import io +import os +import sys + +from rich.console import Console + +FIXTURE_DIR = os.path.join(os.path.dirname(__file__), "..") +REPO_ROOT = os.path.join(os.path.dirname(__file__), "..", "..", "..", "..") + +V1 = os.path.normpath(os.path.join(REPO_ROOT, "tests/fixtures/changelog/integration/changelog_integration_v1.yaml")) +V2 = os.path.normpath(os.path.join(REPO_ROOT, "tests/fixtures/changelog/integration/changelog_integration_v2.yaml")) + + +def generate(): + # Import here so the script can be run from the repo root with venv activated + from datacontract.data_contract import DataContract + from datacontract.output.text_changelog_results import write_text_changelog_results + + result = DataContract(data_contract_file=V1).changelog(DataContract(data_contract_file=V2)) + + buf = io.StringIO() + con = Console(file=buf, width=300, highlight=False, no_color=True) + old_stdout = sys.stdout + sys.stdout = buf + try: + write_text_changelog_results(result, con) + finally: + sys.stdout = old_stdout + + text_path = os.path.normpath(os.path.join(FIXTURE_DIR, "golden_changelog_text.txt")) + with open(text_path, "w", encoding="utf-8") as f: + f.write(buf.getvalue()) + print(f"Written: {text_path}") + print("\nDone. 
Review changes with: git diff tests/fixtures/changelog/") + + +if __name__ == "__main__": + generate() diff --git a/tests/fixtures/changelog/integration/changelog_integration_v1.yaml b/tests/fixtures/changelog/integration/changelog_integration_v1.yaml new file mode 100644 index 000000000..40ae07845 --- /dev/null +++ b/tests/fixtures/changelog/integration/changelog_integration_v1.yaml @@ -0,0 +1,29 @@ +apiVersion: v3.0.2 +kind: DataContract +id: orders-contract-001 +schema: + - name: orders + physicalName: orders_tbl + properties: + - name: order_id + logicalType: string + required: true + description: Unique order ID and a rather lenghty description that should be wrapped in the table + - name: customer_id + logicalType: string + required: true + - name: order_date + logicalType: string + required: true + - name: total_amount + logicalType: number + required: false +servers: + - server: production + type: snowflake + database: PROD_DB +slaProperties: + - property: availability + value: "99.9%" + - property: latency + value: "500ms" diff --git a/tests/fixtures/changelog/integration/changelog_integration_v2.yaml b/tests/fixtures/changelog/integration/changelog_integration_v2.yaml new file mode 100644 index 000000000..71a080919 --- /dev/null +++ b/tests/fixtures/changelog/integration/changelog_integration_v2.yaml @@ -0,0 +1,59 @@ +apiVersion: v3.0.2 +kind: DataContract +id: orders-contract-001 +schema: + - name: orders + physicalName: orders_tbl + properties: + - name: order_id + logicalType: string + required: true + description: Unique order ID and another rather lenghty description that should be wrapped in the table + - name: order_date + logicalType: date + required: true + - name: total_amount + logicalType: number + required: true + - name: region + logicalType: string + required: false + - name: customers + physicalName: customers_tbl + properties: + - name: customer_id + logicalType: string + required: true + primaryKey: true + description: Unique order ID + 
- name: email + logicalType: string + required: true + classification: confidential + encryptedName: email_encrypt + - name: full_name + logicalType: string + required: true + businessName: Customer Full Name + - name: date_of_birth + logicalType: date + required: false + classification: restricted + - name: country + logicalType: string + required: false + partitioned: true + partitionKeyPosition: 1 + - name: created_at + logicalType: timestamp + required: true + description: Record creation timestamp +servers: + - server: production + type: snowflake + database: PROD_DB +slaProperties: + - property: availability + value: "99.5%" + - property: latency + value: "500ms" diff --git a/tests/fixtures/changelog/unit/changelog_unit_v1.yaml b/tests/fixtures/changelog/unit/changelog_unit_v1.yaml new file mode 100644 index 000000000..657b1fa5c --- /dev/null +++ b/tests/fixtures/changelog/unit/changelog_unit_v1.yaml @@ -0,0 +1,95 @@ +apiVersion: v3.0.2 +kind: DataContract +id: orders-contract-001 +name: Orders Contract +version: 1.0.0 +status: active +tenant: acme +domain: sales + +description: + purpose: Provides order data for analytics + usage: Used by the analytics team + limitations: Last 2 years only + customProperties: + - property: sensitivity + value: internal + - property: data-owner + value: data-platform-team + +price: + priceAmount: 0 + priceCurrency: USD + priceUnit: monthly + +schema: + - name: orders + physicalName: orders_tbl + customProperties: + - property: domain + value: sales + quality: + - name: row_count + type: sql + mustBeGreaterThan: 0 + properties: + - name: order_id + logicalType: string + required: true + description: Unique order ID + - name: customer_id + logicalType: string + required: true + - name: order_date + logicalType: string + required: true + - name: total_amount + logicalType: number + required: false + quality: + - name: positive + type: sql + mustBeGreaterThan: 0 + customProperties: + - property: pii + value: "false" + +servers: + 
- server: production + type: snowflake + database: PROD_DB + roles: + - role: reader + access: read + - role: writer + access: write + +slaProperties: + - property: availability + value: "99.9%" + - property: latency + value: "500ms" + +roles: + - role: admin + access: write + - role: analyst + access: read + +support: + - channel: slack + url: https://slack.example.com/data-contracts + +customProperties: + - property: owner + value: data-platform-team + - property: classification + value: internal + +team: + name: Data Platform + members: + - username: alice + role: lead + - username: bob + role: engineer diff --git a/tests/fixtures/changelog/unit/changelog_unit_v2.yaml b/tests/fixtures/changelog/unit/changelog_unit_v2.yaml new file mode 100644 index 000000000..4ef17bc10 --- /dev/null +++ b/tests/fixtures/changelog/unit/changelog_unit_v2.yaml @@ -0,0 +1,106 @@ +apiVersion: v3.0.2 +kind: DataContract +id: orders-contract-001 +name: Orders Contract v2 # changed: Orders Contract → Orders Contract v2 +version: 2.0.0 # changed: 1.0.0 → 2.0.0 +status: deprecated # changed: active → deprecated +tenant: acme +domain: commerce # changed: sales → commerce + +description: + purpose: Provides order and line item data # changed + usage: Used by the analytics team + limitations: Last 2 years only + customProperties: + - property: data-owner # reordered (tests stability) + value: data-platform-team + - property: sensitivity + value: confidential # changed: internal → confidential + +price: + priceAmount: 100 # changed: 0 → 100 + priceCurrency: USD + priceUnit: monthly + +schema: + - name: orders + physicalName: orders_tbl + customProperties: + - property: domain + value: finance # changed: sales → finance + quality: + - name: row_count + type: sql + mustBeGreaterThan: 100 # changed: 0 → 100 + properties: + - name: order_id + logicalType: string + required: true + description: Unique order identifier # changed + - name: order_date + logicalType: date # changed: string → date + 
required: true + - name: total_amount + logicalType: number + required: true # changed: false → true + quality: + - name: positive + type: sql + mustBeGreaterThan: 1 # changed: 0 → 1 + customProperties: + - property: pii + value: "true" # changed: false → true + - name: region + logicalType: string # added field + required: false + - name: customers # added schema object + physicalName: customers_tbl + properties: + - name: customer_id + logicalType: string + required: true + +servers: + - server: production + type: snowflake + database: PROD_DB + roles: + - role: reader + access: read + # writer role removed from server + +slaProperties: + - property: availability + value: "99.5%" # changed: 99.9% → 99.5% + - property: latency + value: "500ms" + +roles: + - role: admin + access: write + - role: analyst + access: read + - role: viewer # added top-level role + access: read + +support: + - channel: slack + url: https://slack.example.com/data-contracts + - channel: email # added support channel + url: mailto:data-contracts@example.com + +customProperties: + - property: owner + value: data-platform-team + - property: classification + value: confidential # changed: internal → confidential + +team: + name: Data Platform + members: + - username: alice + role: lead + - username: bob + role: senior-engineer # changed: engineer → senior-engineer + - username: carol # added team member + role: engineer diff --git a/tests/test_api.py b/tests/test_api.py index 9e246d9b4..59ad1895e 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -33,3 +33,40 @@ def test_export_jsonschema_dcs(): expected_json_schema = file.read() print(expected_json_schema) assert response.text == expected_json_schema + + +def test_changelog(): + with open("fixtures/changelog/integration/changelog_integration_v1.yaml", "r") as f: + v1 = f.read() + with open("fixtures/changelog/integration/changelog_integration_v2.yaml", "r") as f: + v2 = f.read() + response = client.post(url="/changelog", json={"v1": 
v1, "v2": v2}) + assert response.status_code == 200 + data = response.json() + assert "summary" in data + assert "entries" in data + assert len(data["entries"]) > 0 + assert len(data["summary"]) > 0 + entry = data["entries"][0] + assert "path" in entry + assert entry["type"] in ("added", "removed", "changed") + assert "old_value" in entry + assert "new_value" in entry + + +def test_changelog_invalid_yaml(): + invalid_yaml = "invalid: yaml: content: [" + response = client.post(url="/changelog", json={"v1": invalid_yaml, "v2": "valid: yaml"}) + assert response.status_code == 422 + assert "Invalid YAML" in response.json()["detail"] + + +def test_changelog_invalid_data_contract(): + invalid_contract = """ + apiVersion: '1.0' + servers: + - type: invalid_type + """ + response = client.post(url="/changelog", json={"v1": invalid_contract, "v2": "valid: yaml"}) + assert response.status_code == 422 + assert "Invalid data contract" in response.json()["detail"] diff --git a/tests/test_changelog.py b/tests/test_changelog.py new file mode 100644 index 000000000..62898df0b --- /dev/null +++ b/tests/test_changelog.py @@ -0,0 +1,58 @@ +from datacontract.data_contract import DataContract +from datacontract.model.changelog import ChangelogResult, ChangelogType + +V1 = "fixtures/changelog/integration/changelog_integration_v1.yaml" +V2 = "fixtures/changelog/integration/changelog_integration_v2.yaml" + + +def test_changelog_returns_changelog_result(): + result = DataContract(data_contract_file=V1).changelog(DataContract(data_contract_file=V2)) + assert isinstance(result, ChangelogResult) + + +def test_changelog_has_changes(): + result = DataContract(data_contract_file=V1).changelog(DataContract(data_contract_file=V2)) + assert result.has_changes() + + +def test_changelog_no_changes(): + result = DataContract(data_contract_file=V1).changelog(DataContract(data_contract_file=V1)) + assert not result.has_changes() + assert result.entries == [] + assert result.summary == [] + + +def 
test_changelog_entry_types(): + result = DataContract(data_contract_file=V1).changelog(DataContract(data_contract_file=V2)) + types = {e.type for e in result.entries} + assert ChangelogType.added in types + assert ChangelogType.removed in types + assert ChangelogType.changed in types + + +def test_changelog_summary_is_rolled_up(): + result = DataContract(data_contract_file=V1).changelog(DataContract(data_contract_file=V2)) + assert len(result.summary) < len(result.entries) + + +def test_changelog_summary_paths(): + result = DataContract(data_contract_file=V1).changelog(DataContract(data_contract_file=V2)) + paths = [e.path for e in result.summary] + assert "schema.customers" in paths + assert "schema.orders.properties.customer_id" in paths + assert "slaProperties.availability" in paths + + +def test_changelog_entry_values(): + result = DataContract(data_contract_file=V1).changelog(DataContract(data_contract_file=V2)) + changed = [e for e in result.entries if e.path == "schema.orders.properties.order_date.logicalType"] + assert len(changed) == 1 + assert changed[0].type == ChangelogType.changed + assert changed[0].old_value == "string" + assert changed[0].new_value == "date" + + +def test_changelog_v1_v2_labels(): + result = DataContract(data_contract_file=V1).changelog(DataContract(data_contract_file=V2)) + assert result.v1 == V1 + assert result.v2 == V2 diff --git a/tests/test_changelog_engine.py b/tests/test_changelog_engine.py new file mode 100644 index 000000000..077bf9d59 --- /dev/null +++ b/tests/test_changelog_engine.py @@ -0,0 +1,786 @@ +""" +test_changelog_engine — Unit tests for changelog.py +------------------------------------------------------------------- +Test classes: + TestBuildReportDataStructure — build_changelog() output shape and empty-diff + TestBuildReportDataAdded — Added change entries (scalar and dict payloads) + TestBuildReportDataRemoved — Removed change entries + TestBuildReportDataChanged — Changed entries and scalar rollup to parent + 
TestBuildReportDataSummaryRollup — summary deduplication and count consistency + TestBuildReportDataTags — tag field changes + TestSummaryRollupScalarLeaves — scalar leaf rollup behaviour + TestDiff — diff(): semantic correctness (added/removed/changed/mid-list) + TestDiffFixtures — diff(): end-to-end using fixtures/changelog/unit/ + TestDiffFixturesPriceDescriptionScalars — diff(): price, description, and top-level scalar fields +""" + +import os +import tempfile + +import yaml +from open_data_contract_standard.model import OpenDataContractStandard + +from datacontract.changelog.changelog import build_changelog, diff + +REPORT = build_changelog + + +def _added(path: str, payload) -> dict: + return {"dictionary_item_added": {f"root['{path}']": payload}} + + +def _added_double_quotes(path: str, payload) -> dict: + return {"dictionary_item_added": {f'root["{path}"]': payload}} + + +def _removed(path: str, payload) -> dict: + return {"dictionary_item_removed": {f"root['{path}']": payload}} + + +def _changed(path: str, old, new) -> dict: + return {"values_changed": {f"root['{path}']": {"old_value": old, "new_value": new}}} + + +def _merge(*diffs: dict) -> dict: + """Merge multiple single-key DeepDiff dicts into one.""" + merged = {} + for d in diffs: + for k, v in d.items(): + merged.setdefault(k, {}).update(v) + return merged + + +class TestBuildReportDataStructure: + def test_returns_expected_top_level_keys(self): + rd = build_changelog({}) + assert set(rd.keys()) == {"source_label", "target_label", "header", "summary", "detail"} + + def test_header_contains_title_and_subtitle(self): + rd = build_changelog({}, source_label="v1.yaml", target_label="v2.yaml") + assert rd["header"]["title"] == "ODCS Data Contract Changelog" + assert "v1.yaml" in rd["header"]["subtitle"] + assert "v2.yaml" in rd["header"]["subtitle"] + + def test_source_and_target_labels_stored(self): + rd = build_changelog({}, source_label="before.yaml", target_label="after.yaml") + assert 
rd["source_label"] == "before.yaml" + assert rd["target_label"] == "after.yaml" + + def test_empty_diff_produces_zero_counts(self): + rd = build_changelog({}) + assert rd["summary"]["counts"] == {"added": 0, "removed": 0, "changed": 0} + assert rd["detail"]["counts"] == {"added": 0, "removed": 0, "changed": 0} + + def test_empty_diff_produces_empty_changes(self): + rd = build_changelog({}) + assert rd["summary"]["changes"] == [] + assert rd["detail"]["changes"] == [] + + def test_unknown_deepdiff_keys_ignored(self): + rd = build_changelog({"unknown_key": {"root['x']": 1}}) + assert rd["summary"]["changes"] == [] + + +class TestBuildReportDataAdded: + def test_added_scalar_appears_in_detail(self): + rd = build_changelog(_added("schema']['orders", "v")) + paths = [c["path"] for c in rd["detail"]["changes"]] + assert any("orders" in p for p in paths) + + def test_added_scalar_change_type(self): + rd = build_changelog(_added("schema']['orders", "val")) + match = next(c for c in rd["detail"]["changes"] if "orders" in c["path"]) + assert match["changeType"] == "Added" + + def test_added_scalar_has_new_value(self): + rd = build_changelog(_added("schema']['orders", "val")) + match = next(c for c in rd["detail"]["changes"] if c["path"] == "schema.orders") + assert match.get("new_value") == "val" + + def test_added_dict_expands_to_leaf_entries(self): + payload = {"physicalName": "orders_tbl", "description": "Orders"} + rd = build_changelog(_added("schema']['orders", payload)) + paths = [c["path"] for c in rd["detail"]["changes"]] + assert "schema.orders.physicalName" in paths + assert "schema.orders.description" in paths + + def test_added_dict_parent_entry_included(self): + payload = {"physicalName": "orders_tbl"} + rd = build_changelog(_added("schema']['orders", payload)) + paths = [c["path"] for c in rd["detail"]["changes"]] + assert "schema.orders" in paths + + def test_added_count_incremented(self): + rd = build_changelog(_added("schema']['orders", "v")) + assert 
rd["detail"]["counts"]["added"] >= 1 + + def test_added_appears_in_summary(self): + # Scalar Added rolls up to parent — use a 2-level path so it lands at schema.orders + rd = build_changelog(_added("schema']['orders']['physicalName", "v")) + paths = [c["path"] for c in rd["summary"]["changes"]] + assert any("orders" in p for p in paths) + + def test_added_double_quotes_path_parsing(self): + """Test that double-quoted paths are parsed correctly in both detail and summary""" + rd = build_changelog(_added_double_quotes('schema"]["orders"]["physicalName', "v")) + detail_paths = [c["path"] for c in rd["detail"]["changes"]] + assert "schema.orders.physicalName" in detail_paths + summary_paths = [c["path"] for c in rd["summary"]["changes"]] + assert any("orders" in p for p in summary_paths) + + +class TestBuildReportDataRemoved: + def test_removed_scalar_appears_in_detail(self): + rd = build_changelog(_removed("schema']['orders", "v")) + paths = [c["path"] for c in rd["detail"]["changes"]] + assert any("orders" in p for p in paths) + + def test_removed_scalar_has_old_value(self): + rd = build_changelog(_removed("schema']['orders", "val")) + match = next(c for c in rd["detail"]["changes"] if c["path"] == "schema.orders") + assert match.get("old_value") == "val" + + def test_removed_dict_expands_to_leaf_entries(self): + payload = {"logicalType": "string", "required": True} + rd = build_changelog(_removed("schema']['orders']['properties']['amount", payload)) + paths = [c["path"] for c in rd["detail"]["changes"]] + assert "schema.orders.properties.amount.logicalType" in paths + + def test_removed_count_incremented(self): + rd = build_changelog(_removed("schema']['orders", "v")) + assert rd["detail"]["counts"]["removed"] >= 1 + + +class TestBuildReportDataChanged: + def test_changed_scalar_in_detail(self): + rd = build_changelog( + _changed("schema']['orders']['properties']['order_date']['logicalType", "string", "date") + ) + match = next((c for c in rd["detail"]["changes"] if 
"logicalType" in c["path"]), None) + assert match is not None + assert match["changeType"] == "Changed" + assert match["old_value"] == "string" + assert match["new_value"] == "date" + + def test_changed_count_incremented(self): + rd = build_changelog(_changed("slaProperties']['availability']['value", "99.9%", "99.5%")) + assert rd["detail"]["counts"]["changed"] == 1 + + def test_changed_scalar_rolled_up_to_parent_in_summary(self): + rd = build_changelog( + _changed("schema']['orders']['properties']['order_date']['logicalType", "string", "date") + ) + summary_paths = [c["path"] for c in rd["summary"]["changes"]] + assert not any("logicalType" in p for p in summary_paths) + assert any("order_date" in p for p in summary_paths) + + +class TestBuildReportDataSummaryRollup: + def test_multiple_scalar_changes_on_same_parent_produce_one_summary_entry(self): + diff = _merge( + _changed("schema']['orders']['properties']['order_date']['logicalType", "string", "date"), + _changed("schema']['orders']['properties']['order_date']['description", "old desc", "new desc"), + ) + rd = build_changelog(diff) + order_date_entries = [c for c in rd["summary"]["changes"] if c["path"] == "schema.orders.properties.order_date"] + assert len(order_date_entries) == 1 + + def test_summary_change_type_is_changed_when_field_both_added_and_removed(self): + # Scalar Added + Removed on the same parent path collapse to Changed. 
+ # Use a 3-level path so rollup lands at schema.orders.properties.order_id + diff = _merge( + _added("schema']['orders']['properties']['order_id']['businessName", "Order ID"), + _removed("schema']['orders']['properties']['order_id']['description", "Old desc"), + ) + rd = build_changelog(diff) + match = next(c for c in rd["summary"]["changes"] if c["path"] == "schema.orders.properties.order_id") + assert match["changeType"] == "Changed" + + def test_summary_counts_match_summary_changes(self): + diff = _merge( + _added("schema']['customers", {"physicalName": "c"}), + _removed("schema']['orders']['properties']['customer_id", {"logicalType": "string"}), + _changed("slaProperties']['availability']['value", "99.9%", "99.5%"), + ) + rd = build_changelog(diff) + counts = rd["summary"]["counts"] + changes = rd["summary"]["changes"] + assert counts["added"] == sum(1 for c in changes if c["changeType"] == "Added") + assert counts["removed"] == sum(1 for c in changes if c["changeType"] == "Removed") + assert counts["changed"] == sum(1 for c in changes if c["changeType"] == "Changed") + + def test_detail_counts_match_detail_changes(self): + diff = _merge( + _added("schema']['customers", {"physicalName": "c"}), + _changed("slaProperties']['availability']['value", "99.9%", "99.5%"), + ) + rd = build_changelog(diff) + counts = rd["detail"]["counts"] + changes = rd["detail"]["changes"] + assert counts["added"] == sum(1 for c in changes if c["changeType"] == "Added") + assert counts["changed"] == sum(1 for c in changes if c["changeType"] == "Changed") + + def test_detail_changes_sorted_by_path(self): + diff = _merge( + _added("schema']['orders", "v"), + _added("schema']['customers", "v"), + ) + rd = build_changelog(diff) + paths = [c["path"] for c in rd["detail"]["changes"]] + assert paths == sorted(paths) + + +class TestBuildReportDataTags: + """Tags (list[str]) — added/removed tags should surface as path segments, + not as new_value/old_value on the parent path.""" + + def 
_tag_diff(self, v1_tags, v2_tags, location="top"): + """Build report_data from a synthetic tags diff at the given location.""" + if location == "top": + v1 = {"apiVersion": "v3.0.2", "kind": "DataContract", "id": "t", "tags": v1_tags} + v2 = {"apiVersion": "v3.0.2", "kind": "DataContract", "id": "t", "tags": v2_tags} + elif location == "schema": + v1 = { + "apiVersion": "v3.0.2", + "kind": "DataContract", + "id": "t", + "schema": [{"name": "orders", "physicalName": "orders_tbl", "tags": v1_tags}], + } + v2 = { + "apiVersion": "v3.0.2", + "kind": "DataContract", + "id": "t", + "schema": [{"name": "orders", "physicalName": "orders_tbl", "tags": v2_tags}], + } + else: + v1 = { + "apiVersion": "v3.0.2", + "kind": "DataContract", + "id": "t", + "schema": [ + { + "name": "orders", + "physicalName": "orders_tbl", + "properties": [{"name": "order_id", "logicalType": "string", "tags": v1_tags}], + } + ], + } + v2 = { + "apiVersion": "v3.0.2", + "kind": "DataContract", + "id": "t", + "schema": [ + { + "name": "orders", + "physicalName": "orders_tbl", + "properties": [{"name": "order_id", "logicalType": "string", "tags": v2_tags}], + } + ], + } + from datacontract.changelog.changelog import diff + + raw = diff(v1, v2) + return build_changelog(raw) + + def test_added_tag_path_includes_tag_value(self): + rd = self._tag_diff(["analytics"], ["analytics", "pii"]) + paths = [c["path"] for c in rd["detail"]["changes"]] + assert "tags.pii" in paths + + def test_removed_tag_path_includes_tag_value(self): + rd = self._tag_diff(["analytics", "pii"], ["analytics"]) + paths = [c["path"] for c in rd["detail"]["changes"]] + assert "tags.pii" in paths + + def test_added_tag_has_no_new_value_field(self): + rd = self._tag_diff(["analytics"], ["analytics", "pii"]) + tag_change = next(c for c in rd["detail"]["changes"] if c["path"] == "tags.pii") + assert "new_value" not in tag_change + assert "old_value" not in tag_change + + def test_added_tag_change_type_is_added(self): + rd = 
self._tag_diff(["analytics"], ["analytics", "pii"]) + tag_change = next(c for c in rd["detail"]["changes"] if c["path"] == "tags.pii") + assert tag_change["changeType"] == "Added" + + def test_removed_tag_change_type_is_removed(self): + rd = self._tag_diff(["analytics", "pii"], ["analytics"]) + tag_change = next(c for c in rd["detail"]["changes"] if c["path"] == "tags.pii") + assert tag_change["changeType"] == "Removed" + + def test_summary_rolls_up_to_tags_parent(self): + rd = self._tag_diff(["analytics"], ["analytics", "pii", "transactions"]) + summary_paths = [c["path"] for c in rd["summary"]["changes"]] + assert "tags" in summary_paths + assert "tags.pii" not in summary_paths + assert "tags.transactions" not in summary_paths + + def test_schema_object_tag_uses_value_as_path_segment(self): + rd = self._tag_diff(["e-commerce"], ["e-commerce", "reporting"], location="schema") + paths = [c["path"] for c in rd["detail"]["changes"]] + assert "schema.orders.tags.reporting" in paths + + def test_schema_property_tag_uses_value_as_path_segment(self): + rd = self._tag_diff(["primary-key"], ["primary-key", "required"], location="property") + paths = [c["path"] for c in rd["detail"]["changes"]] + assert "schema.orders.properties.order_id.tags.required" in paths + + def test_unchanged_tags_produce_no_diff(self): + rd = self._tag_diff(["analytics", "pii"], ["analytics", "pii"]) + assert rd["detail"]["changes"] == [] + + def test_reordered_tags_produce_no_diff(self): + rd = self._tag_diff(["analytics", "pii"], ["pii", "analytics"]) + assert rd["detail"]["changes"] == [] + + +class TestSummaryRollupScalarLeaves: + """Scalar Added/Removed leaf fields roll up to their parent in the summary, + consistent with how scalar Changed fields behave.""" + + def _rd(self, *diffs): + return build_changelog(_merge(*diffs)) + + def test_scalar_added_rolls_up_to_parent(self): + rd = self._rd(_added("schema']['orders']['businessName", "Orders")) + paths = [c["path"] for c in 
rd["summary"]["changes"]] + assert "schema.orders" in paths + assert "schema.orders.businessName" not in paths + + def test_scalar_removed_rolls_up_to_parent(self): + rd = self._rd(_removed("schema']['orders']['description", "old desc")) + paths = [c["path"] for c in rd["summary"]["changes"]] + assert "schema.orders" in paths + assert "schema.orders.description" not in paths + + def test_scalar_added_parent_change_type_is_added(self): + rd = self._rd(_added("schema']['orders']['businessName", "Orders")) + match = next(c for c in rd["summary"]["changes"] if c["path"] == "schema.orders") + assert match["changeType"] == "Added" + + def test_scalar_removed_parent_change_type_is_removed(self): + rd = self._rd(_removed("schema']['orders']['description", "old")) + match = next(c for c in rd["summary"]["changes"] if c["path"] == "schema.orders") + assert match["changeType"] == "Removed" + + def test_mixed_add_remove_same_parent_collapses_to_changed(self): + rd = self._rd( + _added("schema']['orders']['businessName", "Orders"), + _removed("schema']['orders']['description", "old desc"), + ) + match = next(c for c in rd["summary"]["changes"] if c["path"] == "schema.orders") + assert match["changeType"] == "Changed" + paths = [c["path"] for c in rd["summary"]["changes"]] + assert "schema.orders.businessName" not in paths + assert "schema.orders.description" not in paths + + def test_mixed_add_scalar_changed_same_parent_collapses_to_changed(self): + rd = self._rd( + _added("schema']['orders']['businessName", "Orders"), + _changed("schema']['orders']['logicalType", "string", "integer"), + ) + match = next(c for c in rd["summary"]["changes"] if c["path"] == "schema.orders") + assert match["changeType"] == "Changed" + + def test_dict_added_does_not_roll_up(self): + """A whole dict payload (e.g. 
a new schema object) should not roll up — + only scalar leafs do.""" + rd = self._rd(_added("schema']['customers", {"physicalName": "customers_tbl"})) + paths = [c["path"] for c in rd["summary"]["changes"]] + assert "schema.customers" in paths + assert "schema" not in paths + + def test_top_level_scalar_added_stays_at_top_level(self): + """A scalar at depth 1 (e.g. root['version']) has no parent to roll up to.""" + rd = self._rd(_added("version", "2.0.0")) + paths = [c["path"] for c in rd["summary"]["changes"]] + assert "version" in paths + + def test_summary_counts_consistent_after_rollup(self): + rd = self._rd( + _added("schema']['orders']['businessName", "Orders"), + _removed("schema']['orders']['description", "old"), + ) + counts = rd["summary"]["counts"] + changes = rd["summary"]["changes"] + assert counts["added"] == sum(1 for c in changes if c["changeType"] == "Added") + assert counts["removed"] == sum(1 for c in changes if c["changeType"] == "Removed") + assert counts["changed"] == sum(1 for c in changes if c["changeType"] == "Changed") + + def test_detail_still_shows_full_leaf_paths(self): + """Rollup only affects summary — detail must still show the full leaf paths.""" + rd = self._rd( + _added("schema']['orders']['businessName", "Orders"), + _removed("schema']['orders']['description", "old desc"), + ) + detail_paths = [c["path"] for c in rd["detail"]["changes"]] + assert "schema.orders.businessName" in detail_paths + assert "schema.orders.description" in detail_paths + +# --------------------------------------------------------------------------- +# Helpers for diff() tests +# --------------------------------------------------------------------------- + +MINIMAL_CONTRACT = { + "apiVersion": "v3.0.2", + "kind": "DataContract", + "id": "test-001", +} + + +def _load_contract(path: str) -> dict: + with open(path, encoding="utf-8") as f: + raw = yaml.safe_load(f) + return OpenDataContractStandard.model_validate(raw).model_dump(exclude_none=True, 
by_alias=True) + + +def _write_yaml(data: dict, path: str) -> None: + with open(path, "w") as f: + yaml.dump(data, f) + + +def _contract(**kwargs) -> dict: + return {**MINIMAL_CONTRACT, **kwargs} +class TestDiff: + def _base(self) -> dict: + return _contract( + schema=[ + { + "name": "orders", + "properties": [ + {"name": "order_id", "logicalType": "string", "required": True}, + {"name": "amount", "logicalType": "number", "required": False}, + ], + } + ] + ) + + def test_identical_contracts_produce_no_diff(self): + c = self._base() + result = diff(c, c) + assert result == {} + + def test_field_added(self): + v1 = self._base() + v2 = self._base() + v2["schema"][0]["properties"].append({"name": "region", "logicalType": "string"}) + result = diff(v1, v2) + assert "dictionary_item_added" in result + + def test_field_removed(self): + v1 = self._base() + v2 = self._base() + v2["schema"][0]["properties"] = [v2["schema"][0]["properties"][0]] # remove amount + result = diff(v1, v2) + assert "dictionary_item_removed" in result + + def test_field_type_changed(self): + v1 = self._base() + v2 = self._base() + v2["schema"][0]["properties"][0]["logicalType"] = "integer" + result = diff(v1, v2) + assert "values_changed" in result + + def test_schema_removed_mid_list_is_not_misreported_as_change(self): + v1 = _contract( + schema=[ + {"name": "orders", "physicalName": "orders_tbl"}, + {"name": "customers", "physicalName": "customers_tbl"}, + ] + ) + v2 = _contract( + schema=[ + {"name": "customers", "physicalName": "customers_tbl"}, + ] + ) + result = diff(v1, v2) + removed = result.get("dictionary_item_removed", {}) + changed = result.get("values_changed", {}) + assert any("orders" in k for k in removed) + assert not any("customers" in k for k in changed) + assert not any("customers" in k for k in removed) + + def test_sla_value_changed(self): + v1 = _contract(slaProperties=[{"property": "availability", "value": "99.9%"}]) + v2 = _contract(slaProperties=[{"property": 
"availability", "value": "99.5%"}]) + result = diff(v1, v2) + assert "values_changed" in result + + def test_server_added(self): + v1 = _contract(servers=[{"server": "production", "type": "snowflake"}]) + v2 = _contract( + servers=[ + {"server": "production", "type": "snowflake"}, + {"server": "staging", "type": "snowflake"}, + ] + ) + result = diff(v1, v2) + assert "dictionary_item_added" in result + + def test_server_role_added(self): + v1 = _contract( + servers=[ + { + "server": "production", + "type": "snowflake", + "roles": [ + {"role": "reader", "access": "read"}, + ], + } + ] + ) + v2 = _contract( + servers=[ + { + "server": "production", + "type": "snowflake", + "roles": [ + {"role": "reader", "access": "read"}, + {"role": "writer", "access": "write"}, + ], + } + ] + ) + result = diff(v1, v2) + added = result.get("dictionary_item_added", {}) + assert any("writer" in k for k in added) + + def test_server_role_removed(self): + v1 = _contract( + servers=[ + { + "server": "production", + "type": "snowflake", + "roles": [ + {"role": "reader", "access": "read"}, + {"role": "writer", "access": "write"}, + ], + } + ] + ) + v2 = _contract( + servers=[ + { + "server": "production", + "type": "snowflake", + "roles": [ + {"role": "reader", "access": "read"}, + ], + } + ] + ) + result = diff(v1, v2) + removed = result.get("dictionary_item_removed", {}) + assert any("writer" in k for k in removed) + + def test_schema_object_custom_property_changed(self): + v1 = _contract( + schema=[ + { + "name": "orders", + "customProperties": [ + {"property": "domain", "value": "sales"}, + ], + } + ] + ) + v2 = _contract( + schema=[ + { + "name": "orders", + "customProperties": [ + {"property": "domain", "value": "finance"}, + ], + } + ] + ) + result = diff(v1, v2) + changed = result.get("values_changed", {}) + assert any("domain" in k for k in changed) + + def test_schema_property_quality_rule_changed(self): + v1 = _contract( + schema=[ + { + "name": "orders", + "properties": [ + { + 
"name": "amount", + "logicalType": "number", + "quality": [{"name": "positive", "metric": "rowCount", "mustBeGreaterThan": 0}], + } + ], + } + ] + ) + v2 = _contract( + schema=[ + { + "name": "orders", + "properties": [ + { + "name": "amount", + "logicalType": "number", + "quality": [{"name": "positive", "metric": "rowCount", "mustBeGreaterThan": 100}], + } + ], + } + ] + ) + result = diff(v1, v2) + changed = result.get("values_changed", {}) + assert any("positive" in k for k in changed) + + def test_schema_property_custom_property_added(self): + v1 = _contract( + schema=[ + { + "name": "orders", + "properties": [ + { + "name": "amount", + "logicalType": "number", + } + ], + } + ] + ) + v2 = _contract( + schema=[ + { + "name": "orders", + "properties": [ + { + "name": "amount", + "logicalType": "number", + "customProperties": [{"property": "sensitivity", "value": "high"}], + } + ], + } + ] + ) + result = diff(v1, v2) + assert "dictionary_item_added" in result + + +class TestDiffFixtures: + FIXTURE_DIR = os.path.join(os.path.dirname(__file__), "fixtures", "changelog", "unit") + + def _generate(self): + v1 = _load_contract(os.path.join(self.FIXTURE_DIR, "changelog_unit_v1.yaml")) + v2 = _load_contract(os.path.join(self.FIXTURE_DIR, "changelog_unit_v2.yaml")) + return diff(v1, v2) + + def test_diff_returns_dict(self): + assert isinstance(self._generate(), dict) + + def test_diff_detects_known_changes(self): + result = self._generate() + added = result.get("dictionary_item_added", {}) + removed = result.get("dictionary_item_removed", {}) + changed = result.get("values_changed", {}) + assert any("customers" in k for k in added) + assert any("customer_id" in k for k in removed) + assert any("availability" in k for k in changed) + + def test_diff_identical_files_no_diff(self): + v1_path = os.path.join(self.FIXTURE_DIR, "changelog_unit_v1.yaml") + v = _load_contract(v1_path) + assert diff(v, v) == {} + + def test_diff_with_temp_files(self): + contract = 
_contract(schema=[{"name": "orders", "physicalName": "orders_tbl"}]) + with ( + tempfile.NamedTemporaryFile(suffix=".yaml", mode="w", delete=False) as f1, + tempfile.NamedTemporaryFile(suffix=".yaml", mode="w", delete=False) as f2, + ): + yaml.dump(contract, f1) + yaml.dump(contract, f2) + try: + v1 = _load_contract(f1.name) + v2 = _load_contract(f2.name) + assert diff(v1, v2) == {} + finally: + os.unlink(f1.name) + os.unlink(f2.name) + + def test_schema_object_custom_property_changed(self): + changed = self._generate().get("values_changed", {}) + assert any("domain" in k for k in changed) + + def test_schema_object_quality_changed(self): + changed = self._generate().get("values_changed", {}) + assert any("row_count" in k for k in changed) + + def test_schema_property_quality_changed(self): + changed = self._generate().get("values_changed", {}) + assert any("positive" in k for k in changed) + + def test_schema_property_custom_property_changed(self): + changed = self._generate().get("values_changed", {}) + assert any("pii" in k for k in changed) + + def test_server_role_removed(self): + removed = self._generate().get("dictionary_item_removed", {}) + assert any("writer" in k for k in removed) + + def test_top_level_role_added(self): + added = self._generate().get("dictionary_item_added", {}) + assert any("viewer" in k for k in added) + + def test_support_channel_added(self): + added = self._generate().get("dictionary_item_added", {}) + assert any("email" in k for k in added) + + def test_top_level_custom_property_changed(self): + changed = self._generate().get("values_changed", {}) + assert any("classification" in k for k in changed) + + def test_team_member_role_changed(self): + changed = self._generate().get("values_changed", {}) + assert any("bob" in k for k in changed) + + def test_team_member_added(self): + added = self._generate().get("dictionary_item_added", {}) + assert any("carol" in k for k in added) + + + +class 
TestDiffFixturesPriceDescriptionScalars(TestDiffFixtures): + """Extends the end-to-end fixture tests to cover price, description, and + top-level scalar fields that were previously absent from the unit fixtures.""" + + def test_price_amount_changed(self): + changed = self._generate().get("values_changed", {}) + assert any("priceAmount" in k for k in changed) + + def test_description_purpose_changed(self): + changed = self._generate().get("values_changed", {}) + assert any("purpose" in k for k in changed) + + def test_description_custom_property_changed(self): + changed = self._generate().get("values_changed", {}) + assert any("sensitivity" in k for k in changed) + + def test_description_custom_property_reorder_stable(self): + """The description.customProperties reorder in v2 must not produce + a false positive — only the sensitivity value change should appear.""" + changed = self._generate().get("values_changed", {}) + # data-owner is unchanged and reordered — must not appear + assert not any("data-owner" in k for k in changed) + + def test_top_level_version_changed(self): + changed = self._generate().get("values_changed", {}) + assert any("version" in k for k in changed) + + def test_top_level_name_changed(self): + changed = self._generate().get("values_changed", {}) + assert any("'name'" in k for k in changed) + + def test_top_level_status_changed(self): + changed = self._generate().get("values_changed", {}) + assert any("status" in k for k in changed) + + def test_top_level_domain_changed(self): + changed = self._generate().get("values_changed", {}) + assert any("'domain'" in k for k in changed) diff --git a/tests/test_changelog_normalize.py b/tests/test_changelog_normalize.py new file mode 100644 index 000000000..742380ea3 --- /dev/null +++ b/tests/test_changelog_normalize.py @@ -0,0 +1,1057 @@ +""" +test_changelog_normalize — Unit tests for normalize.py +----------------------------------------------------------- +Test classes: + TestNormalizeBy — 
_normalize_by: key field extraction and positional fallback + TestNormalizeProperties — _normalize_properties: recursive SchemaProperty keying + TestNormalize — normalize(): all natural-key paths and edge cases + TestNormalizeAuthDefs — _normalize_auth_defs: url/id/positional fallback + TestNormalizeRelationships — _normalize_relationships: schema-level and property-level + TestNormalizeDescription — normalize(): description.authDefs and customProperties + TestNormalizeServerCustomProperties — normalize(): server customProperties + TestNormalizeQualityNested — _normalize_quality: nested customProperties and authDefs + TestGeneratePriceDescriptionScalars — end-to-end normalize via diff() for price/desc fields +""" + + +import yaml + +from datacontract.changelog.changelog import diff +from datacontract.changelog.normalize import ( + _normalize_auth_defs, + _normalize_by, + _normalize_properties, + _normalize_relationships, + normalize, +) + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + +MINIMAL_CONTRACT = { + "apiVersion": "v3.0.2", + "kind": "DataContract", + "id": "test-001", +} + + +def _write_yaml(data: dict, path: str) -> None: + with open(path, "w") as f: + yaml.dump(data, f) + + +def _contract(**kwargs) -> dict: + return {**MINIMAL_CONTRACT, **kwargs} + + +class TestNormalizeBy: + def test_keys_by_named_field(self): + items = [ + {"role": "admin", "access": "read"}, + {"role": "viewer", "access": "read"}, + ] + result = _normalize_by(items, "role") + assert set(result.keys()) == {"admin", "viewer"} + assert result["admin"] == {"access": "read"} + + def test_key_field_omitted_from_value(self): + items = [{"channel": "slack", "url": "https://slack.com"}] + result = _normalize_by(items, "channel") + assert "channel" not in result["slack"] + + def test_positional_fallback_when_key_absent(self): + items = [{"type": "sql", "rule": "count > 
0"}, {"type": "sql"}] + result = _normalize_by(items, "name") + assert "__pos_0__" in result + assert "__pos_1__" in result + + def test_mixed_present_and_absent_key(self): + items = [ + {"name": "row_count", "rule": "count > 0"}, + {"rule": "no_nulls"}, # name absent + ] + result = _normalize_by(items, "name") + assert "row_count" in result + assert "__pos_1__" in result + + def test_empty_list(self): + assert _normalize_by([], "role") == {} + + +class TestNormalizeProperties: + def test_flat_properties_keyed_by_name(self): + props = [ + {"name": "order_id", "logicalType": "string"}, + {"name": "amount", "logicalType": "number"}, + ] + result = _normalize_properties(props) + assert set(result.keys()) == {"order_id", "amount"} + assert result["order_id"]["logicalType"] == "string" + assert "name" not in result["order_id"] + + def test_nested_properties_recursed(self): + props = [ + { + "name": "address", + "logicalType": "object", + "properties": [ + {"name": "street", "logicalType": "string"}, + {"name": "city", "logicalType": "string"}, + ], + } + ] + result = _normalize_properties(props) + assert isinstance(result["address"]["properties"], dict) + assert "street" in result["address"]["properties"] + assert "city" in result["address"]["properties"] + + def test_empty_properties(self): + assert _normalize_properties([]) == {} + + +class TestNormalize: + def test_schema_keyed_by_name(self): + contract = _contract( + schema=[ + {"name": "orders", "physicalName": "orders_tbl"}, + {"name": "customers", "physicalName": "customers_tbl"}, + ] + ) + result = normalize(contract) + assert isinstance(result["schema"], dict) + assert set(result["schema"].keys()) == {"orders", "customers"} + assert "name" not in result["schema"]["orders"] + + def test_schema_properties_keyed_by_name(self): + contract = _contract( + schema=[ + { + "name": "orders", + "properties": [ + {"name": "order_id", "logicalType": "string"}, + ], + } + ] + ) + result = normalize(contract) + assert 
isinstance(result["schema"]["orders"]["properties"], dict) + assert "order_id" in result["schema"]["orders"]["properties"] + + def test_sla_properties_keyed_by_property(self): + contract = _contract( + slaProperties=[ + {"property": "availability", "value": "99.9%"}, + {"property": "latency", "value": "500ms"}, + ] + ) + result = normalize(contract) + assert isinstance(result["slaProperties"], dict) + assert "availability" in result["slaProperties"] + assert result["slaProperties"]["availability"] == {"value": "99.9%"} + + def test_servers_keyed_by_server(self): + contract = _contract( + servers=[ + {"server": "production", "type": "snowflake"}, + {"server": "staging", "type": "snowflake"}, + ] + ) + result = normalize(contract) + assert isinstance(result["servers"], dict) + assert set(result["servers"].keys()) == {"production", "staging"} + assert "server" not in result["servers"]["production"] + + def test_roles_keyed_by_role(self): + contract = _contract( + roles=[ + {"role": "admin", "access": "write"}, + {"role": "viewer", "access": "read"}, + ] + ) + result = normalize(contract) + assert isinstance(result["roles"], dict) + assert "admin" in result["roles"] + + def test_support_keyed_by_channel(self): + contract = _contract( + support=[ + {"channel": "slack", "url": "https://slack.com"}, + ] + ) + result = normalize(contract) + assert "slack" in result["support"] + + def test_custom_properties_keyed_by_property(self): + contract = _contract( + customProperties=[ + {"property": "domain", "value": "sales"}, + {"property": "team_name", "value": "orders"}, + ] + ) + result = normalize(contract) + assert "domain" in result["customProperties"] + assert "team_name" in result["customProperties"] + + def test_team_members_keyed_by_username(self): + contract = _contract( + team={ + "name": "Data Team", + "members": [ + {"username": "alice", "role": "lead"}, + {"username": "bob", "role": "engineer"}, + ], + } + ) + result = normalize(contract) + assert "alice" in 
result["team"]["members"] + assert "bob" in result["team"]["members"] + + def test_team_deprecated_array_form(self): + contract = _contract( + team=[ + {"username": "alice", "role": "lead"}, + ] + ) + result = normalize(contract) + assert isinstance(result["team"], dict) + assert "alice" in result["team"] + + def test_quality_keyed_by_name_with_positional_fallback(self): + contract = _contract( + schema=[ + { + "name": "orders", + "quality": [ + {"name": "row_count", "metric": "rowCount"}, + {"metric": "duplicateValues"}, # no name + ], + } + ] + ) + result = normalize(contract) + quality = result["schema"]["orders"]["quality"] + assert "row_count" in quality + assert "__pos_1__" in quality + + def test_non_list_fields_unchanged(self): + contract = _contract(description="a contract") + result = normalize(contract) + assert result["description"] == "a contract" + + def test_schema_object_custom_properties_keyed_by_property(self): + contract = _contract( + schema=[ + { + "name": "orders", + "customProperties": [ + {"property": "domain", "value": "sales"}, + {"property": "team_name", "value": "orders"}, + ], + } + ] + ) + result = normalize(contract) + cp = result["schema"]["orders"]["customProperties"] + assert isinstance(cp, dict) + assert "domain" in cp + assert "team_name" in cp + + def test_schema_object_quality_keyed_by_name(self): + contract = _contract( + schema=[ + { + "name": "orders", + "quality": [ + {"name": "row_count", "metric": "rowCount"}, + {"name": "no_nulls", "metric": "nullValues"}, + ], + } + ] + ) + result = normalize(contract) + quality = result["schema"]["orders"]["quality"] + assert isinstance(quality, dict) + assert "row_count" in quality + assert "no_nulls" in quality + + def test_schema_property_quality_keyed_by_name(self): + contract = _contract( + schema=[ + { + "name": "orders", + "properties": [ + { + "name": "amount", + "logicalType": "number", + "quality": [ + {"name": "positive", "metric": "rowCount"}, + ], + } + ], + } + ] + ) + 
result = normalize(contract) + quality = result["schema"]["orders"]["properties"]["amount"]["quality"] + assert isinstance(quality, dict) + assert "positive" in quality + + def test_schema_property_custom_properties_keyed_by_property(self): + contract = _contract( + schema=[ + { + "name": "orders", + "properties": [ + { + "name": "amount", + "logicalType": "number", + "customProperties": [ + {"property": "sensitivity", "value": "high"}, + ], + } + ], + } + ] + ) + result = normalize(contract) + cp = result["schema"]["orders"]["properties"]["amount"]["customProperties"] + assert isinstance(cp, dict) + assert "sensitivity" in cp + + def test_server_roles_keyed_by_role(self): + contract = _contract( + servers=[ + { + "server": "production", + "type": "snowflake", + "roles": [ + {"role": "admin", "access": "write"}, + {"role": "reader", "access": "read"}, + ], + } + ] + ) + result = normalize(contract) + roles = result["servers"]["production"]["roles"] + assert isinstance(roles, dict) + assert "admin" in roles + assert "reader" in roles + + def test_server_without_server_key_skipped(self): + contract = _contract( + servers=[ + {"type": "snowflake"}, # no "server" key — skip + {"server": "production", "type": "snowflake"}, # valid — retain + ] + ) + result = normalize(contract) + assert isinstance(result["servers"], dict) + assert "production" in result["servers"] + assert len(result["servers"]) == 1 + + def test_no_mutation_of_input(self): + contract = _contract(schema=[{"name": "orders"}]) + original = _contract(schema=[{"name": "orders"}]) + normalize(contract) + assert contract == original + + +# --------------------------------------------------------------------------- +# _diff — semantic correctness +# --------------------------------------------------------------------------- + + +class TestNormalizeAuthDefs: + def test_keys_by_url(self): + items = [ + {"url": "https://example.com/wiki", "type": "definition"}, + {"url": "https://example.com/slack", "type": 
"support"}, + ] + result = _normalize_auth_defs(items) + assert set(result.keys()) == {"https://example.com/wiki", "https://example.com/slack"} + + def test_all_fields_preserved_in_value(self): + items = [{"url": "https://example.com/wiki", "type": "definition", "description": "main ref"}] + result = _normalize_auth_defs(items) + assert result["https://example.com/wiki"]["type"] == "definition" + assert result["https://example.com/wiki"]["description"] == "main ref" + + def test_id_fallback_when_url_absent(self): + items = [{"id": "def-001", "type": "definition"}] + result = _normalize_auth_defs(items) + assert "def-001" in result + + def test_positional_fallback_when_url_and_id_absent(self): + items = [{"type": "definition"}, {"type": "support"}] + result = _normalize_auth_defs(items) + assert "__pos_0__" in result + assert "__pos_1__" in result + + def test_empty_list_returns_empty_dict(self): + assert _normalize_auth_defs([]) == {} + + def test_reorder_produces_no_diff(self): + v1 = _contract( + authoritativeDefinitions=[ + {"url": "https://example.com/wiki", "type": "definition"}, + {"url": "https://example.com/slack", "type": "support"}, + ] + ) + v2 = _contract( + authoritativeDefinitions=[ + {"url": "https://example.com/slack", "type": "support"}, + {"url": "https://example.com/wiki", "type": "definition"}, + ] + ) + assert diff(v1, v2) == {} + + def test_url_change_detected(self): + v1 = _contract(authoritativeDefinitions=[{"url": "https://example.com/wiki", "type": "definition"}]) + v2 = _contract(authoritativeDefinitions=[{"url": "https://example.com/NEW", "type": "definition"}]) + result = diff(v1, v2) + # Changing a url changes the dict key — DeepDiff reports this as + # dictionary_item_added + dictionary_item_removed or values_changed + assert result != {} + + def test_type_change_detected(self): + v1 = _contract(authoritativeDefinitions=[{"url": "https://example.com/wiki", "type": "definition"}]) + v2 = _contract(authoritativeDefinitions=[{"url": 
"https://example.com/wiki", "type": "policy"}]) + result = diff(v1, v2) + assert "values_changed" in result + + def test_schema_object_auth_defs_reorder_no_diff(self): + def contract(defs): + return _contract(schema=[{"name": "orders", "authoritativeDefinitions": defs}]) + + v1 = contract( + [ + {"url": "https://a.com", "type": "definition"}, + {"url": "https://b.com", "type": "support"}, + ] + ) + v2 = contract( + [ + {"url": "https://b.com", "type": "support"}, + {"url": "https://a.com", "type": "definition"}, + ] + ) + assert diff(v1, v2) == {} + + def test_schema_property_auth_defs_reorder_no_diff(self): + def contract(defs): + return _contract( + schema=[ + { + "name": "orders", + "properties": [ + { + "name": "order_id", + "logicalType": "string", + "authoritativeDefinitions": defs, + } + ], + } + ] + ) + + v1 = contract( + [ + {"url": "https://a.com", "type": "definition"}, + {"url": "https://b.com", "type": "support"}, + ] + ) + v2 = contract( + [ + {"url": "https://b.com", "type": "support"}, + {"url": "https://a.com", "type": "definition"}, + ] + ) + assert diff(v1, v2) == {} + + def test_description_auth_defs_reorder_no_diff(self): + v1 = _contract( + **{ + "description": { + "purpose": "test", + "authoritativeDefinitions": [ + {"url": "https://a.com", "type": "policy"}, + {"url": "https://b.com", "type": "definition"}, + ], + } + } + ) + v2 = _contract( + **{ + "description": { + "purpose": "test", + "authoritativeDefinitions": [ + {"url": "https://b.com", "type": "definition"}, + {"url": "https://a.com", "type": "policy"}, + ], + } + } + ) + assert diff(v1, v2) == {} + + +class TestNormalizeRelationships: + def test_schema_level_keyed_by_from_to(self): + items = [ + {"from": "orders.order_id", "to": "line_items.order_id", "type": "foreignKey"}, + {"from": "orders.customer_id", "to": "customers.customer_id", "type": "foreignKey"}, + ] + result = _normalize_relationships(items, schema_level=True) + assert "orders.order_id:line_items.order_id" in result + 
assert "orders.customer_id:customers.customer_id" in result + + def test_property_level_keyed_by_to(self): + items = [ + {"to": "customers.customer_id", "type": "foreignKey"}, + ] + result = _normalize_relationships(items, schema_level=False) + assert "customers.customer_id" in result + + def test_positional_fallback_when_fields_absent(self): + items = [{"type": "foreignKey"}] + result = _normalize_relationships(items, schema_level=True) + assert "__pos_0__" in result + + def test_empty_list_returns_empty_dict(self): + assert _normalize_relationships([], schema_level=True) == {} + + def test_schema_relationships_reorder_no_diff(self): + def contract(rels): + return _contract(schema=[{"name": "orders", "relationships": rels}]) + + v1 = contract( + [ + {"from": "orders.order_id", "to": "line_items.order_id", "type": "foreignKey"}, + {"from": "orders.customer_id", "to": "customers.customer_id", "type": "foreignKey"}, + ] + ) + v2 = contract( + [ + {"from": "orders.customer_id", "to": "customers.customer_id", "type": "foreignKey"}, + {"from": "orders.order_id", "to": "line_items.order_id", "type": "foreignKey"}, + ] + ) + assert diff(v1, v2) == {} + + def test_property_relationships_reorder_no_diff(self): + def contract(rels): + return _contract( + schema=[ + { + "name": "orders", + "properties": [ + { + "name": "order_id", + "logicalType": "string", + "relationships": rels, + } + ], + } + ] + ) + + v1 = contract( + [ + {"to": "line_items.order_id", "type": "foreignKey"}, + {"to": "audit_log.order_id", "type": "reference"}, + ] + ) + v2 = contract( + [ + {"to": "audit_log.order_id", "type": "reference"}, + {"to": "line_items.order_id", "type": "foreignKey"}, + ] + ) + assert diff(v1, v2) == {} + + def test_relationship_added_detected(self): + v1 = _contract( + schema=[ + { + "name": "orders", + "relationships": [ + {"from": "orders.order_id", "to": "line_items.order_id", "type": "foreignKey"}, + ], + } + ] + ) + v2 = _contract( + schema=[ + { + "name": "orders", + 
"relationships": [ + {"from": "orders.order_id", "to": "line_items.order_id", "type": "foreignKey"}, + {"from": "orders.customer_id", "to": "customers.customer_id", "type": "foreignKey"}, + ], + } + ] + ) + result = diff(v1, v2) + added = result.get("dictionary_item_added", {}) + assert any("customer_id" in k for k in added) + + def test_relationship_type_change_detected(self): + def contract(t): + return _contract( + schema=[ + { + "name": "orders", + "relationships": [ + {"from": "orders.order_id", "to": "line_items.order_id", "type": t}, + ], + } + ] + ) + + result = diff(contract("foreignKey"), contract("reference")) + assert "values_changed" in result + + +class TestNormalizeDescription: + def test_description_purpose_change_detected(self): + v1 = _contract(**{"description": {"purpose": "Provides order data"}}) + v2 = _contract(**{"description": {"purpose": "Provides order and line item data"}}) + result = diff(v1, v2) + assert "values_changed" in result + changed = result["values_changed"] + assert any("purpose" in k for k in changed) + + def test_description_custom_property_change_detected(self): + v1 = _contract( + **{ + "description": { + "purpose": "test", + "customProperties": [ + {"property": "sensitivity", "value": "internal"}, + ], + } + } + ) + v2 = _contract( + **{ + "description": { + "purpose": "test", + "customProperties": [ + {"property": "sensitivity", "value": "confidential"}, + ], + } + } + ) + result = diff(v1, v2) + changed = result.get("values_changed", {}) + assert any("sensitivity" in k for k in changed) + + def test_description_custom_property_reorder_no_diff(self): + v1 = _contract( + **{ + "description": { + "purpose": "test", + "customProperties": [ + {"property": "sensitivity", "value": "internal"}, + {"property": "owner", "value": "data-team"}, + ], + } + } + ) + v2 = _contract( + **{ + "description": { + "purpose": "test", + "customProperties": [ + {"property": "owner", "value": "data-team"}, + {"property": "sensitivity", "value": 
"internal"}, + ], + } + } + ) + assert diff(v1, v2) == {} + + def test_description_custom_property_added(self): + v1 = _contract( + **{ + "description": { + "purpose": "test", + "customProperties": [ + {"property": "sensitivity", "value": "internal"}, + ], + } + } + ) + v2 = _contract( + **{ + "description": { + "purpose": "test", + "customProperties": [ + {"property": "sensitivity", "value": "internal"}, + {"property": "owner", "value": "data-team"}, + ], + } + } + ) + result = diff(v1, v2) + added = result.get("dictionary_item_added", {}) + assert any("owner" in k for k in added) + + def test_description_auth_defs_reorder_no_diff(self): + v1 = _contract( + **{ + "description": { + "purpose": "test", + "authoritativeDefinitions": [ + {"url": "https://a.com", "type": "policy"}, + {"url": "https://b.com", "type": "definition"}, + ], + } + } + ) + v2 = _contract( + **{ + "description": { + "purpose": "test", + "authoritativeDefinitions": [ + {"url": "https://b.com", "type": "definition"}, + {"url": "https://a.com", "type": "policy"}, + ], + } + } + ) + assert diff(v1, v2) == {} + + def test_description_scalar_fields_all_detected(self): + """purpose, usage, and limitations are all plain strings — changes must be detected.""" + for field in ("purpose", "usage", "limitations"): + v1 = _contract(**{"description": {field: "original value"}}) + v2 = _contract(**{"description": {field: "updated value"}}) + result = diff(v1, v2) + assert "values_changed" in result, f"change in {field} not detected" + assert any(field in k for k in result["values_changed"]) + + +class TestNormalizeServerCustomProperties: + def _server(self, custom_props): + return _contract( + servers=[ + { + "server": "production", + "type": "snowflake", + "customProperties": custom_props, + } + ] + ) + + def test_custom_property_change_detected(self): + v1 = self._server([{"property": "cost-center", "value": "eng-001"}]) + v2 = self._server([{"property": "cost-center", "value": "eng-999"}]) + result = 
diff(v1, v2) + changed = result.get("values_changed", {}) + assert any("cost-center" in k for k in changed) + + def test_reorder_no_diff(self): + v1 = self._server( + [ + {"property": "team", "value": "data-platform"}, + {"property": "cost-center", "value": "eng-001"}, + ] + ) + v2 = self._server( + [ + {"property": "cost-center", "value": "eng-001"}, + {"property": "team", "value": "data-platform"}, + ] + ) + assert diff(v1, v2) == {} + + def test_change_with_reorder_path_includes_property_name(self): + """When value changes and list is simultaneously reordered, the path + must name the property (not use a positional index).""" + v1 = self._server( + [ + {"property": "team", "value": "data-platform"}, + {"property": "cost-center", "value": "eng-001"}, + {"property": "env", "value": "prod"}, + ] + ) + v2 = self._server( + [ + {"property": "env", "value": "prod"}, + {"property": "team", "value": "data-platform"}, + {"property": "cost-center", "value": "eng-999"}, + ] + ) + raw = diff(v1, v2) + changed = raw.get("values_changed", {}) + assert any("cost-center" in k for k in changed) + assert not any(k.endswith("][0]") or k.endswith("][1]") or k.endswith("][2]") for k in changed) + + def test_custom_property_added(self): + v1 = self._server([{"property": "team", "value": "data-platform"}]) + v2 = self._server( + [ + {"property": "team", "value": "data-platform"}, + {"property": "owner", "value": "alice"}, + ] + ) + result = diff(v1, v2) + added = result.get("dictionary_item_added", {}) + assert any("owner" in k for k in added) + + def test_custom_property_removed(self): + v1 = self._server( + [ + {"property": "team", "value": "data-platform"}, + {"property": "owner", "value": "alice"}, + ] + ) + v2 = self._server([{"property": "team", "value": "data-platform"}]) + result = diff(v1, v2) + removed = result.get("dictionary_item_removed", {}) + assert any("owner" in k for k in removed) + + def test_multiple_servers_independent(self): + """customProperties on two different 
servers are normalized independently.""" + v1 = _contract( + servers=[ + {"server": "prod", "type": "snowflake", "customProperties": [{"property": "env", "value": "prod"}]}, + { + "server": "staging", + "type": "snowflake", + "customProperties": [{"property": "env", "value": "staging"}], + }, + ] + ) + v2 = _contract( + servers=[ + {"server": "prod", "type": "snowflake", "customProperties": [{"property": "env", "value": "prod"}]}, + { + "server": "staging", + "type": "snowflake", + "customProperties": [{"property": "env", "value": "staging-new"}], + }, + ] + ) + result = diff(v1, v2) + changed = result.get("values_changed", {}) + assert any("staging" in k for k in changed) + assert not any("prod" in k and "customProperties" in k for k in changed) + + +class TestNormalizeQualityNested: + def _schema_quality(self, quality_items): + return _contract( + schema=[ + { + "name": "orders", + "physicalName": "orders_tbl", + "quality": quality_items, + } + ] + ) + + def _property_quality(self, quality_items): + return _contract( + schema=[ + { + "name": "orders", + "physicalName": "orders_tbl", + "properties": [ + { + "name": "amount", + "logicalType": "number", + "quality": quality_items, + } + ], + } + ] + ) + + def test_schema_quality_custom_property_change_detected(self): + v1 = self._schema_quality( + [{"name": "row_count", "type": "sql", "customProperties": [{"property": "severity", "value": "high"}]}] + ) + v2 = self._schema_quality( + [{"name": "row_count", "type": "sql", "customProperties": [{"property": "severity", "value": "critical"}]}] + ) + result = diff(v1, v2) + changed = result.get("values_changed", {}) + assert any("severity" in k for k in changed) + + def test_schema_quality_custom_property_reorder_no_diff(self): + v1 = self._schema_quality( + [ + { + "name": "row_count", + "type": "sql", + "customProperties": [ + {"property": "severity", "value": "high"}, + {"property": "owner", "value": "data-team"}, + ], + } + ] + ) + v2 = self._schema_quality( + [ + { 
+ "name": "row_count", + "type": "sql", + "customProperties": [ + {"property": "owner", "value": "data-team"}, + {"property": "severity", "value": "high"}, + ], + } + ] + ) + assert diff(v1, v2) == {} + + def test_schema_quality_change_with_reorder_path_has_property_name(self): + """Path must name the property, not use a positional index.""" + v1 = self._schema_quality( + [ + { + "name": "row_count", + "type": "sql", + "customProperties": [ + {"property": "severity", "value": "high"}, + {"property": "owner", "value": "data-team"}, + {"property": "env", "value": "prod"}, + ], + } + ] + ) + v2 = self._schema_quality( + [ + { + "name": "row_count", + "type": "sql", + "customProperties": [ + {"property": "env", "value": "prod"}, + {"property": "severity", "value": "critical"}, + {"property": "owner", "value": "data-team"}, + ], + } + ] + ) + raw = diff(v1, v2) + changed = raw.get("values_changed", {}) + assert any("severity" in k for k in changed) + assert not any("][0]" in k or "][1]" in k or "][2]" in k for k in changed) + + def test_schema_quality_auth_defs_reorder_no_diff(self): + v1 = self._schema_quality( + [ + { + "name": "row_count", + "type": "sql", + "authoritativeDefinitions": [ + {"url": "https://a.com", "type": "definition"}, + {"url": "https://b.com", "type": "support"}, + ], + } + ] + ) + v2 = self._schema_quality( + [ + { + "name": "row_count", + "type": "sql", + "authoritativeDefinitions": [ + {"url": "https://b.com", "type": "support"}, + {"url": "https://a.com", "type": "definition"}, + ], + } + ] + ) + assert diff(v1, v2) == {} + + def test_schema_quality_auth_def_change_detected(self): + v1 = self._schema_quality( + [ + { + "name": "row_count", + "type": "sql", + "authoritativeDefinitions": [{"url": "https://a.com", "type": "definition"}], + } + ] + ) + v2 = self._schema_quality( + [ + { + "name": "row_count", + "type": "sql", + "authoritativeDefinitions": [{"url": "https://a.com", "type": "policy"}], + } + ] + ) + result = diff(v1, v2) + assert 
result != {} + + def test_property_quality_custom_property_change_detected(self): + v1 = self._property_quality( + [{"name": "positive", "type": "sql", "customProperties": [{"property": "priority", "value": "p1"}]}] + ) + v2 = self._property_quality( + [{"name": "positive", "type": "sql", "customProperties": [{"property": "priority", "value": "p2"}]}] + ) + result = diff(v1, v2) + changed = result.get("values_changed", {}) + assert any("priority" in k for k in changed) + + def test_property_quality_custom_property_reorder_no_diff(self): + v1 = self._property_quality( + [ + { + "name": "positive", + "type": "sql", + "customProperties": [ + {"property": "priority", "value": "p1"}, + {"property": "team", "value": "data"}, + ], + } + ] + ) + v2 = self._property_quality( + [ + { + "name": "positive", + "type": "sql", + "customProperties": [ + {"property": "team", "value": "data"}, + {"property": "priority", "value": "p1"}, + ], + } + ] + ) + assert diff(v1, v2) == {} + + def test_property_quality_auth_defs_reorder_no_diff(self): + v1 = self._property_quality( + [ + { + "name": "positive", + "type": "sql", + "authoritativeDefinitions": [ + {"url": "https://a.com", "type": "definition"}, + {"url": "https://b.com", "type": "support"}, + ], + } + ] + ) + v2 = self._property_quality( + [ + { + "name": "positive", + "type": "sql", + "authoritativeDefinitions": [ + {"url": "https://b.com", "type": "support"}, + {"url": "https://a.com", "type": "definition"}, + ], + } + ] + ) + assert diff(v1, v2) == {} + + diff --git a/tests/test_changelog_output_text.py b/tests/test_changelog_output_text.py new file mode 100644 index 000000000..2aadba268 --- /dev/null +++ b/tests/test_changelog_output_text.py @@ -0,0 +1,171 @@ +import io +import sys +from pathlib import Path + +from rich.console import Console + +from datacontract.data_contract import DataContract +from datacontract.model.changelog import ChangelogEntry, ChangelogResult, ChangelogType +from 
datacontract.output.text_changelog_results import _badges, _with_markup, _wrap, write_text_changelog_results + +V1 = "fixtures/changelog/integration/changelog_integration_v1.yaml" +V2 = "fixtures/changelog/integration/changelog_integration_v2.yaml" + +GOLDEN_TEXT = Path(__file__).parent / "fixtures/changelog/golden_changelog_text.txt" + + +def _make_entries(added=0, removed=0, changed=0): + entries = [] + for _ in range(added): + entries.append(ChangelogEntry(path="a.b", type=ChangelogType.added)) + for _ in range(removed): + entries.append(ChangelogEntry(path="a.b", type=ChangelogType.removed)) + for _ in range(changed): + entries.append(ChangelogEntry(path="a.b", type=ChangelogType.changed)) + return entries + + +def _render(result: ChangelogResult) -> str: + buf = io.StringIO() + con = Console(file=buf, width=300, highlight=False) + old_stdout = sys.stdout + sys.stdout = buf + try: + write_text_changelog_results(result, con) + finally: + sys.stdout = old_stdout + return buf.getvalue() + + +class TestBadges: + def test_all_types(self): + result = _badges(_make_entries(added=2, removed=1, changed=3)) + assert "1 Removed" in result + assert "3 Changed" in result + assert "2 Added" in result + + def test_ordering_removed_changed_added(self): + result = _badges(_make_entries(added=1, removed=1, changed=1)) + assert result.index("Removed") < result.index("Changed") < result.index("Added") + + def test_zero_count_omitted(self): + result = _badges(_make_entries(added=3)) + assert "Removed" not in result + assert "Changed" not in result + assert "3 Added" in result + + def test_empty_list_returns_empty_string(self): + assert _badges([]) == "" + + def test_separator_between_badges(self): + result = _badges(_make_entries(removed=1, added=1)) + assert " " in result + + +class TestWrap: + def test_short_text_returned_as_is(self): + assert _wrap("hello", 20) == "hello" + + def test_exact_max_width_not_wrapped(self): + text = "a" * 20 + assert _wrap(text, 20) == text + + def 
test_single_word_longer_than_max_returned_as_is(self): + long_word = "a" * 35 + assert _wrap(long_word, 30) == long_word + + def test_multi_word_each_line_within_max_width(self): + result = _wrap("hello world foo bar", 11) + for line in result.split("\n"): + assert len(line) <= 11 + + def test_multi_word_produces_multiple_lines(self): + assert "\n" in _wrap("one two three four five six", 9) + + def test_empty_string_returned_as_is(self): + assert _wrap("", 10) == "" + + +class TestWithMarkup: + def test_added_green(self): + assert _with_markup(ChangelogType.added) == "[green]added[/green]" + + def test_removed_red(self): + assert _with_markup(ChangelogType.removed) == "[red]removed[/red]" + + def test_changed_yellow(self): + assert _with_markup(ChangelogType.changed) == "[yellow]changed[/yellow]" + + +class TestTerminalStateInheritance: + """The wide rendering console inherits terminal/color state from the caller's console. + This prevents colors being silently stripped when the outer console is a real TTY.""" + + def test_colors_present_when_terminal(self): + result = DataContract(data_contract_file=V1).changelog(DataContract(data_contract_file=V2)) + buf = io.StringIO() + con = Console(file=buf, width=300, force_terminal=True) + old_stdout = sys.stdout + sys.stdout = buf + try: + write_text_changelog_results(result, con) + finally: + sys.stdout = old_stdout + assert "\033[" in buf.getvalue() + + def test_colors_absent_when_not_terminal(self): + result = DataContract(data_contract_file=V1).changelog(DataContract(data_contract_file=V2)) + buf = io.StringIO() + con = Console(file=buf, width=300, no_color=True) + old_stdout = sys.stdout + sys.stdout = buf + try: + write_text_changelog_results(result, con) + finally: + sys.stdout = old_stdout + assert "\033[" not in buf.getvalue() + + +class TestWriteTextChangelogResults: + def test_summary_header_present(self): + result = DataContract(data_contract_file=V1).changelog(DataContract(data_contract_file=V2)) + assert 
"Summary" in _render(result) + + def test_details_header_present(self): + result = DataContract(data_contract_file=V1).changelog(DataContract(data_contract_file=V2)) + assert "Details" in _render(result) + + def test_badges_present(self): + result = DataContract(data_contract_file=V1).changelog(DataContract(data_contract_file=V2)) + output = _render(result) + assert "Removed" in output or "Changed" in output or "Added" in output + + def test_all_change_types_present(self): + result = DataContract(data_contract_file=V1).changelog(DataContract(data_contract_file=V2)) + output = _render(result) + assert "added" in output + assert "removed" in output + assert "changed" in output + + def test_no_changes_suppresses_summary(self): + result = DataContract(data_contract_file=V1).changelog(DataContract(data_contract_file=V1)) + assert "Summary" not in _render(result) + + def test_no_changes_still_renders_details(self): + result = DataContract(data_contract_file=V1).changelog(DataContract(data_contract_file=V1)) + assert "Details" in _render(result) + + def test_golden_output(self): + result = DataContract(data_contract_file=V1).changelog(DataContract(data_contract_file=V2)) + buf = io.StringIO() + con = Console(file=buf, width=300, highlight=False, no_color=True) + old_stdout = sys.stdout + sys.stdout = buf + try: + write_text_changelog_results(result, con) + finally: + sys.stdout = old_stdout + assert buf.getvalue() == GOLDEN_TEXT.read_text(encoding="utf-8"), ( + "Changelog text output has changed. If intentional, regenerate " + "golden_changelog_text.txt (see tests/fixtures/changelog/helper/generate_golden.py)." + ) diff --git a/tests/test_cli.py b/tests/test_cli.py index 9c6f40dc8..d9f7b71bb 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -16,3 +16,22 @@ def test_file_does_not_exist(): result = runner.invoke(app, ["test", "unknown.yaml"]) assert result.exit_code == 1 assert "The file 'unknown.yaml' does not \nexist." 
in result.stdout + + +def test_changelog_help(): + result = runner.invoke(app, ["changelog", "--help"]) + assert result.exit_code == 0 + + +def test_changelog_with_changes(): + result = runner.invoke(app, [ + "changelog", + "fixtures/changelog/integration/changelog_integration_v1.yaml", + "fixtures/changelog/integration/changelog_integration_v2.yaml", + ]) + assert result.exit_code == 0 + assert "Summary" in result.output + assert "Details" in result.output + assert "removed" in result.output + assert "changed" in result.output + assert "added" in result.output From eb719afb4c57eaadcf230d5b18592590ce883463 Mon Sep 17 00:00:00 2001 From: Benjamin David Date: Sun, 5 Apr 2026 10:27:05 +0100 Subject: [PATCH 2/9] feat(changelog): color-code and reorder summary badges Co-Authored-By: Claude Sonnet 4.6 --- datacontract/output/text_changelog_results.py | 37 ++++++++++--------- .../changelog/golden_changelog_text.txt | 2 +- tests/test_changelog_output_text.py | 16 +++++++- 3 files changed, 35 insertions(+), 20 deletions(-) diff --git a/datacontract/output/text_changelog_results.py b/datacontract/output/text_changelog_results.py index 9c65d2982..02ed9d58d 100644 --- a/datacontract/output/text_changelog_results.py +++ b/datacontract/output/text_changelog_results.py @@ -1,30 +1,36 @@ import io +from collections import Counter from rich import box from rich.console import Console from rich.table import Table -from datacontract.model.changelog import ChangelogResult, ChangelogType +from datacontract.model.changelog import ChangelogEntry, ChangelogResult, ChangelogType _VAL_W = 30 +_CHANGE_COLOR = { + ChangelogType.added: "green", + ChangelogType.changed: "yellow", + ChangelogType.removed: "red", +} + +_BADGE_ORDER = [ChangelogType.added, ChangelogType.changed, ChangelogType.removed] + def write_text_changelog_results(result: ChangelogResult, console: Console): _print_summary(result, console) _print_table(result, console) -def _badges(entries: list) -> str: - removed = sum(1 for 
e in entries if e.type == ChangelogType.removed) - changed = sum(1 for e in entries if e.type == ChangelogType.changed) - added = sum(1 for e in entries if e.type == ChangelogType.added) +def _badges(entries: list[ChangelogEntry]) -> str: + counts = Counter(e.type for e in entries) parts = [] - if removed: - parts.append(f"[ {removed} Removed ]") - if changed: - parts.append(f"[ {changed} Changed ]") - if added: - parts.append(f"[ {added} Added ]") + for ct in _BADGE_ORDER: + n = counts[ct] + if n: + color = _CHANGE_COLOR[ct] + parts.append(f"[ [{color}]{n} {ct.value.capitalize()}[/{color}] ]") return " ".join(parts) @@ -66,12 +72,9 @@ def _print_table(result: ChangelogResult, console: Console): def _with_markup(changelog_type: ChangelogType) -> str: - if changelog_type == ChangelogType.added: - return "[green]added[/green]" - if changelog_type == ChangelogType.removed: - return "[red]removed[/red]" - if changelog_type == ChangelogType.changed: - return "[yellow]changed[/yellow]" + color = _CHANGE_COLOR.get(changelog_type) + if color: + return f"[{color}]{changelog_type.value}[/{color}]" return changelog_type.value diff --git a/tests/fixtures/changelog/golden_changelog_text.txt b/tests/fixtures/changelog/golden_changelog_text.txt index 2ce690aeb..31f621641 100644 --- a/tests/fixtures/changelog/golden_changelog_text.txt +++ b/tests/fixtures/changelog/golden_changelog_text.txt @@ -1,5 +1,5 @@ Summary -[ 1 Removed ] [ 4 Changed ] [ 2 Added ] +[ 2 Added ] [ 4 Changed ] [ 1 Removed ] ╭─────────┬───────────────────────────────────────╮ │ Change │ Field │ ├─────────┼───────────────────────────────────────┤ diff --git a/tests/test_changelog_output_text.py b/tests/test_changelog_output_text.py index 2aadba268..aba3f0cd0 100644 --- a/tests/test_changelog_output_text.py +++ b/tests/test_changelog_output_text.py @@ -44,9 +44,21 @@ def test_all_types(self): assert "3 Changed" in result assert "2 Added" in result - def test_ordering_removed_changed_added(self): + def 
test_ordering_added_changed_removed(self): result = _badges(_make_entries(added=1, removed=1, changed=1)) - assert result.index("Removed") < result.index("Changed") < result.index("Added") + assert result.index("Added") < result.index("Changed") < result.index("Removed") + + def test_added_badge_green(self): + result = _badges(_make_entries(added=1)) + assert "[ [green]1 Added[/green] ]" == result + + def test_changed_badge_yellow(self): + result = _badges(_make_entries(changed=1)) + assert "[ [yellow]1 Changed[/yellow] ]" == result + + def test_removed_badge_red(self): + result = _badges(_make_entries(removed=1)) + assert "[ [red]1 Removed[/red] ]" == result def test_zero_count_omitted(self): result = _badges(_make_entries(added=3)) From 4585f29836637775838712809e99f62df4ba569a Mon Sep 17 00:00:00 2001 From: Benjamin David Date: Mon, 6 Apr 2026 13:10:23 +0100 Subject: [PATCH 3/9] feat(changelog): capitalize change type labels in table Co-Authored-By: Claude Sonnet 4.6 --- datacontract/output/text_changelog_results.py | 4 +- .../changelog/golden_changelog_text.txt | 94 +++++++++---------- tests/test_changelog_output_text.py | 12 +-- 3 files changed, 55 insertions(+), 55 deletions(-) diff --git a/datacontract/output/text_changelog_results.py b/datacontract/output/text_changelog_results.py index 02ed9d58d..c11a028da 100644 --- a/datacontract/output/text_changelog_results.py +++ b/datacontract/output/text_changelog_results.py @@ -74,8 +74,8 @@ def _print_table(result: ChangelogResult, console: Console): def _with_markup(changelog_type: ChangelogType) -> str: color = _CHANGE_COLOR.get(changelog_type) if color: - return f"[{color}]{changelog_type.value}[/{color}]" - return changelog_type.value + return f"[{color}]{changelog_type.value.capitalize()}[/{color}]" + return changelog_type.value.capitalize() def _wrap(text: str, max_width: int) -> str: diff --git a/tests/fixtures/changelog/golden_changelog_text.txt b/tests/fixtures/changelog/golden_changelog_text.txt index 
31f621641..70b6819ea 100644 --- a/tests/fixtures/changelog/golden_changelog_text.txt +++ b/tests/fixtures/changelog/golden_changelog_text.txt @@ -3,60 +3,60 @@ Summary ╭─────────┬───────────────────────────────────────╮ │ Change │ Field │ ├─────────┼───────────────────────────────────────┤ -│ added │ schema.customers │ -│ removed │ schema.orders.properties.customer_id │ -│ changed │ schema.orders.properties.order_date │ -│ changed │ schema.orders.properties.order_id │ -│ added │ schema.orders.properties.region │ -│ changed │ schema.orders.properties.total_amount │ -│ changed │ slaProperties.availability │ +│ Added │ schema.customers │ +│ Removed │ schema.orders.properties.customer_id │ +│ Changed │ schema.orders.properties.order_date │ +│ Changed │ schema.orders.properties.order_id │ +│ Added │ schema.orders.properties.region │ +│ Changed │ schema.orders.properties.total_amount │ +│ Changed │ slaProperties.availability │ ╰─────────┴───────────────────────────────────────╯ Details ╭─────────┬──────────────────────────────────────────────────────────┬────────────────────────────────┬───────────────────────────────╮ │ Change │ Path │ Old Value │ New Value │ ├─────────┼──────────────────────────────────────────────────────────┼────────────────────────────────┼───────────────────────────────┤ -│ added │ schema.customers │ │ │ -│ added │ schema.customers.physicalName │ │ customers_tbl │ -│ added │ schema.customers.properties │ │ │ -│ added │ schema.customers.properties.country │ │ │ -│ added │ schema.customers.properties.country.logicalType │ │ string │ -│ added │ schema.customers.properties.country.partitionKeyPosition │ │ 1 │ -│ added │ schema.customers.properties.country.partitioned │ │ True │ -│ added │ schema.customers.properties.country.required │ │ False │ -│ added │ schema.customers.properties.created_at │ │ │ -│ added │ schema.customers.properties.created_at.description │ │ Record creation timestamp │ -│ added │ schema.customers.properties.created_at.logicalType 
│ │ timestamp │ -│ added │ schema.customers.properties.created_at.required │ │ True │ -│ added │ schema.customers.properties.customer_id │ │ │ -│ added │ schema.customers.properties.customer_id.description │ │ Unique order ID │ -│ added │ schema.customers.properties.customer_id.logicalType │ │ string │ -│ added │ schema.customers.properties.customer_id.primaryKey │ │ True │ -│ added │ schema.customers.properties.customer_id.required │ │ True │ -│ added │ schema.customers.properties.date_of_birth │ │ │ -│ added │ schema.customers.properties.date_of_birth.classification │ │ restricted │ -│ added │ schema.customers.properties.date_of_birth.logicalType │ │ date │ -│ added │ schema.customers.properties.date_of_birth.required │ │ False │ -│ added │ schema.customers.properties.email │ │ │ -│ added │ schema.customers.properties.email.classification │ │ confidential │ -│ added │ schema.customers.properties.email.encryptedName │ │ email_encrypt │ -│ added │ schema.customers.properties.email.logicalType │ │ string │ -│ added │ schema.customers.properties.email.required │ │ True │ -│ added │ schema.customers.properties.full_name │ │ │ -│ added │ schema.customers.properties.full_name.businessName │ │ Customer Full Name │ -│ added │ schema.customers.properties.full_name.logicalType │ │ string │ -│ added │ schema.customers.properties.full_name.required │ │ True │ -│ removed │ schema.orders.properties.customer_id │ │ │ -│ removed │ schema.orders.properties.customer_id.logicalType │ string │ │ -│ removed │ schema.orders.properties.customer_id.required │ True │ │ -│ changed │ schema.orders.properties.order_date.logicalType │ string │ date │ -│ changed │ schema.orders.properties.order_id.description │ Unique order ID and a rather │ Unique order ID and another │ +│ Added │ schema.customers │ │ │ +│ Added │ schema.customers.physicalName │ │ customers_tbl │ +│ Added │ schema.customers.properties │ │ │ +│ Added │ schema.customers.properties.country │ │ │ +│ Added │ 
schema.customers.properties.country.logicalType │ │ string │ +│ Added │ schema.customers.properties.country.partitionKeyPosition │ │ 1 │ +│ Added │ schema.customers.properties.country.partitioned │ │ True │ +│ Added │ schema.customers.properties.country.required │ │ False │ +│ Added │ schema.customers.properties.created_at │ │ │ +│ Added │ schema.customers.properties.created_at.description │ │ Record creation timestamp │ +│ Added │ schema.customers.properties.created_at.logicalType │ │ timestamp │ +│ Added │ schema.customers.properties.created_at.required │ │ True │ +│ Added │ schema.customers.properties.customer_id │ │ │ +│ Added │ schema.customers.properties.customer_id.description │ │ Unique order ID │ +│ Added │ schema.customers.properties.customer_id.logicalType │ │ string │ +│ Added │ schema.customers.properties.customer_id.primaryKey │ │ True │ +│ Added │ schema.customers.properties.customer_id.required │ │ True │ +│ Added │ schema.customers.properties.date_of_birth │ │ │ +│ Added │ schema.customers.properties.date_of_birth.classification │ │ restricted │ +│ Added │ schema.customers.properties.date_of_birth.logicalType │ │ date │ +│ Added │ schema.customers.properties.date_of_birth.required │ │ False │ +│ Added │ schema.customers.properties.email │ │ │ +│ Added │ schema.customers.properties.email.classification │ │ confidential │ +│ Added │ schema.customers.properties.email.encryptedName │ │ email_encrypt │ +│ Added │ schema.customers.properties.email.logicalType │ │ string │ +│ Added │ schema.customers.properties.email.required │ │ True │ +│ Added │ schema.customers.properties.full_name │ │ │ +│ Added │ schema.customers.properties.full_name.businessName │ │ Customer Full Name │ +│ Added │ schema.customers.properties.full_name.logicalType │ │ string │ +│ Added │ schema.customers.properties.full_name.required │ │ True │ +│ Removed │ schema.orders.properties.customer_id │ │ │ +│ Removed │ schema.orders.properties.customer_id.logicalType │ string │ │ +│ Removed 
│ schema.orders.properties.customer_id.required │ True │ │ +│ Changed │ schema.orders.properties.order_date.logicalType │ string │ date │ +│ Changed │ schema.orders.properties.order_id.description │ Unique order ID and a rather │ Unique order ID and another │ │ │ │ lenghty description that │ rather lenghty description │ │ │ │ should be wrapped in the table │ that should be wrapped in the │ │ │ │ │ table │ -│ added │ schema.orders.properties.region │ │ │ -│ added │ schema.orders.properties.region.logicalType │ │ string │ -│ added │ schema.orders.properties.region.required │ │ False │ -│ changed │ schema.orders.properties.total_amount.required │ False │ True │ -│ changed │ slaProperties.availability.value │ 99.9% │ 99.5% │ +│ Added │ schema.orders.properties.region │ │ │ +│ Added │ schema.orders.properties.region.logicalType │ │ string │ +│ Added │ schema.orders.properties.region.required │ │ False │ +│ Changed │ schema.orders.properties.total_amount.required │ False │ True │ +│ Changed │ slaProperties.availability.value │ 99.9% │ 99.5% │ ╰─────────┴──────────────────────────────────────────────────────────┴────────────────────────────────┴───────────────────────────────╯ diff --git a/tests/test_changelog_output_text.py b/tests/test_changelog_output_text.py index aba3f0cd0..f2e503a8c 100644 --- a/tests/test_changelog_output_text.py +++ b/tests/test_changelog_output_text.py @@ -100,13 +100,13 @@ def test_empty_string_returned_as_is(self): class TestWithMarkup: def test_added_green(self): - assert _with_markup(ChangelogType.added) == "[green]added[/green]" + assert _with_markup(ChangelogType.added) == "[green]Added[/green]" def test_removed_red(self): - assert _with_markup(ChangelogType.removed) == "[red]removed[/red]" + assert _with_markup(ChangelogType.removed) == "[red]Removed[/red]" def test_changed_yellow(self): - assert _with_markup(ChangelogType.changed) == "[yellow]changed[/yellow]" + assert _with_markup(ChangelogType.changed) == "[yellow]Changed[/yellow]" 
class TestTerminalStateInheritance: @@ -155,9 +155,9 @@ def test_badges_present(self): def test_all_change_types_present(self): result = DataContract(data_contract_file=V1).changelog(DataContract(data_contract_file=V2)) output = _render(result) - assert "added" in output - assert "removed" in output - assert "changed" in output + assert "Added" in output + assert "Removed" in output + assert "Changed" in output def test_no_changes_suppresses_summary(self): result = DataContract(data_contract_file=V1).changelog(DataContract(data_contract_file=V1)) From fa9d62d6c449c3997303dd5ac4bc47a4de91522f Mon Sep 17 00:00:00 2001 From: Benjamin David Date: Mon, 6 Apr 2026 15:26:52 +0100 Subject: [PATCH 4/9] refactor(changelog): rename ChangelogType.changed to updated Co-Authored-By: Claude Sonnet 4.6 --- datacontract/changelog/changelog.py | 16 ++++++------- datacontract/model/changelog.py | 2 +- datacontract/output/text_changelog_results.py | 4 ++-- .../changelog/golden_changelog_text.txt | 18 +++++++------- tests/test_api.py | 2 +- tests/test_changelog.py | 4 ++-- tests/test_changelog_engine.py | 24 +++++++++---------- tests/test_changelog_output_text.py | 22 ++++++++--------- tests/test_cli.py | 6 ++--- 9 files changed, 49 insertions(+), 49 deletions(-) diff --git a/datacontract/changelog/changelog.py b/datacontract/changelog/changelog.py index 8a071de7b..72240812b 100644 --- a/datacontract/changelog/changelog.py +++ b/datacontract/changelog/changelog.py @@ -30,8 +30,8 @@ def diff(v1: dict, v2: dict) -> dict: _CHANGE_TYPE_MAP = { "dictionary_item_added": "Added", "dictionary_item_removed": "Removed", - "values_changed": "Changed", - "type_changes": "Changed", + "values_changed": "Updated", + "type_changes": "Updated", "iterable_item_added": "Added", "iterable_item_removed": "Removed", } @@ -44,7 +44,7 @@ def build_changelog(diff_result: dict, source_label: str = "v1", target_label: s Both summary.changes and detail.changes share the same shape: { "path": str, # dot-separated 
field path - "changeType": str, # Added | Removed | Changed + "changeType": str, # Added | Removed | Updated "old_value": any, # present for Changed/Removed; absent otherwise "new_value": any, # present for Changed/Added; absent otherwise } @@ -52,7 +52,7 @@ def build_changelog(diff_result: dict, source_label: str = "v1", target_label: s Summary rollup rules (detail always shows full leaf paths): - Scalar Changed leaf → rolled up to parent (logicalType → field) - Scalar Added/Removed leaf → rolled up to parent (businessName Added → field Added) - - Mixed Add+Remove on same parent → single entry with changeType Changed + - Mixed Add+Remove on same parent → single entry with changeType Updated - Dict Added/Removed (whole object) → stays at its own path, not rolled up - List string item (tag) → rolled up to the tags parent in summary; in detail the tag value is the final path segment (tags.pii Removed) @@ -112,7 +112,7 @@ def _expand_to_entries(obj, change_type, base_segs): detail_counts = { "added": sum(1 for c in detail_changes if c["changeType"] == "Added"), "removed": sum(1 for c in detail_changes if c["changeType"] == "Removed"), - "changed": sum(1 for c in detail_changes if c["changeType"] == "Changed"), + "updated": sum(1 for c in detail_changes if c["changeType"] == "Updated"), } summary_groups: dict[tuple, dict] = {} @@ -125,7 +125,7 @@ def _expand_to_entries(obj, change_type, base_segs): segs = [group[0] if group[0] else group[1] for group in segs] is_iterable = deepdiff_key in ("iterable_item_added", "iterable_item_removed") is_scalar_change = ( - change_type == "Changed" + change_type == "Updated" and isinstance(payload, dict) and "old_value" in payload and not isinstance(payload.get("old_value"), dict) @@ -144,7 +144,7 @@ def _expand_to_entries(obj, change_type, base_segs): summary_groups[display_segs] = {"changeType": change_type} else: if summary_groups[display_segs]["changeType"] != change_type: - summary_groups[display_segs]["changeType"] = "Changed" 
+ summary_groups[display_segs]["changeType"] = "Updated" summary_changes = [] for segs, data in sorted(summary_groups.items(), key=lambda x: ".".join(x[0])): @@ -153,7 +153,7 @@ def _expand_to_entries(obj, change_type, base_segs): summary_counts = { "added": sum(1 for c in summary_changes if c["changeType"] == "Added"), "removed": sum(1 for c in summary_changes if c["changeType"] == "Removed"), - "changed": sum(1 for c in summary_changes if c["changeType"] == "Changed"), + "updated": sum(1 for c in summary_changes if c["changeType"] == "Updated"), } return { diff --git a/datacontract/model/changelog.py b/datacontract/model/changelog.py index dfc2cba20..11a0ee0cd 100644 --- a/datacontract/model/changelog.py +++ b/datacontract/model/changelog.py @@ -6,7 +6,7 @@ class ChangelogType(str, Enum): added = "added" removed = "removed" - changed = "changed" + updated = "updated" class ChangelogEntry(BaseModel): diff --git a/datacontract/output/text_changelog_results.py b/datacontract/output/text_changelog_results.py index c11a028da..54073c3d9 100644 --- a/datacontract/output/text_changelog_results.py +++ b/datacontract/output/text_changelog_results.py @@ -11,11 +11,11 @@ _CHANGE_COLOR = { ChangelogType.added: "green", - ChangelogType.changed: "yellow", + ChangelogType.updated: "yellow", ChangelogType.removed: "red", } -_BADGE_ORDER = [ChangelogType.added, ChangelogType.changed, ChangelogType.removed] +_BADGE_ORDER = [ChangelogType.added, ChangelogType.updated, ChangelogType.removed] def write_text_changelog_results(result: ChangelogResult, console: Console): diff --git a/tests/fixtures/changelog/golden_changelog_text.txt b/tests/fixtures/changelog/golden_changelog_text.txt index 70b6819ea..3f3d2e2de 100644 --- a/tests/fixtures/changelog/golden_changelog_text.txt +++ b/tests/fixtures/changelog/golden_changelog_text.txt @@ -1,15 +1,15 @@ Summary -[ 2 Added ] [ 4 Changed ] [ 1 Removed ] +[ 2 Added ] [ 4 Updated ] [ 1 Removed ] ╭─────────┬───────────────────────────────────────╮ 
│ Change │ Field │ ├─────────┼───────────────────────────────────────┤ │ Added │ schema.customers │ │ Removed │ schema.orders.properties.customer_id │ -│ Changed │ schema.orders.properties.order_date │ -│ Changed │ schema.orders.properties.order_id │ +│ Updated │ schema.orders.properties.order_date │ +│ Updated │ schema.orders.properties.order_id │ │ Added │ schema.orders.properties.region │ -│ Changed │ schema.orders.properties.total_amount │ -│ Changed │ slaProperties.availability │ +│ Updated │ schema.orders.properties.total_amount │ +│ Updated │ slaProperties.availability │ ╰─────────┴───────────────────────────────────────╯ Details @@ -49,14 +49,14 @@ Details │ Removed │ schema.orders.properties.customer_id │ │ │ │ Removed │ schema.orders.properties.customer_id.logicalType │ string │ │ │ Removed │ schema.orders.properties.customer_id.required │ True │ │ -│ Changed │ schema.orders.properties.order_date.logicalType │ string │ date │ -│ Changed │ schema.orders.properties.order_id.description │ Unique order ID and a rather │ Unique order ID and another │ +│ Updated │ schema.orders.properties.order_date.logicalType │ string │ date │ +│ Updated │ schema.orders.properties.order_id.description │ Unique order ID and a rather │ Unique order ID and another │ │ │ │ lenghty description that │ rather lenghty description │ │ │ │ should be wrapped in the table │ that should be wrapped in the │ │ │ │ │ table │ │ Added │ schema.orders.properties.region │ │ │ │ Added │ schema.orders.properties.region.logicalType │ │ string │ │ Added │ schema.orders.properties.region.required │ │ False │ -│ Changed │ schema.orders.properties.total_amount.required │ False │ True │ -│ Changed │ slaProperties.availability.value │ 99.9% │ 99.5% │ +│ Updated │ schema.orders.properties.total_amount.required │ False │ True │ +│ Updated │ slaProperties.availability.value │ 99.9% │ 99.5% │ 
╰─────────┴──────────────────────────────────────────────────────────┴────────────────────────────────┴───────────────────────────────╯ diff --git a/tests/test_api.py b/tests/test_api.py index 59ad1895e..67a370d2b 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -49,7 +49,7 @@ def test_changelog(): assert len(data["summary"]) > 0 entry = data["entries"][0] assert "path" in entry - assert entry["type"] in ("added", "removed", "changed") + assert entry["type"] in ("added", "removed", "updated") assert "old_value" in entry assert "new_value" in entry diff --git a/tests/test_changelog.py b/tests/test_changelog.py index 62898df0b..735c8695a 100644 --- a/tests/test_changelog.py +++ b/tests/test_changelog.py @@ -27,7 +27,7 @@ def test_changelog_entry_types(): types = {e.type for e in result.entries} assert ChangelogType.added in types assert ChangelogType.removed in types - assert ChangelogType.changed in types + assert ChangelogType.updated in types def test_changelog_summary_is_rolled_up(): @@ -47,7 +47,7 @@ def test_changelog_entry_values(): result = DataContract(data_contract_file=V1).changelog(DataContract(data_contract_file=V2)) changed = [e for e in result.entries if e.path == "schema.orders.properties.order_date.logicalType"] assert len(changed) == 1 - assert changed[0].type == ChangelogType.changed + assert changed[0].type == ChangelogType.updated assert changed[0].old_value == "string" assert changed[0].new_value == "date" diff --git a/tests/test_changelog_engine.py b/tests/test_changelog_engine.py index 077bf9d59..26a545c5c 100644 --- a/tests/test_changelog_engine.py +++ b/tests/test_changelog_engine.py @@ -68,8 +68,8 @@ def test_source_and_target_labels_stored(self): def test_empty_diff_produces_zero_counts(self): rd = build_changelog({}) - assert rd["summary"]["counts"] == {"added": 0, "removed": 0, "changed": 0} - assert rd["detail"]["counts"] == {"added": 0, "removed": 0, "changed": 0} + assert rd["summary"]["counts"] == {"added": 0, "removed": 0, 
"updated": 0} + assert rd["detail"]["counts"] == {"added": 0, "removed": 0, "updated": 0} def test_empty_diff_produces_empty_changes(self): rd = build_changelog({}) @@ -158,13 +158,13 @@ def test_changed_scalar_in_detail(self): ) match = next((c for c in rd["detail"]["changes"] if "logicalType" in c["path"]), None) assert match is not None - assert match["changeType"] == "Changed" + assert match["changeType"] == "Updated" assert match["old_value"] == "string" assert match["new_value"] == "date" def test_changed_count_incremented(self): rd = build_changelog(_changed("slaProperties']['availability']['value", "99.9%", "99.5%")) - assert rd["detail"]["counts"]["changed"] == 1 + assert rd["detail"]["counts"]["updated"] == 1 def test_changed_scalar_rolled_up_to_parent_in_summary(self): rd = build_changelog( @@ -194,7 +194,7 @@ def test_summary_change_type_is_changed_when_field_both_added_and_removed(self): ) rd = build_changelog(diff) match = next(c for c in rd["summary"]["changes"] if c["path"] == "schema.orders.properties.order_id") - assert match["changeType"] == "Changed" + assert match["changeType"] == "Updated" def test_summary_counts_match_summary_changes(self): diff = _merge( @@ -207,7 +207,7 @@ def test_summary_counts_match_summary_changes(self): changes = rd["summary"]["changes"] assert counts["added"] == sum(1 for c in changes if c["changeType"] == "Added") assert counts["removed"] == sum(1 for c in changes if c["changeType"] == "Removed") - assert counts["changed"] == sum(1 for c in changes if c["changeType"] == "Changed") + assert counts["updated"] == sum(1 for c in changes if c["changeType"] == "Updated") def test_detail_counts_match_detail_changes(self): diff = _merge( @@ -218,7 +218,7 @@ def test_detail_counts_match_detail_changes(self): counts = rd["summary"]["counts"] changes = rd["summary"]["changes"] assert counts["added"] == sum(1 for c in changes if c["changeType"] == "Added") - assert counts["changed"] == sum(1 for c in changes if c["changeType"] 
== "Changed") + assert counts["updated"] == sum(1 for c in changes if c["changeType"] == "Updated") def test_detail_changes_sorted_by_path(self): diff = _merge( @@ -363,24 +363,24 @@ def test_scalar_removed_parent_change_type_is_removed(self): match = next(c for c in rd["summary"]["changes"] if c["path"] == "schema.orders") assert match["changeType"] == "Removed" - def test_mixed_add_remove_same_parent_collapses_to_changed(self): + def test_mixed_add_remove_same_parent_collapses_to_updated(self): rd = self._rd( _added("schema']['orders']['businessName", "Orders"), _removed("schema']['orders']['description", "old desc"), ) match = next(c for c in rd["summary"]["changes"] if c["path"] == "schema.orders") - assert match["changeType"] == "Changed" + assert match["changeType"] == "Updated" paths = [c["path"] for c in rd["summary"]["changes"]] assert "schema.orders.businessName" not in paths assert "schema.orders.description" not in paths - def test_mixed_add_scalar_changed_same_parent_collapses_to_changed(self): + def test_mixed_add_scalar_changed_same_parent_collapses_to_updated(self): rd = self._rd( _added("schema']['orders']['businessName", "Orders"), _changed("schema']['orders']['logicalType", "string", "integer"), ) match = next(c for c in rd["summary"]["changes"] if c["path"] == "schema.orders") - assert match["changeType"] == "Changed" + assert match["changeType"] == "Updated" def test_dict_added_does_not_roll_up(self): """A whole dict payload (e.g. 
a new schema object) should not roll up — @@ -405,7 +405,7 @@ def test_summary_counts_consistent_after_rollup(self): changes = rd["summary"]["changes"] assert counts["added"] == sum(1 for c in changes if c["changeType"] == "Added") assert counts["removed"] == sum(1 for c in changes if c["changeType"] == "Removed") - assert counts["changed"] == sum(1 for c in changes if c["changeType"] == "Changed") + assert counts["updated"] == sum(1 for c in changes if c["changeType"] == "Updated") def test_detail_still_shows_full_leaf_paths(self): """Rollup only affects summary — detail must still show the full leaf paths.""" diff --git a/tests/test_changelog_output_text.py b/tests/test_changelog_output_text.py index f2e503a8c..d3b542d3a 100644 --- a/tests/test_changelog_output_text.py +++ b/tests/test_changelog_output_text.py @@ -21,7 +21,7 @@ def _make_entries(added=0, removed=0, changed=0): for _ in range(removed): entries.append(ChangelogEntry(path="a.b", type=ChangelogType.removed)) for _ in range(changed): - entries.append(ChangelogEntry(path="a.b", type=ChangelogType.changed)) + entries.append(ChangelogEntry(path="a.b", type=ChangelogType.updated)) return entries @@ -41,20 +41,20 @@ class TestBadges: def test_all_types(self): result = _badges(_make_entries(added=2, removed=1, changed=3)) assert "1 Removed" in result - assert "3 Changed" in result + assert "3 Updated" in result assert "2 Added" in result - def test_ordering_added_changed_removed(self): + def test_ordering_added_updated_removed(self): result = _badges(_make_entries(added=1, removed=1, changed=1)) - assert result.index("Added") < result.index("Changed") < result.index("Removed") + assert result.index("Added") < result.index("Updated") < result.index("Removed") def test_added_badge_green(self): result = _badges(_make_entries(added=1)) assert "[ [green]1 Added[/green] ]" == result - def test_changed_badge_yellow(self): + def test_updated_badge_yellow(self): result = _badges(_make_entries(changed=1)) - assert "[ 
[yellow]1 Changed[/yellow] ]" == result + assert "[ [yellow]1 Updated[/yellow] ]" == result def test_removed_badge_red(self): result = _badges(_make_entries(removed=1)) @@ -63,7 +63,7 @@ def test_removed_badge_red(self): def test_zero_count_omitted(self): result = _badges(_make_entries(added=3)) assert "Removed" not in result - assert "Changed" not in result + assert "Updated" not in result assert "3 Added" in result def test_empty_list_returns_empty_string(self): @@ -105,8 +105,8 @@ def test_added_green(self): def test_removed_red(self): assert _with_markup(ChangelogType.removed) == "[red]Removed[/red]" - def test_changed_yellow(self): - assert _with_markup(ChangelogType.changed) == "[yellow]Changed[/yellow]" + def test_updated_yellow(self): + assert _with_markup(ChangelogType.updated) == "[yellow]Updated[/yellow]" class TestTerminalStateInheritance: @@ -150,14 +150,14 @@ def test_details_header_present(self): def test_badges_present(self): result = DataContract(data_contract_file=V1).changelog(DataContract(data_contract_file=V2)) output = _render(result) - assert "Removed" in output or "Changed" in output or "Added" in output + assert "Removed" in output or "Updated" in output or "Added" in output def test_all_change_types_present(self): result = DataContract(data_contract_file=V1).changelog(DataContract(data_contract_file=V2)) output = _render(result) assert "Added" in output assert "Removed" in output - assert "Changed" in output + assert "Updated" in output def test_no_changes_suppresses_summary(self): result = DataContract(data_contract_file=V1).changelog(DataContract(data_contract_file=V1)) diff --git a/tests/test_cli.py b/tests/test_cli.py index d9f7b71bb..a75d3c8c7 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -32,6 +32,6 @@ def test_changelog_with_changes(): assert result.exit_code == 0 assert "Summary" in result.output assert "Details" in result.output - assert "removed" in result.output - assert "changed" in result.output - assert "added" in 
result.output + assert "Removed" in result.output + assert "Updated" in result.output + assert "Added" in result.output From 2ee2a41c00e9834e6d992cec5b11085e142eeb15 Mon Sep 17 00:00:00 2001 From: Benjamin David Date: Mon, 6 Apr 2026 20:41:02 +0100 Subject: [PATCH 5/9] refactor(changelog): replace _to_odcs_dict with get_data_contract() Co-Authored-By: Claude Sonnet 4.6 --- datacontract/data_contract.py | 25 +++---------------- .../integration/changelog_integration_v1.yaml | 4 +++ .../integration/changelog_integration_v2.yaml | 4 +++ 3 files changed, 12 insertions(+), 21 deletions(-) diff --git a/datacontract/data_contract.py b/datacontract/data_contract.py index d9e032b10..9eaaf06db 100644 --- a/datacontract/data_contract.py +++ b/datacontract/data_contract.py @@ -1,7 +1,6 @@ import logging import typing -import yaml from open_data_contract_standard.model import OpenDataContractStandard, Team if typing.TYPE_CHECKING: @@ -189,25 +188,6 @@ def export( export_args=kwargs, ) - def _to_odcs_dict(self) -> dict: - """Resolve this data contract to an OpenDataContractStandard dict.""" - if self._data_contract is not None: - contract = self._data_contract - elif self._data_contract_file is not None: - with open(self._data_contract_file, encoding="utf-8") as f: - contract = OpenDataContractStandard.model_validate(yaml.safe_load(f)) - elif self._data_contract_str is not None: - contract = OpenDataContractStandard.model_validate(yaml.safe_load(self._data_contract_str)) - else: - raise DataContractException( - type="changelog", - result=ResultEnum.error, - name="Resolve Data Contract", - reason="No data contract source provided", - engine="datacontract", - ) - return contract.model_dump(exclude_none=True, by_alias=True) - def changelog(self, other: "DataContract") -> ChangelogResult: """Generate a changelog between this data contract and another, returning a ChangelogResult.""" from datacontract.changelog.changelog import build_changelog, diff @@ -215,7 +195,10 @@ def 
changelog(self, other: "DataContract") -> ChangelogResult: v1_label = self._data_contract_file or "" v2_label = other._data_contract_file or "" - raw_diff = diff(self._to_odcs_dict(), other._to_odcs_dict()) + raw_diff = diff( + self.get_data_contract().model_dump(exclude_none=True, by_alias=True), + other.get_data_contract().model_dump(exclude_none=True, by_alias=True), + ) changelog = build_changelog(raw_diff, source_label=v1_label, target_label=v2_label) result = ChangelogResult(v1=v1_label, v2=v2_label) diff --git a/tests/fixtures/changelog/integration/changelog_integration_v1.yaml b/tests/fixtures/changelog/integration/changelog_integration_v1.yaml index 40ae07845..a4201979b 100644 --- a/tests/fixtures/changelog/integration/changelog_integration_v1.yaml +++ b/tests/fixtures/changelog/integration/changelog_integration_v1.yaml @@ -1,6 +1,8 @@ apiVersion: v3.0.2 kind: DataContract id: orders-contract-001 +status: active +version: 1.0.0 schema: - name: orders physicalName: orders_tbl @@ -21,7 +23,9 @@ schema: servers: - server: production type: snowflake + account: example-account database: PROD_DB + schema: PUBLIC slaProperties: - property: availability value: "99.9%" diff --git a/tests/fixtures/changelog/integration/changelog_integration_v2.yaml b/tests/fixtures/changelog/integration/changelog_integration_v2.yaml index 71a080919..5557ea0d5 100644 --- a/tests/fixtures/changelog/integration/changelog_integration_v2.yaml +++ b/tests/fixtures/changelog/integration/changelog_integration_v2.yaml @@ -1,6 +1,8 @@ apiVersion: v3.0.2 kind: DataContract id: orders-contract-001 +status: active +version: 2.0.0 schema: - name: orders physicalName: orders_tbl @@ -51,7 +53,9 @@ schema: servers: - server: production type: snowflake + account: example-account database: PROD_DB + schema: PUBLIC slaProperties: - property: availability value: "99.5%" From f9ddeebb6baffe628b43f96dd08498c5a98c8a82 Mon Sep 17 00:00:00 2001 From: Benjamin David Date: Tue, 7 Apr 2026 10:49:56 +0100 
Subject: [PATCH 6/9] refactor(changelog): move diff logic out of DataContract.changelog() Co-Authored-By: Claude Sonnet 4.6 --- datacontract/changelog/changelog.py | 19 ++++- datacontract/data_contract.py | 17 ++--- tests/test_changelog_engine.py | 105 ++++++++++++++++++++-------- tests/test_data_contract.py | 13 ++++ 4 files changed, 114 insertions(+), 40 deletions(-) create mode 100644 tests/test_data_contract.py diff --git a/datacontract/changelog/changelog.py b/datacontract/changelog/changelog.py index 72240812b..9f6f50d3d 100644 --- a/datacontract/changelog/changelog.py +++ b/datacontract/changelog/changelog.py @@ -12,6 +12,7 @@ from datetime import datetime, timezone from deepdiff import DeepDiff +from open_data_contract_standard.model import OpenDataContractStandard from datacontract.changelog.normalize import normalize @@ -37,7 +38,23 @@ def diff(v1: dict, v2: dict) -> dict: } -def build_changelog(diff_result: dict, source_label: str = "v1", target_label: str = "v2") -> dict: +def build_changelog( + source: OpenDataContractStandard, + source_file: str | None, + other: OpenDataContractStandard, + other_file: str | None, +) -> dict: + """Produce a JSON-serialisable changelog dict by diffing two ODCS contracts.""" + source_label = source_file or "v1" + target_label = other_file or "v2" + diff_result = diff( + source.model_dump(exclude_none=True, by_alias=True), + other.model_dump(exclude_none=True, by_alias=True), + ) + return _build_changelog_from_diff(diff_result, source_label=source_label, target_label=target_label) + + +def _build_changelog_from_diff(diff_result: dict, source_label: str = "v1", target_label: str = "v2") -> dict: """Produce a JSON-serialisable dict with all data needed to render the full changelog. 
diff --git a/datacontract/data_contract.py b/datacontract/data_contract.py index 9eaaf06db..a956f204c 100644 --- a/datacontract/data_contract.py +++ b/datacontract/data_contract.py @@ -148,6 +148,9 @@ def get_data_contract(self) -> OpenDataContractStandard: inline_definitions=self._inline_definitions, ) + def get_data_contract_file(self) -> str | None: + return self._data_contract_file + def export( self, export_format: ExportFormat, schema_name: str = "all", sql_server_type: str = "auto", **kwargs ) -> str | bytes: @@ -190,17 +193,15 @@ def export( def changelog(self, other: "DataContract") -> ChangelogResult: """Generate a changelog between this data contract and another, returning a ChangelogResult.""" - from datacontract.changelog.changelog import build_changelog, diff - - v1_label = self._data_contract_file or "" - v2_label = other._data_contract_file or "" + from datacontract.changelog.changelog import build_changelog - raw_diff = diff( - self.get_data_contract().model_dump(exclude_none=True, by_alias=True), - other.get_data_contract().model_dump(exclude_none=True, by_alias=True), + changelog = build_changelog( + self.get_data_contract(), self.get_data_contract_file(), + other.get_data_contract(), other.get_data_contract_file(), ) - changelog = build_changelog(raw_diff, source_label=v1_label, target_label=v2_label) + v1_label = changelog["source_label"] + v2_label = changelog["target_label"] result = ChangelogResult(v1=v1_label, v2=v2_label) for change in changelog["summary"]["changes"]: result.summary.append(ChangelogEntry( diff --git a/tests/test_changelog_engine.py b/tests/test_changelog_engine.py index 26a545c5c..c3f73b60a 100644 --- a/tests/test_changelog_engine.py +++ b/tests/test_changelog_engine.py @@ -2,7 +2,7 @@ test_changelog_engine — Unit tests for changelog.py ------------------------------------------------------------------- Test classes: - TestBuildReportDataStructure — build_changelog() output shape and empty-diff + 
TestBuildReportDataStructure — _build_changelog_from_diff() output shape and empty-diff TestBuildReportDataAdded — Added change entries (scalar and dict payloads) TestBuildReportDataRemoved — Removed change entries TestBuildReportDataChanged — Changed entries and scalar rollup to parent @@ -12,6 +12,7 @@ TestDiff — diff(): semantic correctness (added/removed/changed/mid-list) TestDiffFixtures — diff(): end-to-end using fixtures/changelog/unit/ TestDiffFixturesPriceDescriptionScalars — diff(): price, description, and top-level scalar fields + TestBuildChangelog — build_changelog() with OpenDataContractStandard objects """ import os @@ -20,9 +21,9 @@ import yaml from open_data_contract_standard.model import OpenDataContractStandard -from datacontract.changelog.changelog import build_changelog, diff +from datacontract.changelog.changelog import _build_changelog_from_diff, build_changelog, diff -REPORT = build_changelog +REPORT = _build_changelog_from_diff def _added(path: str, payload) -> dict: @@ -52,77 +53,77 @@ def _merge(*diffs: dict) -> dict: class TestBuildReportDataStructure: def test_returns_expected_top_level_keys(self): - rd = build_changelog({}) + rd = _build_changelog_from_diff({}) assert set(rd.keys()) == {"source_label", "target_label", "header", "summary", "detail"} def test_header_contains_title_and_subtitle(self): - rd = build_changelog({}, source_label="v1.yaml", target_label="v2.yaml") + rd = _build_changelog_from_diff({}, source_label="v1.yaml", target_label="v2.yaml") assert rd["header"]["title"] == "ODCS Data Contract Changelog" assert "v1.yaml" in rd["header"]["subtitle"] assert "v2.yaml" in rd["header"]["subtitle"] def test_source_and_target_labels_stored(self): - rd = build_changelog({}, source_label="before.yaml", target_label="after.yaml") + rd = _build_changelog_from_diff({}, source_label="before.yaml", target_label="after.yaml") assert rd["source_label"] == "before.yaml" assert rd["target_label"] == "after.yaml" def 
test_empty_diff_produces_zero_counts(self): - rd = build_changelog({}) + rd = _build_changelog_from_diff({}) assert rd["summary"]["counts"] == {"added": 0, "removed": 0, "updated": 0} assert rd["detail"]["counts"] == {"added": 0, "removed": 0, "updated": 0} def test_empty_diff_produces_empty_changes(self): - rd = build_changelog({}) + rd = _build_changelog_from_diff({}) assert rd["summary"]["changes"] == [] assert rd["detail"]["changes"] == [] def test_unknown_deepdiff_keys_ignored(self): - rd = build_changelog({"unknown_key": {"root['x']": 1}}) + rd = _build_changelog_from_diff({"unknown_key": {"root['x']": 1}}) assert rd["summary"]["changes"] == [] class TestBuildReportDataAdded: def test_added_scalar_appears_in_detail(self): - rd = build_changelog(_added("schema']['orders", "v")) + rd = _build_changelog_from_diff(_added("schema']['orders", "v")) paths = [c["path"] for c in rd["detail"]["changes"]] assert any("orders" in p for p in paths) def test_added_scalar_change_type(self): - rd = build_changelog(_added("schema']['orders", "val")) + rd = _build_changelog_from_diff(_added("schema']['orders", "val")) match = next(c for c in rd["detail"]["changes"] if "orders" in c["path"]) assert match["changeType"] == "Added" def test_added_scalar_has_new_value(self): - rd = build_changelog(_added("schema']['orders", "val")) + rd = _build_changelog_from_diff(_added("schema']['orders", "val")) match = next(c for c in rd["detail"]["changes"] if c["path"] == "schema.orders") assert match.get("new_value") == "val" def test_added_dict_expands_to_leaf_entries(self): payload = {"physicalName": "orders_tbl", "description": "Orders"} - rd = build_changelog(_added("schema']['orders", payload)) + rd = _build_changelog_from_diff(_added("schema']['orders", payload)) paths = [c["path"] for c in rd["detail"]["changes"]] assert "schema.orders.physicalName" in paths assert "schema.orders.description" in paths def test_added_dict_parent_entry_included(self): payload = {"physicalName": 
"orders_tbl"} - rd = build_changelog(_added("schema']['orders", payload)) + rd = _build_changelog_from_diff(_added("schema']['orders", payload)) paths = [c["path"] for c in rd["detail"]["changes"]] assert "schema.orders" in paths def test_added_count_incremented(self): - rd = build_changelog(_added("schema']['orders", "v")) + rd = _build_changelog_from_diff(_added("schema']['orders", "v")) assert rd["detail"]["counts"]["added"] >= 1 def test_added_appears_in_summary(self): # Scalar Added rolls up to parent — use a 2-level path so it lands at schema.orders - rd = build_changelog(_added("schema']['orders']['physicalName", "v")) + rd = _build_changelog_from_diff(_added("schema']['orders']['physicalName", "v")) paths = [c["path"] for c in rd["summary"]["changes"]] assert any("orders" in p for p in paths) def test_added_double_quotes_path_parsing(self): """Test that double-quoted paths are parsed correctly in both detail and summary""" - rd = build_changelog(_added_double_quotes('schema"]["orders"]["physicalName', "v")) + rd = _build_changelog_from_diff(_added_double_quotes('schema"]["orders"]["physicalName', "v")) detail_paths = [c["path"] for c in rd["detail"]["changes"]] assert "schema.orders.physicalName" in detail_paths summary_paths = [c["path"] for c in rd["summary"]["changes"]] @@ -131,29 +132,29 @@ def test_added_double_quotes_path_parsing(self): class TestBuildReportDataRemoved: def test_removed_scalar_appears_in_detail(self): - rd = build_changelog(_removed("schema']['orders", "v")) + rd = _build_changelog_from_diff(_removed("schema']['orders", "v")) paths = [c["path"] for c in rd["detail"]["changes"]] assert any("orders" in p for p in paths) def test_removed_scalar_has_old_value(self): - rd = build_changelog(_removed("schema']['orders", "val")) + rd = _build_changelog_from_diff(_removed("schema']['orders", "val")) match = next(c for c in rd["detail"]["changes"] if c["path"] == "schema.orders") assert match.get("old_value") == "val" def 
test_removed_dict_expands_to_leaf_entries(self): payload = {"logicalType": "string", "required": True} - rd = build_changelog(_removed("schema']['orders']['properties']['amount", payload)) + rd = _build_changelog_from_diff(_removed("schema']['orders']['properties']['amount", payload)) paths = [c["path"] for c in rd["detail"]["changes"]] assert "schema.orders.properties.amount.logicalType" in paths def test_removed_count_incremented(self): - rd = build_changelog(_removed("schema']['orders", "v")) + rd = _build_changelog_from_diff(_removed("schema']['orders", "v")) assert rd["detail"]["counts"]["removed"] >= 1 class TestBuildReportDataChanged: def test_changed_scalar_in_detail(self): - rd = build_changelog( + rd = _build_changelog_from_diff( _changed("schema']['orders']['properties']['order_date']['logicalType", "string", "date") ) match = next((c for c in rd["detail"]["changes"] if "logicalType" in c["path"]), None) @@ -163,11 +164,11 @@ def test_changed_scalar_in_detail(self): assert match["new_value"] == "date" def test_changed_count_incremented(self): - rd = build_changelog(_changed("slaProperties']['availability']['value", "99.9%", "99.5%")) + rd = _build_changelog_from_diff(_changed("slaProperties']['availability']['value", "99.9%", "99.5%")) assert rd["detail"]["counts"]["updated"] == 1 def test_changed_scalar_rolled_up_to_parent_in_summary(self): - rd = build_changelog( + rd = _build_changelog_from_diff( _changed("schema']['orders']['properties']['order_date']['logicalType", "string", "date") ) summary_paths = [c["path"] for c in rd["summary"]["changes"]] @@ -181,7 +182,7 @@ def test_multiple_scalar_changes_on_same_parent_produce_one_summary_entry(self): _changed("schema']['orders']['properties']['order_date']['logicalType", "string", "date"), _changed("schema']['orders']['properties']['order_date']['description", "old desc", "new desc"), ) - rd = build_changelog(diff) + rd = _build_changelog_from_diff(diff) order_date_entries = [c for c in 
rd["summary"]["changes"] if c["path"] == "schema.orders.properties.order_date"] assert len(order_date_entries) == 1 @@ -192,7 +193,7 @@ def test_summary_change_type_is_changed_when_field_both_added_and_removed(self): _added("schema']['orders']['properties']['order_id']['businessName", "Order ID"), _removed("schema']['orders']['properties']['order_id']['description", "Old desc"), ) - rd = build_changelog(diff) + rd = _build_changelog_from_diff(diff) match = next(c for c in rd["summary"]["changes"] if c["path"] == "schema.orders.properties.order_id") assert match["changeType"] == "Updated" @@ -202,7 +203,7 @@ def test_summary_counts_match_summary_changes(self): _removed("schema']['orders']['properties']['customer_id", {"logicalType": "string"}), _changed("slaProperties']['availability']['value", "99.9%", "99.5%"), ) - rd = build_changelog(diff) + rd = _build_changelog_from_diff(diff) counts = rd["summary"]["counts"] changes = rd["summary"]["changes"] assert counts["added"] == sum(1 for c in changes if c["changeType"] == "Added") @@ -214,7 +215,7 @@ def test_detail_counts_match_detail_changes(self): _added("schema']['customers", {"physicalName": "c"}), _changed("slaProperties']['availability']['value", "99.9%", "99.5%"), ) - rd = build_changelog(diff) + rd = _build_changelog_from_diff(diff) counts = rd["summary"]["counts"] changes = rd["summary"]["changes"] assert counts["added"] == sum(1 for c in changes if c["changeType"] == "Added") @@ -225,7 +226,7 @@ def test_detail_changes_sorted_by_path(self): _added("schema']['orders", "v"), _added("schema']['customers", "v"), ) - rd = build_changelog(diff) + rd = _build_changelog_from_diff(diff) paths = [c["path"] for c in rd["detail"]["changes"]] assert paths == sorted(paths) @@ -280,7 +281,7 @@ def _tag_diff(self, v1_tags, v2_tags, location="top"): from datacontract.changelog.changelog import diff raw = diff(v1, v2) - return build_changelog(raw) + return _build_changelog_from_diff(raw) def 
test_added_tag_path_includes_tag_value(self): rd = self._tag_diff(["analytics"], ["analytics", "pii"]) @@ -339,7 +340,7 @@ class TestSummaryRollupScalarLeaves: consistent with how scalar Changed fields behave.""" def _rd(self, *diffs): - return build_changelog(_merge(*diffs)) + return _build_changelog_from_diff(_merge(*diffs)) def test_scalar_added_rolls_up_to_parent(self): rd = self._rd(_added("schema']['orders']['businessName", "Orders")) @@ -784,3 +785,45 @@ def test_top_level_status_changed(self): def test_top_level_domain_changed(self): changed = self._generate().get("values_changed", {}) assert any("'domain'" in k for k in changed) + + +V1_YAML = "fixtures/changelog/integration/changelog_integration_v1.yaml" +V2_YAML = "fixtures/changelog/integration/changelog_integration_v2.yaml" + + +class TestBuildChangelog: + def _load(self, path: str) -> OpenDataContractStandard: + import yaml + from open_data_contract_standard.model import OpenDataContractStandard + with open(os.path.join(os.path.dirname(__file__), path)) as f: + return OpenDataContractStandard.model_validate(yaml.safe_load(f)) + + def test_returns_expected_top_level_keys(self): + v1 = self._load(V1_YAML) + v2 = self._load(V2_YAML) + result = build_changelog(v1, V1_YAML, v2, V2_YAML) + assert set(result.keys()) == {"source_label", "target_label", "header", "summary", "detail"} + + def test_source_and_target_labels_from_files(self): + v1 = self._load(V1_YAML) + v2 = self._load(V2_YAML) + result = build_changelog(v1, V1_YAML, v2, V2_YAML) + assert result["source_label"] == V1_YAML + assert result["target_label"] == V2_YAML + + def test_fallback_labels_when_file_is_none(self): + v1 = self._load(V1_YAML) + result = build_changelog(v1, None, v1, None) + assert result["source_label"] == "v1" + assert result["target_label"] == "v2" + + def test_no_changes_on_identical_contracts(self): + v1 = self._load(V1_YAML) + result = build_changelog(v1, V1_YAML, v1, V1_YAML) + assert result["detail"]["changes"] == [] + + 
def test_detects_changes_between_versions(self): + v1 = self._load(V1_YAML) + v2 = self._load(V2_YAML) + result = build_changelog(v1, V1_YAML, v2, V2_YAML) + assert result["detail"]["counts"]["added"] + result["detail"]["counts"]["removed"] + result["detail"]["counts"]["updated"] > 0 diff --git a/tests/test_data_contract.py b/tests/test_data_contract.py new file mode 100644 index 000000000..933f0590f --- /dev/null +++ b/tests/test_data_contract.py @@ -0,0 +1,13 @@ +from datacontract.data_contract import DataContract + +V1 = "fixtures/changelog/integration/changelog_integration_v1.yaml" + + +def test_get_data_contract_file_returns_path(): + dc = DataContract(data_contract_file=V1) + assert dc.get_data_contract_file() == V1 + + +def test_get_data_contract_file_returns_none_when_not_set(): + dc = DataContract(data_contract_str="dataContractSpecification: 1.1.0\nid: test\ninfo:\n title: t\n version: 1.0.0\n") + assert dc.get_data_contract_file() is None From 1cb97ceac64923106fa066970f3ec99975971d55 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakob=20Sch=C3=B6dl?= Date: Wed, 8 Apr 2026 15:29:59 +0200 Subject: [PATCH 7/9] ruff format --- datacontract/changelog/changelog.py | 5 ++--- datacontract/changelog/normalize.py | 1 + datacontract/data_contract.py | 30 +++++++++++++++++------------ datacontract/model/changelog.py | 1 - datacontract/output/ci_output.py | 4 +++- tests/test_changelog_engine.py | 12 ++++++++++-- tests/test_changelog_normalize.py | 3 --- tests/test_cli.py | 13 ++++++++----- tests/test_data_contract.py | 4 +++- tests/test_export_markdown.py | 4 +--- 10 files changed, 46 insertions(+), 31 deletions(-) diff --git a/datacontract/changelog/changelog.py b/datacontract/changelog/changelog.py index 9f6f50d3d..f60a2f666 100644 --- a/datacontract/changelog/changelog.py +++ b/datacontract/changelog/changelog.py @@ -28,6 +28,7 @@ def diff(v1: dict, v2: dict) -> dict: result = DeepDiff(n1, n2, ignore_order=True, verbose_level=2) return json.loads(result.to_json()) + 
_CHANGE_TYPE_MAP = { "dictionary_item_added": "Added", "dictionary_item_removed": "Removed", @@ -148,9 +149,7 @@ def _expand_to_entries(obj, change_type, base_segs): and not isinstance(payload.get("old_value"), dict) and not isinstance(payload.get("new_value"), dict) ) - is_scalar_leaf = ( - change_type in ("Added", "Removed") and not isinstance(payload, dict) and not is_iterable - ) + is_scalar_leaf = change_type in ("Added", "Removed") and not isinstance(payload, dict) and not is_iterable if is_iterable and isinstance(payload, str): display_segs = tuple(segs) elif (is_scalar_change or is_scalar_leaf) and len(segs) > 1: diff --git a/datacontract/changelog/normalize.py b/datacontract/changelog/normalize.py index e451110e1..6610c5c40 100644 --- a/datacontract/changelog/normalize.py +++ b/datacontract/changelog/normalize.py @@ -55,6 +55,7 @@ description.customProperties[] CustomProperty -> .property """ + def _normalize_by(items: list[dict], key_field: str) -> dict: """Key a list of dicts by a named field, omitting the key field from the value. 
diff --git a/datacontract/data_contract.py b/datacontract/data_contract.py index a956f204c..d23c29002 100644 --- a/datacontract/data_contract.py +++ b/datacontract/data_contract.py @@ -196,25 +196,31 @@ def changelog(self, other: "DataContract") -> ChangelogResult: from datacontract.changelog.changelog import build_changelog changelog = build_changelog( - self.get_data_contract(), self.get_data_contract_file(), - other.get_data_contract(), other.get_data_contract_file(), + self.get_data_contract(), + self.get_data_contract_file(), + other.get_data_contract(), + other.get_data_contract_file(), ) v1_label = changelog["source_label"] v2_label = changelog["target_label"] result = ChangelogResult(v1=v1_label, v2=v2_label) for change in changelog["summary"]["changes"]: - result.summary.append(ChangelogEntry( - path=change["path"], - type=ChangelogType(change["changeType"].lower()), - )) + result.summary.append( + ChangelogEntry( + path=change["path"], + type=ChangelogType(change["changeType"].lower()), + ) + ) for change in changelog["detail"]["changes"]: - result.entries.append(ChangelogEntry( - path=change["path"], - type=ChangelogType(change["changeType"].lower()), - old_value=str(change["old_value"]) if change.get("old_value") is not None else None, - new_value=str(change["new_value"]) if change.get("new_value") is not None else None, - )) + result.entries.append( + ChangelogEntry( + path=change["path"], + type=ChangelogType(change["changeType"].lower()), + old_value=str(change["old_value"]) if change.get("old_value") is not None else None, + new_value=str(change["new_value"]) if change.get("new_value") is not None else None, + ) + ) return result @classmethod diff --git a/datacontract/model/changelog.py b/datacontract/model/changelog.py index 11a0ee0cd..3c84e50ad 100644 --- a/datacontract/model/changelog.py +++ b/datacontract/model/changelog.py @@ -27,4 +27,3 @@ def has_changes(self) -> bool: def pretty(self) -> str: return self.model_dump_json(indent=2) - diff 
--git a/datacontract/output/ci_output.py b/datacontract/output/ci_output.py index 4df73065b..5716219ad 100644 --- a/datacontract/output/ci_output.py +++ b/datacontract/output/ci_output.py @@ -93,7 +93,9 @@ def _write_github_step_summary(results: List[Tuple[str, Run]], summary_path: str # Per-contract detail sections for data_contract_file, run in results: - result_display = RESULT_EMOJI.get(run.result, run.result.value if hasattr(run.result, "value") else str(run.result)) + result_display = RESULT_EMOJI.get( + run.result, run.result.value if hasattr(run.result, "value") else str(run.result) + ) n_total = len(run.checks) if run.checks else 0 n_passed = sum(1 for c in run.checks if c.result == "passed") if run.checks else 0 diff --git a/tests/test_changelog_engine.py b/tests/test_changelog_engine.py index c3f73b60a..95708b65b 100644 --- a/tests/test_changelog_engine.py +++ b/tests/test_changelog_engine.py @@ -418,6 +418,7 @@ def test_detail_still_shows_full_leaf_paths(self): assert "schema.orders.businessName" in detail_paths assert "schema.orders.description" in detail_paths + # --------------------------------------------------------------------------- # Helpers for diff() tests # --------------------------------------------------------------------------- @@ -442,6 +443,8 @@ def _write_yaml(data: dict, path: str) -> None: def _contract(**kwargs) -> dict: return {**MINIMAL_CONTRACT, **kwargs} + + class TestDiff: def _base(self) -> dict: return _contract( @@ -746,7 +749,6 @@ def test_team_member_added(self): assert any("carol" in k for k in added) - class TestDiffFixturesPriceDescriptionScalars(TestDiffFixtures): """Extends the end-to-end fixture tests to cover price, description, and top-level scalar fields that were previously absent from the unit fixtures.""" @@ -795,6 +797,7 @@ class TestBuildChangelog: def _load(self, path: str) -> OpenDataContractStandard: import yaml from open_data_contract_standard.model import OpenDataContractStandard + with 
open(os.path.join(os.path.dirname(__file__), path)) as f: return OpenDataContractStandard.model_validate(yaml.safe_load(f)) @@ -826,4 +829,9 @@ def test_detects_changes_between_versions(self): v1 = self._load(V1_YAML) v2 = self._load(V2_YAML) result = build_changelog(v1, V1_YAML, v2, V2_YAML) - assert result["detail"]["counts"]["added"] + result["detail"]["counts"]["removed"] + result["detail"]["counts"]["updated"] > 0 + assert ( + result["detail"]["counts"]["added"] + + result["detail"]["counts"]["removed"] + + result["detail"]["counts"]["updated"] + > 0 + ) diff --git a/tests/test_changelog_normalize.py b/tests/test_changelog_normalize.py index 742380ea3..eb298deec 100644 --- a/tests/test_changelog_normalize.py +++ b/tests/test_changelog_normalize.py @@ -13,7 +13,6 @@ TestGeneratePriceDescriptionScalars — end-to-end normalize via diff() for price/desc fields """ - import yaml from datacontract.changelog.changelog import diff @@ -1053,5 +1052,3 @@ def test_property_quality_auth_defs_reorder_no_diff(self): ] ) assert diff(v1, v2) == {} - - diff --git a/tests/test_cli.py b/tests/test_cli.py index a75d3c8c7..53be4dd63 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -24,11 +24,14 @@ def test_changelog_help(): def test_changelog_with_changes(): - result = runner.invoke(app, [ - "changelog", - "fixtures/changelog/integration/changelog_integration_v1.yaml", - "fixtures/changelog/integration/changelog_integration_v2.yaml", - ]) + result = runner.invoke( + app, + [ + "changelog", + "fixtures/changelog/integration/changelog_integration_v1.yaml", + "fixtures/changelog/integration/changelog_integration_v2.yaml", + ], + ) assert result.exit_code == 0 assert "Summary" in result.output assert "Details" in result.output diff --git a/tests/test_data_contract.py b/tests/test_data_contract.py index 933f0590f..0db8cc10a 100644 --- a/tests/test_data_contract.py +++ b/tests/test_data_contract.py @@ -9,5 +9,7 @@ def test_get_data_contract_file_returns_path(): def 
test_get_data_contract_file_returns_none_when_not_set(): - dc = DataContract(data_contract_str="dataContractSpecification: 1.1.0\nid: test\ninfo:\n title: t\n version: 1.0.0\n") + dc = DataContract( + data_contract_str="dataContractSpecification: 1.1.0\nid: test\ninfo:\n title: t\n version: 1.0.0\n" + ) assert dc.get_data_contract_file() is None diff --git a/tests/test_export_markdown.py b/tests/test_export_markdown.py index 4fd05b863..a828dbd18 100644 --- a/tests/test_export_markdown.py +++ b/tests/test_export_markdown.py @@ -54,6 +54,4 @@ def test_pipe_chars_escaped_in_table_cells(): assert lines, "order_id table row not found" row = lines[0] # The row must have exactly 4 pipe chars as table delimiters (| col1 | col2 | col3 |) - assert row.count("|") == 4, ( - f"Expected 4 pipe delimiters in row, got {row.count('|')}: {row!r}" - ) + assert row.count("|") == 4, f"Expected 4 pipe delimiters in row, got {row.count('|')}: {row!r}" From ba9bd5810592d30970e6c4d653bab3338da9ef3b Mon Sep 17 00:00:00 2001 From: Benjamin David Date: Wed, 8 Apr 2026 14:52:38 +0100 Subject: [PATCH 8/9] fix: regenerate golden changelog text after fixture version bump The integration fixtures (changelog_integration_v1.yaml / v2.yaml) gained explicit version fields in a prior commit but the golden file wasn't updated, causing test_golden_output to fail. Regenerated golden_changelog_text.txt to reflect the new version Updated entry in both the summary badge count and the details table. 
Co-Authored-By: Claude Sonnet 4.6 --- tests/fixtures/changelog/golden_changelog_text.txt | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/fixtures/changelog/golden_changelog_text.txt b/tests/fixtures/changelog/golden_changelog_text.txt index 3f3d2e2de..fab59f571 100644 --- a/tests/fixtures/changelog/golden_changelog_text.txt +++ b/tests/fixtures/changelog/golden_changelog_text.txt @@ -1,5 +1,5 @@ Summary -[ 2 Added ] [ 4 Updated ] [ 1 Removed ] +[ 2 Added ] [ 5 Updated ] [ 1 Removed ] ╭─────────┬───────────────────────────────────────╮ │ Change │ Field │ ├─────────┼───────────────────────────────────────┤ @@ -10,6 +10,7 @@ Summary │ Added │ schema.orders.properties.region │ │ Updated │ schema.orders.properties.total_amount │ │ Updated │ slaProperties.availability │ +│ Updated │ version │ ╰─────────┴───────────────────────────────────────╯ Details @@ -59,4 +60,5 @@ Details │ Added │ schema.orders.properties.region.required │ │ False │ │ Updated │ schema.orders.properties.total_amount.required │ False │ True │ │ Updated │ slaProperties.availability.value │ 99.9% │ 99.5% │ +│ Updated │ version │ 1.0.0 │ 2.0.0 │ ╰─────────┴──────────────────────────────────────────────────────────┴────────────────────────────────┴───────────────────────────────╯ From 6edcfb068603417d6d5f4a5102e28d901b50db07 Mon Sep 17 00:00:00 2001 From: Benjamin David Date: Wed, 8 Apr 2026 15:21:32 +0100 Subject: [PATCH 9/9] fix(api): catch DataContractException in changelog endpoint and harden error message tests Co-Authored-By: Claude Sonnet 4.6 --- datacontract/api.py | 3 +++ tests/test_api.py | 54 ++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 56 insertions(+), 1 deletion(-) diff --git a/datacontract/api.py b/datacontract/api.py index d922b8bd3..ddfbe131d 100644 --- a/datacontract/api.py +++ b/datacontract/api.py @@ -13,6 +13,7 @@ from pydantic import BaseModel from datacontract.data_contract import DataContract, ExportFormat +from 
datacontract.model.exceptions import DataContractException from datacontract.model.run import Run DATA_CONTRACT_EXAMPLE_PAYLOAD = """apiVersion: v3.1.0 @@ -396,6 +397,8 @@ async def changelog_endpoint( raise HTTPException(status_code=422, detail=f"Invalid YAML: {e}") except pydantic.ValidationError as e: raise HTTPException(status_code=422, detail=f"Invalid data contract: {e}") + except DataContractException as e: + raise HTTPException(status_code=422, detail=f"Data Contract Validation Failure: {e}") finally: os.unlink(v1_path) os.unlink(v2_path) diff --git a/tests/test_api.py b/tests/test_api.py index 67a370d2b..646fb725b 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -1,6 +1,9 @@ +from unittest.mock import patch + from fastapi.testclient import TestClient from datacontract.api import app +from datacontract.model.exceptions import DataContractException client = TestClient(app) @@ -58,7 +61,9 @@ def test_changelog_invalid_yaml(): invalid_yaml = "invalid: yaml: content: [" response = client.post(url="/changelog", json={"v1": invalid_yaml, "v2": "valid: yaml"}) assert response.status_code == 422 - assert "Invalid YAML" in response.json()["detail"] + detail = response.json()["detail"] + assert detail.startswith("Data Contract Validation Failure:") + assert "Cannot parse YAML" in detail def test_changelog_invalid_data_contract(): @@ -70,3 +75,50 @@ def test_changelog_invalid_data_contract(): response = client.post(url="/changelog", json={"v1": invalid_contract, "v2": "valid: yaml"}) assert response.status_code == 422 assert "Invalid data contract" in response.json()["detail"] + + +def _valid_contract_yaml(): + with open("fixtures/changelog/integration/changelog_integration_v1.yaml", "r") as f: + return f.read() + + +def test_changelog_yaml_error_returns_422(): + import yaml + + with patch("datacontract.api.DataContract") as mock_dc: + mock_dc.side_effect = yaml.YAMLError("bad yaml") + response = client.post(url="/changelog", json={"v1": 
_valid_contract_yaml(), "v2": _valid_contract_yaml()}) + assert response.status_code == 422 + detail = response.json()["detail"] + assert detail.startswith("Invalid YAML:") + assert "bad yaml" in detail + + +def test_changelog_pydantic_validation_error_returns_422(): + import pydantic + + class _StrictModel(pydantic.BaseModel): + required_int: int + + try: + _StrictModel(required_int="not-an-int") + except pydantic.ValidationError as exc: + validation_error = exc + + with patch("datacontract.api.DataContract") as mock_dc: + mock_dc.side_effect = validation_error + response = client.post(url="/changelog", json={"v1": _valid_contract_yaml(), "v2": _valid_contract_yaml()}) + assert response.status_code == 422 + detail = response.json()["detail"] + assert detail.startswith("Invalid data contract:") + assert "required_int" in detail + + +def test_changelog_data_contract_exception_returns_422(): + with patch("datacontract.api.DataContract") as mock_dc: + mock_dc.side_effect = DataContractException(type="test", name="test", reason="something went wrong") + response = client.post(url="/changelog", json={"v1": _valid_contract_yaml(), "v2": _valid_contract_yaml()}) + assert response.status_code == 422 + detail = response.json()["detail"] + assert detail.startswith("Data Contract Validation Failure:") + assert "something went wrong" in detail