diff --git a/API.md b/API.md index f44074632..934e2936f 100644 --- a/API.md +++ b/API.md @@ -103,6 +103,19 @@ curl -X POST "http://localhost:4242/export?format=sql" \ --data-binary @datacontract.yaml ``` +## Changelog Two Data Contracts + +Compare two ODCS data contracts and receive a changelog. POST a JSON body with `v1` (source/before) and `v2` (target/after) as YAML strings. Returns a JSON object with `summary` and `entries`. + +```bash +curl -X POST "http://localhost:4242/changelog" \ + -H "Content-Type: application/json" \ + -d '{ + "v1": "'"$(cat v1.odcs.yaml)"'", + "v2": "'"$(cat v2.odcs.yaml)"'" + }' +``` + ## Try it out You can also use the Swagger UI to execute the commands directly. diff --git a/CHANGELOG.md b/CHANGELOG.md index 57d306387..1a8503504 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added - Added `ci` command for CI/CD-optimized test runs: multi-file support, GitHub Actions annotations and step summary, Azure DevOps annotations, `--fail-on` flag, `--json` output +- Added `changelog` command and API endpoint (#1118) ### Fixed - Avro importer now raises an error for union fields with multiple non-null types, which are not supported by ODCS @@ -953,4 +954,4 @@ The Golang version can be found at [cli-go](https://github.com/datacontract/cli- ## [0.1.1] ### Added -- Initial release. +- Initial release. 
\ No newline at end of file diff --git a/CLAUDE.md b/CLAUDE.md index e2d743f28..d3a96171a 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -89,6 +89,10 @@ datacontract export --format html datacontract.yaml --output datacontract.html # Import from a different format datacontract import --format sql --source my-ddl.sql --dialect postgres --output datacontract.yaml + +# Show a changelog between two data contracts +datacontract changelog datacontract-v1.yaml datacontract-v2.yaml + ``` ## Project Architecture @@ -111,6 +115,8 @@ The Data Contract CLI is an open-source command-line tool for working with data 5. **Linting (`datacontract/lint/`)**: Tools for validating data contract files against schema and best practices. +6. **Changelog (`datacontract/changelog/`)**: Semantic comparison of ODCS data contracts. + ### Extension Pattern The project uses factory patterns for extensibility: diff --git a/README.md b/README.md index c18fda953..bdb0dfecd 100644 --- a/README.md +++ b/README.md @@ -117,6 +117,9 @@ $ datacontract init odcs.yaml # lint the odcs.yaml $ datacontract lint odcs.yaml +# show a changelog between two data contracts +$ datacontract changelog v1.odcs.yaml v2.odcs.yaml + # execute schema and quality checks (define credentials as environment variables) $ datacontract test odcs.yaml @@ -260,6 +263,7 @@ Commands - [init](#init) - [lint](#lint) +- [changelog](#changelog) - [test](#test) - [ci](#ci) - [export](#export) @@ -318,10 +322,32 @@ Commands ``` +### changelog +``` + + Usage: datacontract changelog [OPTIONS] V1 V2 + + Show a changelog between two data contracts. + +╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮ +│ * v1 TEXT The location (path) of the source (before) data contract YAML. [required] │ +│ * v2 TEXT The location (path) of the target (after) data contract YAML. 
[required] │ +╰──────────────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮ +│ --debug --no-debug Enable debug logging │ +│ --help Show this message and exit. │ +╰──────────────────────────────────────────────────────────────────────────────────────────────────╯ + +``` + +```bash +$ datacontract changelog v1.odcs.yaml v2.odcs.yaml +``` + ### test ``` - Usage: datacontract test [OPTIONS] [LOCATION] + Usage: datacontract test [OPTIONS] [LOCATION] Run schema and quality tests on configured servers. diff --git a/datacontract/api.py b/datacontract/api.py index 09a940730..ddfbe131d 100644 --- a/datacontract/api.py +++ b/datacontract/api.py @@ -1,14 +1,19 @@ import logging import os +import tempfile from typing import Annotated, Optional +import pydantic import typer +import yaml from fastapi import Body, Depends, FastAPI, HTTPException, Query, status from fastapi.middleware.cors import CORSMiddleware from fastapi.responses import PlainTextResponse from fastapi.security.api_key import APIKeyHeader +from pydantic import BaseModel from datacontract.data_contract import DataContract, ExportFormat +from datacontract.model.exceptions import DataContractException from datacontract.model.run import Run DATA_CONTRACT_EXAMPLE_PAYLOAD = """apiVersion: v3.1.0 @@ -358,6 +363,47 @@ async def lint( return {"result": lint_result.result, "checks": lint_result.checks} +class ChangelogRequest(BaseModel): + v1: str = DATA_CONTRACT_EXAMPLE_PAYLOAD + v2: str = DATA_CONTRACT_EXAMPLE_PAYLOAD + + +@app.post( + "/changelog", + tags=["changelog"], + summary="Show a changelog between two data contracts.", + description=""" + Compare two ODCS data contract YAMLs and return a changelog. + POST a JSON body with `v1` (source/before) and `v2` (target/after) as YAML strings. 
+ """, +) +async def changelog_endpoint( + body: ChangelogRequest, + api_key: Annotated[str | None, Depends(api_key_header)] = None, +): + check_api_key(api_key) + + with tempfile.NamedTemporaryFile(mode="w", suffix=".yaml", delete=False) as f1: + f1.write(body.v1) + v1_path = f1.name + with tempfile.NamedTemporaryFile(mode="w", suffix=".yaml", delete=False) as f2: + f2.write(body.v2) + v2_path = f2.name + + try: + result = DataContract(data_contract_file=v1_path).changelog(DataContract(data_contract_file=v2_path)) + return {"summary": result.summary, "entries": result.entries} + except yaml.YAMLError as e: + raise HTTPException(status_code=422, detail=f"Invalid YAML: {e}") + except pydantic.ValidationError as e: + raise HTTPException(status_code=422, detail=f"Invalid data contract: {e}") + except DataContractException as e: + raise HTTPException(status_code=422, detail=f"Data Contract Validation Failure: {e}") + finally: + os.unlink(v1_path) + os.unlink(v2_path) + + @app.post( "/export", tags=["export"], diff --git a/datacontract/changelog/__init__.py b/datacontract/changelog/__init__.py new file mode 100644 index 000000000..bda5d2938 --- /dev/null +++ b/datacontract/changelog/__init__.py @@ -0,0 +1,3 @@ +from datacontract.changelog.changelog import build_changelog, diff + +__all__ = ["diff", "build_changelog"] diff --git a/datacontract/changelog/changelog.py b/datacontract/changelog/changelog.py new file mode 100644 index 000000000..f60a2f666 --- /dev/null +++ b/datacontract/changelog/changelog.py @@ -0,0 +1,191 @@ +""" +changelog — ODCS contract changelog builder +--------------------------------------------- +Provides two public functions: + diff() — normalise two ODCS contract dicts and return a raw DeepDiff dict + build_changelog() — transform a raw DeepDiff dict into structured summary + detail data + ready to be rendered as a changelog report. 
+""" + +import json +import re +from datetime import datetime, timezone + +from deepdiff import DeepDiff +from open_data_contract_standard.model import OpenDataContractStandard + +from datacontract.changelog.normalize import normalize + + +def diff(v1: dict, v2: dict) -> dict: + """Return the DeepDiff result as a plain dict. + + ignore_order=True - dict key ordering is irrelevant + verbose_level=2 - include old/new values, not just paths + """ + n1 = normalize(v1) + n2 = normalize(v2) + result = DeepDiff(n1, n2, ignore_order=True, verbose_level=2) + return json.loads(result.to_json()) + + +_CHANGE_TYPE_MAP = { + "dictionary_item_added": "Added", + "dictionary_item_removed": "Removed", + "values_changed": "Updated", + "type_changes": "Updated", + "iterable_item_added": "Added", + "iterable_item_removed": "Removed", +} + + +def build_changelog( + source: OpenDataContractStandard, + source_file: str | None, + other: OpenDataContractStandard, + other_file: str | None, +) -> dict: + """Produce a JSON-serialisable changelog dict by diffing two ODCS contracts.""" + source_label = source_file or "v1" + target_label = other_file or "v2" + diff_result = diff( + source.model_dump(exclude_none=True, by_alias=True), + other.model_dump(exclude_none=True, by_alias=True), + ) + return _build_changelog_from_diff(diff_result, source_label=source_label, target_label=target_label) + + +def _build_changelog_from_diff(diff_result: dict, source_label: str = "v1", target_label: str = "v2") -> dict: + """Produce a JSON-serialisable dict with all data needed to render + the full changelog. 
+ + Both summary.changes and detail.changes share the same shape: + { + "path": str, # dot-separated field path + "changeType": str, # Added | Removed | Updated + "old_value": any, # present for Changed/Removed; absent otherwise + "new_value": any, # present for Changed/Added; absent otherwise + } + + Summary rollup rules (detail always shows full leaf paths): + - Scalar Changed leaf → rolled up to parent (logicalType → field) + - Scalar Added/Removed leaf → rolled up to parent (businessName Added → field Added) + - Mixed Add+Remove on same parent → single entry with changeType Updated + - Dict Added/Removed (whole object) → stays at its own path, not rolled up + - List string item (tag) → rolled up to the tags parent in summary; + in detail the tag value is the final path segment (tags.pii Removed) + """ + + def _expand_to_entries(obj, change_type, base_segs): + entries = [] + for k, v in obj.items(): + segs = base_segs + [k] + if isinstance(v, dict): + entry = {"path": ".".join(segs), "changeType": change_type} + entries.append(entry) + entries.extend(_expand_to_entries(v, change_type, segs)) + else: + entry = {"path": ".".join(segs), "changeType": change_type} + if change_type == "Added": + entry["new_value"] = v + else: + entry["old_value"] = v + entries.append(entry) + return entries + + detail_changes = [] + for deepdiff_key, items in diff_result.items(): + change_type = _CHANGE_TYPE_MAP.get(deepdiff_key) + if not change_type: + continue + for raw_path, payload in items.items(): + # match ['key'] or ["key"] + segs = re.findall(r"""(?:\['([^']+)'\]|\["([^"]+)"\])""", raw_path) + segs = [group[0] if group[0] else group[1] for group in segs] + is_iterable = deepdiff_key in ("iterable_item_added", "iterable_item_removed") + if isinstance(payload, dict) and "old_value" in payload: + entry = { + "path": ".".join(segs), + "changeType": change_type, + "old_value": payload["old_value"], + "new_value": payload["new_value"], + } + detail_changes.append(entry) + elif 
change_type in ("Added", "Removed") and isinstance(payload, dict): + detail_changes.append({"path": ".".join(segs), "changeType": change_type}) + detail_changes.extend(_expand_to_entries(payload, change_type, segs)) + elif is_iterable and isinstance(payload, str): + entry = {"path": ".".join(segs + [payload]), "changeType": change_type} + detail_changes.append(entry) + else: + entry = {"path": ".".join(segs), "changeType": change_type} + if change_type == "Added": + entry["new_value"] = payload + else: + entry["old_value"] = payload + detail_changes.append(entry) + + detail_changes.sort(key=lambda x: x["path"]) + + detail_counts = { + "added": sum(1 for c in detail_changes if c["changeType"] == "Added"), + "removed": sum(1 for c in detail_changes if c["changeType"] == "Removed"), + "updated": sum(1 for c in detail_changes if c["changeType"] == "Updated"), + } + + summary_groups: dict[tuple, dict] = {} + for deepdiff_key, items in diff_result.items(): + change_type = _CHANGE_TYPE_MAP.get(deepdiff_key) + if not change_type: + continue + for raw_path, payload in items.items(): + segs = re.findall(r"""(?:\['([^']+)'\]|\["([^"]+)"\])""", raw_path) + segs = [group[0] if group[0] else group[1] for group in segs] + is_iterable = deepdiff_key in ("iterable_item_added", "iterable_item_removed") + is_scalar_change = ( + change_type == "Updated" + and isinstance(payload, dict) + and "old_value" in payload + and not isinstance(payload.get("old_value"), dict) + and not isinstance(payload.get("new_value"), dict) + ) + is_scalar_leaf = change_type in ("Added", "Removed") and not isinstance(payload, dict) and not is_iterable + if is_iterable and isinstance(payload, str): + display_segs = tuple(segs) + elif (is_scalar_change or is_scalar_leaf) and len(segs) > 1: + display_segs = tuple(segs[:-1]) + else: + display_segs = tuple(segs) + if display_segs not in summary_groups: + summary_groups[display_segs] = {"changeType": change_type} + else: + if 
summary_groups[display_segs]["changeType"] != change_type: + summary_groups[display_segs]["changeType"] = "Updated" + + summary_changes = [] + for segs, data in sorted(summary_groups.items(), key=lambda x: ".".join(x[0])): + summary_changes.append({"path": ".".join(segs), "changeType": data["changeType"]}) + + summary_counts = { + "added": sum(1 for c in summary_changes if c["changeType"] == "Added"), + "removed": sum(1 for c in summary_changes if c["changeType"] == "Removed"), + "updated": sum(1 for c in summary_changes if c["changeType"] == "Updated"), + } + + return { + "source_label": source_label, + "target_label": target_label, + "header": { + "title": "ODCS Data Contract Changelog", + "subtitle": f"{source_label} \u2192 {target_label}", + "generated_at": datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S UTC"), + }, + "summary": { + "counts": summary_counts, + "changes": summary_changes, + }, + "detail": { + "counts": detail_counts, + "changes": detail_changes, + }, + } diff --git a/datacontract/changelog/normalize.py b/datacontract/changelog/normalize.py new file mode 100644 index 000000000..6610c5c40 --- /dev/null +++ b/datacontract/changelog/normalize.py @@ -0,0 +1,208 @@ +""" +normalize — ODCS contract normalization +---------------------------------------- +Converts named lists in a contract dict to dicts keyed by their natural key, +so DeepDiff can match items semantically rather than by position. + +DeepDiff matches list items by position by default, which produces +incorrect diffs when items are added/removed mid-list. 
Keying by the +natural key gives stable, semantically correct paths and meaningful +field names in the output: + + schema.orders.properties.order_id.logicalType Changed + rather than + schema[0].properties[1].logicalType Changed + +Example (schema list, inserting "customers" before "orders"): + + Before normalization — schema is a list of dicts: + "schema": [ + {"name": "orders", "physicalType": "table", "properties": [...]}, + {"name": "customers", "physicalType": "view", "properties": [...]}, + ] + + After normalization — schema is a dict keyed by name, with the key field stripped: + "schema": { + "orders": {"physicalType": "table", "properties": {...}}, + "customers": {"physicalType": "view", "properties": {...}}, + } + + Without normalization, DeepDiff matches by position and reports a spurious change: + "values_changed": {"root['schema'][0]['name']": {"old": "orders", "new": "customers"}} + + With normalization, DeepDiff matches by key and reports correctly: + "dictionary_item_added": {"root['schema']['customers']": {...}} + "dictionary_item_removed": {"root['schema']['orders']": {...}} + +# NOTE: Natural keys are hardcoded here because the open-data-contract-standard +# Pydantic models don't yet expose them. The planned fix is to add a __natural_key__ +# class var or Field annotation to each model upstream, then replace this table with +# a single reflection-based loop that derives both the list containers and their +# natural keys from the model metadata. 
+ +Current hardcoded natural keys: +schema[] SchemaObject -> .name (required: [name]) +schema[].properties[] SchemaProperty -> .name (required: [name], recursive) +slaProperties[] SLAProperty -> .property +servers[] Server -> .server +servers[].roles[] Role -> .role +servers[].customProperties[] CustomProperty -> .property +support[] SupportItem -> .channel +roles[] Role -> .role +team.members[] TeamMember -> .username +authoritativeDefinitions[] AuthoritativeDefinition -> .url +description.authoritativeDefinitions[] AuthoritativeDefinition -> .url +description.customProperties[] CustomProperty -> .property +""" + + +def _normalize_by(items: list[dict], key_field: str) -> dict: + """Key a list of dicts by a named field, omitting the key field from the value. + + Falls back to the list index if the key field is absent on an item. + """ + result = {} + for i, item in enumerate(items): + key = item.get(key_field, f"__pos_{i}__") + result[key] = {k: v for k, v in item.items() if k != key_field} + return result + + +def _normalize_auth_defs(items: list[dict]) -> dict: + """Key authoritativeDefinitions by url with id and positional fallback. + + Unlike _normalize_by, the key (url) is retained in the value dict because + AuthoritativeDefinition has no single required key — url is only inferred, + so stripping it would lose data when the positional fallback fires. + """ + result = {} + for i, item in enumerate(items): + key = item.get("url") or item.get("id") or f"__pos_{i}__" + result[key] = item + return result + + +def _normalize_relationships(items: list[dict], schema_level: bool = True) -> dict: + """Key relationships by a stable composite key. + + Schema-level: from:to composite. Property-level: to only. + Falls back to positional index if key fields are absent. 
+ """ + result = {} + for i, item in enumerate(items): + if schema_level: + from_val = str(item.get("from", "")) + to_val = str(item.get("to", "")) + key = f"{from_val}:{to_val}" if (from_val or to_val) else f"__pos_{i}__" + else: + to_val = item.get("to") + key = str(to_val) if to_val else f"__pos_{i}__" + result[key] = item + return result + + +def _normalize_quality(items: list[dict]) -> dict: + """Key DataQuality items by name (with positional fallback).""" + result = {} + for i, item in enumerate(items): + key = item.get("name") or f"__pos_{i}__" + entry = {k: v for k, v in item.items() if k != "name"} + if "customProperties" in entry and isinstance(entry["customProperties"], list): + entry["customProperties"] = _normalize_by(entry["customProperties"], "property") + if "authoritativeDefinitions" in entry and isinstance(entry["authoritativeDefinitions"], list): + entry["authoritativeDefinitions"] = _normalize_auth_defs(entry["authoritativeDefinitions"]) + result[key] = entry + return result + + +def _normalize_schema_fields(entry: dict, *, schema_level: bool) -> dict: + """Normalize nested list fields shared by SchemaObject and SchemaProperty.""" + if "quality" in entry and isinstance(entry["quality"], list): + entry["quality"] = _normalize_quality(entry["quality"]) + if "customProperties" in entry and isinstance(entry["customProperties"], list): + entry["customProperties"] = _normalize_by(entry["customProperties"], "property") + if "authoritativeDefinitions" in entry and isinstance(entry["authoritativeDefinitions"], list): + entry["authoritativeDefinitions"] = _normalize_auth_defs(entry["authoritativeDefinitions"]) + if "relationships" in entry and isinstance(entry["relationships"], list): + entry["relationships"] = _normalize_relationships(entry["relationships"], schema_level=schema_level) + return entry + + +def _normalize_properties(properties: list[dict]) -> dict: + """Recursively key SchemaProperty lists by .name.""" + result = {} + for prop in 
properties: + key = prop.get("name", prop.get("id", str(prop))) + entry = {k: v for k, v in prop.items() if k != "name"} + if "properties" in entry and isinstance(entry["properties"], list): + entry["properties"] = _normalize_properties(entry["properties"]) + entry = _normalize_schema_fields(entry, schema_level=False) + result[key] = entry + return result + + +def normalize(contract: dict) -> dict: + """Convert named lists to dicts keyed by their natural key field. + + See headers comments for more details. + + """ + out = dict(contract) + + if "schema" in out and isinstance(out["schema"], list): + normalized_schema = {} + for tbl in out["schema"]: + key = tbl.get("name", tbl.get("id", str(tbl))) + entry = {k: v for k, v in tbl.items() if k != "name"} + if "properties" in entry and isinstance(entry["properties"], list): + entry["properties"] = _normalize_properties(entry["properties"]) + entry = _normalize_schema_fields(entry, schema_level=True) + normalized_schema[key] = entry + out["schema"] = normalized_schema + + if "slaProperties" in out and isinstance(out["slaProperties"], list): + out["slaProperties"] = _normalize_by(out["slaProperties"], "property") + + if "servers" in out and isinstance(out["servers"], list): + normalized_servers = {} + for s in out["servers"]: + if not s.get("server"): + continue + key = s["server"] + entry = {k: v for k, v in s.items() if k != "server"} + if "roles" in entry and isinstance(entry["roles"], list): + entry["roles"] = _normalize_by(entry["roles"], "role") + if "customProperties" in entry and isinstance(entry["customProperties"], list): + entry["customProperties"] = _normalize_by(entry["customProperties"], "property") + normalized_servers[key] = entry + out["servers"] = normalized_servers + + if "support" in out and isinstance(out["support"], list): + out["support"] = _normalize_by(out["support"], "channel") + + if "roles" in out and isinstance(out["roles"], list): + out["roles"] = _normalize_by(out["roles"], "role") + + if 
"customProperties" in out and isinstance(out["customProperties"], list): + out["customProperties"] = _normalize_by(out["customProperties"], "property") + + if "team" in out: + team = out["team"] + if isinstance(team, dict) and "members" in team and isinstance(team["members"], list): + out["team"] = {**team, "members": _normalize_by(team["members"], "username")} + elif isinstance(team, list): + out["team"] = _normalize_by(team, "username") + + if "authoritativeDefinitions" in out and isinstance(out["authoritativeDefinitions"], list): + out["authoritativeDefinitions"] = _normalize_auth_defs(out["authoritativeDefinitions"]) + + if "description" in out and isinstance(out["description"], dict): + desc = out["description"] + normalized_desc = dict(desc) + if "authoritativeDefinitions" in desc and isinstance(desc["authoritativeDefinitions"], list): + normalized_desc["authoritativeDefinitions"] = _normalize_auth_defs(desc["authoritativeDefinitions"]) + if "customProperties" in desc and isinstance(desc["customProperties"], list): + normalized_desc["customProperties"] = _normalize_by(desc["customProperties"], "property") + out["description"] = normalized_desc + + return out diff --git a/datacontract/cli.py b/datacontract/cli.py index fea5f8518..7f671becb 100644 --- a/datacontract/cli.py +++ b/datacontract/cli.py @@ -24,6 +24,7 @@ from datacontract.output.ci_output import write_ci_output, write_ci_summary, write_json_results from datacontract.output.output_format import OutputFormat from datacontract.output.test_results_writer import write_test_result +from datacontract.output.text_changelog_results import write_text_changelog_results console = Console() @@ -127,6 +128,18 @@ def enable_debug_logging(debug: bool): ) +@app.command(name="changelog") +def changelog( + v1: Annotated[str, typer.Argument(help="The location (path) of the source (before) data contract YAML.")], + v2: Annotated[str, typer.Argument(help="The location (path) of the target (after) data contract YAML.")], 
+ debug: debug_option = None, +): + """Show a changelog between two data contracts.""" + enable_debug_logging(debug) + result = DataContract(data_contract_file=v1).changelog(DataContract(data_contract_file=v2)) + write_text_changelog_results(result, console) + + @app.command(name="test") def test( location: Annotated[ diff --git a/datacontract/data_contract.py b/datacontract/data_contract.py index a086e1695..d23c29002 100644 --- a/datacontract/data_contract.py +++ b/datacontract/data_contract.py @@ -14,6 +14,7 @@ from datacontract.init.init_template import get_init_template from datacontract.integration.entropy_data import publish_test_results_to_entropy_data from datacontract.lint import resolve +from datacontract.model.changelog import ChangelogEntry, ChangelogResult, ChangelogType from datacontract.model.exceptions import DataContractException from datacontract.model.run import Check, ResultEnum, Run @@ -147,6 +148,9 @@ def get_data_contract(self) -> OpenDataContractStandard: inline_definitions=self._inline_definitions, ) + def get_data_contract_file(self) -> str | None: + return self._data_contract_file + def export( self, export_format: ExportFormat, schema_name: str = "all", sql_server_type: str = "auto", **kwargs ) -> str | bytes: @@ -187,6 +191,38 @@ def export( export_args=kwargs, ) + def changelog(self, other: "DataContract") -> ChangelogResult: + """Generate a changelog between this data contract and another, returning a ChangelogResult.""" + from datacontract.changelog.changelog import build_changelog + + changelog = build_changelog( + self.get_data_contract(), + self.get_data_contract_file(), + other.get_data_contract(), + other.get_data_contract_file(), + ) + + v1_label = changelog["source_label"] + v2_label = changelog["target_label"] + result = ChangelogResult(v1=v1_label, v2=v2_label) + for change in changelog["summary"]["changes"]: + result.summary.append( + ChangelogEntry( + path=change["path"], + type=ChangelogType(change["changeType"].lower()), 
+ ) + ) + for change in changelog["detail"]["changes"]: + result.entries.append( + ChangelogEntry( + path=change["path"], + type=ChangelogType(change["changeType"].lower()), + old_value=str(change["old_value"]) if change.get("old_value") is not None else None, + new_value=str(change["new_value"]) if change.get("new_value") is not None else None, + ) + ) + return result + @classmethod def import_from_source( cls, diff --git a/datacontract/model/changelog.py b/datacontract/model/changelog.py new file mode 100644 index 000000000..3c84e50ad --- /dev/null +++ b/datacontract/model/changelog.py @@ -0,0 +1,29 @@ +from enum import Enum + +from pydantic import BaseModel + + +class ChangelogType(str, Enum): + added = "added" + removed = "removed" + updated = "updated" + + +class ChangelogEntry(BaseModel): + path: str + type: ChangelogType + old_value: str | None = None + new_value: str | None = None + + +class ChangelogResult(BaseModel): + v1: str + v2: str + summary: list[ChangelogEntry] = [] + entries: list[ChangelogEntry] = [] + + def has_changes(self) -> bool: + return len(self.entries) > 0 + + def pretty(self) -> str: + return self.model_dump_json(indent=2) diff --git a/datacontract/output/ci_output.py b/datacontract/output/ci_output.py index 4df73065b..5716219ad 100644 --- a/datacontract/output/ci_output.py +++ b/datacontract/output/ci_output.py @@ -93,7 +93,9 @@ def _write_github_step_summary(results: List[Tuple[str, Run]], summary_path: str # Per-contract detail sections for data_contract_file, run in results: - result_display = RESULT_EMOJI.get(run.result, run.result.value if hasattr(run.result, "value") else str(run.result)) + result_display = RESULT_EMOJI.get( + run.result, run.result.value if hasattr(run.result, "value") else str(run.result) + ) n_total = len(run.checks) if run.checks else 0 n_passed = sum(1 for c in run.checks if c.result == "passed") if run.checks else 0 diff --git a/datacontract/output/text_changelog_results.py 
b/datacontract/output/text_changelog_results.py new file mode 100644 index 000000000..54073c3d9 --- /dev/null +++ b/datacontract/output/text_changelog_results.py @@ -0,0 +1,93 @@ +import io +from collections import Counter + +from rich import box +from rich.console import Console +from rich.table import Table + +from datacontract.model.changelog import ChangelogEntry, ChangelogResult, ChangelogType + +_VAL_W = 30 + +_CHANGE_COLOR = { + ChangelogType.added: "green", + ChangelogType.updated: "yellow", + ChangelogType.removed: "red", +} + +_BADGE_ORDER = [ChangelogType.added, ChangelogType.updated, ChangelogType.removed] + + +def write_text_changelog_results(result: ChangelogResult, console: Console): + _print_summary(result, console) + _print_table(result, console) + + +def _badges(entries: list[ChangelogEntry]) -> str: + counts = Counter(e.type for e in entries) + parts = [] + for ct in _BADGE_ORDER: + n = counts[ct] + if n: + color = _CHANGE_COLOR[ct] + parts.append(f"[ [{color}]{n} {ct.value.capitalize()}[/{color}] ]") + return " ".join(parts) + + +def _print_summary(result: ChangelogResult, console: Console): + if not result.summary: + return + console.print("Summary") + console.print(_badges(result.summary)) + table = Table(box=box.ROUNDED, show_header=True) + table.add_column("Change", no_wrap=True) + table.add_column("Field", no_wrap=True) + for entry in result.summary: + table.add_row(_with_markup(entry.type), entry.path) + buf = io.StringIO() + wide = Console(file=buf, width=300, highlight=False, force_terminal=console.is_terminal, no_color=console.no_color) + wide.print(table) + print(buf.getvalue(), end="") + print("") + + +def _print_table(result: ChangelogResult, console: Console): + console.print("Details") + table = Table(box=box.ROUNDED) + table.add_column("Change", no_wrap=True) + table.add_column("Path", no_wrap=True) + table.add_column("Old Value", max_width=_VAL_W, no_wrap=True) + table.add_column("New Value", max_width=_VAL_W, no_wrap=True) + for 
entry in result.entries: + table.add_row( + _with_markup(entry.type), + entry.path, + _wrap(entry.old_value or "", _VAL_W), + _wrap(entry.new_value or "", _VAL_W), + ) + buf = io.StringIO() + wide = Console(file=buf, width=300, highlight=False, force_terminal=console.is_terminal, no_color=console.no_color) + wide.print(table) + print(buf.getvalue(), end="") + + +def _with_markup(changelog_type: ChangelogType) -> str: + color = _CHANGE_COLOR.get(changelog_type) + if color: + return f"[{color}]{changelog_type.value.capitalize()}[/{color}]" + return changelog_type.value.capitalize() + + +def _wrap(text: str, max_width: int) -> str: + if len(text) <= max_width: + return text + lines, current = [], "" + for word in text.split(): + if current and len(current) + 1 + len(word) > max_width: + lines.append(current) + current = word + else: + current = (current + " " + word).lstrip() + if current: + lines.append(current) + return "\n".join(lines) diff --git a/pyproject.toml b/pyproject.toml index b571d88b0..f52de7dec 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -32,6 +32,7 @@ dependencies = [ "jinja_partials>=0.2.1,<1.0.0", "datacontract-specification>=1.2.3,<2.0.0", "open-data-contract-standard>=3.1.2,<4.0.0", + "deepdiff>=6.0.0,<9.0.0", ] [project.optional-dependencies] diff --git a/tests/fixtures/breaking/datacontract-definitions-v1.yaml b/tests/fixtures/breaking/datacontract-definitions-v1.yaml deleted file mode 100644 index dcf2470b7..000000000 --- a/tests/fixtures/breaking/datacontract-definitions-v1.yaml +++ /dev/null @@ -1,13 +0,0 @@ -dataContractSpecification: 1.2.1 -id: my-data-contract-id -info: - title: My Data Contract - version: 0.0.1 - my-custom-required-field: hello - -models: - my_table: - type: table - fields: - my_field: - required: false diff --git a/tests/fixtures/breaking/datacontract-definitions-v2.yaml b/tests/fixtures/breaking/datacontract-definitions-v2.yaml deleted file mode 100644 index 06110a335..000000000 --- 
a/tests/fixtures/breaking/datacontract-definitions-v2.yaml +++ /dev/null @@ -1,36 +0,0 @@ -dataContractSpecification: 1.2.1 -id: my-data-contract-id -info: - title: My Data Contract - version: 0.0.1 - my-custom-required-field: hello - -models: - my_table: - type: table - fields: - my_field: - $ref: '#/definitions/my_definition' - -definitions: - my_definition: - name: my_definition - domain: global - title: my_title - description: My Description - type: string - enum: [my_enum] - format: uuid - minLength: 8 - maxLength: 14 - pattern: .* - minimum: 8 - exclusiveMaximum: 8 - maximum: 14 - exclusiveMinimum: 14 - example: my_example - pii: false - classification: internal - tags: [my_tags] - - diff --git a/tests/fixtures/breaking/datacontract-definitions-v3.yaml b/tests/fixtures/breaking/datacontract-definitions-v3.yaml deleted file mode 100644 index 0f096113e..000000000 --- a/tests/fixtures/breaking/datacontract-definitions-v3.yaml +++ /dev/null @@ -1,36 +0,0 @@ -dataContractSpecification: 1.2.1 -id: my-data-contract-id -info: - title: My Data Contract - version: 0.0.1 - my-custom-required-field: hello - -models: - my_table: - type: table - fields: - my_field: - $ref: '#/definitions/my_definition_2' - -definitions: - my_definition_2: - name: my_definition_2 - domain: global - title: my_title_2 - description: My Description 2 - type: integer - enum: [my_enum_2] - format: url - minLength: 10 - maxLength: 20 - pattern: .*.* - minimum: 10 - exclusiveMaximum: 20 - maximum: 20 - exclusiveMinimum: 10 - example: my_example_2 - pii: true - classification: sensitive - tags: [my_tags_2] - - diff --git a/tests/fixtures/breaking/datacontract-fields-array-v1.yaml b/tests/fixtures/breaking/datacontract-fields-array-v1.yaml deleted file mode 100644 index 2334c5435..000000000 --- a/tests/fixtures/breaking/datacontract-fields-array-v1.yaml +++ /dev/null @@ -1,64 +0,0 @@ -dataContractSpecification: 1.2.1 -id: demo-contract-id -info: - title: Test for changes in datacontracts that 
includes arrays - version: 0.0.1 - description: my desc - owner: owner - contact: - name: john doe - email: john.doe@example.com -models: - DataType: - namespace: dp.schemas - description: Description - type: table - fields: - Records: - type: array - required: true - description: Some fields on this complex array structure will change - items: - type: object - fields: - Field1: - type: int - required: false - description: Field1 desc - sample: 1 - pii: false - classification: Unclassified - Discount: - type: record - required: false - fields: - IsAutomatic: - type: boolean - required: true - description: Indicates if the application is automatic - sample: true - pii: false - classification: Unclassified - Conditions: - type: object - required: true - fields: - Min1: - type: double - required: false - description: Minimum test1 - sample: 50.0 - pii: false - classification: Unclassified - Hierarchy: - type: record - required: false - fields: - HasArticles: - type: string - required: true - description: Indicates if articles are included - sample: false - pii: false - classification: Unclassified - \ No newline at end of file diff --git a/tests/fixtures/breaking/datacontract-fields-array-v2.yaml b/tests/fixtures/breaking/datacontract-fields-array-v2.yaml deleted file mode 100644 index 42f582b0c..000000000 --- a/tests/fixtures/breaking/datacontract-fields-array-v2.yaml +++ /dev/null @@ -1,64 +0,0 @@ -dataContractSpecification: 1.2.1 -id: demo-contract-id -info: - title: Test for changes in datacontracts that includes arrays - version: 0.0.1 - description: my desc - owner: owner - contact: - name: john doe - email: john.doe@example.com -models: - DataType: - namespace: dp.schemas - description: Description - type: table - fields: - Records: - type: array - required: true - description: Some fields on this complex array structure will change - items: - type: object - fields: - Field1: - type: int - required: false - description: CHANGING PII (THIS DESCRIPTION THROWS 0 
ERRORS) - sample: 1 - pii: true - classification: Unclassified - Discount: - type: record - required: false - fields: - IsAutomatic: - type: boolean - required: true - description: Changed classification - sample: true - pii: false - classification: classified - Conditions: - type: object - required: true - fields: - Min1: - type: double - required: false - description: Minimum test1 - sample: 50.0 - pii: false - classification: Unclassified - Hierarchy: - type: record - required: false - fields: - HasArticles: - type: int - required: true - description: changing type from string to int - sample: false - pii: false - classification: Unclassified - \ No newline at end of file diff --git a/tests/fixtures/breaking/datacontract-fields-v1.yaml b/tests/fixtures/breaking/datacontract-fields-v1.yaml deleted file mode 100644 index eef3a512a..000000000 --- a/tests/fixtures/breaking/datacontract-fields-v1.yaml +++ /dev/null @@ -1,55 +0,0 @@ -dataContractSpecification: 1.2.1 -id: my-data-contract-id -info: - title: My Data Contract - version: 0.0.1 - my-custom-required-field: hello - -models: - my_table: - type: table - fields: - field_type: - description: My Description - field_format: - type: string - field_required: - type: string - field_primaryKey: - type: string - field_references: - type: string - field_unique: - type: string - field_description: - type: string - field_pii: - type: string - field_classification: - type: string - field_pattern: - type: string - field_minLength: - type: string - field_maxLength: - type: string - field_minimum: - type: string - field_exclusiveMinimum: - type: string - field_maximum: - type: string - field_exclusiveMaximum: - type: string - field_enum: - type: string - field_tags: - type: string - field_ref: - type: string - field_fields: - fields: - nested_field_1: - type: string - field_custom_key: - type: string diff --git a/tests/fixtures/breaking/datacontract-fields-v2.yaml b/tests/fixtures/breaking/datacontract-fields-v2.yaml deleted 
file mode 100644 index 1ebf8fce6..000000000 --- a/tests/fixtures/breaking/datacontract-fields-v2.yaml +++ /dev/null @@ -1,83 +0,0 @@ -dataContractSpecification: 1.2.1 -id: my-data-contract-id -info: - title: My Data Contract - version: 0.0.1 - my-custom-required-field: hello - -models: - my_table: - type: table - fields: - field_type: - type: string - description: My Description - field_format: - type: string - format: email - field_required: - type: string - required: false - field_primaryKey: - type: string - primaryKey: false - field_references: - type: string - references: my_table.field_type - field_unique: - type: string - unique: false - field_description: - type: string - description: My Description - field_pii: - type: string - pii: true - field_classification: - type: string - classification: sensitive - field_pattern: - type: string - pattern: ^[A-Za-z0-9]{8,14}$ - field_minLength: - type: string - minLength: 8 - field_maxLength: - type: string - maxLength: 14 - field_minimum: - type: string - minimum: 8 - field_exclusiveMinimum: - type: string - exclusiveMinimum: 8 - field_maximum: - type: string - maximum: 14 - field_exclusiveMaximum: - type: string - exclusiveMaximum: 14 - field_enum: - type: string - enum: [one] - field_tags: - type: string - tags: [one] - field_ref: - type: string - $ref: '#/definitions/my_definition' - field_fields: - fields: - nested_field_1: - type: string - new_nested_field: - type: string - new_field: - type: string - field_custom_key: - type: string - custom-key: some value -definitions: - my_definition: - name: my_definition - type: string \ No newline at end of file diff --git a/tests/fixtures/breaking/datacontract-fields-v3.yaml b/tests/fixtures/breaking/datacontract-fields-v3.yaml deleted file mode 100644 index 36187ad08..000000000 --- a/tests/fixtures/breaking/datacontract-fields-v3.yaml +++ /dev/null @@ -1,84 +0,0 @@ -dataContractSpecification: 1.2.1 -id: my-data-contract-id -info: - title: My Data Contract - version: 
0.0.1 - my-custom-required-field: hello - -models: - my_table: - type: table - fields: - field_type: - type: integer - description: My Description - field_format: - type: string - format: url - field_required: - type: string - required: true - field_primaryKey: - type: string - primaryKey: true - field_references: - type: string - references: my_table.field_format - field_unique: - type: string - unique: true - field_description: - type: string - description: My updated Description - field_pii: - type: string - pii: false - field_classification: - type: string - classification: restricted - field_pattern: - type: string - pattern: ^[A-Za-z0-9]$ - field_minLength: - type: string - minLength: 10 - field_maxLength: - type: string - maxLength: 20 - field_minimum: - type: string - minimum: 10 - field_exclusiveMinimum: - type: string - exclusiveMinimum: 10 - field_maximum: - type: string - maximum: 20 - field_exclusiveMaximum: - type: string - exclusiveMaximum: 20 - field_enum: - type: string - enum: [one, two] - field_tags: - type: string - tags: [one, two] - field_ref: - type: string - $ref: '#/definitions/my_definition_2' - field_fields: - fields: - nested_field_1: - type: integer - new_nested_field: - type: string - new_field: - type: string - field_custom_key: - type: string - custom-key: some other value - -definitions: - my_definition_2: - name: my_definition_2 - type: string \ No newline at end of file diff --git a/tests/fixtures/breaking/datacontract-info-v1.yaml b/tests/fixtures/breaking/datacontract-info-v1.yaml deleted file mode 100644 index 5703faa9a..000000000 --- a/tests/fixtures/breaking/datacontract-info-v1.yaml +++ /dev/null @@ -1,10 +0,0 @@ -dataContractSpecification: 0.9.2 -id: my-data-contract-id -info: - title: My Data Contract - version: 0.0.1 -models: - orders: - fields: - column_1: - type: string diff --git a/tests/fixtures/breaking/datacontract-info-v2.yaml b/tests/fixtures/breaking/datacontract-info-v2.yaml deleted file mode 100644 index 
d5c85d499..000000000 --- a/tests/fixtures/breaking/datacontract-info-v2.yaml +++ /dev/null @@ -1,14 +0,0 @@ -dataContractSpecification: 0.9.2 -id: my-data-contract-id -info: - title: My Data Contract - version: 0.0.1 - owner: Data Team - some-other-key: some information - contact: - email: datateam@work.com -models: - orders: - fields: - column_1: - type: string diff --git a/tests/fixtures/breaking/datacontract-info-v3.yaml b/tests/fixtures/breaking/datacontract-info-v3.yaml deleted file mode 100644 index 06a4a7d48..000000000 --- a/tests/fixtures/breaking/datacontract-info-v3.yaml +++ /dev/null @@ -1,14 +0,0 @@ -dataContractSpecification: 0.9.2 -id: my-data-contract-id -info: - title: My Data Contract - version: 0.0.1 - owner: Another Team - some-other-key: new information - contact: - email: anotherteam@work.com -models: - orders: - fields: - column_1: - type: string diff --git a/tests/fixtures/breaking/datacontract-models-v1.yaml b/tests/fixtures/breaking/datacontract-models-v1.yaml deleted file mode 100644 index ac46a5c7c..000000000 --- a/tests/fixtures/breaking/datacontract-models-v1.yaml +++ /dev/null @@ -1,12 +0,0 @@ -dataContractSpecification: 1.2.1 -id: my-data-contract-id -info: - title: My Data Contract - version: 0.0.1 - my-custom-required-field: hello - -models: - my_table: - fields: - my_field: - description: My Description \ No newline at end of file diff --git a/tests/fixtures/breaking/datacontract-models-v2.yaml b/tests/fixtures/breaking/datacontract-models-v2.yaml deleted file mode 100644 index 11cf57e25..000000000 --- a/tests/fixtures/breaking/datacontract-models-v2.yaml +++ /dev/null @@ -1,20 +0,0 @@ -dataContractSpecification: 1.2.1 -id: my-data-contract-id -info: - title: My Data Contract - version: 0.0.1 - my-custom-required-field: hello - -models: - my_table: - type: table - description: My Model Description - fields: - my_field: - description: My Description - another-key: original value - my_table_2: - fields: - my_field_2: - description: 
My Description 2 - some-other-key: some value \ No newline at end of file diff --git a/tests/fixtures/breaking/datacontract-models-v3.yaml b/tests/fixtures/breaking/datacontract-models-v3.yaml deleted file mode 100644 index dfab651ae..000000000 --- a/tests/fixtures/breaking/datacontract-models-v3.yaml +++ /dev/null @@ -1,19 +0,0 @@ -dataContractSpecification: 1.2.1 -id: my-data-contract-id -info: - title: My Data Contract - version: 0.0.1 - my-custom-required-field: hello - -models: - my_table: - type: object - description: My Updated Model Description - fields: - my_field: - description: My Description - another-key: updated value - my_table_2: - fields: - my_field_2: - description: My Description 2 \ No newline at end of file diff --git a/tests/fixtures/breaking/datacontract-quality-v1.yaml b/tests/fixtures/breaking/datacontract-quality-v1.yaml deleted file mode 100644 index 7baba622f..000000000 --- a/tests/fixtures/breaking/datacontract-quality-v1.yaml +++ /dev/null @@ -1,10 +0,0 @@ -dataContractSpecification: 1.2.1 -id: my-data-contract-id -info: - title: My Data Contract - version: 0.0.1 -models: - orders: - fields: - column_1: - type: string diff --git a/tests/fixtures/breaking/datacontract-quality-v2.yaml b/tests/fixtures/breaking/datacontract-quality-v2.yaml deleted file mode 100644 index 9e8c33c4e..000000000 --- a/tests/fixtures/breaking/datacontract-quality-v2.yaml +++ /dev/null @@ -1,15 +0,0 @@ -dataContractSpecification: 1.2.1 -id: my-data-contract-id -info: - title: My Data Contract - version: 0.0.1 -models: - orders: - fields: - column_1: - type: string -quality: - type: SodaCL - specification: |- - checks for orders: - - freshness(column_1) < 1d diff --git a/tests/fixtures/breaking/datacontract-quality-v3.yaml b/tests/fixtures/breaking/datacontract-quality-v3.yaml deleted file mode 100644 index 4832289a5..000000000 --- a/tests/fixtures/breaking/datacontract-quality-v3.yaml +++ /dev/null @@ -1,15 +0,0 @@ -dataContractSpecification: 1.2.1 -id: 
my-data-contract-id -info: - title: My Data Contract - version: 0.0.1 -models: - orders: - fields: - column_1: - type: string -quality: - type: custom - specification: |- - checks for orders: - - freshness(column_1) < 2d diff --git a/tests/fixtures/breaking/datacontract-terms-v1.yaml b/tests/fixtures/breaking/datacontract-terms-v1.yaml deleted file mode 100644 index 5703faa9a..000000000 --- a/tests/fixtures/breaking/datacontract-terms-v1.yaml +++ /dev/null @@ -1,10 +0,0 @@ -dataContractSpecification: 0.9.2 -id: my-data-contract-id -info: - title: My Data Contract - version: 0.0.1 -models: - orders: - fields: - column_1: - type: string diff --git a/tests/fixtures/breaking/datacontract-terms-v2.yaml b/tests/fixtures/breaking/datacontract-terms-v2.yaml deleted file mode 100644 index 5c6379bb7..000000000 --- a/tests/fixtures/breaking/datacontract-terms-v2.yaml +++ /dev/null @@ -1,20 +0,0 @@ -dataContractSpecification: 0.9.2 -id: my-data-contract-id -info: - title: My Data Contract - version: 0.0.1 -terms: - usage: | - Data can be used for reports, analytics and machine learning use cases. - Order may be linked and joined by other tables - limitations: | - Not suitable for real-time use cases. - Data may not be used to identify individual customers. 
- Max data processing per day: 10 TiB - billing: 5000 USD per month - noticePeriod: P3M -models: - orders: - fields: - column_1: - type: string diff --git a/tests/fixtures/breaking/datacontract-terms-v3.yaml b/tests/fixtures/breaking/datacontract-terms-v3.yaml deleted file mode 100644 index e9d1dcdc5..000000000 --- a/tests/fixtures/breaking/datacontract-terms-v3.yaml +++ /dev/null @@ -1,15 +0,0 @@ -dataContractSpecification: 0.9.2 -id: my-data-contract-id -info: - title: My Data Contract - version: 0.0.1 -terms: - usage: Data can be used for anything - billing: 1000000 GBP per month - noticePeriod: P1Y - someOtherTerms: must abide by policies -models: - orders: - fields: - column_1: - type: string diff --git a/tests/fixtures/changelog/golden_changelog_text.txt b/tests/fixtures/changelog/golden_changelog_text.txt new file mode 100644 index 000000000..fab59f571 --- /dev/null +++ b/tests/fixtures/changelog/golden_changelog_text.txt @@ -0,0 +1,64 @@ +Summary +[ 2 Added ] [ 5 Updated ] [ 1 Removed ] +╭─────────┬───────────────────────────────────────╮ +│ Change │ Field │ +├─────────┼───────────────────────────────────────┤ +│ Added │ schema.customers │ +│ Removed │ schema.orders.properties.customer_id │ +│ Updated │ schema.orders.properties.order_date │ +│ Updated │ schema.orders.properties.order_id │ +│ Added │ schema.orders.properties.region │ +│ Updated │ schema.orders.properties.total_amount │ +│ Updated │ slaProperties.availability │ +│ Updated │ version │ +╰─────────┴───────────────────────────────────────╯ + +Details +╭─────────┬──────────────────────────────────────────────────────────┬────────────────────────────────┬───────────────────────────────╮ +│ Change │ Path │ Old Value │ New Value │ +├─────────┼──────────────────────────────────────────────────────────┼────────────────────────────────┼───────────────────────────────┤ +│ Added │ schema.customers │ │ │ +│ Added │ schema.customers.physicalName │ │ customers_tbl │ +│ Added │ schema.customers.properties │ │ 
│ +│ Added │ schema.customers.properties.country │ │ │ +│ Added │ schema.customers.properties.country.logicalType │ │ string │ +│ Added │ schema.customers.properties.country.partitionKeyPosition │ │ 1 │ +│ Added │ schema.customers.properties.country.partitioned │ │ True │ +│ Added │ schema.customers.properties.country.required │ │ False │ +│ Added │ schema.customers.properties.created_at │ │ │ +│ Added │ schema.customers.properties.created_at.description │ │ Record creation timestamp │ +│ Added │ schema.customers.properties.created_at.logicalType │ │ timestamp │ +│ Added │ schema.customers.properties.created_at.required │ │ True │ +│ Added │ schema.customers.properties.customer_id │ │ │ +│ Added │ schema.customers.properties.customer_id.description │ │ Unique order ID │ +│ Added │ schema.customers.properties.customer_id.logicalType │ │ string │ +│ Added │ schema.customers.properties.customer_id.primaryKey │ │ True │ +│ Added │ schema.customers.properties.customer_id.required │ │ True │ +│ Added │ schema.customers.properties.date_of_birth │ │ │ +│ Added │ schema.customers.properties.date_of_birth.classification │ │ restricted │ +│ Added │ schema.customers.properties.date_of_birth.logicalType │ │ date │ +│ Added │ schema.customers.properties.date_of_birth.required │ │ False │ +│ Added │ schema.customers.properties.email │ │ │ +│ Added │ schema.customers.properties.email.classification │ │ confidential │ +│ Added │ schema.customers.properties.email.encryptedName │ │ email_encrypt │ +│ Added │ schema.customers.properties.email.logicalType │ │ string │ +│ Added │ schema.customers.properties.email.required │ │ True │ +│ Added │ schema.customers.properties.full_name │ │ │ +│ Added │ schema.customers.properties.full_name.businessName │ │ Customer Full Name │ +│ Added │ schema.customers.properties.full_name.logicalType │ │ string │ +│ Added │ schema.customers.properties.full_name.required │ │ True │ +│ Removed │ schema.orders.properties.customer_id │ │ │ +│ Removed │ 
schema.orders.properties.customer_id.logicalType │ string │ │ +│ Removed │ schema.orders.properties.customer_id.required │ True │ │ +│ Updated │ schema.orders.properties.order_date.logicalType │ string │ date │ +│ Updated │ schema.orders.properties.order_id.description │ Unique order ID and a rather │ Unique order ID and another │ +│ │ │ lenghty description that │ rather lenghty description │ +│ │ │ should be wrapped in the table │ that should be wrapped in the │ +│ │ │ │ table │ +│ Added │ schema.orders.properties.region │ │ │ +│ Added │ schema.orders.properties.region.logicalType │ │ string │ +│ Added │ schema.orders.properties.region.required │ │ False │ +│ Updated │ schema.orders.properties.total_amount.required │ False │ True │ +│ Updated │ slaProperties.availability.value │ 99.9% │ 99.5% │ +│ Updated │ version │ 1.0.0 │ 2.0.0 │ +╰─────────┴──────────────────────────────────────────────────────────┴────────────────────────────────┴───────────────────────────────╯ diff --git a/tests/fixtures/changelog/helper/generate_golden.py b/tests/fixtures/changelog/helper/generate_golden.py new file mode 100644 index 000000000..18ff2036f --- /dev/null +++ b/tests/fixtures/changelog/helper/generate_golden.py @@ -0,0 +1,54 @@ +""" +generate_golden.py — Regenerate changelog golden fixtures +---------------------------------------------------------- +Run this script whenever the changelog text output intentionally changes and the +golden file in tests/fixtures/changelog/ needs to be updated. 
+ +Usage (from the repo root): + python tests/fixtures/changelog/helper/generate_golden.py + +Golden files written: + tests/fixtures/changelog/golden_changelog_text.txt + +After running, review the diff with git and commit if the changes are expected: + git diff tests/fixtures/changelog/ +""" + +import io +import os +import sys + +from rich.console import Console + +FIXTURE_DIR = os.path.join(os.path.dirname(__file__), "..") +REPO_ROOT = os.path.join(os.path.dirname(__file__), "..", "..", "..", "..") + +V1 = os.path.normpath(os.path.join(REPO_ROOT, "tests/fixtures/changelog/integration/changelog_integration_v1.yaml")) +V2 = os.path.normpath(os.path.join(REPO_ROOT, "tests/fixtures/changelog/integration/changelog_integration_v2.yaml")) + + +def generate(): + # Import here so the script can be run from the repo root with venv activated + from datacontract.data_contract import DataContract + from datacontract.output.text_changelog_results import write_text_changelog_results + + result = DataContract(data_contract_file=V1).changelog(DataContract(data_contract_file=V2)) + + buf = io.StringIO() + con = Console(file=buf, width=300, highlight=False, no_color=True) + old_stdout = sys.stdout + sys.stdout = buf + try: + write_text_changelog_results(result, con) + finally: + sys.stdout = old_stdout + + text_path = os.path.normpath(os.path.join(FIXTURE_DIR, "golden_changelog_text.txt")) + with open(text_path, "w", encoding="utf-8") as f: + f.write(buf.getvalue()) + print(f"Written: {text_path}") + print("\nDone. 
Review changes with: git diff tests/fixtures/changelog/") + + +if __name__ == "__main__": + generate() diff --git a/tests/fixtures/changelog/integration/changelog_integration_v1.yaml b/tests/fixtures/changelog/integration/changelog_integration_v1.yaml new file mode 100644 index 000000000..a4201979b --- /dev/null +++ b/tests/fixtures/changelog/integration/changelog_integration_v1.yaml @@ -0,0 +1,33 @@ +apiVersion: v3.0.2 +kind: DataContract +id: orders-contract-001 +status: active +version: 1.0.0 +schema: + - name: orders + physicalName: orders_tbl + properties: + - name: order_id + logicalType: string + required: true + description: Unique order ID and a rather lenghty description that should be wrapped in the table + - name: customer_id + logicalType: string + required: true + - name: order_date + logicalType: string + required: true + - name: total_amount + logicalType: number + required: false +servers: + - server: production + type: snowflake + account: example-account + database: PROD_DB + schema: PUBLIC +slaProperties: + - property: availability + value: "99.9%" + - property: latency + value: "500ms" diff --git a/tests/fixtures/changelog/integration/changelog_integration_v2.yaml b/tests/fixtures/changelog/integration/changelog_integration_v2.yaml new file mode 100644 index 000000000..5557ea0d5 --- /dev/null +++ b/tests/fixtures/changelog/integration/changelog_integration_v2.yaml @@ -0,0 +1,63 @@ +apiVersion: v3.0.2 +kind: DataContract +id: orders-contract-001 +status: active +version: 2.0.0 +schema: + - name: orders + physicalName: orders_tbl + properties: + - name: order_id + logicalType: string + required: true + description: Unique order ID and another rather lenghty description that should be wrapped in the table + - name: order_date + logicalType: date + required: true + - name: total_amount + logicalType: number + required: true + - name: region + logicalType: string + required: false + - name: customers + physicalName: customers_tbl + properties: + - 
name: customer_id + logicalType: string + required: true + primaryKey: true + description: Unique order ID + - name: email + logicalType: string + required: true + classification: confidential + encryptedName: email_encrypt + - name: full_name + logicalType: string + required: true + businessName: Customer Full Name + - name: date_of_birth + logicalType: date + required: false + classification: restricted + - name: country + logicalType: string + required: false + partitioned: true + partitionKeyPosition: 1 + - name: created_at + logicalType: timestamp + required: true + description: Record creation timestamp +servers: + - server: production + type: snowflake + account: example-account + database: PROD_DB + schema: PUBLIC +slaProperties: + - property: availability + value: "99.5%" + - property: latency + value: "500ms" diff --git a/tests/fixtures/changelog/unit/changelog_unit_v1.yaml b/tests/fixtures/changelog/unit/changelog_unit_v1.yaml new file mode 100644 index 000000000..657b1fa5c --- /dev/null +++ b/tests/fixtures/changelog/unit/changelog_unit_v1.yaml @@ -0,0 +1,95 @@ +apiVersion: v3.0.2 +kind: DataContract +id: orders-contract-001 +name: Orders Contract +version: 1.0.0 +status: active +tenant: acme +domain: sales + +description: + purpose: Provides order data for analytics + usage: Used by the analytics team + limitations: Last 2 years only + customProperties: + - property: sensitivity + value: internal + - property: data-owner + value: data-platform-team + +price: + priceAmount: 0 + priceCurrency: USD + priceUnit: monthly + +schema: + - name: orders + physicalName: orders_tbl + customProperties: + - property: domain + value: sales + quality: + - name: row_count + type: sql + mustBeGreaterThan: 0 + properties: + - name: order_id + logicalType: string + required: true + description: Unique order ID + - name: customer_id + logicalType: string + required: true + - name: order_date + logicalType: string + required: true + - name: total_amount + logicalType: 
number + required: false + quality: + - name: positive + type: sql + mustBeGreaterThan: 0 + customProperties: + - property: pii + value: "false" + +servers: + - server: production + type: snowflake + database: PROD_DB + roles: + - role: reader + access: read + - role: writer + access: write + +slaProperties: + - property: availability + value: "99.9%" + - property: latency + value: "500ms" + +roles: + - role: admin + access: write + - role: analyst + access: read + +support: + - channel: slack + url: https://slack.example.com/data-contracts + +customProperties: + - property: owner + value: data-platform-team + - property: classification + value: internal + +team: + name: Data Platform + members: + - username: alice + role: lead + - username: bob + role: engineer diff --git a/tests/fixtures/changelog/unit/changelog_unit_v2.yaml b/tests/fixtures/changelog/unit/changelog_unit_v2.yaml new file mode 100644 index 000000000..4ef17bc10 --- /dev/null +++ b/tests/fixtures/changelog/unit/changelog_unit_v2.yaml @@ -0,0 +1,106 @@ +apiVersion: v3.0.2 +kind: DataContract +id: orders-contract-001 +name: Orders Contract v2 # changed: Orders Contract → Orders Contract v2 +version: 2.0.0 # changed: 1.0.0 → 2.0.0 +status: deprecated # changed: active → deprecated +tenant: acme +domain: commerce # changed: sales → commerce + +description: + purpose: Provides order and line item data # changed + usage: Used by the analytics team + limitations: Last 2 years only + customProperties: + - property: data-owner # reordered (tests stability) + value: data-platform-team + - property: sensitivity + value: confidential # changed: internal → confidential + +price: + priceAmount: 100 # changed: 0 → 100 + priceCurrency: USD + priceUnit: monthly + +schema: + - name: orders + physicalName: orders_tbl + customProperties: + - property: domain + value: finance # changed: sales → finance + quality: + - name: row_count + type: sql + mustBeGreaterThan: 100 # changed: 0 → 100 + properties: + - name: order_id 
+ logicalType: string + required: true + description: Unique order identifier # changed + - name: order_date + logicalType: date # changed: string → date + required: true + - name: total_amount + logicalType: number + required: true # changed: false → true + quality: + - name: positive + type: sql + mustBeGreaterThan: 1 # changed: 0 → 1 + customProperties: + - property: pii + value: "true" # changed: false → true + - name: region + logicalType: string # added field + required: false + - name: customers # added schema object + physicalName: customers_tbl + properties: + - name: customer_id + logicalType: string + required: true + +servers: + - server: production + type: snowflake + database: PROD_DB + roles: + - role: reader + access: read + # writer role removed from server + +slaProperties: + - property: availability + value: "99.5%" # changed: 99.9% → 99.5% + - property: latency + value: "500ms" + +roles: + - role: admin + access: write + - role: analyst + access: read + - role: viewer # added top-level role + access: read + +support: + - channel: slack + url: https://slack.example.com/data-contracts + - channel: email # added support channel + url: mailto:data-contracts@example.com + +customProperties: + - property: owner + value: data-platform-team + - property: classification + value: confidential # changed: internal → confidential + +team: + name: Data Platform + members: + - username: alice + role: lead + - username: bob + role: senior-engineer # changed: engineer → senior-engineer + - username: carol # added team member + role: engineer diff --git a/tests/test_api.py b/tests/test_api.py index 9e246d9b4..646fb725b 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -1,6 +1,9 @@ +from unittest.mock import patch + from fastapi.testclient import TestClient from datacontract.api import app +from datacontract.model.exceptions import DataContractException client = TestClient(app) @@ -33,3 +36,89 @@ def test_export_jsonschema_dcs(): expected_json_schema = 
file.read() print(expected_json_schema) assert response.text == expected_json_schema + + +def test_changelog(): + with open("fixtures/changelog/integration/changelog_integration_v1.yaml", "r") as f: + v1 = f.read() + with open("fixtures/changelog/integration/changelog_integration_v2.yaml", "r") as f: + v2 = f.read() + response = client.post(url="/changelog", json={"v1": v1, "v2": v2}) + assert response.status_code == 200 + data = response.json() + assert "summary" in data + assert "entries" in data + assert len(data["entries"]) > 0 + assert len(data["summary"]) > 0 + entry = data["entries"][0] + assert "path" in entry + assert entry["type"] in ("added", "removed", "updated") + assert "old_value" in entry + assert "new_value" in entry + + +def test_changelog_invalid_yaml(): + invalid_yaml = "invalid: yaml: content: [" + response = client.post(url="/changelog", json={"v1": invalid_yaml, "v2": "valid: yaml"}) + assert response.status_code == 422 + detail = response.json()["detail"] + assert detail.startswith("Data Contract Validation Failure:") + assert "Cannot parse YAML" in detail + + +def test_changelog_invalid_data_contract(): + invalid_contract = """ + apiVersion: '1.0' + servers: + - type: invalid_type + """ + response = client.post(url="/changelog", json={"v1": invalid_contract, "v2": "valid: yaml"}) + assert response.status_code == 422 + assert "Invalid data contract" in response.json()["detail"] + + +def _valid_contract_yaml(): + with open("fixtures/changelog/integration/changelog_integration_v1.yaml", "r") as f: + return f.read() + + +def test_changelog_yaml_error_returns_422(): + import yaml + + with patch("datacontract.api.DataContract") as mock_dc: + mock_dc.side_effect = yaml.YAMLError("bad yaml") + response = client.post(url="/changelog", json={"v1": _valid_contract_yaml(), "v2": _valid_contract_yaml()}) + assert response.status_code == 422 + detail = response.json()["detail"] + assert detail.startswith("Invalid YAML:") + assert "bad yaml" in detail + + 
"""Tests for ``DataContract.changelog`` driven by the changelog integration fixtures."""

from datacontract.data_contract import DataContract
from datacontract.model.changelog import ChangelogResult, ChangelogType

V1 = "fixtures/changelog/integration/changelog_integration_v1.yaml"
V2 = "fixtures/changelog/integration/changelog_integration_v2.yaml"


def _changelog(before=V1, after=V2):
    """Return the changelog from the contract file ``before`` to the contract file ``after``."""
    source = DataContract(data_contract_file=before)
    target = DataContract(data_contract_file=after)
    return source.changelog(target)


def test_changelog_returns_changelog_result():
    assert isinstance(_changelog(), ChangelogResult)


def test_changelog_has_changes():
    assert _changelog().has_changes()


def test_changelog_no_changes():
    # Comparing a contract with itself must yield an empty changelog.
    unchanged = _changelog(V1, V1)
    assert not unchanged.has_changes()
    assert unchanged.entries == []
    assert unchanged.summary == []


def test_changelog_entry_types():
    seen_types = {entry.type for entry in _changelog().entries}
    for expected in (ChangelogType.added, ChangelogType.removed, ChangelogType.updated):
        assert expected in seen_types


def test_changelog_summary_is_rolled_up():
    changelog = _changelog()
    assert len(changelog.summary) < len(changelog.entries)


def test_changelog_summary_paths():
    summary_paths = [entry.path for entry in _changelog().summary]
    for expected in (
        "schema.customers",
        "schema.orders.properties.customer_id",
        "slaProperties.availability",
    ):
        assert expected in summary_paths


def test_changelog_entry_values():
    target_path = "schema.orders.properties.order_date.logicalType"
    matches = [entry for entry in _changelog().entries if entry.path == target_path]
    assert len(matches) == 1
    only = matches[0]
    assert only.type == ChangelogType.updated
    assert only.old_value == "string"
    assert only.new_value == "date"


def test_changelog_v1_v2_labels():
    changelog = _changelog()
    assert changelog.v1 == V1
    assert changelog.v2 == V2
TestBuildReportDataAdded — Added change entries (scalar and dict payloads) + TestBuildReportDataRemoved — Removed change entries + TestBuildReportDataChanged — Changed entries and scalar rollup to parent + TestBuildReportDataSummaryRollup — summary deduplication and count consistency + TestBuildReportDataTags — tag field changes + TestSummaryRollupScalarLeaves — scalar leaf rollup behaviour + TestDiff — diff(): semantic correctness (added/removed/changed/mid-list) + TestDiffFixtures — diff(): end-to-end using fixtures/changelog/unit/ + TestDiffFixturesPriceDescriptionScalars — diff(): price, description, and top-level scalar fields + TestBuildChangelog — build_changelog() with OpenDataContractStandard objects +""" + +import os +import tempfile + +import yaml +from open_data_contract_standard.model import OpenDataContractStandard + +from datacontract.changelog.changelog import _build_changelog_from_diff, build_changelog, diff + +REPORT = _build_changelog_from_diff + + +def _added(path: str, payload) -> dict: + return {"dictionary_item_added": {f"root['{path}']": payload}} + + +def _added_double_quotes(path: str, payload) -> dict: + return {"dictionary_item_added": {f'root["{path}"]': payload}} + + +def _removed(path: str, payload) -> dict: + return {"dictionary_item_removed": {f"root['{path}']": payload}} + + +def _changed(path: str, old, new) -> dict: + return {"values_changed": {f"root['{path}']": {"old_value": old, "new_value": new}}} + + +def _merge(*diffs: dict) -> dict: + """Merge multiple single-key DeepDiff dicts into one.""" + merged = {} + for d in diffs: + for k, v in d.items(): + merged.setdefault(k, {}).update(v) + return merged + + +class TestBuildReportDataStructure: + def test_returns_expected_top_level_keys(self): + rd = _build_changelog_from_diff({}) + assert set(rd.keys()) == {"source_label", "target_label", "header", "summary", "detail"} + + def test_header_contains_title_and_subtitle(self): + rd = _build_changelog_from_diff({}, 
source_label="v1.yaml", target_label="v2.yaml") + assert rd["header"]["title"] == "ODCS Data Contract Changelog" + assert "v1.yaml" in rd["header"]["subtitle"] + assert "v2.yaml" in rd["header"]["subtitle"] + + def test_source_and_target_labels_stored(self): + rd = _build_changelog_from_diff({}, source_label="before.yaml", target_label="after.yaml") + assert rd["source_label"] == "before.yaml" + assert rd["target_label"] == "after.yaml" + + def test_empty_diff_produces_zero_counts(self): + rd = _build_changelog_from_diff({}) + assert rd["summary"]["counts"] == {"added": 0, "removed": 0, "updated": 0} + assert rd["detail"]["counts"] == {"added": 0, "removed": 0, "updated": 0} + + def test_empty_diff_produces_empty_changes(self): + rd = _build_changelog_from_diff({}) + assert rd["summary"]["changes"] == [] + assert rd["detail"]["changes"] == [] + + def test_unknown_deepdiff_keys_ignored(self): + rd = _build_changelog_from_diff({"unknown_key": {"root['x']": 1}}) + assert rd["summary"]["changes"] == [] + + +class TestBuildReportDataAdded: + def test_added_scalar_appears_in_detail(self): + rd = _build_changelog_from_diff(_added("schema']['orders", "v")) + paths = [c["path"] for c in rd["detail"]["changes"]] + assert any("orders" in p for p in paths) + + def test_added_scalar_change_type(self): + rd = _build_changelog_from_diff(_added("schema']['orders", "val")) + match = next(c for c in rd["detail"]["changes"] if "orders" in c["path"]) + assert match["changeType"] == "Added" + + def test_added_scalar_has_new_value(self): + rd = _build_changelog_from_diff(_added("schema']['orders", "val")) + match = next(c for c in rd["detail"]["changes"] if c["path"] == "schema.orders") + assert match.get("new_value") == "val" + + def test_added_dict_expands_to_leaf_entries(self): + payload = {"physicalName": "orders_tbl", "description": "Orders"} + rd = _build_changelog_from_diff(_added("schema']['orders", payload)) + paths = [c["path"] for c in rd["detail"]["changes"]] + assert 
"schema.orders.physicalName" in paths + assert "schema.orders.description" in paths + + def test_added_dict_parent_entry_included(self): + payload = {"physicalName": "orders_tbl"} + rd = _build_changelog_from_diff(_added("schema']['orders", payload)) + paths = [c["path"] for c in rd["detail"]["changes"]] + assert "schema.orders" in paths + + def test_added_count_incremented(self): + rd = _build_changelog_from_diff(_added("schema']['orders", "v")) + assert rd["detail"]["counts"]["added"] >= 1 + + def test_added_appears_in_summary(self): + # Scalar Added rolls up to parent — use a 2-level path so it lands at schema.orders + rd = _build_changelog_from_diff(_added("schema']['orders']['physicalName", "v")) + paths = [c["path"] for c in rd["summary"]["changes"]] + assert any("orders" in p for p in paths) + + def test_added_double_quotes_path_parsing(self): + """Test that double-quoted paths are parsed correctly in both detail and summary""" + rd = _build_changelog_from_diff(_added_double_quotes('schema"]["orders"]["physicalName', "v")) + detail_paths = [c["path"] for c in rd["detail"]["changes"]] + assert "schema.orders.physicalName" in detail_paths + summary_paths = [c["path"] for c in rd["summary"]["changes"]] + assert any("orders" in p for p in summary_paths) + + +class TestBuildReportDataRemoved: + def test_removed_scalar_appears_in_detail(self): + rd = _build_changelog_from_diff(_removed("schema']['orders", "v")) + paths = [c["path"] for c in rd["detail"]["changes"]] + assert any("orders" in p for p in paths) + + def test_removed_scalar_has_old_value(self): + rd = _build_changelog_from_diff(_removed("schema']['orders", "val")) + match = next(c for c in rd["detail"]["changes"] if c["path"] == "schema.orders") + assert match.get("old_value") == "val" + + def test_removed_dict_expands_to_leaf_entries(self): + payload = {"logicalType": "string", "required": True} + rd = _build_changelog_from_diff(_removed("schema']['orders']['properties']['amount", payload)) + paths = 
class TestBuildReportDataSummaryRollup:
    """Summary rollup: one entry per parent path, and counts that tally the listed changes."""

    @staticmethod
    def _assert_counts_match_changes(section: dict) -> None:
        """Assert that ``section["counts"]`` equals the per-type tally of ``section["changes"]``."""
        changes = section["changes"]
        for count_key, change_type in (("added", "Added"), ("removed", "Removed"), ("updated", "Updated")):
            assert section["counts"][count_key] == sum(1 for c in changes if c["changeType"] == change_type)

    def test_multiple_scalar_changes_on_same_parent_produce_one_summary_entry(self):
        # ``raw_diff`` rather than ``diff``: the module imports a ``diff`` function.
        raw_diff = _merge(
            _changed("schema']['orders']['properties']['order_date']['logicalType", "string", "date"),
            _changed("schema']['orders']['properties']['order_date']['description", "old desc", "new desc"),
        )
        rd = _build_changelog_from_diff(raw_diff)
        order_date_entries = [c for c in rd["summary"]["changes"] if c["path"] == "schema.orders.properties.order_date"]
        assert len(order_date_entries) == 1

    def test_summary_change_type_is_changed_when_field_both_added_and_removed(self):
        # Scalar Added + Removed on the same parent path collapse to Updated.
        # Use a 3-level path so rollup lands at schema.orders.properties.order_id
        raw_diff = _merge(
            _added("schema']['orders']['properties']['order_id']['businessName", "Order ID"),
            _removed("schema']['orders']['properties']['order_id']['description", "Old desc"),
        )
        rd = _build_changelog_from_diff(raw_diff)
        entry = next(c for c in rd["summary"]["changes"] if c["path"] == "schema.orders.properties.order_id")
        assert entry["changeType"] == "Updated"

    def test_summary_counts_match_summary_changes(self):
        raw_diff = _merge(
            _added("schema']['customers", {"physicalName": "c"}),
            _removed("schema']['orders']['properties']['customer_id", {"logicalType": "string"}),
            _changed("slaProperties']['availability']['value", "99.9%", "99.5%"),
        )
        rd = _build_changelog_from_diff(raw_diff)
        self._assert_counts_match_changes(rd["summary"])

    def test_detail_counts_match_detail_changes(self):
        raw_diff = _merge(
            _added("schema']['customers", {"physicalName": "c"}),
            _changed("slaProperties']['availability']['value", "99.9%", "99.5%"),
        )
        rd = _build_changelog_from_diff(raw_diff)
        # Check the *detail* section. This test previously read rd["summary"],
        # so detail counts were never compared against detail changes.
        self._assert_counts_match_changes(rd["detail"])

    def test_detail_changes_sorted_by_path(self):
        raw_diff = _merge(
            _added("schema']['orders", "v"),
            _added("schema']['customers", "v"),
        )
        rd = _build_changelog_from_diff(raw_diff)
        paths = [c["path"] for c in rd["detail"]["changes"]]
        assert paths == sorted(paths)
new_value/old_value on the parent path.""" + + def _tag_diff(self, v1_tags, v2_tags, location="top"): + """Build report_data from a synthetic tags diff at the given location.""" + if location == "top": + v1 = {"apiVersion": "v3.0.2", "kind": "DataContract", "id": "t", "tags": v1_tags} + v2 = {"apiVersion": "v3.0.2", "kind": "DataContract", "id": "t", "tags": v2_tags} + elif location == "schema": + v1 = { + "apiVersion": "v3.0.2", + "kind": "DataContract", + "id": "t", + "schema": [{"name": "orders", "physicalName": "orders_tbl", "tags": v1_tags}], + } + v2 = { + "apiVersion": "v3.0.2", + "kind": "DataContract", + "id": "t", + "schema": [{"name": "orders", "physicalName": "orders_tbl", "tags": v2_tags}], + } + else: + v1 = { + "apiVersion": "v3.0.2", + "kind": "DataContract", + "id": "t", + "schema": [ + { + "name": "orders", + "physicalName": "orders_tbl", + "properties": [{"name": "order_id", "logicalType": "string", "tags": v1_tags}], + } + ], + } + v2 = { + "apiVersion": "v3.0.2", + "kind": "DataContract", + "id": "t", + "schema": [ + { + "name": "orders", + "physicalName": "orders_tbl", + "properties": [{"name": "order_id", "logicalType": "string", "tags": v2_tags}], + } + ], + } + from datacontract.changelog.changelog import diff + + raw = diff(v1, v2) + return _build_changelog_from_diff(raw) + + def test_added_tag_path_includes_tag_value(self): + rd = self._tag_diff(["analytics"], ["analytics", "pii"]) + paths = [c["path"] for c in rd["detail"]["changes"]] + assert "tags.pii" in paths + + def test_removed_tag_path_includes_tag_value(self): + rd = self._tag_diff(["analytics", "pii"], ["analytics"]) + paths = [c["path"] for c in rd["detail"]["changes"]] + assert "tags.pii" in paths + + def test_added_tag_has_no_new_value_field(self): + rd = self._tag_diff(["analytics"], ["analytics", "pii"]) + tag_change = next(c for c in rd["detail"]["changes"] if c["path"] == "tags.pii") + assert "new_value" not in tag_change + assert "old_value" not in tag_change + + def 
test_added_tag_change_type_is_added(self): + rd = self._tag_diff(["analytics"], ["analytics", "pii"]) + tag_change = next(c for c in rd["detail"]["changes"] if c["path"] == "tags.pii") + assert tag_change["changeType"] == "Added" + + def test_removed_tag_change_type_is_removed(self): + rd = self._tag_diff(["analytics", "pii"], ["analytics"]) + tag_change = next(c for c in rd["detail"]["changes"] if c["path"] == "tags.pii") + assert tag_change["changeType"] == "Removed" + + def test_summary_rolls_up_to_tags_parent(self): + rd = self._tag_diff(["analytics"], ["analytics", "pii", "transactions"]) + summary_paths = [c["path"] for c in rd["summary"]["changes"]] + assert "tags" in summary_paths + assert "tags.pii" not in summary_paths + assert "tags.transactions" not in summary_paths + + def test_schema_object_tag_uses_value_as_path_segment(self): + rd = self._tag_diff(["e-commerce"], ["e-commerce", "reporting"], location="schema") + paths = [c["path"] for c in rd["detail"]["changes"]] + assert "schema.orders.tags.reporting" in paths + + def test_schema_property_tag_uses_value_as_path_segment(self): + rd = self._tag_diff(["primary-key"], ["primary-key", "required"], location="property") + paths = [c["path"] for c in rd["detail"]["changes"]] + assert "schema.orders.properties.order_id.tags.required" in paths + + def test_unchanged_tags_produce_no_diff(self): + rd = self._tag_diff(["analytics", "pii"], ["analytics", "pii"]) + assert rd["detail"]["changes"] == [] + + def test_reordered_tags_produce_no_diff(self): + rd = self._tag_diff(["analytics", "pii"], ["pii", "analytics"]) + assert rd["detail"]["changes"] == [] + + +class TestSummaryRollupScalarLeaves: + """Scalar Added/Removed leaf fields roll up to their parent in the summary, + consistent with how scalar Changed fields behave.""" + + def _rd(self, *diffs): + return _build_changelog_from_diff(_merge(*diffs)) + + def test_scalar_added_rolls_up_to_parent(self): + rd = self._rd(_added("schema']['orders']['businessName", 
"Orders")) + paths = [c["path"] for c in rd["summary"]["changes"]] + assert "schema.orders" in paths + assert "schema.orders.businessName" not in paths + + def test_scalar_removed_rolls_up_to_parent(self): + rd = self._rd(_removed("schema']['orders']['description", "old desc")) + paths = [c["path"] for c in rd["summary"]["changes"]] + assert "schema.orders" in paths + assert "schema.orders.description" not in paths + + def test_scalar_added_parent_change_type_is_added(self): + rd = self._rd(_added("schema']['orders']['businessName", "Orders")) + match = next(c for c in rd["summary"]["changes"] if c["path"] == "schema.orders") + assert match["changeType"] == "Added" + + def test_scalar_removed_parent_change_type_is_removed(self): + rd = self._rd(_removed("schema']['orders']['description", "old")) + match = next(c for c in rd["summary"]["changes"] if c["path"] == "schema.orders") + assert match["changeType"] == "Removed" + + def test_mixed_add_remove_same_parent_collapses_to_updated(self): + rd = self._rd( + _added("schema']['orders']['businessName", "Orders"), + _removed("schema']['orders']['description", "old desc"), + ) + match = next(c for c in rd["summary"]["changes"] if c["path"] == "schema.orders") + assert match["changeType"] == "Updated" + paths = [c["path"] for c in rd["summary"]["changes"]] + assert "schema.orders.businessName" not in paths + assert "schema.orders.description" not in paths + + def test_mixed_add_scalar_changed_same_parent_collapses_to_updated(self): + rd = self._rd( + _added("schema']['orders']['businessName", "Orders"), + _changed("schema']['orders']['logicalType", "string", "integer"), + ) + match = next(c for c in rd["summary"]["changes"] if c["path"] == "schema.orders") + assert match["changeType"] == "Updated" + + def test_dict_added_does_not_roll_up(self): + """A whole dict payload (e.g. 
a new schema object) should not roll up — + only scalar leafs do.""" + rd = self._rd(_added("schema']['customers", {"physicalName": "customers_tbl"})) + paths = [c["path"] for c in rd["summary"]["changes"]] + assert "schema.customers" in paths + assert "schema" not in paths + + def test_top_level_scalar_added_stays_at_top_level(self): + """A scalar at depth 1 (e.g. root['version']) has no parent to roll up to.""" + rd = self._rd(_added("version", "2.0.0")) + paths = [c["path"] for c in rd["summary"]["changes"]] + assert "version" in paths + + def test_summary_counts_consistent_after_rollup(self): + rd = self._rd( + _added("schema']['orders']['businessName", "Orders"), + _removed("schema']['orders']['description", "old"), + ) + counts = rd["summary"]["counts"] + changes = rd["summary"]["changes"] + assert counts["added"] == sum(1 for c in changes if c["changeType"] == "Added") + assert counts["removed"] == sum(1 for c in changes if c["changeType"] == "Removed") + assert counts["updated"] == sum(1 for c in changes if c["changeType"] == "Updated") + + def test_detail_still_shows_full_leaf_paths(self): + """Rollup only affects summary — detail must still show the full leaf paths.""" + rd = self._rd( + _added("schema']['orders']['businessName", "Orders"), + _removed("schema']['orders']['description", "old desc"), + ) + detail_paths = [c["path"] for c in rd["detail"]["changes"]] + assert "schema.orders.businessName" in detail_paths + assert "schema.orders.description" in detail_paths + + +# --------------------------------------------------------------------------- +# Helpers for diff() tests +# --------------------------------------------------------------------------- + +MINIMAL_CONTRACT = { + "apiVersion": "v3.0.2", + "kind": "DataContract", + "id": "test-001", +} + + +def _load_contract(path: str) -> dict: + with open(path, encoding="utf-8") as f: + raw = yaml.safe_load(f) + return OpenDataContractStandard.model_validate(raw).model_dump(exclude_none=True, 
by_alias=True) + + +def _write_yaml(data: dict, path: str) -> None: + with open(path, "w") as f: + yaml.dump(data, f) + + +def _contract(**kwargs) -> dict: + return {**MINIMAL_CONTRACT, **kwargs} + + +class TestDiff: + def _base(self) -> dict: + return _contract( + schema=[ + { + "name": "orders", + "properties": [ + {"name": "order_id", "logicalType": "string", "required": True}, + {"name": "amount", "logicalType": "number", "required": False}, + ], + } + ] + ) + + def test_identical_contracts_produce_no_diff(self): + c = self._base() + result = diff(c, c) + assert result == {} + + def test_field_added(self): + v1 = self._base() + v2 = self._base() + v2["schema"][0]["properties"].append({"name": "region", "logicalType": "string"}) + result = diff(v1, v2) + assert "dictionary_item_added" in result + + def test_field_removed(self): + v1 = self._base() + v2 = self._base() + v2["schema"][0]["properties"] = [v2["schema"][0]["properties"][0]] # remove amount + result = diff(v1, v2) + assert "dictionary_item_removed" in result + + def test_field_type_changed(self): + v1 = self._base() + v2 = self._base() + v2["schema"][0]["properties"][0]["logicalType"] = "integer" + result = diff(v1, v2) + assert "values_changed" in result + + def test_schema_removed_mid_list_is_not_misreported_as_change(self): + v1 = _contract( + schema=[ + {"name": "orders", "physicalName": "orders_tbl"}, + {"name": "customers", "physicalName": "customers_tbl"}, + ] + ) + v2 = _contract( + schema=[ + {"name": "customers", "physicalName": "customers_tbl"}, + ] + ) + result = diff(v1, v2) + removed = result.get("dictionary_item_removed", {}) + changed = result.get("values_changed", {}) + assert any("orders" in k for k in removed) + assert not any("customers" in k for k in changed) + assert not any("customers" in k for k in removed) + + def test_sla_value_changed(self): + v1 = _contract(slaProperties=[{"property": "availability", "value": "99.9%"}]) + v2 = _contract(slaProperties=[{"property": 
"availability", "value": "99.5%"}]) + result = diff(v1, v2) + assert "values_changed" in result + + def test_server_added(self): + v1 = _contract(servers=[{"server": "production", "type": "snowflake"}]) + v2 = _contract( + servers=[ + {"server": "production", "type": "snowflake"}, + {"server": "staging", "type": "snowflake"}, + ] + ) + result = diff(v1, v2) + assert "dictionary_item_added" in result + + def test_server_role_added(self): + v1 = _contract( + servers=[ + { + "server": "production", + "type": "snowflake", + "roles": [ + {"role": "reader", "access": "read"}, + ], + } + ] + ) + v2 = _contract( + servers=[ + { + "server": "production", + "type": "snowflake", + "roles": [ + {"role": "reader", "access": "read"}, + {"role": "writer", "access": "write"}, + ], + } + ] + ) + result = diff(v1, v2) + added = result.get("dictionary_item_added", {}) + assert any("writer" in k for k in added) + + def test_server_role_removed(self): + v1 = _contract( + servers=[ + { + "server": "production", + "type": "snowflake", + "roles": [ + {"role": "reader", "access": "read"}, + {"role": "writer", "access": "write"}, + ], + } + ] + ) + v2 = _contract( + servers=[ + { + "server": "production", + "type": "snowflake", + "roles": [ + {"role": "reader", "access": "read"}, + ], + } + ] + ) + result = diff(v1, v2) + removed = result.get("dictionary_item_removed", {}) + assert any("writer" in k for k in removed) + + def test_schema_object_custom_property_changed(self): + v1 = _contract( + schema=[ + { + "name": "orders", + "customProperties": [ + {"property": "domain", "value": "sales"}, + ], + } + ] + ) + v2 = _contract( + schema=[ + { + "name": "orders", + "customProperties": [ + {"property": "domain", "value": "finance"}, + ], + } + ] + ) + result = diff(v1, v2) + changed = result.get("values_changed", {}) + assert any("domain" in k for k in changed) + + def test_schema_property_quality_rule_changed(self): + v1 = _contract( + schema=[ + { + "name": "orders", + "properties": [ + { + 
"name": "amount", + "logicalType": "number", + "quality": [{"name": "positive", "metric": "rowCount", "mustBeGreaterThan": 0}], + } + ], + } + ] + ) + v2 = _contract( + schema=[ + { + "name": "orders", + "properties": [ + { + "name": "amount", + "logicalType": "number", + "quality": [{"name": "positive", "metric": "rowCount", "mustBeGreaterThan": 100}], + } + ], + } + ] + ) + result = diff(v1, v2) + changed = result.get("values_changed", {}) + assert any("positive" in k for k in changed) + + def test_schema_property_custom_property_added(self): + v1 = _contract( + schema=[ + { + "name": "orders", + "properties": [ + { + "name": "amount", + "logicalType": "number", + } + ], + } + ] + ) + v2 = _contract( + schema=[ + { + "name": "orders", + "properties": [ + { + "name": "amount", + "logicalType": "number", + "customProperties": [{"property": "sensitivity", "value": "high"}], + } + ], + } + ] + ) + result = diff(v1, v2) + assert "dictionary_item_added" in result + + +class TestDiffFixtures: + FIXTURE_DIR = os.path.join(os.path.dirname(__file__), "fixtures", "changelog", "unit") + + def _generate(self): + v1 = _load_contract(os.path.join(self.FIXTURE_DIR, "changelog_unit_v1.yaml")) + v2 = _load_contract(os.path.join(self.FIXTURE_DIR, "changelog_unit_v2.yaml")) + return diff(v1, v2) + + def test_diff_returns_dict(self): + assert isinstance(self._generate(), dict) + + def test_diff_detects_known_changes(self): + result = self._generate() + added = result.get("dictionary_item_added", {}) + removed = result.get("dictionary_item_removed", {}) + changed = result.get("values_changed", {}) + assert any("customers" in k for k in added) + assert any("customer_id" in k for k in removed) + assert any("availability" in k for k in changed) + + def test_diff_identical_files_no_diff(self): + v1_path = os.path.join(self.FIXTURE_DIR, "changelog_unit_v1.yaml") + v = _load_contract(v1_path) + assert diff(v, v) == {} + + def test_diff_with_temp_files(self): + contract = 
_contract(schema=[{"name": "orders", "physicalName": "orders_tbl"}]) + with ( + tempfile.NamedTemporaryFile(suffix=".yaml", mode="w", delete=False) as f1, + tempfile.NamedTemporaryFile(suffix=".yaml", mode="w", delete=False) as f2, + ): + yaml.dump(contract, f1) + yaml.dump(contract, f2) + try: + v1 = _load_contract(f1.name) + v2 = _load_contract(f2.name) + assert diff(v1, v2) == {} + finally: + os.unlink(f1.name) + os.unlink(f2.name) + + def test_schema_object_custom_property_changed(self): + changed = self._generate().get("values_changed", {}) + assert any("domain" in k for k in changed) + + def test_schema_object_quality_changed(self): + changed = self._generate().get("values_changed", {}) + assert any("row_count" in k for k in changed) + + def test_schema_property_quality_changed(self): + changed = self._generate().get("values_changed", {}) + assert any("positive" in k for k in changed) + + def test_schema_property_custom_property_changed(self): + changed = self._generate().get("values_changed", {}) + assert any("pii" in k for k in changed) + + def test_server_role_removed(self): + removed = self._generate().get("dictionary_item_removed", {}) + assert any("writer" in k for k in removed) + + def test_top_level_role_added(self): + added = self._generate().get("dictionary_item_added", {}) + assert any("viewer" in k for k in added) + + def test_support_channel_added(self): + added = self._generate().get("dictionary_item_added", {}) + assert any("email" in k for k in added) + + def test_top_level_custom_property_changed(self): + changed = self._generate().get("values_changed", {}) + assert any("classification" in k for k in changed) + + def test_team_member_role_changed(self): + changed = self._generate().get("values_changed", {}) + assert any("bob" in k for k in changed) + + def test_team_member_added(self): + added = self._generate().get("dictionary_item_added", {}) + assert any("carol" in k for k in added) + + +class 
class TestBuildChangelog:
    """build_changelog() called with validated OpenDataContractStandard objects."""

    def _load(self, path: str) -> OpenDataContractStandard:
        """Load and validate the contract at ``path``, resolved relative to this test file."""
        # yaml and OpenDataContractStandard come from the module-level imports.
        # Explicit UTF-8 matches how _load_contract reads fixtures.
        full_path = os.path.join(os.path.dirname(__file__), path)
        with open(full_path, encoding="utf-8") as f:
            return OpenDataContractStandard.model_validate(yaml.safe_load(f))

    def test_returns_expected_top_level_keys(self):
        v1 = self._load(V1_YAML)
        v2 = self._load(V2_YAML)
        result = build_changelog(v1, V1_YAML, v2, V2_YAML)
        assert set(result.keys()) == {"source_label", "target_label", "header", "summary", "detail"}

    def test_source_and_target_labels_from_files(self):
        v1 = self._load(V1_YAML)
        v2 = self._load(V2_YAML)
        result = build_changelog(v1, V1_YAML, v2, V2_YAML)
        assert result["source_label"] == V1_YAML
        assert result["target_label"] == V2_YAML

    def test_fallback_labels_when_file_is_none(self):
        v1 = self._load(V1_YAML)
        result = build_changelog(v1, None, v1, None)
        assert result["source_label"] == "v1"
        assert result["target_label"] == "v2"

    def test_no_changes_on_identical_contracts(self):
        v1 = self._load(V1_YAML)
        result = build_changelog(v1, V1_YAML, v1, V1_YAML)
        assert result["detail"]["changes"] == []

    def test_detects_changes_between_versions(self):
        v1 = self._load(V1_YAML)
        v2 = self._load(V2_YAML)
        result = build_changelog(v1, V1_YAML, v2, V2_YAML)
        counts = result["detail"]["counts"]
        # At least one change of any type must be reported.
        assert counts["added"] + counts["removed"] + counts["updated"] > 0
_normalize_relationships: schema-level and property-level + TestNormalizeDescription — normalize(): description.authDefs and customProperties + TestNormalizeServerCustomProperties — normalize(): server customProperties + TestNormalizeQualityNested — _normalize_quality: nested customProperties and authDefs + TestGeneratePriceDescriptionScalars — end-to-end normalize via diff() for price/desc fields +""" + +import yaml + +from datacontract.changelog.changelog import diff +from datacontract.changelog.normalize import ( + _normalize_auth_defs, + _normalize_by, + _normalize_properties, + _normalize_relationships, + normalize, +) + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + +MINIMAL_CONTRACT = { + "apiVersion": "v3.0.2", + "kind": "DataContract", + "id": "test-001", +} + + +def _write_yaml(data: dict, path: str) -> None: + with open(path, "w") as f: + yaml.dump(data, f) + + +def _contract(**kwargs) -> dict: + return {**MINIMAL_CONTRACT, **kwargs} + + +class TestNormalizeBy: + def test_keys_by_named_field(self): + items = [ + {"role": "admin", "access": "read"}, + {"role": "viewer", "access": "read"}, + ] + result = _normalize_by(items, "role") + assert set(result.keys()) == {"admin", "viewer"} + assert result["admin"] == {"access": "read"} + + def test_key_field_omitted_from_value(self): + items = [{"channel": "slack", "url": "https://slack.com"}] + result = _normalize_by(items, "channel") + assert "channel" not in result["slack"] + + def test_positional_fallback_when_key_absent(self): + items = [{"type": "sql", "rule": "count > 0"}, {"type": "sql"}] + result = _normalize_by(items, "name") + assert "__pos_0__" in result + assert "__pos_1__" in result + + def test_mixed_present_and_absent_key(self): + items = [ + {"name": "row_count", "rule": "count > 0"}, + {"rule": "no_nulls"}, # name absent + ] + result = _normalize_by(items, "name") + assert 
"row_count" in result + assert "__pos_1__" in result + + def test_empty_list(self): + assert _normalize_by([], "role") == {} + + +class TestNormalizeProperties: + def test_flat_properties_keyed_by_name(self): + props = [ + {"name": "order_id", "logicalType": "string"}, + {"name": "amount", "logicalType": "number"}, + ] + result = _normalize_properties(props) + assert set(result.keys()) == {"order_id", "amount"} + assert result["order_id"]["logicalType"] == "string" + assert "name" not in result["order_id"] + + def test_nested_properties_recursed(self): + props = [ + { + "name": "address", + "logicalType": "object", + "properties": [ + {"name": "street", "logicalType": "string"}, + {"name": "city", "logicalType": "string"}, + ], + } + ] + result = _normalize_properties(props) + assert isinstance(result["address"]["properties"], dict) + assert "street" in result["address"]["properties"] + assert "city" in result["address"]["properties"] + + def test_empty_properties(self): + assert _normalize_properties([]) == {} + + +class TestNormalize: + def test_schema_keyed_by_name(self): + contract = _contract( + schema=[ + {"name": "orders", "physicalName": "orders_tbl"}, + {"name": "customers", "physicalName": "customers_tbl"}, + ] + ) + result = normalize(contract) + assert isinstance(result["schema"], dict) + assert set(result["schema"].keys()) == {"orders", "customers"} + assert "name" not in result["schema"]["orders"] + + def test_schema_properties_keyed_by_name(self): + contract = _contract( + schema=[ + { + "name": "orders", + "properties": [ + {"name": "order_id", "logicalType": "string"}, + ], + } + ] + ) + result = normalize(contract) + assert isinstance(result["schema"]["orders"]["properties"], dict) + assert "order_id" in result["schema"]["orders"]["properties"] + + def test_sla_properties_keyed_by_property(self): + contract = _contract( + slaProperties=[ + {"property": "availability", "value": "99.9%"}, + {"property": "latency", "value": "500ms"}, + ] + ) + result 
= normalize(contract) + assert isinstance(result["slaProperties"], dict) + assert "availability" in result["slaProperties"] + assert result["slaProperties"]["availability"] == {"value": "99.9%"} + + def test_servers_keyed_by_server(self): + contract = _contract( + servers=[ + {"server": "production", "type": "snowflake"}, + {"server": "staging", "type": "snowflake"}, + ] + ) + result = normalize(contract) + assert isinstance(result["servers"], dict) + assert set(result["servers"].keys()) == {"production", "staging"} + assert "server" not in result["servers"]["production"] + + def test_roles_keyed_by_role(self): + contract = _contract( + roles=[ + {"role": "admin", "access": "write"}, + {"role": "viewer", "access": "read"}, + ] + ) + result = normalize(contract) + assert isinstance(result["roles"], dict) + assert "admin" in result["roles"] + + def test_support_keyed_by_channel(self): + contract = _contract( + support=[ + {"channel": "slack", "url": "https://slack.com"}, + ] + ) + result = normalize(contract) + assert "slack" in result["support"] + + def test_custom_properties_keyed_by_property(self): + contract = _contract( + customProperties=[ + {"property": "domain", "value": "sales"}, + {"property": "team_name", "value": "orders"}, + ] + ) + result = normalize(contract) + assert "domain" in result["customProperties"] + assert "team_name" in result["customProperties"] + + def test_team_members_keyed_by_username(self): + contract = _contract( + team={ + "name": "Data Team", + "members": [ + {"username": "alice", "role": "lead"}, + {"username": "bob", "role": "engineer"}, + ], + } + ) + result = normalize(contract) + assert "alice" in result["team"]["members"] + assert "bob" in result["team"]["members"] + + def test_team_deprecated_array_form(self): + contract = _contract( + team=[ + {"username": "alice", "role": "lead"}, + ] + ) + result = normalize(contract) + assert isinstance(result["team"], dict) + assert "alice" in result["team"] + + def 
test_quality_keyed_by_name_with_positional_fallback(self): + contract = _contract( + schema=[ + { + "name": "orders", + "quality": [ + {"name": "row_count", "metric": "rowCount"}, + {"metric": "duplicateValues"}, # no name + ], + } + ] + ) + result = normalize(contract) + quality = result["schema"]["orders"]["quality"] + assert "row_count" in quality + assert "__pos_1__" in quality + + def test_non_list_fields_unchanged(self): + contract = _contract(description="a contract") + result = normalize(contract) + assert result["description"] == "a contract" + + def test_schema_object_custom_properties_keyed_by_property(self): + contract = _contract( + schema=[ + { + "name": "orders", + "customProperties": [ + {"property": "domain", "value": "sales"}, + {"property": "team_name", "value": "orders"}, + ], + } + ] + ) + result = normalize(contract) + cp = result["schema"]["orders"]["customProperties"] + assert isinstance(cp, dict) + assert "domain" in cp + assert "team_name" in cp + + def test_schema_object_quality_keyed_by_name(self): + contract = _contract( + schema=[ + { + "name": "orders", + "quality": [ + {"name": "row_count", "metric": "rowCount"}, + {"name": "no_nulls", "metric": "nullValues"}, + ], + } + ] + ) + result = normalize(contract) + quality = result["schema"]["orders"]["quality"] + assert isinstance(quality, dict) + assert "row_count" in quality + assert "no_nulls" in quality + + def test_schema_property_quality_keyed_by_name(self): + contract = _contract( + schema=[ + { + "name": "orders", + "properties": [ + { + "name": "amount", + "logicalType": "number", + "quality": [ + {"name": "positive", "metric": "rowCount"}, + ], + } + ], + } + ] + ) + result = normalize(contract) + quality = result["schema"]["orders"]["properties"]["amount"]["quality"] + assert isinstance(quality, dict) + assert "positive" in quality + + def test_schema_property_custom_properties_keyed_by_property(self): + contract = _contract( + schema=[ + { + "name": "orders", + "properties": [ 
+ { + "name": "amount", + "logicalType": "number", + "customProperties": [ + {"property": "sensitivity", "value": "high"}, + ], + } + ], + } + ] + ) + result = normalize(contract) + cp = result["schema"]["orders"]["properties"]["amount"]["customProperties"] + assert isinstance(cp, dict) + assert "sensitivity" in cp + + def test_server_roles_keyed_by_role(self): + contract = _contract( + servers=[ + { + "server": "production", + "type": "snowflake", + "roles": [ + {"role": "admin", "access": "write"}, + {"role": "reader", "access": "read"}, + ], + } + ] + ) + result = normalize(contract) + roles = result["servers"]["production"]["roles"] + assert isinstance(roles, dict) + assert "admin" in roles + assert "reader" in roles + + def test_server_without_server_key_skipped(self): + contract = _contract( + servers=[ + {"type": "snowflake"}, # no "server" key — skip + {"server": "production", "type": "snowflake"}, # valid — retain + ] + ) + result = normalize(contract) + assert isinstance(result["servers"], dict) + assert "production" in result["servers"] + assert len(result["servers"]) == 1 + + def test_no_mutation_of_input(self): + contract = _contract(schema=[{"name": "orders"}]) + original = _contract(schema=[{"name": "orders"}]) + normalize(contract) + assert contract == original + + +# --------------------------------------------------------------------------- +# _diff — semantic correctness +# --------------------------------------------------------------------------- + + +class TestNormalizeAuthDefs: + def test_keys_by_url(self): + items = [ + {"url": "https://example.com/wiki", "type": "definition"}, + {"url": "https://example.com/slack", "type": "support"}, + ] + result = _normalize_auth_defs(items) + assert set(result.keys()) == {"https://example.com/wiki", "https://example.com/slack"} + + def test_all_fields_preserved_in_value(self): + items = [{"url": "https://example.com/wiki", "type": "definition", "description": "main ref"}] + result = 
_normalize_auth_defs(items) + assert result["https://example.com/wiki"]["type"] == "definition" + assert result["https://example.com/wiki"]["description"] == "main ref" + + def test_id_fallback_when_url_absent(self): + items = [{"id": "def-001", "type": "definition"}] + result = _normalize_auth_defs(items) + assert "def-001" in result + + def test_positional_fallback_when_url_and_id_absent(self): + items = [{"type": "definition"}, {"type": "support"}] + result = _normalize_auth_defs(items) + assert "__pos_0__" in result + assert "__pos_1__" in result + + def test_empty_list_returns_empty_dict(self): + assert _normalize_auth_defs([]) == {} + + def test_reorder_produces_no_diff(self): + v1 = _contract( + authoritativeDefinitions=[ + {"url": "https://example.com/wiki", "type": "definition"}, + {"url": "https://example.com/slack", "type": "support"}, + ] + ) + v2 = _contract( + authoritativeDefinitions=[ + {"url": "https://example.com/slack", "type": "support"}, + {"url": "https://example.com/wiki", "type": "definition"}, + ] + ) + assert diff(v1, v2) == {} + + def test_url_change_detected(self): + v1 = _contract(authoritativeDefinitions=[{"url": "https://example.com/wiki", "type": "definition"}]) + v2 = _contract(authoritativeDefinitions=[{"url": "https://example.com/NEW", "type": "definition"}]) + result = diff(v1, v2) + # Changing a url changes the dict key — DeepDiff reports this as + # dictionary_item_added + dictionary_item_removed or values_changed + assert result != {} + + def test_type_change_detected(self): + v1 = _contract(authoritativeDefinitions=[{"url": "https://example.com/wiki", "type": "definition"}]) + v2 = _contract(authoritativeDefinitions=[{"url": "https://example.com/wiki", "type": "policy"}]) + result = diff(v1, v2) + assert "values_changed" in result + + def test_schema_object_auth_defs_reorder_no_diff(self): + def contract(defs): + return _contract(schema=[{"name": "orders", "authoritativeDefinitions": defs}]) + + v1 = contract( + [ + {"url": 
"https://a.com", "type": "definition"}, + {"url": "https://b.com", "type": "support"}, + ] + ) + v2 = contract( + [ + {"url": "https://b.com", "type": "support"}, + {"url": "https://a.com", "type": "definition"}, + ] + ) + assert diff(v1, v2) == {} + + def test_schema_property_auth_defs_reorder_no_diff(self): + def contract(defs): + return _contract( + schema=[ + { + "name": "orders", + "properties": [ + { + "name": "order_id", + "logicalType": "string", + "authoritativeDefinitions": defs, + } + ], + } + ] + ) + + v1 = contract( + [ + {"url": "https://a.com", "type": "definition"}, + {"url": "https://b.com", "type": "support"}, + ] + ) + v2 = contract( + [ + {"url": "https://b.com", "type": "support"}, + {"url": "https://a.com", "type": "definition"}, + ] + ) + assert diff(v1, v2) == {} + + def test_description_auth_defs_reorder_no_diff(self): + v1 = _contract( + **{ + "description": { + "purpose": "test", + "authoritativeDefinitions": [ + {"url": "https://a.com", "type": "policy"}, + {"url": "https://b.com", "type": "definition"}, + ], + } + } + ) + v2 = _contract( + **{ + "description": { + "purpose": "test", + "authoritativeDefinitions": [ + {"url": "https://b.com", "type": "definition"}, + {"url": "https://a.com", "type": "policy"}, + ], + } + } + ) + assert diff(v1, v2) == {} + + +class TestNormalizeRelationships: + def test_schema_level_keyed_by_from_to(self): + items = [ + {"from": "orders.order_id", "to": "line_items.order_id", "type": "foreignKey"}, + {"from": "orders.customer_id", "to": "customers.customer_id", "type": "foreignKey"}, + ] + result = _normalize_relationships(items, schema_level=True) + assert "orders.order_id:line_items.order_id" in result + assert "orders.customer_id:customers.customer_id" in result + + def test_property_level_keyed_by_to(self): + items = [ + {"to": "customers.customer_id", "type": "foreignKey"}, + ] + result = _normalize_relationships(items, schema_level=False) + assert "customers.customer_id" in result + + def 
test_positional_fallback_when_fields_absent(self): + items = [{"type": "foreignKey"}] + result = _normalize_relationships(items, schema_level=True) + assert "__pos_0__" in result + + def test_empty_list_returns_empty_dict(self): + assert _normalize_relationships([], schema_level=True) == {} + + def test_schema_relationships_reorder_no_diff(self): + def contract(rels): + return _contract(schema=[{"name": "orders", "relationships": rels}]) + + v1 = contract( + [ + {"from": "orders.order_id", "to": "line_items.order_id", "type": "foreignKey"}, + {"from": "orders.customer_id", "to": "customers.customer_id", "type": "foreignKey"}, + ] + ) + v2 = contract( + [ + {"from": "orders.customer_id", "to": "customers.customer_id", "type": "foreignKey"}, + {"from": "orders.order_id", "to": "line_items.order_id", "type": "foreignKey"}, + ] + ) + assert diff(v1, v2) == {} + + def test_property_relationships_reorder_no_diff(self): + def contract(rels): + return _contract( + schema=[ + { + "name": "orders", + "properties": [ + { + "name": "order_id", + "logicalType": "string", + "relationships": rels, + } + ], + } + ] + ) + + v1 = contract( + [ + {"to": "line_items.order_id", "type": "foreignKey"}, + {"to": "audit_log.order_id", "type": "reference"}, + ] + ) + v2 = contract( + [ + {"to": "audit_log.order_id", "type": "reference"}, + {"to": "line_items.order_id", "type": "foreignKey"}, + ] + ) + assert diff(v1, v2) == {} + + def test_relationship_added_detected(self): + v1 = _contract( + schema=[ + { + "name": "orders", + "relationships": [ + {"from": "orders.order_id", "to": "line_items.order_id", "type": "foreignKey"}, + ], + } + ] + ) + v2 = _contract( + schema=[ + { + "name": "orders", + "relationships": [ + {"from": "orders.order_id", "to": "line_items.order_id", "type": "foreignKey"}, + {"from": "orders.customer_id", "to": "customers.customer_id", "type": "foreignKey"}, + ], + } + ] + ) + result = diff(v1, v2) + added = result.get("dictionary_item_added", {}) + assert 
any("customer_id" in k for k in added) + + def test_relationship_type_change_detected(self): + def contract(t): + return _contract( + schema=[ + { + "name": "orders", + "relationships": [ + {"from": "orders.order_id", "to": "line_items.order_id", "type": t}, + ], + } + ] + ) + + result = diff(contract("foreignKey"), contract("reference")) + assert "values_changed" in result + + +class TestNormalizeDescription: + def test_description_purpose_change_detected(self): + v1 = _contract(**{"description": {"purpose": "Provides order data"}}) + v2 = _contract(**{"description": {"purpose": "Provides order and line item data"}}) + result = diff(v1, v2) + assert "values_changed" in result + changed = result["values_changed"] + assert any("purpose" in k for k in changed) + + def test_description_custom_property_change_detected(self): + v1 = _contract( + **{ + "description": { + "purpose": "test", + "customProperties": [ + {"property": "sensitivity", "value": "internal"}, + ], + } + } + ) + v2 = _contract( + **{ + "description": { + "purpose": "test", + "customProperties": [ + {"property": "sensitivity", "value": "confidential"}, + ], + } + } + ) + result = diff(v1, v2) + changed = result.get("values_changed", {}) + assert any("sensitivity" in k for k in changed) + + def test_description_custom_property_reorder_no_diff(self): + v1 = _contract( + **{ + "description": { + "purpose": "test", + "customProperties": [ + {"property": "sensitivity", "value": "internal"}, + {"property": "owner", "value": "data-team"}, + ], + } + } + ) + v2 = _contract( + **{ + "description": { + "purpose": "test", + "customProperties": [ + {"property": "owner", "value": "data-team"}, + {"property": "sensitivity", "value": "internal"}, + ], + } + } + ) + assert diff(v1, v2) == {} + + def test_description_custom_property_added(self): + v1 = _contract( + **{ + "description": { + "purpose": "test", + "customProperties": [ + {"property": "sensitivity", "value": "internal"}, + ], + } + } + ) + v2 = _contract( 
+ **{ + "description": { + "purpose": "test", + "customProperties": [ + {"property": "sensitivity", "value": "internal"}, + {"property": "owner", "value": "data-team"}, + ], + } + } + ) + result = diff(v1, v2) + added = result.get("dictionary_item_added", {}) + assert any("owner" in k for k in added) + + def test_description_auth_defs_reorder_no_diff(self): + v1 = _contract( + **{ + "description": { + "purpose": "test", + "authoritativeDefinitions": [ + {"url": "https://a.com", "type": "policy"}, + {"url": "https://b.com", "type": "definition"}, + ], + } + } + ) + v2 = _contract( + **{ + "description": { + "purpose": "test", + "authoritativeDefinitions": [ + {"url": "https://b.com", "type": "definition"}, + {"url": "https://a.com", "type": "policy"}, + ], + } + } + ) + assert diff(v1, v2) == {} + + def test_description_scalar_fields_all_detected(self): + """purpose, usage, and limitations are all plain strings — changes must be detected.""" + for field in ("purpose", "usage", "limitations"): + v1 = _contract(**{"description": {field: "original value"}}) + v2 = _contract(**{"description": {field: "updated value"}}) + result = diff(v1, v2) + assert "values_changed" in result, f"change in {field} not detected" + assert any(field in k for k in result["values_changed"]) + + +class TestNormalizeServerCustomProperties: + def _server(self, custom_props): + return _contract( + servers=[ + { + "server": "production", + "type": "snowflake", + "customProperties": custom_props, + } + ] + ) + + def test_custom_property_change_detected(self): + v1 = self._server([{"property": "cost-center", "value": "eng-001"}]) + v2 = self._server([{"property": "cost-center", "value": "eng-999"}]) + result = diff(v1, v2) + changed = result.get("values_changed", {}) + assert any("cost-center" in k for k in changed) + + def test_reorder_no_diff(self): + v1 = self._server( + [ + {"property": "team", "value": "data-platform"}, + {"property": "cost-center", "value": "eng-001"}, + ] + ) + v2 = 
self._server( + [ + {"property": "cost-center", "value": "eng-001"}, + {"property": "team", "value": "data-platform"}, + ] + ) + assert diff(v1, v2) == {} + + def test_change_with_reorder_path_includes_property_name(self): + """When value changes and list is simultaneously reordered, the path + must name the property (not use a positional index).""" + v1 = self._server( + [ + {"property": "team", "value": "data-platform"}, + {"property": "cost-center", "value": "eng-001"}, + {"property": "env", "value": "prod"}, + ] + ) + v2 = self._server( + [ + {"property": "env", "value": "prod"}, + {"property": "team", "value": "data-platform"}, + {"property": "cost-center", "value": "eng-999"}, + ] + ) + raw = diff(v1, v2) + changed = raw.get("values_changed", {}) + assert any("cost-center" in k for k in changed) + assert not any(k.endswith("][0]") or k.endswith("][1]") or k.endswith("][2]") for k in changed) + + def test_custom_property_added(self): + v1 = self._server([{"property": "team", "value": "data-platform"}]) + v2 = self._server( + [ + {"property": "team", "value": "data-platform"}, + {"property": "owner", "value": "alice"}, + ] + ) + result = diff(v1, v2) + added = result.get("dictionary_item_added", {}) + assert any("owner" in k for k in added) + + def test_custom_property_removed(self): + v1 = self._server( + [ + {"property": "team", "value": "data-platform"}, + {"property": "owner", "value": "alice"}, + ] + ) + v2 = self._server([{"property": "team", "value": "data-platform"}]) + result = diff(v1, v2) + removed = result.get("dictionary_item_removed", {}) + assert any("owner" in k for k in removed) + + def test_multiple_servers_independent(self): + """customProperties on two different servers are normalized independently.""" + v1 = _contract( + servers=[ + {"server": "prod", "type": "snowflake", "customProperties": [{"property": "env", "value": "prod"}]}, + { + "server": "staging", + "type": "snowflake", + "customProperties": [{"property": "env", "value": 
"staging"}], + }, + ] + ) + v2 = _contract( + servers=[ + {"server": "prod", "type": "snowflake", "customProperties": [{"property": "env", "value": "prod"}]}, + { + "server": "staging", + "type": "snowflake", + "customProperties": [{"property": "env", "value": "staging-new"}], + }, + ] + ) + result = diff(v1, v2) + changed = result.get("values_changed", {}) + assert any("staging" in k for k in changed) + assert not any("prod" in k and "customProperties" in k for k in changed) + + +class TestNormalizeQualityNested: + def _schema_quality(self, quality_items): + return _contract( + schema=[ + { + "name": "orders", + "physicalName": "orders_tbl", + "quality": quality_items, + } + ] + ) + + def _property_quality(self, quality_items): + return _contract( + schema=[ + { + "name": "orders", + "physicalName": "orders_tbl", + "properties": [ + { + "name": "amount", + "logicalType": "number", + "quality": quality_items, + } + ], + } + ] + ) + + def test_schema_quality_custom_property_change_detected(self): + v1 = self._schema_quality( + [{"name": "row_count", "type": "sql", "customProperties": [{"property": "severity", "value": "high"}]}] + ) + v2 = self._schema_quality( + [{"name": "row_count", "type": "sql", "customProperties": [{"property": "severity", "value": "critical"}]}] + ) + result = diff(v1, v2) + changed = result.get("values_changed", {}) + assert any("severity" in k for k in changed) + + def test_schema_quality_custom_property_reorder_no_diff(self): + v1 = self._schema_quality( + [ + { + "name": "row_count", + "type": "sql", + "customProperties": [ + {"property": "severity", "value": "high"}, + {"property": "owner", "value": "data-team"}, + ], + } + ] + ) + v2 = self._schema_quality( + [ + { + "name": "row_count", + "type": "sql", + "customProperties": [ + {"property": "owner", "value": "data-team"}, + {"property": "severity", "value": "high"}, + ], + } + ] + ) + assert diff(v1, v2) == {} + + def 
test_schema_quality_change_with_reorder_path_has_property_name(self): + """Path must name the property, not use a positional index.""" + v1 = self._schema_quality( + [ + { + "name": "row_count", + "type": "sql", + "customProperties": [ + {"property": "severity", "value": "high"}, + {"property": "owner", "value": "data-team"}, + {"property": "env", "value": "prod"}, + ], + } + ] + ) + v2 = self._schema_quality( + [ + { + "name": "row_count", + "type": "sql", + "customProperties": [ + {"property": "env", "value": "prod"}, + {"property": "severity", "value": "critical"}, + {"property": "owner", "value": "data-team"}, + ], + } + ] + ) + raw = diff(v1, v2) + changed = raw.get("values_changed", {}) + assert any("severity" in k for k in changed) + assert not any("][0]" in k or "][1]" in k or "][2]" in k for k in changed) + + def test_schema_quality_auth_defs_reorder_no_diff(self): + v1 = self._schema_quality( + [ + { + "name": "row_count", + "type": "sql", + "authoritativeDefinitions": [ + {"url": "https://a.com", "type": "definition"}, + {"url": "https://b.com", "type": "support"}, + ], + } + ] + ) + v2 = self._schema_quality( + [ + { + "name": "row_count", + "type": "sql", + "authoritativeDefinitions": [ + {"url": "https://b.com", "type": "support"}, + {"url": "https://a.com", "type": "definition"}, + ], + } + ] + ) + assert diff(v1, v2) == {} + + def test_schema_quality_auth_def_change_detected(self): + v1 = self._schema_quality( + [ + { + "name": "row_count", + "type": "sql", + "authoritativeDefinitions": [{"url": "https://a.com", "type": "definition"}], + } + ] + ) + v2 = self._schema_quality( + [ + { + "name": "row_count", + "type": "sql", + "authoritativeDefinitions": [{"url": "https://a.com", "type": "policy"}], + } + ] + ) + result = diff(v1, v2) + assert result != {} + + def test_property_quality_custom_property_change_detected(self): + v1 = self._property_quality( + [{"name": "positive", "type": "sql", "customProperties": [{"property": "priority", "value": 
"p1"}]}] + ) + v2 = self._property_quality( + [{"name": "positive", "type": "sql", "customProperties": [{"property": "priority", "value": "p2"}]}] + ) + result = diff(v1, v2) + changed = result.get("values_changed", {}) + assert any("priority" in k for k in changed) + + def test_property_quality_custom_property_reorder_no_diff(self): + v1 = self._property_quality( + [ + { + "name": "positive", + "type": "sql", + "customProperties": [ + {"property": "priority", "value": "p1"}, + {"property": "team", "value": "data"}, + ], + } + ] + ) + v2 = self._property_quality( + [ + { + "name": "positive", + "type": "sql", + "customProperties": [ + {"property": "team", "value": "data"}, + {"property": "priority", "value": "p1"}, + ], + } + ] + ) + assert diff(v1, v2) == {} + + def test_property_quality_auth_defs_reorder_no_diff(self): + v1 = self._property_quality( + [ + { + "name": "positive", + "type": "sql", + "authoritativeDefinitions": [ + {"url": "https://a.com", "type": "definition"}, + {"url": "https://b.com", "type": "support"}, + ], + } + ] + ) + v2 = self._property_quality( + [ + { + "name": "positive", + "type": "sql", + "authoritativeDefinitions": [ + {"url": "https://b.com", "type": "support"}, + {"url": "https://a.com", "type": "definition"}, + ], + } + ] + ) + assert diff(v1, v2) == {} diff --git a/tests/test_changelog_output_text.py b/tests/test_changelog_output_text.py new file mode 100644 index 000000000..d3b542d3a --- /dev/null +++ b/tests/test_changelog_output_text.py @@ -0,0 +1,183 @@ +import io +import sys +from pathlib import Path + +from rich.console import Console + +from datacontract.data_contract import DataContract +from datacontract.model.changelog import ChangelogEntry, ChangelogResult, ChangelogType +from datacontract.output.text_changelog_results import _badges, _with_markup, _wrap, write_text_changelog_results + +V1 = "fixtures/changelog/integration/changelog_integration_v1.yaml" +V2 = 
"fixtures/changelog/integration/changelog_integration_v2.yaml" + +GOLDEN_TEXT = Path(__file__).parent / "fixtures/changelog/golden_changelog_text.txt" + + +def _make_entries(added=0, removed=0, changed=0): + entries = [] + for _ in range(added): + entries.append(ChangelogEntry(path="a.b", type=ChangelogType.added)) + for _ in range(removed): + entries.append(ChangelogEntry(path="a.b", type=ChangelogType.removed)) + for _ in range(changed): + entries.append(ChangelogEntry(path="a.b", type=ChangelogType.updated)) + return entries + + +def _render(result: ChangelogResult) -> str: + buf = io.StringIO() + con = Console(file=buf, width=300, highlight=False) + old_stdout = sys.stdout + sys.stdout = buf + try: + write_text_changelog_results(result, con) + finally: + sys.stdout = old_stdout + return buf.getvalue() + + +class TestBadges: + def test_all_types(self): + result = _badges(_make_entries(added=2, removed=1, changed=3)) + assert "1 Removed" in result + assert "3 Updated" in result + assert "2 Added" in result + + def test_ordering_added_updated_removed(self): + result = _badges(_make_entries(added=1, removed=1, changed=1)) + assert result.index("Added") < result.index("Updated") < result.index("Removed") + + def test_added_badge_green(self): + result = _badges(_make_entries(added=1)) + assert "[ [green]1 Added[/green] ]" == result + + def test_updated_badge_yellow(self): + result = _badges(_make_entries(changed=1)) + assert "[ [yellow]1 Updated[/yellow] ]" == result + + def test_removed_badge_red(self): + result = _badges(_make_entries(removed=1)) + assert "[ [red]1 Removed[/red] ]" == result + + def test_zero_count_omitted(self): + result = _badges(_make_entries(added=3)) + assert "Removed" not in result + assert "Updated" not in result + assert "3 Added" in result + + def test_empty_list_returns_empty_string(self): + assert _badges([]) == "" + + def test_separator_between_badges(self): + result = _badges(_make_entries(removed=1, added=1)) + assert " " in result + 
+ +class TestWrap: + def test_short_text_returned_as_is(self): + assert _wrap("hello", 20) == "hello" + + def test_exact_max_width_not_wrapped(self): + text = "a" * 20 + assert _wrap(text, 20) == text + + def test_single_word_longer_than_max_returned_as_is(self): + long_word = "a" * 35 + assert _wrap(long_word, 30) == long_word + + def test_multi_word_each_line_within_max_width(self): + result = _wrap("hello world foo bar", 11) + for line in result.split("\n"): + assert len(line) <= 11 + + def test_multi_word_produces_multiple_lines(self): + assert "\n" in _wrap("one two three four five six", 9) + + def test_empty_string_returned_as_is(self): + assert _wrap("", 10) == "" + + +class TestWithMarkup: + def test_added_green(self): + assert _with_markup(ChangelogType.added) == "[green]Added[/green]" + + def test_removed_red(self): + assert _with_markup(ChangelogType.removed) == "[red]Removed[/red]" + + def test_updated_yellow(self): + assert _with_markup(ChangelogType.updated) == "[yellow]Updated[/yellow]" + + +class TestTerminalStateInheritance: + """The wide rendering console inherits terminal/color state from the caller's console. 
+ This prevents colors being silently stripped when the outer console is a real TTY.""" + + def test_colors_present_when_terminal(self): + result = DataContract(data_contract_file=V1).changelog(DataContract(data_contract_file=V2)) + buf = io.StringIO() + con = Console(file=buf, width=300, force_terminal=True) + old_stdout = sys.stdout + sys.stdout = buf + try: + write_text_changelog_results(result, con) + finally: + sys.stdout = old_stdout + assert "\033[" in buf.getvalue() + + def test_colors_absent_when_not_terminal(self): + result = DataContract(data_contract_file=V1).changelog(DataContract(data_contract_file=V2)) + buf = io.StringIO() + con = Console(file=buf, width=300, no_color=True) + old_stdout = sys.stdout + sys.stdout = buf + try: + write_text_changelog_results(result, con) + finally: + sys.stdout = old_stdout + assert "\033[" not in buf.getvalue() + + +class TestWriteTextChangelogResults: + def test_summary_header_present(self): + result = DataContract(data_contract_file=V1).changelog(DataContract(data_contract_file=V2)) + assert "Summary" in _render(result) + + def test_details_header_present(self): + result = DataContract(data_contract_file=V1).changelog(DataContract(data_contract_file=V2)) + assert "Details" in _render(result) + + def test_badges_present(self): + result = DataContract(data_contract_file=V1).changelog(DataContract(data_contract_file=V2)) + output = _render(result) + assert "Removed" in output or "Updated" in output or "Added" in output + + def test_all_change_types_present(self): + result = DataContract(data_contract_file=V1).changelog(DataContract(data_contract_file=V2)) + output = _render(result) + assert "Added" in output + assert "Removed" in output + assert "Updated" in output + + def test_no_changes_suppresses_summary(self): + result = DataContract(data_contract_file=V1).changelog(DataContract(data_contract_file=V1)) + assert "Summary" not in _render(result) + + def test_no_changes_still_renders_details(self): + result = 
DataContract(data_contract_file=V1).changelog(DataContract(data_contract_file=V1)) + assert "Details" in _render(result) + + def test_golden_output(self): + result = DataContract(data_contract_file=V1).changelog(DataContract(data_contract_file=V2)) + buf = io.StringIO() + con = Console(file=buf, width=300, highlight=False, no_color=True) + old_stdout = sys.stdout + sys.stdout = buf + try: + write_text_changelog_results(result, con) + finally: + sys.stdout = old_stdout + assert buf.getvalue() == GOLDEN_TEXT.read_text(encoding="utf-8"), ( + "Changelog text output has changed. If intentional, regenerate " + "golden_changelog_text.txt (see tests/fixtures/changelog/helper/generate_golden.py)." + ) diff --git a/tests/test_cli.py b/tests/test_cli.py index 9c6f40dc8..53be4dd63 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -16,3 +16,25 @@ def test_file_does_not_exist(): result = runner.invoke(app, ["test", "unknown.yaml"]) assert result.exit_code == 1 assert "The file 'unknown.yaml' does not \nexist." 
in result.stdout + + +def test_changelog_help(): + result = runner.invoke(app, ["changelog", "--help"]) + assert result.exit_code == 0 + + +def test_changelog_with_changes(): + result = runner.invoke( + app, + [ + "changelog", + "fixtures/changelog/integration/changelog_integration_v1.yaml", + "fixtures/changelog/integration/changelog_integration_v2.yaml", + ], + ) + assert result.exit_code == 0 + assert "Summary" in result.output + assert "Details" in result.output + assert "Removed" in result.output + assert "Updated" in result.output + assert "Added" in result.output diff --git a/tests/test_data_contract.py b/tests/test_data_contract.py new file mode 100644 index 000000000..0db8cc10a --- /dev/null +++ b/tests/test_data_contract.py @@ -0,0 +1,15 @@ +from datacontract.data_contract import DataContract + +V1 = "fixtures/changelog/integration/changelog_integration_v1.yaml" + + +def test_get_data_contract_file_returns_path(): + dc = DataContract(data_contract_file=V1) + assert dc.get_data_contract_file() == V1 + + +def test_get_data_contract_file_returns_none_when_not_set(): + dc = DataContract( + data_contract_str="dataContractSpecification: 1.1.0\nid: test\ninfo:\n title: t\n version: 1.0.0\n" + ) + assert dc.get_data_contract_file() is None diff --git a/tests/test_export_markdown.py b/tests/test_export_markdown.py index 4fd05b863..a828dbd18 100644 --- a/tests/test_export_markdown.py +++ b/tests/test_export_markdown.py @@ -54,6 +54,4 @@ def test_pipe_chars_escaped_in_table_cells(): assert lines, "order_id table row not found" row = lines[0] # The row must have exactly 4 pipe chars as table delimiters (| col1 | col2 | col3 |) - assert row.count("|") == 4, ( - f"Expected 4 pipe delimiters in row, got {row.count('|')}: {row!r}" - ) + assert row.count("|") == 4, f"Expected 4 pipe delimiters in row, got {row.count('|')}: {row!r}"