13 changes: 13 additions & 0 deletions API.md
@@ -103,6 +103,19 @@ curl -X POST "http://localhost:4242/export?format=sql" \
--data-binary @datacontract.yaml
```

## Changelog Between Two Data Contracts

Compare two ODCS data contracts and receive a changelog. POST a JSON body with `v1` (source/before) and `v2` (target/after) as YAML strings. Returns a JSON object with `summary` and `entries`.

```bash
# jq escapes newlines and quotes, so arbitrary YAML is safe to embed as JSON string values
curl -X POST "http://localhost:4242/changelog" \
  -H "Content-Type: application/json" \
  -d "$(jq -n --rawfile v1 v1.odcs.yaml --rawfile v2 v2.odcs.yaml '{v1: $v1, v2: $v2}')"
```
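The request body can also be assembled programmatically; below is a minimal Python sketch using only the standard library (the helper name and the example YAML snippets are illustrative, not part of the API):

```python
import json


def build_changelog_request(v1_yaml: str, v2_yaml: str) -> str:
    """Build the JSON body for POST /changelog.

    json.dumps escapes newlines and quotes, so arbitrary YAML
    content is safe to embed as JSON string values.
    """
    return json.dumps({"v1": v1_yaml, "v2": v2_yaml})


# Example: two tiny contract snippets as the before/after versions
body = build_changelog_request("apiVersion: v3.1.0\n", "apiVersion: v3.1.0\nstatus: active\n")
```

The resulting string can then be sent with any HTTP client, with `Content-Type: application/json` set as in the curl example above.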

## Try it out

You can also use the Swagger UI to execute the commands directly.
3 changes: 2 additions & 1 deletion CHANGELOG.md
@@ -9,6 +9,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

### Added
- Added `ci` command for CI/CD-optimized test runs: multi-file support, GitHub Actions annotations and step summary, Azure DevOps annotations, `--fail-on` flag, `--json` output
- Added `changelog` command and API endpoint (#1118)

### Fixed
- Avro importer now raises an error for union fields with multiple non-null types, which are not supported by ODCS
@@ -953,4 +954,4 @@ The Golang version can be found at [cli-go](https://github.com/datacontract/cli-

## [0.1.1]
### Added
- Initial release.
6 changes: 6 additions & 0 deletions CLAUDE.md
@@ -89,6 +89,10 @@ datacontract export --format html datacontract.yaml --output datacontract.html

# Import from a different format
datacontract import --format sql --source my-ddl.sql --dialect postgres --output datacontract.yaml

# Show a changelog between two data contracts
datacontract changelog datacontract-v1.yaml datacontract-v2.yaml

```

## Project Architecture
@@ -111,6 +115,8 @@ The Data Contract CLI is an open-source command-line tool for working with data

5. **Linting (`datacontract/lint/`)**: Tools for validating data contract files against schema and best practices.

6. **Changelog (`datacontract/changelog/`)**: Semantic comparison of ODCS data contracts.

### Extension Pattern

The project uses factory patterns for extensibility:
28 changes: 27 additions & 1 deletion README.md
@@ -117,6 +117,9 @@ $ datacontract init odcs.yaml
# lint the odcs.yaml
$ datacontract lint odcs.yaml

# show a changelog between two data contracts
$ datacontract changelog v1.odcs.yaml v2.odcs.yaml

# execute schema and quality checks (define credentials as environment variables)
$ datacontract test odcs.yaml

@@ -260,6 +263,7 @@ Commands

- [init](#init)
- [lint](#lint)
- [changelog](#changelog)
- [test](#test)
- [ci](#ci)
- [export](#export)
@@ -318,10 +322,32 @@ Commands

```

### changelog
```

Usage: datacontract changelog [OPTIONS] V1 V2

Show a changelog between two data contracts.

╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────╮
│ * v1 TEXT The location (path) of the source (before) data contract YAML. [required] │
│ * v2 TEXT The location (path) of the target (after) data contract YAML. [required] │
╰──────────────────────────────────────────────────────────────────────────────────────────────────╯
╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮
│ --debug --no-debug Enable debug logging │
│ --help Show this message and exit. │
╰──────────────────────────────────────────────────────────────────────────────────────────────────╯

```

```bash
$ datacontract changelog v1.odcs.yaml v2.odcs.yaml
```

### test
```

Usage: datacontract test [OPTIONS] [LOCATION]

Run schema and quality tests on configured servers.

46 changes: 46 additions & 0 deletions datacontract/api.py
@@ -1,14 +1,19 @@
import logging
import os
import tempfile
from typing import Annotated, Optional

import pydantic
import typer
import yaml
from fastapi import Body, Depends, FastAPI, HTTPException, Query, status
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import PlainTextResponse
from fastapi.security.api_key import APIKeyHeader
from pydantic import BaseModel

from datacontract.data_contract import DataContract, ExportFormat
from datacontract.model.exceptions import DataContractException
from datacontract.model.run import Run

DATA_CONTRACT_EXAMPLE_PAYLOAD = """apiVersion: v3.1.0
@@ -358,6 +363,47 @@ async def lint(
return {"result": lint_result.result, "checks": lint_result.checks}


class ChangelogRequest(BaseModel):
    v1: str = DATA_CONTRACT_EXAMPLE_PAYLOAD
    v2: str = DATA_CONTRACT_EXAMPLE_PAYLOAD


@app.post(
    "/changelog",
    tags=["changelog"],
    summary="Show a changelog between two data contracts.",
    description="""
Compare two ODCS data contract YAMLs and return a changelog.
POST a JSON body with `v1` (source/before) and `v2` (target/after) as YAML strings.
""",
)
async def changelog_endpoint(
    body: ChangelogRequest,
    api_key: Annotated[str | None, Depends(api_key_header)] = None,
):
    check_api_key(api_key)

    with tempfile.NamedTemporaryFile(mode="w", suffix=".yaml", delete=False) as f1:
        f1.write(body.v1)
        v1_path = f1.name
    with tempfile.NamedTemporaryFile(mode="w", suffix=".yaml", delete=False) as f2:
        f2.write(body.v2)
        v2_path = f2.name

    try:
        result = DataContract(data_contract_file=v1_path).changelog(DataContract(data_contract_file=v2_path))
        return {"summary": result.summary, "entries": result.entries}
    except yaml.YAMLError as e:
        raise HTTPException(status_code=422, detail=f"Invalid YAML: {e}")
    except pydantic.ValidationError as e:
        raise HTTPException(status_code=422, detail=f"Invalid data contract: {e}")
    except DataContractException as e:
        raise HTTPException(status_code=422, detail=f"Data Contract Validation Failure: {e}")
    finally:
        os.unlink(v1_path)
        os.unlink(v2_path)


@app.post(
"/export",
tags=["export"],
3 changes: 3 additions & 0 deletions datacontract/changelog/__init__.py
@@ -0,0 +1,3 @@
from datacontract.changelog.changelog import build_changelog, diff

__all__ = ["diff", "build_changelog"]
191 changes: 191 additions & 0 deletions datacontract/changelog/changelog.py
@@ -0,0 +1,191 @@
"""
changelog — ODCS contract changelog builder
---------------------------------------------
Provides two public functions:
diff() — normalise two ODCS contract dicts and return a raw DeepDiff dict
build_changelog() — transform a raw DeepDiff dict into structured summary + detail data
ready to be rendered as a changelog report.
"""

import json
import re
from datetime import datetime, timezone

from deepdiff import DeepDiff
from open_data_contract_standard.model import OpenDataContractStandard

from datacontract.changelog.normalize import normalize


def diff(v1: dict, v2: dict) -> dict:
    """Return the DeepDiff result as a plain dict.

    ignore_order=True  - the order of items in lists is irrelevant
    verbose_level=2    - include old/new values, not just paths
    """
    n1 = normalize(v1)
    n2 = normalize(v2)
    result = DeepDiff(n1, n2, ignore_order=True, verbose_level=2)
    return json.loads(result.to_json())


_CHANGE_TYPE_MAP = {
    "dictionary_item_added": "Added",
    "dictionary_item_removed": "Removed",
    "values_changed": "Updated",
    "type_changes": "Updated",
    "iterable_item_added": "Added",
    "iterable_item_removed": "Removed",
}


def build_changelog(
    source: OpenDataContractStandard,
    source_file: str | None,
    other: OpenDataContractStandard,
    other_file: str | None,
) -> dict:
    """Produce a JSON-serialisable changelog dict by diffing two ODCS contracts."""
    source_label = source_file or "v1"
    target_label = other_file or "v2"
    diff_result = diff(
        source.model_dump(exclude_none=True, by_alias=True),
        other.model_dump(exclude_none=True, by_alias=True),
    )
    return _build_changelog_from_diff(diff_result, source_label=source_label, target_label=target_label)


def _build_changelog_from_diff(diff_result: dict, source_label: str = "v1", target_label: str = "v2") -> dict:
    """Produce a JSON-serialisable dict with all data needed to render
    the full changelog.

    Both summary.changes and detail.changes share the same shape:
        {
            "path": str,        # dot-separated field path
            "changeType": str,  # Added | Removed | Updated
            "old_value": any,   # present for Updated/Removed; absent otherwise
            "new_value": any,   # present for Updated/Added; absent otherwise
        }

    Summary rollup rules (detail always shows full leaf paths):
    - Scalar Updated leaf → rolled up to parent (logicalType → field)
    - Scalar Added/Removed leaf → rolled up to parent (businessName Added → field Added)
    - Mixed Add+Remove on same parent → single entry with changeType Updated
    - Dict Added/Removed (whole object) → stays at its own path, not rolled up
    - List string item (tag) → rolled up to the tags parent in summary;
      in detail the tag value is the final path segment (tags.pii Removed)
    """

    def _expand_to_entries(obj, change_type, base_segs):
        entries = []
        for k, v in obj.items():
            segs = base_segs + [k]
            if isinstance(v, dict):
                entry = {"path": ".".join(segs), "changeType": change_type}
                entries.append(entry)
                entries.extend(_expand_to_entries(v, change_type, segs))
            else:
                entry = {"path": ".".join(segs), "changeType": change_type}
                if change_type == "Added":
                    entry["new_value"] = v
                else:
                    entry["old_value"] = v
                entries.append(entry)
        return entries

    detail_changes = []
    for deepdiff_key, items in diff_result.items():
        change_type = _CHANGE_TYPE_MAP.get(deepdiff_key)
        if not change_type:
            continue
        for raw_path, payload in items.items():
            # match ['key'] or ["key"]
            segs = re.findall(r"""(?:\['([^']+)'\]|\["([^"]+)"\])""", raw_path)
            segs = [group[0] if group[0] else group[1] for group in segs]
            is_iterable = deepdiff_key in ("iterable_item_added", "iterable_item_removed")
            if isinstance(payload, dict) and "old_value" in payload:
                entry = {
                    "path": ".".join(segs),
                    "changeType": change_type,
                    "old_value": payload["old_value"],
                    "new_value": payload["new_value"],
                }
                detail_changes.append(entry)
            elif change_type in ("Added", "Removed") and isinstance(payload, dict):
                detail_changes.append({"path": ".".join(segs), "changeType": change_type})
                detail_changes.extend(_expand_to_entries(payload, change_type, segs))
            elif is_iterable and isinstance(payload, str):
                entry = {"path": ".".join(segs + [payload]), "changeType": change_type}
                detail_changes.append(entry)
            else:
                entry = {"path": ".".join(segs), "changeType": change_type}
                if change_type == "Added":
                    entry["new_value"] = payload
                else:
                    entry["old_value"] = payload
                detail_changes.append(entry)

    detail_changes.sort(key=lambda x: x["path"])

    detail_counts = {
        "added": sum(1 for c in detail_changes if c["changeType"] == "Added"),
        "removed": sum(1 for c in detail_changes if c["changeType"] == "Removed"),
        "updated": sum(1 for c in detail_changes if c["changeType"] == "Updated"),
    }

    summary_groups: dict[tuple, dict] = {}
    for deepdiff_key, items in diff_result.items():
        change_type = _CHANGE_TYPE_MAP.get(deepdiff_key)
        if not change_type:
            continue
        for raw_path, payload in items.items():
            segs = re.findall(r"""(?:\['([^']+)'\]|\["([^"]+)"\])""", raw_path)
            segs = [group[0] if group[0] else group[1] for group in segs]
            is_iterable = deepdiff_key in ("iterable_item_added", "iterable_item_removed")
            is_scalar_change = (
                change_type == "Updated"
                and isinstance(payload, dict)
                and "old_value" in payload
                and not isinstance(payload.get("old_value"), dict)
                and not isinstance(payload.get("new_value"), dict)
            )
            is_scalar_leaf = change_type in ("Added", "Removed") and not isinstance(payload, dict) and not is_iterable
            if is_iterable and isinstance(payload, str):
                display_segs = tuple(segs)
            elif (is_scalar_change or is_scalar_leaf) and len(segs) > 1:
                display_segs = tuple(segs[:-1])
            else:
                display_segs = tuple(segs)
            if display_segs not in summary_groups:
                summary_groups[display_segs] = {"changeType": change_type}
            else:
                if summary_groups[display_segs]["changeType"] != change_type:
                    summary_groups[display_segs]["changeType"] = "Updated"

    summary_changes = []
    for segs, data in sorted(summary_groups.items(), key=lambda x: ".".join(x[0])):
        summary_changes.append({"path": ".".join(segs), "changeType": data["changeType"]})

    summary_counts = {
        "added": sum(1 for c in summary_changes if c["changeType"] == "Added"),
        "removed": sum(1 for c in summary_changes if c["changeType"] == "Removed"),
        "updated": sum(1 for c in summary_changes if c["changeType"] == "Updated"),
    }

    return {
        "source_label": source_label,
        "target_label": target_label,
        "header": {
            "title": "ODCS Data Contract Changelog",
            "subtitle": f"{source_label} \u2192 {target_label}",
            "generated_at": datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S UTC"),
        },
        "summary": {
            "counts": summary_counts,
            "changes": summary_changes,
        },
        "detail": {
            "counts": detail_counts,
            "changes": detail_changes,
        },
    }
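The DeepDiff path parsing used in both loops of `_build_changelog_from_diff` can be exercised in isolation. This standalone sketch replicates the same regex (the helper name is illustrative); note that only quoted keys are captured, so numeric list indices such as `[0]` are dropped from the resulting segments:

```python
import re

# Same pattern as in _build_changelog_from_diff: matches ['key'] or ["key"]
_SEG_RE = re.compile(r"""(?:\['([^']+)'\]|\["([^"]+)"\])""")


def parse_deepdiff_path(raw_path: str) -> list[str]:
    """Split a DeepDiff path like root['schema']['properties']['id']
    into its key segments."""
    return [a if a else b for a, b in _SEG_RE.findall(raw_path)]


parse_deepdiff_path("root['schema']['properties']['order_id']['logicalType']")
# → ['schema', 'properties', 'order_id', 'logicalType']
```

Joining the segments with `"."` yields the dot-separated `path` values that appear in the summary and detail entries.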