From ab2a5fd39ee7b8b4cfa27e2018d5ec3aa582b8b2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakob=20Sch=C3=B6dl?= Date: Mon, 30 Mar 2026 15:52:52 +0200 Subject: [PATCH 01/12] feat: add --ci flag to test and lint commands Adds CI/CD-optimized output when --ci is passed: - GitHub Actions annotations (::error/::warning) when GITHUB_ACTIONS=true - GitHub Step Summary markdown table when GITHUB_STEP_SUMMARY is set Co-Authored-By: Claude Opus 4.6 (1M context) --- datacontract/cli.py | 13 +++ datacontract/output/ci_output.py | 64 +++++++++++++ tests/test_ci_output.py | 157 +++++++++++++++++++++++++++++++ 3 files changed, 234 insertions(+) create mode 100644 datacontract/output/ci_output.py create mode 100644 tests/test_ci_output.py diff --git a/datacontract/cli.py b/datacontract/cli.py index 2eb13997..66ff1b30 100644 --- a/datacontract/cli.py +++ b/datacontract/cli.py @@ -20,6 +20,7 @@ ) from datacontract.lint.resolve import resolve_data_contract, resolve_data_contract_dict from datacontract.model.exceptions import DataContractException +from datacontract.output.ci_output import write_ci_output from datacontract.output.output_format import OutputFormat from datacontract.output.test_results_writer import write_test_result @@ -107,6 +108,10 @@ def lint( ), ] = None, output_format: Annotated[OutputFormat, typer.Option(help="The target format for the test results.")] = None, + ci: Annotated[ + bool, + typer.Option(help="Optimize output for CI/CD pipelines. Emits GitHub Actions annotations and step summary."), + ] = False, debug: debug_option = None, ): """ @@ -116,6 +121,8 @@ def lint( run = DataContract(data_contract_file=location, schema_location=schema).lint() write_test_result(run, console, output_format, output) + if ci: + write_ci_output(run, location) def enable_debug_logging(debug: bool): @@ -156,6 +163,10 @@ def test( ] = None, output_format: Annotated[OutputFormat, typer.Option(help="The target format for the test results.")] = None, logs: Annotated[bool, typer.Option(help="Print logs")] = False, + ci: Annotated[ + bool, + typer.Option(help="Optimize output for CI/CD pipelines. Emits GitHub Actions annotations and step summary."), + ] = False, ssl_verification: Annotated[ bool, typer.Option(help="SSL verification when publishing the data contract."), @@ -185,6 +196,8 @@ def test( except Exception: data_contract = None write_test_result(run, console, output_format, output, data_contract) + if ci: + write_ci_output(run, location) @app.command(name="export") diff --git a/datacontract/output/ci_output.py b/datacontract/output/ci_output.py new file mode 100644 index 00000000..4a33fbbe --- /dev/null +++ b/datacontract/output/ci_output.py @@ -0,0 +1,64 @@ +import os + +from datacontract.model.run import Run +from datacontract.output.test_results_writer import to_field + + +def write_ci_output(run: Run, data_contract_file: str): + """Write CI-specific output: GitHub annotations and step summary.""" + _write_github_annotations(run, data_contract_file) + _write_github_step_summary(run, data_contract_file) + + +def _write_github_annotations(run: Run, data_contract_file: str): + if os.environ.get("GITHUB_ACTIONS") != "true": + return + + for check in run.checks: + if check.result in ("failed", "error"): + print(f"::error file={data_contract_file}::{check.name}: {check.reason}") + elif check.result == "warning": + print(f"::warning file={data_contract_file}::{check.name}: {check.reason}") + + +def _write_github_step_summary(run: Run, data_contract_file: str): + summary_path = os.environ.get("GITHUB_STEP_SUMMARY") + if not summary_path: + return + + result_emoji = { + "passed": "passed", + "warning": "warning", + "failed": "failed", + "error": "error", + } + result_str = run.result.value if hasattr(run.result, "value") else run.result + result_display = result_emoji.get(result_str, result_str) + + total = len(run.checks) if run.checks else 0 + passed = sum(1 for c in run.checks if c.result == "passed") if run.checks else 0 + failed = sum(1 for c in run.checks if c.result == "failed") if run.checks else 0 + warnings = sum(1 for c in run.checks if c.result == "warning") if run.checks else 0 + errors = sum(1 for c in run.checks if c.result == "error") if run.checks else 0 + + duration = (run.timestampEnd - run.timestampStart).total_seconds() if run.timestampStart and run.timestampEnd else 0 + + lines = [ + f"## Data Contract CI: {data_contract_file}", + "", + f"**Result: {result_display}** | {total} checks | {passed} passed | {failed} failed | {warnings} warnings | {errors} errors | {duration:.1f}s", + "", + ] + + if run.checks: + lines.append("| Result | Check | Field | Details |") + lines.append("|--------|-------|-------|---------|") + for check in sorted(run.checks, key=lambda c: (c.result or "", c.model or "", c.field or "")): + field = to_field(run, check) or "" + reason = check.reason or "" + result_val = check.result.value if hasattr(check.result, "value") else check.result + lines.append(f"| {result_val} | {check.name} | {field} | {reason} |") + lines.append("") + + with open(summary_path, "a") as f: + f.write("\n".join(lines) + "\n") diff --git a/tests/test_ci_output.py b/tests/test_ci_output.py new file mode 100644 index 00000000..8ba9b9f0 --- /dev/null +++ b/tests/test_ci_output.py @@ -0,0 +1,157 @@ +import os +import tempfile +from unittest.mock import patch + +from typer.testing import CliRunner + +from datacontract.cli import app +from datacontract.model.run import Check, ResultEnum, Run +from datacontract.output.ci_output import write_ci_output + +runner = CliRunner() + + +def _make_run(checks): + run = Run.create_run() + run.checks = checks + run.finish() + return run + + +def test_github_annotations_emitted(capsys): + run = _make_run( + [ + Check(type="schema", name="Check col types", result=ResultEnum.failed, reason="type mismatch"), + Check(type="schema", name="Check nullability", result=ResultEnum.warning, reason="nullable changed"), + Check(type="schema", name="Check row count", result=ResultEnum.passed, reason=None), + ] + ) + with patch.dict(os.environ, {"GITHUB_ACTIONS": "true"}): + write_ci_output(run, "datacontract.yaml") + + captured = capsys.readouterr() + assert "::error file=datacontract.yaml::Check col types: type mismatch" in captured.out + assert "::warning file=datacontract.yaml::Check nullability: nullable changed" in captured.out + assert "Check row count" not in captured.out + + +def test_no_annotations_outside_github(capsys): + run = _make_run( + [ + Check(type="schema", name="Check col types", result=ResultEnum.failed, reason="type mismatch"), + ] + ) + env = {k: v for k, v in os.environ.items() if k != "GITHUB_ACTIONS"} + with patch.dict(os.environ, env, clear=True): + write_ci_output(run, "datacontract.yaml") + + captured = capsys.readouterr() + assert "::error" not in captured.out + + +def test_annotation_format_for_errors(capsys): + run = _make_run( + [ + Check(type="quality", name="freshness", result=ResultEnum.error, reason="connection timeout"), + ] + ) + with patch.dict(os.environ, {"GITHUB_ACTIONS": "true"}): + write_ci_output(run, "my/contract.yaml") + + captured = capsys.readouterr() + assert captured.out.strip() == "::error file=my/contract.yaml::freshness: connection timeout" + + +def test_step_summary_written(): + run = _make_run( + [ + Check(type="schema", name="Check types", result=ResultEnum.passed, reason=None), + Check(type="schema", name="Check nulls", result=ResultEnum.failed, reason="not nullable"), + ] + ) + with tempfile.NamedTemporaryFile(mode="w", suffix=".md", delete=False) as f: + summary_path = f.name + + try: + env = {k: v for k, v in os.environ.items() if k != "GITHUB_ACTIONS"} + env["GITHUB_STEP_SUMMARY"] = summary_path + with patch.dict(os.environ, env, clear=True): + write_ci_output(run, "datacontract.yaml") + + with open(summary_path) as f: + content = f.read() + assert "## Data Contract CI: datacontract.yaml" in content + assert "| Result | Check | Field | Details |" in content + assert "Check types" in content + assert "Check nulls" in content + finally: + os.unlink(summary_path) + + +def test_no_summary_without_env(): + run = _make_run( + [ + Check(type="schema", name="Check types", result=ResultEnum.passed, reason=None), + ] + ) + with tempfile.NamedTemporaryFile(mode="w", suffix=".md", delete=False) as f: + summary_path = f.name + + try: + env = {k: v for k, v in os.environ.items() if k not in ("GITHUB_ACTIONS", "GITHUB_STEP_SUMMARY")} + with patch.dict(os.environ, env, clear=True): + write_ci_output(run, "datacontract.yaml") + + with open(summary_path) as f: + content = f.read() + assert content == "" + finally: + os.unlink(summary_path) + + +def test_step_summary_markdown_structure(): + run = _make_run( + [ + Check(type="schema", name="Check types", model="orders", field="id", result=ResultEnum.passed, reason=None), + Check(type="quality", name="Row count", model="orders", result=ResultEnum.failed, reason="0 rows"), + ] + ) + with tempfile.NamedTemporaryFile(mode="w", suffix=".md", delete=False) as f: + summary_path = f.name + + try: + env = {k: v for k, v in os.environ.items() if k != "GITHUB_ACTIONS"} + env["GITHUB_STEP_SUMMARY"] = summary_path + with patch.dict(os.environ, env, clear=True): + write_ci_output(run, "datacontract.yaml") + + with open(summary_path) as f: + content = f.read() + assert "**Result: failed**" in content + assert "2 checks" in content + assert "1 passed" in content + assert "1 failed" in content + finally: + os.unlink(summary_path) + + +def test_test_ci_flag(): + result = runner.invoke(app, ["test", "--ci", "--help"]) + assert result.exit_code == 0 + assert "--ci" in result.stdout + + +def test_lint_ci_flag(): + result = runner.invoke(app, ["lint", "--ci", "--help"]) + assert result.exit_code == 0 + assert "--ci" in result.stdout + + +def test_test_ci_flag_with_valid_contract(): + result = runner.invoke(app, ["test", "--ci", "fixtures/lint/valid_datacontract.yaml"]) + assert result.exit_code == 0 + + +def test_lint_ci_flag_with_valid_contract(): + result = runner.invoke(app, ["lint", "--ci", "fixtures/lint/valid_datacontract.yaml"]) + assert result.exit_code == 0 From c32081aaa41013f9285876a7b06100d303be61d7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakob=20Sch=C3=B6dl?= Date: Mon, 30 Mar 2026 16:16:22 +0200 Subject: [PATCH 02/12] fix: call write_ci_output before write_test_result write_test_result raises typer.Exit(code=1) on failure, which prevented CI output from being written. Move CI output before the exit. Co-Authored-By: Claude Opus 4.6 (1M context) --- datacontract/cli.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/datacontract/cli.py b/datacontract/cli.py index 66ff1b30..6debcaf2 100644 --- a/datacontract/cli.py +++ b/datacontract/cli.py @@ -120,9 +120,9 @@ def lint( enable_debug_logging(debug) run = DataContract(data_contract_file=location, schema_location=schema).lint() - write_test_result(run, console, output_format, output) if ci: write_ci_output(run, location) + write_test_result(run, console, output_format, output) def enable_debug_logging(debug: bool): @@ -195,9 +195,9 @@ def test( data_contract = resolve_data_contract(location, schema_location=schema) except Exception: data_contract = None - write_test_result(run, console, output_format, output, data_contract) if ci: write_ci_output(run, location) + write_test_result(run, console, output_format, output, data_contract) @app.command(name="export") From ce8c92f7a76a624ce0f8bbb6646f13320f9f9911 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakob=20Sch=C3=B6dl?= Date: Mon, 30 Mar 2026 16:20:23 +0200 Subject: [PATCH 03/12] docs: document --ci flag in README and CHANGELOG Co-Authored-By: Claude Opus 4.6 (1M context) --- CHANGELOG.md | 3 +++ README.md | 20 ++++++++++++++++++-- 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 233bb63e..bdf5132a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## Unreleased +### Added +- Added `--ci` flag to `test` and `lint` commands for CI/CD-optimized output: GitHub Actions annotations and step summary + ### Fixed - Fix SQL export generating multiple PRIMARY KEY constraints for composite keys (#1026) - Preserve parametrized physicalTypes for SQL export (#1086) diff --git a/README.md b/README.md index 70704e99..3e202a82 100644 --- a/README.md +++ b/README.md @@ -358,6 +358,10 @@ Commands │ results. │ │ [default: None] │ │ --logs --no-logs Print logs [default: no-logs] │ +│ --ci --no-ci Optimize output for CI/CD │ +│ pipelines. Emits GitHub Actions │ +│ annotations and step summary. │ +│ [default: no-ci] │ │ --ssl-verification --no-ssl-verification SSL verification when publishing │ │ the data contract. │ │ [default: ssl-verification] │ @@ -374,6 +378,17 @@ Data Contract CLI connects to a data source and runs schema and quality tests to $ datacontract test --server production datacontract.yaml ``` +#### CI/CD Usage + +Use the `--ci` flag for CI/CD-optimized output. When running in GitHub Actions, it automatically emits: +- **Annotations**: Inline `::error` and `::warning` annotations for failed checks +- **Step Summary**: A markdown results table in the GitHub Actions job summary + +```bash +$ datacontract test --ci datacontract.yaml +$ datacontract lint --ci datacontract.yaml +``` + To connect to the databases the `server` block in the datacontract.yaml is used to set up the connection. In addition, credentials, such as username and passwords, may be defined with environment variables. @@ -1881,10 +1896,11 @@ Create a data contract based on the actual data. This is the fastest way to get $ datacontract lint ``` -4. Set up a CI pipeline that executes daily for continuous quality checks. You can also report the +4. Set up a CI pipeline that executes daily for continuous quality checks. Use the `--ci` flag for + CI-optimized output (GitHub Actions annotations and step summary). You can also report the test results to tools like [Data Mesh Manager](https://datamesh-manager.com) ```bash - $ datacontract test --publish https://api.datamesh-manager.com/api/test-results + $ datacontract test --ci --publish https://api.datamesh-manager.com/api/test-results ``` ### Contract-First From 264ac8c2fe476ec8d950ff06544478f1e614d0ab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakob=20Sch=C3=B6dl?= Date: Mon, 30 Mar 2026 17:09:05 +0200 Subject: [PATCH 04/12] refactor: change --ci flag to dedicated ci command Replace the --ci flag on test/lint with a standalone `datacontract ci` command. Same functionality (GitHub annotations + step summary) but as a separate command like Biome and DataVow do. Co-Authored-By: Claude Opus 4.6 (1M context) --- CHANGELOG.md | 2 +- README.md | 14 +++----- datacontract/cli.py | 71 ++++++++++++++++++++++++++++++++++------- tests/test_ci_output.py | 22 +++++-------- 4 files changed, 73 insertions(+), 36 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index bdf5132a..5489ad98 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,7 +8,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## Unreleased ### Added -- Added `--ci` flag to `test` and `lint` commands for CI/CD-optimized output: GitHub Actions annotations and step summary +- Added `ci` command for CI/CD-optimized test runs: GitHub Actions annotations and step summary ### Fixed - Fix SQL export generating multiple PRIMARY KEY constraints for composite keys (#1026) diff --git a/README.md b/README.md index 3e202a82..3c40fc0d 100644 --- a/README.md +++ b/README.md @@ -358,10 +358,6 @@ Commands │ results. │ │ [default: None] │ │ --logs --no-logs Print logs [default: no-logs] │ -│ --ci --no-ci Optimize output for CI/CD │ -│ pipelines. Emits GitHub Actions │ -│ annotations and step summary. │ -│ [default: no-ci] │ │ --ssl-verification --no-ssl-verification SSL verification when publishing │ │ the data contract. │ │ [default: ssl-verification] │ @@ -380,13 +376,13 @@ $ datacontract test --server production datacontract.yaml #### CI/CD Usage -Use the `--ci` flag for CI/CD-optimized output. When running in GitHub Actions, it automatically emits: +Use the `ci` command for CI/CD-optimized test runs. When running in GitHub Actions, it automatically emits: - **Annotations**: Inline `::error` and `::warning` annotations for failed checks - **Step Summary**: A markdown results table in the GitHub Actions job summary ```bash -$ datacontract test --ci datacontract.yaml -$ datacontract lint --ci datacontract.yaml +$ datacontract ci datacontract.yaml +$ datacontract ci --server production datacontract.yaml ``` To connect to the databases the `server` block in the datacontract.yaml is used to set up the connection. @@ -1896,11 +1892,11 @@ Create a data contract based on the actual data. This is the fastest way to get $ datacontract lint ``` -4. Set up a CI pipeline that executes daily for continuous quality checks. Use the `--ci` flag for +4. Set up a CI pipeline that executes daily for continuous quality checks. Use the `ci` command for CI-optimized output (GitHub Actions annotations and step summary). You can also report the test results to tools like [Data Mesh Manager](https://datamesh-manager.com) ```bash - $ datacontract test --ci --publish https://api.datamesh-manager.com/api/test-results + $ datacontract ci --publish https://api.datamesh-manager.com/api/test-results ``` ### Contract-First diff --git a/datacontract/cli.py b/datacontract/cli.py index 6debcaf2..773abaa9 100644 --- a/datacontract/cli.py +++ b/datacontract/cli.py @@ -108,10 +108,6 @@ def lint( ), ] = None, output_format: Annotated[OutputFormat, typer.Option(help="The target format for the test results.")] = None, - ci: Annotated[ - bool, - typer.Option(help="Optimize output for CI/CD pipelines. Emits GitHub Actions annotations and step summary."), - ] = False, debug: debug_option = None, ): """ @@ -120,8 +116,6 @@ def lint( enable_debug_logging(debug) run = DataContract(data_contract_file=location, schema_location=schema).lint() - if ci: - write_ci_output(run, location) write_test_result(run, console, output_format, output) @@ -163,10 +157,6 @@ def test( ] = None, output_format: Annotated[OutputFormat, typer.Option(help="The target format for the test results.")] = None, logs: Annotated[bool, typer.Option(help="Print logs")] = False, - ci: Annotated[ - bool, - typer.Option(help="Optimize output for CI/CD pipelines. Emits GitHub Actions annotations and step summary."), - ] = False, ssl_verification: Annotated[ bool, typer.Option(help="SSL verification when publishing the data contract."), @@ -195,8 +185,65 @@ def test( data_contract = resolve_data_contract(location, schema_location=schema) except Exception: data_contract = None - if ci: - write_ci_output(run, location) + write_test_result(run, console, output_format, output, data_contract) + + +@app.command(name="ci") +def ci( + location: Annotated[ + str, + typer.Argument(help="The location (url or path) of the data contract yaml."), + ] = "datacontract.yaml", + schema: Annotated[ + str, + typer.Option(help="The location (url or path) of the ODCS JSON Schema"), + ] = None, + server: Annotated[ + str, + typer.Option( + help="The server configuration to run the schema and quality tests. " + "Use the key of the server object in the data contract yaml file " + "to refer to a server, e.g., `production`, or `all` for all " + "servers (default)." + ), + ] = "all", + publish: Annotated[str, typer.Option(help="The url to publish the results after the test.")] = None, + output: Annotated[ + Path, + typer.Option( + help="Specify the file path where the test results should be written to (e.g., './test-results/TEST-datacontract.xml')." + ), + ] = None, + output_format: Annotated[OutputFormat, typer.Option(help="The target format for the test results.")] = None, + logs: Annotated[bool, typer.Option(help="Print logs")] = False, + ssl_verification: Annotated[ + bool, + typer.Option(help="SSL verification when publishing the data contract."), + ] = True, + debug: debug_option = None, +): + """ + Run lint and tests for CI/CD pipelines. Emits GitHub Actions annotations and step summary. + """ + enable_debug_logging(debug) + + console.print(f"Testing {location}") + if server == "all": + server = None + run = DataContract( + data_contract_file=location, + schema_location=schema, + publish_url=publish, + server=server, + ssl_verification=ssl_verification, + ).test() + if logs: + _print_logs(run) + try: + data_contract = resolve_data_contract(location, schema_location=schema) + except Exception: + data_contract = None + write_ci_output(run, location) write_test_result(run, console, output_format, output, data_contract) diff --git a/tests/test_ci_output.py b/tests/test_ci_output.py index 8ba9b9f0..6bca50e8 100644 --- a/tests/test_ci_output.py +++ b/tests/test_ci_output.py @@ -135,23 +135,17 @@ def test_step_summary_markdown_structure(): os.unlink(summary_path) -def test_test_ci_flag(): - result = runner.invoke(app, ["test", "--ci", "--help"]) +def test_ci_help(): + result = runner.invoke(app, ["ci", "--help"]) assert result.exit_code == 0 - assert "--ci" in result.stdout + assert "CI/CD" in result.stdout -def test_lint_ci_flag(): - result = runner.invoke(app, ["lint", "--ci", "--help"]) +def test_ci_with_valid_contract(): + result = runner.invoke(app, ["ci", "fixtures/lint/valid_datacontract.yaml"]) assert result.exit_code == 0 - assert "--ci" in result.stdout -def test_test_ci_flag_with_valid_contract(): - result = runner.invoke(app, ["test", "--ci", "fixtures/lint/valid_datacontract.yaml"]) - assert result.exit_code == 0 - - -def test_lint_ci_flag_with_valid_contract(): - result = runner.invoke(app, ["lint", "--ci", "fixtures/lint/valid_datacontract.yaml"]) - assert result.exit_code == 0 +def test_ci_with_missing_file(): + result = runner.invoke(app, ["ci", "nonexistent.yaml"]) + assert result.exit_code == 1 From 45177b866a3f3dc5065357e03daa7d141b5dc9ab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakob=20Sch=C3=B6dl?= Date: Wed, 1 Apr 2026 16:51:18 +0200 Subject: [PATCH 05/12] feat: enhance ci command with multi-file support, Azure annotations, and --fail-on - Accept multiple positional file arguments for batch testing - Add Azure Pipelines annotation support (##vso[task.logissue]) - Add --fail-on option (warning/error/never) to control exit code threshold - Add aggregated step summary with per-contract detail sections - Add result emojis to GitHub step summary - Continue testing remaining contracts after failures Co-Authored-By: Claude Opus 4.6 (1M context) --- datacontract/cli.py | 73 ++++++++++++----- datacontract/output/ci_output.py | 120 +++++++++++++++++++--------- tests/test_ci_output.py | 133 +++++++++++++++++++++++++++++-- 3 files changed, 257 insertions(+), 69 deletions(-) diff --git a/datacontract/cli.py b/datacontract/cli.py index 773abaa9..12c4f8f2 100644 --- a/datacontract/cli.py +++ b/datacontract/cli.py @@ -20,7 +20,7 @@ ) from datacontract.lint.resolve import resolve_data_contract, resolve_data_contract_dict from datacontract.model.exceptions import DataContractException -from datacontract.output.ci_output import write_ci_output +from datacontract.output.ci_output import write_ci_output, write_ci_summary from datacontract.output.output_format import OutputFormat from datacontract.output.test_results_writer import write_test_result @@ -190,10 +190,10 @@ def test( @app.command(name="ci") def ci( - location: Annotated[ - str, - typer.Argument(help="The location (url or path) of the data contract yaml."), - ] = "datacontract.yaml", + locations: Annotated[ + Optional[list[str]], + typer.Argument(help="The location(s) (url or path) of the data contract yaml file(s)."), + ] = None, schema: Annotated[ str, typer.Option(help="The location (url or path) of the ODCS JSON Schema"), @@ -216,6 +216,12 @@ def ci( ] = None, output_format: Annotated[OutputFormat, typer.Option(help="The target format for the test results.")] = None, logs: Annotated[bool, typer.Option(help="Print logs")] = False, + fail_on: Annotated[ + str, + typer.Option( + help="Minimum severity that causes a non-zero exit code: 'warning', 'error', or 'never'." + ), + ] = "error", ssl_verification: Annotated[ bool, typer.Option(help="SSL verification when publishing the data contract."), @@ -223,28 +229,51 @@ def ci( debug: debug_option = None, ): """ - Run lint and tests for CI/CD pipelines. Emits GitHub Actions annotations and step summary. + Run tests for CI/CD pipelines. Emits GitHub Actions annotations and step summary. """ enable_debug_logging(debug) - console.print(f"Testing {location}") + if not locations: + locations = ["datacontract.yaml"] + if server == "all": server = None - run = DataContract( - data_contract_file=location, - schema_location=schema, - publish_url=publish, - server=server, - ssl_verification=ssl_verification, - ).test() - if logs: - _print_logs(run) - try: - data_contract = resolve_data_contract(location, schema_location=schema) - except Exception: - data_contract = None - write_ci_output(run, location) - write_test_result(run, console, output_format, output, data_contract) + + results = [] + severity_levels = {"warning": 0, "failed": 1, "error": 2} + should_fail = False + + for location in locations: + console.print(f"Testing {location}") + run = DataContract( + data_contract_file=location, + schema_location=schema, + publish_url=publish, + server=server, + ssl_verification=ssl_verification, + ).test() + if logs: + _print_logs(run) + results.append((location, run)) + write_ci_output(run, location) + try: + data_contract = resolve_data_contract(location, schema_location=schema) + except Exception: + data_contract = None + try: + write_test_result(run, console, output_format, output, data_contract) + except typer.Exit: + pass + result_str = run.result.value if hasattr(run.result, "value") else run.result + if fail_on != "never" and result_str in severity_levels: + fail_on_level = severity_levels.get(fail_on, 0) + if severity_levels[result_str] >= fail_on_level: + should_fail = True + + write_ci_summary(results) + + if should_fail: + raise typer.Exit(code=1) @app.command(name="export") diff --git a/datacontract/output/ci_output.py b/datacontract/output/ci_output.py index 4a33fbbe..bfc2117a 100644 --- a/datacontract/output/ci_output.py +++ b/datacontract/output/ci_output.py @@ -1,19 +1,28 @@ import os +from typing import List, Tuple from datacontract.model.run import Run from datacontract.output.test_results_writer import to_field def write_ci_output(run: Run, data_contract_file: str): - """Write CI-specific output: GitHub annotations and step summary.""" - _write_github_annotations(run, data_contract_file) - _write_github_step_summary(run, data_contract_file) + """Write CI-specific output for a single contract: annotations only.""" + _write_annotations(run, data_contract_file) -def _write_github_annotations(run: Run, data_contract_file: str): - if os.environ.get("GITHUB_ACTIONS") != "true": - return +def write_ci_summary(results: List[Tuple[str, Run]]): + """Write aggregated CI step summary for all contracts.""" + _write_github_step_summary(results) + + +def _write_annotations(run: Run, data_contract_file: str): + if os.environ.get("GITHUB_ACTIONS") == "true": + _write_github_annotations(run, data_contract_file) + elif os.environ.get("TF_BUILD") == "True": + _write_azure_annotations(run, data_contract_file) + +def _write_github_annotations(run: Run, data_contract_file: str): for check in run.checks: if check.result in ("failed", "error"): print(f"::error file={data_contract_file}::{check.name}: {check.reason}") @@ -21,44 +30,77 @@ def _write_github_annotations(run: Run, data_contract_file: str): print(f"::warning file={data_contract_file}::{check.name}: {check.reason}") -def _write_github_step_summary(run: Run, data_contract_file: str): +def _write_azure_annotations(run: Run, data_contract_file: str): + for check in run.checks: + if check.result in ("failed", "error"): + print(f"##vso[task.logissue type=error;sourcepath={data_contract_file}]{check.name}: {check.reason}") + elif check.result == "warning": + print(f"##vso[task.logissue type=warning;sourcepath={data_contract_file}]{check.name}: {check.reason}") + + +def _result_str(run: Run) -> str: + return run.result.value if hasattr(run.result, "value") else run.result + + +RESULT_EMOJI = { + "passed": "🟢 passed", + "warning": "🟠 warning", + "failed": "🔴 failed", + "error": "🔴 error", +} + + +def _write_github_step_summary(results: List[Tuple[str, Run]]): summary_path = os.environ.get("GITHUB_STEP_SUMMARY") if not summary_path: return - result_emoji = { - "passed": "passed", - "warning": "warning", - "failed": "failed", - "error": "error", - } - result_str = run.result.value if hasattr(run.result, "value") else run.result - result_display = result_emoji.get(result_str, result_str) - - total = len(run.checks) if run.checks else 0 - passed = sum(1 for c in run.checks if c.result == "passed") if run.checks else 0 - failed = sum(1 for c in run.checks if c.result == "failed") if run.checks else 0 - warnings = sum(1 for c in run.checks if c.result == "warning") if run.checks else 0 - errors = sum(1 for c in run.checks if c.result == "error") if run.checks else 0 - - duration = (run.timestampEnd - run.timestampStart).total_seconds() if run.timestampStart and run.timestampEnd else 0 - - lines = [ - f"## Data Contract CI: {data_contract_file}", - "", - f"**Result: {result_display}** | {total} checks | {passed} passed | {failed} failed | {warnings} warnings | {errors} errors | {duration:.1f}s", - "", - ] - - if run.checks: - lines.append("| Result | Check | Field | Details |") - lines.append("|--------|-------|-------|---------|") - for check in sorted(run.checks, key=lambda c: (c.result or "", c.model or "", c.field or "")): - field = to_field(run, check) or "" - reason = check.reason or "" - result_val = check.result.value if hasattr(check.result, "value") else check.result - lines.append(f"| {result_val} | {check.name} | {field} | {reason} |") + lines = [] + + # Aggregate header (only when multiple contracts) + if len(results) > 1: + total_contracts = len(results) + passed_contracts = sum(1 for _, run in results if _result_str(run) == "passed") + failed_contracts = total_contracts - passed_contracts + overall = "🟢 passed" if failed_contracts == 0 else "🔴 failed" + lines.append("## Data Contract CI") + lines.append("") + lines.append(f"**{overall}** | {total_contracts} contracts | {passed_contracts} passed | {failed_contracts} failed") + lines.append("") + lines.append("| Result | Contract |") + lines.append("|--------|----------|") + for data_contract_file, run in results: + result = RESULT_EMOJI.get(_result_str(run), _result_str(run)) + lines.append(f"| {result} | {data_contract_file} |") + lines.append("") + + # Per-contract detail sections + for data_contract_file, run in results: + result_display = RESULT_EMOJI.get(_result_str(run), _result_str(run)) + + total = len(run.checks) if run.checks else 0 + passed = sum(1 for c in run.checks if c.result == "passed") if run.checks else 0 + failed = sum(1 for c in run.checks if c.result == "failed") if run.checks else 0 + warnings = sum(1 for c in run.checks if c.result == "warning") if run.checks else 0 + errors = sum(1 for c in run.checks if c.result == "error") if run.checks else 0 + + duration = (run.timestampEnd - run.timestampStart).total_seconds() if run.timestampStart and run.timestampEnd else 0 + + heading_level = "###" if len(results) > 1 else "##" + lines.append(f"{heading_level} Data Contract CI: {data_contract_file}") + lines.append("") + lines.append(f"**Result: {result_display}** | {total} checks | {passed} passed | {failed} failed | {warnings} warnings | {errors} errors | {duration:.1f}s") lines.append("") + if run.checks: + lines.append("| Result | Check | Field | Details |") + lines.append("|--------|-------|-------|---------|") + for check in sorted(run.checks, key=lambda c: (c.result or "", c.model or "", c.field or "")): + field = to_field(run, check) or "" + reason = check.reason or "" + result_val = check.result.value if hasattr(check.result, "value") else check.result + lines.append(f"| {result_val} | {check.name} | {field} | {reason} |") + lines.append("") + with open(summary_path, "a") as f: f.write("\n".join(lines) + "\n") diff --git a/tests/test_ci_output.py b/tests/test_ci_output.py index 6bca50e8..8363a831 100644 --- a/tests/test_ci_output.py +++ b/tests/test_ci_output.py @@ -6,7 +6,7 @@ from datacontract.cli import app from datacontract.model.run import Check, ResultEnum, Run -from datacontract.output.ci_output import write_ci_output +from datacontract.output.ci_output import write_ci_output, write_ci_summary runner = CliRunner() @@ -18,6 +18,9 @@ def _make_run(checks): return run +# --- Annotation tests --- + + def test_github_annotations_emitted(capsys): run = _make_run( [ @@ -35,18 +38,19 @@ def test_github_annotations_emitted(capsys): assert "Check row count" not in captured.out -def test_no_annotations_outside_github(capsys): +def test_no_annotations_outside_ci(capsys): run = _make_run( [ Check(type="schema", name="Check col types", result=ResultEnum.failed, reason="type mismatch"), ] ) - env = {k: v for k, v in os.environ.items() if k != "GITHUB_ACTIONS"} + env = {k: v for k, v in os.environ.items() if k not in ("GITHUB_ACTIONS", "TF_BUILD")} with patch.dict(os.environ, env, clear=True): write_ci_output(run, "datacontract.yaml") captured = capsys.readouterr() assert "::error" not in captured.out + assert "##vso" not in captured.out def test_annotation_format_for_errors(capsys): @@ -62,7 +66,40 @@ def test_annotation_format_for_errors(capsys): assert captured.out.strip() == "::error file=my/contract.yaml::freshness: connection timeout" -def test_step_summary_written(): +def test_azure_annotations_emitted(capsys): + run = _make_run( + [ + Check(type="schema", name="Check col types", result=ResultEnum.failed, reason="type mismatch"), + Check(type="schema", name="Check nullability", result=ResultEnum.warning, reason="nullable changed"), + Check(type="schema", name="Check row count", result=ResultEnum.passed, reason=None), + ] + ) + with patch.dict(os.environ, {"TF_BUILD": "True"}): + write_ci_output(run, "datacontract.yaml") + + captured = capsys.readouterr() + assert "##vso[task.logissue type=error;sourcepath=datacontract.yaml]Check col types: type mismatch" in captured.out + assert "##vso[task.logissue type=warning;sourcepath=datacontract.yaml]Check nullability: nullable changed" in captured.out + assert "Check row count" not in captured.out + + +def test_azure_annotation_format_for_errors(capsys): + run = _make_run( + [ + Check(type="quality", name="freshness", result=ResultEnum.error, reason="connection timeout"), + ] + ) + with patch.dict(os.environ, {"TF_BUILD": "True"}): + write_ci_output(run, "my/contract.yaml") + + captured = capsys.readouterr() + assert captured.out.strip() == "##vso[task.logissue type=error;sourcepath=my/contract.yaml]freshness: connection timeout" + + +# --- Step summary tests --- + + +def test_step_summary_single_contract(): run = _make_run( [ Check(type="schema", name="Check types", result=ResultEnum.passed, reason=None), @@ -76,7 +113,7 @@ def test_step_summary_written(): env = {k: v for k, v in os.environ.items() if k != "GITHUB_ACTIONS"} env["GITHUB_STEP_SUMMARY"] = summary_path with patch.dict(os.environ, env, clear=True): - write_ci_output(run, "datacontract.yaml") + write_ci_summary([("datacontract.yaml", run)]) with open(summary_path) as f: content = f.read() @@ -84,6 +121,8 @@ def test_step_summary_written(): assert "| Result | Check | Field | Details |" in content assert "Check types" in content assert "Check nulls" in content + # Single contract should not have aggregate header + assert "| Result | Contract |" not in content finally: os.unlink(summary_path) @@ -100,7 +139,7 @@ def test_no_summary_without_env(): try: env = {k: v for k, v in os.environ.items() if k not in ("GITHUB_ACTIONS", "GITHUB_STEP_SUMMARY")} with patch.dict(os.environ, env, clear=True): - write_ci_output(run, "datacontract.yaml") + write_ci_summary([("datacontract.yaml", run)]) with open(summary_path) as f: content = f.read() @@ -123,11 +162,11 @@ def test_step_summary_markdown_structure(): env = {k: v for k, v in os.environ.items() if k != "GITHUB_ACTIONS"} env["GITHUB_STEP_SUMMARY"] = summary_path with patch.dict(os.environ, env, clear=True): - write_ci_output(run, "datacontract.yaml") + write_ci_summary([("datacontract.yaml", run)]) with open(summary_path) as f: content = f.read() - assert "**Result: failed**" in content + assert "**Result: 🔴 failed**" in content assert "2 checks" in content assert "1 passed" in content assert "1 failed" in content @@ -135,6 +174,42 @@ def test_step_summary_markdown_structure(): os.unlink(summary_path) +def test_step_summary_multi_contract(): + run_passed = _make_run( + [Check(type="schema", name="Check types", result=ResultEnum.passed, reason=None)] + ) + run_failed = _make_run( + [Check(type="schema", name="Check nulls", result=ResultEnum.failed, reason="not nullable")] + ) + with tempfile.NamedTemporaryFile(mode="w", suffix=".md", delete=False) as f: + summary_path = f.name + + try: + env = {k: v for k, v in os.environ.items() if k != "GITHUB_ACTIONS"} + env["GITHUB_STEP_SUMMARY"] = summary_path + with patch.dict(os.environ, env, clear=True): + write_ci_summary([("orders.yaml", run_passed), ("customers.yaml", run_failed)]) + + with open(summary_path) as f: + content = f.read() + # Aggregate header + assert "## Data Contract CI" in content + assert "2 contracts" in content + assert "1 passed" in content + assert "1 failed" in content + assert "| Result | Contract |" in content + assert "orders.yaml" in content + assert "customers.yaml" in content + # Per-contract detail sections use ### when multiple + assert "### Data Contract CI: orders.yaml" in content + assert "### Data Contract CI: customers.yaml" in content + finally: + os.unlink(summary_path) + + +# --- CLI integration tests --- + + def test_ci_help(): result = runner.invoke(app, ["ci", "--help"]) assert result.exit_code == 0 @@ -149,3 +224,45 @@ def test_ci_with_valid_contract(): def test_ci_with_missing_file(): result = runner.invoke(app, ["ci", "nonexistent.yaml"]) assert result.exit_code == 1 + + +def test_ci_multiple_files(): + result = runner.invoke( + app, ["ci", "fixtures/lint/valid_datacontract.yaml", "fixtures/lint/valid_datacontract.yaml"] + ) + assert result.exit_code == 0 + + +def test_ci_multiple_files_with_failure(): + result = runner.invoke( + app, ["ci", "fixtures/lint/valid_datacontract.yaml", "nonexistent.yaml"] + ) + assert result.exit_code == 1 + + +def test_ci_fail_on_never(): + result = runner.invoke(app, ["ci", "--fail-on", "never", "nonexistent.yaml"]) + assert result.exit_code == 0 + + +def test_ci_fail_on_warning(): + # valid_datacontract.yaml produces a warning ("Schema block is missing") + result = runner.invoke(app, ["ci", "--fail-on", "warning", "fixtures/lint/valid_datacontract.yaml"]) + assert result.exit_code == 1 + + +def test_ci_fail_on_error_is_default(): + # valid_datacontract.yaml produces warnings but not errors/failures — should pass + result = runner.invoke(app, ["ci", "fixtures/lint/valid_datacontract.yaml"]) + assert result.exit_code == 0 + + +def test_ci_continues_after_failure(): + """CI should test all contracts even if one fails.""" + result = runner.invoke( + app, ["ci", "nonexistent.yaml", "fixtures/lint/valid_datacontract.yaml"] + ) + # Should still report on the second file + assert "fixtures/lint/valid_datacontract.yaml" in result.stdout + # But exit 1 because the first failed + assert result.exit_code == 1 From dfce493e36cb9b3ab0d1b5790ace56035cff4daa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakob=20Sch=C3=B6dl?= Date: Wed, 1 Apr 2026 17:26:52 +0200 Subject: [PATCH 06/12] feat: add --json flag and fix markdown table rendering - Add --json flag to ci command for machine-readable stdout output - Sanitize markdown table cells: escape pipes, collapse newlines - Fixes broken step summary when check reasons contain pipes or newlines Co-Authored-By: Claude Opus 4.6 (1M context) --- datacontract/cli.py | 5 ++- datacontract/output/ci_output.py | 22 ++++++++-- tests/test_ci_output.py | 75 +++++++++++++++++++++++++++++++- 3 files changed, 97 insertions(+), 5 deletions(-) diff --git a/datacontract/cli.py b/datacontract/cli.py index 12c4f8f2..e35eb051 100644 --- a/datacontract/cli.py +++ b/datacontract/cli.py @@ -20,7 +20,7 @@ ) from datacontract.lint.resolve import resolve_data_contract, resolve_data_contract_dict from datacontract.model.exceptions import DataContractException -from datacontract.output.ci_output import write_ci_output, write_ci_summary +from datacontract.output.ci_output import write_ci_output, write_ci_summary, write_json_results from datacontract.output.output_format import OutputFormat from datacontract.output.test_results_writer import write_test_result @@ -216,6 +216,7 @@ def ci( ] = None, output_format: Annotated[OutputFormat, typer.Option(help="The target format for the test results.")] = None, logs: Annotated[bool, typer.Option(help="Print logs")] = False, + json_output: Annotated[bool, typer.Option("--json", help="Print test results as JSON to stdout.")] = False, fail_on: Annotated[ str, typer.Option( @@ -271,6 +272,8 @@ def ci( should_fail = True write_ci_summary(results) + if json_output: + write_json_results(results) if should_fail: raise typer.Exit(code=1) diff --git a/datacontract/output/ci_output.py b/datacontract/output/ci_output.py index bfc2117a..42c07ed1 100644 --- a/datacontract/output/ci_output.py +++ b/datacontract/output/ci_output.py @@ -1,3 +1,4 @@ +import json import os from typing import List, Tuple @@ -5,6 +6,11 @@ from datacontract.output.test_results_writer import to_field +def _sanitize_md_cell(text: str) -> str: + """Escape pipe characters and collapse newlines for use in markdown table cells.""" + return text.replace("|", "\\|").replace("\n", " ").strip() + + def write_ci_output(run: Run, data_contract_file: str): """Write CI-specific output for a single contract: annotations only.""" _write_annotations(run, data_contract_file) @@ -96,11 +102,21 @@ def _write_github_step_summary(results: List[Tuple[str, Run]]): lines.append("| Result | Check | Field | Details |") lines.append("|--------|-------|-------|---------|") for check in sorted(run.checks, key=lambda c: (c.result or "", c.model or "", c.field or "")): - field = to_field(run, check) or "" - reason = check.reason or "" + field = _sanitize_md_cell(to_field(run, check) or "") + reason = _sanitize_md_cell(check.reason or "") + name = _sanitize_md_cell(check.name or "") result_val = check.result.value if hasattr(check.result, "value") else check.result - lines.append(f"| {result_val} | {check.name} | {field} | {reason} |") + lines.append(f"| {result_val} | {name} | {field} | {reason} |") lines.append("") with open(summary_path, "a") as f: f.write("\n".join(lines) + "\n") + + +def write_json_results(results: List[Tuple[str, Run]]): + """Print test results as JSON to stdout.""" + if len(results) == 1: + print(results[0][1].model_dump_json(indent=2)) + else: + output = [json.loads(run.model_dump_json()) for _, run in results] + print(json.dumps(output, indent=2)) diff --git a/tests/test_ci_output.py b/tests/test_ci_output.py index 8363a831..1f83c41b 100644 --- a/tests/test_ci_output.py +++ b/tests/test_ci_output.py @@ -1,3 +1,4 @@ +import json import os import tempfile from unittest.mock import patch @@ -6,7 +7,7 @@ from datacontract.cli import app from datacontract.model.run import Check, ResultEnum, Run -from datacontract.output.ci_output import write_ci_output, write_ci_summary +from datacontract.output.ci_output import write_ci_output, write_ci_summary, write_json_results runner = CliRunner() @@ -174,6 +175,37 @@ def test_step_summary_markdown_structure(): os.unlink(summary_path) +def test_step_summary_sanitizes_reason(): + run = _make_run( + [ + Check( + type="schema", + name="Test Data Contract", + result=ResultEnum.error, + reason="1 validation error\ninfo.title\n Input should be a string | got int", + ), + ] + ) + with tempfile.NamedTemporaryFile(mode="w", suffix=".md", delete=False) as f: + summary_path = f.name + + try: + env = {k: v for k, v in os.environ.items() if k != "GITHUB_ACTIONS"} + env["GITHUB_STEP_SUMMARY"] = summary_path + with patch.dict(os.environ, env, clear=True): + write_ci_summary([("datacontract.yaml", run)]) + + with open(summary_path) as f: + content = f.read() + # Newlines in reason must be collapsed, pipes must be escaped + table_lines = [l for l in content.splitlines() if l.startswith("|") and "Test Data Contract" in l] + assert len(table_lines) == 1, "Check should be on a single table row" + assert "\\|" in table_lines[0], "Pipe in reason should be escaped" + assert "\n" not in table_lines[0] + finally: + os.unlink(summary_path) + + def test_step_summary_multi_contract(): run_passed = _make_run( [Check(type="schema", name="Check types", result=ResultEnum.passed, reason=None)] @@ -266,3 +298,44 @@ def test_ci_continues_after_failure(): assert "fixtures/lint/valid_datacontract.yaml" in result.stdout # But exit 1 because the first failed assert result.exit_code == 1 + + +# --- JSON output tests --- + + +def test_json_output_single(capsys): + run = _make_run( + [Check(type="schema", name="Check types", result=ResultEnum.passed, reason=None)] + ) + write_json_results([("datacontract.yaml", run)]) + captured = capsys.readouterr() + data = json.loads(captured.out) + assert data["result"] == "passed" + assert len(data["checks"]) == 1 + + +def test_json_output_multi(capsys): + run1 = _make_run( + [Check(type="schema", name="Check types", result=ResultEnum.passed, reason=None)] + ) + run2 = _make_run( + [Check(type="schema", name="Check nulls", result=ResultEnum.failed, reason="not nullable")] + ) + write_json_results([("orders.yaml", run1), ("customers.yaml", run2)]) + captured = capsys.readouterr() + data = json.loads(captured.out) + assert isinstance(data, list) + assert len(data) == 2 + assert data[0]["result"] == "passed" + assert data[1]["result"] == "failed" + + +def test_ci_json_flag(): + result = runner.invoke(app, ["ci", "--json", "fixtures/lint/valid_datacontract.yaml"]) + assert result.exit_code == 0 + # stdout contains JSON — find it after the rich table output + # The JSON starts with '{' + json_start = result.stdout.index("{") + data = json.loads(result.stdout[json_start:]) + assert "result" in data + assert "checks" in data From da752281224cb3f2eb9c3a148d2d672d2b83ac24 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakob=20Sch=C3=B6dl?= Date: Wed, 1 Apr 2026 17:40:22 +0200 Subject: [PATCH 07/12] docs: add ci command reference, workflow examples, and --json to README Co-Authored-By: Claude Opus 4.6 (1M context) --- CHANGELOG.md | 2 +- README.md | 159 ++++++++++++++++++++++++++++++++++++++++++++++----- 2 files changed, 147 insertions(+), 14 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5489ad98..ea28ef4b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,7 +8,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## Unreleased ### Added -- Added `ci` command for CI/CD-optimized test runs: GitHub Actions annotations and step summary +- Added `ci` command for CI/CD-optimized test runs: multi-file support, GitHub Actions annotations and step summary, Azure DevOps annotations, `--fail-on` flag, `--json` output ### Fixed - Fix SQL export generating multiple PRIMARY KEY constraints for composite keys (#1026) diff --git a/README.md b/README.md index 3c40fc0d..4c2987cb 100644 --- a/README.md +++ b/README.md @@ -261,6 +261,7 @@ Commands - [init](#init) - [lint](#lint) - [test](#test) +- [ci](#ci) - [export](#export) - [import](#import) - [catalog](#catalog) @@ -374,16 +375,7 @@ Data Contract CLI connects to a data source and runs schema and quality tests to $ datacontract test --server production datacontract.yaml ``` -#### CI/CD Usage - -Use the `ci` command for CI/CD-optimized test runs. When running in GitHub Actions, it automatically emits: -- **Annotations**: Inline `::error` and `::warning` annotations for failed checks -- **Step Summary**: A markdown results table in the GitHub Actions job summary - -```bash -$ datacontract ci datacontract.yaml -$ datacontract ci --server production datacontract.yaml -``` +For CI/CD pipelines, see [`ci`](#ci). To connect to the databases the `server` block in the datacontract.yaml is used to set up the connection. In addition, credentials, such as username and passwords, may be defined with environment variables. @@ -1077,6 +1069,147 @@ models: ``` +### ci +``` + + Usage: datacontract ci [OPTIONS] [LOCATIONS]... + + Run tests for CI/CD pipelines. Emits GitHub Actions annotations and step + summary. + +╭─ Arguments ──────────────────────────────────────────────────────────────────╮ +│ locations [LOCATIONS]... The location(s) (url or path) of the data │ +│ contract yaml file(s). │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Options ────────────────────────────────────────────────────────────────────╮ +│ --schema TEXT The location (url │ +│ or path) of the │ +│ ODCS JSON Schema │ +│ --server TEXT The server │ +│ configuration to │ +│ run the schema and │ +│ quality tests. Use │ +│ the key of the │ +│ server object in │ +│ the data contract │ +│ yaml file to refer │ +│ to a server, e.g., │ +│ `production`, or │ +│ `all` for all │ +│ servers (default). │ +│ [default: all] │ +│ --publish TEXT The url to publish │ +│ the results after │ +│ the test. │ +│ --output PATH Specify the file │ +│ path where the test │ +│ results should be │ +│ written to (e.g., │ +│ './test-results/TE… │ +│ --output-format [json|junit] The target format │ +│ for the test │ +│ results. │ +│ --logs --no-logs Print logs │ +│ [default: no-logs] │ +│ --json --no-json Print test results │ +│ as JSON to stdout. │ +│ [default: no-json] │ +│ --fail-on TEXT Minimum severity │ +│ that causes a │ +│ non-zero exit code: │ +│ 'warning', 'error', │ +│ or 'never'. │ +│ [default: error] │ +│ --ssl-verification --no-ssl-verific… SSL verification │ +│ when publishing the │ +│ data contract. │ +│ [default: │ +│ ssl-verification] │ +│ --debug --no-debug Enable debug │ +│ logging │ +│ --help Show this message │ +│ and exit. │ +╰──────────────────────────────────────────────────────────────────────────────╯ + +``` + +The `ci` command wraps [`test`](#test) with CI/CD-specific features: + +- **Multiple contracts**: `datacontract ci contracts/*.yaml` +- **CI annotations:** Inline annotations for failed checks (GitHub Actions and Azure DevOps) +- **Markdown summary** of the test results (GitHub Actions) +- **`--json`**: Print test results as JSON to stdout for machine-readable output +- **`--fail-on`**: Control the minimum severity that causes a non-zero exit code. Default is `error`; set to `warning` to also fail on warnings, or `never` to always exit 0. + +See the [test command](#test) for supported server types and their configuration. + +```bash +# Single contract +$ datacontract ci datacontract.yaml + +# Multiple contracts +$ datacontract ci contracts/*.yaml + +# Fail on warnings too +$ datacontract ci --fail-on warning datacontract.yaml + +# JSON output for scripting +$ datacontract ci --json datacontract.yaml +``` + +
+GitHub Actions workflow example + +```yaml +# .github/workflows/datacontract.yml +name: Data Contract CI + +on: + push: + branches: [main] + pull_request: + +jobs: + datacontract-ci: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: "3.11" + - run: pip install datacontract-cli + # Test one or more data contracts (supports globs, e.g. contracts/*.yaml) + - run: datacontract ci datacontract.yaml +``` + +
+ +
+Azure DevOps pipeline example + +```yaml +# azure-pipelines.yml +trigger: + branches: + include: + - main + +pool: + vmImage: "ubuntu-latest" + +steps: + - task: UsePythonVersion@0 + inputs: + versionSpec: "3.11" + - script: pip install datacontract-cli + displayName: "Install datacontract-cli" + - script: datacontract ci datacontract.yaml + displayName: "Run data contract tests" +``` + +
+ + ### export ``` @@ -1892,9 +2025,9 @@ Create a data contract based on the actual data. This is the fastest way to get $ datacontract lint ``` -4. Set up a CI pipeline that executes daily for continuous quality checks. Use the `ci` command for - CI-optimized output (GitHub Actions annotations and step summary). You can also report the - test results to tools like [Data Mesh Manager](https://datamesh-manager.com) +4. Set up a CI pipeline that executes daily for continuous quality checks. Use the [`ci`](#ci) command for + CI-optimized output (GitHub Actions annotations and step summary, Azure DevOps annotations). + You can also report the test results to tools like [Data Mesh Manager](https://datamesh-manager.com). ```bash $ datacontract ci --publish https://api.datamesh-manager.com/api/test-results ``` From 230eae5604e8e5a1843d7bad675cc44d40d69dbb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakob=20Sch=C3=B6dl?= Date: Wed, 1 Apr 2026 17:44:37 +0200 Subject: [PATCH 08/12] fix: ruff E741 ambiguous variable name in test, add comment to Azure example Co-Authored-By: Claude Opus 4.6 (1M context) --- README.md | 1 + tests/test_ci_output.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 4c2987cb..c18fda95 100644 --- a/README.md +++ b/README.md @@ -1203,6 +1203,7 @@ steps: versionSpec: "3.11" - script: pip install datacontract-cli displayName: "Install datacontract-cli" + # Test one or more data contracts (supports globs, e.g. contracts/*.yaml) - script: datacontract ci datacontract.yaml displayName: "Run data contract tests" ``` diff --git a/tests/test_ci_output.py b/tests/test_ci_output.py index 1f83c41b..bd11400a 100644 --- a/tests/test_ci_output.py +++ b/tests/test_ci_output.py @@ -198,7 +198,7 @@ def test_step_summary_sanitizes_reason(): with open(summary_path) as f: content = f.read() # Newlines in reason must be collapsed, pipes must be escaped - table_lines = [l for l in content.splitlines() if l.startswith("|") and "Test Data Contract" in l] + table_lines = [line for line in content.splitlines() if line.startswith("|") and "Test Data Contract" in line] assert len(table_lines) == 1, "Check should be on a single table row" assert "\\|" in table_lines[0], "Pipe in reason should be escaped" assert "\n" not in table_lines[0] From 7dcd7ae3a9254a74641f7758b6e556d92cf03e87 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakob=20Sch=C3=B6dl?= Date: Wed, 1 Apr 2026 18:29:10 +0200 Subject: [PATCH 09/12] =?UTF-8?q?refactor:=20clean=20up=20ci=20output=20?= =?UTF-8?q?=E2=80=94=20remove=20redundant=20result=20helpers,=20fix=20--js?= =?UTF-8?q?on=20stdout?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Remove _result_str helper and hasattr guards (ResultEnum is a str enum) - Move GITHUB_STEP_SUMMARY check from private fn to write_ci_summary - Simplify multi-contract summary line to "passed X/Y contracts successful" - Send human output to stderr when --json is used so stdout is clean JSON - Exclude None fields from JSON output - Replace integration test for _sanitize_md_cell with direct unit test Co-Authored-By: Claude Opus 4.6 (1M context) --- datacontract/cli.py | 22 +++++++----- datacontract/output/ci_output.py | 59 +++++++++++++------------------- tests/test_ci_output.py | 43 ++++------------------- 3 files changed, 43 insertions(+), 81 deletions(-) diff --git a/datacontract/cli.py b/datacontract/cli.py index e35eb051..adc66e02 100644 --- a/datacontract/cli.py +++ b/datacontract/cli.py @@ -240,12 +240,15 @@ def ci( if server == "all": server = None + # When --json is used, send human-readable output to stderr so stdout is clean JSON. + out = Console(stderr=True) if json_output else console + results = [] severity_levels = {"warning": 0, "failed": 1, "error": 2} should_fail = False for location in locations: - console.print(f"Testing {location}") + out.print(f"Testing {location}") run = DataContract( data_contract_file=location, schema_location=schema, @@ -254,7 +257,7 @@ def ci( ssl_verification=ssl_verification, ).test() if logs: - _print_logs(run) + _print_logs(run, out) results.append((location, run)) write_ci_output(run, location) try: @@ -262,13 +265,12 @@ def ci( except Exception: data_contract = None try: - write_test_result(run, console, output_format, output, data_contract) + write_test_result(run, out, output_format, output, data_contract) except typer.Exit: pass - result_str = run.result.value if hasattr(run.result, "value") else run.result - if fail_on != "never" and result_str in severity_levels: + if fail_on != "never" and run.result in severity_levels: fail_on_level = severity_levels.get(fail_on, 0) - if severity_levels[result_str] >= fail_on_level: + if severity_levels[run.result] >= fail_on_level: should_fail = True write_ci_summary(results) @@ -600,10 +602,12 @@ def api( uvicorn.run(**uvicorn_args) -def _print_logs(run): - console.print("\nLogs:") +def _print_logs(run, out=None): + if out is None: + out = console + out.print("\nLogs:") for log in run.logs: - console.print(log.timestamp.strftime("%y-%m-%d %H:%M:%S"), log.level.ljust(5), log.message) + out.print(log.timestamp.strftime("%y-%m-%d %H:%M:%S"), log.level.ljust(5), log.message) if __name__ == "__main__": diff --git a/datacontract/output/ci_output.py b/datacontract/output/ci_output.py index 42c07ed1..8923df96 100644 --- a/datacontract/output/ci_output.py +++ b/datacontract/output/ci_output.py @@ -13,21 +13,20 @@ def _sanitize_md_cell(text: str) -> str: def write_ci_output(run: Run, data_contract_file: str): """Write CI-specific output for a single contract: annotations only.""" - _write_annotations(run, data_contract_file) - - -def write_ci_summary(results: List[Tuple[str, Run]]): - """Write aggregated CI step summary for all contracts.""" - _write_github_step_summary(results) - - -def _write_annotations(run: Run, data_contract_file: str): if os.environ.get("GITHUB_ACTIONS") == "true": _write_github_annotations(run, data_contract_file) elif os.environ.get("TF_BUILD") == "True": _write_azure_annotations(run, data_contract_file) +def write_ci_summary(results: List[Tuple[str, Run]]): + """Write aggregated CI step summary for all contracts.""" + summary_path = os.environ.get("GITHUB_STEP_SUMMARY") + if not summary_path: + return + _write_github_step_summary(results, summary_path) + + def _write_github_annotations(run: Run, data_contract_file: str): for check in run.checks: if check.result in ("failed", "error"): @@ -44,10 +43,6 @@ def _write_azure_annotations(run: Run, data_contract_file: str): print(f"##vso[task.logissue type=warning;sourcepath={data_contract_file}]{check.name}: {check.reason}") -def _result_str(run: Run) -> str: - return run.result.value if hasattr(run.result, "value") else run.result - - RESULT_EMOJI = { "passed": "🟢 passed", "warning": "🟠 warning", @@ -56,46 +51,41 @@ def _result_str(run: Run) -> str: } -def _write_github_step_summary(results: List[Tuple[str, Run]]): - summary_path = os.environ.get("GITHUB_STEP_SUMMARY") - if not summary_path: - return - +def _write_github_step_summary(results: List[Tuple[str, Run]], summary_path: str): lines = [] # Aggregate header (only when multiple contracts) if len(results) > 1: - total_contracts = len(results) - passed_contracts = sum(1 for _, run in results if _result_str(run) == "passed") - failed_contracts = total_contracts - passed_contracts - overall = "🟢 passed" if failed_contracts == 0 else "🔴 failed" + n_total = len(results) + n_passed = sum(1 for _, run in results if run.result == "passed") + overall = "🟢 passed" if n_passed == n_total else "🔴 failed" lines.append("## Data Contract CI") lines.append("") - lines.append(f"**{overall}** | {total_contracts} contracts | {passed_contracts} passed | {failed_contracts} failed") + lines.append(f"**{overall}** — {n_passed}/{n_total} contracts successful") lines.append("") lines.append("| Result | Contract |") lines.append("|--------|----------|") for data_contract_file, run in results: - result = RESULT_EMOJI.get(_result_str(run), _result_str(run)) + result = RESULT_EMOJI.get(run.result, run.result) lines.append(f"| {result} | {data_contract_file} |") lines.append("") # Per-contract detail sections for data_contract_file, run in results: - result_display = RESULT_EMOJI.get(_result_str(run), _result_str(run)) + result_display = RESULT_EMOJI.get(run.result, run.result) - total = len(run.checks) if run.checks else 0 - passed = sum(1 for c in run.checks if c.result == "passed") if run.checks else 0 - failed = sum(1 for c in run.checks if c.result == "failed") if run.checks else 0 - warnings = sum(1 for c in run.checks if c.result == "warning") if run.checks else 0 - errors = sum(1 for c in run.checks if c.result == "error") if run.checks else 0 + n_total = len(run.checks) if run.checks else 0 + n_passed = sum(1 for c in run.checks if c.result == "passed") if run.checks else 0 + n_failed = sum(1 for c in run.checks if c.result == "failed") if run.checks else 0 + n_warnings = sum(1 for c in run.checks if c.result == "warning") if run.checks else 0 + n_errors = sum(1 for c in run.checks if c.result == "error") if run.checks else 0 duration = (run.timestampEnd - run.timestampStart).total_seconds() if run.timestampStart and run.timestampEnd else 0 heading_level = "###" if len(results) > 1 else "##" lines.append(f"{heading_level} Data Contract CI: {data_contract_file}") lines.append("") - lines.append(f"**Result: {result_display}** | {total} checks | {passed} passed | {failed} failed | {warnings} warnings | {errors} errors | {duration:.1f}s") + lines.append(f"**Result: {result_display}** | {n_total} checks | {n_passed} passed | {n_failed} failed | {n_warnings} warnings | {n_errors} errors | {duration:.1f}s") lines.append("") if run.checks: @@ -105,8 +95,7 @@ def _write_github_step_summary(results: List[Tuple[str, Run]]): field = _sanitize_md_cell(to_field(run, check) or "") reason = _sanitize_md_cell(check.reason or "") name = _sanitize_md_cell(check.name or "") - result_val = check.result.value if hasattr(check.result, "value") else check.result - lines.append(f"| {result_val} | {name} | {field} | {reason} |") + lines.append(f"| {check.result} | {name} | {field} | {reason} |") lines.append("") with open(summary_path, "a") as f: @@ -116,7 +105,7 @@ def _write_github_step_summary(results: List[Tuple[str, Run]]): def write_json_results(results: List[Tuple[str, Run]]): """Print test results as JSON to stdout.""" if len(results) == 1: - print(results[0][1].model_dump_json(indent=2)) + print(results[0][1].model_dump_json(indent=2, exclude_none=True)) else: - output = [json.loads(run.model_dump_json()) for _, run in results] + output = [json.loads(run.model_dump_json(exclude_none=True)) for _, run in results] print(json.dumps(output, indent=2)) diff --git a/tests/test_ci_output.py b/tests/test_ci_output.py index bd11400a..88510883 100644 --- a/tests/test_ci_output.py +++ b/tests/test_ci_output.py @@ -7,7 +7,7 @@ from datacontract.cli import app from datacontract.model.run import Check, ResultEnum, Run -from datacontract.output.ci_output import write_ci_output, write_ci_summary, write_json_results +from datacontract.output.ci_output import _sanitize_md_cell, write_ci_output, write_ci_summary, write_json_results runner = CliRunner() @@ -175,35 +175,8 @@ def test_step_summary_markdown_structure(): os.unlink(summary_path) -def test_step_summary_sanitizes_reason(): - run = _make_run( - [ - Check( - type="schema", - name="Test Data Contract", - result=ResultEnum.error, - reason="1 validation error\ninfo.title\n Input should be a string | got int", - ), - ] - ) - with tempfile.NamedTemporaryFile(mode="w", suffix=".md", delete=False) as f: - summary_path = f.name - - try: - env = {k: v for k, v in os.environ.items() if k != "GITHUB_ACTIONS"} - env["GITHUB_STEP_SUMMARY"] = summary_path - with patch.dict(os.environ, env, clear=True): - write_ci_summary([("datacontract.yaml", run)]) - - with open(summary_path) as f: - content = f.read() - # Newlines in reason must be collapsed, pipes must be escaped - table_lines = [line for line in content.splitlines() if line.startswith("|") and "Test Data Contract" in line] - assert len(table_lines) == 1, "Check should be on a single table row" - assert "\\|" in table_lines[0], "Pipe in reason should be escaped" - assert "\n" not in table_lines[0] - finally: - os.unlink(summary_path) +def test_sanitize_md_cell(): + assert _sanitize_md_cell("foo | bar\nbaz") == "foo \\| bar baz" def test_step_summary_multi_contract(): @@ -226,9 +199,7 @@ def test_step_summary_multi_contract(): content = f.read() # Aggregate header assert "## Data Contract CI" in content - assert "2 contracts" in content - assert "1 passed" in content - assert "1 failed" in content + assert "1/2 contracts successful" in content assert "| Result | Contract |" in content assert "orders.yaml" in content assert "customers.yaml" in content @@ -333,9 +304,7 @@ def test_json_output_multi(capsys): def test_ci_json_flag(): result = runner.invoke(app, ["ci", "--json", "fixtures/lint/valid_datacontract.yaml"]) assert result.exit_code == 0 - # stdout contains JSON — find it after the rich table output - # The JSON starts with '{' - json_start = result.stdout.index("{") - data = json.loads(result.stdout[json_start:]) + # With --json, stdout should be clean JSON (human output goes to stderr) + data = json.loads(result.stdout) assert "result" in data assert "checks" in data From b6946c229dd2c94ae7a41345f8ac92543a4f5876 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakob=20Sch=C3=B6dl?= Date: Wed, 1 Apr 2026 18:42:28 +0200 Subject: [PATCH 10/12] =?UTF-8?q?fix:=20harden=20ci=20command=20=E2=80=94?= =?UTF-8?q?=20validate=20--fail-on,=20sanitize=20annotations,=20improve=20?= =?UTF-8?q?summary?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Constrain --fail-on to click.Choice for fast failure on invalid values - Sanitize annotation name/reason to collapse newlines and handle None - Show 🟠 warning overall state when no failures but warnings exist - Disable Rich colors in ci command for cleaner CI logs - Exclude None fields from --json output Co-Authored-By: Claude Opus 4.6 (1M context) --- datacontract/cli.py | 20 +++++++----- datacontract/output/ci_output.py | 41 ++++++++++++++++++------ tests/test_ci_output.py | 53 +++++++++++++++++--------------- 3 files changed, 72 insertions(+), 42 deletions(-) diff --git a/datacontract/cli.py b/datacontract/cli.py index adc66e02..df057346 100644 --- a/datacontract/cli.py +++ b/datacontract/cli.py @@ -5,6 +5,7 @@ from pathlib import Path from typing import Iterable, List, Optional +import click import typer from click import Context from rich.console import Console @@ -220,7 +221,8 @@ def ci( fail_on: Annotated[ str, typer.Option( - help="Minimum severity that causes a non-zero exit code: 'warning', 'error', or 'never'." + click_type=click.Choice(["warning", "error", "never"], case_sensitive=False), + help="Minimum severity that causes a non-zero exit code.", ), ] = "error", ssl_verification: Annotated[ @@ -240,11 +242,15 @@ def ci( if server == "all": server = None - # When --json is used, send human-readable output to stderr so stdout is clean JSON. - out = Console(stderr=True) if json_output else console + # Plain text output for CI logs; --json sends human output to stderr. + out = Console(stderr=True, no_color=True) if json_output else Console(no_color=True) results = [] - severity_levels = {"warning": 0, "failed": 1, "error": 2} + fail_results = { + "warning": {"warning", "failed", "error"}, + "error": {"failed", "error"}, + "never": set(), + } should_fail = False for location in locations: @@ -268,10 +274,8 @@ def ci( write_test_result(run, out, output_format, output, data_contract) except typer.Exit: pass - if fail_on != "never" and run.result in severity_levels: - fail_on_level = severity_levels.get(fail_on, 0) - if severity_levels[run.result] >= fail_on_level: - should_fail = True + if run.result in fail_results[fail_on]: + should_fail = True write_ci_summary(results) if json_output: diff --git a/datacontract/output/ci_output.py b/datacontract/output/ci_output.py index 8923df96..ce63ff13 100644 --- a/datacontract/output/ci_output.py +++ b/datacontract/output/ci_output.py @@ -27,20 +27,31 @@ def write_ci_summary(results: List[Tuple[str, Run]]): _write_github_step_summary(results, summary_path) +def _sanitize_annotation(text: str | None) -> str: + """Collapse newlines and trim text for use in CI annotations.""" + if not text: + return "" + return text.replace("\r\n", " ").replace("\r", " ").replace("\n", " ").strip() + + def _write_github_annotations(run: Run, data_contract_file: str): for check in run.checks: + name = _sanitize_annotation(check.name) + reason = _sanitize_annotation(check.reason) if check.result in ("failed", "error"): - print(f"::error file={data_contract_file}::{check.name}: {check.reason}") + print(f"::error file={data_contract_file}::{name}: {reason}") elif check.result == "warning": - print(f"::warning file={data_contract_file}::{check.name}: {check.reason}") + print(f"::warning file={data_contract_file}::{name}: {reason}") def _write_azure_annotations(run: Run, data_contract_file: str): for check in run.checks: + name = _sanitize_annotation(check.name) + reason = _sanitize_annotation(check.reason) if check.result in ("failed", "error"): - print(f"##vso[task.logissue type=error;sourcepath={data_contract_file}]{check.name}: {check.reason}") + print(f"##vso[task.logissue type=error;sourcepath={data_contract_file}]{name}: {reason}") elif check.result == "warning": - print(f"##vso[task.logissue type=warning;sourcepath={data_contract_file}]{check.name}: {check.reason}") + print(f"##vso[task.logissue type=warning;sourcepath={data_contract_file}]{name}: {reason}") RESULT_EMOJI = { @@ -57,11 +68,19 @@ def _write_github_step_summary(results: List[Tuple[str, Run]], summary_path: str # Aggregate header (only when multiple contracts) if len(results) > 1: n_total = len(results) - n_passed = sum(1 for _, run in results if run.result == "passed") - overall = "🟢 passed" if n_passed == n_total else "🔴 failed" + result_values = [run.result for _, run in results] + has_failures = any(r in ("failed", "error") for r in result_values) + has_warnings = any(r == "warning" for r in result_values) + if has_failures: + overall = "🔴 failed" + elif has_warnings: + overall = "🟠 warning" + else: + overall = "🟢 passed" lines.append("## Data Contract CI") lines.append("") - lines.append(f"**{overall}** — {n_passed}/{n_total} contracts successful") + n_passed = sum(1 for r in result_values if r == "passed") + lines.append(f"**{overall}** — {n_passed}/{n_total} contracts passed") lines.append("") lines.append("| Result | Contract |") lines.append("|--------|----------|") @@ -80,12 +99,16 @@ def _write_github_step_summary(results: List[Tuple[str, Run]], summary_path: str n_warnings = sum(1 for c in run.checks if c.result == "warning") if run.checks else 0 n_errors = sum(1 for c in run.checks if c.result == "error") if run.checks else 0 - duration = (run.timestampEnd - run.timestampStart).total_seconds() if run.timestampStart and run.timestampEnd else 0 + duration = ( + (run.timestampEnd - run.timestampStart).total_seconds() if run.timestampStart and run.timestampEnd else 0 + ) heading_level = "###" if len(results) > 1 else "##" lines.append(f"{heading_level} Data Contract CI: {data_contract_file}") lines.append("") - lines.append(f"**Result: {result_display}** | {n_total} checks | {n_passed} passed | {n_failed} failed | {n_warnings} warnings | {n_errors} errors | {duration:.1f}s") + lines.append( + f"**Result: {result_display}** | {n_total} checks | {n_passed} passed | {n_failed} failed | {n_warnings} warnings | {n_errors} errors | {duration:.1f}s" + ) lines.append("") if run.checks: diff --git a/tests/test_ci_output.py b/tests/test_ci_output.py index 88510883..0b3998af 100644 --- a/tests/test_ci_output.py +++ b/tests/test_ci_output.py @@ -7,7 +7,13 @@ from datacontract.cli import app from datacontract.model.run import Check, ResultEnum, Run -from datacontract.output.ci_output import _sanitize_md_cell, write_ci_output, write_ci_summary, write_json_results +from datacontract.output.ci_output import ( + _sanitize_annotation, + _sanitize_md_cell, + write_ci_output, + write_ci_summary, + write_json_results, +) runner = CliRunner() @@ -80,7 +86,10 @@ def test_azure_annotations_emitted(capsys): captured = capsys.readouterr() assert "##vso[task.logissue type=error;sourcepath=datacontract.yaml]Check col types: type mismatch" in captured.out - assert "##vso[task.logissue type=warning;sourcepath=datacontract.yaml]Check nullability: nullable changed" in captured.out + assert ( + "##vso[task.logissue type=warning;sourcepath=datacontract.yaml]Check nullability: nullable changed" + in captured.out + ) assert "Check row count" not in captured.out @@ -94,7 +103,10 @@ def test_azure_annotation_format_for_errors(capsys): write_ci_output(run, "my/contract.yaml") captured = capsys.readouterr() - assert captured.out.strip() == "##vso[task.logissue type=error;sourcepath=my/contract.yaml]freshness: connection timeout" + assert ( + captured.out.strip() + == "##vso[task.logissue type=error;sourcepath=my/contract.yaml]freshness: connection timeout" + ) # --- Step summary tests --- @@ -179,13 +191,14 @@ def test_sanitize_md_cell(): assert _sanitize_md_cell("foo | bar\nbaz") == "foo \\| bar baz" +def test_sanitize_annotation(): + assert _sanitize_annotation("error\non line 2\r\nand line 3") == "error on line 2 and line 3" + assert _sanitize_annotation(None) == "" + + def test_step_summary_multi_contract(): - run_passed = _make_run( - [Check(type="schema", name="Check types", result=ResultEnum.passed, reason=None)] - ) - run_failed = _make_run( - [Check(type="schema", name="Check nulls", result=ResultEnum.failed, reason="not nullable")] - ) + run_passed = _make_run([Check(type="schema", name="Check types", result=ResultEnum.passed, reason=None)]) + run_failed = _make_run([Check(type="schema", name="Check nulls", result=ResultEnum.failed, reason="not nullable")]) with tempfile.NamedTemporaryFile(mode="w", suffix=".md", delete=False) as f: summary_path = f.name @@ -199,7 +212,7 @@ def test_step_summary_multi_contract(): content = f.read() # Aggregate header assert "## Data Contract CI" in content - assert "1/2 contracts successful" in content + assert "1/2 contracts passed" in content assert "| Result | Contract |" in content assert "orders.yaml" in content assert "customers.yaml" in content @@ -237,9 +250,7 @@ def test_ci_multiple_files(): def test_ci_multiple_files_with_failure(): - result = runner.invoke( - app, ["ci", "fixtures/lint/valid_datacontract.yaml", "nonexistent.yaml"] - ) + result = runner.invoke(app, ["ci", "fixtures/lint/valid_datacontract.yaml", "nonexistent.yaml"]) assert result.exit_code == 1 @@ -262,9 +273,7 @@ def test_ci_fail_on_error_is_default(): def test_ci_continues_after_failure(): """CI should test all contracts even if one fails.""" - result = runner.invoke( - app, ["ci", "nonexistent.yaml", "fixtures/lint/valid_datacontract.yaml"] - ) + result = runner.invoke(app, ["ci", "nonexistent.yaml", "fixtures/lint/valid_datacontract.yaml"]) # Should still report on the second file assert "fixtures/lint/valid_datacontract.yaml" in result.stdout # But exit 1 because the first failed @@ -275,9 +284,7 @@ def test_ci_continues_after_failure(): def test_json_output_single(capsys): - run = _make_run( - [Check(type="schema", name="Check types", result=ResultEnum.passed, reason=None)] - ) + run = _make_run([Check(type="schema", name="Check types", result=ResultEnum.passed, reason=None)]) write_json_results([("datacontract.yaml", run)]) captured = capsys.readouterr() data = json.loads(captured.out) @@ -286,12 +293,8 @@ def test_json_output_single(capsys): def test_json_output_multi(capsys): - run1 = _make_run( - [Check(type="schema", name="Check types", result=ResultEnum.passed, reason=None)] - ) - run2 = _make_run( - [Check(type="schema", name="Check nulls", result=ResultEnum.failed, reason="not nullable")] - ) + run1 = _make_run([Check(type="schema", name="Check types", result=ResultEnum.passed, reason=None)]) + run2 = _make_run([Check(type="schema", name="Check nulls", result=ResultEnum.failed, reason="not nullable")]) write_json_results([("orders.yaml", run1), ("customers.yaml", run2)]) captured = capsys.readouterr() data = json.loads(captured.out) From ef3f0c87a44a115e5ae4d4b94bd8f633f1382746 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakob=20Sch=C3=B6dl?= Date: Thu, 2 Apr 2026 13:47:45 +0200 Subject: [PATCH 11/12] =?UTF-8?q?fix:=20address=20review=20feedback=20?= =?UTF-8?q?=E2=80=94=20JSON=20location,=20annotation=20escaping,=20output?= =?UTF-8?q?=20isolation?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Include "location" field in JSON output so consumers can map results to contracts - Escape % in annotations for GitHub Actions workflow command format - Route annotations to stderr in --json mode to keep stdout as clean JSON - Use ResultEnum.value in step summary table instead of enum repr - Open step summary file with explicit UTF-8 encoding - Reject --output with multiple contracts (would overwrite) - Remove redundant resolve_data_contract call (was double-parsing) - Fix Azure annotation tests to clear GITHUB_ACTIONS env var (CI fix) Co-Authored-By: Claude Opus 4.6 (1M context) --- datacontract/cli.py | 12 ++++---- datacontract/output/ci_output.py | 47 ++++++++++++++++++++++---------- tests/test_ci_output.py | 34 +++++++++++++++++++++-- 3 files changed, 69 insertions(+), 24 deletions(-) diff --git a/datacontract/cli.py b/datacontract/cli.py index df057346..fea5f851 100644 --- a/datacontract/cli.py +++ b/datacontract/cli.py @@ -239,6 +239,10 @@ def ci( if not locations: locations = ["datacontract.yaml"] + if output and len(locations) > 1: + console.print("Error: --output cannot be used with multiple contracts (results would overwrite each other).") + raise typer.Exit(code=1) + if server == "all": server = None @@ -265,13 +269,9 @@ def ci( if logs: _print_logs(run, out) results.append((location, run)) - write_ci_output(run, location) - try: - data_contract = resolve_data_contract(location, schema_location=schema) - except Exception: - data_contract = None + write_ci_output(run, location, json_mode=json_output) try: - write_test_result(run, out, output_format, output, data_contract) + write_test_result(run, out, output_format, output) except typer.Exit: pass if run.result in fail_results[fail_on]: diff --git a/datacontract/output/ci_output.py b/datacontract/output/ci_output.py index ce63ff13..f2eeb3a9 100644 --- a/datacontract/output/ci_output.py +++ b/datacontract/output/ci_output.py @@ -1,5 +1,6 @@ import json import os +import sys from typing import List, Tuple from datacontract.model.run import Run @@ -11,12 +12,13 @@ def _sanitize_md_cell(text: str) -> str: return text.replace("|", "\\|").replace("\n", " ").strip() -def write_ci_output(run: Run, data_contract_file: str): +def write_ci_output(run: Run, data_contract_file: str, json_mode: bool = False): """Write CI-specific output for a single contract: annotations only.""" + out = sys.stderr if json_mode else sys.stdout if os.environ.get("GITHUB_ACTIONS") == "true": - _write_github_annotations(run, data_contract_file) + _write_github_annotations(run, data_contract_file, out) elif os.environ.get("TF_BUILD") == "True": - _write_azure_annotations(run, data_contract_file) + _write_azure_annotations(run, data_contract_file, out) def write_ci_summary(results: List[Tuple[str, Run]]): @@ -31,27 +33,27 @@ def _sanitize_annotation(text: str | None) -> str: """Collapse newlines and trim text for use in CI annotations.""" if not text: return "" - return text.replace("\r\n", " ").replace("\r", " ").replace("\n", " ").strip() + return text.replace("%", "%25").replace("\r\n", " ").replace("\r", " ").replace("\n", " ").strip() -def _write_github_annotations(run: Run, data_contract_file: str): +def _write_github_annotations(run: Run, data_contract_file: str, out=sys.stdout): for check in run.checks: name = _sanitize_annotation(check.name) reason = _sanitize_annotation(check.reason) if check.result in ("failed", "error"): - print(f"::error file={data_contract_file}::{name}: {reason}") + print(f"::error file={data_contract_file}::{name}: {reason}", file=out) elif check.result == "warning": - print(f"::warning file={data_contract_file}::{name}: {reason}") + print(f"::warning file={data_contract_file}::{name}: {reason}", file=out) -def _write_azure_annotations(run: Run, data_contract_file: str): +def _write_azure_annotations(run: Run, data_contract_file: str, out=sys.stdout): for check in run.checks: name = _sanitize_annotation(check.name) reason = _sanitize_annotation(check.reason) if check.result in ("failed", "error"): - print(f"##vso[task.logissue type=error;sourcepath={data_contract_file}]{name}: {reason}") + print(f"##vso[task.logissue type=error;sourcepath={data_contract_file}]{name}: {reason}", file=out) elif check.result == "warning": - print(f"##vso[task.logissue type=warning;sourcepath={data_contract_file}]{name}: {reason}") + print(f"##vso[task.logissue type=warning;sourcepath={data_contract_file}]{name}: {reason}", file=out) RESULT_EMOJI = { @@ -114,21 +116,36 @@ def _write_github_step_summary(results: List[Tuple[str, Run]], summary_path: str if run.checks: lines.append("| Result | Check | Field | Details |") lines.append("|--------|-------|-------|---------|") - for check in sorted(run.checks, key=lambda c: (c.result or "", c.model or "", c.field or "")): + for check in sorted( + run.checks, + key=lambda c: ( + c.result.value if hasattr(c.result, "value") else str(c.result or ""), + c.model or "", + c.field or "", + ), + ): field = _sanitize_md_cell(to_field(run, check) or "") reason = _sanitize_md_cell(check.reason or "") name = _sanitize_md_cell(check.name or "") - lines.append(f"| {check.result} | {name} | {field} | {reason} |") + result = check.result.value if hasattr(check.result, "value") else str(check.result) + lines.append(f"| {result} | {name} | {field} | {reason} |") lines.append("") - with open(summary_path, "a") as f: + with open(summary_path, "a", encoding="utf-8") as f: f.write("\n".join(lines) + "\n") def write_json_results(results: List[Tuple[str, Run]]): """Print test results as JSON to stdout.""" if len(results) == 1: - print(results[0][1].model_dump_json(indent=2, exclude_none=True)) + location, run = results[0] + obj = json.loads(run.model_dump_json(exclude_none=True)) + obj["location"] = location + print(json.dumps(obj, indent=2)) else: - output = [json.loads(run.model_dump_json(exclude_none=True)) for _, run in results] + output = [] + for location, run in results: + obj = json.loads(run.model_dump_json(exclude_none=True)) + obj["location"] = location + output.append(obj) print(json.dumps(output, indent=2)) diff --git a/tests/test_ci_output.py b/tests/test_ci_output.py index 0b3998af..46ea41b8 100644 --- a/tests/test_ci_output.py +++ b/tests/test_ci_output.py @@ -81,7 +81,9 @@ def test_azure_annotations_emitted(capsys): Check(type="schema", name="Check row count", result=ResultEnum.passed, reason=None), ] ) - with patch.dict(os.environ, {"TF_BUILD": "True"}): + env = {k: v for k, v in os.environ.items() if k != "GITHUB_ACTIONS"} + env["TF_BUILD"] = "True" + with patch.dict(os.environ, env, clear=True): write_ci_output(run, "datacontract.yaml") captured = capsys.readouterr() @@ -99,7 +101,9 @@ def test_azure_annotation_format_for_errors(capsys): Check(type="quality", name="freshness", result=ResultEnum.error, reason="connection timeout"), ] ) - with patch.dict(os.environ, {"TF_BUILD": "True"}): + env = {k: v for k, v in os.environ.items() if k != "GITHUB_ACTIONS"} + env["TF_BUILD"] = "True" + with patch.dict(os.environ, env, clear=True): write_ci_output(run, "my/contract.yaml") captured = capsys.readouterr() @@ -194,6 +198,7 @@ def test_sanitize_md_cell(): def test_sanitize_annotation(): assert _sanitize_annotation("error\non line 2\r\nand line 3") == "error on line 2 and line 3" assert _sanitize_annotation(None) == "" + assert _sanitize_annotation("50% done") == "50%25 done" def test_step_summary_multi_contract(): @@ -280,6 +285,23 @@ def test_ci_continues_after_failure(): assert result.exit_code == 1 +def test_ci_output_rejects_multi_with_output(): + result = runner.invoke( + app, + [ + "ci", + "--output", + "results.xml", + "--output-format", + "junit", + "fixtures/lint/valid_datacontract.yaml", + "fixtures/lint/valid_datacontract.yaml", + ], + ) + assert result.exit_code == 1 + assert "cannot be used with multiple contracts" in result.stdout + + # --- JSON output tests --- @@ -289,6 +311,7 @@ def test_json_output_single(capsys): captured = capsys.readouterr() data = json.loads(captured.out) assert data["result"] == "passed" + assert data["location"] == "datacontract.yaml" assert len(data["checks"]) == 1 @@ -301,13 +324,18 @@ def test_json_output_multi(capsys): assert isinstance(data, list) assert len(data) == 2 assert data[0]["result"] == "passed" + assert data[0]["location"] == "orders.yaml" assert data[1]["result"] == "failed" + assert data[1]["location"] == "customers.yaml" def test_ci_json_flag(): - result = runner.invoke(app, ["ci", "--json", "fixtures/lint/valid_datacontract.yaml"]) + env = {k: v for k, v in os.environ.items() if k not in ("GITHUB_ACTIONS", "TF_BUILD")} + with patch.dict(os.environ, env, clear=True): + result = runner.invoke(app, ["ci", "--json", "fixtures/lint/valid_datacontract.yaml"]) assert result.exit_code == 0 # With --json, stdout should be clean JSON (human output goes to stderr) data = json.loads(result.stdout) assert "result" in data + assert "location" in data assert "checks" in data From e058c439510f74fd163617fba7d7f2142ef3d5b8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakob=20Sch=C3=B6dl?= Date: Thu, 2 Apr 2026 13:56:32 +0200 Subject: [PATCH 12/12] fix: handle \r in md cells, use .value in RESULT_EMOJI fallback Co-Authored-By: Claude Opus 4.6 (1M context) --- datacontract/output/ci_output.py | 6 +++--- tests/test_ci_output.py | 1 + 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/datacontract/output/ci_output.py b/datacontract/output/ci_output.py index f2eeb3a9..4df73065 100644 --- a/datacontract/output/ci_output.py +++ b/datacontract/output/ci_output.py @@ -9,7 +9,7 @@ def _sanitize_md_cell(text: str) -> str: """Escape pipe characters and collapse newlines for use in markdown table cells.""" - return text.replace("|", "\\|").replace("\n", " ").strip() + return text.replace("|", "\\|").replace("\r\n", " ").replace("\r", " ").replace("\n", " ").strip() def write_ci_output(run: Run, data_contract_file: str, json_mode: bool = False): @@ -87,13 +87,13 @@ def _write_github_step_summary(results: List[Tuple[str, Run]], summary_path: str lines.append("| Result | Contract |") lines.append("|--------|----------|") for data_contract_file, run in results: - result = RESULT_EMOJI.get(run.result, run.result) + result = RESULT_EMOJI.get(run.result, run.result.value if hasattr(run.result, "value") else str(run.result)) lines.append(f"| {result} | {data_contract_file} |") lines.append("") # Per-contract detail sections for data_contract_file, run in results: - result_display = RESULT_EMOJI.get(run.result, run.result) + result_display = RESULT_EMOJI.get(run.result, run.result.value if hasattr(run.result, "value") else str(run.result)) n_total = len(run.checks) if run.checks else 0 n_passed = sum(1 for c in run.checks if c.result == "passed") if run.checks else 0 diff --git a/tests/test_ci_output.py b/tests/test_ci_output.py index 46ea41b8..411c2752 100644 --- a/tests/test_ci_output.py +++ b/tests/test_ci_output.py @@ -193,6 +193,7 @@ def test_step_summary_markdown_structure(): def test_sanitize_md_cell(): assert _sanitize_md_cell("foo | bar\nbaz") == "foo \\| bar baz" + assert _sanitize_md_cell("line1\r\nline2\rline3") == "line1 line2 line3" def test_sanitize_annotation():