diff --git a/.agents/TESTING.md b/.agents/TESTING.md index 67799a2..610d56d 100644 --- a/.agents/TESTING.md +++ b/.agents/TESTING.md @@ -51,7 +51,13 @@ def test_merge_scenarios(source, upstream, expected_keys): ### 3.3 No Autouse Fixtures `autouse=True` fixtures are **never allowed**. They hide setup logic and can cause non-obvious side effects or dependencies between tests. All fixtures used by a test must be explicitly requested in the test function's arguments. -### 3.4 Main Entry Point +### 3.4 No unittest.mock +The use of `unittest.mock` or `MagicMock` is strictly forbidden because it encourages bad design and tests that lie to you. Any kind of patching is discouraged. Instead, follow these patterns: +1. **Dependency Injection (DI)**: Design code so that dependencies (like loggers or configurations) are passed in, rather than hard-coded. This allows for simple "Spies" or test-specific objects to be used without patching. +2. **Dedicated Libraries**: Use `respx` for HTTP and `pyfakefs` for filesystem operations to mock at the IO layer. +3. **Monkeypatch (as a last resort)**: If a dependency is truly external and not easily injectable (e.g., a global function in a library), use the built-in `monkeypatch` fixture. However, always check if the code can be redesigned for DI first. + +### 3.5 Main Entry Point Every test file **must** end with a main entry point block. This ensures each file is independently executable as a script (`python tests/test_foo.py`). ```python diff --git a/.agents/formatters-architecture.md b/.agents/formatters-architecture.md new file mode 100644 index 0000000..38ab408 --- /dev/null +++ b/.agents/formatters-architecture.md @@ -0,0 +1,219 @@ +# Formatter Pipeline Architecture + +> **Purpose**: Reference document for agents and developers extending or maintaining +> `ruff_sync.formatters`. Covers the streaming vs. accumulating formatter taxonomy, +> the `finalize()` contract, how to add a new formatter, and the fingerprint strategy +> required by structured CI report formats (GitLab Code Quality, SARIF). +> +> **Source of truth**: `src/ruff_sync/formatters.py` and `src/ruff_sync/constants.py`. + +--- + +## Overview + +All human-readable and machine-readable output from the `check` and `pull` +commands flows through a single **`ResultFormatter` protocol**. This keeps +`core.py` and `cli.py` agnostic about the output medium — the same drift +detection logic emits text to a terminal, GitHub Actions workflow commands, or +a GitLab Code Quality JSON report. + +``` +core.py / cli.py + │ + │ fmt.error(message, file_path=..., drift_key="lint.select") + │ fmt.warning(...) + │ fmt.finalize() + ▼ +ResultFormatter (Protocol) + ├── TextFormatter → stderr/stdout, human-readable + ├── GithubFormatter → ::error file=...:: workflow commands + ├── JsonFormatter → NDJSON, one record per line + └── GitlabFormatter → GitLab Code Quality JSON array (pending) +``` + +--- + +## The `ResultFormatter` Protocol + +Defined in `src/ruff_sync/formatters.py`. + +### Methods + +| Method | When to call | Notes | +|---|---|---| +| `note(message)` | Status output that always prints (e.g. "Checking…") | Streaming; never buffered | +| `info(message, logger)` | Informational progress | Streaming | +| `success(message)` | Positive outcome (e.g. "In sync ✓") | Streaming | +| `error(message, file_path, logger, check_name, drift_key)` | Config drift or failure | See §Semantic Fields | +| `warning(message, file_path, logger, check_name, drift_key)` | Non-fatal issue (e.g. stale pre-commit hook) | See §Semantic Fields | +| `debug(message, logger)` | Verbose internal state | Streaming | +| `diff(diff_text)` | Unified diff between upstream and local | Intentionally ignored by structured formatters | +| `finalize()` | **Always called in a `try…finally` by `core.py`** | No-op for streaming; writes report for accumulating | + +### Semantic Fields on `error()` and `warning()` + +```python +def error( + self, + message: str, + file_path: pathlib.Path | None = None, + logger: logging.Logger | None = None, + check_name: str = "ruff-sync/config-drift", # machine-readable rule ID + drift_key: str | None = None, # e.g. "lint.select" +) -> None: ... +``` + +- **`check_name`** — machine-readable rule identifier used by structured + formatters (GitLab `check_name`, SARIF `ruleId`). Defaults to + `"ruff-sync/config-drift"`. Use a distinct name for different issue + classes; e.g. `"ruff-sync/precommit-stale"`. +- **`drift_key`** — the dotted TOML key that caused the drift (e.g. + `"lint.select"`). Used by accumulating formatters to build **stable, + per-key fingerprints** without re-parsing the message string. Pass + `None` when the drift cannot be attributed to a single key. + +--- + +## Formatter Taxonomy + +### Streaming Formatters + +Emit output immediately on each call. `finalize()` is a no-op. + +| Class | Format | Use | +|---|---|---| +| `TextFormatter` | Human text | Default terminal output | +| `GithubFormatter` | `::error file=…::` | GitHub Actions inline annotations | +| `JsonFormatter` | NDJSON | Machine consumption, piping, `jq` | + +### Accumulating Formatters *(pending implementation)* + +Collect issues internally and flush a single structured document in +`finalize()`. + +| Class | Format | Use | +|---|---|---| +| `GitlabFormatter` | GitLab Code Quality JSON array | `artifacts: reports: codequality:` | +| `SarifFormatter` *(future)* | SARIF v2.1.0 | GitHub Advanced Security, other SAST tooling | + +Key difference: accumulating formatters **must** have `finalize()` called +to produce any output. The `try…finally` pattern in `cli.py` guarantees +this even when `UpstreamError` or another exception occurs. + +--- + +## Adding a New Formatter + +1. **Add the format value** to `OutputFormat` in `src/ruff_sync/constants.py`: + ```python + class OutputFormat(str, enum.Enum): + TEXT = "text" + JSON = "json" + GITHUB = "github" + GITLAB = "gitlab" + SARIF = "sarif" # new + ``` + +2. **Implement the class** in `src/ruff_sync/formatters.py`. For a + streaming formatter, all methods emit immediately and `finalize` is a + no-op. For an accumulating formatter: + - Collect issues in `self._issues: list[...]` during `error()` / `warning()`. + - Write the document in `finalize()` (to `stdout` — the caller pipes to + a file). + +3. **Add a `case`** in `get_formatter`: + ```python + def get_formatter(output_format: OutputFormat) -> ResultFormatter: + match output_format: + case OutputFormat.SARIF: + return SarifFormatter() + ... + ``` + The `match` statement is **exhaustive** — mypy will error if a new + `OutputFormat` value is not handled. + +4. **Add tests** in `tests/test_formatters.py`. At minimum: + - `finalize()` on an empty formatter produces the correct "no issues" + document (e.g. `[]` for GitLab, a valid SARIF run with zero results). + - `error()` produces a record with the correct severity. + - Fingerprints are stable across two identical calls. + - `finalize()` on a streaming formatter emits nothing. + +--- + +## `finalize()` Call Site + +`finalize()` is always called in a `try…finally` block within `core.py`’s +`check()` and `pull()` coroutines: + +```python +fmt = get_formatter(args.output_format) +try: + ... +finally: + fmt.finalize() # no-op for streaming; flushes JSON for accumulating +``` + +`finalize()` is always called unconditionally — **do not** guard it with +`isinstance` or `hasattr` checks. Every formatter in the protocol provides +the method. + +--- + +## Fingerprint Strategy (Accumulating Formatters) + +Stable fingerprints are required so CI platforms can track whether an issue +is newly introduced or already resolved between branches. + +```python +import hashlib + +def _make_fingerprint(upstream_url: str, local_file: str, drift_key: str | None) -> str: + if drift_key: + raw = f"ruff-sync:drift:{upstream_url}:{local_file}:{drift_key}" + else: + raw = f"ruff-sync:drift:{upstream_url}:{local_file}" + return hashlib.md5(raw.encode()).hexdigest() +``` + +**Rules:** +- Must be deterministic — same inputs → same fingerprint every run. +- Must be unique per logical issue (different keys → different fingerprints). +- Must **not** include timestamps, UUIDs, or any runtime-variable data. + +--- + +## Severity Mapping + +| ruff-sync scenario | GitLab severity | SARIF level | +|---|---|---| +| Config key value differs | `major` | `error` | +| Key missing from local | `major` | `error` | +| Extra local key absent upstream | `minor` | `warning` | +| Pre-commit hook version stale | `minor` | `warning` | +| Upstream unreachable | `blocker` | `error` | + +--- + +## Exit Codes (Unchanged by Formatter Choice) + +| Code | Meaning | +|---|---| +| 0 | In sync | +| 1 | Config drift detected | +| 2 | CLI usage error (argparse) | +| 3 | Pre-commit hook drift | +| 4 | Upstream unreachable | + +CI jobs should use `when: always` (GitLab) or `if: always()` (GitHub) to +upload structured reports regardless of exit code. + +--- + +## References + +- [`src/ruff_sync/formatters.py`](../src/ruff_sync/formatters.py) +- [`src/ruff_sync/constants.py`](../src/ruff_sync/constants.py) +- [`tests/test_formatters.py`](../tests/test_formatters.py) +- [`.agents/gitlab-reports.md`](./gitlab-reports.md) — GitLab Code Quality implementation spec +- [`issue-102-context.md`](./issue-102-context.md) — full feature context for Issue #102 diff --git a/.agents/gitlab-reports.md b/.agents/gitlab-reports.md new file mode 100644 index 0000000..2fcc86c --- /dev/null +++ b/.agents/gitlab-reports.md @@ -0,0 +1,614 @@ +# GitLab CI Artifacts Reports — Implementation Reference for ruff-sync + +> **Purpose**: This document captures everything an agent needs to implement a fully compliant +> GitLab CI artifacts report for `ruff-sync`. It is self-contained; no extra research is needed. +> +> **Source docs**: +> - https://docs.gitlab.com/ci/yaml/artifacts_reports/ +> - https://docs.gitlab.com/ci/testing/code_quality/ +> - https://docs.astral.sh/ruff/integrations/#gitlab-cicd + +--- + +## 1. The GitLab Report Landscape (Overview) + +GitLab supports many `artifacts: reports:` types. The ones relevant to a Python linter/drift tool are: + +| Report type | Tier | Best fit for ruff-sync? | +|---|---|---| +| `codequality` | Free | ✅ **Yes — primary target** | +| `junit` | Free | Possible but designed for test pass/fail, not lint | +| `sast` | Free (display in MR) | Overkill — security-focused format | +| `annotations` | Free (GitLab ≥ 16.3) | Simple external links only, limited | + +**Use `codequality`**. It is the standard, Free-tier report type for linter/style violations. It displays inline in MR diffs (Ultimate) and in the MR widget (Free) as "introduced" vs "resolved" issues, and in the pipeline Code Quality tab (Premium+). + +--- + +## 2. Key Behavior Rules for `artifacts: reports:` + +These apply to **all** report types and are critical to get right: + +1. **Always uploaded regardless of job exit code.** Even if the job fails, the artifact is + uploaded. This is the whole point — you want the report even when drift is detected. + +2. **`artifacts:expire_in` is separate from `artifacts:reports:`.** Reports have no implicit + expiry; set it explicitly. GitLab defaults `codequality` to `1 week` automatically when + the built-in CodeClimate template is used, but for custom jobs you must set it yourself. + +3. **To browse the raw file in the GitLab UI**, you must *also* add `artifacts:paths:` pointing + to the same file. `artifacts:reports:codequality:` alone only makes the report parseable by + GitLab; it does not make the file downloadable/browsable without `paths:`. + +4. **Multiple jobs can emit `codequality` reports** in the same pipeline. GitLab merges them + automatically for the MR widget and pipeline view. + +5. **Child pipeline `codequality` reports appear in MR annotations** but are NOT shared with + the parent pipeline. (Known limitation — tracked in epic 8205.) + +--- + +## 3. Code Quality Report Format (the JSON Schema) + +GitLab's Code Quality report is a **JSON array** (not an object, not NDJSON). Each element is an +**issue object**. This format derives from the +[CodeClimate spec](https://github.com/codeclimate/platform/blob/master/spec/analyzers/SPEC.md), +but GitLab only reads a subset of fields. + +### 3.1 Required Fields + +Every issue object MUST have all of the following: + +```json +{ + "description": "Human-readable description of the issue", + "check_name": "machine-readable-rule-id", + "fingerprint": "unique-stable-hash-string", + "severity": "minor", + "location": { + "path": "relative/path/to/file.toml", + "lines": { + "begin": 1 + } + } +} +``` + +| Field | Type | Notes | +|---|---|---| +| `description` | string | Shown in the MR widget and diff annotation. Keep it concise and human-readable. | +| `check_name` | string | Machine-readable rule identifier. Snake_case or kebab-case. Shown in the Code Quality tab. | +| `fingerprint` | string | **Must be unique and stable.** Used to detect whether an issue was introduced or resolved between branches. Same issue = same fingerprint across runs. If the fingerprint changes, GitLab treats it as a new issue. | +| `severity` | string | Must be one of: `info`, `minor`, `major`, `critical`, `blocker`. | +| `location.path` | string | Path to the file with the issue. **Must be relative to the repo root.** Do NOT use absolute paths. Do NOT use `./` prefix (although GitLab tolerates it). | +| `location.lines.begin` | integer | Line number where the issue begins (1-indexed). | + +> [!IMPORTANT] +> The alternative `location.positions.begin.line` is also accepted (equivalent to +> `location.lines.begin`). Use `lines.begin` — it is simpler and more widely supported. + +### 3.2 Optional Fields (Ignored by GitLab but harmless) + +The full CodeClimate spec supports many more fields (`categories`, `content`, `remediation_points`, +etc.). GitLab silently ignores them — they won't break parsing but won't appear in the UI either. + +### 3.3 Minimal Valid Example + +```json +[ + { + "description": "'lint.select' has drifted from upstream: expected ['E', 'F', 'W'], got ['E', 'F']", + "check_name": "ruff-sync/config-drift", + "fingerprint": "a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4", + "severity": "major", + "location": { + "path": "pyproject.toml", + "lines": { + "begin": 1 + } + } + } +] +``` + +### 3.4 Empty Report (No Drift) + +When `ruff-sync check` finds no drift, emit an **empty JSON array**: + +```json +[] +``` + +Do NOT omit the file or output `null`. An empty array is the correct "no issues found" signal. +GitLab will use this to mark previously reported issues as "resolved." + +--- + +## 4. Fingerprint Requirements (Critical Gotcha) + +The fingerprint is the **most common source of bugs** in Code Quality integrations. + +### Rules + +- **Must be deterministic**: Same inputs → same fingerprint across all pipeline runs. +- **Must be unique per issue**: Different issues in the same file must have different fingerprints. +- **Must be stable**: If a key drifts, it should produce the same fingerprint whether the pipeline + ran today or last week, as long as the logical issue is the same. +- **MD5 hex string is the conventional format**. GitLab's own examples use `7815696ecbf1c96e6894b779456d330e` (32-char hex). Any consistent hash works. +- **Do NOT include line numbers in the fingerprint input** (if lines are unstable). For ruff-sync, + since we always point to line 1, including it is fine. + +### Recommended Fingerprint Strategy for ruff-sync + +For a "config drift" issue per key (e.g., `lint.select`), a good stable fingerprint is: + +```python +import hashlib + +def make_fingerprint(upstream_url: str, local_file: str, drift_key: str) -> str: + """Stable fingerprint for a config drift issue.""" + raw = f"ruff-sync:drift:{upstream_url}:{local_file}:{drift_key}" + return hashlib.md5(raw.encode()).hexdigest() +``` + +For a coarse "whole file drifted" issue (no per-key granularity yet): + +```python +def make_fingerprint(upstream_url: str, local_file: str) -> str: + raw = f"ruff-sync:drift:{upstream_url}:{local_file}" + return hashlib.md5(raw.encode()).hexdigest() +``` + +> [!WARNING] +> Do NOT use `uuid.uuid4()` or any random value. That produces a new fingerprint each run, which +> causes GitLab to think every issue is freshly introduced — breaking the "resolved" tracking. + +--- + +## 5. Severity Mapping for ruff-sync + +| ruff-sync scenario | Recommended severity | +|---|---| +| Config drift detected (key value differs) | `major` | +| Pre-commit hook version stale | `minor` | +| Key missing from local config (upstream has it, local doesn't) | `major` | +| Key present locally but absent upstream (extra local config) | `minor` | +| Upstream unreachable (can't check) | `blocker` (or emit no report) | + +--- + +## 6. `.gitlab-ci.yml` Job Definition + +### 6.1 Minimal Correct Job + +```yaml +ruff-sync-check: + stage: lint + image: python:3.12-slim + before_script: + - pip install ruff-sync + script: + # Exit code 1 = drift detected; we want the report even on failure + - ruff-sync check --output-format gitlab > gl-code-quality-report.json || true + artifacts: + when: always # Upload even if the job fails or has drift + reports: + codequality: gl-code-quality-report.json + paths: + - gl-code-quality-report.json # Needed to browse/download the raw file + expire_in: 1 week +``` + +> [!IMPORTANT] +> `when: always` is essential. Without it, if `ruff-sync check` exits with code 1 (drift found), +> GitLab treats the job as failed and does NOT upload the artifact — you lose the report. +> +> Alternatively, use `|| true` (or `|| exit 0`) in the script to always exit 0, then set the job +> to succeed. This is simpler but hides the failure signal. Use `when: always` instead so the job +> can still be marked as failed while the artifact is uploaded. + +### 6.2 With `allow_failure` (Recommended for Linting Jobs) + +```yaml +ruff-sync-check: + stage: lint + allow_failure: true # Don't block the pipeline on drift + script: + - ruff-sync check --output-format gitlab > gl-code-quality-report.json + artifacts: + when: always + reports: + codequality: gl-code-quality-report.json + paths: + - gl-code-quality-report.json + expire_in: 1 week +``` + +### 6.3 Using `uv` (Recommended for ruff-sync's own CI) + +```yaml +ruff-sync-check: + stage: lint + image: python:3.12-slim + before_script: + - pip install uv + - uv sync + script: + - uv run ruff-sync check --output-format gitlab > gl-code-quality-report.json + artifacts: + when: always + reports: + codequality: gl-code-quality-report.json + paths: + - gl-code-quality-report.json + expire_in: 1 week + allow_failure: true +``` + +--- + +## 7. Implementing `GitlabFormatter` in `formatters.py` + +The goal is a new `OutputFormat.GITLAB` mode (alongside `text`, `json`, `github`). Unlike the +streaming formatters (`TextFormatter`, `JsonFormatter`), **Code Quality reports must be +accumulated and written as a single JSON array at the end** — not streamed line by line. + +### 7.1 Key Design Constraint: `finalize()` Pattern + +The Code Quality report must be a single valid JSON array. This means the formatter must: + +1. Collect all issues into an internal list during the job. +2. Write the JSON array to the output file when the job finishes. + +This requires adding a `finalize(output_path: pathlib.Path) -> None` method (or similar) to the +formatter, or adapting the `ResultFormatter` protocol. + +**Option A — `GitlabFormatter` writes to a file (recommended)**: + +```python +class GitlabFormatter: + """GitLab Code Quality report formatter.""" + + def __init__(self) -> None: + self._issues: list[dict[str, Any]] = [] + + def error( + self, + message: str, + file_path: pathlib.Path | None = None, + logger: logging.Logger | None = None, + check_name: str = "ruff-sync/config-drift", + fingerprint: str | None = None, + ) -> None: + (logger or LOGGER).error(message) + self._issues.append(self._make_issue( + description=message, + check_name=check_name, + severity="major", + file_path=file_path, + fingerprint=fingerprint, + )) + + def warning( + self, + message: str, + file_path: pathlib.Path | None = None, + logger: logging.Logger | None = None, + check_name: str = "ruff-sync/config-drift", + fingerprint: str | None = None, + ) -> None: + (logger or LOGGER).warning(message) + self._issues.append(self._make_issue( + description=message, + check_name=check_name, + severity="minor", + file_path=file_path, + fingerprint=fingerprint, + )) + + def _make_issue( + self, + description: str, + check_name: str, + severity: str, + file_path: pathlib.Path | None, + fingerprint: str | None, + ) -> dict[str, Any]: + path = str(file_path) if file_path else "pyproject.toml" + fp = fingerprint or self._auto_fingerprint(description, path) + return { + "description": description, + "check_name": check_name, + "fingerprint": fp, + "severity": severity, + "location": {"path": path, "lines": {"begin": 1}}, + } + + @staticmethod + def _auto_fingerprint(description: str, path: str) -> str: + import hashlib + raw = f"ruff-sync:drift:{path}:{description}" + return hashlib.md5(raw.encode()).hexdigest() + + def finalize(self, output_path: pathlib.Path) -> None: + """Write the collected issues as a JSON array to the output file.""" + output_path.write_text(json.dumps(self._issues, indent=2)) +``` + +**Option B — `GitlabFormatter` writes to stdout as a JSON array at the end** (simpler for +piping with `>`): + +```python + def finalize(self) -> None: + """Print the collected issues as a JSON array to stdout.""" + print(json.dumps(self._issues, indent=2)) +``` + +With Option B, the CLI caller does `ruff-sync check --output-format gitlab > report.json`. + +> [!NOTE] +> Option B is simpler and consistent with how the `github` and `json` formatters work (they +> both write to stdout). The caller redirects to a file. Option A is more explicit but requires +> adding a `--output-file` CLI flag or hardcoding the filename. + +### 7.2 Protocol Impact + +The current `ResultFormatter` protocol does NOT include `finalize()`. Adding `GitlabFormatter` +requires either: + +1. **Add `finalize()` to the protocol** — all existing formatters get a no-op default. +2. **Keep `finalize()` off the protocol** — call it explicitly only when `output_format == GITLAB`. + +Option 2 is cleaner in the short term (the existing `get_formatter` callers don't need changes). + +### 7.3 `OutputFormat` enum extension + +Add `GITLAB = "gitlab"` to the `OutputFormat` enum in `src/ruff_sync/constants.py`: + +```python +class OutputFormat(str, enum.Enum): + TEXT = "text" + JSON = "json" + GITHUB = "github" + GITLAB = "gitlab" # NEW +``` + +Update `get_formatter` in `formatters.py`: + +```python +def get_formatter(output_format: OutputFormat) -> ResultFormatter: + match output_format: + case OutputFormat.GITHUB: + return GithubFormatter() + case OutputFormat.JSON: + return JsonFormatter() + case OutputFormat.GITLAB: + return GitlabFormatter() + case OutputFormat.TEXT: + return TextFormatter() +``` + +--- + +## 8. Complete End-to-End Example Output + +For a drift scenario where `lint.select` differs: + +**`gl-code-quality-report.json`** (written to stdout, redirected to file): + +```json +[ + { + "description": "Config drift detected in pyproject.toml: 'lint.select' differs from upstream", + "check_name": "ruff-sync/config-drift", + "fingerprint": "a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4", + "severity": "major", + "location": { + "path": "pyproject.toml", + "lines": { + "begin": 1 + } + } + } +] +``` + +**No drift** (in-sync): + +```json +[] +``` + +--- + +## 9. Common Gotchas and Edge Cases + +### 9.1 File Must Be Valid JSON — No Trailing Commas, No Comments + +The file must be parseable by `json.dumps`/`json.loads`. Python's `json` module is fine. +Do NOT use TOML-style comments or trailing commas. + +### 9.2 No BOM (Byte Order Mark) + +GitLab's parser **rejects files with a BOM** at the start. Python's `json.dumps` does not add a +BOM, so this is not a concern as long as you use `str.encode()` or `pathlib.Path.write_text()` +without `encoding='utf-8-sig'`. + +### 9.3 Path Must Be Relative to the Repository Root + +`location.path` must be **relative** (e.g., `pyproject.toml`, `subdir/ruff.toml`). Absolute +paths will NOT create diff annotations — GitLab won't be able to match the file in the repo tree. + +If the user runs `ruff-sync check` from a subdirectory, you need to output the path relative to +the repo root, not the working directory. + +### 9.4 `begin: 1` Is Acceptable When Exact Line Is Unknown + +ruff-sync compares entire config sections, not individual lines. Using `begin: 1` (the file's +first line) is acceptable and commonly done by tools that don't have line-level granularity. +GitLab will show the annotation at line 1 of the file. + +### 9.5 Fingerprint Stability — Avoid Including Timestamps or Dynamic Data + +Never include `datetime.now()`, UUIDs, or any runtime-variable data in the fingerprint input. +This would cause every run to report the same issue as "newly introduced." + +### 9.6 MR Widget Only Shows Delta (Introduced vs. Fixed) + +The merge request widget compares the report from the **MR branch** against the report from the +**target branch**. If you want "introduced" and "fixed" tracking to work correctly, the same +job must run on both branches with the same fingerprints. + +### 9.7 Empty File vs. Missing File + +- **Empty JSON array `[]`**: Correct. Means no issues. Previously reported issues are resolved. +- **Missing file**: GitLab silently ignores the missing artifact. Previous issues remain. +- **Invalid JSON**: GitLab may silently ignore or log a parsing error. Causes issues to remain. + +Always write the file, even on error. In `ruff-sync`, use `try...finally` to ensure `finalize()` +is called even if an `UpstreamError` or other exception occurs. + +### 9.8 The `|| true` Anti-Pattern + +```yaml +# BAD — hides all exit codes, including real errors +script: + - ruff-sync check --output-format gitlab > report.json || true +``` + +```yaml +# GOOD — job is still marked failed (for visibility), but artifact is uploaded +script: + - ruff-sync check --output-format gitlab > report.json +artifacts: + when: always + reports: + codequality: gl-code-quality-report.json +``` + +### 9.9 Tier Requirements for Full Feature Set + +| Feature | Tier | +|---|---| +| MR widget (shows introduced/fixed count) | Free | +| MR changes view (inline diff annotations) | Ultimate | +| Pipeline Code Quality tab (full list) | Premium | +| Project quality view | Ultimate (Beta) | + +On Free tier, `codequality` reports still work — they appear in the MR widget. The per-line diff +annotations require Ultimate. + +### 9.10 Multiple Upstream Sources + +If `ruff-sync check` compares against multiple upstreams, each upstream's drift should either: +- Be a **separate issue object** in the array (recommended for per-key granularity) +- Be **one issue per upstream** with a combined description + +Use a different `check_name` per upstream if needed (e.g., `ruff-sync/upstream-1-drift`). + +--- + +## 10. Integration with Existing `ruff-sync` Architecture + +### What Needs to Change + +| File | Change | +|---|---| +| `src/ruff_sync/constants.py` | Add `GITLAB = "gitlab"` to `OutputFormat` enum | +| `src/ruff_sync/formatters.py` | Add `GitlabFormatter` class; update `get_formatter` | +| `src/ruff_sync/core.py` | Ensure `finalize()` is called after `check()` / `pull()` via `try...finally` | +| `.agents/skills/ruff-sync-usage/references/ci-integration.md` | Document `--output-format gitlab` | + +### 10.3 `finalize()` Call Site (in `core.py`) + +Every formatter in the `ResultFormatter` protocol provides the `finalize()` method. +The `check()` and `pull()` coroutines in `core.py` ensure it is always called +unconditionally via a `try...finally` block: + +```python +fmt = get_formatter(args.output_format) +try: + ... +finally: + fmt.finalize() # Writes the JSON array to stdout (piped to file by CI) +``` + +**Do not** guard the call with `isinstance` or `hasattr` checks. + +### Exit Codes (Unchanged) + +The `GitlabFormatter` does not change exit codes. Exit codes remain: + +| Code | Meaning | +|---|---| +| 0 | In sync | +| 1 | Config drift detected | +| 3 | Pre-commit hook drift | +| 4 | Upstream unreachable | + +The CI job uses `when: always` to upload the artifact regardless of exit code. + +--- + +## 11. Testing the GitlabFormatter + +Add tests in `tests/test_basic.py` or a new `tests/test_formatters.py`: + +```python +import json +import pathlib +from io import StringIO +from unittest.mock import patch + +from ruff_sync.formatters import GitlabFormatter + +def test_gitlab_formatter_empty_on_no_issues(capsys): + fmt = GitlabFormatter() + fmt.finalize() + captured = capsys.readouterr() + issues = json.loads(captured.out) + assert issues == [] + +def test_gitlab_formatter_error_produces_major_issue(capsys): + fmt = GitlabFormatter() + fmt.error("drift found", file_path=pathlib.Path("pyproject.toml")) + fmt.finalize() + captured = capsys.readouterr() + issues = json.loads(captured.out) + assert len(issues) == 1 + assert issues[0]["severity"] == "major" + assert issues[0]["location"]["path"] == "pyproject.toml" + assert issues[0]["location"]["lines"]["begin"] == 1 + assert "fingerprint" in issues[0] + +def test_gitlab_formatter_fingerprint_is_stable(capsys): + fmt1 = GitlabFormatter() + fmt2 = GitlabFormatter() + fmt1.error("drift found", file_path=pathlib.Path("pyproject.toml")) + fmt2.error("drift found", file_path=pathlib.Path("pyproject.toml")) + fmt1.finalize() + out1 = capsys.readouterr().out + fmt2.finalize() + out2 = capsys.readouterr().out + issues1 = json.loads(out1) + issues2 = json.loads(out2) + assert issues1[0]["fingerprint"] == issues2[0]["fingerprint"] + +def test_gitlab_formatter_no_bom(capsys): + fmt = GitlabFormatter() + fmt.finalize() + captured = capsys.readouterr() + assert not captured.out.startswith("\ufeff") +``` + +--- + +## 12. References + +- [GitLab artifacts reports types](https://docs.gitlab.com/ci/yaml/artifacts_reports/) +- [GitLab Code Quality](https://docs.gitlab.com/ci/testing/code_quality/) +- [CodeClimate report spec (SPEC.md)](https://github.com/codeclimate/platform/blob/master/spec/analyzers/SPEC.md#data-types) +- [Ruff GitLab CI integration](https://docs.astral.sh/ruff/integrations/#gitlab-cicd) +- [ruff-sync existing formatters](../src/ruff_sync/formatters.py) +- [ruff-sync OutputFormat enum](../src/ruff_sync/constants.py) +- [ruff-sync Issue #102 context](./issue-102-context.md) diff --git a/.agents/issue-102-context.md b/.agents/issue-102-context.md new file mode 100644 index 0000000..e33b178 --- /dev/null +++ b/.agents/issue-102-context.md @@ -0,0 +1,81 @@ +# Feature Context: Richer CI output (Issue #102) + +This document provides all technical specifications and context required to complete the enhancements for the `ruff-sync check` command. No additional research is needed. + +## 1. Objective +Enhance the `check` command to support structured output formats (JSON, SARIF) and GitHub Actions inline annotations, while refining exit codes to distinguish between different failure modes. + +## 2. Current Architecture + +The project has already transitioned to a **Protocol-based formatting pipeline**. All output from the `check` and `pull` commands must flow through the `ResultFormatter` protocol. + +### Location: `src/ruff_sync/formatters.py` +The `ResultFormatter` protocol defines how diagnostics are reported: + +```python +class ResultFormatter(Protocol): + def note(self, message: str) -> None: ... + def info(self, message: str, logger: logging.Logger | None = None) -> None: ... + def success(self, message: str) -> None: ... + def error(self, message: str, file_path: pathlib.Path | None = None, logger: logging.Logger | None = None) -> None: ... + def warning(self, message: str, file_path: pathlib.Path | None = None, logger: logging.Logger | None = None) -> None: ... + def debug(self, message: str, logger: logging.Logger | None = None) -> None: ... + def diff(self, diff_text: str) -> None: ... +``` + +### Existing Implementations: +- `TextFormatter`: Default human-readable output. +- `JsonFormatter`: Structured JSON output (emits one JSON record per line). +- `GithubFormatter`: Emits GitHub Actions workflow commands (e.g., `::error file=...::`). + +## 3. Exit Code Specification (Final) + +The following exit codes MUST be strictly enforced to ensure zero collisions with `argparse` or existing hook logic: + +| Code | Meaning | Implementation Status | +|---|---|---| +| **0** | **In sync** | ✅ Implemented | +| **1** | **Config drift detected** | ✅ Implemented | +| **2** | **CLI usage error** | ✅ Handled by `argparse` | +| **3** | **Pre-commit hook drift** | ❌ **REQUIRED**: Move from code 2 to 3. | +| **4** | **Upstream unreachable** | ❌ **REQUIRED**: Catch `UpstreamError` and return 4. | + +## 4. Pending Task: Granular Drift Reporting + +Currently, `ruff-sync` reports if a file has drifted but does not specify which keys changed. For structured formats (JSON/SARIF), we need a list of the specific dotted keys that differ. + +### Requirement: +1. Implement `_find_changed_keys(source: Table, merged: Table) -> list[str]` in `src/ruff_sync/core.py`. +2. It should recursively walk both tables and return a list of keys like `["lint.select", "target-version"]`. +3. Update `ResultFormatter` to optionally accept this list, or ensure it is included in the diagnostic messages. + +## 5. Pending Task: SARIF Output (`SarifFormatter`) + +Implement the `SarifFormatter` class in `src/ruff_sync/formatters.py`. + +### SARIF Requirements (v2.1.0): +- **Tool**: `ruff-sync` +- **Rule ID**: `RUFF-SYNC-CONFIG-DRIFT` +- **Level**: `error` if config drifted, `warning` if pre-commit drifted. +- **Message**: Must include the specific keys that drifted if available. +- **Location**: Must point to the local configuration file (e.g., `pyproject.toml`). + +## 6. Implementation Checklist + +- [ ] **`core.py`**: + - [ ] Add `_find_changed_keys` helper. + - [ ] Update `check()` to return code **3** for pre-commit drift. + - [ ] Update `check()` and `pull()` to pass granular drift info to formatters. +- [ ] **`cli.py`**: + - [ ] Update `main()` to catch `UpstreamError` and return exit code **4**. +- [ ] **`formatters.py`**: + - [ ] Implement `SarifFormatter`. + - [ ] Update `get_formatter` to include `"sarif"`. +- [ ] **Documentation**: + - [ ] Update `.agents/skills/ruff-sync-usage/references/ci-integration.md` exit codes. + +## 7. Verification Plan + +1. **Test Case**: Verify `ruff-sync check --pre-commit` returns exit code **3** when hooks are stale. +2. **Test Case**: Verify `ruff-sync check` returns exit code **4** when the upstream URL is 404. +3. **Test Case**: Verify `ruff-sync check --output-format sarif` produces valid SARIF JSON. diff --git a/.agents/skills/gh-issues/SKILL.md b/.agents/skills/gh-issues/SKILL.md new file mode 100644 index 0000000..7b74262 --- /dev/null +++ b/.agents/skills/gh-issues/SKILL.md @@ -0,0 +1,181 @@ +--- +name: gh-issues +description: >- + Create, update, comment on, and link GitHub issues using the gh CLI. + Use when creating a new issue, adding sub-issues, cross-referencing issues, + or posting large structured content (e.g. research docs) to GitHub. +--- + +# Working with GitHub Issues via the `gh` CLI + +## Quick Reference + +```bash +gh issue list # Open issues +gh issue list --limit 5 # Most recent 5 +gh issue view # Read issue body + metadata +gh issue view --comments # Include comments +gh issue create --title "..." --body "..." # Create issue (inline body — see Body Size warning) +gh issue create --title "..." --body-file /tmp/body.md # Create from file (preferred) +gh issue comment --body "..." # Post a comment +gh issue edit --title "..." # Edit issue metadata +gh issue close # Close an issue +``` + +--- + +## Creating Issues + +### Always Use `--body-file` for Non-Trivial Bodies + +> [!WARNING] +> **Never use inline `--body` with heredocs for large content.** A `gh issue create` with a +> multi-kilobyte `--body "$(cat << 'EOF' ... EOF)"` heredoc can hang for 10+ minutes before +> eventually succeeding (or silently timing out). This is a shell quoting / argument-size issue. +> +> **Rule of thumb**: If the body is more than ~20 lines, always write it to a temp file first. + +```bash +# GOOD — write body to file, then reference it +cat > /tmp/issue-body.md << 'EOF' +## Summary +...long content... +EOF +gh issue create --title "My Issue" --body-file /tmp/issue-body.md + +# BAD — inline heredoc with large bodies hangs +gh issue create --title "My Issue" --body "$(cat << 'EOF' +...long content... +EOF +)" +``` + +### Full `gh issue create` Options + +```bash +gh issue create \ + --repo owner/repo \ + --title "Issue title" \ + --body-file /tmp/body.md \ + --label "enhancement" \ + --milestone "v1.0" \ + --assignee "@me" +``` + +- `--label` — must match an existing label name exactly (case-sensitive) +- `--milestone` — pass the milestone **title** string, not the number +- `--assignee "@me"` — assigns to the authenticated user + +--- + +## Linking Issues + +GitHub does not have a native parent/child relationship for issues. The convention is: + +1. **Reference by number** in the issue body or a comment: `#102` +2. **Comment on the parent** after creating the child to make the link bidirectional + +```bash +# After creating child issue #134, comment on the parent #102 +gh issue comment 102 --body "Sub-issue created: #134 — short description." +``` + +GitHub automatically renders `#134` as a clickable link and shows a cross-reference in the +child issue's timeline. This is the standard convention used across open source projects. + +--- + +## Reading Issues for Context + +Always read the full issue before starting implementation work: + +```bash +gh issue view 102 # Body + metadata +gh issue view 102 --comments # Include all comments (important for updated context) +gh issue list --milestone "v0.2.0" # All issues in a milestone +gh issue list --search "formatter" # Full-text search +``` + +--- + +## Posting Research / Reference Documents as Issues + +If you have a long reference document (e.g., a `.md` file) to attach to an issue: + +```bash +# 1. Create the issue using --body-file (NOT inline --body) +gh issue create \ + --title "GitLab CI codequality report support" \ + --body-file /tmp/issue-body.md \ + --label "enhancement" + +# 2. Verify it was created +gh issue list --limit 3 + +# 3. Link it to the parent issue +gh issue comment --body "Sub-issue: # — description." +``` + +--- + +## Common Gotchas + +### 1. Large Inline `--body` Hangs the Shell + +As noted above, `--body "$(cat ...)"` with large content can hang for many minutes. **Always use `--body-file`.** + +### 2. Check the Issue Was Actually Created + +After a slow `gh issue create`, run `gh issue list --limit 3` to confirm it succeeded before +retrying. Retrying blindly can create duplicate issues. + +### 3. Labels and Milestones Must Pre-exist + +`gh issue create --label "my-label"` will fail if `my-label` doesn't exist in the repo. +Check available labels with `gh label list`. Similarly for milestones. + +### 4. `gh` Uses the Repo Inferred from `git remote` + +When running in the repo directory, `gh` automatically targets the right repo. Pass +`--repo owner/repo` explicitly if you're unsure or running from a different directory. + +### 5. Issue Numbers Are Stable — Just Look Them Up + +If you lose track of a new issue's number after creating it: + +```bash +gh issue list --limit 5 # shows most recently updated issues +``` + +The new issue will be at the top. + +--- + +## Workflow: Create Issue from a Reference File + +This is the pattern used when posting a research document (like a `.agents/` reference file) +as a GitHub issue body. + +``` +1. Write content to /tmp/issue-body.md + - Include a preamble: what this is, what file it mirrors, link to parent issue + - Then paste the full document content + +2. gh issue create \ + --title "Descriptive title" \ + --body-file /tmp/issue-body.md \ + --label "enhancement" \ + --milestone "vX.Y.Z - ..." + +3. gh issue list --limit 3 # confirm creation, note the issue number + +4. gh issue comment --body "Sub-issue: # — one-line description." +``` + +--- + +## References + +- `gh issue --help` — full CLI reference +- [GitHub CLI manual](https://cli.github.com/manual/gh_issue) +- [GitHub cross-referencing issues](https://docs.github.com/en/get-started/writing-on-github/working-with-advanced-formatting/autolinked-references-and-urls#issues-and-pull-requests) diff --git a/.agents/skills/ruff-sync-usage/references/ci-integration.md b/.agents/skills/ruff-sync-usage/references/ci-integration.md index 969ae11..3bf6804 100644 --- a/.agents/skills/ruff-sync-usage/references/ci-integration.md +++ b/.agents/skills/ruff-sync-usage/references/ci-integration.md @@ -88,7 +88,7 @@ Run `ruff-sync check` as a pre-commit hook to catch drift before every commit: ```yaml # .pre-commit-config.yaml - repo: https://github.com/Kilo59/ruff-sync - rev: v0.1.0 # pin to a release tag + rev: v0.1.3 # pin to a release tag hooks: - id: ruff-sync-check ``` diff --git a/AGENTS.md b/AGENTS.md index 5f1bc2b..229d75b 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -150,6 +150,7 @@ uv run coverage run -m pytest -vv - Always use `from __future__ import annotations` as the first import. - Do NOT use `from pathlib import Path` or `from datetime import ...` — these are banned by the import conventions config. Use `import pathlib` and `import datetime as dt` instead. +- **Do NOT use `unittest.mock` or `MagicMock`**. This project forbids `unittest.mock` because it encourages bad design and tests that lie to you. Prefer Dependency Injection (DI) and dedicated IO-layer libraries (respx, pyfakefs) over any kind of patching. - Imports used only for type hints should go inside `if TYPE_CHECKING:` blocks. ### Style @@ -159,6 +160,7 @@ uv run coverage run -m pytest -vv - Do not create custom exception classes for simple errors (`TRY003` is ignored). - **Prefer `NamedTuple` for return types** over plain tuples to improve readability and type safety. - **Prefer `typing.Protocol` over `abc.ABC`** for abstract base classes to promote structural subtyping. +- **Prefer Dependency Injection (DI)**: Pass dependencies as arguments to functions and classes instead of hard-coding them or relying on global state. This makes code easier to test without patching. ### TOML Handling diff --git a/docs/pre-commit.md b/docs/pre-commit.md index 72aa758..c8dad9f 100644 --- a/docs/pre-commit.md +++ b/docs/pre-commit.md @@ -12,7 +12,7 @@ Verifies that your local `pyproject.toml` or `ruff.toml` matches the upstream co ```yaml - repo: https://github.com/Kilo59/ruff-sync - rev: v0.1.0 # Use the latest version + rev: v0.1.3 # Use the latest version hooks: - id: ruff-sync-check ``` @@ -23,7 +23,7 @@ Automatically pulls and applies the upstream configuration if a drift is detecte ```yaml - repo: https://github.com/Kilo59/ruff-sync - rev: v0.1.0 # Use the latest version + rev: v0.1.3 # Use the latest version hooks: - id: ruff-sync-pull ``` @@ -40,7 +40,7 @@ The hooks will automatically respect the configuration defined in your `pyprojec ```yaml repos: - repo: https://github.com/Kilo59/ruff-sync - rev: v0.1.0 + rev: v0.1.3 hooks: - id: ruff-sync-check # Common arguments: diff --git a/pyproject.toml b/pyproject.toml index ef9b3de..6122fe0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "ruff-sync" -version = "0.1.3" +version = "0.1.4.dev0" description = "Synchronize Ruff linter configuration across projects" keywords = ["ruff", "linter", "config", "synchronize", "python", "linting", "automation", "tomlkit", "pre-commit"] authors = [ diff --git a/src/ruff_sync/constants.py b/src/ruff_sync/constants.py index f85559f..a1c4532 100644 --- a/src/ruff_sync/constants.py +++ b/src/ruff_sync/constants.py @@ -52,6 +52,7 @@ class OutputFormat(str, enum.Enum): TEXT = "text" JSON = "json" GITHUB = "github" + GITLAB = "gitlab" @override def __str__(self) -> str: diff --git a/src/ruff_sync/core.py b/src/ruff_sync/core.py index 0185822..98b413e 100644 --- a/src/ruff_sync/core.py +++ b/src/ruff_sync/core.py @@ -914,84 +914,87 @@ async def check( >>> # asyncio.run(check(args)) """ fmt = get_formatter(args.output_format) - fmt.note("🔍 Checking Ruff sync status...") - - _source_toml_path = resolve_target_path(args.to, args.upstream).resolve(strict=False) - if not _source_toml_path.exists(): - fmt.error( - f"❌ Configuration file {_source_toml_path} does not exist. " - "Run 'ruff-sync pull' to create it.", - file_path=_source_toml_path, - logger=LOGGER, - ) - return 1 - - source_toml_file = TOMLFile(_source_toml_path) - source_doc = source_toml_file.read() + try: + fmt.note("🔍 Checking Ruff sync status...") + + _source_toml_path = resolve_target_path(args.to, args.upstream).resolve(strict=False) + if not _source_toml_path.exists(): + fmt.error( + f"❌ Configuration file {_source_toml_path} does not exist. " + "Run 'ruff-sync pull' to create it.", + file_path=_source_toml_path, + logger=LOGGER, + ) + return 1 - # Create a copy for comparison - source_doc_copy = tomlkit.parse(source_doc.as_string()) - merged_doc = source_doc_copy + source_toml_file = TOMLFile(_source_toml_path) + source_doc = source_toml_file.read() - async with httpx.AsyncClient() as client: - merged_doc = await _merge_multiple_upstreams( - merged_doc, - is_target_ruff_toml=is_ruff_toml_file(_source_toml_path.name), - args=args, - client=client, - ) + # Create a copy for comparison + source_doc_copy = tomlkit.parse(source_doc.as_string()) + merged_doc = source_doc_copy - is_source_ruff_toml = is_ruff_toml_file(_source_toml_path.name) - source_val: Any = None - merged_val: Any = None - if args.semantic: - if is_source_ruff_toml: - source_ruff = source_doc - merged_ruff = merged_doc - else: - source_ruff = source_doc.get("tool", {}).get("ruff") - merged_ruff = merged_doc.get("tool", {}).get("ruff") - - # Compare unwrapped versions - source_val = source_ruff.unwrap() if source_ruff is not None else None - merged_val = merged_ruff.unwrap() if merged_ruff is not None else None + async with httpx.AsyncClient() as client: + merged_doc = await _merge_multiple_upstreams( + merged_doc, + is_target_ruff_toml=is_ruff_toml_file(_source_toml_path.name), + args=args, + client=client, + ) - if source_val == merged_val: - fmt.success("✅ Ruff configuration is semantically in sync.") + is_source_ruff_toml = is_ruff_toml_file(_source_toml_path.name) + source_val: Any = None + merged_val: Any = None + if args.semantic: + if is_source_ruff_toml: + source_ruff = source_doc + merged_ruff = merged_doc + else: + source_ruff = source_doc.get("tool", {}).get("ruff") + merged_ruff = merged_doc.get("tool", {}).get("ruff") + + # Compare unwrapped versions + source_val = source_ruff.unwrap() if source_ruff is not None else None + merged_val = merged_ruff.unwrap() if merged_ruff is not None else None + + if source_val == merged_val: + fmt.success("✅ Ruff configuration is semantically in sync.") + exit_code = _check_pre_commit_sync(args, fmt) + if exit_code is not None: + return exit_code + return 0 + elif source_doc.as_string() == merged_doc.as_string(): + fmt.success("✅ Ruff configuration is in sync.") exit_code = _check_pre_commit_sync(args, fmt) if exit_code is not None: return exit_code return 0 - elif source_doc.as_string() == merged_doc.as_string(): - fmt.success("✅ Ruff configuration is in sync.") - exit_code = _check_pre_commit_sync(args, fmt) - if exit_code is not None: - return exit_code - return 0 - - try: - rel_path = _source_toml_path.relative_to(pathlib.Path.cwd()) - except ValueError: - rel_path = _source_toml_path - fmt.error( - f"❌ Ruff configuration at {rel_path} is out of sync! Run `ruff-sync` to update.", - file_path=rel_path, - logger=LOGGER, - ) - if args.diff: - _print_diff( - args=args, - fmt=fmt, - ctx=DiffContext( - source_toml_path=_source_toml_path, - source_doc=source_doc, - merged_doc=merged_doc, - source_val=source_val, - merged_val=merged_val, - ), + try: + rel_path = _source_toml_path.relative_to(pathlib.Path.cwd()) + except ValueError: + rel_path = _source_toml_path + fmt.error( + f"❌ Ruff configuration at {rel_path} is out of sync! Run `ruff-sync` to update.", + file_path=rel_path, + logger=LOGGER, ) - return 1 + + if args.diff: + _print_diff( + args=args, + fmt=fmt, + ctx=DiffContext( + source_toml_path=_source_toml_path, + source_doc=source_doc, + merged_doc=merged_doc, + source_val=source_val, + merged_val=merged_val, + ), + ) + return 1 + finally: + fmt.finalize() def _get_credential_url(upstreams: tuple[URL, ...]) -> URL | None: @@ -1085,57 +1088,61 @@ async def pull( >>> # asyncio.run(pull(args)) """ fmt = get_formatter(args.output_format) - fmt.note("🔄 Syncing Ruff...") - _source_toml_path = resolve_target_path(args.to, args.upstream).resolve(strict=False) - - source_toml_file = TOMLFile(_source_toml_path) - if _source_toml_path.exists(): - source_doc = source_toml_file.read() - elif args.init: - LOGGER.info(f"✨ Target file {_source_toml_path} does not exist, creating it.") - source_doc = tomlkit.document() - # Scaffold the file immediately to ensure we can write to the enclosing directory - try: - _source_toml_path.parent.mkdir(parents=True, exist_ok=True) - _source_toml_path.touch() - except OSError as e: - fmt.error(f"❌ Failed to create {_source_toml_path}: {e}", logger=LOGGER) + try: + fmt.note("🔄 Syncing Ruff...") + _source_toml_path = resolve_target_path(args.to, args.upstream).resolve(strict=False) + + source_toml_file = TOMLFile(_source_toml_path) + if _source_toml_path.exists(): + source_doc = source_toml_file.read() + elif args.init: + LOGGER.info(f"✨ Target file {_source_toml_path} does not exist, creating it.") + source_doc = tomlkit.document() + # Scaffold the file immediately to ensure we can write to the enclosing directory + try: + _source_toml_path.parent.mkdir(parents=True, exist_ok=True) + _source_toml_path.touch() + except OSError as e: + fmt.error(f"❌ Failed to create {_source_toml_path}: {e}", logger=LOGGER) + return 1 + else: + fmt.error( + f"❌ Configuration file {_source_toml_path} does not exist. " + "Pass the '--init' flag to create it.", + file_path=_source_toml_path, + logger=LOGGER, + ) return 1 - else: - fmt.error( - f"❌ Configuration file {_source_toml_path} does not exist. " - "Pass the '--init' flag to create it.", - file_path=_source_toml_path, - logger=LOGGER, - ) - return 1 - - async with httpx.AsyncClient() as client: - source_doc = await _merge_multiple_upstreams( - source_doc, - is_target_ruff_toml=is_ruff_toml_file(_source_toml_path.name), - args=args, - client=client, - ) - should_save = args.save if args.save is not None else args.init - if should_save: - if _source_toml_path.name == RuffConfigFileName.PYPROJECT_TOML: - LOGGER.info(f"Saving [tool.ruff-sync] configuration to {_source_toml_path.name}") - serialize_ruff_sync_config(source_doc, args) - else: - LOGGER.info( - "Skipping [tool.ruff-sync] configuration save because target is not pyproject.toml" + async with httpx.AsyncClient() as client: + source_doc = await _merge_multiple_upstreams( + source_doc, + is_target_ruff_toml=is_ruff_toml_file(_source_toml_path.name), + args=args, + client=client, ) - source_toml_file.write(source_doc) - try: - rel_path = _source_toml_path.resolve().relative_to(pathlib.Path.cwd()) - except ValueError: - rel_path = _source_toml_path.resolve() - fmt.success(f"✅ Updated {rel_path}") + should_save = args.save if args.save is not None else args.init + if should_save: + if _source_toml_path.name == RuffConfigFileName.PYPROJECT_TOML: + LOGGER.info(f"Saving [tool.ruff-sync] configuration to {_source_toml_path.name}") + serialize_ruff_sync_config(source_doc, args) + else: + LOGGER.info( + "Skipping [tool.ruff-sync] configuration save " + "because target is not pyproject.toml" + ) + + source_toml_file.write(source_doc) + try: + rel_path = _source_toml_path.resolve().relative_to(pathlib.Path.cwd()) + except ValueError: + rel_path = _source_toml_path.resolve() + fmt.success(f"✅ Updated {rel_path}") - if args.pre_commit is not MISSING and args.pre_commit: - sync_pre_commit(pathlib.Path.cwd(), dry_run=False) + if args.pre_commit is not MISSING and args.pre_commit: + sync_pre_commit(pathlib.Path.cwd(), dry_run=False) - return 0 + return 0 + finally: + fmt.finalize() diff --git a/src/ruff_sync/formatters.py b/src/ruff_sync/formatters.py index 1472be8..b2a499e 100644 --- a/src/ruff_sync/formatters.py +++ b/src/ruff_sync/formatters.py @@ -2,20 +2,31 @@ from __future__ import annotations +import hashlib import json import logging -from typing import TYPE_CHECKING, Final, Protocol - -if TYPE_CHECKING: - import pathlib +import pathlib +from typing import Final, Literal, Protocol, TypedDict from ruff_sync.constants import OutputFormat LOGGER: Final[logging.Logger] = logging.getLogger(__name__) +_DEFAULT_CHECK_NAME: Final[str] = "ruff-sync/config-drift" + class ResultFormatter(Protocol): - """Protocol for output formatters.""" + """Protocol for output formatters. + + Streaming formatters (Text, GitHub, JSON) implement ``note`` / ``info`` / + ``success`` / ``error`` / ``warning`` / ``debug`` / ``diff`` and provide a + no-op ``finalize``. + + Accumulating formatters (GitLab, SARIF) collect issues during the run and + write their structured report in ``finalize``. ``finalize`` is always + called by the CLI in a ``try...finally`` block, so all formatters receive + it unconditionally. + """ def note(self, message: str) -> None: """Print a status note (unconditional).""" @@ -31,30 +42,66 @@ def error( message: str, file_path: pathlib.Path | None = None, logger: logging.Logger | None = None, + check_name: str = _DEFAULT_CHECK_NAME, + drift_key: str | None = None, ) -> None: - """Print an error message.""" + """Print an error message. + + Args: + message: Human-readable description of the issue. + file_path: Path to the file that contains the issue. + logger: Optional logger to use instead of the module logger. + check_name: Machine-readable rule ID (used by structured formatters). + drift_key: Dotted TOML key that drifted, e.g. ``"lint.select"``. + Used by structured formatters to build stable fingerprints. + """ def warning( self, message: str, file_path: pathlib.Path | None = None, logger: logging.Logger | None = None, + check_name: str = _DEFAULT_CHECK_NAME, + drift_key: str | None = None, ) -> None: - """Print a warning message.""" + """Print a warning message. + + Args: + message: Human-readable description of the issue. + file_path: Path to the file that contains the issue. + logger: Optional logger to use instead of the module logger. + check_name: Machine-readable rule ID (used by structured formatters). + drift_key: Dotted TOML key that drifted, e.g. ``"lint.select"``. + Used by structured formatters to build stable fingerprints. + """ def debug(self, message: str, logger: logging.Logger | None = None) -> None: """Print a debug message.""" def diff(self, diff_text: str) -> None: - """Print a unified diff between configurations.""" + """Print a unified diff between configurations. + + Note: + Structured (accumulating) formatters intentionally ignore this + method — diffs are not representable in JSON report schemas. + """ + + def finalize(self) -> None: + """Finalize and flush all output. + + Streaming formatters (Text, GitHub, JSON) implement this as a no-op. + Accumulating formatters (GitLab, SARIF) write their collected report + here. The CLI calls this unconditionally inside a ``try...finally`` + block so it is always executed, even when an exception occurred. + """ class TextFormatter: """Standard text output formatter. - Delegates diagnostic messages (info, warning, error, debug) to the project logger - to ensure they benefit from standard logging configuration (colors, streams). - Primary command feedback (note, success) is printed to stdout. + Delegates diagnostic messages (info, warning, error, debug) to the project + logger to ensure they benefit from standard logging configuration (colors, + streams). Primary command feedback (note, success) is printed to stdout. """ def note(self, message: str) -> None: @@ -74,6 +121,8 @@ def error( message: str, file_path: pathlib.Path | None = None, logger: logging.Logger | None = None, + check_name: str = _DEFAULT_CHECK_NAME, + drift_key: str | None = None, ) -> None: """Log an error message.""" (logger or LOGGER).error(message) @@ -83,6 +132,8 @@ def warning( message: str, file_path: pathlib.Path | None = None, logger: logging.Logger | None = None, + check_name: str = _DEFAULT_CHECK_NAME, + drift_key: str | None = None, ) -> None: """Log a warning message.""" (logger or LOGGER).warning(message) @@ -95,19 +146,24 @@ def diff(self, diff_text: str) -> None: """Print a unified diff directly to stdout.""" print(diff_text, end="") + def finalize(self) -> None: + """No-op for streaming formatters.""" + class GithubFormatter: """GitHub Actions output formatter. - Emits `::error::` and `::warning::` workflow commands for inline annotations. + Emits ``::error::`` and ``::warning::`` workflow commands for inline + annotations. """ @staticmethod def _escape(value: str, is_property: bool = False) -> str: r"""Escapes characters for GitHub Actions workflow commands. - GitHub requires percent-encoding for '%', '\r', and '\n' in all messages. - Additionally, property values (like file and title) require escaping for ':' and ','. + GitHub requires percent-encoding for '%', '\r', and '\n' in all + messages. Additionally, property values (like file and title) require + escaping for ':' and ','. """ escaped = value.replace("%", "%25").replace("\r", "%0D").replace("\n", "%0A") if is_property: @@ -131,18 +187,17 @@ def error( message: str, file_path: pathlib.Path | None = None, logger: logging.Logger | None = None, + check_name: str = _DEFAULT_CHECK_NAME, + drift_key: str | None = None, ) -> None: """Print an error message as a GitHub Action error annotation.""" # Delegate standard log output to the logger to preserve context (logger or LOGGER).error(message) - # Strip emoji/symbols if any for the raw title, or just use a generic title file_val = self._escape(str(file_path), is_property=True) if file_path else "" file_arg = f"file={file_val},line=1," if file_path else "" title_val = self._escape("Ruff Sync Error", is_property=True) - # The message is technically what we pass after :: - # E.g. ::error file=app.js,line=1::Missing semicolon clean_msg = message.removeprefix("❌ ").removeprefix("⚠️ ") escaped_msg = self._escape(clean_msg) print(f"::error {file_arg}title={title_val}::{escaped_msg}") @@ -152,6 +207,8 @@ def warning( message: str, file_path: pathlib.Path | None = None, logger: logging.Logger | None = None, + check_name: str = _DEFAULT_CHECK_NAME, + drift_key: str | None = None, ) -> None: """Print a warning message as a GitHub Action warning annotation.""" (logger or LOGGER).warning(message) @@ -174,9 +231,12 @@ def diff(self, diff_text: str) -> None: """Print a unified diff in GitHub Actions logs (standard stdout).""" print(diff_text, end="") + def finalize(self) -> None: + """No-op for streaming formatters.""" + class JsonFormatter: - """JSON output formatter.""" + """JSON output formatter (newline-delimited JSON, one record per line).""" def note(self, message: str) -> None: """Print a status note as JSON.""" @@ -198,6 +258,8 @@ def error( message: str, file_path: pathlib.Path | None = None, logger: logging.Logger | None = None, + check_name: str = _DEFAULT_CHECK_NAME, + drift_key: str | None = None, ) -> None: """Print an error message as JSON.""" data = {"level": "error", "message": message} @@ -205,6 +267,9 @@ def error( data["file"] = str(file_path) if logger: data["logger"] = logger.name + if drift_key: + data["drift_key"] = drift_key + data["check_name"] = check_name print(json.dumps(data)) def warning( @@ -212,6 +277,8 @@ def warning( message: str, file_path: pathlib.Path | None = None, logger: logging.Logger | None = None, + check_name: str = _DEFAULT_CHECK_NAME, + drift_key: str | None = None, ) -> None: """Print a warning message as JSON.""" data = {"level": "warning", "message": message} @@ -219,6 +286,9 @@ def warning( data["file"] = str(file_path) if logger: data["logger"] = logger.name + if drift_key: + data["drift_key"] = drift_key + data["check_name"] = check_name print(json.dumps(data)) def debug(self, message: str, logger: logging.Logger | None = None) -> None: @@ -233,11 +303,179 @@ def diff(self, diff_text: str) -> None: # Strip trailing newline if any, as it's common in diff text print(json.dumps({"level": "diff", "message": diff_text.strip()})) + def finalize(self) -> None: + """No-op for streaming formatters.""" + + +class GitlabLines(TypedDict): + """GitLab Code Quality report lines.""" + + begin: int + + +class GitlabLocation(TypedDict): + """GitLab Code Quality report location.""" + + path: str + lines: GitlabLines + + +class GitlabIssue(TypedDict): + """GitLab Code Quality report issue.""" + + description: str + check_name: str + fingerprint: str + severity: Literal["info", "minor", "major", "critical", "blocker"] + location: GitlabLocation + + +class GitlabFormatter: + """GitLab Code Quality report formatter. + + Accumulates issues during the run and writes a single valid JSON array to + stdout in ``finalize()``. The CI job redirects stdout to a file:: + + ruff-sync check --output-format gitlab > gl-code-quality-report.json + + An empty array (``[]``) is emitted when no issues were collected, which + signals to GitLab that previously reported issues are now resolved. + + Fingerprints are deterministic MD5 hashes so GitLab can track whether an + issue was introduced or resolved between branches. + """ + + def __init__(self) -> None: + """Initialise an empty issue list.""" + self._issues: list[GitlabIssue] = [] + + # ------------------------------------------------------------------ + # Protocol methods + # ------------------------------------------------------------------ + + def note(self, message: str) -> None: + """Delegate to logger only; not representable in the Code Quality schema.""" + LOGGER.info(message) + + def info(self, message: str, logger: logging.Logger | None = None) -> None: + """Delegate to logger only; not included in the structured report.""" + (logger or LOGGER).info(message) + + def success(self, message: str) -> None: + """Delegate to logger only; not representable in the Code Quality schema.""" + LOGGER.info(message) + + def error( + self, + message: str, + file_path: pathlib.Path | None = None, + logger: logging.Logger | None = None, + check_name: str = _DEFAULT_CHECK_NAME, + drift_key: str | None = None, + ) -> None: + """Accumulate a major-severity Code Quality issue.""" + (logger or LOGGER).error(message) + self._issues.append( + self._make_issue( + description=message, + check_name=check_name, + severity="major", + file_path=file_path, + drift_key=drift_key, + ) + ) + + def warning( + self, + message: str, + file_path: pathlib.Path | None = None, + logger: logging.Logger | None = None, + check_name: str = _DEFAULT_CHECK_NAME, + drift_key: str | None = None, + ) -> None: + """Accumulate a minor-severity Code Quality issue.""" + (logger or LOGGER).warning(message) + self._issues.append( + self._make_issue( + description=message, + check_name=check_name, + severity="minor", + file_path=file_path, + drift_key=drift_key, + ) + ) + + def debug(self, message: str, logger: logging.Logger | None = None) -> None: + """Delegate to logger only; not included in the structured report.""" + (logger or LOGGER).debug(message) + + def diff(self, diff_text: str) -> None: + """Ignored by structured formatters — diffs are not representable in JSON report schemas.""" + + def finalize(self) -> None: + """Write the collected issues as a GitLab Code Quality JSON array to stdout. + + Always produces valid JSON: an empty array ``[]`` when no issues were + collected (signals resolution of previously reported issues to GitLab). + """ + print(json.dumps(self._issues, indent=2)) + + # ------------------------------------------------------------------ + # Internal helpers + # ------------------------------------------------------------------ + + def _make_issue( + self, + description: str, + check_name: str, + severity: Literal["info", "minor", "major", "critical", "blocker"], + file_path: pathlib.Path | None, + drift_key: str | None, + ) -> GitlabIssue: + """Build a single Code Quality issue object.""" + # Normalize location.path to be relative to the repo root (no absolute paths). + if file_path: + if file_path.is_absolute(): + try: + path = str(file_path.relative_to(pathlib.Path.cwd())) + except ValueError: + # If the absolute path is outside the CWD (repo root in CI), + # fall back to the filename so GitLab can at least attempt a map. + path = file_path.name + else: + path = str(file_path) + else: + path = "pyproject.toml" + + return { + "description": description, + "check_name": check_name, + "fingerprint": self._make_fingerprint(path, check_name, drift_key), + "severity": severity, + "location": {"path": path, "lines": {"begin": 1}}, + } + + @staticmethod + def _make_fingerprint(path: str, check_name: str, drift_key: str | None) -> str: + """Return a stable MD5 fingerprint for a Code Quality issue. + + The fingerprint must be deterministic (same inputs → same output on + every pipeline run) so GitLab can track introduced vs resolved issues. + Never include timestamps, UUIDs, or any other runtime-variable data. + """ + # Include check_name to prevent collisions if multiple rules trigger on the same key. + raw = f"ruff-sync:{check_name}:{path}:{drift_key or ''}" + return hashlib.md5(raw.encode()).hexdigest() # noqa: S324 + def get_formatter(output_format: OutputFormat) -> ResultFormatter: """Return the corresponding formatter for the given format.""" - if output_format == OutputFormat.GITHUB: - return GithubFormatter() - if output_format == OutputFormat.JSON: - return JsonFormatter() - return TextFormatter() + match output_format: + case OutputFormat.TEXT: + return TextFormatter() + case OutputFormat.GITHUB: + return GithubFormatter() + case OutputFormat.JSON: + return JsonFormatter() + case OutputFormat.GITLAB: + return GitlabFormatter() diff --git a/tests/test_formatters.py b/tests/test_formatters.py index 136bfce..30e51ed 100644 --- a/tests/test_formatters.py +++ b/tests/test_formatters.py @@ -7,9 +7,10 @@ import pytest -from ruff_sync.constants import OutputFormat +from ruff_sync.constants import MISSING, OutputFormat from ruff_sync.formatters import ( GithubFormatter, + GitlabFormatter, JsonFormatter, TextFormatter, get_formatter, @@ -18,10 +19,14 @@ if TYPE_CHECKING: from ruff_sync.formatters import ResultFormatter +# --------------------------------------------------------------------------- +# Shared fixtures +# --------------------------------------------------------------------------- + @pytest.fixture(params=[TextFormatter, GithubFormatter, JsonFormatter]) def formatter(request: pytest.FixtureRequest) -> ResultFormatter: - """Fixture providing instances of all registered formatters.""" + """Fixture providing instances of all streaming formatters.""" formatter_cls: type[ResultFormatter] = request.param return formatter_cls() @@ -56,6 +61,17 @@ def test_simple_stdout_methods( # Text and Github both print raw for these assert message in captured + def test_finalize_is_noop_for_streaming_formatters( + self, + formatter: ResultFormatter, + capsys: pytest.CaptureFixture[str], + ) -> None: + """finalize() must not emit any output for streaming formatters.""" + formatter.finalize() + captured = capsys.readouterr() + assert captured.out == "" + assert captured.err == "" + @pytest.mark.parametrize( "method, level", [ @@ -128,10 +144,12 @@ def test_metadata_propagation(self, capsys: pytest.CaptureFixture[str], method: logger = logging.getLogger("custom") file_path = pathlib.Path("f.py") - # Call the method with all possible extras (only error/warning take file_path) + # Call the method with all possible extras (only error/warning take file_path and drift_key) kwargs: dict[str, Any] = {"logger": logger} if method in ("error", "warning"): kwargs["file_path"] = file_path + kwargs["drift_key"] = "lint.select" + kwargs["check_name"] = "custom-check" getattr(fmt, method)("msg", **kwargs) @@ -139,6 +157,364 @@ def test_metadata_propagation(self, capsys: pytest.CaptureFixture[str], method: assert data["logger"] == "custom" if method in ("error", "warning"): assert data["file"] == "f.py" + assert data["drift_key"] == "lint.select" + assert data["check_name"] == "custom-check" + + +class TestGitlabFormatter: + """Tests for GitlabFormatter (GitLab Code Quality report format).""" + + def test_default_path_when_file_path_is_none(self, capsys: pytest.CaptureFixture[str]) -> None: + """When file_path is None, GitlabFormatter should default to 'pyproject.toml'.""" + fmt = GitlabFormatter() + fmt.error("drift", file_path=None) + fmt.finalize() + issues = json.loads(capsys.readouterr().out) + assert len(issues) == 1 + assert issues[0]["location"]["path"] == "pyproject.toml" + + def test_absolute_path_normalization(self, capsys: pytest.CaptureFixture[str]) -> None: + """GitlabFormatter must normalize absolute paths to be relative to CWD.""" + fmt = GitlabFormatter() + cwd = pathlib.Path.cwd() + abs_path = cwd / "subdir" / "pyproject.toml" + + fmt.error("drift", file_path=abs_path) + fmt.finalize() + + issues = json.loads(capsys.readouterr().out) + assert issues[0]["location"]["path"] == "subdir/pyproject.toml" + + def test_finalize_empty_produces_empty_array(self, capsys: pytest.CaptureFixture[str]) -> None: + """An empty formatter must emit [] — the GitLab 'no issues' signal.""" + fmt = GitlabFormatter() + fmt.finalize() + issues = json.loads(capsys.readouterr().out) + assert issues == [] + + def test_error_produces_major_severity(self, capsys: pytest.CaptureFixture[str]) -> None: + """error() must accumulate a major-severity issue.""" + fmt = GitlabFormatter() + fmt.error("drift found", file_path=pathlib.Path("pyproject.toml")) + fmt.finalize() + issues = json.loads(capsys.readouterr().out) + assert len(issues) == 1 + assert issues[0]["severity"] == "major" + assert issues[0]["location"]["path"] == "pyproject.toml" + assert issues[0]["location"]["lines"]["begin"] == 1 + assert "fingerprint" in issues[0] + assert "description" in issues[0] + assert issues[0]["check_name"] == "ruff-sync/config-drift" + + def test_warning_produces_minor_severity(self, capsys: pytest.CaptureFixture[str]) -> None: + """warning() must accumulate a minor-severity issue.""" + fmt = GitlabFormatter() + fmt.warning("stale hook", file_path=pathlib.Path(".pre-commit-config.yaml")) + fmt.finalize() + issues = json.loads(capsys.readouterr().out) + assert len(issues) == 1 + assert issues[0]["severity"] == "minor" + + def test_custom_check_name_is_preserved(self, capsys: pytest.CaptureFixture[str]) -> None: + """A caller-supplied check_name must appear in the issue object.""" + fmt = GitlabFormatter() + fmt.error("hook stale", check_name="ruff-sync/precommit-stale") + fmt.finalize() + issues = json.loads(capsys.readouterr().out) + assert issues[0]["check_name"] == "ruff-sync/precommit-stale" + + def test_drift_key_produces_distinct_fingerprints( + self, capsys: pytest.CaptureFixture[str] + ) -> None: + """Different drift_key values must yield different fingerprints.""" + fmt = GitlabFormatter() + fmt.error("drift", file_path=pathlib.Path("pyproject.toml"), drift_key="lint.select") + fmt.error("drift", file_path=pathlib.Path("pyproject.toml"), drift_key="target-version") + fmt.finalize() + issues = json.loads(capsys.readouterr().out) + assert issues[0]["fingerprint"] != issues[1]["fingerprint"] + + def test_fingerprint_is_stable(self, capsys: pytest.CaptureFixture[str]) -> None: + """Same inputs must produce the same fingerprint across multiple instances.""" + fmt1 = GitlabFormatter() + fmt2 = GitlabFormatter() + fp = pathlib.Path("pyproject.toml") + fmt1.error("drift found", file_path=fp, drift_key="lint.select") + fmt2.error("drift found", file_path=fp, drift_key="lint.select") + fmt1.finalize() + fp1 = json.loads(capsys.readouterr().out)[0]["fingerprint"] + fmt2.finalize() + fp2 = json.loads(capsys.readouterr().out)[0]["fingerprint"] + assert fp1 == fp2 + + def test_absolute_path_outside_cwd_normalization( + self, capsys: pytest.CaptureFixture[str] + ) -> None: + """If an absolute path is outside CWD, it must fall back to the filename.""" + fmt = GitlabFormatter() + # Use a path that is guaranteed to be outside the project root/CWD + outside_path = pathlib.Path("/external.toml") + + fmt.error("drift", file_path=outside_path) + fmt.finalize() + + issues = json.loads(capsys.readouterr().out) + assert issues[0]["location"]["path"] == "external.toml" + + def test_fingerprint_includes_check_name(self, capsys: pytest.CaptureFixture[str]) -> None: + """Different check_names on the same file/key must produce distinct fingerprints.""" + fmt = GitlabFormatter() + path = pathlib.Path("pyproject.toml") + + fmt.error("drift", file_path=path, drift_key="lint.select", check_name="rule-1") + fmt.error("drift", file_path=path, drift_key="lint.select", check_name="rule-2") + fmt.finalize() + + issues = json.loads(capsys.readouterr().out) + assert issues[0]["fingerprint"] != issues[1]["fingerprint"] + + def test_distinct_fingerprints_no_drift_key(self, capsys: pytest.CaptureFixture[str]) -> None: + """Omitting drift_key must still produce stable and distinct fingerprints.""" + fmt1 = GitlabFormatter() + fmt2 = GitlabFormatter() + path1 = pathlib.Path("pyproject.toml") + path2 = pathlib.Path("ruff.toml") + + # Stable for same path + fmt1.error("drift", file_path=path1, drift_key=None) + fmt2.error("drift", file_path=path1, drift_key=None) + fmt1.finalize() + fp1_a = json.loads(capsys.readouterr().out)[0]["fingerprint"] + fmt2.finalize() + fp1_b = json.loads(capsys.readouterr().out)[0]["fingerprint"] + assert fp1_a == fp1_b + + # Distinct for different paths + fmt1 = GitlabFormatter() + fmt1.error("drift", file_path=path1, drift_key=None) + fmt1.error("drift", file_path=path2, drift_key=None) + fmt1.finalize() + issues = json.loads(capsys.readouterr().out) + assert issues[0]["fingerprint"] != issues[1]["fingerprint"] + + def test_no_bom_in_output(self, capsys: pytest.CaptureFixture[str]) -> None: + """Output must not start with a UTF-8 BOM (GitLab rejects BOM-prefixed JSON).""" + fmt = GitlabFormatter() + fmt.finalize() + assert not capsys.readouterr().out.startswith("\ufeff") + + def test_multiple_issues_accumulated(self, capsys: pytest.CaptureFixture[str]) -> None: + """All error() and warning() calls must appear in the final JSON array.""" + fmt = GitlabFormatter() + fmt.error("error one", file_path=pathlib.Path("pyproject.toml"), drift_key="lint.select") + fmt.warning( + "warn one", + file_path=pathlib.Path("pyproject.toml"), + drift_key="target-version", + ) + fmt.finalize() + issues = json.loads(capsys.readouterr().out) + assert len(issues) == 2 + assert issues[0]["severity"] == "major" + assert issues[1]["severity"] == "minor" + + def test_note_and_success_do_not_produce_issues( + self, capsys: pytest.CaptureFixture[str] + ) -> None: + """note() and success() must not populate the issue list.""" + fmt = GitlabFormatter() + fmt.note("checking...") + fmt.success("✅ in sync") + fmt.finalize() + issues = json.loads(capsys.readouterr().out) + assert issues == [] + + def test_diff_is_ignored(self, capsys: pytest.CaptureFixture[str]) -> None: + """diff() must not add any issue to the report.""" + fmt = GitlabFormatter() + fmt.diff("--- a\n+++ b\n@@ ...") + fmt.finalize() + issues = json.loads(capsys.readouterr().out) + assert issues == [] + + def test_diagnostic_methods_delegate_to_logger(self, caplog: pytest.LogCaptureFixture) -> None: + """Verify diagnostic methods (info, debug, note, success) delegate to the logger.""" + fmt = GitlabFormatter() + + # note/success/info log at INFO level + with caplog.at_level(logging.INFO, logger="ruff_sync.formatters"): + fmt.note("note msg") + fmt.success("success msg") + fmt.info("info msg") + + # debug logs at DEBUG level + with caplog.at_level(logging.DEBUG, logger="ruff_sync.formatters"): + fmt.debug("debug msg") + + assert "note msg" in caplog.text + assert "success msg" in caplog.text + assert "info msg" in caplog.text + assert "debug msg" in caplog.text + + def test_custom_logger_delegation(self) -> None: + """Verify diagnostic methods delegate to a custom logger if provided.""" + fmt = GitlabFormatter() + + class LoggerSpy: + def __init__(self) -> None: + self.info_called = False + self.debug_called = False + + def info(self, msg: str) -> None: + self.info_called = True + + def debug(self, msg: str) -> None: + self.debug_called = True + + mock_logger = LoggerSpy() + + fmt.info("info msg", logger=mock_logger) # type: ignore[arg-type] + assert mock_logger.info_called + + fmt.debug("debug msg", logger=mock_logger) # type: ignore[arg-type] + assert mock_logger.debug_called + + +class TestJsonFormatterMetadata: + """Verify metadata propagation in JsonFormatter.""" + + def test_json_formatter_always_includes_check_name( + self, capsys: pytest.CaptureFixture[str] + ) -> None: + """JsonFormatter must always include check_name, even if it is the default.""" + from ruff_sync.formatters import JsonFormatter + + fmt = JsonFormatter() + fmt.error("drift") + data = json.loads(capsys.readouterr().out) + assert data["check_name"] == "ruff-sync/config-drift" + + fmt.warning("stale") + data = json.loads(capsys.readouterr().out) + assert data["check_name"] == "ruff-sync/config-drift" + + +class SpyFormatter: + """A minimal ResultFormatter that tracks calls to ensure they occur.""" + + def __init__(self) -> None: + """Initialize the spy with state tracking fields.""" + self.finalized = False + self.errors: list[str] = [] + + def note(self, message: str) -> None: + """No-op note implementation.""" + + def info(self, message: str, logger: Any = None) -> None: + """No-op info implementation.""" + + def success(self, message: str) -> None: + """No-op success implementation.""" + + def debug(self, message: str, logger: Any = None) -> None: + """No-op debug implementation.""" + + def diff(self, diff_text: str) -> None: + """No-op diff implementation.""" + + def error( + self, + message: str, + file_path: Any = None, + logger: Any = None, + check_name: str = "ruff-sync/config-drift", + drift_key: str | None = None, + ) -> None: + """Record the error message.""" + self.errors.append(message) + + def warning( + self, + message: str, + file_path: Any = None, + logger: Any = None, + check_name: str = "ruff-sync/config-drift", + drift_key: str | None = None, + ) -> None: + """No-op warning implementation.""" + + def finalize(self) -> None: + """Flush the accumulated report and finish the sync process.""" + self.finalized = True + + +class TestCLILifecycle: + """Verify that core.py ensures the formatter lifecycle (finalize) is honored.""" + + @pytest.mark.asyncio + async def test_check_calls_finalize_on_success(self, monkeypatch: pytest.MonkeyPatch) -> None: + """check() must call finalize() even when everything is in sync.""" + from ruff_sync import core + + spy = SpyFormatter() + monkeypatch.setattr(core, "get_formatter", lambda _: spy) + + # Mock dependencies to avoid real IO + monkeypatch.setattr(pathlib.Path, "exists", lambda _: True) + # Mock TOMLFile.read to return a valid sync config + from tomlkit import parse + + content = parse("[tool.ruff]\nline-length = 80") + monkeypatch.setattr("ruff_sync.core.TOMLFile.read", lambda _: content) + + async def _mock_merge(*args, **kwargs): + return content + + monkeypatch.setattr("ruff_sync.core._merge_multiple_upstreams", _mock_merge) + + from httpx import URL + + from ruff_sync.cli import Arguments + + args = Arguments( + command="check", + upstream=(URL("https://example.com"),), + to=pathlib.Path("pyproject.toml"), + exclude=MISSING, + verbose=0, + output_format=OutputFormat.TEXT, + ) + + await core.check(args) + assert spy.finalized + + @pytest.mark.asyncio + async def test_check_calls_finalize_on_error(self, monkeypatch: pytest.MonkeyPatch) -> None: + """check() must call finalize() even when an exception occurs.""" + from ruff_sync import core + + spy = SpyFormatter() + monkeypatch.setattr(core, "get_formatter", lambda _: spy) + + # Force an early return/error by making the file not exist + monkeypatch.setattr(pathlib.Path, "exists", lambda _: False) + + from httpx import URL + + from ruff_sync.cli import Arguments + + args = Arguments( + command="check", + upstream=(URL("https://example.com"),), + to=pathlib.Path("nonexistent.toml"), + exclude=MISSING, + verbose=0, + output_format=OutputFormat.TEXT, + ) + + await core.check(args) + assert len(spy.errors) > 0 + assert spy.finalized def test_get_formatter_factory() -> None: @@ -146,8 +522,7 @@ def test_get_formatter_factory() -> None: assert isinstance(get_formatter(OutputFormat.TEXT), TextFormatter) assert isinstance(get_formatter(OutputFormat.JSON), JsonFormatter) assert isinstance(get_formatter(OutputFormat.GITHUB), GithubFormatter) - # Default fallback - ignore error for purposeful type-incorrect input - assert isinstance(get_formatter("invalid"), TextFormatter) # type: ignore[arg-type] + assert isinstance(get_formatter(OutputFormat.GITLAB), GitlabFormatter) if __name__ == "__main__": diff --git a/uv.lock b/uv.lock index ac15df2..762b7cd 100644 --- a/uv.lock +++ b/uv.lock @@ -1484,7 +1484,7 @@ wheels = [ [[package]] name = "ruff-sync" -version = "0.1.3" +version = "0.1.4.dev0" source = { editable = "." } dependencies = [ { name = "httpx" },