From 6813dc0b5e0ab81b70547ab78595b863be6f6d83 Mon Sep 17 00:00:00 2001 From: Max Bachaud Date: Mon, 11 May 2026 09:59:05 -0700 Subject: [PATCH] fix(drift): honest lint confidence + feat(compliance): approvals/ privacy default (v0.1.3) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit drift: - score_lint: switch to sys.executable -m ruff (PATH-resolution consistency with the other v0.1.2 scorers). Return (0.0, 0.0) unmeasured sentinel when ruff cannot be launched or its output is unparseable, instead of fabricating a 0.99/1.0 score. - 4 new unit tests for the lint sentinel behavior. - Re-seeded .mek/drift-baseline.json with the corrected lint score. compliance: - Scaffold compliance/.gitignore that ignores approvals/ by default — HITL records frequently contain names + infra details. - /mek-compliance-audit now surfaces tracked files under approvals/ as a privacy warning; --strict fails the audit on any tracked approval. - docs/compliance.md documents both the static-block (mek.toml gate=block) and conditional-block (project-local hook) hardening patterns. - Updated docs/compliance.md to enumerate all 6 risky-op categories (was still listing 5 — drift since v0.1.2's repo_visibility_flip add). - New integration test verifies the scaffold ships the .gitignore. build: - .markdownlint.json with MD024 siblings_only=true so Keep-a-Changelog '### Added' / '### Fixed' repetition across version sections stops firing lint warnings on every release. All findings sourced from dogfood (lint asymmetry surfaced by Max running /mek-drift init on helix-context; approvals privacy concern flagged after the compliance subagent's verification pass). 
--- .claude-plugin/marketplace.json | 2 +- .claude-plugin/plugin.json | 2 +- .markdownlint.json | 3 ++ .mek/drift-baseline.json | 4 +- CHANGELOG.md | 17 ++++++++ commands/mek-compliance-audit.md | 6 ++- commands/mek-init.md | 2 + docs/compliance.md | 50 +++++++++++++++++++++- lib/drift_scoring/python_preset.py | 14 ++++-- package.json | 2 +- pyproject.toml | 2 +- scaffold/compliance/.gitignore | 14 ++++++ tests/integration/test_scaffold_payload.py | 7 +++ tests/unit/test_drift_python_preset.py | 38 ++++++++++++++++ 14 files changed, 150 insertions(+), 13 deletions(-) create mode 100644 .markdownlint.json create mode 100644 scaffold/compliance/.gitignore create mode 100644 tests/unit/test_drift_python_preset.py diff --git a/.claude-plugin/marketplace.json b/.claude-plugin/marketplace.json index 4fdfe23..d743cad 100644 --- a/.claude-plugin/marketplace.json +++ b/.claude-plugin/marketplace.json @@ -10,7 +10,7 @@ { "name": "maxexpresskit", "source": "./", - "version": "0.1.2", + "version": "0.1.3", "description": "Three guardrails for Claude Code: compliance, drift, ledger." 
} ] diff --git a/.claude-plugin/plugin.json b/.claude-plugin/plugin.json index cf73cfe..d404c3d 100644 --- a/.claude-plugin/plugin.json +++ b/.claude-plugin/plugin.json @@ -1,7 +1,7 @@ { "name": "maxexpresskit", "description": "Three guardrails for Claude Code: compliance, drift, ledger.", - "version": "0.1.2", + "version": "0.1.3", "author": { "name": "Max Bachaud" } diff --git a/.markdownlint.json b/.markdownlint.json new file mode 100644 index 0000000..8ab4145 --- /dev/null +++ b/.markdownlint.json @@ -0,0 +1,3 @@ +{ + "MD024": { "siblings_only": true } +} diff --git a/.mek/drift-baseline.json b/.mek/drift-baseline.json index 6761ef5..c23df27 100644 --- a/.mek/drift-baseline.json +++ b/.mek/drift-baseline.json @@ -1,6 +1,6 @@ { "schema_version": 1, - "updated_at": "2026-05-11T07:50:48.679821+00:00", + "updated_at": "2026-05-11T16:58:45.390223+00:00", "preset": "python", "dimensions": { "test_pass_rate": { @@ -16,7 +16,7 @@ "floor": 0.95 }, "coverage": { - "auto": 0.757201646090535, + "auto": 0.8320000000000001, "manual": null, "confidence": 0.9, "floor": 0.7 diff --git a/CHANGELOG.md b/CHANGELOG.md index a49e90f..4a1c6b4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,23 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [0.1.3] — 2026-05-11 + +### Fixed + +- **`score_lint` confidence honesty** — when `ruff` couldn't be launched or its output couldn't be parsed, the scorer previously returned `(0.99, 1.0)`, claiming high confidence in fabricated data. Now returns the same `(0.0, 0.0)` "unmeasured" sentinel as `score_security`. Also switched the invocation from bare `"ruff"` to `[sys.executable, "-m", "ruff", ...]` for the same PATH-resolution reasons we hit in v0.1.2 for the other scorers. + +### Added + +- **Privacy default for HITL approvals** — `/mek-init` now drops a `compliance/.gitignore` that ignores `approvals/` by default. 
HITL records often carry names, infra details, and rationale that don't belong in public git history. To opt specific (redacted) approvals into tracking, change the pattern to `approvals/*` and add `!approvals/<file>` negations (git cannot re-include a file whose parent directory is excluded). +- **`/mek-compliance-audit` privacy check** — surfaces files tracked under `compliance/approvals/` as a warning. `--strict` fails the audit when any such file exists. +- **`docs/compliance.md` hardening section** — documents both the static-block pattern (`repo_visibility_flip = "block"` in mek.toml) and a conditional-block recipe for projects that want visibility flips to fail only when approvals exist on disk. + +### Tests + +- 4 new unit tests on the lint sentinel (`tests/unit/test_drift_python_preset.py`). +- 1 new integration test that the scaffold ships `compliance/.gitignore`. + ## [0.1.2] — 2026-05-11 ### Added diff --git a/commands/mek-compliance-audit.md b/commands/mek-compliance-audit.md index 86b2021..798cdac 100644 --- a/commands/mek-compliance-audit.md +++ b/commands/mek-compliance-audit.md @@ -12,10 +12,12 @@ For each file: - Check `mtime`. Mark `stale` if older than `compliance.staleness_days` (default 90) AND no `last_reviewed:` frontmatter within the window. - For HITL approval files: check for a `Signed off by:` line. -Output: a markdown table to stdout. With `--write <path>`, also write the report to that file. +**Privacy check: tracked approvals.** HITL approval files frequently contain names, infrastructure details, and other sensitive data, so the default scaffold gitignores `compliance/approvals/`. Run `git ls-files compliance/approvals/` — if it returns any paths, surface them as a warning (`tracked HITL approvals detected: <paths>. Confirm these are intentionally version-controlled, or untrack them (git rm --cached) and add to compliance/.gitignore.`). Treat this as advisory (does not by itself set exit 1) unless the user passes `--strict`, in which case any tracked approval fails the audit. + +Output: a markdown table to stdout plus a separate "Tracked approvals" section when present. 
With `--write <path>`, also write the report to that file. Exit code: - `0` if everything is fresh and signed. -- `1` if any artifact is stale or unsigned. +- `1` if any artifact is stale or unsigned, OR if `--strict` is passed and any tracked file lives under `compliance/approvals/`. - `--soft` coerces exit to `0` (for CI advisory mode). diff --git a/commands/mek-init.md b/commands/mek-init.md index c7410a7..7ca7f54 100644 --- a/commands/mek-init.md +++ b/commands/mek-init.md @@ -15,8 +15,10 @@ Behavior: - `scaffold/compliance/HITL_TEMPLATE.md` → `./compliance/HITL_TEMPLATE.md` - `scaffold/compliance/DECISION_LOG.md` → `./compliance/DECISION_LOG.md` - `scaffold/compliance/RISKY_OPS.yaml` → `./compliance/RISKY_OPS.yaml` + - `scaffold/compliance/.gitignore` → `./compliance/.gitignore` (ignores `approvals/` by default — see [docs/compliance.md](../docs/compliance.md) for the rationale) 3. Print a summary of what was created and the next steps: - Run `/mek-drift init` to seed the drift baseline. - Open `compliance/RISKY_OPS.yaml` and add project-specific patterns. + - HITL approvals belong under `compliance/approvals/`. That path is gitignored by default; to track redacted approvals, change the ignore pattern to `approvals/*` and add explicit `!approvals/<file>` negations (git cannot re-include files under an excluded directory). This is a tool-using flow (Read, Write the files). Don't run shell `cp`. diff --git a/docs/compliance.md b/docs/compliance.md index 78fe330..87e44b5 100644 --- a/docs/compliance.md +++ b/docs/compliance.md @@ -1,8 +1,8 @@ # compliance -Ambient HITL/audit nudge. Triggers on five risky-op categories: +Ambient HITL/audit nudge. Triggers on six risky-op categories: -- `rm_rf`, `deploy`, `schema_migration`, `money_write`, `force_push_main`. +- `rm_rf`, `deploy`, `schema_migration`, `money_write`, `force_push_main`, `repo_visibility_flip`. 
For each, the `pre_risky_op.py` hook checks `mek.toml > [compliance.gates] > `: @@ -13,3 +13,49 @@ For each, the `pre_risky_op.py` hook checks `mek.toml > [compliance.gates] > tuple[float, float]: def score_lint(project_root: Path) -> tuple[float, float]: - code, out = _run(["ruff", "check", "."], project_root) - if code == 0: + """Run ruff. Returns the unmeasured sentinel (0.0, 0.0) if ruff isn't + installed or its output couldn't be parsed — so callers don't mistake + a launch failure for a clean repo. + """ + code, out = _run([sys.executable, "-m", "ruff", "check", "."], project_root) + # Discriminate "ruff actually ran" by looking for one of its expected + # output markers. If neither appears, we have no real measurement. + if "All checks passed" in out: return 1.0, 1.0 import re m = re.search(r"Found (\d+) error", out) - errors = int(m.group(1)) if m else 1 + if not m: + return 0.0, 0.0 # unmeasured — same sentinel as score_security + errors = int(m.group(1)) # Heuristic: 0 errors = 1.0, 100+ errors = 0.0, linear in between. score = max(0.0, 1.0 - errors / 100.0) return score, 1.0 diff --git a/package.json b/package.json index 2755f87..aebf305 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "maxexpresskit", - "version": "0.1.2", + "version": "0.1.3", "description": "Three guardrails for Claude Code: compliance, drift, ledger.", "license": "Apache-2.0", "private": true diff --git a/pyproject.toml b/pyproject.toml index c2b9e8a..aa53c95 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "maxexpresskit" -version = "0.1.2" +version = "0.1.3" description = "Three guardrails for Claude Code: compliance, drift, ledger." 
requires-python = ">=3.11" license = "Apache-2.0" diff --git a/scaffold/compliance/.gitignore b/scaffold/compliance/.gitignore new file mode 100644 index 0000000..5dc7d75 --- /dev/null +++ b/scaffold/compliance/.gitignore @@ -0,0 +1,14 @@ +# By default, MEK does NOT track HITL approvals — they frequently contain +# names, infrastructure details, internal URLs, and other data you probably +# don't want in git history (especially on public repos). +# +# To track a specific approval file explicitly (e.g., after redacting it), +# change the `approvals/` line to `approvals/*` and add a negation below it +# (git cannot re-include a file whose parent directory is excluded). Example: +# approvals/* +# !approvals/2026-05-11-redacted-public.md +# +# Or remove the `approvals/` line entirely if your project's policy is to +# track all approvals. + +approvals/ diff --git a/tests/integration/test_scaffold_payload.py b/tests/integration/test_scaffold_payload.py index 840ab8e..8d970e6 100644 --- a/tests/integration/test_scaffold_payload.py +++ b/tests/integration/test_scaffold_payload.py @@ -15,6 +15,13 @@ def test_scaffold_has_compliance_templates(): assert (SCAFFOLD / "compliance" / "RISKY_OPS.yaml").is_file() +def test_scaffold_gitignores_approvals_dir(): + # HITL approvals contain sensitive data — opt-out-shaped privacy default. 
+ gi = SCAFFOLD / "compliance" / ".gitignore" + assert gi.is_file() + assert "approvals/" in gi.read_text(encoding="utf-8") + + def test_scaffold_mek_toml_parses(): import sys if sys.version_info >= (3, 11): diff --git a/tests/unit/test_drift_python_preset.py b/tests/unit/test_drift_python_preset.py new file mode 100644 index 0000000..37d2c2e --- /dev/null +++ b/tests/unit/test_drift_python_preset.py @@ -0,0 +1,38 @@ +"""Regression tests for the python drift preset's confidence honesty.""" +from pathlib import Path + +from lib.drift_scoring import python_preset + + +def _fake_run(out: str, code: int = 0): + """Return a _run replacement that yields the given (code, out).""" + def _run(cmd, cwd, merge_stderr=True): + return code, out + return _run + + +def test_score_lint_clean_returns_full_confidence(monkeypatch): + monkeypatch.setattr(python_preset, "_run", _fake_run("All checks passed!\n")) + assert python_preset.score_lint(Path(".")) == (1.0, 1.0) + + +def test_score_lint_with_errors_returns_full_confidence(monkeypatch): + monkeypatch.setattr(python_preset, "_run", _fake_run("Found 3 errors.\n", code=1)) + score, confidence = python_preset.score_lint(Path(".")) + assert confidence == 1.0 + assert score == 0.97 # 1.0 - 3/100 + + +def test_score_lint_missing_ruff_returns_unmeasured_sentinel(monkeypatch): + # Subprocess failed to launch ruff: empty stdout, nonzero exit. + monkeypatch.setattr(python_preset, "_run", _fake_run("", code=1)) + assert python_preset.score_lint(Path(".")) == (0.0, 0.0) + + +def test_score_lint_unparseable_output_returns_unmeasured_sentinel(monkeypatch): + # Some future ruff version changes its output format — we'd rather report + # "no data" than fabricate a 0.99 score. + monkeypatch.setattr( + python_preset, "_run", _fake_run("some unexpected output\n", code=1) + ) + assert python_preset.score_lint(Path(".")) == (0.0, 0.0)