From d2bbf75cac791983e2cc58d89727d216c15f8cb8 Mon Sep 17 00:00:00 2001 From: Claude Date: Sat, 23 May 2026 18:57:48 +0000 Subject: [PATCH 1/5] fix(matcher): scorecard findings unreachable due to language gate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Recipe matcher rejected every scorecard-source finding (~310 ecosystem- wide), routing them to :control "no safe fix available" advisories. Root cause: `lib/recipe_matcher.ex` filtered candidate recipes with `"*" in langs or language in langs`. Two failure modes: 1. 12 recipes declared `languages: ["any"]` — never matched, since `"any"` is not a sentinel the filter recognises and no repo has `"any"` as its primary language. 2. 8 scorecard / workflow-file recipes declared `languages: ["yaml"]` — never matched, since yaml is a workflow-file type, not any repo's primary language. So `recipe-pin-dependencies`, `recipe-fix-workflow-permissions`, etc. were unreachable for SC013/ SC018 findings — the exact rule families dominating the daily remediation sweep. Fix: - `langs_match?/2` private helper accepts `"*"` and `"any"` as synonymous language-agnostic sentinels. - `effective_language_for/2` remaps the lookup language to `"yaml"` for patterns whose `source` is `"scorecard"` or whose `category` names a known workflow-file rule family (DependencyPinning, TokenPermissions, DangerousWorkflow, etc.). The repo's primary language is irrelevant for workflow-file findings. - Applied to `best_recipe/2`, `category_match_recipe/2`, and `fuzzy_match_recipe/2`. Tests pin all three invariants. All 22 scorecard recipe `fix_script` references already exist on disk in `scripts/fix-scripts/` — the bug was purely in matcher reachability, not missing fix implementations. Closes the dispatcher half of the "no security stuff being sorted" symptom. Remaining M7 work (PAT for cross-repo dispatch, push fixes to remotes) still needs operator action, but the manifests will now carry populated fix_script fields for scorecard findings. --- lib/recipe_matcher.ex | 42 ++++++++++++++++++++++++++----- test/recipe_matcher_test.exs | 48 ++++++++++++++++++++++++++++++++++++ 2 files changed, 84 insertions(+), 6 deletions(-) diff --git a/lib/recipe_matcher.ex b/lib/recipe_matcher.ex index 21c1d618..748fe50e 100644 --- a/lib/recipe_matcher.ex +++ b/lib/recipe_matcher.ex @@ -32,8 +32,7 @@ defmodule Hypatia.RecipeMatcher do def best_recipe(pattern_id, language) do find_recipes(pattern_id) |> Enum.find(fn recipe -> - langs = Map.get(recipe, "languages", []) - "*" in langs or language in langs + langs_match?(Map.get(recipe, "languages", []), language) end) end @@ -140,12 +139,12 @@ defmodule Hypatia.RecipeMatcher do # Match recipe by target_categories field -- most reliable match defp category_match_recipe(pattern, language) do category = Map.get(pattern, "category", "") + effective_language = effective_language_for(pattern, language) all_recipes() |> Enum.filter(fn recipe -> cats = Map.get(recipe, "target_categories", []) - langs = Map.get(recipe, "languages", []) - lang_ok = "*" in langs or language in langs + lang_ok = langs_match?(Map.get(recipe, "languages", []), effective_language) lang_ok and category in cats end) |> Enum.sort_by(fn r -> Map.get(r, "confidence", 0) end, :desc) @@ -155,6 +154,7 @@ defmodule Hypatia.RecipeMatcher do defp fuzzy_match_recipe(pattern, language) do pa_rule = Map.get(pattern, "pa_rule", "") description = Map.get(pattern, "description", "") |> String.downcase() + effective_language = effective_language_for(pattern, language) # Skip if no PA rule to match against if pa_rule == "" do @@ -162,8 +162,7 @@ defmodule Hypatia.RecipeMatcher do else all_recipes() |> Enum.filter(fn recipe -> - langs = Map.get(recipe, "languages", []) - lang_ok = "*" in langs or language in langs + lang_ok = langs_match?(Map.get(recipe, "languages", []), effective_language) recipe_pattern_ids = Map.get(recipe, "pattern_ids", []) @@ -196,6 +195,37 @@ defmodule Hypatia.RecipeMatcher do end end + # Both "*" and "any" are language-agnostic sentinels. Historical recipes + # use one or the other; treating them as synonyms keeps both groups + # routable (without this, ~12 recipes declared "any" matched no patterns). + defp langs_match?(langs, language) do + "*" in langs or "any" in langs or language in langs + end + + # Scorecard / workflow-file findings are about .github/workflows/*.yml, + # not the repo's primary language. Without this remap, recipes declared + # `languages: ["yaml"]` (pin-deps, token-permissions, etc.) never match + # because no repo has yaml as its primary language, and every scorecard + # finding falls through to :control "no safe fix available". + defp effective_language_for(pattern, language) do + cond do + Map.get(pattern, "source") == "scorecard" -> "yaml" + workflow_file_category?(Map.get(pattern, "category", "")) -> "yaml" + true -> language + end + end + + defp workflow_file_category?(category) do + category in [ + "DependencyPinning", + "PinnedDependencies", + "TokenPermissions", + "DangerousWorkflow", + "DependencyUpdateTool", + "BranchProtection" + ] + end + defp load_recipe(path) do with {:ok, content} <- File.read(path), {:ok, data} <- Jason.decode(content) do diff --git a/test/recipe_matcher_test.exs b/test/recipe_matcher_test.exs index c2ead171..5eae2cb5 100644 --- a/test/recipe_matcher_test.exs +++ b/test/recipe_matcher_test.exs @@ -93,4 +93,52 @@ defmodule Hypatia.RecipeMatcherTest do assert RecipeMatcher.substitution_for_category("FakeCategory") == nil end end + + describe "best_recipe_for_pattern/2 — language matching" do + test "'any' sentinel matches any repo language" do + # recipe-scorecard-license declares languages: ["any"] + pattern = %{ + "id" => "SC-010-some-repo", + "category" => "License", + "pa_rule" => "SC010", + "source" => "scorecard" + } + + recipe = RecipeMatcher.best_recipe_for_pattern(pattern, "rust") + assert recipe != nil + assert recipe["id"] in ["recipe-scorecard-license", "recipe-add-license-file"] + assert recipe["fix_script"] not in [nil, ""] + end + + test "scorecard DependencyPinning pattern resolves yaml recipe regardless of repo language" do + # Reproduces the production gap: SC013 findings across 230+ repos + # routed to :control "no safe fix available" because recipe-pin-deps + # declares languages: ["yaml"] and no repo has yaml as primary lang. + pattern = %{ + "id" => "SC-013-007-lang", + "category" => "DependencyPinning", + "pa_rule" => "SC013", + "source" => "scorecard", + "description" => "1 workflow(s) with tag-pinned actions" + } + + recipe = RecipeMatcher.best_recipe_for_pattern(pattern, "elixir") + assert recipe != nil, "scorecard pattern must route to a recipe, not :control" + assert recipe["fix_script"] not in [nil, ""] + assert recipe["triangle_tier"] in ["eliminate", "substitute"] + end + + test "scorecard TokenPermissions pattern resolves yaml recipe" do + pattern = %{ + "id" => "SC-018-some-repo", + "category" => "TokenPermissions", + "pa_rule" => "SC018", + "source" => "scorecard" + } + + recipe = RecipeMatcher.best_recipe_for_pattern(pattern, "go") + assert recipe != nil + assert recipe["fix_script"] == "fix-workflow-permissions.sh" + end + end end From 7cc2667331c2f6acb7ad00465fd946f73961a85f Mon Sep 17 00:00:00 2001 From: Claude Date: Sat, 23 May 2026 19:32:18 +0000 Subject: [PATCH 2/5] chore(baseline): regenerate .hypatia-baseline.json against current tree MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The baseline had drifted into pure historical risk: 71 accepted findings (31 critical, 40 high) generated before the #278 stale-escript fix and the wave of code_safety/security_errors cleanups landed. A fresh scan against the current tree finds 35 findings, all medium-or-lower: - 32 low (code_safety hot-path expects, ncl_docker_not_podman, workflow_audit missing-workflow, structural_drift, etc.) - 3 medium (git_state transient + structural_drift) - 0 critical, 0 high Most old baseline entries are either: - fixed in code (e.g. the believe_me at src/abi/RuleEngine.idr is now inline-suppressed with a documented `-- hypatia: allow` directive), - migrated/refactored (e.g. lib/direct_github_pr.ex no longer exists), - or were covered by the new total-Python-ban / scanner-soundness wave. Net effect: every gate threshold of "fail on critical|high above baseline" now starts from an empty critical/high ledger — net-new critical or high findings will stand out, which is what the baseline is supposed to enable. Generated with the canonical Elixir escript pipeline against this tree (no rule changes, just a snapshot refresh). Severity threshold "low" so the snapshot reflects the full advisory surface, not just gates. --- .hypatia-baseline.json | 502 ++++++++++------------------------------- 1 file changed, 125 insertions(+), 377 deletions(-) diff --git a/.hypatia-baseline.json b/.hypatia-baseline.json index c3277c30..3ef7458f 100644 --- a/.hypatia-baseline.json +++ b/.hypatia-baseline.json @@ -1,499 +1,247 @@ [ { - "severity": "critical", + "severity": "low", "rule_module": "code_safety", - "type": "believe_me", - "file": "src/abi/RuleEngine.idr", + "type": "expect_in_hot_path", + "file": "fixer/src/scanner.rs", "action": "flag" }, { - "severity": "critical", + "severity": "low", "rule_module": "code_safety", - "type": "elixir_system_cmd_interpolation", - "file": "lib/direct_github_pr.ex", - "action": "flag" - }, - { - "severity": "critical", - "rule_module": "code_safety", - "type": "elixir_system_cmd_interpolation", - "file": "lib/hypatia/cli.ex", - "action": "flag" - }, - { - "severity": "critical", - "rule_module": "code_safety", - "type": "elixir_system_cmd_interpolation", - "file": "lib/mix/tasks/hypatia.audit_repos.ex", - "action": "flag" - }, - { - "severity": "critical", - "rule_module": "code_safety", - "type": "elixir_system_cmd_interpolation", - "file": "lib/mix/tasks/hypatia.deploy_prevention_workflows.ex", + "type": "expect_in_hot_path", + "file": "integration/src/ci_simulation/assertions.rs", "action": "flag" }, { - "severity": "critical", + "severity": "low", "rule_module": "code_safety", - "type": "elixir_system_cmd_interpolation", - "file": "lib/rules/structural_drift.ex", + "type": "expect_in_hot_path", + "file": "integration/src/ci_simulation/scenarios.rs", "action": "flag" }, { - "severity": "critical", + "severity": "low", "rule_module": "code_safety", - "type": "unwrap_dangerous_default", - "file": "adapters/src/sourcehut.rs", + "type": "ncl_docker_not_podman", + "file": ".machine_readable/svc/k9/hypatia-metadata.k9.ncl", "action": "flag" }, { - "severity": "critical", + "severity": "low", "rule_module": "code_safety", "type": "unwrap_dangerous_default", "file": "cli/src/commands/batch.rs", "action": "flag" }, { - "severity": "critical", + "severity": "low", "rule_module": "code_safety", "type": "unwrap_dangerous_default", "file": "cli/src/commands/scan.rs", "action": "flag" }, { - "severity": "critical", - "rule_module": "code_safety", - "type": "unwrap_dangerous_default", - "file": "clients/rust/hypatia-client/src/ffi.rs", - "action": "flag" - }, - { - "severity": "critical", - "rule_module": "code_safety", - "type": "unwrap_dangerous_default", - "file": "data/src/cache.rs", - "action": "flag" - }, - { - "severity": "critical", - "rule_module": "code_safety", - "type": "unwrap_dangerous_default", - "file": "data/src/dragonfly.rs", - "action": "flag" - }, - { - "severity": "critical", - "rule_module": "code_safety", - "type": "unwrap_dangerous_default", - "file": "integration/src/ci_simulation/scenarios.rs", - "action": "flag" - }, - { - "severity": "critical", + "severity": "low", "rule_module": "code_safety", "type": "unwrap_dangerous_default", - "file": "integration/tests/fleet_test.rs", + "file": "scripts/ci-tools/src/bin/check-k9iser-paths.rs", "action": "flag" }, { - "severity": "critical", - "rule_module": "code_safety", - "type": "unwrap_dangerous_default", - "file": "integration/tests/forge_test.rs", - "action": "flag" - }, - { - "severity": "critical", + "severity": "low", "rule_module": "code_safety", "type": "unwrap_dangerous_default", "file": "tools/cii-registrar/src/main.rs", "action": "flag" }, { - "severity": "critical", - "rule_module": "security_errors", - "type": "secret_detected", - "file": ".audittraining/security-errors/echidnabot.md", - "action": "revoke_rotate_and_purge" - }, - { - "severity": "critical", - "rule_module": "security_errors", - "type": "secret_detected", - "file": ".audittraining/security-errors/echidnabot.md", - "action": "revoke_rotate_and_purge" - }, - { - "severity": "critical", - "rule_module": "security_errors", - "type": "secret_detected", - "file": ".github/workflows/integration.yml", - "action": "revoke_rotate_and_purge" - }, - { - "severity": "critical", - "rule_module": "security_errors", - "type": "secret_detected", - "file": ".hypatia-exemptions.md", - "action": "revoke_rotate_and_purge" - }, - { - "severity": "critical", - "rule_module": "security_errors", - "type": "secret_detected", - "file": "adapters/src/codeberg.rs", - "action": "revoke_rotate_and_purge" - }, - { - "severity": "critical", - "rule_module": "security_errors", - "type": "secret_detected", - "file": "adapters/tests/adapter_tests.rs", - "action": "revoke_rotate_and_purge" - }, - { - "severity": "critical", - "rule_module": "security_errors", - "type": "secret_detected", - "file": "hooks/lib/cache.sh", - "action": "revoke_rotate_and_purge" - }, - { - "severity": "critical", - "rule_module": "security_errors", - "type": "secret_detected", - "file": "integration/run-tests.sh", - "action": "revoke_rotate_and_purge" - }, - { - "severity": "critical", - "rule_module": "security_errors", - "type": "secret_detected", - "file": "lib/rules/security_errors.ex", - "action": "revoke_rotate_and_purge" - }, - { - "severity": "critical", - "rule_module": "security_errors", - "type": "secret_detected", - "file": "scripts/fix-scripts/fix-hardcoded-secrets.sh", - "action": "revoke_rotate_and_purge" - }, - { - "severity": "critical", - "rule_module": "security_errors", - "type": "secret_detected", - "file": "scripts/fix-scripts/fix-hardcoded-secrets.sh", - "action": "revoke_rotate_and_purge" - }, - { - "severity": "critical", - "rule_module": "security_errors", - "type": "secret_detected", - "file": "test/code_safety_test.exs", - "action": "revoke_rotate_and_purge" - }, - { - "severity": "critical", - "rule_module": "structural_drift", - "type": "SD008", - "file": "src/abi/RuleEngine.idr", - "action": "fix_proof" - }, - { - "severity": "critical", - "rule_module": "workflow_audit", - "type": "actions_expression_injection", - "file": "mirror.yml", - "action": "sanitize_context" - }, - { - "severity": "critical", - "rule_module": "workflow_audit", - "type": "actions_expression_injection", - "file": "quality.yml", - "action": "sanitize_context" - }, - { - "severity": "high", - "rule_module": "cicd_rules", - "type": "missing_requirement", - "file": ".github/dependabot.yml", - "action": "create" - }, - { - "severity": "high", - "rule_module": "cicd_rules", - "type": "missing_requirement", - "file": ".github/workflows/scorecard.yml", - "action": "create" - }, - { - "severity": "high", - "rule_module": "cicd_rules", - "type": "missing_requirement", - "file": "permissions: read-all", - "action": "create" - }, - { - "severity": "high", - "rule_module": "code_safety", - "type": "elixir_send_unsanitised", - "file": "lib/rules/security_errors.ex", - "action": "flag" - }, - { - "severity": "high", - "rule_module": "code_safety", - "type": "from_raw", - "file": "clients/rust/hypatia-client/src/ffi.rs", - "action": "flag" - }, - { - "severity": "high", - "rule_module": "code_safety", - "type": "lock_unwrap", - "file": "cli/src/app_state.rs", - "action": "flag" - }, - { - "severity": "high", - "rule_module": "code_safety", - "type": "ncl_http_url", - "file": "fleet-config.k9.ncl", - "action": "flag" - }, - { - "severity": "high", - "rule_module": "code_safety", - "type": "panic_macro", - "file": "integration/src/ci_simulation/assertions.rs", - "action": "flag" - }, - { - "severity": "high", - "rule_module": "code_safety", - "type": "shell_download_then_run", - "file": "scripts/fix-scripts/fix-heredoc-install.sh", - "action": "flag" - }, - { - "severity": "high", - "rule_module": "code_safety", - "type": "unwrap_without_check", - "file": "adapters/src/bitbucket.rs", - "action": "flag" - }, - { - "severity": "high", - "rule_module": "code_safety", - "type": "unwrap_without_check", - "file": "adapters/src/codeberg.rs", - "action": "flag" - }, - { - "severity": "high", - "rule_module": "code_safety", - "type": "unwrap_without_check", - "file": "adapters/src/github.rs", - "action": "flag" - }, - { - "severity": "high", - "rule_module": "code_safety", - "type": "unwrap_without_check", - "file": "adapters/src/gitlab.rs", - "action": "flag" - }, - { - "severity": "high", + "severity": "low", "rule_module": "code_safety", "type": "unwrap_without_check", - "file": "adapters/src/radicle.rs", + "file": "cli/build.rs", "action": "flag" }, { - "severity": "high", + "severity": "low", "rule_module": "code_safety", "type": "unwrap_without_check", - "file": "adapters/src/sourcehut.rs", + "file": "cli/src/commands/batch.rs", "action": "flag" }, { - "severity": "high", + "severity": "low", "rule_module": "code_safety", "type": "unwrap_without_check", - "file": "adapters/tests/adapter_tests.rs", + "file": "cli/src/commands/fleet.rs", "action": "flag" }, { - "severity": "high", + "severity": "low", "rule_module": "code_safety", "type": "unwrap_without_check", - "file": "cli/build.rs", + "file": "cli/src/commands/scan.rs", "action": "flag" }, { - "severity": "high", + "severity": "low", "rule_module": "code_safety", "type": "unwrap_without_check", - "file": "cli/src/app_state.rs", + "file": "cli/src/output.rs", "action": "flag" }, { - "severity": "high", + "severity": "low", "rule_module": "code_safety", "type": "unwrap_without_check", - "file": "cli/src/commands/batch.rs", + "file": "fixer/src/main.rs", "action": "flag" }, { - "severity": "high", + "severity": "low", "rule_module": "code_safety", "type": "unwrap_without_check", - "file": "cli/src/commands/fleet.rs", + "file": "integration/src/ci_simulation/scenarios.rs", "action": "flag" }, { - "severity": "high", + "severity": "low", "rule_module": "code_safety", "type": "unwrap_without_check", - "file": "cli/src/commands/scan.rs", + "file": "integration/src/lib.rs", "action": "flag" }, { - "severity": "high", - "rule_module": "code_safety", - "type": "unwrap_without_check", - "file": "cli/src/config.rs", - "action": "flag" + "severity": "low", + "rule_module": "git_state", + "type": "GS006", + "file": ".", + "action": "review" }, { - "severity": "high", - "rule_module": "code_safety", - "type": "unwrap_without_check", - "file": "cli/src/output.rs", + "severity": "low", + "rule_module": "honest_completion", + "type": "no_state_file", + "file": "/home/user/hypatia", "action": "flag" }, { - "severity": "high", - "rule_module": "code_safety", - "type": "unwrap_without_check", - "file": "clients/rust/hypatia-client/src/ffi.rs", - "action": "flag" + "severity": "low", + "rule_module": "root_hygiene", + "type": "stale", + "file": "DESIGN-NARRATIVE.md", + "action": "move" }, { - "severity": "high", - "rule_module": "code_safety", - "type": "unwrap_without_check", - "file": "data/src/cache.rs", - "action": "flag" + "severity": "low", + "rule_module": "structural_drift", + "type": "SD013", + "file": ".gitignore", + "action": "globalise_gitignore_pattern" }, { - "severity": "high", - "rule_module": "code_safety", - "type": "unwrap_without_check", - "file": "data/src/dragonfly.rs", - "action": "flag" + "severity": "low", + "rule_module": "structural_drift", + "type": "SD013", + "file": ".gitignore", + "action": "globalise_gitignore_pattern" }, { - "severity": "high", - "rule_module": "code_safety", - "type": "unwrap_without_check", - "file": "data/src/verisim.rs", - "action": "flag" + "severity": "low", + "rule_module": "structural_drift", + "type": "SD013", + "file": ".gitignore", + "action": "globalise_gitignore_pattern" }, { - "severity": "high", - "rule_module": "code_safety", - "type": "unwrap_without_check", - "file": "fixer/src/main.rs", - "action": "flag" + "severity": "low", + "rule_module": "workflow_audit", + "type": "missing_workflow", + "file": "guix-nix-policy.yml", + "action": "create" }, { - "severity": "high", - "rule_module": "code_safety", - "type": "unwrap_without_check", - "file": "integration/src/ci_simulation/mod.rs", - "action": "flag" + "severity": "low", + "rule_module": "workflow_audit", + "type": "missing_workflow", + "file": "instant-sync.yml", + "action": "create" }, { - "severity": "high", - "rule_module": "code_safety", - "type": "unwrap_without_check", - "file": "integration/src/ci_simulation/scenarios.rs", - "action": "flag" + "severity": "low", + "rule_module": "workflow_audit", + "type": "missing_workflow", + "file": "jekyll-gh-pages.yml", + "action": "create" }, { - "severity": "high", - "rule_module": "code_safety", - "type": "unwrap_without_check", - "file": "integration/src/lib.rs", - "action": "flag" + "severity": "low", + "rule_module": "workflow_audit", + "type": "missing_workflow", + "file": "jekyll.yml", + "action": "create" }, { - "severity": "high", - "rule_module": "code_safety", - "type": "unwrap_without_check", - "file": "integration/tests/arangodb_test.rs", - "action": "flag" + "severity": "low", + "rule_module": "workflow_audit", + "type": "missing_workflow", + "file": "npm-bun-blocker.yml", + "action": "create" }, { - "severity": "high", - "rule_module": "code_safety", - "type": "unwrap_without_check", - "file": "integration/tests/ci_simulation_test.rs", - "action": "flag" + "severity": "low", + "rule_module": "workflow_audit", + "type": "missing_workflow", + "file": "rsr-antipattern.yml", + "action": "create" }, { - "severity": "high", - "rule_module": "code_safety", - "type": "unwrap_without_check", - "file": "integration/tests/fleet_test.rs", - "action": "flag" + "severity": "low", + "rule_module": "workflow_audit", + "type": "missing_workflow", + "file": "scorecard-enforcer.yml", + "action": "create" }, { - "severity": "high", - "rule_module": "code_safety", - "type": "unwrap_without_check", - "file": "integration/tests/forge_test.rs", - "action": "flag" + "severity": "low", + "rule_module": "workflow_audit", + "type": "missing_workflow", + "file": "ts-blocker.yml", + "action": "create" }, { - "severity": "high", - "rule_module": "code_safety", - "type": "unwrap_without_check", - "file": "integration/tests/hooks_test.rs", - "action": "flag" + "severity": "low", + "rule_module": "workflow_audit", + "type": "missing_workflow", + "file": "wellknown-enforcement.yml", + "action": "create" }, { - "severity": "high", - "rule_module": "code_safety", - "type": "unwrap_without_check", - "file": "integration/tests/registry_test.rs", - "action": "flag" + "severity": "low", + "rule_module": "workflow_audit", + "type": "missing_workflow", + "file": "workflow-linter.yml", + "action": "create" }, { - "severity": "high", + "severity": "medium", "rule_module": "git_state", - "type": "GS005", + "type": "GS001", "file": ".", - "action": "flag" + "action": "commit" }, { - "severity": "high", - "rule_module": "workflow_audit", - "type": "download_then_run", - "file": "docs.yml", - "action": "verify_download_integrity" + "severity": "medium", + "rule_module": "git_state", + "type": "GS007", + "file": ".", + "action": "delete_remote_branches" }, { - "severity": "high", - "rule_module": "workflow_audit", - "type": "unsafe_curl_payload", - "file": "hypatia-scan.yml", - "action": "use_jq_payload" + "severity": "medium", + "rule_module": "structural_drift", + "type": "SD009", + "file": "ffi/zig/src/main.zig", + "action": "add_spdx_header" } ] From e929621f6d87a99a52482837d5f477449a4dcbb0 Mon Sep 17 00:00:00 2001 From: Claude Date: Sat, 23 May 2026 23:07:35 +0000 Subject: [PATCH 3/5] feat(rules): consume secret-scanning and code-scanning alert APIs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The HYPATIA_DISPATCH_PAT was provisioned with read access to secret-scanning alerts, code-scanning alerts, and Dependabot alerts. Only Dependabot was actually being consumed (lib/rules/dependabot_alerts.ex, DA001-DA004) — the other two alert surfaces were granted but unused. Adds two new rule modules mirroring the DependabotAlerts shape: lib/rules/secret_scanning_alerts.ex (SSA001-SSA004) SSA001 — Open leaked-secret alerts (always :critical; staleness surfaced in the reason for triage prioritisation). SSA002 — Repo-level meta-finding when any open alert exists. SSA003 — Stale open alerts past the 7-day rotation threshold. SSA004 — Resolved alerts with no documented resolution vocabulary (anything outside revoked/used_in_tests/pattern_deleted/ pattern_edited). lib/rules/code_scanning_alerts.ex (CSA001-CSA004) CSA001 — Open code-scanning alerts (CodeQL + third-party SARIF including Hypatia's own `hypatia` category). Severity mapped from `security_severity_level`/`severity` onto the canonical four-bucket scale. CSA002 — Severity summary (any critical, ≥5 high, or ≥10 total). CSA003 — Stale open alerts (3/7/30/90 days by severity bucket). CSA004 — Dismissed without documented reason. Wires both into `Hypatia.CLI`: - registered in `@all_rule_modules` so the default scan includes them, - scan blocks emit normalised findings alongside the rest, - `format_module_name/1` gives them display names, - usage strings updated to list the new --rules tokens. Workflow comment in `.github/workflows/hypatia-scan.yml` updated to note that the existing `security-events: write` grant now covers all three alert APIs, not just Dependabot. No new permissions needed. Tests pin token-absent behaviour and the non-GitHub-remote error path for each module's helpers. --- .github/workflows/hypatia-scan.yml | 8 +- lib/hypatia/cli.ex | 62 +++- lib/rules/code_scanning_alerts.ex | 449 +++++++++++++++++++++++++++ lib/rules/secret_scanning_alerts.ex | 336 ++++++++++++++++++++ test/code_scanning_alerts_test.exs | 68 ++++ test/secret_scanning_alerts_test.exs | 69 ++++ 6 files changed, 988 insertions(+), 4 deletions(-) create mode 100644 lib/rules/code_scanning_alerts.ex create mode 100644 lib/rules/secret_scanning_alerts.ex create mode 100644 test/code_scanning_alerts_test.exs create mode 100644 test/secret_scanning_alerts_test.exs diff --git a/.github/workflows/hypatia-scan.yml b/.github/workflows/hypatia-scan.yml index cd38e6ce..c632a707 100644 --- a/.github/workflows/hypatia-scan.yml +++ b/.github/workflows/hypatia-scan.yml @@ -21,9 +21,11 @@ permissions: contents: read # security-events: write serves two purposes (write implies read): # 1. read — lets the built-in GITHUB_TOKEN query this repo's own - # Dependabot alerts via the Hypatia DependabotAlerts rule - # (DA001-DA004). Without read, `scan_from_path` gets HTTP 403 - # and the rule silently returns no findings. + # Dependabot alerts (DependabotAlerts rule, DA001-DA004), + # secret-scanning alerts (SecretScanningAlerts, SSA001-SSA004), + # and code-scanning alerts (CodeScanningAlerts, CSA001-CSA004). + # Without read, `scan_from_path` gets HTTP 403 and the rule + # silently returns no findings. # See 007-lang/audits/audit-dependabot-automation-gap-2026-04-17.md. # 2. write — lets the "Upload SARIF to code scanning" step publish # Hypatia findings to the Security → Code scanning page so they diff --git a/lib/hypatia/cli.ex b/lib/hypatia/cli.ex index 21322508..e9ec7bcd 100644 --- a/lib/hypatia/cli.ex +++ b/lib/hypatia/cli.ex @@ -23,6 +23,7 @@ defmodule Hypatia.CLI do Available: root_hygiene,honest_completion,workflow_audit, cicd_rules,code_safety,migration_rules,scorecard, green_web,git_state,dependabot_alerts, + secret_scanning_alerts,code_scanning_alerts, structural_drift --format Output format: json (default), text, github --severity Minimum severity to report: critical, high, medium (default), low, info @@ -47,6 +48,8 @@ defmodule Hypatia.CLI do :green_web, :git_state, :dependabot_alerts, + :secret_scanning_alerts, + :code_scanning_alerts, :structural_drift ] @@ -636,6 +639,60 @@ defmodule Hypatia.CLI do results end + # Secret Scanning Alerts + results = + if :secret_scanning_alerts in rules do + case Hypatia.Rules.SecretScanningAlerts.scan_from_path(repo_path) do + {:ok, %{findings: findings}} -> + normalized = + Enum.map(findings, fn f -> + %{ + rule_module: "secret_scanning_alerts", + severity: to_string(f.severity), + type: f.rule, + file: Map.get(f, :file, ""), + reason: f.reason, + action: to_string(f.action) + } + end) + + results ++ normalized + + {:error, reason} -> + IO.puts(:stderr, "Warning: Secret-scanning alerts unavailable: #{reason}") + results + end + else + results + end + + # Code Scanning Alerts + results = + if :code_scanning_alerts in rules do + case Hypatia.Rules.CodeScanningAlerts.scan_from_path(repo_path) do + {:ok, %{findings: findings}} -> + normalized = + Enum.map(findings, fn f -> + %{ + rule_module: "code_scanning_alerts", + severity: to_string(f.severity), + type: f.rule, + file: Map.get(f, :file, ""), + reason: f.reason, + action: to_string(f.action) + } + end) + + results ++ normalized + + {:error, reason} -> + IO.puts(:stderr, "Warning: Code-scanning alerts unavailable: #{reason}") + results + end + else + results + end + # Structural Drift results = if :structural_drift in rules do @@ -1042,6 +1099,8 @@ defmodule Hypatia.CLI do defp format_module_name("green_web"), do: "Green Web Foundation" defp format_module_name("git_state"), do: "Git State Sync" defp format_module_name("dependabot_alerts"), do: "Dependabot Alerts" + defp format_module_name("secret_scanning_alerts"), do: "Secret Scanning Alerts" + defp format_module_name("code_scanning_alerts"), do: "Code Scanning Alerts" defp format_module_name(other), do: other defp print_usage do @@ -1062,7 +1121,8 @@ defmodule Hypatia.CLI do Available: root_hygiene,honest_completion, workflow_audit,cicd_rules,code_safety, migration_rules,scorecard,green_web, - git_state,dependabot_alerts + git_state,dependabot_alerts, + secret_scanning_alerts,code_scanning_alerts --format, -f Output format: json (default), text, github --severity, -s Minimum severity: critical, high, medium (default), low --path, -p Path to scan (alternative to positional arg) diff --git a/lib/rules/code_scanning_alerts.ex b/lib/rules/code_scanning_alerts.ex new file mode 100644 index 00000000..b18d3464 --- /dev/null +++ b/lib/rules/code_scanning_alerts.ex @@ -0,0 +1,449 @@ +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Jonathan D.A. Jewell (hyperpolymath) + +defmodule Hypatia.Rules.CodeScanningAlerts do + @moduledoc """ + GitHub Code Scanning alert querying (CodeQL + third-party SARIF). + + Queries the GitHub REST API for code-scanning alerts on a repository, + classifies by severity, and generates findings for the safety triangle + pipeline. Surfaces CodeQL findings (and any other SARIF uploads -- + including Hypatia's own, via the `hypatia` category) alongside the + rest of the scanner output so a single Hypatia run sees everything + GitHub's security tab is showing. + + Requires GITHUB_TOKEN with `code_scanning_alerts: read` permission + (fine-grained PAT) or `security_events` scope (classic PAT). + + Rule IDs: CSA001-CSA004 + """ + + require Logger + + @github_api_base "https://api.github.com" + @max_alerts_per_repo 100 + + # Stale thresholds (days), keyed by alert severity. Mirrors the + # DependabotAlerts cadence: critical findings escalate fastest. + @stale_thresholds %{ + critical: 3, + high: 7, + medium: 30, + low: 90, + note: 90, + warning: 30, + error: 7 + } + + # Dismissal reasons accepted by policy without further review. + @accepted_dismissals ~w(false\ positive used\ in\ tests won't\ fix) + + # ─── CSA001: Open code-scanning alerts ───────────────────────────────── + + @doc """ + CSA001: List all open code-scanning alerts on the repo. Each alert's + severity is taken from its rule definition (critical/high/medium/low, + or CodeQL's note/warning/error). The Hypatia-side severity is mapped + to the same canonical four-bucket scale used by other rule modules so + the CLI's severity threshold works uniformly. + """ + def csa001_open_alerts(owner, repo) do + case fetch_alerts(owner, repo) do + {:ok, alerts} -> + alerts + |> Enum.filter(&(&1["state"] == "open")) + |> Enum.map(fn alert -> + rule_id = get_in(alert, ["rule", "id"]) || "unknown" + severity_raw = get_in(alert, ["rule", "severity"]) || "warning" + security_severity = get_in(alert, ["rule", "security_severity_level"]) + description = get_in(alert, ["rule", "description"]) || rule_id + tool = get_in(alert, ["tool", "name"]) || "unknown" + path = get_in(alert, ["most_recent_instance", "location", "path"]) || "" + line = get_in(alert, ["most_recent_instance", "location", "start_line"]) + + created = alert["created_at"] + age_days = age_in_days(created) + mapped_severity = map_severity(security_severity || severity_raw) + stale_threshold = Map.get(@stale_thresholds, mapped_severity, 30) + is_stale = age_days > stale_threshold + + %{ + rule: "CSA001", + file: path, + severity: mapped_severity, + reason: build_alert_reason(tool, rule_id, description, age_days, is_stale), + action: determine_action(mapped_severity, is_stale), + detail: %{ + alert_number: alert["number"], + tool: tool, + rule_id: rule_id, + rule_severity: severity_raw, + security_severity_level: security_severity, + path: path, + line: line, + age_days: age_days, + is_stale: is_stale, + created_at: created, + url: alert["html_url"] + } + } + end) + + {:error, reason} -> + Logger.warning("CSA001: Failed to fetch code-scanning alerts: #{reason}") + [] + end + end + + # ─── CSA002: Severity summary ────────────────────────────────────────── + + @doc """ + CSA002: Meta-finding when open alert counts exceed thresholds. + Triggers at any critical, ≥5 high, or ≥10 total open alerts. + """ + def csa002_severity_summary(owner, repo) do + case fetch_alerts(owner, repo) do + {:ok, alerts} -> + open = Enum.filter(alerts, &(&1["state"] == "open")) + + by_severity = + Enum.group_by(open, fn a -> + sev = get_in(a, ["rule", "security_severity_level"]) || get_in(a, ["rule", "severity"]) + map_severity(sev) + end) + + critical_count = length(Map.get(by_severity, :critical, [])) + high_count = length(Map.get(by_severity, :high, [])) + total = length(open) + + findings = [] + + findings = + if critical_count > 0 do + [%{ + rule: "CSA002", + file: "#{owner}/#{repo}", + severity: :critical, + reason: + "#{critical_count} critical code-scanning alert(s) -- immediate triage required", + action: :escalate, + detail: %{critical: critical_count, high: high_count, total: total} + } + | findings] + else + findings + end + + findings = + if high_count >= 5 do + [%{ + rule: "CSA002", + file: "#{owner}/#{repo}", + severity: :high, + reason: + "#{high_count} high-severity code-scanning alert(s) -- batch remediation recommended", + action: :batch_update, + detail: %{high: high_count, total: total} + } + | findings] + else + findings + end + + findings = + if total >= 10 do + [%{ + rule: "CSA002", + file: "#{owner}/#{repo}", + severity: :medium, + reason: "#{total} total open code-scanning alert(s) -- security hygiene review", + action: :review, + detail: %{ + total: total, + by_severity: + Map.new(by_severity, fn {k, v} -> {to_string(k), length(v)} end) + } + } + | findings] + else + findings + end + + findings + + {:error, _} -> [] + end + end + + # ─── CSA003: Stale open alerts ───────────────────────────────────────── + + @doc """ + CSA003: Open code-scanning alerts older than the severity-appropriate + threshold. Critical alerts stale after 3 days, high after 7, medium + after 30, low after 90. + """ + def csa003_stale_alerts(owner, repo) do + case fetch_alerts(owner, repo) do + {:ok, alerts} -> + alerts + |> Enum.filter(&(&1["state"] == "open")) + |> Enum.filter(fn alert -> + sev = + map_severity( + get_in(alert, ["rule", "security_severity_level"]) || + get_in(alert, ["rule", "severity"]) || "medium" + ) + + threshold = Map.get(@stale_thresholds, sev, 30) + age_in_days(alert["created_at"]) > threshold + end) + |> Enum.map(fn alert -> + rule_id = get_in(alert, ["rule", "id"]) || "unknown" + + sev = + map_severity( + get_in(alert, ["rule", "security_severity_level"]) || + get_in(alert, ["rule", "severity"]) || "medium" + ) + + age = age_in_days(alert["created_at"]) + threshold = Map.get(@stale_thresholds, sev, 30) + path = get_in(alert, ["most_recent_instance", "location", "path"]) || "" + + %{ + rule: "CSA003", + file: path, + severity: :high, + reason: + "Code-scanning alert #{rule_id} (#{sev}) at #{path} is #{age} days old " <> + "(threshold: #{threshold} days) -- overdue for remediation", + action: :escalate, + detail: %{ + alert_number: alert["number"], + rule_id: rule_id, + path: path, + original_severity: sev, + age_days: age, + threshold_days: threshold + } + } + end) + + {:error, _} -> [] + end + end + + # ─── CSA004: Dismissed without documented resolution ─────────────────── + + @doc """ + CSA004: Alerts dismissed with no documented reason (or with a vague + one). Real dismissals carry a `dismissed_reason` in the accepted + vocabulary (`false positive`, `won't fix`, `used in tests`); anything + else is policy-suspicious and should be reviewed. + """ + def csa004_dismissed_without_fix(owner, repo) do + case fetch_alerts(owner, repo) do + {:ok, alerts} -> + alerts + |> Enum.filter(fn a -> + a["state"] == "dismissed" and + a["dismissed_reason"] not in @accepted_dismissals + end) + |> Enum.map(fn alert -> + rule_id = get_in(alert, ["rule", "id"]) || "unknown" + reason = alert["dismissed_reason"] || "no reason given" + path = get_in(alert, ["most_recent_instance", "location", "path"]) || "" + + %{ + rule: "CSA004", + file: path, + severity: :medium, + reason: + "Code-scanning alert #{rule_id} dismissed as '#{reason}' " <> + "-- ensure dismissal is documented and justified", + action: :review, + detail: %{ + alert_number: alert["number"], + rule_id: rule_id, + path: path, + dismissed_reason: reason, + dismissed_comment: alert["dismissed_comment"], + dismissed_at: alert["dismissed_at"] + } + } + end) + + {:error, _} -> [] + end + end + + # ─── Comprehensive scan ──────────────────────────────────────────────── + + @doc """ + Run all code-scanning checks for a repository. + """ + def scan(owner, repo) do + token = System.get_env("GITHUB_TOKEN") + + if token == nil or token == "" do + {:error, "GITHUB_TOKEN not set -- cannot query code-scanning alerts"} + else + findings = + csa001_open_alerts(owner, repo) ++ + csa002_severity_summary(owner, repo) ++ + csa003_stale_alerts(owner, repo) ++ + csa004_dismissed_without_fix(owner, repo) + + deduped = + findings + |> Enum.uniq_by(fn f -> + {f.rule, Map.get(f.detail, :alert_number, f.file)} + end) + + {:ok, %{ + findings: deduped, + total: length(deduped), + by_severity: group_by_severity(deduped) + }} + end + end + + @doc """ + Scan from a local repo path -- extracts owner/repo from git remote. + """ + def scan_from_path(repo_path) do + case extract_owner_repo(repo_path) do + {:ok, owner, repo} -> scan(owner, repo) + {:error, reason} -> {:error, reason} + end + end + + # ─── GitHub API ──────────────────────────────────────────────────────── + + defp fetch_alerts(owner, repo) do + token = System.get_env("GITHUB_TOKEN") + + if token == nil or token == "" do + {:error, "GITHUB_TOKEN not set"} + else + url = + "#{@github_api_base}/repos/#{owner}/#{repo}/code-scanning/alerts" <> + "?per_page=#{@max_alerts_per_repo}" + + case System.cmd("curl", [ + "-s", + "-f", + "-H", + "Accept: application/vnd.github+json", + "-H", + "Authorization: Bearer #{token}", + "-H", + "X-GitHub-Api-Version: 2022-11-28", + url + ], stderr_to_stdout: true) do + {body, 0} -> + case Jason.decode(body) do + {:ok, alerts} when is_list(alerts) -> {:ok, alerts} + {:ok, %{"message" => msg}} -> {:error, "GitHub API: #{msg}"} + {:error, _} -> {:error, "Invalid JSON response from GitHub API"} + end + + {error, _} -> + {:error, "curl failed: #{String.slice(error, 0, 200)}"} + end + end + end + + defp extract_owner_repo(repo_path) do + case System.cmd("git", ["remote", "get-url", "origin"], + cd: repo_path, + stderr_to_stdout: true + ) do + {url, 0} -> + trimmed = String.trim(url) + + cond do + String.contains?(trimmed, "github.com:") -> + [_, path] = String.split(trimmed, "github.com:", parts: 2) + parse_owner_repo_from_path(path) + + String.contains?(trimmed, "github.com/") -> + [_, path] = String.split(trimmed, "github.com/", parts: 2) + parse_owner_repo_from_path(path) + + true -> + {:error, "Remote URL is not a GitHub URL: #{trimmed}"} + end + + _ -> + {:error, "Could not get remote URL"} + end + end + + defp parse_owner_repo_from_path(path) do + clean = path |> String.trim() |> String.trim_trailing(".git") + + case String.split(clean, "/", parts: 2) do + [owner, repo] -> {:ok, owner, repo} + _ -> {:error, "Could not parse owner/repo from: #{path}"} + end + end + + # ─── Helpers ─────────────────────────────────────────────────────────── + + # Normalise the heterogeneous severity surface (CodeQL uses note/ + # warning/error, third-party SARIF often uses critical/high/medium/low, + # GitHub's `security_severity_level` uses critical/high/medium/low) onto + # Hypatia's canonical bucket scale so the CLI's severity threshold + # works uniformly across all rule modules. + defp map_severity(sev) when is_binary(sev) do + case String.downcase(sev) do + "critical" -> :critical + "high" -> :high + "error" -> :high + "medium" -> :medium + "warning" -> :medium + "low" -> :low + "note" -> :low + _ -> :medium + end + end + + defp map_severity(sev) when is_atom(sev), do: map_severity(Atom.to_string(sev)) + defp map_severity(_), do: :medium + + defp age_in_days(nil), do: 0 + + defp age_in_days(iso_string) when is_binary(iso_string) do + case DateTime.from_iso8601(iso_string) do + {:ok, dt, _} -> DateTime.diff(DateTime.utc_now(), dt, :day) + _ -> 0 + end + end + + defp build_alert_reason(tool, rule_id, description, age_days, is_stale) do + base = "Code scanning (#{tool}): #{rule_id} -- #{description}" + age_part = " -- #{age_days} day(s) old" + stale_part = if is_stale, do: " [STALE]", else: "" + base <> age_part <> stale_part + end + + defp determine_action(severity, is_stale) do + case {severity, is_stale} do + {:critical, _} -> :escalate + {:high, true} -> :escalate + {:high, false} -> :update + {:medium, true} -> :update + {:medium, false} -> :review + {:low, _} -> :review + _ -> :review + end + end + + defp group_by_severity(findings) do + findings + |> Enum.group_by(& &1.severity) + |> Enum.map(fn {sev, items} -> {sev, length(items)} end) + |> Map.new() + end +end diff --git a/lib/rules/secret_scanning_alerts.ex b/lib/rules/secret_scanning_alerts.ex new file mode 100644 index 00000000..3acf283a --- /dev/null +++ b/lib/rules/secret_scanning_alerts.ex @@ -0,0 +1,336 @@ +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Jonathan D.A. Jewell (hyperpolymath) + +defmodule Hypatia.Rules.SecretScanningAlerts do + @moduledoc """ + GitHub Secret Scanning alert querying. + + Queries the GitHub REST API for active secret-scanning alerts on + repositories and generates findings for the safety triangle pipeline. + + A secret-scanning alert means GitHub identified a credential committed + to the repo (API token, private key, etc.). Every open alert is treated + as :critical -- leaked secrets are by definition not "advisory" risk, + and the dismissal vocabulary (`revoked`, `used_in_tests`, `false_positive`) + is the place to mark accepted ones. + + Requires GITHUB_TOKEN with `secret_scanning_alerts: read` permission + (fine-grained PAT) or `security_events` scope (classic PAT). + + Rule IDs: SSA001-SSA004 + """ + + require Logger + + @github_api_base "https://api.github.com" + @max_alerts_per_repo 100 + + # Stale thresholds (days). A revoked secret left in history is still a + # finding -- but a fresh open alert is much more urgent. + @stale_threshold_days 7 + + # Dismissal reasons that are accepted by policy without further review. + @accepted_resolutions ~w(revoked used_in_tests pattern_deleted pattern_edited) + + # ─── SSA001: Open secret-scanning alerts ─────────────────────────────── + + @doc """ + SSA001: List all open secret-scanning alerts on the repo. + + Every open alert is :critical -- a real credential is sitting in the + git history. The triangle classifier deals with whether it's + fixable (rotate + remove) vs. a documented test fixture. + """ + def ssa001_open_alerts(owner, repo) do + case fetch_alerts(owner, repo) do + {:ok, alerts} -> + alerts + |> Enum.filter(&(&1["state"] == "open")) + |> Enum.map(fn alert -> + secret_type = alert["secret_type_display_name"] || alert["secret_type"] || "unknown" + created = alert["created_at"] + age_days = age_in_days(created) + is_stale = age_days > @stale_threshold_days + + %{ + rule: "SSA001", + file: secret_type, + severity: :critical, + reason: build_alert_reason(secret_type, age_days, is_stale), + action: :escalate, + detail: %{ + alert_number: alert["number"], + secret_type: alert["secret_type"], + secret_type_display: alert["secret_type_display_name"], + age_days: age_days, + is_stale: is_stale, + created_at: created, + url: alert["html_url"], + locations_url: alert["locations_url"] + } + } + end) + + {:error, reason} -> + Logger.warning("SSA001: Failed to fetch secret-scanning alerts: #{reason}") + [] + end + end + + # ─── SSA002: Severity summary ────────────────────────────────────────── + + @doc """ + SSA002: Meta-finding if open alert count exceeds zero. Any leaked + secret is a critical security event -- we surface a repo-level marker + so the dashboard can highlight the repo, not just the individual alert. + """ + def ssa002_severity_summary(owner, repo) do + case fetch_alerts(owner, repo) do + {:ok, alerts} -> + open = Enum.filter(alerts, &(&1["state"] == "open")) + count = length(open) + + if count > 0 do + [%{ + rule: "SSA002", + file: "#{owner}/#{repo}", + severity: :critical, + reason: "#{count} open secret-scanning alert(s) -- rotate and purge from history", + action: :escalate, + detail: %{ + total: count, + by_type: + open + |> Enum.group_by(&(&1["secret_type"] || "unknown")) + |> Map.new(fn {k, v} -> {k, length(v)} end) + } + }] + else + [] + end + + {:error, _} -> [] + end + end + + # ─── SSA003: Stale open alerts ───────────────────────────────────────── + + @doc """ + SSA003: Open secret-scanning alerts older than the stale threshold. + Leaked secrets must be rotated within days, not weeks. Findings are + always :critical regardless of age (the secret is leaked either way), + but staleness is surfaced in the reason for triage prioritisation. + """ + def ssa003_stale_alerts(owner, repo) do + case fetch_alerts(owner, repo) do + {:ok, alerts} -> + alerts + |> Enum.filter(&(&1["state"] == "open")) + |> Enum.filter(fn alert -> + age_in_days(alert["created_at"]) > @stale_threshold_days + end) + |> Enum.map(fn alert -> + secret_type = alert["secret_type_display_name"] || alert["secret_type"] || "unknown" + age = age_in_days(alert["created_at"]) + + %{ + rule: "SSA003", + file: secret_type, + severity: :critical, + reason: + "Secret-scanning alert for #{secret_type} is #{age} days old " <> + "(threshold: #{@stale_threshold_days} days) -- overdue for rotation", + action: :escalate, + detail: %{ + alert_number: alert["number"], + secret_type: alert["secret_type"], + age_days: age, + threshold_days: @stale_threshold_days + } + } + end) + + {:error, _} -> [] + end + end + + # ─── SSA004: Dismissed without acceptable resolution ─────────────────── + + @doc """ + SSA004: Alerts resolved with no documented resolution reason, or with + a vague reason. Real resolutions go through the `revoked`, + `used_in_tests`, `false_positive`, `pattern_deleted`, `pattern_edited` + vocabulary; anything else (including nil) is policy-suspicious. + """ + def ssa004_dismissed_without_fix(owner, repo) do + case fetch_alerts(owner, repo) do + {:ok, alerts} -> + alerts + |> Enum.filter(fn a -> + a["state"] == "resolved" and + a["resolution"] not in @accepted_resolutions + end) + |> Enum.map(fn alert -> + secret_type = alert["secret_type_display_name"] || alert["secret_type"] || "unknown" + resolution = alert["resolution"] || "no reason given" + + %{ + rule: "SSA004", + file: secret_type, + severity: :high, + reason: + "Secret-scanning alert for #{secret_type} resolved as '#{resolution}' " <> + "-- confirm rotation completed and document acceptance reason", + action: :review, + detail: %{ + alert_number: alert["number"], + secret_type: alert["secret_type"], + resolution: resolution, + resolved_at: alert["resolved_at"], + resolution_comment: alert["resolution_comment"] + } + } + end) + + {:error, _} -> [] + end + end + + # ─── Comprehensive scan ──────────────────────────────────────────────── + + @doc """ + Run all secret-scanning checks for a repository. + Returns `{:ok, result}` or `{:error, reason}`. + """ + def scan(owner, repo) do + token = System.get_env("GITHUB_TOKEN") + + if token == nil or token == "" do + {:error, "GITHUB_TOKEN not set -- cannot query secret-scanning alerts"} + else + findings = + ssa001_open_alerts(owner, repo) ++ + ssa002_severity_summary(owner, repo) ++ + ssa003_stale_alerts(owner, repo) ++ + ssa004_dismissed_without_fix(owner, repo) + + deduped = + findings + |> Enum.uniq_by(fn f -> + {f.rule, Map.get(f.detail, :alert_number, f.file)} + end) + + {:ok, %{ + findings: deduped, + total: length(deduped), + by_severity: group_by_severity(deduped) + }} + end + end + + @doc """ + Scan from a local repo path -- extracts owner/repo from git remote. + """ + def scan_from_path(repo_path) do + case extract_owner_repo(repo_path) do + {:ok, owner, repo} -> scan(owner, repo) + {:error, reason} -> {:error, reason} + end + end + + # ─── GitHub API ──────────────────────────────────────────────────────── + + defp fetch_alerts(owner, repo) do + token = System.get_env("GITHUB_TOKEN") + + if token == nil or token == "" do + {:error, "GITHUB_TOKEN not set"} + else + url = + "#{@github_api_base}/repos/#{owner}/#{repo}/secret-scanning/alerts" <> + "?per_page=#{@max_alerts_per_repo}" + + case System.cmd("curl", [ + "-s", + "-f", + "-H", + "Accept: application/vnd.github+json", + "-H", + "Authorization: Bearer #{token}", + "-H", + "X-GitHub-Api-Version: 2022-11-28", + url + ], stderr_to_stdout: true) do + {body, 0} -> + case Jason.decode(body) do + {:ok, alerts} when is_list(alerts) -> {:ok, alerts} + {:ok, %{"message" => msg}} -> {:error, "GitHub API: #{msg}"} + {:error, _} -> {:error, "Invalid JSON response from GitHub API"} + end + + {error, _} -> + {:error, "curl failed: #{String.slice(error, 0, 200)}"} + end + end + end + + defp extract_owner_repo(repo_path) do + case System.cmd("git", ["remote", "get-url", "origin"], + cd: repo_path, + stderr_to_stdout: true + ) do + {url, 0} -> + trimmed = String.trim(url) + + cond do + String.contains?(trimmed, "github.com:") -> + [_, path] = String.split(trimmed, "github.com:", parts: 2) + parse_owner_repo_from_path(path) + + String.contains?(trimmed, "github.com/") -> + [_, path] = String.split(trimmed, "github.com/", parts: 2) + parse_owner_repo_from_path(path) + + true -> + {:error, "Remote URL is not a GitHub URL: #{trimmed}"} + end + + _ -> + {:error, "Could not get remote URL"} + end + end + + defp parse_owner_repo_from_path(path) do + clean = path |> String.trim() |> String.trim_trailing(".git") + + case String.split(clean, "/", parts: 2) do + [owner, repo] -> {:ok, owner, repo} + _ -> {:error, "Could not parse owner/repo from: #{path}"} + end + end + + # ─── Helpers ─────────────────────────────────────────────────────────── + + defp age_in_days(nil), do: 0 + + defp age_in_days(iso_string) when is_binary(iso_string) do + case DateTime.from_iso8601(iso_string) do + {:ok, dt, _} -> DateTime.diff(DateTime.utc_now(), dt, :day) + _ -> 0 + end + end + + defp build_alert_reason(secret_type, age_days, is_stale) do + base = "Secret scanning: leaked #{secret_type}" + age_part = " -- #{age_days} day(s) old" + stale_part = if is_stale, do: " [STALE -- rotate immediately]", else: "" + base <> age_part <> stale_part + end + + defp group_by_severity(findings) do + findings + |> Enum.group_by(& &1.severity) + |> Enum.map(fn {sev, items} -> {sev, length(items)} end) + |> Map.new() + end +end diff --git a/test/code_scanning_alerts_test.exs b/test/code_scanning_alerts_test.exs new file mode 100644 index 00000000..d8eb1dc8 --- /dev/null +++ b/test/code_scanning_alerts_test.exs @@ -0,0 +1,68 @@ +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Jonathan D.A. Jewell (hyperpolymath) +# +# Tests for Code Scanning alert querying rules (CSA001-CSA004). +# Exercise logic without hitting the GitHub API. + +defmodule Hypatia.Rules.CodeScanningAlertsTest do + use ExUnit.Case, async: false + + alias Hypatia.Rules.CodeScanningAlerts + + setup do + old_token = System.get_env("GITHUB_TOKEN") + System.delete_env("GITHUB_TOKEN") + + on_exit(fn -> + if old_token, do: System.put_env("GITHUB_TOKEN", old_token) + end) + + :ok + end + + describe "csa001_open_alerts/2" do + test "returns empty list when GITHUB_TOKEN is not set" do + assert CodeScanningAlerts.csa001_open_alerts("hyperpolymath", "test-nonexistent") == [] + end + end + + describe "csa002_severity_summary/2" do + test "returns empty list when GITHUB_TOKEN is not set" do + assert CodeScanningAlerts.csa002_severity_summary("hyperpolymath", "test-nonexistent") == [] + end + end + + describe "csa003_stale_alerts/2" do + test "returns empty list when GITHUB_TOKEN is not set" do + assert CodeScanningAlerts.csa003_stale_alerts("hyperpolymath", "test-nonexistent") == [] + end + end + + describe "csa004_dismissed_without_fix/2" do + test "returns empty list when GITHUB_TOKEN is not set" do + assert CodeScanningAlerts.csa004_dismissed_without_fix("hyperpolymath", "test-nonexistent") == + [] + end + end + + describe "scan/2" do + test "returns error tuple when GITHUB_TOKEN is not set" do + assert {:error, msg} = CodeScanningAlerts.scan("hyperpolymath", "test-nonexistent") + assert msg =~ "GITHUB_TOKEN not set" + end + end + + describe "scan_from_path/1" do + test "returns error when remote is not a github URL" do + tmp = Path.join(System.tmp_dir!(), "csa-test-#{System.unique_integer([:positive])}") + File.mkdir_p!(tmp) + System.cmd("git", ["init", "-q"], cd: tmp) + System.cmd("git", ["remote", "add", "origin", "http://gitea.example.com/foo/bar.git"], cd: tmp) + + assert {:error, msg} = CodeScanningAlerts.scan_from_path(tmp) + assert msg =~ "Remote URL is not a GitHub URL" + + File.rm_rf!(tmp) + end + end +end diff --git a/test/secret_scanning_alerts_test.exs b/test/secret_scanning_alerts_test.exs new file mode 100644 index 00000000..95ea8ce4 --- /dev/null +++ b/test/secret_scanning_alerts_test.exs @@ -0,0 +1,69 @@ +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Jonathan D.A. Jewell (hyperpolymath) +# +# Tests for Secret Scanning alert querying rules (SSA001-SSA004). +# Exercise the logic without hitting the GitHub API by relying on +# token-absent behaviour and direct helper calls. + +defmodule Hypatia.Rules.SecretScanningAlertsTest do + use ExUnit.Case, async: false + + alias Hypatia.Rules.SecretScanningAlerts + + setup do + old_token = System.get_env("GITHUB_TOKEN") + System.delete_env("GITHUB_TOKEN") + + on_exit(fn -> + if old_token, do: System.put_env("GITHUB_TOKEN", old_token) + end) + + :ok + end + + describe "ssa001_open_alerts/2" do + test "returns empty list when GITHUB_TOKEN is not set" do + assert SecretScanningAlerts.ssa001_open_alerts("hyperpolymath", "test-nonexistent") == [] + end + end + + describe "ssa002_severity_summary/2" do + test "returns empty list when GITHUB_TOKEN is not set" do + assert SecretScanningAlerts.ssa002_severity_summary("hyperpolymath", "test-nonexistent") == [] + end + end + + describe "ssa003_stale_alerts/2" do + test "returns empty list when GITHUB_TOKEN is not set" do + assert SecretScanningAlerts.ssa003_stale_alerts("hyperpolymath", "test-nonexistent") == [] + end + end + + describe "ssa004_dismissed_without_fix/2" do + test "returns empty list when GITHUB_TOKEN is not set" do + assert SecretScanningAlerts.ssa004_dismissed_without_fix("hyperpolymath", "test-nonexistent") == + [] + end + end + + describe "scan/2" do + test "returns error tuple when GITHUB_TOKEN is not set" do + assert {:error, msg} = SecretScanningAlerts.scan("hyperpolymath", "test-nonexistent") + assert msg =~ "GITHUB_TOKEN not set" + end + end + + describe "scan_from_path/1" do + test "returns error when remote is not a github URL" do + tmp = Path.join(System.tmp_dir!(), "ssa-test-#{System.unique_integer([:positive])}") + File.mkdir_p!(tmp) + System.cmd("git", ["init", "-q"], cd: tmp) + System.cmd("git", ["remote", "add", "origin", "http://gitea.example.com/foo/bar.git"], cd: tmp) + + assert {:error, msg} = SecretScanningAlerts.scan_from_path(tmp) + assert msg =~ "Remote URL is not a GitHub URL" + + File.rm_rf!(tmp) + end + end +end From 74173eeac16f2d4e226c40947703b17c9094f34f Mon Sep 17 00:00:00 2001 From: Claude Date: Sat, 23 May 2026 23:18:19 +0000 Subject: [PATCH 4/5] test(soundness): manifest-driven regression gate for scanner rules MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PR #278 documented that the deployed escript had been silently dropping the Elixir/Erlang/Coq/Lean/Agda/Zig/F*/Ada code_safety pattern families for days because the binary was stale relative to the rule sources. "No findings" looks identical whether the code is clean or the rule is broken — that ambiguity is the soundness gap. Closes it with the simplest possible mechanism: for every rule the scanner is supposed to detect, keep a known-bad sample on disk, and assert in CI that the rule fires on its sample at the expected severity. A rule that goes silent (regex drift, file pruning, packaging regression, module rename) breaks the build instead of silently weakening the estate's security posture. Layout: test/soundness/ manifest.json -- rule -> fixture -> severity fixtures/code_safety/ believe_me.idr -- Idris2 sorry.lean -- Lean admitted.v -- Coq unsafe_coerce.hs -- Haskell obj_magic_ocaml.ml -- OCaml getexn_on_external.res -- ReScript unwrap_without_check.rs -- Rust transmute.rs -- Rust unsafe elixir_system_shell.ex -- THE PR#278 false-negative elixir_os_cmd.ex -- Elixir os.cmd elixir_code_eval.ex -- Elixir Code.eval shell_download_then_run.sh -- curl|bash agda_postulate.agda -- Agda zig_ptr_cast.zig -- Zig README.adoc -- how to add a fixture test/soundness_test.exs -- runner, @moduletag :soundness Manifest entries cover all the language families PR #278 specifically called out as having been silently dropped. The runner is data-driven: adding a rule means dropping a fixture + a manifest entry, no test code change. Hand-run against the current tree: 14/14 fixtures fire at the expected severity. The soundness gate is operational. Out of scope (next iteration): - End-to-end escript-build soundness (build the escript, run it against the fixture corpus -- exact PR #278 reproduction). The in-process test catches rule-definition regressions, but a packaging regression that strips a module would still slip through. - Fixtures for non-code_safety families (workflow_audit, cicd_rules, structural_drift, scorecard, dependabot_alerts, ...). --- test/soundness/README.adoc | 87 ++++++++++++++ .../soundness/fixtures/code_safety/admitted.v | 7 ++ .../fixtures/code_safety/agda_postulate.agda | 6 + .../fixtures/code_safety/believe_me.idr | 8 ++ .../fixtures/code_safety/elixir_code_eval.ex | 9 ++ .../fixtures/code_safety/elixir_os_cmd.ex | 9 ++ .../code_safety/elixir_system_shell.ex | 10 ++ .../code_safety/getexn_on_external.res | 5 + .../fixtures/code_safety/obj_magic_ocaml.ml | 5 + .../code_safety/shell_download_then_run.sh | 6 + .../soundness/fixtures/code_safety/sorry.lean | 5 + .../fixtures/code_safety/transmute.rs | 7 ++ .../fixtures/code_safety/unsafe_coerce.hs | 10 ++ .../code_safety/unwrap_without_check.rs | 7 ++ .../fixtures/code_safety/zig_ptr_cast.zig | 7 ++ test/soundness/manifest.json | 104 +++++++++++++++++ test/soundness_test.exs | 110 ++++++++++++++++++ 17 files changed, 402 insertions(+) create mode 100644 test/soundness/README.adoc create mode 100644 test/soundness/fixtures/code_safety/admitted.v create mode 100644 test/soundness/fixtures/code_safety/agda_postulate.agda create mode 100644 test/soundness/fixtures/code_safety/believe_me.idr create mode 100644 test/soundness/fixtures/code_safety/elixir_code_eval.ex create mode 100644 test/soundness/fixtures/code_safety/elixir_os_cmd.ex create mode 100644 test/soundness/fixtures/code_safety/elixir_system_shell.ex create mode 100644 test/soundness/fixtures/code_safety/getexn_on_external.res create mode 100644 test/soundness/fixtures/code_safety/obj_magic_ocaml.ml create mode 100644 test/soundness/fixtures/code_safety/shell_download_then_run.sh create mode 100644 test/soundness/fixtures/code_safety/sorry.lean create mode 100644 test/soundness/fixtures/code_safety/transmute.rs create mode 100644 test/soundness/fixtures/code_safety/unsafe_coerce.hs create mode 100644 test/soundness/fixtures/code_safety/unwrap_without_check.rs create mode 100644 test/soundness/fixtures/code_safety/zig_ptr_cast.zig create mode 100644 test/soundness/manifest.json create mode 100644 test/soundness_test.exs diff --git a/test/soundness/README.adoc b/test/soundness/README.adoc new file mode 100644 index 00000000..d4b252ba --- /dev/null +++ b/test/soundness/README.adoc @@ -0,0 +1,87 @@ += Soundness Gate + +== Purpose + +PR #278 documented a class of bug where the deployed `hypatia` escript +was silently dropping entire pattern families because the binary was +stale relative to the rule sources. "No findings" looks the same whether +the code is clean OR the rule is broken — that's the soundness gap. + +The soundness gate fixes that with the simplest possible mechanism: for +every rule the scanner is supposed to detect, we keep a known-bad sample +on disk. The test asserts every sample is flagged by its rule. If a rule +silently breaks (regex drift, file pruning, module rename), the build +fails before that change merges. + +== Layout + + test/soundness/ + ├── manifest.json -- rule -> fixture -> expected severity + ├── fixtures/ + │ ├── code_safety/ -- one file per code_safety rule_id + │ │ ├── believe_me.idr + │ │ ├── elixir_system_shell.ex + │ │ ├── ... + │ ├── cicd_rules/ -- one file per cicd_rules rule_id + │ └── security_errors/ -- one file per security_errors rule_id + └── README.adoc -- this file + +The test runner is `test/soundness_test.exs`, tagged `:soundness`. + +== Adding a fixture for a new rule + +1. Write a minimal known-bad sample under + `test/soundness/fixtures//.`. Keep it as + small as possible — ideally just the bad pattern with enough context + to look real, plus an SPDX header and a "DO NOT FIX" comment so future + contributors don't try to "clean it up". + +2. Add an entry to `test/soundness/manifest.json`: ++ +[source,json] +---- +{ + "rule_module": "code_safety", + "rule_id": "your_new_rule", + "language": "rust", + "fixture": "test/soundness/fixtures/code_safety/your_new_rule.rs", + "expected_severity": "high" +} +---- + +3. Run `mix test test/soundness_test.exs` and confirm the new entry + passes (rule fires at expected severity). + +4. Commit fixture + manifest entry + the rule change in one PR. + +== Removing a fixture + +Only acceptable when the rule itself is being removed or merged into +another rule. The commit message MUST justify the removal — the default +assumption is the entry stays. A bare manifest entry deletion in a PR +that doesn't also remove the rule should fail review. + +== Running + + mix test --only soundness # just the soundness suite + mix test # full suite includes soundness + mix test --exclude soundness # everything else (for dev cycles) + +== Why the manifest is JSON, not Elixir + +So a non-Elixir reviewer (or a non-Elixir scanner / a JSON Schema +validator running in CI) can verify it without a BEAM runtime. The +schema is intentionally flat and self-documenting. + +== Out of scope (today) + +* End-to-end escript-build soundness — building the escript, then + running the built binary against the fixture corpus. That's the + exact PR #278 reproduction. Worth adding next, but requires a CI + job that can build escripts (the in-process test already catches + rule-definition regressions, just not packaging regressions). + +* Fixtures for non-`code_safety` rule families. The current manifest + covers the families PR #278 specifically called out as having been + silently dropped. Workflow_audit, cicd_rules, structural_drift, + scorecard, dependabot_alerts etc. fixtures are next-iteration work. diff --git a/test/soundness/fixtures/code_safety/admitted.v b/test/soundness/fixtures/code_safety/admitted.v new file mode 100644 index 00000000..bd45f151 --- /dev/null +++ b/test/soundness/fixtures/code_safety/admitted.v @@ -0,0 +1,7 @@ +(* SPDX-License-Identifier: MPL-2.0 *) +(* SOUNDNESS FIXTURE — known-bad sample for code_safety/admitted. *) +(* DO NOT FIX. *) + +Theorem bad : 1 + 1 = 3. +Proof. + Admitted. diff --git a/test/soundness/fixtures/code_safety/agda_postulate.agda b/test/soundness/fixtures/code_safety/agda_postulate.agda new file mode 100644 index 00000000..0b694e1e --- /dev/null +++ b/test/soundness/fixtures/code_safety/agda_postulate.agda @@ -0,0 +1,6 @@ +-- SPDX-License-Identifier: MPL-2.0 +-- SOUNDNESS FIXTURE — known-bad sample for code_safety/agda_postulate. +-- DO NOT FIX. + +postulate + bad : Set diff --git a/test/soundness/fixtures/code_safety/believe_me.idr b/test/soundness/fixtures/code_safety/believe_me.idr new file mode 100644 index 00000000..d3e4a5ae --- /dev/null +++ b/test/soundness/fixtures/code_safety/believe_me.idr @@ -0,0 +1,8 @@ +-- SPDX-License-Identifier: MPL-2.0 +-- SOUNDNESS FIXTURE — known-bad sample for code_safety/believe_me. +-- DO NOT FIX. This file exists so the build fails if the rule stops firing. + +module Soundness.BelieveMe + +bad : Nat +bad = believe_me Z diff --git a/test/soundness/fixtures/code_safety/elixir_code_eval.ex b/test/soundness/fixtures/code_safety/elixir_code_eval.ex new file mode 100644 index 00000000..cf31496b --- /dev/null +++ b/test/soundness/fixtures/code_safety/elixir_code_eval.ex @@ -0,0 +1,9 @@ +# SPDX-License-Identifier: MPL-2.0 +# SOUNDNESS FIXTURE — known-bad sample for code_safety/elixir_code_eval. +# DO NOT FIX. + +defmodule Soundness.ElixirCodeEval do + def bad(input) do + Code.eval_string(input) + end +end diff --git a/test/soundness/fixtures/code_safety/elixir_os_cmd.ex b/test/soundness/fixtures/code_safety/elixir_os_cmd.ex new file mode 100644 index 00000000..515ae06f --- /dev/null +++ b/test/soundness/fixtures/code_safety/elixir_os_cmd.ex @@ -0,0 +1,9 @@ +# SPDX-License-Identifier: MPL-2.0 +# SOUNDNESS FIXTURE — known-bad sample for code_safety/elixir_os_cmd. +# DO NOT FIX. + +defmodule Soundness.ElixirOsCmd do + def bad(user) do + :os.cmd(~c"echo #{user}") + end +end diff --git a/test/soundness/fixtures/code_safety/elixir_system_shell.ex b/test/soundness/fixtures/code_safety/elixir_system_shell.ex new file mode 100644 index 00000000..eed1e99f --- /dev/null +++ b/test/soundness/fixtures/code_safety/elixir_system_shell.ex @@ -0,0 +1,10 @@ +# SPDX-License-Identifier: MPL-2.0 +# SOUNDNESS FIXTURE — known-bad sample for code_safety/elixir_system_shell. +# This is THE pattern #278's stale-escript audit found being silently +# dropped. DO NOT FIX. + +defmodule Soundness.ElixirSystemShell do + def bad(user) do + System.shell("echo #{user}") + end +end diff --git a/test/soundness/fixtures/code_safety/getexn_on_external.res b/test/soundness/fixtures/code_safety/getexn_on_external.res new file mode 100644 index 00000000..52f311e8 --- /dev/null +++ b/test/soundness/fixtures/code_safety/getexn_on_external.res @@ -0,0 +1,5 @@ +// SPDX-License-Identifier: MPL-2.0 +// SOUNDNESS FIXTURE — known-bad sample for code_safety/getexn_on_external. +// DO NOT FIX. + +let bad = (untrusted: Js.Dict.t) => Js.Dict.getExn(untrusted, "key") diff --git a/test/soundness/fixtures/code_safety/obj_magic_ocaml.ml b/test/soundness/fixtures/code_safety/obj_magic_ocaml.ml new file mode 100644 index 00000000..9bad2081 --- /dev/null +++ b/test/soundness/fixtures/code_safety/obj_magic_ocaml.ml @@ -0,0 +1,5 @@ +(* SPDX-License-Identifier: MPL-2.0 *) +(* SOUNDNESS FIXTURE — known-bad sample for code_safety/obj_magic_ocaml. *) +(* DO NOT FIX. *) + +let bad (x : int) : string = Obj.magic x diff --git a/test/soundness/fixtures/code_safety/shell_download_then_run.sh b/test/soundness/fixtures/code_safety/shell_download_then_run.sh new file mode 100644 index 00000000..15e54ad9 --- /dev/null +++ b/test/soundness/fixtures/code_safety/shell_download_then_run.sh @@ -0,0 +1,6 @@ +#!/usr/bin/env bash +# SPDX-License-Identifier: MPL-2.0 +# SOUNDNESS FIXTURE — known-bad sample for code_safety/shell_download_then_run. +# DO NOT FIX. + +curl -sL https://example.com/install.sh | bash diff --git a/test/soundness/fixtures/code_safety/sorry.lean b/test/soundness/fixtures/code_safety/sorry.lean new file mode 100644 index 00000000..82086023 --- /dev/null +++ b/test/soundness/fixtures/code_safety/sorry.lean @@ -0,0 +1,5 @@ +-- SPDX-License-Identifier: MPL-2.0 +-- SOUNDNESS FIXTURE — known-bad sample for code_safety/sorry. +-- DO NOT FIX. + +theorem bad : 1 + 1 = 3 := by sorry diff --git a/test/soundness/fixtures/code_safety/transmute.rs b/test/soundness/fixtures/code_safety/transmute.rs new file mode 100644 index 00000000..95e73e14 --- /dev/null +++ b/test/soundness/fixtures/code_safety/transmute.rs @@ -0,0 +1,7 @@ +// SPDX-License-Identifier: MPL-2.0 +// SOUNDNESS FIXTURE — known-bad sample for code_safety/transmute. +// DO NOT FIX. + +pub fn bad(x: u32) -> f32 { + unsafe { std::mem::transmute(x) } +} diff --git a/test/soundness/fixtures/code_safety/unsafe_coerce.hs b/test/soundness/fixtures/code_safety/unsafe_coerce.hs new file mode 100644 index 00000000..9a3640d9 --- /dev/null +++ b/test/soundness/fixtures/code_safety/unsafe_coerce.hs @@ -0,0 +1,10 @@ +-- SPDX-License-Identifier: MPL-2.0 +-- SOUNDNESS FIXTURE — known-bad sample for code_safety/unsafe_coerce. +-- DO NOT FIX. + +module Soundness.UnsafeCoerce where + +import Unsafe.Coerce + +bad :: Int -> String +bad n = unsafeCoerce n diff --git a/test/soundness/fixtures/code_safety/unwrap_without_check.rs b/test/soundness/fixtures/code_safety/unwrap_without_check.rs new file mode 100644 index 00000000..b8c0c5cd --- /dev/null +++ b/test/soundness/fixtures/code_safety/unwrap_without_check.rs @@ -0,0 +1,7 @@ +// SPDX-License-Identifier: MPL-2.0 +// SOUNDNESS FIXTURE — known-bad sample for code_safety/unwrap_without_check. +// DO NOT FIX. + +pub fn bad(s: &str) -> i32 { + s.parse::().unwrap() +} diff --git a/test/soundness/fixtures/code_safety/zig_ptr_cast.zig b/test/soundness/fixtures/code_safety/zig_ptr_cast.zig new file mode 100644 index 00000000..5b50c391 --- /dev/null +++ b/test/soundness/fixtures/code_safety/zig_ptr_cast.zig @@ -0,0 +1,7 @@ +// SPDX-License-Identifier: MPL-2.0 +// SOUNDNESS FIXTURE — known-bad sample for code_safety/zig_ptr_cast. +// DO NOT FIX. + +pub fn bad(ptr: *u8) *u32 { + return @ptrCast(*u32, ptr); +} diff --git a/test/soundness/manifest.json b/test/soundness/manifest.json new file mode 100644 index 00000000..688bbc8c --- /dev/null +++ b/test/soundness/manifest.json @@ -0,0 +1,104 @@ +{ + "_comment": "Soundness manifest — each entry asserts that the named rule MUST fire on its fixture. Catches regressions of the kind PR #278 documented (stale escript silently dropping entire pattern families). Add an entry whenever you add a new rule; remove only if you delete the rule.", + "entries": [ + { + "rule_module": "code_safety", + "rule_id": "believe_me", + "language": "idris2", + "fixture": "test/soundness/fixtures/code_safety/believe_me.idr", + "expected_severity": "critical" + }, + { + "rule_module": "code_safety", + "rule_id": "sorry", + "language": "lean", + "fixture": "test/soundness/fixtures/code_safety/sorry.lean", + "expected_severity": "critical" + }, + { + "rule_module": "code_safety", + "rule_id": "admitted", + "language": "coq", + "fixture": "test/soundness/fixtures/code_safety/admitted.v", + "expected_severity": "critical" + }, + { + "rule_module": "code_safety", + "rule_id": "unsafe_coerce", + "language": "haskell", + "fixture": "test/soundness/fixtures/code_safety/unsafe_coerce.hs", + "expected_severity": "critical" + }, + { + "rule_module": "code_safety", + "rule_id": "obj_magic_ocaml", + "language": "ocaml", + "fixture": "test/soundness/fixtures/code_safety/obj_magic_ocaml.ml", + "expected_severity": "critical" + }, + { + "rule_module": "code_safety", + "rule_id": "getexn_on_external", + "language": "rescript", + "fixture": "test/soundness/fixtures/code_safety/getexn_on_external.res", + "expected_severity": "critical" + }, + { + "rule_module": "code_safety", + "rule_id": "unwrap_without_check", + "language": "rust", + "fixture": "test/soundness/fixtures/code_safety/unwrap_without_check.rs", + "expected_severity": "high" + }, + { + "rule_module": "code_safety", + "rule_id": "transmute", + "language": "rust", + "fixture": "test/soundness/fixtures/code_safety/transmute.rs", + "expected_severity": "critical" + }, + { + "rule_module": "code_safety", + "rule_id": "elixir_system_shell", + "language": "elixir", + "fixture": "test/soundness/fixtures/code_safety/elixir_system_shell.ex", + "expected_severity": "critical", + "note": "THE rule PR #278 caught the stale escript silently dropping. Removing this entry needs a soundness PR explanation." + }, + { + "rule_module": "code_safety", + "rule_id": "elixir_os_cmd", + "language": "elixir", + "fixture": "test/soundness/fixtures/code_safety/elixir_os_cmd.ex", + "expected_severity": "critical" + }, + { + "rule_module": "code_safety", + "rule_id": "elixir_code_eval", + "language": "elixir", + "fixture": "test/soundness/fixtures/code_safety/elixir_code_eval.ex", + "expected_severity": "critical" + }, + { + "rule_module": "code_safety", + "rule_id": "shell_download_then_run", + "language": "shell", + "fixture": "test/soundness/fixtures/code_safety/shell_download_then_run.sh", + "expected_severity": "high" + }, + { + "rule_module": "code_safety", + "rule_id": "agda_postulate", + "language": "agda", + "fixture": "test/soundness/fixtures/code_safety/agda_postulate.agda", + "expected_severity": "critical" + }, + { + "rule_module": "code_safety", + "rule_id": "zig_ptr_cast", + "language": "zig", + "fixture": "test/soundness/fixtures/code_safety/zig_ptr_cast.zig", + "expected_severity": "high" + } + ] +} diff --git a/test/soundness_test.exs b/test/soundness_test.exs new file mode 100644 index 00000000..9edd28f0 --- /dev/null +++ b/test/soundness_test.exs @@ -0,0 +1,110 @@ +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Jonathan D.A. Jewell (hyperpolymath) + +defmodule Hypatia.SoundnessTest do + @moduledoc """ + Soundness gate: every rule listed in `test/soundness/manifest.json` + MUST fire on its declared fixture. + + This catches the class of regression PR #278 documented (the deployed + escript was silently dropping the entire Elixir/Erlang/Coq/Lean/Agda/ + Zig/F\*/Ada pattern families because the binary was stale). Without a + named fixture per rule, we can rebuild every binary in the world and + still not know whether a given rule is firing — because "no findings" + means both "code is clean" and "rule is broken." + + Adding a rule: drop a known-bad sample in `test/soundness/fixtures/` + and add a manifest entry. The test will pick it up automatically. + + Removing a manifest entry: must be justified in the commit message + (rule deprecated / merged / superseded). The default assumption is + the entry stays. + + Tagged `:soundness` so CI can call this suite out separately in + reports — a soundness failure is qualitatively different from a + product test failure. + """ + + use ExUnit.Case, async: true + + @moduletag :soundness + + alias Hypatia.Rules.CodeSafety + + @manifest_path Path.expand("soundness/manifest.json", __DIR__) + + setup_all do + manifest = + @manifest_path + |> File.read!() + |> Jason.decode!() + |> Map.fetch!("entries") + + {:ok, manifest: manifest} + end + + describe "manifest" do + test "is non-empty", %{manifest: manifest} do + assert length(manifest) > 0, + "Soundness manifest must list at least one rule. " <> + "An empty manifest defeats the entire purpose of this test." + end + + test "every fixture file exists on disk", %{manifest: manifest} do + missing = + Enum.filter(manifest, fn entry -> + not File.exists?(Map.fetch!(entry, "fixture")) + end) + + assert missing == [], + "Soundness manifest references fixtures that don't exist: " <> + inspect(Enum.map(missing, &Map.fetch!(&1, "fixture"))) + end + + test "every entry has the required fields", %{manifest: manifest} do + required = ~w(rule_module rule_id language fixture expected_severity) + + bad = + Enum.filter(manifest, fn entry -> + Enum.any?(required, fn key -> not Map.has_key?(entry, key) end) + end) + + assert bad == [], + "Soundness manifest entries missing required fields: " <> inspect(bad) + end + end + + describe "code_safety rules fire on their fixtures" do + @manifest_path + |> File.read!() + |> Jason.decode!() + |> Map.fetch!("entries") + |> Enum.filter(fn entry -> Map.fetch!(entry, "rule_module") == "code_safety" end) + |> Enum.each(fn entry -> + rule_id = Map.fetch!(entry, "rule_id") + language = Map.fetch!(entry, "language") + fixture = Map.fetch!(entry, "fixture") + expected_severity = Map.fetch!(entry, "expected_severity") + + test "code_safety/#{rule_id} fires on #{fixture}" do + content = File.read!(unquote(fixture)) + findings = CodeSafety.scan_content(content, unquote(language)) + + finding = Enum.find(findings, &(&1.rule == unquote(String.to_atom(rule_id)))) + + assert finding != nil, + "Soundness gate FAILED: rule code_safety/#{unquote(rule_id)} " <> + "did NOT fire on its fixture #{unquote(fixture)}. " <> + "Either the rule was removed / weakened / the regex broke, " <> + "or the fixture was sanitised. See PR #278 for context." + + actual_severity = to_string(finding.severity) + + assert actual_severity == unquote(expected_severity), + "Soundness gate: rule code_safety/#{unquote(rule_id)} fired but at " <> + "severity '#{actual_severity}', expected '#{unquote(expected_severity)}'. " <> + "If this is intentional, update the manifest in the same commit." + end + end) + end +end From 12f2890a804feb147164ed25867dc09557c80dcf Mon Sep 17 00:00:00 2001 From: Claude Date: Sat, 23 May 2026 23:22:04 +0000 Subject: [PATCH 5/5] feat(outcomes): closed-loop verification metric + recipe_health task The OutcomeTracker.verify_fix/3 re-scan mechanism existed but its result was discarded on the success path: clean re-scans produced no marker, unclean re-scans were re-recorded as :false_positive without preserving the "this was verification, not an organic failure" distinction. The outcomes log had no way to answer "what fraction of this recipe's 'successes' were actually verified clean by post-fix re-scan?" That's the closed-loop metric this commit adds. lib/outcome_tracker.ex record_outcome/4,5 Optional `metadata` map merges into the record (under the canonical fields so a caller can't overwrite recipe_id/repo/file/outcome/ timestamp/bot by accident). record_and_verify/5 Now persists the verification verdict on every branch: verified -> success record with "verification" = "verified" still_present -> success record with "verification" = "still_present" PLUS a follow-up :false_positive record (caused_by = "post_fix_rescan") scan_failed -> success record with "verification" = "scan_failed" verify: false -> outcome record with "verification" = "unverified" The distinction between "scan_failed" and "unverified" matters: a recipe is not penalised for being run in environments without panic-attack. verification_rate/2 For a recipe_id, returns counts {verified, still_present, scan_failed, unverified} and a rate = verified / (verified + still_present). scan_failed and unverified records are excluded from the denominator so a low-verification-attempt environment doesn't artificially deflate the rate. Returns :insufficient_data below min_attempts. recipe_health/1 Aggregates across every recipe with recorded outcomes. Returns a list of maps with dispatches / successes / failures / FPs / success_rate / verification breakdown / status, sorted so the most actionable rows (quarantine_candidate, degraded) surface first. Configurable thresholds. lib/mix/tasks/hypatia.recipe_health.ex mix hypatia.recipe_health [--format json] [--only-actionable] Prints the report in a human-readable table or JSON. test/recipe_health_test.exs Pins the rate calculation (verified/still_present ratio, scan_failed + unverified excluded), the insufficient_data threshold, and the healthy/degraded/quarantine_candidate status mapping. Hand-run against the current outcomes log: 4 recipes found, all flagged :insufficient_data because the historical log was written before the verification marker existed. From the next `record_and_verify`-enabled dispatch onwards, recipes will accumulate verification data and migrate to :healthy / :degraded / :quarantine_candidate based on real evidence. --- lib/mix/tasks/hypatia.recipe_health.ex | 185 +++++++++++++++++++ lib/outcome_tracker.ex | 239 ++++++++++++++++++++++++- test/recipe_health_test.exs | 138 ++++++++++++++ 3 files changed, 556 insertions(+), 6 deletions(-) create mode 100644 lib/mix/tasks/hypatia.recipe_health.ex create mode 100644 test/recipe_health_test.exs diff --git a/lib/mix/tasks/hypatia.recipe_health.ex b/lib/mix/tasks/hypatia.recipe_health.ex new file mode 100644 index 00000000..74d4a9b1 --- /dev/null +++ b/lib/mix/tasks/hypatia.recipe_health.ex @@ -0,0 +1,185 @@ +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Jonathan D.A. Jewell (hyperpolymath) + +defmodule Mix.Tasks.Hypatia.RecipeHealth do + @moduledoc """ + Per-recipe health report driven by `Hypatia.OutcomeTracker.recipe_health/1`. + + Surfaces recipes whose re-scan verification rate is low (potential + false-fix candidates) or insufficient (verification was not attempted + often enough to draw conclusions). Output is sorted so the most + actionable rows -- quarantine candidates and degraded recipes -- are + at the top. + + Status legend: + healthy -- verification rate >= 0.70 + degraded -- verification rate < 0.70 (review) + quarantine_cand -- verification rate < 0.30 (auto-quarantine candidate) + insufficient -- fewer than --min-attempts verifiable outcomes + no_data -- recipe has outcomes but none were verified + + Options: + --format text|json (default: text) + --min-attempts N fewer than this and the recipe is "insufficient" + --degraded N.NN threshold below "healthy" (default 0.70) + --quarantine N.NN threshold below "degraded" (default 0.30) + --only-actionable hide healthy + insufficient + no_data rows + + ## Examples + + mix hypatia.recipe_health + mix hypatia.recipe_health --only-actionable + mix hypatia.recipe_health --format json > recipe-health.json + """ + + use Mix.Task + + @shortdoc "Show per-recipe success + verification health" + + @switches [ + format: :string, + min_attempts: :integer, + degraded: :float, + quarantine: :float, + only_actionable: :boolean + ] + + @impl Mix.Task + def run(argv) do + {opts, _, _} = OptionParser.parse(argv, switches: @switches) + + format = Keyword.get(opts, :format, "text") + min_attempts = Keyword.get(opts, :min_attempts, 5) + degraded = Keyword.get(opts, :degraded, 0.70) + quarantine = Keyword.get(opts, :quarantine, 0.30) + only_actionable = Keyword.get(opts, :only_actionable, false) + + rows = + Hypatia.OutcomeTracker.recipe_health( + min_attempts: min_attempts, + degraded_threshold: degraded, + quarantine_threshold: quarantine + ) + + rows = + if only_actionable do + Enum.filter(rows, fn r -> r.status in [:degraded, :quarantine_candidate] end) + else + rows + end + + case format do + "json" -> emit_json(rows) + _ -> emit_text(rows) + end + end + + defp emit_text([]) do + Mix.shell().info("No recipes match the filter (or no outcomes recorded yet).") + end + + defp emit_text(rows) do + headers = ["recipe_id", "disp", "succ", "fail", "fp", "verified", "still", "scan_fail", "rate", "status"] + width = column_widths(rows, headers) + + Mix.shell().info(format_row(headers, width)) + Mix.shell().info(format_row(Enum.map(width, fn w -> String.duplicate("-", w) end), width)) + + Enum.each(rows, fn r -> + row = [ + r.recipe_id, + Integer.to_string(r.dispatches), + Integer.to_string(r.successes), + Integer.to_string(r.failures), + Integer.to_string(r.false_positives), + Integer.to_string(r.verification.verified), + Integer.to_string(r.verification.still_present), + Integer.to_string(r.verification.scan_failed), + format_rate(r.verification.rate), + Atom.to_string(r.status) + ] + + Mix.shell().info(format_row(row, width)) + end) + + Mix.shell().info("") + + Mix.shell().info( + "#{length(rows)} recipe(s). " <> + "Quarantine threshold #{quarantine_msg(rows)}, " <> + "degraded threshold #{degraded_msg(rows)}." + ) + end + + defp emit_json(rows) do + payload = %{ + "generated_at" => DateTime.utc_now() |> DateTime.to_iso8601(), + "rows" => + Enum.map(rows, fn r -> + %{ + "recipe_id" => r.recipe_id, + "dispatches" => r.dispatches, + "successes" => r.successes, + "failures" => r.failures, + "false_positives" => r.false_positives, + "success_rate" => to_jsonable(r.success_rate), + "verification" => %{ + "verified" => r.verification.verified, + "still_present" => r.verification.still_present, + "scan_failed" => r.verification.scan_failed, + "unverified" => r.verification.unverified, + "verifiable" => r.verification.verifiable, + "rate" => to_jsonable(r.verification.rate) + }, + "status" => Atom.to_string(r.status) + } + end) + } + + IO.puts(Jason.encode!(payload, pretty: true)) + end + + defp column_widths(rows, headers) do + initial = Enum.map(headers, &String.length/1) + + Enum.reduce(rows, initial, fn r, widths -> + lengths = [ + String.length(r.recipe_id), + String.length(Integer.to_string(r.dispatches)), + String.length(Integer.to_string(r.successes)), + String.length(Integer.to_string(r.failures)), + String.length(Integer.to_string(r.false_positives)), + String.length(Integer.to_string(r.verification.verified)), + String.length(Integer.to_string(r.verification.still_present)), + String.length(Integer.to_string(r.verification.scan_failed)), + String.length(format_rate(r.verification.rate)), + String.length(Atom.to_string(r.status)) + ] + + Enum.zip_with([widths, lengths], fn [a, b] -> max(a, b) end) + end) + end + + defp format_row(cells, widths) do + Enum.zip(cells, widths) + |> Enum.map_join(" ", fn {cell, w} -> String.pad_trailing(cell, w) end) + end + + defp format_rate(:no_data), do: "—" + defp format_rate(:insufficient_data), do: "?" + defp format_rate(r) when is_float(r), do: :erlang.float_to_binary(r, decimals: 2) + + defp to_jsonable(:no_data), do: nil + defp to_jsonable(:insufficient_data), do: "insufficient_data" + defp to_jsonable(r) when is_float(r), do: r + + defp quarantine_msg(rows) do + count = Enum.count(rows, &(&1.status == :quarantine_candidate)) + "#{count} recipe(s)" + end + + defp degraded_msg(rows) do + count = Enum.count(rows, &(&1.status == :degraded)) + "#{count} recipe(s)" + end +end diff --git a/lib/outcome_tracker.ex b/lib/outcome_tracker.ex index 213974b3..9e525613 100644 --- a/lib/outcome_tracker.ex +++ b/lib/outcome_tracker.ex @@ -38,12 +38,16 @@ defmodule Hypatia.OutcomeTracker do - repo: repository name - file: file that was fixed - outcome: :success | :failure | :false_positive + - metadata: optional map of extra fields to merge into the record + (e.g. %{"verification" => "verified"} from `record_and_verify`). + Pre-existing keys (recipe_id, repo, file, outcome, + timestamp, bot) are not overwritten by metadata. """ - def record_outcome(recipe_id, repo, file, outcome) do + def record_outcome(recipe_id, repo, file, outcome, metadata \\ %{}) do now = DateTime.utc_now() |> DateTime.to_iso8601() outcome_str = Atom.to_string(outcome) - record = %{ + base = %{ "pattern_id" => nil, "recipe_id" => recipe_id, "repo" => repo, @@ -53,6 +57,10 @@ defmodule Hypatia.OutcomeTracker do "bot" => "hypatia" } + # Metadata is merged UNDER the base so the canonical fields can't be + # silently overwritten by a caller passing the wrong recipe_id etc. + record = Map.merge(metadata, base) + # Write to verisim-data outcomes (append-only JSONL per month) write_outcome_log(record) @@ -114,8 +122,6 @@ defmodule Hypatia.OutcomeTracker do :false_positive to correct the confidence. """ def record_and_verify(recipe_id, repo, file, outcome, opts \\ []) do - {:ok, record} = record_outcome(recipe_id, repo, file, outcome) - if Keyword.get(opts, :verify, false) and outcome == :success do repos_dir = System.get_env("HYPATIA_REPOS_DIR", File.cwd!()) repo_path = Keyword.get(opts, :repo_path, Path.join(repos_dir, repo)) @@ -124,17 +130,49 @@ defmodule Hypatia.OutcomeTracker do case verify_fix(repo_path, pattern_id, category) do :verified -> + # Record success WITH the verification stamp so recipe_health + # can distinguish verified-clean fixes from un-verified ones. + {:ok, record} = + record_outcome(recipe_id, repo, file, outcome, %{"verification" => "verified"}) + {:ok, record, :verified} :still_present -> - Logger.warning("Fix claimed success but pattern still present -- recording false_positive") - record_outcome(recipe_id, repo, file, :false_positive) + Logger.warning( + "Fix claimed success but pattern still present -- recording false_positive" + ) + + # Both records are tagged so the trail is explicit: the claimed + # success was actually a false positive, surfaced by re-scan. + {:ok, _} = + record_outcome(recipe_id, repo, file, outcome, %{"verification" => "still_present"}) + + {:ok, record} = + record_outcome(recipe_id, repo, file, :false_positive, %{ + "verification" => "still_present", + "caused_by" => "post_fix_rescan" + }) + {:ok, record, :false_positive} :scan_failed -> + # The fix may or may not have worked; we just couldn't verify. + # Recording the outcome with the scan_failed marker preserves + # the distinction from "verified clean" without penalising the + # recipe in confidence updates. + {:ok, record} = + record_outcome(recipe_id, repo, file, outcome, %{"verification" => "scan_failed"}) + {:ok, record, :scan_unavailable} end else + # Unverified outcome (or non-success): record as before, with the + # explicit "unverified" marker so verification_rate aggregates can + # tell the difference between "verification wasn't attempted" and + # "verification was attempted and failed". + {:ok, record} = + record_outcome(recipe_id, repo, file, outcome, %{"verification" => "unverified"}) + {:ok, record, :not_verified} end end @@ -264,8 +302,197 @@ defmodule Hypatia.OutcomeTracker do end end + # ─── Closed-loop verification metric ─────────────────────────────────── + + @doc """ + Per-recipe verification rate. + + Returns `{:ok, %{verified, still_present, scan_failed, unverified, total, + rate}}` where `rate` is the fraction of *verifiable* successes that were + actually verified clean by post-fix re-scan. `scan_failed` and + `unverified` records are excluded from the denominator so a recipe is + not penalised for being run in environments without panic-attack. + + A recipe's verification rate is meaningful only after a handful of + attempts -- returns `{:ok, :insufficient_data}` below the threshold. + """ + def verification_rate(recipe_id, min_attempts \\ 5) do + outcomes = load_outcomes_for_recipe(recipe_id) + successes = Enum.filter(outcomes, fn o -> Map.get(o, "outcome") == "success" end) + + counts = + Enum.reduce( + successes, + %{verified: 0, still_present: 0, scan_failed: 0, unverified: 0}, + fn o, acc -> + case Map.get(o, "verification") do + "verified" -> Map.update!(acc, :verified, &(&1 + 1)) + "still_present" -> Map.update!(acc, :still_present, &(&1 + 1)) + "scan_failed" -> Map.update!(acc, :scan_failed, &(&1 + 1)) + _ -> Map.update!(acc, :unverified, &(&1 + 1)) + end + end + ) + + verifiable = counts.verified + counts.still_present + + cond do + length(successes) == 0 -> + {:ok, :no_outcomes} + + verifiable < min_attempts -> + {:ok, + Map.merge(counts, %{ + total: length(successes), + rate: :insufficient_data, + verifiable: verifiable + })} + + true -> + rate = counts.verified / verifiable + + {:ok, + Map.merge(counts, %{ + total: length(successes), + rate: rate, + verifiable: verifiable + })} + end + end + + @doc """ + Aggregate health stats across every recipe with recorded outcomes. + + Returns a list of maps sorted ascending by verification rate, so + recipes that look most broken surface first. Recipes with insufficient + verification data still appear -- they're flagged distinctly so they + can be prioritised for verification-enabled runs. + + Schema: + %{ + recipe_id: String.t(), + dispatches: non_neg_integer(), + successes: non_neg_integer(), + failures: non_neg_integer(), + false_positives: non_neg_integer(), + success_rate: float() | :no_data, + verification: %{ + verified: non_neg_integer(), + still_present: non_neg_integer(), + scan_failed: non_neg_integer(), + unverified: non_neg_integer(), + verifiable: non_neg_integer(), + rate: float() | :insufficient_data | :no_data + }, + status: :healthy | :unverified | :insufficient_data | :degraded | :quarantine_candidate + } + """ + def recipe_health(opts \\ []) do + min_attempts = Keyword.get(opts, :min_attempts, 5) + degraded_threshold = Keyword.get(opts, :degraded_threshold, 0.70) + quarantine_threshold = Keyword.get(opts, :quarantine_threshold, 0.30) + + recipe_ids = all_recipe_ids_with_outcomes() + + recipe_ids + |> Enum.map(fn recipe_id -> + outcomes = load_outcomes_for_recipe(recipe_id) + + successes = Enum.count(outcomes, fn o -> Map.get(o, "outcome") == "success" end) + failures = Enum.count(outcomes, fn o -> Map.get(o, "outcome") == "failure" end) + false_positives = Enum.count(outcomes, fn o -> Map.get(o, "outcome") == "false_positive" end) + + dispatches = length(outcomes) + attempts = successes + failures + false_positives + + success_rate = + if attempts > 0, do: successes / attempts, else: :no_data + + {:ok, verification} = verification_rate(recipe_id, min_attempts) + + verification_map = + case verification do + :no_outcomes -> + %{ + verified: 0, + still_present: 0, + scan_failed: 0, + unverified: 0, + verifiable: 0, + rate: :no_data + } + + map when is_map(map) -> + map + end + + status = + cond do + verification_map.rate == :no_data -> :no_data + verification_map.rate == :insufficient_data -> :insufficient_data + is_float(verification_map.rate) and verification_map.rate < quarantine_threshold -> + :quarantine_candidate + is_float(verification_map.rate) and verification_map.rate < degraded_threshold -> + :degraded + is_float(verification_map.rate) -> + :healthy + true -> + :unverified + end + + %{ + recipe_id: recipe_id, + dispatches: dispatches, + successes: successes, + failures: failures, + false_positives: false_positives, + success_rate: success_rate, + verification: verification_map, + status: status + } + end) + |> Enum.sort_by(fn r -> + # Sort by rate ascending so quarantine_candidate / degraded float to + # the top. :no_data and :insufficient_data sort after numerics so + # they don't bury actionable rows. + case r.verification.rate do + :no_data -> {2, 0} + :insufficient_data -> {1, 0} + rate when is_float(rate) -> {0, rate} + end + end) + end + # --- Private --- + defp all_recipe_ids_with_outcomes do + outcomes_dir = Path.join(Path.expand(@verisimdb_data_path), "outcomes") + + case File.ls(outcomes_dir) do + {:ok, files} -> + files + |> Enum.filter(&String.ends_with?(&1, ".jsonl")) + |> Enum.flat_map(fn f -> + path = Path.join(outcomes_dir, f) + + path + |> File.stream!() + |> Stream.map(fn line -> + case Jason.decode(String.trim(line)) do + {:ok, %{"recipe_id" => id}} when is_binary(id) -> id + _ -> nil + end + end) + |> Stream.reject(&is_nil/1) + |> Enum.to_list() + end) + |> Enum.uniq() + + {:error, _} -> + [] + end + end + defp write_outcome_log(record) do {{year, month, _}, _} = :calendar.universal_time() month_str = String.pad_leading("#{month}", 2, "0") diff --git a/test/recipe_health_test.exs b/test/recipe_health_test.exs new file mode 100644 index 00000000..7aef8118 --- /dev/null +++ b/test/recipe_health_test.exs @@ -0,0 +1,138 @@ +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Jonathan D.A. Jewell (hyperpolymath) +# +# Tests for the closed-loop verification metric: +# OutcomeTracker.verification_rate/2 and OutcomeTracker.recipe_health/1. + +defmodule Hypatia.RecipeHealthTest do + # async: false because the outcome log is a shared on-disk resource; + # writing test outcomes from concurrent tests would race each other. + use ExUnit.Case, async: false + + alias Hypatia.OutcomeTracker + + @test_recipe_prefix "test-recipe-health-" + + setup do + # Each test gets a unique recipe_id so its outcomes are isolated in + # the shared outcomes log. We don't clean up — the verification_rate + # aggregator filters by recipe_id so leftover records from a previous + # run only affect their own recipe_id. + recipe_id = @test_recipe_prefix <> Integer.to_string(System.unique_integer([:positive])) + {:ok, recipe_id: recipe_id} + end + + describe "verification_rate/2" do + test "returns :no_outcomes for a recipe that has no records", %{recipe_id: recipe_id} do + assert {:ok, :no_outcomes} = OutcomeTracker.verification_rate(recipe_id) + end + + test "returns :insufficient_data below the threshold", %{recipe_id: recipe_id} do + OutcomeTracker.record_outcome(recipe_id, "test-repo", "a.ex", :success, %{ + "verification" => "verified" + }) + + assert {:ok, %{rate: :insufficient_data, verifiable: 1, total: 1}} = + OutcomeTracker.verification_rate(recipe_id, 5) + end + + test "computes rate from verified/still_present ratio", %{recipe_id: recipe_id} do + # 4 verified + 1 still_present = 5 verifiable, rate = 0.8 + for i <- 1..4 do + OutcomeTracker.record_outcome(recipe_id, "r", "f#{i}", :success, %{ + "verification" => "verified" + }) + end + + OutcomeTracker.record_outcome(recipe_id, "r", "f5", :success, %{ + "verification" => "still_present" + }) + + assert {:ok, %{rate: rate, verifiable: 5, verified: 4, still_present: 1}} = + OutcomeTracker.verification_rate(recipe_id, 5) + + assert_in_delta(rate, 0.8, 0.001) + end + + test "excludes scan_failed and unverified from the denominator", %{recipe_id: recipe_id} do + # 5 verified + 100 scan_failed + 100 unverified -> rate is 1.0, not + # diluted by environments where panic-attack wasn't available. + for i <- 1..5 do + OutcomeTracker.record_outcome(recipe_id, "r", "v#{i}", :success, %{ + "verification" => "verified" + }) + end + + for i <- 1..3 do + OutcomeTracker.record_outcome(recipe_id, "r", "sf#{i}", :success, %{ + "verification" => "scan_failed" + }) + + OutcomeTracker.record_outcome(recipe_id, "r", "u#{i}", :success, %{ + "verification" => "unverified" + }) + end + + assert {:ok, %{rate: 1.0, verifiable: 5, scan_failed: 3, unverified: 3}} = + OutcomeTracker.verification_rate(recipe_id, 5) + end + end + + describe "recipe_health/1" do + test "returns at least the recipe we just recorded outcomes for", %{recipe_id: recipe_id} do + for i <- 1..6 do + OutcomeTracker.record_outcome(recipe_id, "r", "f#{i}", :success, %{ + "verification" => "verified" + }) + end + + rows = OutcomeTracker.recipe_health(min_attempts: 5) + ours = Enum.find(rows, &(&1.recipe_id == recipe_id)) + + assert ours != nil + assert ours.successes == 6 + assert ours.verification.verified == 6 + assert ours.verification.rate == 1.0 + assert ours.status == :healthy + end + + test "tags quarantine_candidate when verification rate is below 0.30", %{recipe_id: recipe_id} do + # 1 verified + 9 still_present = 10 verifiable, rate = 0.1 + OutcomeTracker.record_outcome(recipe_id, "r", "v1", :success, %{ + "verification" => "verified" + }) + + for i <- 1..9 do + OutcomeTracker.record_outcome(recipe_id, "r", "sp#{i}", :success, %{ + "verification" => "still_present" + }) + end + + rows = OutcomeTracker.recipe_health(min_attempts: 5) + ours = Enum.find(rows, &(&1.recipe_id == recipe_id)) + + assert ours.status == :quarantine_candidate + end + + test "tags degraded between quarantine and healthy", %{recipe_id: recipe_id} do + # 3 verified + 7 still_present = 10 verifiable, rate = 0.3 + # → just at the quarantine threshold (0.30), so degraded (< 0.70). + for i <- 1..3 do + OutcomeTracker.record_outcome(recipe_id, "r", "v#{i}", :success, %{ + "verification" => "verified" + }) + end + + for i <- 1..7 do + OutcomeTracker.record_outcome(recipe_id, "r", "sp#{i}", :success, %{ + "verification" => "still_present" + }) + end + + rows = OutcomeTracker.recipe_health(min_attempts: 5) + ours = Enum.find(rows, &(&1.recipe_id == recipe_id)) + + assert ours.status == :degraded + end + end +end