diff --git a/.github/workflows/hypatia-scan.yml b/.github/workflows/hypatia-scan.yml index cd38e6ce..c632a707 100644 --- a/.github/workflows/hypatia-scan.yml +++ b/.github/workflows/hypatia-scan.yml @@ -21,9 +21,11 @@ permissions: contents: read # security-events: write serves two purposes (write implies read): # 1. read — lets the built-in GITHUB_TOKEN query this repo's own - # Dependabot alerts via the Hypatia DependabotAlerts rule - # (DA001-DA004). Without read, `scan_from_path` gets HTTP 403 - # and the rule silently returns no findings. + # Dependabot alerts (DependabotAlerts rule, DA001-DA004), + # secret-scanning alerts (SecretScanningAlerts, SSA001-SSA004), + # and code-scanning alerts (CodeScanningAlerts, CSA001-CSA004). + # Without read, `scan_from_path` gets HTTP 403 and the rule + # silently returns no findings. # See 007-lang/audits/audit-dependabot-automation-gap-2026-04-17.md. # 2. write — lets the "Upload SARIF to code scanning" step publish # Hypatia findings to the Security → Code scanning page so they diff --git a/lib/hypatia/cli.ex b/lib/hypatia/cli.ex index 21322508..e9ec7bcd 100644 --- a/lib/hypatia/cli.ex +++ b/lib/hypatia/cli.ex @@ -23,6 +23,7 @@ defmodule Hypatia.CLI do Available: root_hygiene,honest_completion,workflow_audit, cicd_rules,code_safety,migration_rules,scorecard, green_web,git_state,dependabot_alerts, + secret_scanning_alerts,code_scanning_alerts, structural_drift --format Output format: json (default), text, github --severity Minimum severity to report: critical, high, medium (default), low, info @@ -47,6 +48,8 @@ defmodule Hypatia.CLI do :green_web, :git_state, :dependabot_alerts, + :secret_scanning_alerts, + :code_scanning_alerts, :structural_drift ] @@ -636,6 +639,60 @@ defmodule Hypatia.CLI do results end + # Secret Scanning Alerts + results = + if :secret_scanning_alerts in rules do + case Hypatia.Rules.SecretScanningAlerts.scan_from_path(repo_path) do + {:ok, %{findings: findings}} -> + normalized = + Enum.map(findings, 
fn f -> + %{ + rule_module: "secret_scanning_alerts", + severity: to_string(f.severity), + type: f.rule, + file: Map.get(f, :file, ""), + reason: f.reason, + action: to_string(f.action) + } + end) + + results ++ normalized + + {:error, reason} -> + IO.puts(:stderr, "Warning: Secret-scanning alerts unavailable: #{reason}") + results + end + else + results + end + + # Code Scanning Alerts + results = + if :code_scanning_alerts in rules do + case Hypatia.Rules.CodeScanningAlerts.scan_from_path(repo_path) do + {:ok, %{findings: findings}} -> + normalized = + Enum.map(findings, fn f -> + %{ + rule_module: "code_scanning_alerts", + severity: to_string(f.severity), + type: f.rule, + file: Map.get(f, :file, ""), + reason: f.reason, + action: to_string(f.action) + } + end) + + results ++ normalized + + {:error, reason} -> + IO.puts(:stderr, "Warning: Code-scanning alerts unavailable: #{reason}") + results + end + else + results + end + # Structural Drift results = if :structural_drift in rules do @@ -1042,6 +1099,8 @@ defmodule Hypatia.CLI do defp format_module_name("green_web"), do: "Green Web Foundation" defp format_module_name("git_state"), do: "Git State Sync" defp format_module_name("dependabot_alerts"), do: "Dependabot Alerts" + defp format_module_name("secret_scanning_alerts"), do: "Secret Scanning Alerts" + defp format_module_name("code_scanning_alerts"), do: "Code Scanning Alerts" defp format_module_name(other), do: other defp print_usage do @@ -1062,7 +1121,8 @@ defmodule Hypatia.CLI do Available: root_hygiene,honest_completion, workflow_audit,cicd_rules,code_safety, migration_rules,scorecard,green_web, - git_state,dependabot_alerts + git_state,dependabot_alerts, + secret_scanning_alerts,code_scanning_alerts --format, -f Output format: json (default), text, github --severity, -s Minimum severity: critical, high, medium (default), low --path, -p Path to scan (alternative to positional arg) diff --git a/lib/mix/tasks/hypatia.recipe_health.ex 
b/lib/mix/tasks/hypatia.recipe_health.ex new file mode 100644 index 00000000..74d4a9b1 --- /dev/null +++ b/lib/mix/tasks/hypatia.recipe_health.ex @@ -0,0 +1,185 @@ +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Jonathan D.A. Jewell (hyperpolymath) + +defmodule Mix.Tasks.Hypatia.RecipeHealth do + @moduledoc """ + Per-recipe health report driven by `Hypatia.OutcomeTracker.recipe_health/1`. + + Surfaces recipes whose re-scan verification rate is low (potential + false-fix candidates) or insufficient (verification was not attempted + often enough to draw conclusions). Output is sorted so the most + actionable rows -- quarantine candidates and degraded recipes -- are + at the top. + + Status legend (values as printed in the status column; thresholds shown are the defaults): + healthy -- verification rate >= 0.70 + degraded -- verification rate < 0.70 (review) + quarantine_candidate -- verification rate < 0.30 (auto-quarantine candidate) + insufficient_data -- fewer than --min-attempts verifiable outcomes + no_data -- recipe has no recorded successes, so there is nothing to verify + + Options: + --format text|json (default: text) + --min-attempts N fewer than this and the recipe is "insufficient_data" (default 5) + --degraded N.NN threshold below "healthy" (default 0.70) + --quarantine N.NN threshold below "degraded" (default 0.30) + --only-actionable show only degraded + quarantine_candidate rows + + ## Examples + + mix hypatia.recipe_health + mix hypatia.recipe_health --only-actionable + mix hypatia.recipe_health --format json > recipe-health.json + """ + + use Mix.Task + + @shortdoc "Show per-recipe success + verification health" + + @switches [ + format: :string, + min_attempts: :integer, + degraded: :float, + quarantine: :float, + only_actionable: :boolean + ] + + @impl Mix.Task + def run(argv) do + {opts, _, _} = OptionParser.parse(argv, switches: @switches) + + format = Keyword.get(opts, :format, "text") + min_attempts = Keyword.get(opts, :min_attempts, 5) + degraded = Keyword.get(opts, :degraded, 0.70) + quarantine = Keyword.get(opts, :quarantine, 0.30) + only_actionable = 
Keyword.get(opts, :only_actionable, false) + + rows = + Hypatia.OutcomeTracker.recipe_health( + min_attempts: min_attempts, + degraded_threshold: degraded, + quarantine_threshold: quarantine + ) + + rows = + if only_actionable do + Enum.filter(rows, fn r -> r.status in [:degraded, :quarantine_candidate] end) + else + rows + end + + case format do + "json" -> emit_json(rows) + _ -> emit_text(rows) + end + end + + defp emit_text([]) do + Mix.shell().info("No recipes match the filter (or no outcomes recorded yet).") + end + + defp emit_text(rows) do + headers = ["recipe_id", "disp", "succ", "fail", "fp", "verified", "still", "scan_fail", "rate", "status"] + width = column_widths(rows, headers) + + Mix.shell().info(format_row(headers, width)) + Mix.shell().info(format_row(Enum.map(width, fn w -> String.duplicate("-", w) end), width)) + + Enum.each(rows, fn r -> + row = [ + r.recipe_id, + Integer.to_string(r.dispatches), + Integer.to_string(r.successes), + Integer.to_string(r.failures), + Integer.to_string(r.false_positives), + Integer.to_string(r.verification.verified), + Integer.to_string(r.verification.still_present), + Integer.to_string(r.verification.scan_failed), + format_rate(r.verification.rate), + Atom.to_string(r.status) + ] + + Mix.shell().info(format_row(row, width)) + end) + + Mix.shell().info("") + + Mix.shell().info( + "#{length(rows)} recipe(s). " <> + "Quarantine candidates: #{quarantine_msg(rows)}, " <> + "degraded: #{degraded_msg(rows)}." 
+ ) + end + + defp emit_json(rows) do + payload = %{ + "generated_at" => DateTime.utc_now() |> DateTime.to_iso8601(), + "rows" => + Enum.map(rows, fn r -> + %{ + "recipe_id" => r.recipe_id, + "dispatches" => r.dispatches, + "successes" => r.successes, + "failures" => r.failures, + "false_positives" => r.false_positives, + "success_rate" => to_jsonable(r.success_rate), + "verification" => %{ + "verified" => r.verification.verified, + "still_present" => r.verification.still_present, + "scan_failed" => r.verification.scan_failed, + "unverified" => r.verification.unverified, + "verifiable" => r.verification.verifiable, + "rate" => to_jsonable(r.verification.rate) + }, + "status" => Atom.to_string(r.status) + } + end) + } + + IO.puts(Jason.encode!(payload, pretty: true)) + end + + defp column_widths(rows, headers) do + initial = Enum.map(headers, &String.length/1) + + Enum.reduce(rows, initial, fn r, widths -> + lengths = [ + String.length(r.recipe_id), + String.length(Integer.to_string(r.dispatches)), + String.length(Integer.to_string(r.successes)), + String.length(Integer.to_string(r.failures)), + String.length(Integer.to_string(r.false_positives)), + String.length(Integer.to_string(r.verification.verified)), + String.length(Integer.to_string(r.verification.still_present)), + String.length(Integer.to_string(r.verification.scan_failed)), + String.length(format_rate(r.verification.rate)), + String.length(Atom.to_string(r.status)) + ] + + Enum.zip_with([widths, lengths], fn [a, b] -> max(a, b) end) + end) + end + + defp format_row(cells, widths) do + Enum.zip(cells, widths) + |> Enum.map_join(" ", fn {cell, w} -> String.pad_trailing(cell, w) end) + end + + defp format_rate(:no_data), do: "—" + defp format_rate(:insufficient_data), do: "?" 
+ defp format_rate(r) when is_float(r), do: :erlang.float_to_binary(r, decimals: 2) + + defp to_jsonable(:no_data), do: nil + defp to_jsonable(:insufficient_data), do: "insufficient_data" + defp to_jsonable(r) when is_float(r), do: r + + defp quarantine_msg(rows) do + count = Enum.count(rows, &(&1.status == :quarantine_candidate)) + "#{count} recipe(s)" + end + + defp degraded_msg(rows) do + count = Enum.count(rows, &(&1.status == :degraded)) + "#{count} recipe(s)" + end +end diff --git a/lib/outcome_tracker.ex b/lib/outcome_tracker.ex index 213974b3..9e525613 100644 --- a/lib/outcome_tracker.ex +++ b/lib/outcome_tracker.ex @@ -38,12 +38,16 @@ defmodule Hypatia.OutcomeTracker do - repo: repository name - file: file that was fixed - outcome: :success | :failure | :false_positive + - metadata: optional map of extra fields to merge into the record + (e.g. %{"verification" => "verified"} from `record_and_verify`). + Pre-existing keys (recipe_id, repo, file, outcome, + timestamp, bot) are not overwritten by metadata. """ - def record_outcome(recipe_id, repo, file, outcome) do + def record_outcome(recipe_id, repo, file, outcome, metadata \\ %{}) do now = DateTime.utc_now() |> DateTime.to_iso8601() outcome_str = Atom.to_string(outcome) - record = %{ + base = %{ "pattern_id" => nil, "recipe_id" => recipe_id, "repo" => repo, @@ -53,6 +57,10 @@ defmodule Hypatia.OutcomeTracker do "bot" => "hypatia" } + # Metadata is merged UNDER the base so the canonical fields can't be + # silently overwritten by a caller passing the wrong recipe_id etc. + record = Map.merge(metadata, base) + # Write to verisim-data outcomes (append-only JSONL per month) write_outcome_log(record) @@ -114,8 +122,6 @@ defmodule Hypatia.OutcomeTracker do :false_positive to correct the confidence. 
""" def record_and_verify(recipe_id, repo, file, outcome, opts \\ []) do - {:ok, record} = record_outcome(recipe_id, repo, file, outcome) - if Keyword.get(opts, :verify, false) and outcome == :success do repos_dir = System.get_env("HYPATIA_REPOS_DIR", File.cwd!()) repo_path = Keyword.get(opts, :repo_path, Path.join(repos_dir, repo)) @@ -124,17 +130,49 @@ defmodule Hypatia.OutcomeTracker do case verify_fix(repo_path, pattern_id, category) do :verified -> + # Record success WITH the verification stamp so recipe_health + # can distinguish verified-clean fixes from un-verified ones. + {:ok, record} = + record_outcome(recipe_id, repo, file, outcome, %{"verification" => "verified"}) + {:ok, record, :verified} :still_present -> - Logger.warning("Fix claimed success but pattern still present -- recording false_positive") - record_outcome(recipe_id, repo, file, :false_positive) + Logger.warning( + "Fix claimed success but pattern still present -- recording false_positive" + ) + + # Both records are tagged so the trail is explicit: the claimed + # success was actually a false positive, surfaced by re-scan. + {:ok, _} = + record_outcome(recipe_id, repo, file, outcome, %{"verification" => "still_present"}) + + {:ok, record} = + record_outcome(recipe_id, repo, file, :false_positive, %{ + "verification" => "still_present", + "caused_by" => "post_fix_rescan" + }) + {:ok, record, :false_positive} :scan_failed -> + # The fix may or may not have worked; we just couldn't verify. + # Recording the outcome with the scan_failed marker preserves + # the distinction from "verified clean" without penalising the + # recipe in confidence updates. 
+ {:ok, record} = + record_outcome(recipe_id, repo, file, outcome, %{"verification" => "scan_failed"}) + + {:ok, record, :scan_unavailable} end else + # Unverified outcome (or non-success): record as before, with the + # explicit "unverified" marker so verification_rate aggregates can + # tell the difference between "verification wasn't attempted" and + # "verification was attempted and failed". + {:ok, record} = + record_outcome(recipe_id, repo, file, outcome, %{"verification" => "unverified"}) + {:ok, record, :not_verified} end end @@ -264,8 +302,197 @@ defmodule Hypatia.OutcomeTracker do end end + # ─── Closed-loop verification metric ─────────────────────────────────── + + @doc """ + Per-recipe verification rate. + + Returns `{:ok, %{verified, still_present, scan_failed, unverified, verifiable, + total, rate}}` where `rate` is the fraction of *verifiable* successes that were + actually verified clean by post-fix re-scan. `scan_failed` and + `unverified` records are excluded from the denominator so a recipe is + not penalised for being run in environments without panic-attack. + + With fewer than `min_attempts` verifiable successes the map's `rate` is + `:insufficient_data`; with no successes at all it returns `{:ok, :no_outcomes}`. 
+ """ + def verification_rate(recipe_id, min_attempts \\ 5) do + outcomes = load_outcomes_for_recipe(recipe_id) + successes = Enum.filter(outcomes, fn o -> Map.get(o, "outcome") == "success" end) + + counts = + Enum.reduce( + successes, + %{verified: 0, still_present: 0, scan_failed: 0, unverified: 0}, + fn o, acc -> + case Map.get(o, "verification") do + "verified" -> Map.update!(acc, :verified, &(&1 + 1)) + "still_present" -> Map.update!(acc, :still_present, &(&1 + 1)) + "scan_failed" -> Map.update!(acc, :scan_failed, &(&1 + 1)) + _ -> Map.update!(acc, :unverified, &(&1 + 1)) + end + end + ) + + verifiable = counts.verified + counts.still_present + + cond do + length(successes) == 0 -> + {:ok, :no_outcomes} + + verifiable < min_attempts -> + {:ok, + Map.merge(counts, %{ + total: length(successes), + rate: :insufficient_data, + verifiable: verifiable + })} + + true -> + rate = counts.verified / verifiable + + {:ok, + Map.merge(counts, %{ + total: length(successes), + rate: rate, + verifiable: verifiable + })} + end + end + + @doc """ + Aggregate health stats across every recipe with recorded outcomes. + + Returns a list of maps sorted ascending by verification rate, so + recipes that look most broken surface first. Recipes with insufficient + verification data still appear -- they're flagged distinctly so they + can be prioritised for verification-enabled runs. 
+ + Schema: + %{ + recipe_id: String.t(), + dispatches: non_neg_integer(), + successes: non_neg_integer(), + failures: non_neg_integer(), + false_positives: non_neg_integer(), + success_rate: float() | :no_data, + verification: %{ + verified: non_neg_integer(), + still_present: non_neg_integer(), + scan_failed: non_neg_integer(), + unverified: non_neg_integer(), + verifiable: non_neg_integer(), + rate: float() | :insufficient_data | :no_data + }, + status: :healthy | :degraded | :quarantine_candidate | :insufficient_data | :no_data | :unverified + } + """ + def recipe_health(opts \\ []) do + min_attempts = Keyword.get(opts, :min_attempts, 5) + degraded_threshold = Keyword.get(opts, :degraded_threshold, 0.70) + quarantine_threshold = Keyword.get(opts, :quarantine_threshold, 0.30) + + recipe_ids = all_recipe_ids_with_outcomes() + + recipe_ids + |> Enum.map(fn recipe_id -> + outcomes = load_outcomes_for_recipe(recipe_id) + + successes = Enum.count(outcomes, fn o -> Map.get(o, "outcome") == "success" end) + failures = Enum.count(outcomes, fn o -> Map.get(o, "outcome") == "failure" end) + false_positives = Enum.count(outcomes, fn o -> Map.get(o, "outcome") == "false_positive" end) + + dispatches = length(outcomes) + attempts = successes + failures + false_positives + + success_rate = + if attempts > 0, do: successes / attempts, else: :no_data + + {:ok, verification} = verification_rate(recipe_id, min_attempts) + + verification_map = + case verification do + :no_outcomes -> + %{ + verified: 0, + still_present: 0, + scan_failed: 0, + unverified: 0, + verifiable: 0, + rate: :no_data + } + + map when is_map(map) -> + map + end + + status = + cond do + verification_map.rate == :no_data -> :no_data + verification_map.rate == :insufficient_data -> :insufficient_data + is_float(verification_map.rate) and verification_map.rate < quarantine_threshold -> + :quarantine_candidate + is_float(verification_map.rate) and verification_map.rate < degraded_threshold -> + :degraded + 
is_float(verification_map.rate) -> + :healthy + true -> + :unverified + end + + %{ + recipe_id: recipe_id, + dispatches: dispatches, + successes: successes, + failures: failures, + false_positives: false_positives, + success_rate: success_rate, + verification: verification_map, + status: status + } + end) + |> Enum.sort_by(fn r -> + # Sort by rate ascending so quarantine_candidate / degraded float to + # the top. :no_data and :insufficient_data sort after numerics so + # they don't bury actionable rows. + case r.verification.rate do + :no_data -> {2, 0} + :insufficient_data -> {1, 0} + rate when is_float(rate) -> {0, rate} + end + end) + end + # --- Private --- + defp all_recipe_ids_with_outcomes do + outcomes_dir = Path.join(Path.expand(@verisimdb_data_path), "outcomes") + + case File.ls(outcomes_dir) do + {:ok, files} -> + files + |> Enum.filter(&String.ends_with?(&1, ".jsonl")) + |> Enum.flat_map(fn f -> + path = Path.join(outcomes_dir, f) + + path + |> File.stream!() + |> Stream.map(fn line -> + case Jason.decode(String.trim(line)) do + {:ok, %{"recipe_id" => id}} when is_binary(id) -> id + _ -> nil + end + end) + |> Stream.reject(&is_nil/1) + |> Enum.to_list() + end) + |> Enum.uniq() + + {:error, _} -> + [] + end + end + defp write_outcome_log(record) do {{year, month, _}, _} = :calendar.universal_time() month_str = String.pad_leading("#{month}", 2, "0") diff --git a/lib/rules/code_scanning_alerts.ex b/lib/rules/code_scanning_alerts.ex new file mode 100644 index 00000000..b18d3464 --- /dev/null +++ b/lib/rules/code_scanning_alerts.ex @@ -0,0 +1,449 @@ +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Jonathan D.A. Jewell (hyperpolymath) + +defmodule Hypatia.Rules.CodeScanningAlerts do + @moduledoc """ + GitHub Code Scanning alert querying (CodeQL + third-party SARIF). + + Queries the GitHub REST API for code-scanning alerts on a repository, + classifies by severity, and generates findings for the safety triangle + pipeline. 
Surfaces CodeQL findings (and any other SARIF uploads -- + including Hypatia's own, via the `hypatia` category) alongside the + rest of the scanner output so a single Hypatia run sees everything + GitHub's security tab is showing. + + Requires GITHUB_TOKEN with `code_scanning_alerts: read` permission + (fine-grained PAT) or `security_events` scope (classic PAT). + + Rule IDs: CSA001-CSA004 + """ + + require Logger + + @github_api_base "https://api.github.com" + @max_alerts_per_repo 100 + + # Stale thresholds (days), keyed by alert severity. Mirrors the + # DependabotAlerts cadence: critical findings escalate fastest. + @stale_thresholds %{ + critical: 3, + high: 7, + medium: 30, + low: 90, + note: 90, + warning: 30, + error: 7 + } + + # Dismissal reasons accepted by policy without further review. Written as a plain list: the values contain spaces, and ~w splits on whitespace even when it is backslash-escaped. + @accepted_dismissals ["false positive", "used in tests", "won't fix"] + + # ─── CSA001: Open code-scanning alerts ───────────────────────────────── + + @doc """ + CSA001: List all open code-scanning alerts on the repo. Each alert's + severity is taken from its rule definition (critical/high/medium/low, + or CodeQL's note/warning/error). The Hypatia-side severity is mapped + to the same canonical four-bucket scale used by other rule modules so + the CLI's severity threshold works uniformly. 
+ """ + def csa001_open_alerts(owner, repo) do + case fetch_alerts(owner, repo) do + {:ok, alerts} -> + alerts + |> Enum.filter(&(&1["state"] == "open")) + |> Enum.map(fn alert -> + rule_id = get_in(alert, ["rule", "id"]) || "unknown" + severity_raw = get_in(alert, ["rule", "severity"]) || "warning" + security_severity = get_in(alert, ["rule", "security_severity_level"]) + description = get_in(alert, ["rule", "description"]) || rule_id + tool = get_in(alert, ["tool", "name"]) || "unknown" + path = get_in(alert, ["most_recent_instance", "location", "path"]) || "" + line = get_in(alert, ["most_recent_instance", "location", "start_line"]) + + created = alert["created_at"] + age_days = age_in_days(created) + mapped_severity = map_severity(security_severity || severity_raw) + stale_threshold = Map.get(@stale_thresholds, mapped_severity, 30) + is_stale = age_days > stale_threshold + + %{ + rule: "CSA001", + file: path, + severity: mapped_severity, + reason: build_alert_reason(tool, rule_id, description, age_days, is_stale), + action: determine_action(mapped_severity, is_stale), + detail: %{ + alert_number: alert["number"], + tool: tool, + rule_id: rule_id, + rule_severity: severity_raw, + security_severity_level: security_severity, + path: path, + line: line, + age_days: age_days, + is_stale: is_stale, + created_at: created, + url: alert["html_url"] + } + } + end) + + {:error, reason} -> + Logger.warning("CSA001: Failed to fetch code-scanning alerts: #{reason}") + [] + end + end + + # ─── CSA002: Severity summary ────────────────────────────────────────── + + @doc """ + CSA002: Meta-finding when open alert counts exceed thresholds. + Triggers at any critical, ≥5 high, or ≥10 total open alerts. 
+ """ + def csa002_severity_summary(owner, repo) do + case fetch_alerts(owner, repo) do + {:ok, alerts} -> + open = Enum.filter(alerts, &(&1["state"] == "open")) + + by_severity = + Enum.group_by(open, fn a -> + sev = get_in(a, ["rule", "security_severity_level"]) || get_in(a, ["rule", "severity"]) + map_severity(sev) + end) + + critical_count = length(Map.get(by_severity, :critical, [])) + high_count = length(Map.get(by_severity, :high, [])) + total = length(open) + + findings = [] + + findings = + if critical_count > 0 do + [%{ + rule: "CSA002", + file: "#{owner}/#{repo}", + severity: :critical, + reason: + "#{critical_count} critical code-scanning alert(s) -- immediate triage required", + action: :escalate, + detail: %{critical: critical_count, high: high_count, total: total} + } + | findings] + else + findings + end + + findings = + if high_count >= 5 do + [%{ + rule: "CSA002", + file: "#{owner}/#{repo}", + severity: :high, + reason: + "#{high_count} high-severity code-scanning alert(s) -- batch remediation recommended", + action: :batch_update, + detail: %{high: high_count, total: total} + } + | findings] + else + findings + end + + findings = + if total >= 10 do + [%{ + rule: "CSA002", + file: "#{owner}/#{repo}", + severity: :medium, + reason: "#{total} total open code-scanning alert(s) -- security hygiene review", + action: :review, + detail: %{ + total: total, + by_severity: + Map.new(by_severity, fn {k, v} -> {to_string(k), length(v)} end) + } + } + | findings] + else + findings + end + + findings + + {:error, _} -> [] + end + end + + # ─── CSA003: Stale open alerts ───────────────────────────────────────── + + @doc """ + CSA003: Open code-scanning alerts older than the severity-appropriate + threshold. Critical alerts stale after 3 days, high after 7, medium + after 30, low after 90. 
+ """ + def csa003_stale_alerts(owner, repo) do + case fetch_alerts(owner, repo) do + {:ok, alerts} -> + alerts + |> Enum.filter(&(&1["state"] == "open")) + |> Enum.filter(fn alert -> + sev = + map_severity( + get_in(alert, ["rule", "security_severity_level"]) || + get_in(alert, ["rule", "severity"]) || "medium" + ) + + threshold = Map.get(@stale_thresholds, sev, 30) + age_in_days(alert["created_at"]) > threshold + end) + |> Enum.map(fn alert -> + rule_id = get_in(alert, ["rule", "id"]) || "unknown" + + sev = + map_severity( + get_in(alert, ["rule", "security_severity_level"]) || + get_in(alert, ["rule", "severity"]) || "medium" + ) + + age = age_in_days(alert["created_at"]) + threshold = Map.get(@stale_thresholds, sev, 30) + path = get_in(alert, ["most_recent_instance", "location", "path"]) || "" + + %{ + rule: "CSA003", + file: path, + severity: :high, + reason: + "Code-scanning alert #{rule_id} (#{sev}) at #{path} is #{age} days old " <> + "(threshold: #{threshold} days) -- overdue for remediation", + action: :escalate, + detail: %{ + alert_number: alert["number"], + rule_id: rule_id, + path: path, + original_severity: sev, + age_days: age, + threshold_days: threshold + } + } + end) + + {:error, _} -> [] + end + end + + # ─── CSA004: Dismissed without documented resolution ─────────────────── + + @doc """ + CSA004: Alerts dismissed with no documented reason (or with a vague + one). Real dismissals carry a `dismissed_reason` in the accepted + vocabulary (`false positive`, `won't fix`, `used in tests`); anything + else is policy-suspicious and should be reviewed. 
+ """ + def csa004_dismissed_without_fix(owner, repo) do + case fetch_alerts(owner, repo) do + {:ok, alerts} -> + alerts + |> Enum.filter(fn a -> + a["state"] == "dismissed" and + a["dismissed_reason"] not in @accepted_dismissals + end) + |> Enum.map(fn alert -> + rule_id = get_in(alert, ["rule", "id"]) || "unknown" + reason = alert["dismissed_reason"] || "no reason given" + path = get_in(alert, ["most_recent_instance", "location", "path"]) || "" + + %{ + rule: "CSA004", + file: path, + severity: :medium, + reason: + "Code-scanning alert #{rule_id} dismissed as '#{reason}' " <> + "-- ensure dismissal is documented and justified", + action: :review, + detail: %{ + alert_number: alert["number"], + rule_id: rule_id, + path: path, + dismissed_reason: reason, + dismissed_comment: alert["dismissed_comment"], + dismissed_at: alert["dismissed_at"] + } + } + end) + + {:error, _} -> [] + end + end + + # ─── Comprehensive scan ──────────────────────────────────────────────── + + @doc """ + Run all code-scanning checks (CSA001-CSA004) for a repository. Findings are de-duplicated per rule, severity and alert, so the repo-level CSA002 summaries (which share one `file`) are each kept. + """ + def scan(owner, repo) do + token = System.get_env("GITHUB_TOKEN") + + if token == nil or token == "" do + {:error, "GITHUB_TOKEN not set -- cannot query code-scanning alerts"} + else + findings = + csa001_open_alerts(owner, repo) ++ + csa002_severity_summary(owner, repo) ++ + csa003_stale_alerts(owner, repo) ++ + csa004_dismissed_without_fix(owner, repo) + + deduped = + findings + |> Enum.uniq_by(fn f -> + {f.rule, f.severity, Map.get(f.detail, :alert_number, f.file)} + end) + + {:ok, %{ + findings: deduped, + total: length(deduped), + by_severity: group_by_severity(deduped) + }} + end + end + + @doc """ + Scan from a local repo path -- extracts owner/repo from git remote. 
+ """ + def scan_from_path(repo_path) do + case extract_owner_repo(repo_path) do + {:ok, owner, repo} -> scan(owner, repo) + {:error, reason} -> {:error, reason} + end + end + + # ─── GitHub API ──────────────────────────────────────────────────────── + + defp fetch_alerts(owner, repo) do + token = System.get_env("GITHUB_TOKEN") + + if token == nil or token == "" do + {:error, "GITHUB_TOKEN not set"} + else + url = + "#{@github_api_base}/repos/#{owner}/#{repo}/code-scanning/alerts" <> + "?per_page=#{@max_alerts_per_repo}" + + case System.cmd("curl", [ + "-s", + "-f", + "-H", + "Accept: application/vnd.github+json", + "-H", + "Authorization: Bearer #{token}", + "-H", + "X-GitHub-Api-Version: 2022-11-28", + url + ], stderr_to_stdout: true) do + {body, 0} -> + case Jason.decode(body) do + {:ok, alerts} when is_list(alerts) -> {:ok, alerts} + {:ok, %{"message" => msg}} -> {:error, "GitHub API: #{msg}"} + {:error, _} -> {:error, "Invalid JSON response from GitHub API"} + end + + {error, _} -> + {:error, "curl failed: #{String.slice(error, 0, 200)}"} + end + end + end + + defp extract_owner_repo(repo_path) do + case System.cmd("git", ["remote", "get-url", "origin"], + cd: repo_path, + stderr_to_stdout: true + ) do + {url, 0} -> + trimmed = String.trim(url) + + cond do + String.contains?(trimmed, "github.com:") -> + [_, path] = String.split(trimmed, "github.com:", parts: 2) + parse_owner_repo_from_path(path) + + String.contains?(trimmed, "github.com/") -> + [_, path] = String.split(trimmed, "github.com/", parts: 2) + parse_owner_repo_from_path(path) + + true -> + {:error, "Remote URL is not a GitHub URL: #{trimmed}"} + end + + _ -> + {:error, "Could not get remote URL"} + end + end + + defp parse_owner_repo_from_path(path) do + clean = path |> String.trim() |> String.trim_trailing(".git") + + case String.split(clean, "/", parts: 2) do + [owner, repo] -> {:ok, owner, repo} + _ -> {:error, "Could not parse owner/repo from: #{path}"} + end + end + + # ─── Helpers 
─────────────────────────────────────────────────────────── + + # Normalise the heterogeneous severity surface (CodeQL uses note/ + # warning/error, third-party SARIF often uses critical/high/medium/low, + # GitHub's `security_severity_level` uses critical/high/medium/low) onto + # Hypatia's canonical bucket scale so the CLI's severity threshold + # works uniformly across all rule modules. + defp map_severity(sev) when is_binary(sev) do + case String.downcase(sev) do + "critical" -> :critical + "high" -> :high + "error" -> :high + "medium" -> :medium + "warning" -> :medium + "low" -> :low + "note" -> :low + _ -> :medium + end + end + + defp map_severity(sev) when is_atom(sev), do: map_severity(Atom.to_string(sev)) + defp map_severity(_), do: :medium + + defp age_in_days(nil), do: 0 + + defp age_in_days(iso_string) when is_binary(iso_string) do + case DateTime.from_iso8601(iso_string) do + {:ok, dt, _} -> DateTime.diff(DateTime.utc_now(), dt, :day) + _ -> 0 + end + end + + defp build_alert_reason(tool, rule_id, description, age_days, is_stale) do + base = "Code scanning (#{tool}): #{rule_id} -- #{description}" + age_part = " -- #{age_days} day(s) old" + stale_part = if is_stale, do: " [STALE]", else: "" + base <> age_part <> stale_part + end + + defp determine_action(severity, is_stale) do + case {severity, is_stale} do + {:critical, _} -> :escalate + {:high, true} -> :escalate + {:high, false} -> :update + {:medium, true} -> :update + {:medium, false} -> :review + {:low, _} -> :review + _ -> :review + end + end + + defp group_by_severity(findings) do + findings + |> Enum.group_by(& &1.severity) + |> Enum.map(fn {sev, items} -> {sev, length(items)} end) + |> Map.new() + end +end diff --git a/lib/rules/secret_scanning_alerts.ex b/lib/rules/secret_scanning_alerts.ex new file mode 100644 index 00000000..3acf283a --- /dev/null +++ b/lib/rules/secret_scanning_alerts.ex @@ -0,0 +1,336 @@ +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Jonathan D.A. 
Jewell (hyperpolymath)

defmodule Hypatia.Rules.SecretScanningAlerts do
  @moduledoc """
  GitHub Secret Scanning alert querying.

  Queries the GitHub REST API for secret-scanning alerts on a repository
  and generates findings for the safety triangle pipeline.

  A secret-scanning alert means GitHub identified a credential committed
  to the repo (API token, private key, etc.). Every open alert is treated
  as :critical -- leaked secrets are by definition not "advisory" risk,
  and the dismissal vocabulary (`revoked`, `used_in_tests`, `false_positive`)
  is the place to mark accepted ones.

  Requires GITHUB_TOKEN with `secret_scanning_alerts: read` permission
  (fine-grained PAT) or `security_events` scope (classic PAT).

  Only a single page of results is requested (at most 100 alerts per
  repository); there is no pagination loop.

  Rule IDs: SSA001-SSA004
  """

  require Logger

  @github_api_base "https://api.github.com"
  @max_alerts_per_repo 100

  # Stale threshold (days). A revoked secret left in history is still a
  # finding -- but a fresh open alert is much more urgent.
  @stale_threshold_days 7

  # Resolutions that are accepted by policy without further review. This
  # list must stay in step with the vocabulary documented on
  # `ssa004_dismissed_without_fix/2`; anything else (e.g. `wont_fix`, or
  # no resolution at all) is reported by SSA004.
  @accepted_resolutions ~w(revoked used_in_tests false_positive pattern_deleted pattern_edited)

  @typedoc "A finding map as emitted by the SSA rules."
  @type finding :: %{
          rule: String.t(),
          file: String.t(),
          severity: :critical | :high,
          reason: String.t(),
          action: :escalate | :review,
          detail: map()
        }

  # ─── SSA001: Open secret-scanning alerts ───────────────────────────────

  @doc """
  SSA001: List all open secret-scanning alerts on the repo.

  Every open alert is :critical -- a real credential is sitting in the
  git history. The triangle classifier deals with whether it's
  fixable (rotate + remove) vs. a documented test fixture.

  Returns a list of findings. If the alerts cannot be fetched, a warning
  is logged and `[]` is returned.
  """
  @spec ssa001_open_alerts(String.t(), String.t()) :: [finding()]
  def ssa001_open_alerts(owner, repo) do
    case fetch_alerts(owner, repo) do
      {:ok, alerts} ->
        open_alert_findings(alerts)

      {:error, reason} ->
        Logger.warning("SSA001: Failed to fetch secret-scanning alerts: #{reason}")
        []
    end
  end

  # ─── SSA002: Severity summary ──────────────────────────────────────────

  @doc """
  SSA002: Meta-finding if open alert count exceeds zero. Any leaked
  secret is a critical security event -- we surface a repo-level marker
  so the dashboard can highlight the repo, not just the individual alert.

  Returns a list with zero or one finding; `[]` when the fetch fails.
  """
  @spec ssa002_severity_summary(String.t(), String.t()) :: [finding()]
  def ssa002_severity_summary(owner, repo) do
    case fetch_alerts(owner, repo) do
      {:ok, alerts} -> severity_summary_findings(alerts, owner, repo)
      {:error, _reason} -> []
    end
  end

  # ─── SSA003: Stale open alerts ─────────────────────────────────────────

  @doc """
  SSA003: Open secret-scanning alerts older than the stale threshold.
  Leaked secrets must be rotated within days, not weeks. Findings are
  always :critical regardless of age (the secret is leaked either way),
  but staleness is surfaced in the reason for triage prioritisation.

  Returns a list of findings; `[]` when the fetch fails.
  """
  @spec ssa003_stale_alerts(String.t(), String.t()) :: [finding()]
  def ssa003_stale_alerts(owner, repo) do
    case fetch_alerts(owner, repo) do
      {:ok, alerts} -> stale_alert_findings(alerts)
      {:error, _reason} -> []
    end
  end

  # ─── SSA004: Dismissed without acceptable resolution ───────────────────

  @doc """
  SSA004: Alerts resolved with no documented resolution reason, or with
  a vague reason. Real resolutions go through the `revoked`,
  `used_in_tests`, `false_positive`, `pattern_deleted`, `pattern_edited`
  vocabulary; anything else (including nil) is policy-suspicious.

  Returns a list of :high findings; `[]` when the fetch fails.
  """
  @spec ssa004_dismissed_without_fix(String.t(), String.t()) :: [finding()]
  def ssa004_dismissed_without_fix(owner, repo) do
    case fetch_alerts(owner, repo) do
      {:ok, alerts} -> dismissed_findings(alerts)
      {:error, _reason} -> []
    end
  end

  # ─── Comprehensive scan ────────────────────────────────────────────────

  @doc """
  Run all secret-scanning checks for a repository.

  The alert list is fetched once and shared by all four rules, so every
  rule evaluates the same snapshot and only one API request is made.

  Returns `{:error, reason}` when GITHUB_TOKEN is not set. Otherwise
  returns `{:ok, %{findings: ..., total: ..., by_severity: ...}}`; if the
  API request itself fails, a warning is logged and the result contains
  no findings.
  """
  @spec scan(String.t(), String.t()) ::
          {:ok, %{findings: [finding()], total: non_neg_integer(), by_severity: map()}}
          | {:error, String.t()}
  def scan(owner, repo) do
    case github_token() do
      :error ->
        {:error, "GITHUB_TOKEN not set -- cannot query secret-scanning alerts"}

      {:ok, _token} ->
        alerts =
          case fetch_alerts(owner, repo) do
            {:ok, fetched} ->
              fetched

            {:error, reason} ->
              Logger.warning("SSA001: Failed to fetch secret-scanning alerts: #{reason}")
              []
          end

        findings =
          open_alert_findings(alerts) ++
            severity_summary_findings(alerts, owner, repo) ++
            stale_alert_findings(alerts) ++
            dismissed_findings(alerts)

        # One finding per {rule, alert}; the repo-level SSA002 finding has
        # no alert number, so it is keyed by its `file` value instead.
        deduped =
          Enum.uniq_by(findings, fn f ->
            {f.rule, Map.get(f.detail, :alert_number, f.file)}
          end)

        {:ok,
         %{
           findings: deduped,
           total: length(deduped),
           by_severity: group_by_severity(deduped)
         }}
    end
  end

  @doc """
  Scan from a local repo path -- extracts owner/repo from git remote.

  Returns whatever `scan/2` returns, or `{:error, reason}` when the
  `origin` remote cannot be read or is not a GitHub URL.
  """
  @spec scan_from_path(Path.t()) :: {:ok, map()} | {:error, String.t()}
  def scan_from_path(repo_path) do
    case extract_owner_repo(repo_path) do
      {:ok, owner, repo} -> scan(owner, repo)
      {:error, reason} -> {:error, reason}
    end
  end

  # ─── Finding builders (pure: operate on an already-fetched alert list) ─

  # SSA001 findings: one per open alert.
  defp open_alert_findings(alerts) do
    alerts
    |> Enum.filter(&open?/1)
    |> Enum.map(fn alert ->
      secret_type = display_type(alert)
      created = alert["created_at"]
      age_days = age_in_days(created)
      is_stale = age_days > @stale_threshold_days

      %{
        rule: "SSA001",
        file: secret_type,
        severity: :critical,
        reason: build_alert_reason(secret_type, age_days, is_stale),
        action: :escalate,
        detail: %{
          alert_number: alert["number"],
          secret_type: alert["secret_type"],
          secret_type_display: alert["secret_type_display_name"],
          age_days: age_days,
          is_stale: is_stale,
          created_at: created,
          url: alert["html_url"],
          locations_url: alert["locations_url"]
        }
      }
    end)
  end

  # SSA002 finding: a single repo-level marker when at least one alert is open.
  defp severity_summary_findings(alerts, owner, repo) do
    case Enum.filter(alerts, &open?/1) do
      [] ->
        []

      open ->
        count = length(open)

        [
          %{
            rule: "SSA002",
            file: "#{owner}/#{repo}",
            severity: :critical,
            reason: "#{count} open secret-scanning alert(s) -- rotate and purge from history",
            action: :escalate,
            detail: %{
              total: count,
              by_type: Enum.frequencies_by(open, &(&1["secret_type"] || "unknown"))
            }
          }
        ]
    end
  end

  # SSA003 findings: open alerts strictly older than the stale threshold.
  defp stale_alert_findings(alerts) do
    alerts
    |> Enum.filter(&open?/1)
    |> Enum.map(fn alert -> {alert, age_in_days(alert["created_at"])} end)
    |> Enum.filter(fn {_alert, age} -> age > @stale_threshold_days end)
    |> Enum.map(fn {alert, age} ->
      secret_type = display_type(alert)

      %{
        rule: "SSA003",
        file: secret_type,
        severity: :critical,
        reason:
          "Secret-scanning alert for #{secret_type} is #{age} days old " <>
            "(threshold: #{@stale_threshold_days} days) -- overdue for rotation",
        action: :escalate,
        detail: %{
          alert_number: alert["number"],
          secret_type: alert["secret_type"],
          age_days: age,
          threshold_days: @stale_threshold_days
        }
      }
    end)
  end

  # SSA004 findings: resolved alerts whose resolution is missing or not in
  # the accepted vocabulary.
  defp dismissed_findings(alerts) do
    alerts
    |> Enum.filter(fn alert ->
      alert["state"] == "resolved" and alert["resolution"] not in @accepted_resolutions
    end)
    |> Enum.map(fn alert ->
      secret_type = display_type(alert)
      resolution = alert["resolution"] || "no reason given"

      %{
        rule: "SSA004",
        file: secret_type,
        severity: :high,
        reason:
          "Secret-scanning alert for #{secret_type} resolved as '#{resolution}' " <>
            "-- confirm rotation completed and document acceptance reason",
        action: :review,
        detail: %{
          alert_number: alert["number"],
          secret_type: alert["secret_type"],
          resolution: resolution,
          resolved_at: alert["resolved_at"],
          resolution_comment: alert["resolution_comment"]
        }
      }
    end)
  end

  defp open?(alert), do: alert["state"] == "open"

  # Prefer the human-readable type name, then the machine name.
  defp display_type(alert) do
    alert["secret_type_display_name"] || alert["secret_type"] || "unknown"
  end

  # ─── GitHub API ────────────────────────────────────────────────────────

  # Returns `{:ok, token}` when GITHUB_TOKEN is set to a non-empty value.
  defp github_token do
    case System.get_env("GITHUB_TOKEN") do
      token when token in [nil, ""] -> :error
      token -> {:ok, token}
    end
  end

  # Fetches one page of alerts via `curl`. Returns `{:ok, list}` or
  # `{:error, message}`.
  #
  # NOTE(review): the bearer token is passed as a curl argument, so it is
  # visible in the process list while the request runs -- confirm this is
  # acceptable for the environments the scanner runs in.
  defp fetch_alerts(owner, repo) do
    case github_token() do
      :error ->
        {:error, "GITHUB_TOKEN not set"}

      {:ok, token} ->
        url =
          "#{@github_api_base}/repos/#{owner}/#{repo}/secret-scanning/alerts" <>
            "?per_page=#{@max_alerts_per_repo}"

        args = [
          "-s",
          "-f",
          "-H",
          "Accept: application/vnd.github+json",
          "-H",
          "Authorization: Bearer #{token}",
          "-H",
          "X-GitHub-Api-Version: 2022-11-28",
          url
        ]

        case System.cmd("curl", args, stderr_to_stdout: true) do
          {body, 0} ->
            decode_alerts(body)

          {error, _status} ->
            {:error, "curl failed: #{String.slice(error, 0, 200)}"}
        end
    end
  end

  # Every decode outcome maps to a tagged tuple; a JSON value that is
  # neither an alert list nor an error object must not crash the scan.
  defp decode_alerts(body) do
    case Jason.decode(body) do
      {:ok, alerts} when is_list(alerts) -> {:ok, alerts}
      {:ok, %{"message" => msg}} -> {:error, "GitHub API: #{msg}"}
      {:ok, _other} -> {:error, "Unexpected response shape from GitHub API"}
      {:error, _} -> {:error, "Invalid JSON response from GitHub API"}
    end
  end

  # Reads the `origin` remote of the repository at `repo_path` and extracts
  # owner/repo from either the SSH (`github.com:`) or HTTPS (`github.com/`)
  # URL form.
  defp extract_owner_repo(repo_path) do
    case System.cmd("git", ["remote", "get-url", "origin"],
           cd: repo_path,
           stderr_to_stdout: true
         ) do
      {url, 0} ->
        trimmed = String.trim(url)

        cond do
          String.contains?(trimmed, "github.com:") ->
            [_, path] = String.split(trimmed, "github.com:", parts: 2)
            parse_owner_repo_from_path(path)

          String.contains?(trimmed, "github.com/") ->
            [_, path] = String.split(trimmed, "github.com/", parts: 2)
            parse_owner_repo_from_path(path)

          true ->
            {:error, "Remote URL is not a GitHub URL: #{trimmed}"}
        end

      _ ->
        {:error, "Could not get remote URL"}
    end
  end

  # Splits "owner/repo(.git)(/)" into its parts. A trailing slash is
  # dropped before the ".git" suffix so it cannot end up in the repo name,
  # and empty owner/repo segments are rejected.
  defp parse_owner_repo_from_path(path) do
    clean =
      path
      |> String.trim()
      |> String.trim_trailing("/")
      |> String.trim_trailing(".git")

    case String.split(clean, "/", parts: 2) do
      [owner, repo] when owner != "" and repo != "" -> {:ok, owner, repo}
      _ -> {:error, "Could not parse owner/repo from: #{path}"}
    end
  end

  # ─── Helpers ───────────────────────────────────────────────────────────

  # Whole days between an ISO 8601 timestamp and now; 0 when the timestamp
  # is missing or cannot be parsed.
  defp age_in_days(nil), do: 0

  defp age_in_days(iso_string) when is_binary(iso_string) do
    case DateTime.from_iso8601(iso_string) do
      {:ok, dt, _} -> DateTime.diff(DateTime.utc_now(), dt, :day)
      _ -> 0
    end
  end

  defp build_alert_reason(secret_type, age_days, is_stale) do
    base = "Secret scanning: leaked #{secret_type}"
    age_part = " -- #{age_days} day(s) old"
    stale_part = if is_stale, do: " [STALE -- rotate immediately]", else: ""
    base <> age_part <> stale_part
  end

  # Map of severity => number of findings at that severity.
  defp group_by_severity(findings) do
    Enum.frequencies_by(findings, & &1.severity)
  end
end
diff --git a/test/code_scanning_alerts_test.exs b/test/code_scanning_alerts_test.exs new file mode 100644 index 00000000..d8eb1dc8 --- /dev/null +++ b/test/code_scanning_alerts_test.exs @@ -0,0 +1,68 @@ +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Jonathan D.A. Jewell (hyperpolymath) +# +# Tests for Code Scanning alert querying rules (CSA001-CSA004). +# Exercise logic without hitting the GitHub API.

defmodule Hypatia.Rules.CodeScanningAlertsTest do
  # async: false -- every test runs with GITHUB_TOKEN removed from the
  # VM-wide environment, which must not overlap with other test modules.
  use ExUnit.Case, async: false

  alias Hypatia.Rules.CodeScanningAlerts

  setup do
    # Run each test without a token; put the previous value back afterwards.
    old_token = System.get_env("GITHUB_TOKEN")
    System.delete_env("GITHUB_TOKEN")

    on_exit(fn ->
      if old_token, do: System.put_env("GITHUB_TOKEN", old_token)
    end)

    :ok
  end

  describe "csa001_open_alerts/2" do
    test "returns empty list when GITHUB_TOKEN is not set" do
      assert CodeScanningAlerts.csa001_open_alerts("hyperpolymath", "test-nonexistent") == []
    end
  end

  describe "csa002_severity_summary/2" do
    test "returns empty list when GITHUB_TOKEN is not set" do
      assert CodeScanningAlerts.csa002_severity_summary("hyperpolymath", "test-nonexistent") == []
    end
  end

  describe "csa003_stale_alerts/2" do
    test "returns empty list when GITHUB_TOKEN is not set" do
      assert CodeScanningAlerts.csa003_stale_alerts("hyperpolymath", "test-nonexistent") == []
    end
  end

  describe "csa004_dismissed_without_fix/2" do
    test "returns empty list when GITHUB_TOKEN is not set" do
      assert CodeScanningAlerts.csa004_dismissed_without_fix("hyperpolymath", "test-nonexistent") ==
               []
    end
  end

  describe "scan/2" do
    test "returns error tuple when GITHUB_TOKEN is not set" do
      assert {:error, msg} = CodeScanningAlerts.scan("hyperpolymath", "test-nonexistent")
      assert msg =~ "GITHUB_TOKEN not set"
    end
  end

  describe "scan_from_path/1" do
    test "returns error when remote is not a github URL" do
      tmp = Path.join(System.tmp_dir!(), "csa-test-#{System.unique_integer([:positive])}")
      File.mkdir_p!(tmp)

      # Registered before anything can fail, so the directory is removed
      # even when a git command or an assertion below fails.
      on_exit(fn -> File.rm_rf!(tmp) end)

      # Fail here, with git's own output, if the fixture repo cannot be
      # built -- otherwise the assertion on the error message is misleading.
      assert {_, 0} = System.cmd("git", ["init", "-q"], cd: tmp, stderr_to_stdout: true)

      assert {_, 0} =
               System.cmd(
                 "git",
                 ["remote", "add", "origin", "http://gitea.example.com/foo/bar.git"],
                 cd: tmp,
                 stderr_to_stdout: true
               )

      assert {:error, msg} = CodeScanningAlerts.scan_from_path(tmp)
      assert msg =~ "Remote URL is not a GitHub URL"
    end
  end
end
diff --git a/test/recipe_health_test.exs b/test/recipe_health_test.exs new file mode 100644 index 00000000..7aef8118
--- /dev/null +++ b/test/recipe_health_test.exs @@ -0,0 +1,138 @@ +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Jonathan D.A. Jewell (hyperpolymath) +# +# Tests for the closed-loop verification metric: +# OutcomeTracker.verification_rate/2 and OutcomeTracker.recipe_health/1.

defmodule Hypatia.RecipeHealthTest do
  # async: false because the outcome log is a shared on-disk resource;
  # writing test outcomes from concurrent tests would race each other.
  use ExUnit.Case, async: false

  alias Hypatia.OutcomeTracker

  @test_recipe_prefix "test-recipe-health-"

  setup do
    # Each test gets a unique recipe_id so its outcomes are isolated in
    # the shared outcomes log. We don't clean up — the verification_rate
    # aggregator filters by recipe_id so leftover records from a previous
    # run only affect their own recipe_id.
    recipe_id = @test_recipe_prefix <> Integer.to_string(System.unique_integer([:positive]))
    {:ok, recipe_id: recipe_id}
  end

  describe "verification_rate/2" do
    test "returns :no_outcomes for a recipe that has no records", %{recipe_id: recipe_id} do
      assert {:ok, :no_outcomes} = OutcomeTracker.verification_rate(recipe_id)
    end

    test "returns :insufficient_data below the threshold", %{recipe_id: recipe_id} do
      OutcomeTracker.record_outcome(recipe_id, "test-repo", "a.ex", :success, %{
        "verification" => "verified"
      })

      assert {:ok, %{rate: :insufficient_data, verifiable: 1, total: 1}} =
               OutcomeTracker.verification_rate(recipe_id, 5)
    end

    test "computes rate from verified/still_present ratio", %{recipe_id: recipe_id} do
      # 4 verified + 1 still_present = 5 verifiable, rate = 0.8
      for i <- 1..4 do
        OutcomeTracker.record_outcome(recipe_id, "r", "f#{i}", :success, %{
          "verification" => "verified"
        })
      end

      OutcomeTracker.record_outcome(recipe_id, "r", "f5", :success, %{
        "verification" => "still_present"
      })

      assert {:ok, %{rate: rate, verifiable: 5, verified: 4, still_present: 1}} =
               OutcomeTracker.verification_rate(recipe_id, 5)

      assert_in_delta(rate, 0.8, 0.001)
    end

    test "excludes scan_failed and unverified from the denominator", %{recipe_id: recipe_id} do
      # 5 verified + 3 scan_failed + 3 unverified -> rate is 1.0, not
      # diluted by environments where panic-attack wasn't available.
      for i <- 1..5 do
        OutcomeTracker.record_outcome(recipe_id, "r", "v#{i}", :success, %{
          "verification" => "verified"
        })
      end

      for i <- 1..3 do
        OutcomeTracker.record_outcome(recipe_id, "r", "sf#{i}", :success, %{
          "verification" => "scan_failed"
        })

        OutcomeTracker.record_outcome(recipe_id, "r", "u#{i}", :success, %{
          "verification" => "unverified"
        })
      end

      assert {:ok, %{rate: 1.0, verifiable: 5, scan_failed: 3, unverified: 3}} =
               OutcomeTracker.verification_rate(recipe_id, 5)
    end
  end

  describe "recipe_health/1" do
    test "returns at least the recipe we just recorded outcomes for", %{recipe_id: recipe_id} do
      for i <- 1..6 do
        OutcomeTracker.record_outcome(recipe_id, "r", "f#{i}", :success, %{
          "verification" => "verified"
        })
      end

      rows = OutcomeTracker.recipe_health(min_attempts: 5)
      ours = Enum.find(rows, &(&1.recipe_id == recipe_id))

      assert ours != nil
      assert ours.successes == 6
      assert ours.verification.verified == 6
      assert ours.verification.rate == 1.0
      assert ours.status == :healthy
    end

    test "tags quarantine_candidate when verification rate is below 0.30", %{recipe_id: recipe_id} do
      # 1 verified + 9 still_present = 10 verifiable, rate = 0.1
      OutcomeTracker.record_outcome(recipe_id, "r", "v1", :success, %{
        "verification" => "verified"
      })

      for i <- 1..9 do
        OutcomeTracker.record_outcome(recipe_id, "r", "sp#{i}", :success, %{
          "verification" => "still_present"
        })
      end

      rows = OutcomeTracker.recipe_health(min_attempts: 5)
      ours = Enum.find(rows, &(&1.recipe_id == recipe_id))

      # Fail with a clear message if the row is missing, instead of
      # raising on `nil.status` below.
      assert ours != nil, "recipe_health/1 returned no row for #{recipe_id}"
      assert ours.status == :quarantine_candidate
    end

    test "tags degraded between quarantine and healthy", %{recipe_id: recipe_id} do
      # 3 verified + 7 still_present = 10 verifiable, rate = 0.3
      # → just at the quarantine threshold (0.30), so degraded (< 0.70).
      for i <- 1..3 do
        OutcomeTracker.record_outcome(recipe_id, "r", "v#{i}", :success, %{
          "verification" => "verified"
        })
      end

      for i <- 1..7 do
        OutcomeTracker.record_outcome(recipe_id, "r", "sp#{i}", :success, %{
          "verification" => "still_present"
        })
      end

      rows = OutcomeTracker.recipe_health(min_attempts: 5)
      ours = Enum.find(rows, &(&1.recipe_id == recipe_id))

      assert ours != nil, "recipe_health/1 returned no row for #{recipe_id}"
      assert ours.status == :degraded
    end
  end
end
diff --git a/test/secret_scanning_alerts_test.exs b/test/secret_scanning_alerts_test.exs new file mode 100644 index 00000000..95ea8ce4 --- /dev/null +++ b/test/secret_scanning_alerts_test.exs @@ -0,0 +1,69 @@ +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Jonathan D.A. Jewell (hyperpolymath) +# +# Tests for Secret Scanning alert querying rules (SSA001-SSA004). +# Exercise the logic without hitting the GitHub API by relying on +# token-absent behaviour and direct helper calls.

defmodule Hypatia.Rules.SecretScanningAlertsTest do
  # async: false -- every test runs with GITHUB_TOKEN removed from the
  # VM-wide environment, which must not overlap with other test modules.
  use ExUnit.Case, async: false

  alias Hypatia.Rules.SecretScanningAlerts

  setup do
    # Run each test without a token; put the previous value back afterwards.
    old_token = System.get_env("GITHUB_TOKEN")
    System.delete_env("GITHUB_TOKEN")

    on_exit(fn ->
      if old_token, do: System.put_env("GITHUB_TOKEN", old_token)
    end)

    :ok
  end

  describe "ssa001_open_alerts/2" do
    test "returns empty list when GITHUB_TOKEN is not set" do
      assert SecretScanningAlerts.ssa001_open_alerts("hyperpolymath", "test-nonexistent") == []
    end
  end

  describe "ssa002_severity_summary/2" do
    test "returns empty list when GITHUB_TOKEN is not set" do
      assert SecretScanningAlerts.ssa002_severity_summary("hyperpolymath", "test-nonexistent") == []
    end
  end

  describe "ssa003_stale_alerts/2" do
    test "returns empty list when GITHUB_TOKEN is not set" do
      assert SecretScanningAlerts.ssa003_stale_alerts("hyperpolymath", "test-nonexistent") == []
    end
  end

  describe "ssa004_dismissed_without_fix/2" do
    test "returns empty list when GITHUB_TOKEN is not set" do
      assert SecretScanningAlerts.ssa004_dismissed_without_fix("hyperpolymath", "test-nonexistent") ==
               []
    end
  end

  describe "scan/2" do
    test "returns error tuple when GITHUB_TOKEN is not set" do
      assert {:error, msg} = SecretScanningAlerts.scan("hyperpolymath", "test-nonexistent")
      assert msg =~ "GITHUB_TOKEN not set"
    end
  end

  describe "scan_from_path/1" do
    test "returns error when remote is not a github URL" do
      tmp = Path.join(System.tmp_dir!(), "ssa-test-#{System.unique_integer([:positive])}")
      File.mkdir_p!(tmp)

      # Registered before anything can fail, so the directory is removed
      # even when a git command or an assertion below fails.
      on_exit(fn -> File.rm_rf!(tmp) end)

      # Fail here, with git's own output, if the fixture repo cannot be
      # built -- otherwise the assertion on the error message is misleading.
      assert {_, 0} = System.cmd("git", ["init", "-q"], cd: tmp, stderr_to_stdout: true)

      assert {_, 0} =
               System.cmd(
                 "git",
                 ["remote", "add", "origin", "http://gitea.example.com/foo/bar.git"],
                 cd: tmp,
                 stderr_to_stdout: true
               )

      assert {:error, msg} = SecretScanningAlerts.scan_from_path(tmp)
      assert msg =~ "Remote URL is not a GitHub URL"
    end
  end
end
diff --git a/test/soundness/README.adoc b/test/soundness/README.adoc new file mode 100644 index
00000000..d4b252ba --- /dev/null +++ b/test/soundness/README.adoc @@ -0,0 +1,87 @@ += Soundness Gate + +== Purpose + +PR #278 documented a class of bug where the deployed `hypatia` escript +was silently dropping entire pattern families because the binary was +stale relative to the rule sources. "No findings" looks the same whether +the code is clean OR the rule is broken — that's the soundness gap. + +The soundness gate fixes that with the simplest possible mechanism: for +every rule the scanner is supposed to detect, we keep a known-bad sample +on disk. The test asserts every sample is flagged by its rule. If a rule +silently breaks (regex drift, file pruning, module rename), the build +fails before that change merges. + +== Layout + + test/soundness/ + ├── manifest.json -- rule -> fixture -> expected severity + ├── fixtures/ + │ ├── code_safety/ -- one file per code_safety rule_id + │ │ ├── believe_me.idr + │ │ ├── elixir_system_shell.ex + │ │ ├── ... + │ ├── cicd_rules/ -- one file per cicd_rules rule_id + │ └── security_errors/ -- one file per security_errors rule_id + └── README.adoc -- this file + +The test runner is `test/soundness_test.exs`, tagged `:soundness`. + +== Adding a fixture for a new rule + +1. Write a minimal known-bad sample under + `test/soundness/fixtures/<rule_module>/<rule_id>.<ext>`. Keep it as + small as possible — ideally just the bad pattern with enough context + to look real, plus an SPDX header and a "DO NOT FIX" comment so future + contributors don't try to "clean it up". + +2. Add an entry to `test/soundness/manifest.json`: ++ +[source,json] +---- +{ + "rule_module": "code_safety", + "rule_id": "your_new_rule", + "language": "rust", + "fixture": "test/soundness/fixtures/code_safety/your_new_rule.rs", + "expected_severity": "high" +} +---- + +3. Run `mix test test/soundness_test.exs` and confirm the new entry + passes (rule fires at expected severity). + +4. Commit fixture + manifest entry + the rule change in one PR. 
+ +== Removing a fixture + +Only acceptable when the rule itself is being removed or merged into +another rule. The commit message MUST justify the removal — the default +assumption is the entry stays. A bare manifest entry deletion in a PR +that doesn't also remove the rule should fail review. + +== Running + + mix test --only soundness # just the soundness suite + mix test # full suite includes soundness + mix test --exclude soundness # everything else (for dev cycles) + +== Why the manifest is JSON, not Elixir + +So a non-Elixir reviewer (or a non-Elixir scanner / a JSON Schema +validator running in CI) can verify it without a BEAM runtime. The +schema is intentionally flat and self-documenting. + +== Out of scope (today) + +* End-to-end escript-build soundness — building the escript, then + running the built binary against the fixture corpus. That's the + exact PR #278 reproduction. Worth adding next, but requires a CI + job that can build escripts (the in-process test already catches + rule-definition regressions, just not packaging regressions). + +* Fixtures for non-`code_safety` rule families. The current manifest + covers the families PR #278 specifically called out as having been + silently dropped. Workflow_audit, cicd_rules, structural_drift, + scorecard, dependabot_alerts etc. fixtures are next-iteration work. diff --git a/test/soundness/fixtures/code_safety/admitted.v b/test/soundness/fixtures/code_safety/admitted.v new file mode 100644 index 00000000..bd45f151 --- /dev/null +++ b/test/soundness/fixtures/code_safety/admitted.v @@ -0,0 +1,7 @@ +(* SPDX-License-Identifier: MPL-2.0 *) +(* SOUNDNESS FIXTURE — known-bad sample for code_safety/admitted. *) +(* DO NOT FIX. *) + +Theorem bad : 1 + 1 = 3. +Proof. + Admitted. 
diff --git a/test/soundness/fixtures/code_safety/agda_postulate.agda b/test/soundness/fixtures/code_safety/agda_postulate.agda new file mode 100644 index 00000000..0b694e1e --- /dev/null +++ b/test/soundness/fixtures/code_safety/agda_postulate.agda @@ -0,0 +1,6 @@ +-- SPDX-License-Identifier: MPL-2.0 +-- SOUNDNESS FIXTURE — known-bad sample for code_safety/agda_postulate. +-- DO NOT FIX. + +postulate + bad : Set diff --git a/test/soundness/fixtures/code_safety/believe_me.idr b/test/soundness/fixtures/code_safety/believe_me.idr new file mode 100644 index 00000000..d3e4a5ae --- /dev/null +++ b/test/soundness/fixtures/code_safety/believe_me.idr @@ -0,0 +1,8 @@ +-- SPDX-License-Identifier: MPL-2.0 +-- SOUNDNESS FIXTURE — known-bad sample for code_safety/believe_me. +-- DO NOT FIX. This file exists so the build fails if the rule stops firing. + +module Soundness.BelieveMe + +bad : Nat +bad = believe_me Z diff --git a/test/soundness/fixtures/code_safety/elixir_code_eval.ex b/test/soundness/fixtures/code_safety/elixir_code_eval.ex new file mode 100644 index 00000000..cf31496b --- /dev/null +++ b/test/soundness/fixtures/code_safety/elixir_code_eval.ex @@ -0,0 +1,9 @@ +# SPDX-License-Identifier: MPL-2.0 +# SOUNDNESS FIXTURE — known-bad sample for code_safety/elixir_code_eval. +# DO NOT FIX. + +defmodule Soundness.ElixirCodeEval do + def bad(input) do + Code.eval_string(input) + end +end diff --git a/test/soundness/fixtures/code_safety/elixir_os_cmd.ex b/test/soundness/fixtures/code_safety/elixir_os_cmd.ex new file mode 100644 index 00000000..515ae06f --- /dev/null +++ b/test/soundness/fixtures/code_safety/elixir_os_cmd.ex @@ -0,0 +1,9 @@ +# SPDX-License-Identifier: MPL-2.0 +# SOUNDNESS FIXTURE — known-bad sample for code_safety/elixir_os_cmd. +# DO NOT FIX. 
+ +defmodule Soundness.ElixirOsCmd do + def bad(user) do + :os.cmd(~c"echo #{user}") + end +end diff --git a/test/soundness/fixtures/code_safety/elixir_system_shell.ex b/test/soundness/fixtures/code_safety/elixir_system_shell.ex new file mode 100644 index 00000000..eed1e99f --- /dev/null +++ b/test/soundness/fixtures/code_safety/elixir_system_shell.ex @@ -0,0 +1,10 @@ +# SPDX-License-Identifier: MPL-2.0 +# SOUNDNESS FIXTURE — known-bad sample for code_safety/elixir_system_shell. +# This is THE pattern #278's stale-escript audit found being silently +# dropped. DO NOT FIX. + +defmodule Soundness.ElixirSystemShell do + def bad(user) do + System.shell("echo #{user}") + end +end diff --git a/test/soundness/fixtures/code_safety/getexn_on_external.res b/test/soundness/fixtures/code_safety/getexn_on_external.res new file mode 100644 index 00000000..52f311e8 --- /dev/null +++ b/test/soundness/fixtures/code_safety/getexn_on_external.res @@ -0,0 +1,5 @@ +// SPDX-License-Identifier: MPL-2.0 +// SOUNDNESS FIXTURE — known-bad sample for code_safety/getexn_on_external. +// DO NOT FIX. + +let bad = (untrusted: Js.Dict.t) => Js.Dict.getExn(untrusted, "key") diff --git a/test/soundness/fixtures/code_safety/obj_magic_ocaml.ml b/test/soundness/fixtures/code_safety/obj_magic_ocaml.ml new file mode 100644 index 00000000..9bad2081 --- /dev/null +++ b/test/soundness/fixtures/code_safety/obj_magic_ocaml.ml @@ -0,0 +1,5 @@ +(* SPDX-License-Identifier: MPL-2.0 *) +(* SOUNDNESS FIXTURE — known-bad sample for code_safety/obj_magic_ocaml. *) +(* DO NOT FIX. 
*) + +let bad (x : int) : string = Obj.magic x diff --git a/test/soundness/fixtures/code_safety/shell_download_then_run.sh b/test/soundness/fixtures/code_safety/shell_download_then_run.sh new file mode 100644 index 00000000..15e54ad9 --- /dev/null +++ b/test/soundness/fixtures/code_safety/shell_download_then_run.sh @@ -0,0 +1,6 @@ +#!/usr/bin/env bash +# SPDX-License-Identifier: MPL-2.0 +# SOUNDNESS FIXTURE — known-bad sample for code_safety/shell_download_then_run. +# DO NOT FIX. + +curl -sL https://example.com/install.sh | bash diff --git a/test/soundness/fixtures/code_safety/sorry.lean b/test/soundness/fixtures/code_safety/sorry.lean new file mode 100644 index 00000000..82086023 --- /dev/null +++ b/test/soundness/fixtures/code_safety/sorry.lean @@ -0,0 +1,5 @@ +-- SPDX-License-Identifier: MPL-2.0 +-- SOUNDNESS FIXTURE — known-bad sample for code_safety/sorry. +-- DO NOT FIX. + +theorem bad : 1 + 1 = 3 := by sorry diff --git a/test/soundness/fixtures/code_safety/transmute.rs b/test/soundness/fixtures/code_safety/transmute.rs new file mode 100644 index 00000000..95e73e14 --- /dev/null +++ b/test/soundness/fixtures/code_safety/transmute.rs @@ -0,0 +1,7 @@ +// SPDX-License-Identifier: MPL-2.0 +// SOUNDNESS FIXTURE — known-bad sample for code_safety/transmute. +// DO NOT FIX. + +pub fn bad(x: u32) -> f32 { + unsafe { std::mem::transmute(x) } +} diff --git a/test/soundness/fixtures/code_safety/unsafe_coerce.hs b/test/soundness/fixtures/code_safety/unsafe_coerce.hs new file mode 100644 index 00000000..9a3640d9 --- /dev/null +++ b/test/soundness/fixtures/code_safety/unsafe_coerce.hs @@ -0,0 +1,10 @@ +-- SPDX-License-Identifier: MPL-2.0 +-- SOUNDNESS FIXTURE — known-bad sample for code_safety/unsafe_coerce. +-- DO NOT FIX. 
+ +module Soundness.UnsafeCoerce where + +import Unsafe.Coerce + +bad :: Int -> String +bad n = unsafeCoerce n diff --git a/test/soundness/fixtures/code_safety/unwrap_without_check.rs b/test/soundness/fixtures/code_safety/unwrap_without_check.rs new file mode 100644 index 00000000..b8c0c5cd --- /dev/null +++ b/test/soundness/fixtures/code_safety/unwrap_without_check.rs @@ -0,0 +1,7 @@ +// SPDX-License-Identifier: MPL-2.0 +// SOUNDNESS FIXTURE — known-bad sample for code_safety/unwrap_without_check. +// DO NOT FIX. + +pub fn bad(s: &str) -> i32 { + s.parse::().unwrap() +} diff --git a/test/soundness/fixtures/code_safety/zig_ptr_cast.zig b/test/soundness/fixtures/code_safety/zig_ptr_cast.zig new file mode 100644 index 00000000..5b50c391 --- /dev/null +++ b/test/soundness/fixtures/code_safety/zig_ptr_cast.zig @@ -0,0 +1,7 @@ +// SPDX-License-Identifier: MPL-2.0 +// SOUNDNESS FIXTURE — known-bad sample for code_safety/zig_ptr_cast. +// DO NOT FIX. + +pub fn bad(ptr: *u8) *u32 { + return @ptrCast(*u32, ptr); +} diff --git a/test/soundness/manifest.json b/test/soundness/manifest.json new file mode 100644 index 00000000..688bbc8c --- /dev/null +++ b/test/soundness/manifest.json @@ -0,0 +1,104 @@ +{ + "_comment": "Soundness manifest — each entry asserts that the named rule MUST fire on its fixture. Catches regressions of the kind PR #278 documented (stale escript silently dropping entire pattern families). 
Add an entry whenever you add a new rule; remove only if you delete the rule.", + "entries": [ + { + "rule_module": "code_safety", + "rule_id": "believe_me", + "language": "idris2", + "fixture": "test/soundness/fixtures/code_safety/believe_me.idr", + "expected_severity": "critical" + }, + { + "rule_module": "code_safety", + "rule_id": "sorry", + "language": "lean", + "fixture": "test/soundness/fixtures/code_safety/sorry.lean", + "expected_severity": "critical" + }, + { + "rule_module": "code_safety", + "rule_id": "admitted", + "language": "coq", + "fixture": "test/soundness/fixtures/code_safety/admitted.v", + "expected_severity": "critical" + }, + { + "rule_module": "code_safety", + "rule_id": "unsafe_coerce", + "language": "haskell", + "fixture": "test/soundness/fixtures/code_safety/unsafe_coerce.hs", + "expected_severity": "critical" + }, + { + "rule_module": "code_safety", + "rule_id": "obj_magic_ocaml", + "language": "ocaml", + "fixture": "test/soundness/fixtures/code_safety/obj_magic_ocaml.ml", + "expected_severity": "critical" + }, + { + "rule_module": "code_safety", + "rule_id": "getexn_on_external", + "language": "rescript", + "fixture": "test/soundness/fixtures/code_safety/getexn_on_external.res", + "expected_severity": "critical" + }, + { + "rule_module": "code_safety", + "rule_id": "unwrap_without_check", + "language": "rust", + "fixture": "test/soundness/fixtures/code_safety/unwrap_without_check.rs", + "expected_severity": "high" + }, + { + "rule_module": "code_safety", + "rule_id": "transmute", + "language": "rust", + "fixture": "test/soundness/fixtures/code_safety/transmute.rs", + "expected_severity": "critical" + }, + { + "rule_module": "code_safety", + "rule_id": "elixir_system_shell", + "language": "elixir", + "fixture": "test/soundness/fixtures/code_safety/elixir_system_shell.ex", + "expected_severity": "critical", + "note": "THE rule PR #278 caught the stale escript silently dropping. Removing this entry needs a soundness PR explanation." 
+ }, + { + "rule_module": "code_safety", + "rule_id": "elixir_os_cmd", + "language": "elixir", + "fixture": "test/soundness/fixtures/code_safety/elixir_os_cmd.ex", + "expected_severity": "critical" + }, + { + "rule_module": "code_safety", + "rule_id": "elixir_code_eval", + "language": "elixir", + "fixture": "test/soundness/fixtures/code_safety/elixir_code_eval.ex", + "expected_severity": "critical" + }, + { + "rule_module": "code_safety", + "rule_id": "shell_download_then_run", + "language": "shell", + "fixture": "test/soundness/fixtures/code_safety/shell_download_then_run.sh", + "expected_severity": "high" + }, + { + "rule_module": "code_safety", + "rule_id": "agda_postulate", + "language": "agda", + "fixture": "test/soundness/fixtures/code_safety/agda_postulate.agda", + "expected_severity": "critical" + }, + { + "rule_module": "code_safety", + "rule_id": "zig_ptr_cast", + "language": "zig", + "fixture": "test/soundness/fixtures/code_safety/zig_ptr_cast.zig", + "expected_severity": "high" + } + ] +} diff --git a/test/soundness_test.exs b/test/soundness_test.exs new file mode 100644 index 00000000..9edd28f0 --- /dev/null +++ b/test/soundness_test.exs @@ -0,0 +1,110 @@ +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Jonathan D.A. Jewell (hyperpolymath)

defmodule Hypatia.SoundnessTest do
  @moduledoc """
  Soundness gate: every rule listed in `test/soundness/manifest.json`
  MUST fire on its declared fixture.

  This catches the class of regression PR #278 documented (the deployed
  escript was silently dropping the entire Elixir/Erlang/Coq/Lean/Agda/
  Zig/F\*/Ada pattern families because the binary was stale). Without a
  named fixture per rule, we can rebuild every binary in the world and
  still not know whether a given rule is firing — because "no findings"
  means both "code is clean" and "rule is broken."

  Adding a rule: drop a known-bad sample in `test/soundness/fixtures/`
  and add a manifest entry. The test will pick it up automatically.

  Removing a manifest entry: must be justified in the commit message
  (rule deprecated / merged / superseded). The default assumption is
  the entry stays.

  Tagged `:soundness` so CI can call this suite out separately in
  reports — a soundness failure is qualitatively different from a
  product test failure.

  A manifest entry that lacks a required field gets no per-rule test;
  it is reported by the "every entry has the required fields" test
  instead of crashing compilation of this file.
  """

  use ExUnit.Case, async: true

  @moduletag :soundness

  alias Hypatia.Rules.CodeSafety

  @manifest_path Path.expand("soundness/manifest.json", __DIR__)
  @required_fields ~w(rule_module rule_id language fixture expected_severity)

  # Decoded once, while this file is compiled: the same list drives the
  # per-rule test generation below and is handed to the manifest tests.
  @entries @manifest_path |> File.read!() |> Jason.decode!() |> Map.fetch!("entries")

  setup_all do
    {:ok, manifest: @entries}
  end

  describe "manifest" do
    test "is non-empty", %{manifest: manifest} do
      assert manifest != [],
             "Soundness manifest must list at least one rule. " <>
               "An empty manifest defeats the entire purpose of this test."
    end

    test "every fixture file exists on disk", %{manifest: manifest} do
      # Entries without a "fixture" key are the required-fields test's
      # concern; here only declared paths are checked.
      missing =
        Enum.filter(manifest, fn entry ->
          fixture = Map.get(entry, "fixture")
          is_binary(fixture) and not File.exists?(fixture)
        end)

      assert missing == [],
             "Soundness manifest references fixtures that don't exist: " <>
               inspect(Enum.map(missing, &Map.get(&1, "fixture")))
    end

    test "every entry has the required fields", %{manifest: manifest} do
      bad =
        Enum.filter(manifest, fn entry ->
          Enum.any?(@required_fields, fn key -> not Map.has_key?(entry, key) end)
        end)

      assert bad == [],
             "Soundness manifest entries missing required fields: " <> inspect(bad)
    end
  end

  describe "code_safety rules fire on their fixtures" do
    # One test per complete code_safety entry. Incomplete entries are
    # skipped here on purpose -- see the moduledoc.
    for entry <- @entries,
        Enum.all?(@required_fields, &Map.has_key?(entry, &1)),
        entry["rule_module"] == "code_safety" do
      rule_id = entry["rule_id"]
      language = entry["language"]
      fixture = entry["fixture"]
      expected_severity = entry["expected_severity"]

      test "code_safety/#{rule_id} fires on #{fixture}" do
        content = File.read!(unquote(fixture))
        findings = CodeSafety.scan_content(content, unquote(language))

        # The atom is created at compile time from the repository's own
        # manifest, not from runtime input.
        finding = Enum.find(findings, &(&1.rule == unquote(String.to_atom(rule_id))))

        assert finding != nil,
               "Soundness gate FAILED: rule code_safety/#{unquote(rule_id)} " <>
                 "did NOT fire on its fixture #{unquote(fixture)}. " <>
                 "Either the rule was removed / weakened / the regex broke, " <>
                 "or the fixture was sanitised. See PR #278 for context."

        actual_severity = to_string(finding.severity)

        assert actual_severity == unquote(expected_severity),
               "Soundness gate: rule code_safety/#{unquote(rule_id)} fired but at " <>
                 "severity '#{actual_severity}', expected '#{unquote(expected_severity)}'. " <>
                 "If this is intentional, update the manifest in the same commit."
      end
    end
  end
end