diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 18311f19..33517e61 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -51,6 +51,15 @@ jobs: - name: Run E2E test suite run: bash tests/e2e.sh + - name: Escript packaging soundness + # PR #278 documented that the rule definitions can be correct + # but the *packaged escript* can silently drop entire rule + # families. The in-process `mix test` catches rule-definition + # regressions; this step catches packaging regressions by + # building the escript fresh and running it against every + # known-bad sample in test/soundness/manifest.json. + run: bash test/soundness/run-escript-soundness.sh + e2e-rust: name: E2E — Rust CLI Scan runs-on: ubuntu-latest diff --git a/lib/application.ex b/lib/application.ex index 010a1ba0..80fa1003 100644 --- a/lib/application.ex +++ b/lib/application.ex @@ -32,6 +32,10 @@ defmodule Hypatia.Application do Hypatia.Dispatch.Pipeline, # Layer 0.7: Diagnostics -- system health monitoring and auto-recovery Hypatia.Diagnostics.Monitor, + # Layer 0.8: Watcher -- live monitoring aggregator (subscribes to + # telemetry events, maintains rolling windows in ETS, backs the + # /api/status endpoint and `mix hypatia.watch` TUI). 
+ Hypatia.Watcher, # Layer 1: Safety -- rate limiting and bot quarantine Hypatia.Safety.RateLimiter, Hypatia.Safety.Quarantine, diff --git a/lib/fleet_dispatcher.ex b/lib/fleet_dispatcher.ex index 1e20d6a4..5812060a 100644 --- a/lib/fleet_dispatcher.ex +++ b/lib/fleet_dispatcher.ex @@ -271,16 +271,49 @@ defmodule Hypatia.FleetDispatcher do # --- Eliminate dispatch helpers (called after Gate approval) --- defp do_eliminate_dispatch(:auto_execute, recipe, pattern, confidence) do - dispatch_to_robot_repo_automaton(%{ - type: :auto_fix_request, - repo: get_pattern_repo(pattern), - file: Map.get(pattern, "file", ""), - issue: Map.get(pattern, "description", ""), - fix_type: "eliminate", - confidence: confidence, - recipe_id: Map.get(recipe, "id"), - suggestion: Map.get(recipe, "description", "") - }) + recipe_id = Map.get(recipe, "id") + + if recipe_id && Hypatia.OutcomeTracker.quarantined?(recipe_id) do + # Verification-rate gate: this recipe's post-fix re-scans have been + # failing too often to trust for auto_execute. Downgrade to :review + # so rhodibot opens a PR for human inspection. This is the + # closed-loop safety net: a recipe drifting toward false fixes + # can no longer ship to repos automatically. 
+ Hypatia.Telemetry.quarantine_triggered( + kind: :recipe, + id: recipe_id, + reason: "verification_rate", + level: :auto_downgrade + ) + + Hypatia.Telemetry.dispatch_decision(confidence, + strategy: :review, + tier: :eliminate, + recipe_id: recipe_id, + repo: get_pattern_repo(pattern), + quarantine_downgraded: true + ) + + do_eliminate_dispatch(:review, recipe, pattern, confidence) + else + Hypatia.Telemetry.dispatch_decision(confidence, + strategy: :auto_execute, + tier: :eliminate, + recipe_id: recipe_id, + repo: get_pattern_repo(pattern) + ) + + dispatch_to_robot_repo_automaton(%{ + type: :auto_fix_request, + repo: get_pattern_repo(pattern), + file: Map.get(pattern, "file", ""), + issue: Map.get(pattern, "description", ""), + fix_type: "eliminate", + confidence: confidence, + recipe_id: recipe_id, + suggestion: Map.get(recipe, "description", "") + }) + end end defp do_eliminate_dispatch(:review, recipe, pattern, _confidence) do diff --git a/lib/hypatia/cli.ex b/lib/hypatia/cli.ex index e9ec7bcd..87db408d 100644 --- a/lib/hypatia/cli.ex +++ b/lib/hypatia/cli.ex @@ -157,7 +157,14 @@ defmodule Hypatia.CLI do System.halt(2) end + started = System.monotonic_time(:millisecond) findings = collect_findings(abs_path, config.rules) + duration_ms = System.monotonic_time(:millisecond) - started + + Hypatia.Telemetry.scan_complete(duration_ms, length(findings), + path: abs_path, + severity_floor: config.severity + ) # Filter by severity threshold filtered = @@ -731,6 +738,12 @@ defmodule Hypatia.CLI do "ocaml" => [".ml", ".mli"], "coq" => [".v"], "lean" => [".lean"], + "agda" => [".agda"], + "isabelle" => [".thy"], + "hol4" => [".sml"], + "zig" => [".zig"], + "fstar" => [".fst", ".fsti"], + "ada" => [".adb", ".ads"], "nickel" => [".ncl"], "elixir" => [".ex", ".exs"], "erlang" => [".erl", ".hrl"], diff --git a/lib/hypatia/telemetry.ex b/lib/hypatia/telemetry.ex new file mode 100644 index 00000000..caab7dfa --- /dev/null +++ b/lib/hypatia/telemetry.ex @@ -0,0 +1,125 @@ +# 
SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Jonathan D.A. Jewell (hyperpolymath) + +defmodule Hypatia.Telemetry do + @moduledoc """ + Centralised event-name registry for Hypatia's telemetry surface. + + Every observable decision in the pipeline emits a `:telemetry` event + through one of the helpers in this module. Centralising event names + here (rather than spreading magic atom lists across call sites) means + the watcher / Prometheus exporter / future alerting layer can + enumerate the full surface from a single source. + + All events follow the convention `[:hypatia, domain, action]`. The + measurements map carries numeric values (counts, durations); + metadata carries categorical context (recipe_id, repo, severity). + + Calling `:telemetry.execute/3` is safe with no handlers attached — + it's a no-op, so instrumenting a code path costs nothing when the + watcher isn't running (e.g. inside the escript scanner). + + ## Event catalogue + + | event | measurements | metadata | + |--------------------------------------|---------------------------|-------------------------------------------------| + | `[:hypatia, :scan, :complete]` | `duration_ms, findings` | `path, severity_floor` | + | `[:hypatia, :dispatch, :decision]` | `confidence` | `strategy, tier, recipe_id, repo` | + | `[:hypatia, :outcome, :recorded]` | `count` | `recipe_id, repo, outcome, verification` | + | `[:hypatia, :verification, :result]` | `count` | `recipe_id, repo, verdict` | + | `[:hypatia, :quarantine, :triggered]`| `count` | `kind, id, reason, level` | + | `[:hypatia, :rate_limit, :exceeded]` | `count` | `bot, scope` | + | `[:hypatia, :neural, :cycle]` | `duration_ms` | `networks_updated` | + | `[:hypatia, :soundness, :violation]` | `count` | `rule_module, rule_id, fixture` | + + ## Subscribers + + Subscribe by attaching a handler with `:telemetry.attach_many/4`: + + :telemetry.attach_many( + "my-handler", + Hypatia.Telemetry.all_events(), + fn event, measurements, metadata, _config -> + # 
handle event + end, + nil + ) + + The watcher (`Hypatia.Watcher`) does this on startup and aggregates + into rolling-window ETS tables. + """ + + @scan_complete [:hypatia, :scan, :complete] + @dispatch_decision [:hypatia, :dispatch, :decision] + @outcome_recorded [:hypatia, :outcome, :recorded] + @verification_result [:hypatia, :verification, :result] + @quarantine_triggered [:hypatia, :quarantine, :triggered] + @rate_limit_exceeded [:hypatia, :rate_limit, :exceeded] + @neural_cycle [:hypatia, :neural, :cycle] + @soundness_violation [:hypatia, :soundness, :violation] + + @all_events [ + @scan_complete, + @dispatch_decision, + @outcome_recorded, + @verification_result, + @quarantine_triggered, + @rate_limit_exceeded, + @neural_cycle, + @soundness_violation + ] + + @doc "Every event the watcher should subscribe to." + def all_events, do: @all_events + + # ─── Emit helpers ────────────────────────────────────────────────────── + # + # Hand-written rather than meta-programmed so each emit site shows + # what it's saying. Each helper takes the metadata fields as a + # keyword list to keep call sites self-documenting. 
+ + def scan_complete(duration_ms, findings, metadata) when is_integer(duration_ms) do + safe_execute(@scan_complete, %{duration_ms: duration_ms, findings: findings}, Map.new(metadata)) + end + + def dispatch_decision(confidence, metadata) when is_number(confidence) do + safe_execute(@dispatch_decision, %{confidence: confidence}, Map.new(metadata)) + end + + def outcome_recorded(metadata) do + safe_execute(@outcome_recorded, %{count: 1}, Map.new(metadata)) + end + + def verification_result(metadata) do + safe_execute(@verification_result, %{count: 1}, Map.new(metadata)) + end + + def quarantine_triggered(metadata) do + safe_execute(@quarantine_triggered, %{count: 1}, Map.new(metadata)) + end + + def rate_limit_exceeded(metadata) do + safe_execute(@rate_limit_exceeded, %{count: 1}, Map.new(metadata)) + end + + def neural_cycle(duration_ms, metadata) when is_integer(duration_ms) do + safe_execute(@neural_cycle, %{duration_ms: duration_ms}, Map.new(metadata)) + end + + def soundness_violation(metadata) do + safe_execute(@soundness_violation, %{count: 1}, Map.new(metadata)) + end + + # `:telemetry` is a transitive dep of phoenix/bandit, but if Hypatia + # is consumed in an unusual build (escript-only, stripped releases) + # the module may not be loaded. Wrap the call so a missing + # `:telemetry` is a no-op rather than a crash. Instrumentation must + # never break the host. + defp safe_execute(event, measurements, metadata) do + if Code.ensure_loaded?(:telemetry) do + :telemetry.execute(event, measurements, metadata) + end + + :ok + end +end diff --git a/lib/hypatia/watcher.ex b/lib/hypatia/watcher.ex new file mode 100644 index 00000000..f12f5e3d --- /dev/null +++ b/lib/hypatia/watcher.ex @@ -0,0 +1,287 @@ +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Jonathan D.A. Jewell (hyperpolymath) + +defmodule Hypatia.Watcher do + @moduledoc """ + Live-monitoring aggregator for the supervision tree. 
+ + Subscribes to every event in `Hypatia.Telemetry.all_events/0` and + maintains rolling-window counters in ETS so the CLI dashboard, JSON + API, and (future) alerting layer can read live state without + re-parsing JSONL or re-querying the outcomes log. + + ## State model + + Three ETS tables hold time-bucketed event counts: + - `:hypatia_watcher_5m` — 5-minute window, 5-second buckets (60 buckets) + - `:hypatia_watcher_1h` — 1-hour window, 1-minute buckets (60 buckets) + - `:hypatia_watcher_1d` — 1-day window, 1-hour buckets (24 buckets) + + Each row is `{{event, bucket_ts}, count}`. A periodic tick prunes + expired buckets so the tables don't grow unbounded. + + Also tracks: + - GenServer message-queue depths via `:erlang.process_info(pid, + :message_queue_len)` polled every 5s + - The most recent events of each telemetry event kind (up to 50 + per kind, newest first) for drill-down + + ## Back-pressure + + The watcher must NEVER block the producer. Telemetry handlers run + in the *caller's* process, so they cast to the watcher; the + watcher's mailbox is the only place events can pile up. + Hibernating after each prune tick plus a drop-on-full counter + (`dropped_events` in the snapshot) keep the watcher honest under load. + + ## Lifecycle + + Supervised by `Hypatia.Application`. On terminate, ETS tables die + with the process — live state is ephemeral by design (Phase 1 + scope). Persistence to verisim-data is Phase 3 work. + """ + + use GenServer + require Logger + + alias Hypatia.Telemetry + + @tables [ + {:hypatia_watcher_5m, 5_000, 60}, + {:hypatia_watcher_1h, 60_000, 60}, + {:hypatia_watcher_1d, 3_600_000, 24} + ] + + @prune_interval_ms 30_000 + @queue_poll_interval_ms 5_000 + @handler_id "hypatia-watcher" + # Drop telemetry events if our mailbox is over this. Keeps the + # watcher from becoming a tarpit during sweep storms. 
+ @max_mailbox 1_000 + @recent_events_per_kind 50 + + # ─── Public API ──────────────────────────────────────────────────────── + + def start_link(opts) do + GenServer.start_link(__MODULE__, opts, name: __MODULE__) + end + + @doc """ + Snapshot of current state: counters across all three windows, queue + depths, dropped-event count, and the recent-event tail. Count and + recent-event maps are keyed by event-name lists; `Hypatia.Web.ApiRouter` + flattens them to dotted strings before JSON encoding. + + Returns `%{status: :unavailable}` if the GenServer call exits. + """ + def snapshot do + GenServer.call(__MODULE__, :snapshot, 5_000) + catch + :exit, _ -> %{status: :unavailable} + end + + @doc """ + Event counts in the given window (`:m5 | :h1 | :d1`) keyed by + telemetry event name. Cheap — reads ETS directly without going + through the GenServer. Returns `%{}` when the window's table does + not exist. + """ + def counts(window \\ :m5) do + table = window_table(window) + + if :ets.info(table) == :undefined do + %{} + else + table + |> :ets.tab2list() + |> Enum.reduce(%{}, fn {{event, _bucket}, count}, acc -> + Map.update(acc, event, count, &(&1 + count)) + end) + end + end + + @doc """ + Recent events grouped by telemetry event name: a map of event name to + entries, newest first, capped at 50 per kind. Drilldown surface. + + Returns `[]` (not a map) if the GenServer call exits. + """ + def recent_events do + GenServer.call(__MODULE__, :recent_events, 5_000) + catch + :exit, _ -> [] + end + + @doc """ + Message-queue depth for each child of `Hypatia.Supervisor`, as of the + watcher's last poll (every 5s). Backs the "is anything stuck?" + view in the dashboard. Returns `%{}` if the GenServer call exits. 
+ """ + def queue_depths do + GenServer.call(__MODULE__, :queue_depths, 5_000) + catch + :exit, _ -> %{} + end + + # ─── GenServer ───────────────────────────────────────────────────────── + + @impl true + def init(_opts) do + Enum.each(@tables, fn {name, _bucket_ms, _max_buckets} -> + :ets.new(name, [:named_table, :public, :set, read_concurrency: true]) + end) + + attach_handler() + + Process.send_after(self(), :prune, @prune_interval_ms) + Process.send_after(self(), :poll_queues, @queue_poll_interval_ms) + + state = %{ + recent: %{}, + queue_depths: %{}, + dropped_events: 0, + started_at: DateTime.utc_now() + } + + {:ok, state, :hibernate} + end + + @impl true + def handle_cast({:event, event, measurements, metadata}, state) do + if mailbox_overloaded?() do + {:noreply, %{state | dropped_events: state.dropped_events + 1}} + else + now = System.system_time(:millisecond) + + record_counts(event, now) + state = record_recent(state, event, measurements, metadata, now) + + {:noreply, state} + end + end + + @impl true + def handle_call(:snapshot, _from, state) do + {:reply, + %{ + counts: %{ + m5: counts(:m5), + h1: counts(:h1), + d1: counts(:d1) + }, + queue_depths: state.queue_depths, + dropped_events: state.dropped_events, + recent_by_kind: state.recent, + uptime_seconds: DateTime.diff(DateTime.utc_now(), state.started_at), + generated_at: DateTime.utc_now() |> DateTime.to_iso8601() + }, state} + end + + def handle_call(:recent_events, _from, state) do + {:reply, state.recent, state} + end + + def handle_call(:queue_depths, _from, state) do + {:reply, state.queue_depths, state} + end + + @impl true + def handle_info(:prune, state) do + Enum.each(@tables, fn {name, bucket_ms, max_buckets} -> + prune_table(name, bucket_ms, max_buckets) + end) + + Process.send_after(self(), :prune, @prune_interval_ms) + {:noreply, state, :hibernate} + end + + def handle_info(:poll_queues, state) do + depths = collect_queue_depths() + Process.send_after(self(), :poll_queues, 
@queue_poll_interval_ms) + {:noreply, %{state | queue_depths: depths}} + end + + @impl true + def terminate(_reason, _state) do + :telemetry.detach(@handler_id) + :ok + end + + # ─── Internals ───────────────────────────────────────────────────────── + + defp attach_handler do + # Use a captured remote function (&__MODULE__.handle_event/4) rather + # than an anonymous closure — telemetry warns about local fns + # because they prevent hot-code-reloading of the handler. The + # captured function casts back to the watcher so the producer + # process never pays the watcher's processing cost. + :telemetry.attach_many( + @handler_id, + Telemetry.all_events(), + &__MODULE__.handle_event/4, + nil + ) + end + + @doc false + def handle_event(event, measurements, metadata, _config) do + GenServer.cast(__MODULE__, {:event, event, measurements, metadata}) + end + + defp record_counts(event, now) do + Enum.each(@tables, fn {name, bucket_ms, _max} -> + bucket = bucket_for(now, bucket_ms) + :ets.update_counter(name, {event, bucket}, 1, {{event, bucket}, 0}) + end) + end + + defp record_recent(state, event, measurements, metadata, now) do + entry = %{ + event: event, + measurements: measurements, + metadata: metadata, + at: now + } + + updated = + Map.update(state.recent, event, [entry], fn existing -> + [entry | existing] |> Enum.take(@recent_events_per_kind) + end) + + %{state | recent: updated} + end + + defp prune_table(name, bucket_ms, max_buckets) do + cutoff = bucket_for(System.system_time(:millisecond), bucket_ms) - max_buckets * bucket_ms + # :ets.select_delete via match_spec: {{_event, bucket}, _count} where bucket < cutoff + :ets.select_delete(name, [ + {{{:"$1", :"$2"}, :"$3"}, [{:<, :"$2", cutoff}], [true]} + ]) + end + + defp bucket_for(now_ms, bucket_ms), do: div(now_ms, bucket_ms) * bucket_ms + + defp window_table(:m5), do: :hypatia_watcher_5m + defp window_table(:h1), do: :hypatia_watcher_1h + defp window_table(:d1), do: :hypatia_watcher_1d + + defp 
mailbox_overloaded? do + {:message_queue_len, n} = Process.info(self(), :message_queue_len) + n > @max_mailbox + end + + defp collect_queue_depths do + # Walk the supervisor's children and probe each. Anything that + # isn't a live process (transient / restarting) gets nil. + case Process.whereis(Hypatia.Supervisor) do + nil -> + %{} + + sup_pid -> + sup_pid + |> Supervisor.which_children() + |> Enum.reduce(%{}, fn + {id, pid, _type, _modules}, acc when is_pid(pid) -> + case Process.info(pid, :message_queue_len) do + {:message_queue_len, len} -> Map.put(acc, inspect(id), len) + _ -> Map.put(acc, inspect(id), nil) + end + + {id, _, _, _}, acc -> + Map.put(acc, inspect(id), nil) + end) + end + end +end diff --git a/lib/hypatia/web/api_router.ex b/lib/hypatia/web/api_router.ex new file mode 100644 index 00000000..199673a4 --- /dev/null +++ b/lib/hypatia/web/api_router.ex @@ -0,0 +1,140 @@ +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Jonathan D.A. Jewell (hyperpolymath) + +defmodule Hypatia.Web.ApiRouter do + @moduledoc """ + Operational HTTP API. Forwarded from `Hypatia.Web.Router` at `/api`. + + All endpoints are loopback-only by default (operational data must + not leak past the local machine). Set `HYPATIA_API_ALLOW_NONLOCAL=true` + to bypass — the bypass is logged on each request so audit captures it. + + Endpoints: + GET /status full Watcher snapshot + GET /counts/:window event counts in window (5m | 1h | 1d) + GET /recipes recipe-health rows (?status=...) 
+ """ + + use Plug.Router + + require Logger + + plug :match + plug :loopback_only + plug :dispatch + + get "/status" do + # Watcher.snapshot/0 returns %{status: :unavailable} when the GenServer + # call exits (watcher down or timed out). normalize_snapshot/1 expects a + # full snapshot and would raise on that map, so answer 503 explicitly. + case Hypatia.Watcher.snapshot() do + %{status: :unavailable} -> + json(conn, 503, %{error: "watcher_unavailable"}) + + snap -> + json(conn, 200, normalize_snapshot(snap)) + end + end + + get "/counts/:window" do + case parse_window(window) do + {:ok, atom} -> + counts = Hypatia.Watcher.counts(atom) + json(conn, 200, %{window: window, counts: flatten_event_keys(counts)}) + + :error -> + json(conn, 400, %{ + error: "unknown_window", + got: window, + valid: ["5m", "1h", "1d"] + }) + end + end + + get "/recipes" do + conn = Plug.Conn.fetch_query_params(conn) + rows = Hypatia.OutcomeTracker.recipe_health() + + case Map.get(conn.query_params, "status") do + nil -> + json(conn, 200, %{count: length(rows), rows: rows}) + + statuses -> + try do + allowed = statuses |> String.split(",") |> Enum.map(&String.to_existing_atom/1) + filtered = Enum.filter(rows, &(&1.status in allowed)) + json(conn, 200, %{count: length(filtered), rows: filtered}) + rescue + ArgumentError -> json(conn, 400, %{error: "unknown_status_filter"}) + end + end + end + + match _ do + json(conn, 404, %{error: "not_found"}) + end + + # ─── Plug ────────────────────────────────────────────────────────────── + + defp loopback_only(conn, _opts) do + cond do + System.get_env("HYPATIA_API_ALLOW_NONLOCAL") == "true" -> + Logger.warning( + "Hypatia /api access from #{inspect(conn.remote_ip)} allowed by " <> + "HYPATIA_API_ALLOW_NONLOCAL env override" + ) + + conn + + loopback_ip?(conn.remote_ip) -> + conn + + true -> + conn + |> put_resp_content_type("application/json") + |> send_resp( + 403, + Jason.encode!(%{ + error: "loopback_only", + path: conn.request_path, + hint: + "Hypatia /api is loopback-only. Set HYPATIA_API_ALLOW_NONLOCAL=true to " <> + "permit non-local clients, or tunnel via SSH." 
+ }) + ) + |> halt() + end + end + + defp loopback_ip?({127, _, _, _}), do: true + defp loopback_ip?({0, 0, 0, 0, 0, 0, 0, 1}), do: true + defp loopback_ip?(_), do: false + + # ─── Helpers ─────────────────────────────────────────────────────────── + + defp json(conn, status, body) do + conn + |> put_resp_content_type("application/json") + |> send_resp(status, Jason.encode!(body)) + end + + defp parse_window("5m"), do: {:ok, :m5} + defp parse_window("1h"), do: {:ok, :h1} + defp parse_window("1d"), do: {:ok, :d1} + defp parse_window(_), do: :error + + defp flatten_event_keys(counts_map) do + Map.new(counts_map, fn {k, v} -> {Enum.join(k, "."), v} end) + end + + defp normalize_snapshot(snap) do + %{ + counts: %{ + m5: flatten_event_keys(snap.counts.m5), + h1: flatten_event_keys(snap.counts.h1), + d1: flatten_event_keys(snap.counts.d1) + }, + queue_depths: snap.queue_depths, + dropped_events: snap.dropped_events, + uptime_seconds: snap.uptime_seconds, + generated_at: snap.generated_at, + recent_by_kind: + Map.new(snap.recent_by_kind, fn {event, entries} -> + {Enum.join(event, "."), + Enum.map(entries, fn entry -> %{entry | event: Enum.join(entry.event, ".")} end)} + end) + } + end +end diff --git a/lib/hypatia/web/router.ex b/lib/hypatia/web/router.ex index b08bd56a..9084f4dc 100644 --- a/lib/hypatia/web/router.ex +++ b/lib/hypatia/web/router.ex @@ -7,6 +7,15 @@ defmodule Hypatia.Web.Router do Serves well-known service discovery manifests and health checks. Listens on port 9090 via Bandit, supervised by the OTP application. 
+ + Public: + GET /health liveness probe (no auth, no IP filter) + GET /.well-known/groove service discovery (via GroovePlug) + + Loopback-only (operational): + GET /api/status live Watcher snapshot + GET /api/counts/:window event counts in window + GET /api/recipes recipe-health roll-up """ use Plug.Router @@ -34,7 +43,14 @@ defmodule Hypatia.Web.Router do |> send_resp(200, Jason.encode!(health)) end + # /api/* is gated to loopback in Hypatia.Web.ApiRouter — keeps + # operational data off the public surface while leaving /health + # reachable for container orchestrators. + forward "/api", to: Hypatia.Web.ApiRouter + match _ do - send_resp(conn, 404, Jason.encode!(%{error: "not_found"})) + conn + |> put_resp_content_type("application/json") + |> send_resp(404, Jason.encode!(%{error: "not_found"})) end end diff --git a/lib/mix/tasks/hypatia.record_outcome.ex b/lib/mix/tasks/hypatia.record_outcome.ex new file mode 100644 index 00000000..08c10ebc --- /dev/null +++ b/lib/mix/tasks/hypatia.record_outcome.ex @@ -0,0 +1,131 @@ +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Jonathan D.A. Jewell (hyperpolymath) + +defmodule Mix.Tasks.Hypatia.RecordOutcome do + @moduledoc """ + Record the outcome of an applied fix, with default-on re-scan + verification. Designed to be called from gitbot-fleet's bash + dispatch-runner after a fix is committed. 
+ + Usage: + + mix hypatia.record_outcome \\ + --recipe recipe-pin-action-sha \\ + --repo hyperpolymath/007-lang \\ + --file .github/workflows/ci.yml \\ + --outcome success + + Optional: + --pattern-id PA-013-pin-deps (defaults to recipe id) + --category DependencyPinning (auto-derived from recipe + if omitted) + --repo-path /path/to/local/clone (defaults to + $HYPATIA_REPOS_DIR/{repo}) + --no-verify (record without re-scanning) + --format text|json (default: text) + + Exit codes: + 0 outcome recorded, verification verified-clean OR not attempted + 0 outcome recorded, verification scan_unavailable (env didn't + have panic-attack; counted distinctly in recipe_health) + 2 outcome recorded, verification still_present (the fix was + claimed but the re-scan still finds the weak point — the + dispatch-runner SHOULD treat this as a failed batch and + consider rollback) + 1 bad arguments or unrecoverable error + + The non-zero exit on still_present is the contract that lets the bash + runner notice a false-fix without reading JSON. 
+ """ + + use Mix.Task + + @shortdoc "Record a fix outcome (default-verifies via panic-attack)" + + @switches [ + recipe: :string, + repo: :string, + file: :string, + outcome: :string, + pattern_id: :string, + category: :string, + repo_path: :string, + no_verify: :boolean, + format: :string + ] + + @impl Mix.Task + def run(argv) do + {opts, _, _} = OptionParser.parse(argv, switches: @switches) + + required = [:recipe, :repo, :file, :outcome] + missing = Enum.filter(required, &(Keyword.get(opts, &1) in [nil, ""])) + + if missing != [] do + Mix.shell().error("missing required option(s): #{Enum.map_join(missing, ", ", &"--#{&1}")}") + exit({:shutdown, 1}) + end + + outcome_atom = + case Keyword.fetch!(opts, :outcome) do + "success" -> :success + "failure" -> :failure + "false_positive" -> :false_positive + other -> + Mix.shell().error("invalid --outcome '#{other}' (use success|failure|false_positive)") + exit({:shutdown, 1}) + end + + recipe = Keyword.fetch!(opts, :recipe) + repo = Keyword.fetch!(opts, :repo) + file = Keyword.fetch!(opts, :file) + + verify? = not Keyword.get(opts, :no_verify, false) + + call_opts = + [] + |> maybe_put(:pattern_id, Keyword.get(opts, :pattern_id)) + |> maybe_put(:category, Keyword.get(opts, :category)) + |> maybe_put(:repo_path, Keyword.get(opts, :repo_path)) + + {record, verification} = + if verify? 
do + {:ok, record, v} = + Hypatia.OutcomeTracker.record_outcome_for_fix(recipe, repo, file, outcome_atom, call_opts) + + {record, v} + else + {:ok, record} = + Hypatia.OutcomeTracker.record_outcome(recipe, repo, file, outcome_atom, %{ + "verification" => "unverified" + }) + + {record, :not_verified} + end + + case Keyword.get(opts, :format, "text") do + "json" -> emit_json(record, verification) + _ -> emit_text(record, verification) + end + + if verification == :false_positive do + exit({:shutdown, 2}) + end + end + + defp maybe_put(opts, _key, nil), do: opts + defp maybe_put(opts, _key, ""), do: opts + defp maybe_put(opts, key, value), do: Keyword.put(opts, key, value) + + defp emit_text(record, verification) do + Mix.shell().info( + "recorded: #{record["recipe_id"]} in #{record["repo"]}/#{record["file"]} " <> + "outcome=#{record["outcome"]} verification=#{verification}" + ) + end + + defp emit_json(record, verification) do + payload = Map.put(record, "verification_result", Atom.to_string(verification)) + IO.puts(Jason.encode!(payload)) + end +end diff --git a/lib/mix/tasks/hypatia.watch.ex b/lib/mix/tasks/hypatia.watch.ex new file mode 100644 index 00000000..9cfd8eda --- /dev/null +++ b/lib/mix/tasks/hypatia.watch.ex @@ -0,0 +1,214 @@ +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Jonathan D.A. Jewell (hyperpolymath) + +defmodule Mix.Tasks.Hypatia.Watch do + @moduledoc """ + Terminal dashboard for the live `Hypatia.Watcher` state. + + Refreshes every `--interval` seconds (default 2). No external deps — + uses IO.ANSI for cursor positioning, so it works over plain SSH + without any TUI library, terminfo, or curses dependency. + + Two operating modes: + + Local mode (default) talks to a Watcher GenServer in THIS BEAM. + Useful for `iex -S mix` + `mix hypatia.watch` + in another terminal that shares the node. + + Remote mode (--url) polls a running Hypatia's HTTP /api/status + endpoint. 
The default URL is + http://127.0.0.1:9090/api/status (loopback + only — to watch a different host, set up an + SSH tunnel first). + + Press Ctrl+C to exit (the terminal will be reset to a clean state by + the alternate-screen restore). + + ## Options + + --interval SECONDS refresh rate (default 2) + --url URL poll the /api/status endpoint here + instead of attaching to a local Watcher + --once render once and exit (good for cron / scripts) + --plain disable ANSI cursor positioning (logs append + rather than refresh-in-place — for CI logs + or anywhere you'd pipe the output) + + ## Examples + + mix hypatia.watch # local Watcher, refresh 2s + mix hypatia.watch --interval 5 # slower refresh + mix hypatia.watch --url http://localhost:9090/api/status + mix hypatia.watch --once --plain # one-shot dump for logs + """ + + use Mix.Task + + @shortdoc "Live terminal dashboard for the Hypatia Watcher" + + @switches [ + interval: :integer, + url: :string, + once: :boolean, + plain: :boolean + ] + + @impl Mix.Task + def run(argv) do + Mix.Task.run("app.start") + + {opts, _, _} = OptionParser.parse(argv, switches: @switches) + + interval_ms = Keyword.get(opts, :interval, 2) * 1000 + url = Keyword.get(opts, :url) + plain? = Keyword.get(opts, :plain, false) + once? = Keyword.get(opts, :once, false) + + fetch = fetch_fn(url) + + if once? do + render(fetch.(), plain?, header_only: true) + else + if not plain?, do: IO.write([IO.ANSI.clear(), IO.ANSI.cursor(1, 1)]) + loop(fetch, interval_ms, plain?) + end + end + + defp loop(fetch, interval_ms, plain?) do + snapshot = fetch.() + render(snapshot, plain?) + Process.sleep(interval_ms) + loop(fetch, interval_ms, plain?) 
+ end + + defp fetch_fn(nil) do + fn -> + case Hypatia.Watcher.snapshot() do + %{status: :unavailable} -> :unavailable + snap -> snap + end + end + end + + defp fetch_fn(url) do + fn -> + case System.cmd("curl", ["-sf", "--max-time", "3", url], stderr_to_stdout: true) do + {body, 0} -> + case Jason.decode(body) do + {:ok, snap} -> snap + _ -> :unavailable + end + + _ -> + :unavailable + end + end + end + + @doc """ + Render a snapshot to stdout. Public so the unit test can call it + directly without going through the run-loop + Mix.Task plumbing. + Set `plain? = true` to suppress ANSI cursor positioning (append-style + output suitable for piping to a log). + """ + def render(snap, plain?, opts \\ []) + + def render(:unavailable, plain?, _opts) do + if not plain?, do: IO.write([IO.ANSI.clear(), IO.ANSI.cursor(1, 1)]) + + IO.puts( + "[hypatia.watch] Watcher unavailable. Is Hypatia running? " <> + "(Try `iex -S mix` or pass --url http://host:9090/api/status)" + ) + end + + def render(snap, plain?, _opts) do + if not plain?, do: IO.write([IO.ANSI.clear(), IO.ANSI.cursor(1, 1)]) + + counts = snap[:counts] || snap["counts"] || %{} + + IO.puts(bold("Hypatia Watcher ") <> dim("(refresh every 2s — Ctrl+C to exit)")) + IO.puts(dim(String.duplicate("─", 78))) + IO.puts(format_uptime(snap)) + IO.puts("") + + IO.puts(bold("Events / 5min")) + render_counts(get_window(counts, :m5)) + IO.puts("") + + IO.puts(bold("Events / 1hr")) + render_counts(get_window(counts, :h1)) + IO.puts("") + + IO.puts(bold("GenServer queue depths")) + render_queue_depths(snap[:queue_depths] || snap["queue_depths"] || %{}) + IO.puts("") + + dropped = snap[:dropped_events] || snap["dropped_events"] || 0 + + if dropped > 0 do + IO.puts(red("⚠ Dropped #{dropped} telemetry event(s) under load")) + end + + IO.puts(dim("Last updated: #{snap[:generated_at] || snap["generated_at"]}")) + end + + defp render_counts(counts) when map_size(counts) == 0 do + IO.puts(" (no events)") + end + + defp render_counts(counts) 
do + counts + |> Enum.sort_by(fn {_event, count} -> -count end) + |> Enum.each(fn {event, count} -> + IO.puts( + " " <> String.pad_trailing(format_event(event), 36) <> dim(Integer.to_string(count)) + ) + end) + end + + defp render_queue_depths(map) when map_size(map) == 0 do + IO.puts(" (no supervised processes visible)") + end + + defp render_queue_depths(map) do + map + |> Enum.sort() + |> Enum.each(fn {name, depth} -> + depth_str = + case depth do + nil -> dim("—") + n when n > 100 -> red(Integer.to_string(n)) + n when n > 10 -> yellow(Integer.to_string(n)) + n -> Integer.to_string(n) + end + + IO.puts(" " <> String.pad_trailing(to_string(name), 40) <> depth_str) + end) + end + + defp get_window(counts, atom) do + counts[atom] || counts[Atom.to_string(atom)] || %{} + end + + defp format_event(event) when is_binary(event), do: event + defp format_event(event) when is_list(event), do: Enum.join(event, ".") + defp format_event(event), do: inspect(event) + + defp format_uptime(snap) do + seconds = snap[:uptime_seconds] || snap["uptime_seconds"] || 0 + "Uptime: #{format_seconds(seconds)}" + end + + defp format_seconds(s) when s < 60, do: "#{s}s" + defp format_seconds(s) when s < 3600, do: "#{div(s, 60)}m #{rem(s, 60)}s" + + defp format_seconds(s) do + "#{div(s, 3600)}h #{div(rem(s, 3600), 60)}m" + end + + defp bold(text), do: IO.ANSI.bright() <> text <> IO.ANSI.normal() + defp dim(text), do: IO.ANSI.faint() <> text <> IO.ANSI.normal() + defp red(text), do: IO.ANSI.red() <> text <> IO.ANSI.reset() + defp yellow(text), do: IO.ANSI.yellow() <> text <> IO.ANSI.reset() +end diff --git a/lib/outcome_tracker.ex b/lib/outcome_tracker.ex index 9e525613..af0ae6f9 100644 --- a/lib/outcome_tracker.ex +++ b/lib/outcome_tracker.ex @@ -73,6 +73,13 @@ defmodule Hypatia.OutcomeTracker do # Update recipe confidence (now annealing-aware) update_recipe_confidence(recipe_id) + Hypatia.Telemetry.outcome_recorded( + recipe_id: recipe_id, + repo: repo, + outcome: outcome_str, + 
verification: Map.get(metadata, "verification", "unverified") + ) + Logger.info("Outcome recorded: #{recipe_id} in #{repo}/#{file} -> #{outcome_str}") {:ok, record} end @@ -85,6 +92,18 @@ defmodule Hypatia.OutcomeTracker do or :scan_failed. """ def verify_fix(repo_path, pattern_id, category) do + result = do_verify_fix(repo_path, pattern_id, category) + + Hypatia.Telemetry.verification_result( + recipe_id: pattern_id, + repo: Path.basename(repo_path), + verdict: result + ) + + result + end + + defp do_verify_fix(repo_path, pattern_id, category) do case System.cmd("panic-attack", ["assail", repo_path, "--output-format", "json", "--quiet"], stderr_to_stdout: true) do {output, 0} -> @@ -114,12 +133,84 @@ defmodule Hypatia.OutcomeTracker do end end + @doc """ + Canonical entry point for the dispatch-runner / external automaton + *after* a fix has been applied to a repo. + + This is the default-verify variant: unlike `record_outcome/5` (which is + intentionally unverified, used by replay and rollback paths), this + always attempts a post-fix re-scan via panic-attack when `outcome` is + `:success`. It auto-derives the `category` and `pattern_id` from the + recipe registry, so callers don't have to thread them through. + + Failure to derive `category` (recipe not found / no `target_categories`) + falls back to `record_outcome/5` with `"verification" = "scan_skipped"` + so the outcome is still recorded and the verification gap is auditable. + + Returns `{:ok, record, verification}` where verification is one of + `:verified | :false_positive | :scan_unavailable | :not_verified`. 
+ """ + def record_outcome_for_fix(recipe_id, repo, file, outcome, opts \\ []) do + derived = + case derive_verify_opts(recipe_id, repo, opts) do + {:ok, derived_opts} -> Keyword.merge(derived_opts, opts) + {:error, _reason} -> Keyword.put(opts, :verify, false) + end + + record_and_verify(recipe_id, repo, file, outcome, Keyword.put(derived, :verify, true)) + end + + defp derive_verify_opts(recipe_id, repo, opts) do + cond do + Keyword.has_key?(opts, :category) and Keyword.has_key?(opts, :repo_path) -> + {:ok, opts} + + true -> + recipe_path = find_recipe_file(recipe_id) + + case recipe_path && File.read(recipe_path) do + {:ok, content} -> + case Jason.decode(content) do + {:ok, recipe} -> + cats = Map.get(recipe, "target_categories", []) + + category = + Keyword.get(opts, :category, List.first(cats) || "") + + repos_dir = System.get_env("HYPATIA_REPOS_DIR", File.cwd!()) + + repo_path = + Keyword.get(opts, :repo_path, Path.join(repos_dir, repo)) + + {:ok, + [ + category: category, + pattern_id: Keyword.get(opts, :pattern_id, recipe_id), + repo_path: repo_path + ]} + + _ -> + {:error, :recipe_unparseable} + end + + _ -> + {:error, :recipe_not_found} + end + end + end + @doc """ Record an outcome and optionally verify the fix by re-scanning. - If verify: true is passed, runs panic-attacker against the repo after - recording the outcome. If the pattern is still present, records a - :false_positive to correct the confidence. + Low-level: prefer `record_outcome_for_fix/5` from external runners + (it auto-derives `category` from the recipe registry). This entry + point is for in-tree call sites that already have those fields in + hand. + + If `verify: true` is passed, runs `panic-attacker` against the repo + after recording the outcome. The verification verdict is persisted + on every branch (verified / still_present / scan_failed / unverified) + so `recipe_health/1` can compute meaningful aggregates. 
""" def record_and_verify(recipe_id, repo, file, outcome, opts \\ []) do if Keyword.get(opts, :verify, false) and outcome == :success do @@ -463,6 +554,75 @@ defmodule Hypatia.OutcomeTracker do end) end + @doc """ + Cheap predicate: is this recipe currently auto-quarantined on the + basis of its verification rate? + + A recipe is quarantined when its verification rate (verified / + (verified + still_present)) drops below `:threshold` AND the + verifiable-outcomes denominator has crossed `:min_attempts`. + Recipes with no verification data are NOT quarantined — the gate + errs on the side of letting them dispatch so the runner can + produce verification data in the first place. + + An operator override is available via `HYPATIA_RECIPE_QUARANTINE_DISABLE=true` + for emergencies; the override is logged when consulted so audit + history captures why an "unhealthy" recipe was still dispatched. + + Options: + :threshold -- rate below which to quarantine (default 0.30) + :min_attempts -- minimum verifiable count before the gate engages + (default 5) + """ + def quarantined?(recipe_id, opts \\ []) do + cond do + System.get_env("HYPATIA_RECIPE_QUARANTINE_DISABLE") == "true" -> + Logger.warning( + "Recipe quarantine gate DISABLED via env override -- recipe " <> + "#{recipe_id} dispatched without verification-rate check." + ) + + false + + true -> + threshold = Keyword.get(opts, :threshold, 0.30) + min_attempts = Keyword.get(opts, :min_attempts, 5) + + case verification_rate(recipe_id, min_attempts) do + {:ok, %{rate: rate}} when is_float(rate) -> + quarantined = rate < threshold + + if quarantined do + Hypatia.Telemetry.quarantine_triggered( + kind: :recipe, + id: recipe_id, + reason: "verification_rate_below_threshold", + level: :auto, + rate: rate, + threshold: threshold + ) + + Logger.warning( + "Recipe #{recipe_id} AUTO-QUARANTINED: " <> + "verification rate #{:erlang.float_to_binary(rate, decimals: 2)} " <> + "< threshold #{threshold}. 
Will be downgraded from " <> + ":auto_execute to :review until human reviews recipe." + ) + end + + quarantined + + _ -> + # :no_outcomes / :insufficient_data — let the dispatch through. + # The whole point of letting it through is to accumulate + # verification data; gating here would create a chicken-and- + # egg problem where new recipes can never earn enough data + # to leave quarantine. + false + end + end + end + # --- Private --- defp all_recipe_ids_with_outcomes do diff --git a/test/api_router_test.exs b/test/api_router_test.exs new file mode 100644 index 00000000..5d1e9c22 --- /dev/null +++ b/test/api_router_test.exs @@ -0,0 +1,131 @@ +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Jonathan D.A. Jewell (hyperpolymath) + +defmodule Hypatia.Web.ApiRouterTest do + use ExUnit.Case, async: false + use Plug.Test + + alias Hypatia.Web.ApiRouter + alias Hypatia.Telemetry, as: T + + setup do + case Process.whereis(Hypatia.Watcher) do + nil -> + {:ok, pid} = Hypatia.Watcher.start_link([]) + on_exit(fn -> if Process.alive?(pid), do: GenServer.stop(pid) end) + + _ -> + :ok + end + + System.delete_env("HYPATIA_API_ALLOW_NONLOCAL") + :ok + end + + describe "loopback gate" do + test "127.0.0.1 caller is allowed through" do + conn = build_conn(:get, "/status", {127, 0, 0, 1}) + + conn = ApiRouter.call(conn, ApiRouter.init([])) + + assert conn.status == 200 + end + + test "non-loopback caller is rejected with 403" do + conn = build_conn(:get, "/status", {10, 1, 2, 3}) + + conn = ApiRouter.call(conn, ApiRouter.init([])) + + assert conn.status == 403 + body = Jason.decode!(conn.resp_body) + assert body["error"] == "loopback_only" + end + + test "HYPATIA_API_ALLOW_NONLOCAL=true bypasses the gate" do + System.put_env("HYPATIA_API_ALLOW_NONLOCAL", "true") + + on_exit(fn -> System.delete_env("HYPATIA_API_ALLOW_NONLOCAL") end) + + conn = build_conn(:get, "/status", {10, 1, 2, 3}) + conn = ApiRouter.call(conn, ApiRouter.init([])) + + assert conn.status == 200 + end + end 
+ + describe "GET /status" do + test "returns a snapshot with normalised event keys" do + T.scan_complete(50, 3, path: "/tmp/x", severity_floor: "low") + Process.sleep(50) + + conn = build_conn(:get, "/status", {127, 0, 0, 1}) + conn = ApiRouter.call(conn, ApiRouter.init([])) + + assert conn.status == 200 + body = Jason.decode!(conn.resp_body) + + # Event names should be dotted strings, not JSON arrays. + assert Map.has_key?(body["counts"]["m5"], "hypatia.scan.complete") + assert is_integer(body["counts"]["m5"]["hypatia.scan.complete"]) + end + end + + describe "GET /counts/:window" do + test "returns counts for a valid window" do + T.outcome_recorded(recipe_id: "x", repo: "r", outcome: "success", verification: "verified") + Process.sleep(50) + + conn = build_conn(:get, "/counts/5m", {127, 0, 0, 1}) + conn = ApiRouter.call(conn, ApiRouter.init([])) + + assert conn.status == 200 + body = Jason.decode!(conn.resp_body) + assert body["window"] == "5m" + assert is_map(body["counts"]) + end + + test "returns 400 for an unknown window" do + conn = build_conn(:get, "/counts/banana", {127, 0, 0, 1}) + conn = ApiRouter.call(conn, ApiRouter.init([])) + + assert conn.status == 400 + body = Jason.decode!(conn.resp_body) + assert body["error"] == "unknown_window" + end + end + + describe "GET /recipes" do + test "returns the recipe-health roll-up" do + conn = build_conn(:get, "/recipes", {127, 0, 0, 1}) + conn = ApiRouter.call(conn, ApiRouter.init([])) + + assert conn.status == 200 + body = Jason.decode!(conn.resp_body) + assert Map.has_key?(body, "count") + assert is_list(body["rows"]) + end + + test "?status= filter rejects unknown atoms with 400" do + conn = build_conn(:get, "/recipes?status=not_a_status_atom_xyz", {127, 0, 0, 1}) + conn = ApiRouter.call(conn, ApiRouter.init([])) + + assert conn.status == 400 + end + end + + describe "404" do + test "unknown path under /api returns 404 JSON" do + conn = build_conn(:get, "/no_such_endpoint", {127, 0, 0, 1}) + conn = 
ApiRouter.call(conn, ApiRouter.init([])) + + assert conn.status == 404 + body = Jason.decode!(conn.resp_body) + assert body["error"] == "not_found" + end + end + + defp build_conn(method, path, remote_ip) do + conn(method, path) + |> Map.put(:remote_ip, remote_ip) + end +end diff --git a/test/recipe_health_test.exs b/test/recipe_health_test.exs index 7aef8118..6a237a20 100644 --- a/test/recipe_health_test.exs +++ b/test/recipe_health_test.exs @@ -114,6 +114,61 @@ defmodule Hypatia.RecipeHealthTest do assert ours.status == :quarantine_candidate end + test "quarantined?/2 returns true when verification rate is below threshold", %{ + recipe_id: recipe_id + } do + # 1 verified + 9 still_present = 10 verifiable, rate = 0.10 + OutcomeTracker.record_outcome(recipe_id, "r", "v1", :success, %{ + "verification" => "verified" + }) + + for i <- 1..9 do + OutcomeTracker.record_outcome(recipe_id, "r", "sp#{i}", :success, %{ + "verification" => "still_present" + }) + end + + assert OutcomeTracker.quarantined?(recipe_id, threshold: 0.30, min_attempts: 5) == true + end + + test "quarantined?/2 returns false on insufficient data (avoids chicken-and-egg)", %{ + recipe_id: recipe_id + } do + # Only 2 verifiable outcomes -- below min_attempts. The gate + # should let the recipe through so it can earn more verification + # data, not gate it on too few samples. + OutcomeTracker.record_outcome(recipe_id, "r", "v1", :success, %{ + "verification" => "still_present" + }) + + OutcomeTracker.record_outcome(recipe_id, "r", "v2", :success, %{ + "verification" => "still_present" + }) + + assert OutcomeTracker.quarantined?(recipe_id, threshold: 0.30, min_attempts: 5) == false + end + + test "quarantined?/2 honours HYPATIA_RECIPE_QUARANTINE_DISABLE env override", %{ + recipe_id: recipe_id + } do + # Set up data that WOULD quarantine, then disable via env. 
+ OutcomeTracker.record_outcome(recipe_id, "r", "v1", :success, %{ + "verification" => "verified" + }) + + for i <- 1..9 do + OutcomeTracker.record_outcome(recipe_id, "r", "sp#{i}", :success, %{ + "verification" => "still_present" + }) + end + + System.put_env("HYPATIA_RECIPE_QUARANTINE_DISABLE", "true") + + on_exit(fn -> System.delete_env("HYPATIA_RECIPE_QUARANTINE_DISABLE") end) + + assert OutcomeTracker.quarantined?(recipe_id, threshold: 0.30, min_attempts: 5) == false + end + test "tags degraded between quarantine and healthy", %{recipe_id: recipe_id} do # 3 verified + 7 still_present = 10 verifiable, rate = 0.3 # → just at the quarantine threshold (0.30), so degraded (< 0.70). diff --git a/test/soundness/README.adoc b/test/soundness/README.adoc index d4b252ba..f63ef110 100644 --- a/test/soundness/README.adoc +++ b/test/soundness/README.adoc @@ -75,13 +75,98 @@ schema is intentionally flat and self-documenting. == Out of scope (today) -* End-to-end escript-build soundness — building the escript, then - running the built binary against the fixture corpus. That's the - exact PR #278 reproduction. Worth adding next, but requires a CI - job that can build escripts (the in-process test already catches - rule-definition regressions, just not packaging regressions). - -* Fixtures for non-`code_safety` rule families. The current manifest - covers the families PR #278 specifically called out as having been - silently dropped. Workflow_audit, cicd_rules, structural_drift, - scorecard, dependabot_alerts etc. fixtures are next-iteration work. +* End-to-end escript-build soundness has now landed + (`run-escript-soundness.sh`, wired into the e2e-elixir job). It + builds the escript fresh and runs it against the fixtures tree on + every CI run. The PR #278 reproduction is closed. + +* Fixtures for non-`code_safety` rule families are NOT a simple + generalisation. 
Each rule family has an architectural model that + makes the "one fixture file per rule" pattern not directly portable. + Documented below so future iteration starts from the right premise. + +=== Why other rule families don't drop in cleanly + +[cols="1,3,3", options="header"] +|=== +| Module | Detection model | Soundness model needed + +| `code_safety` +| `scan_content(content, language)` per file +| ✅ One fixture file per rule — current design works + +| `cicd_rules` (`banned_language_file`) +| Walk repo, match banned globs (`*.py`, `*.go`, ...) +| Architecturally blocked: the rule is **unsuppressable** by design + (PR #280 "total ban, no exceptions"). A .py fixture in-tree produces + a real critical finding in every scan, polluting the baseline. + Needs either (a) a policy carve-out for `test/soundness/fixtures/` + in the unsuppressable clause, or (b) a separate scratch-repo + scan-root model. + +| `workflow_audit` +| `audit(yml_files, contents)` against + `/.github/workflows/*.yml` at the scan root only +| Needs per-fixture scan-root: each fixture is its own directory tree + with `.github/workflows/bad.yml`. The current "scan the whole + fixtures tree once" runner can't see them — workflow_audit doesn't + recurse into subdirs looking for `.github/workflows/`. Either + refactor the runner to scan per-fixture-directory, or relax the + workflow_audit walker. + +| `structural_drift` +| Repo-level structural checks against well-known paths (LICENSE, + README.md, SECURITY.md, .github/...) +| Same shape as `workflow_audit` — needs per-fixture scan-root. + +| `security_errors` +| Most are content patterns dispatched through `code_safety` — already + covered when the underlying `code_safety` pattern is fixtured. + The `:secret_detected` family is the exception: it uses a separate + scanner (`scan_file_for_secrets`). +| Either piggyback on `code_safety` fixtures (already done for shared + patterns) or add per-fixture-directory entries for `:secret_detected`. 
+ +| `git_state`, `root_hygiene`, `honest_completion` +| Transient/repo-level — depend on git state, file presence at the + repo root, absence of STATE.a2ml etc. +| Per-fixture scan-root, harder to make hermetic (git state is the + signal). + +| `dependabot_alerts`, + `secret_scanning_alerts`, + `code_scanning_alerts` +| GitHub API queries +| Soundness gate doesn't apply — these don't have a "known-bad sample + on disk" model. Mock the GitHub API in `test/*_alerts_test.exs` + (already done) and gate token-absent + parse paths there. +|=== + +=== Suggested Phase 2 design (when someone gets to it) + +Extend the manifest with a `scan_root` field. When present, the +soundness runner scans that path INSTEAD of the whole fixtures tree +and asserts the rule fires on it. Example: + +[source,json] +---- +{ + "rule_module": "workflow_audit", + "rule_id": "missing_permissions", + "scan_root": "test/soundness/fixtures/workflow_audit/missing_permissions", + "expected_severity": "high" +} +---- + +Fixture layout becomes: + + test/soundness/fixtures/workflow_audit/missing_permissions/ + .github/workflows/bad.yml (no permissions: block) + +The runner iterates manifest entries with `scan_root`, runs the +escript on each, and accumulates findings. Same exit-code contract, +just per-fixture invocation instead of one tree-scan. + +Once that's in place, all the rule families above become tractable. +Today's scope: ship the `code_safety` soundness gate (operational) +and document the gap. diff --git a/test/soundness/run-escript-soundness.sh b/test/soundness/run-escript-soundness.sh new file mode 100755 index 00000000..beca943b --- /dev/null +++ b/test/soundness/run-escript-soundness.sh @@ -0,0 +1,138 @@ +#!/usr/bin/env bash +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Jonathan D.A. Jewell (hyperpolymath) +# +# End-to-end escript-build soundness gate. 
+# +# The Elixir in-process soundness test (test/soundness_test.exs) catches +# rule-definition regressions: a rule whose regex stops matching its +# fixture fails `mix test` before merge. But it does NOT catch +# **packaging** regressions — exactly the bug class PR #278 documented, +# where a stale escript binary silently dropped entire pattern families +# even though the in-tree rule definitions were correct. +# +# This script closes that loop: builds the escript fresh from source, +# runs the built binary against every fixture in +# test/soundness/manifest.json, and asserts each rule fires at the +# expected severity. Exits non-zero on the first packaging regression. +# +# Run locally: +# bash test/soundness/run-escript-soundness.sh +# +# In CI: wired into .github/workflows/tests.yml as the +# "Escript packaging soundness" step on the e2e-elixir job. + +set -euo pipefail + +cd "$(dirname "$0")/../.." +REPO_ROOT=$(pwd) +MANIFEST="$REPO_ROOT/test/soundness/manifest.json" +ESCRIPT="$REPO_ROOT/hypatia" + +if [[ ! -f "$MANIFEST" ]]; then + echo "FATAL: manifest not found at $MANIFEST" >&2 + exit 1 +fi + +# Build the escript fresh. We deliberately rebuild every time — the +# stale-binary scenario PR #278 documented is the entire failure mode +# this script is designed to catch. +echo "[soundness] Building escript fresh..." >&2 +rm -f "$ESCRIPT" +mix escript.build >&2 +if [[ ! -x "$ESCRIPT" ]]; then + echo "FATAL: mix escript.build did not produce $ESCRIPT" >&2 + exit 1 +fi + +if ! command -v jq >/dev/null 2>&1; then + echo "FATAL: jq required to parse manifest" >&2 + exit 1 +fi + +entries_count=$(jq '.entries | length' "$MANIFEST") +echo "[soundness] Loaded $entries_count manifest entries; running escript against each..." >&2 + +failures=() +results=() + +# Scan the entire fixtures tree once. 
The escript's CLI only accepts +# directories, and scanning the whole tree exercises the language- +# dispatch + file-walking code paths together — which is closer to how +# the scanner runs in production than scanning each fixture in +# isolation. Per-rule assertions then filter the resulting JSON. +echo "[soundness] Scanning fixtures tree against built escript..." >&2 +output=$("$ESCRIPT" scan "$REPO_ROOT/test/soundness/fixtures" \ + --format json \ + --severity low \ + --exit-zero 2>/dev/null || true) + +if ! echo "$output" | jq -e 'type == "array"' >/dev/null 2>&1; then + echo "FATAL: escript did not return a JSON array from the fixtures tree" >&2 + echo "$output" | head -20 >&2 + exit 1 +fi + +total_findings=$(echo "$output" | jq 'length') +echo "[soundness] Escript produced $total_findings findings; checking manifest..." >&2 + +while IFS=$'\t' read -r rule_module rule_id language fixture expected; do + [[ -z "$rule_id" ]] && continue + + if [[ ! -f "$fixture" ]]; then + failures+=("$rule_module/$rule_id: fixture missing on disk: $fixture") + results+=("FAIL $rule_module/$rule_id (fixture missing)") + continue + fi + + # The escript's `file` field may be either repo-relative or absolute + # depending on how it walked the tree. Compare via a basename match + # so the test is robust to either form. Also matches when the + # fixture's directory was traversed but the file path is the only + # location info we have. 
+ fixture_basename=$(basename "$fixture") + + matching=$(echo "$output" | jq --arg t "$rule_id" \ + --arg m "$rule_module" \ + --arg fb "$fixture_basename" \ + '[.[] | select(.type == $t and .rule_module == $m and (.file | endswith($fb)))]') + + found=$(echo "$matching" | jq 'length') + + if [[ "$found" -eq 0 ]]; then + failures+=("$rule_module/$rule_id: rule did not fire on $fixture (PR #278 class regression)") + results+=("FAIL $rule_module/$rule_id (rule silent)") + continue + fi + + actual_sev=$(echo "$matching" | jq -r 'first(.[] | .severity)') + + if [[ "$actual_sev" != "$expected" ]]; then + failures+=("$rule_module/$rule_id: severity drift — fired at '$actual_sev', expected '$expected'") + results+=("FAIL $rule_module/$rule_id (severity $actual_sev != $expected)") + continue + fi + + results+=("ok $rule_module/$rule_id") +done < <(jq -r '.entries[] | [.rule_module, .rule_id, .language, .fixture, .expected_severity] | @tsv' "$MANIFEST") + +# Report +printf '\n[soundness] Results:\n' >&2 +for line in "${results[@]}"; do + printf ' %s\n' "$line" >&2 +done + +if [[ ${#failures[@]} -gt 0 ]]; then + printf '\n[soundness] %d packaging regression(s) detected:\n' "${#failures[@]}" >&2 + for f in "${failures[@]}"; do + printf ' - %s\n' "$f" >&2 + done + printf '\nThis is the PR #278 bug class: the in-tree rule sources may be\n' >&2 + printf 'correct, but the escript build is silently dropping the rule.\n' >&2 + printf 'Investigate the escript build (mix.exs:escript, hypatia-cli.sh)\n' >&2 + printf 'before merging.\n' >&2 + exit 1 +fi + +printf '\n[soundness] %d/%d rules fired at expected severity on the built escript.\n' \ + "$entries_count" "$entries_count" >&2 diff --git a/test/watch_task_test.exs b/test/watch_task_test.exs new file mode 100644 index 00000000..f4094dfc --- /dev/null +++ b/test/watch_task_test.exs @@ -0,0 +1,66 @@ +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Jonathan D.A. 
Jewell (hyperpolymath) + +defmodule Mix.Tasks.Hypatia.WatchTest do + use ExUnit.Case, async: false + + import ExUnit.CaptureIO + + alias Hypatia.Telemetry, as: T + + setup do + case Process.whereis(Hypatia.Watcher) do + nil -> + {:ok, pid} = Hypatia.Watcher.start_link([]) + on_exit(fn -> if Process.alive?(pid), do: GenServer.stop(pid) end) + + _ -> + :ok + end + + :ok + end + + describe "render/3" do + test "renders all event kinds with their dotted-string names" do + T.scan_complete(50, 3, path: "/tmp/x", severity_floor: "low") + T.dispatch_decision(0.95, strategy: :auto_execute, tier: :eliminate, recipe_id: "r1", repo: "x") + T.outcome_recorded(recipe_id: "r1", repo: "x", outcome: "success", verification: "verified") + T.quarantine_triggered(kind: :recipe, id: "bad", reason: "verification_rate", level: :auto) + Process.sleep(50) + + snap = Hypatia.Watcher.snapshot() + output = capture_io(fn -> Mix.Tasks.Hypatia.Watch.render(snap, true, []) end) + + assert output =~ "Hypatia Watcher" + assert output =~ "Events / 5min" + assert output =~ "Events / 1hr" + assert output =~ "GenServer queue depths" + + # Each event kind should be present in the dotted-string format. 
+ assert output =~ "hypatia.scan.complete" + assert output =~ "hypatia.dispatch.decision" + assert output =~ "hypatia.outcome.recorded" + assert output =~ "hypatia.quarantine.triggered" + end + + test "renders :unavailable when the watcher snapshot is missing" do + output = capture_io(fn -> Mix.Tasks.Hypatia.Watch.render(:unavailable, true, []) end) + assert output =~ "Watcher unavailable" + end + + test "warns when dropped_events > 0" do + snap = %{ + counts: %{m5: %{}, h1: %{}, d1: %{}}, + queue_depths: %{}, + dropped_events: 42, + uptime_seconds: 10, + generated_at: "2026-05-24T00:00:00Z", + recent_by_kind: %{} + } + + output = capture_io(fn -> Mix.Tasks.Hypatia.Watch.render(snap, true, []) end) + assert output =~ "Dropped 42 telemetry event" + end + end +end diff --git a/test/watcher_test.exs b/test/watcher_test.exs new file mode 100644 index 00000000..43952d2d --- /dev/null +++ b/test/watcher_test.exs @@ -0,0 +1,109 @@ +# SPDX-License-Identifier: MPL-2.0 +# Copyright (c) 2026 Jonathan D.A. Jewell (hyperpolymath) + +defmodule Hypatia.WatcherTest do + # async: false because the Watcher is a named singleton attached to + # global telemetry handlers; concurrent tests would observe each + # other's events. + use ExUnit.Case, async: false + + alias Hypatia.Watcher + alias Hypatia.Telemetry, as: T + + setup do + # If the Application's Watcher is already running (production tests), + # use it. Otherwise spin one up just for this test. + pid = + case Process.whereis(Watcher) do + nil -> + {:ok, p} = Watcher.start_link([]) + on_exit(fn -> if Process.alive?(p), do: GenServer.stop(p) end) + p + + existing -> + existing + end + + {:ok, watcher: pid} + end + + describe "telemetry → counters" do + test "scan_complete event increments the m5 window counter" do + before = Watcher.counts(:m5) |> Map.get([:hypatia, :scan, :complete], 0) + + T.scan_complete(123, 7, path: "/tmp/x", severity_floor: "low") + + # Give the cast a moment to be processed (cast is async). 
+ Process.sleep(50) + + after_ = Watcher.counts(:m5) |> Map.get([:hypatia, :scan, :complete], 0) + assert after_ == before + 1 + end + + test "dispatch_decision event lands in all three windows" do + before_5m = Watcher.counts(:m5) |> Map.get([:hypatia, :dispatch, :decision], 0) + before_1h = Watcher.counts(:h1) |> Map.get([:hypatia, :dispatch, :decision], 0) + before_1d = Watcher.counts(:d1) |> Map.get([:hypatia, :dispatch, :decision], 0) + + T.dispatch_decision(0.95, + strategy: :auto_execute, + tier: :eliminate, + recipe_id: "test-recipe", + repo: "test/repo" + ) + + Process.sleep(50) + + assert Watcher.counts(:m5) |> Map.get([:hypatia, :dispatch, :decision], 0) == before_5m + 1 + assert Watcher.counts(:h1) |> Map.get([:hypatia, :dispatch, :decision], 0) == before_1h + 1 + assert Watcher.counts(:d1) |> Map.get([:hypatia, :dispatch, :decision], 0) == before_1d + 1 + end + end + + describe "snapshot/0" do + test "returns a fully-shaped map" do + T.outcome_recorded(recipe_id: "x", repo: "r", outcome: "success", verification: "verified") + Process.sleep(50) + + snap = Watcher.snapshot() + + assert Map.has_key?(snap, :counts) + assert Map.has_key?(snap, :queue_depths) + assert Map.has_key?(snap, :dropped_events) + assert Map.has_key?(snap, :recent_by_kind) + assert Map.has_key?(snap, :uptime_seconds) + assert Map.has_key?(snap, :generated_at) + + assert Map.has_key?(snap.counts, :m5) + assert Map.has_key?(snap.counts, :h1) + assert Map.has_key?(snap.counts, :d1) + end + end + + describe "recent_events/0" do + test "captures the latest event per kind with measurements + metadata" do + T.verification_result(recipe_id: "drilldown", repo: "r/x", verdict: :verified) + Process.sleep(50) + + events = Watcher.recent_events() + kind_events = Map.get(events, [:hypatia, :verification, :result], []) + + assert is_list(kind_events) + assert length(kind_events) >= 1 + + [latest | _] = kind_events + assert latest.metadata.recipe_id == "drilldown" + assert latest.metadata.verdict == 
:verified + assert is_integer(latest.at) + end + end + + describe "queue_depths/0" do + test "returns depths for supervised processes when supervisor exists" do + # The depth map may be empty if Hypatia.Supervisor isn't started + # (tests in isolation). Just assert the shape rather than content. + depths = Watcher.queue_depths() + assert is_map(depths) + end + end +end