diff --git a/AGENTS.md b/AGENTS.md index e29d21bf..790b322d 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -71,6 +71,7 @@ README.md public overview and development notes .agents/skills/ shared skills, committed .claude/skills symlink to .agents/skills for claude compatibility bin/ helper scripts, committed; read each script's header before first use + check-plugins/ durable watcher check plugins, committed; bin/fm-plugin.sh symlinks each into state/*.check.sh so the watcher picks them up .env optional X-mode pairing token; LOCAL, gitignored; presence-gates section 14 config/crew-harness crewmate harness override; LOCAL, gitignored; absent or "default" = same as firstmate. Inherited: the primary pushes this into every secondmate home's config/ (section 4), so a secondmate's own crewmates use the primary's value config/crew-dispatch.json optional crewmate dispatch profiles; LOCAL, gitignored; firstmate-maintained but human-editable natural-language rules that choose a per-task harness/model/effort profile (section 4). Inherited by secondmate homes @@ -90,7 +91,7 @@ state/ volatile runtime signals; gitignored .turn-ended touched by turn-end hooks .grok-turnend-token firstmate-owned grok hook registry token for the task; removed by teardown .meta written by fm-spawn: window=, worktree=, project=, harness=, model=, effort=, kind=, mode=, yolo=, tasktmp=; kind=secondmate also records home= and projects= (fm-pr-check, including through fm-pr-merge, appends pr= and GitHub's pr_head= when available; fm-x-link appends x_request= and x_request_ts= for an X-mention-originated task, section 14) - .check.sh optional slow poll you write per task (e.g. merged-PR check) + .check.sh optional slow poll you write per task (e.g. 
merged-PR check); fleet-wide plugin checks also appear here as symlinks into bin/check-plugins/ (bin/fm-plugin.sh manages them) x-watch.check.sh generated X-mode relay poll shim; present only when opted in (section 14) x-inbox/ generated X-mode pending mention payloads; fmx-respond drains it (section 14) x-outbox/ generated X-mode dry-run reply and dismiss previews; inspect it when FMX_DRY_RUN is set (section 14) @@ -102,6 +103,8 @@ state/ volatile runtime signals; gitignored .watch-triage.log watcher's absorbed-wake debug log (size-capped); never relied on, safe to delete .last-watcher-beat watcher liveness beacon, touched every poll (including while absorbing benign wakes); fm-guard.sh reads it .subsuper-* .supervise-daemon.* sub-supervisor internals; never touch + .github-watch-config fm-github-watch.sh filter/contributor config (key=value); never touch unless driving that tool + .github-watch-seen/ fm-github-watch.sh per-PR seen state (high-water marks); owned by that script .no-mistakes/ local validation state and evidence; gitignored ``` @@ -632,6 +635,13 @@ A secondmate may be sitting on its own watcher with no visible pane changes, so `fm-watch.sh` therefore skips stale-pane wakes for windows whose meta records `kind=secondmate`. This exception is narrow: ordinary crewmates still trip stale detection when their pane stops changing without a busy signature. +**Terminal-status crewmates must be progressed immediately.** +A crewmate that reports `done`, `failed`, or `blocked` and is then left idle in its tmux window is unfinished supervision work, not a quiet fleet. +The signal layer fires exactly once, on the status write; if you drop the thread after that, nothing re-nudges you - the stale-pane detector flags the idle pane, but that alarm is indistinguishable from a stuck crewmate until you re-read the status, so a busy supervisor dismisses it as noise. 
+The `done-crewmate` check plugin is the deterministic, recurring backstop: it scans every `state/*.meta` for a crewmate whose current status is terminal and whose window is still alive in tmux, and prints one wake line per check interval listing every offender until each is progressed (validated, merged, or torn down). +It is installed by `bin/fm-plugin.sh add done-crewmate state/done-crewmate.check.sh` and lives durably under `bin/check-plugins/` (symlinked into `state/` so the watcher's existing `*.check.sh` glob picks it up, no watcher changes required; `fm-plugin.sh sync`, called by bootstrap, restores the symlinks after a fresh clone). +Treat its wake with the same priority as a `signal:`: read the named task's status, then advance or tear it down. + **Watcher liveness is guarded, not just disciplined.** Arming the watcher is the last action of every wake-handling turn - but the protocol no longer relies on remembering that. While running, `fm-watch.sh` touches `state/.last-watcher-beat` every poll cycle. 
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 91cce792..defbdb6c 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -85,6 +85,8 @@ tests/fm-secondmate-safety.test.sh # secondmate home safety, idle charter tests/fm-teardown.test.sh # fm-teardown.sh landed-work safety and reminder checks: fork-remote allow, squash/content landings, dirty and unlanded refusals, PR-head metadata, no-pr= branch discovery, tasks-axi/manual backlog reminder, --force override tests/fm-pr-merge.test.sh # fm-pr-merge.sh records pr= and available pr_head= before merging, propagates real merge failures, and forwards extra gh-axi pr merge flags tests/fm-crew-state.test.sh # fm-crew-state.sh current-state reconciliation: run-step authority including closed panes, stale needs-decision/blocked superseded by a resumed run, genuine-parked, cross-branch attribution, pane/status-log fallback, scout skip, torn-down/missing-meta graceful +tests/fm-github-watch.test.sh # fm-github-watch.sh events, filters, rolled-up CI flips, merge/close transitions, contributor resolution, seen-state losslessness, and concurrency via a fake gh fixture +tests/fm-plugin.test.sh # fm-plugin.sh add/remove/list/sync lifecycle, invalid-name and not-found guards, and the done-crewmate.check.sh terminal-status offender scan via a fake tmux fixture [ "$(readlink CLAUDE.md)" = "AGENTS.md" ] [ "$(readlink .claude/skills)" = "../.agents/skills" ] tmp=$(mktemp -d) && printf 'done: smoke\n' > "$tmp/smoke.status" && FM_STATE_OVERRIDE="$tmp" FM_SIGNAL_GRACE=1 FM_POLL=1 FM_HEARTBEAT=999999 bin/fm-watch-arm.sh # watcher re-arm smoke test (prints arm status, then an actionable signal) diff --git a/bin/check-plugins/done-crewmate.check.sh b/bin/check-plugins/done-crewmate.check.sh new file mode 100755 index 00000000..a8a953d7 --- /dev/null +++ b/bin/check-plugins/done-crewmate.check.sh @@ -0,0 +1,98 @@ +#!/usr/bin/env bash +# Watcher check plugin: detect crewmates that reported a terminal status +# (done/failed/blocked) but whose 
tmux window is still alive - i.e. finished +# work firstmate has not yet progressed (validated / PR'd / merged) or torn down. +# +# Why this exists: a status write fires exactly once, on change. If firstmate +# gets the `done` signal, starts acting, then drops the thread, nothing re-nudges +# it - the stale-pane detector fires on the idle pane, but that alarm is +# indistinguishable from a stuck crewmate until firstmate re-reads the status, so +# a busy firstmate dismisses it as noise. This check is the deterministic, +# recurring backstop: every FM_CHECK_INTERVAL it re-asserts "done work is still +# sitting there" until the crewmate is torn down. +# +# Watcher check contract (same as bin/fm-pr-check.sh's per-task checks): +# print exactly one line -> wake firstmate (reason wrapped as +# `check: : `) +# print nothing -> fleet healthy; keep sleeping +# Runs via the watcher's state/*.check.sh glob (state/done-crewmate.check.sh is +# a symlink to this canonical copy under bin/check-plugins/; see bin/fm-plugin.sh). +# Fast by design: only tmux list-windows + small file reads, no network. +set -u + +# Resolve FM_ROOT independent of cwd and of symlink indirection +# (state/.check.sh -> bin/check-plugins/.check.sh). Prefer an explicit +# override, then cwd (the watcher runs from FM_ROOT, so state/ and bin/ are +# siblings of $PWD), then walk up from this script's resolved real path. +fm_root() { + [ -n "${FM_ROOT_OVERRIDE:-}" ] && { printf '%s\n' "$FM_ROOT_OVERRIDE"; return; } + if [ -d state ] && [ -d bin ]; then printf '%s\n' "$PWD"; return; fi + local src="${BASH_SOURCE[0]}" real d root + # readlink -f is GNU-only; plain readlink (one symlink level) is portable on + # BSD/GNU. fm-plugin.sh points state/.check.sh at an absolute + # bin/check-plugins/.check.sh; resolve a relative target against the link. 
+ if real="$(readlink "$src" 2>/dev/null)" && [ -n "$real" ]; then + case "$real" in + /*) src="$real" ;; + *) src="$(cd -P "$(dirname "$src")" && pwd)/$real" ;; + esac + fi + d="$(cd -P "$(dirname "$src")" 2>/dev/null && pwd)" || { printf '%s\n' "$PWD"; return; } + for root in "$d/../.." "$d/.."; do + [ -d "$root/bin" ] && [ -d "$root/state" ] && { (cd -P "$root" && pwd); return; } + done + printf '%s\n' "$PWD" +} +FM_ROOT="$(fm_root)" +STATE="$FM_ROOT/state" + +[ -d "$STATE" ] || exit 0 + +# A terminal status means the crewmate's work is complete (or halted pending +# firstmate) and it should not still be occupying a tmux window. needs-decision +# is intentionally excluded: it escalates immediately through the signal layer on +# write, so it never needs this recurring backstop. +is_terminal() { + case "$1" in + done:*|failed:*|blocked:*) return 0 ;; + *) return 1 ;; + esac +} + +# Live crewmate windows, one ':' per line (matches the watcher's +# own enumeration in bin/fm-watch.sh). Empty if tmux is absent or no fm windows +# exist - which means nothing can be idle-done, so we stay silent. +WINDOWS="$(tmux list-windows -a -F '#{session_name}:#{window_name}' 2>/dev/null | grep ':fm-' || true)" +[ -n "$WINDOWS" ] || exit 0 + +offenders="" +for meta in "$STATE"/*.meta; do + [ -e "$meta" ] || continue + kind="$(grep -m1 '^kind=' "$meta" 2>/dev/null | cut -d= -f2-)" + [ -n "$kind" ] || kind=ship + [ "$kind" = secondmate ] && continue + id="$(basename "$meta" .meta)" + status_file="$STATE/$id.status" + [ -f "$status_file" ] || continue # no status reported yet -> still working + + # Current state = the last non-empty status line (crewmates append; a later + # `working:` means it resumed, which is not idle-done). Tolerate a missing + # trailing newline via the `|| [ -n "$line" ]` guard. 
+  last=""
+  while IFS= read -r line || [ -n "$line" ]; do
+    [ -n "$line" ] && last="$line"
+  done < "$status_file"
+  is_terminal "$last" || continue
+
+  # Is this crewmate's window still alive? Match the meta's window= target (as
+  # recorded by fm-spawn) against whole $WINDOWS lines so fm-a never hits fm-ab.
+  win="$(grep -m1 '^window=' "$meta" 2>/dev/null | cut -d= -f2-)"
+  [ -n "$win" ] || continue
+  case $'\n'"$WINDOWS"$'\n' in
+    *$'\n'"$win"$'\n'*) offenders="${offenders:+$offenders }$id" ;;
+  esac
+done
+
+[ -n "$offenders" ] || exit 0
+# One line listing every offender so a single wake carries the whole picture.
+printf 'done crewmate %s still alive in tmux - progress or tear down\n' "$offenders"
diff --git a/bin/fm-bootstrap.sh b/bin/fm-bootstrap.sh
index 34800538..d4892ab2 100755
--- a/bin/fm-bootstrap.sh
+++ b/bin/fm-bootstrap.sh
@@ -412,4 +412,9 @@ fi
 secondmate_sync
 x_mode_setup
 fleet_sync
+# Re-arm durable watcher check plugins (state/*.check.sh symlinks into the
+# tracked canonical copies under bin/check-plugins/). state/ is gitignored, so a
+# fresh clone has no symlinks until this runs. Best-effort and silent on success.
+# shellcheck disable=SC2015 # best-effort: a missing exe or sync failure must never abort bootstrap
+[ -x "$FM_ROOT/bin/fm-plugin.sh" ] && "$FM_ROOT/bin/fm-plugin.sh" sync || true
 exit 0
diff --git a/bin/fm-github-watch.sh b/bin/fm-github-watch.sh
new file mode 100755
index 00000000..b9e1388a
--- /dev/null
+++ b/bin/fm-github-watch.sh
@@ -0,0 +1,671 @@
+#!/usr/bin/env bash
+# fm-github-watch.sh — GitHub events watcher for the fleet's open PRs.
+#
+# Discovers all of a contributor's open PRs and surfaces new comments (from
+# maintainers, reviewers, or bots), CI status changes, reviews, and
+# merge/close transitions as one-line events on stdout. Built to run as a
+# watcher check script: it prints iff firstmate should wake, and stays
+# silent otherwise.
+# +# Wire it in with a check script the existing watcher already sweeps, e.g.: +# ln -s ../bin/fm-github-watch.sh state/github-events.check.sh +# bin/fm-watch.sh runs state/*.check.sh every FM_CHECK_INTERVAL (default +# 300s); any stdout is captured, classified as a `check` wake, escalated. +# A full poll issues up to 5 gh calls per open PR, but PRs are polled +# concurrently (bounded by FM_GH_CONCURRENCY, default 8) so a sweep across the +# fleet finishes in well under the watcher's 30s check-script timeout. Events +# emit per-PR (not all-at-end), so a timeout still surfaces partial progress. +# +# Usage: +# fm-github-watch.sh # one poll cycle (same as --once) +# fm-github-watch.sh --once # one poll cycle +# fm-github-watch.sh --daemon # loop, polling every poll_interval +# fm-github-watch.sh filter list # show active filters +# fm-github-watch.sh filter on|off +# fm-github-watch.sh contributor # show configured contributor +# fm-github-watch.sh contributor +# fm-github-watch.sh status # show config + seen-state summary +# +# Filter names: comments, ci, reviews, merge. +# Config: state/.github-watch-config (key=value lines). +# Seen: state/.github-watch-seen/-- (key=value lines). +# +# The ci filter rolls the Checks API (check-runs) up to a single overall state +# per PR (green/failure/pending) and fires one event only when that state flips, +# not once per check landing — so a PR whose many checks trickle in reports a +# single transition, not a burst. CI providers that report only via the legacy +# commit status API (some older Travis/Coveralls setups) are not covered; use +# `gh pr checks` directly for a unified view. +# Comment, review, and check-run counts fetch up to 100 items per type per PR +# (per_page=100, no pagination); a single PR with >100 of one kind would cap. 
+# +# Losslessness: for each PR, events are emitted BEFORE its seen marker advances +# (and bash's builtin printf write()s to the capture pipe immediately, so an +# emitted event survives even a SIGKILL). A crash between the print and the seen +# write at worst causes a redundant re-detect next cycle, never a permanent +# swallow. A failing seen write leaves the old marker in place, so the same +# event fires again next cycle. PRs are polled concurrently but each worker +# owns its own per-PR seen file, so this ordering holds per-worker exactly. +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +FM_ROOT="${FM_ROOT_OVERRIDE:-$(cd "$SCRIPT_DIR/.." && pwd)}" +STATE="${FM_STATE_OVERRIDE:-$FM_ROOT/state}" +CONFIG="$STATE/.github-watch-config" +SEEN_DIR="$STATE/.github-watch-seen" +ALL_FILTERS="comments,ci,reviews,merge" +DEFAULT_POLL_SECS="${FM_GH_POLL_SECS:-300}" +# How long after a PR closes to keep re-probing it for a close->reopen->merge. +# Bounds API cost: a closed PR is re-checked only within this window, then +# treated as settled. ~2h at the default 300s poll. +CLOSE_REPROBE_SECS="${FM_GH_CLOSE_REPROBE_SECS:-7200}" +# Max number of PRs polled concurrently in a single sweep. Bounded so a large +# fleet can't burst GitHub's rate limit or hammer the API. ~88 calls/sweep at +# the captain's ~22 PRs is well under the 5000/hr ceiling even at 12 sweeps/hr. +# Set FM_GH_CONCURRENCY to tune (>=1; 0/non-numeric falls back to the default 8). +DEFAULT_CONCURRENCY=8 +# Seen-state schema version. Bump when a stored field's meaning or the field set +# changes in a way that would make a prior value miscompare (e.g. the ci roll-up +# changed `ci` from a multiset signature to a single state). On a schema mismatch +# the first poll silently re-baselines: it writes the new seen state and emits no +# event, so deploying a schema change never floods once as every PR appears to +# "transition" off the old format. Only subsequent real transitions fire. 
SEEN_SCHEMA=2
# Regex (Oniguruma) of check-run NAMES to drop from the CI roll-up before it is
# computed. Default: the known fork-routing signature gap #293 ("PR must be
# raised via no-mistakes"), which fails on kunchenguid fork-PRs even though the
# PR's real checks pass. With it excluded such PRs roll up to green when their
# real checks pass, instead of a false failure. Set FM_GH_IGNORE_CHECKS to a
# custom regex, or to empty to disable filtering entirely. Only the CI roll-up
# applies this; the raw check list and the other filters are unchanged.
IGNORE_CHECKS="${FM_GH_IGNORE_CHECKS-PR must be raised via no-mistakes}"

mkdir -p "$STATE" "$SEEN_DIR"

# ---- small helpers ----

# is_int VALUE -> 0 when VALUE is a non-empty run of ASCII digits, else 1.
# A missing argument counts as empty.
is_int() {
  local candidate=${1:-}
  [ -n "$candidate" ] || return 1
  case "$candidate" in
    *[!0-9]*) return 1 ;;
  esac
  return 0
}

# valid_filter NAME -> 0 when NAME is one of the four known filter names.
valid_filter() {
  local known
  for known in comments ci reviews merge; do
    if [ "$1" = "$known" ]; then
      return 0
    fi
  done
  return 1
}

# is_gh_error TEXT -> 0 when TEXT looks like a GitHub REST API error body.
# Such a body is a JSON object carrying both a "message" and a
# "documentation_url" key, e.g.
#   {"message":"Bad credentials","documentation_url":"...","status":"401"}
# The check is a plain substring test for the two quoted key names; both must
# be present, in any order, for the text to be classified as an error.
is_gh_error() {
  local body=$1
  if [[ $body == *'"message"'* && $body == *'"documentation_url"'* ]]; then
    return 0
  fi
  return 1
}

# Run gh, capturing its stdout. Returns non-zero if gh exited non-zero OR its
# output is a GitHub API error body; in either case the body is suppressed so a
# caller that ignores the exit status can never parse an error response as data
# (the bug: a 401 body reached stdout and was parsed as CI state, firing a bogus
# "CI: ... -> { \"message\": ... }" event). Probe callers treat a non-zero return
# as "skip this PR this cycle" so a transient blip never surfaces as an event.
# stderr is always swallowed so a missing gh or a transient failure never spams
# the watcher's own capture pipe.
ghc() {
  local out rc
  out=$(command gh "$@" 2>/dev/null); rc=$?
  if [ "$rc" -ne 0 ] || is_gh_error "$out"; then
    return 1
  fi
  printf '%s' "$out"
}

# cfg_read KEY -> prints KEY's value from $CONFIG (empty if the file or the key
# is missing). Only the first matching line is used; everything after the first
# "=" is the value, so values may themselves contain "=".
cfg_read() {
  local key=$1
  [ -f "$CONFIG" ] || return 0
  awk -F= -v k="$key" '$1==k { sub(/^[^=]*=/, ""); print; exit }' "$CONFIG"
}

# cfg_has KEY -> 0 if the key exists in the config (distinguishes a configured
# empty value, e.g. `filters=`, from a missing key so "all filters off" sticks).
cfg_has() {
  local key=$1
  [ -f "$CONFIG" ] && grep -q "^${key}=" "$CONFIG"
}

# cfg_write KEY VALUE (upsert a single key=value line)
# Newlines in VALUE are flattened to spaces so the file stays one key per line.
# Returns non-zero if the rewrite or the append fails.
cfg_write() {
  local key=$1 val=$2 tmp
  val=$(printf '%s' "$val" | tr '\n' ' ')
  if [ -f "$CONFIG" ] && grep -q "^${key}=" "$CONFIG"; then
    tmp="${CONFIG}.tmp.$$"
    # The value travels through the environment, not `awk -v`: -v assignments
    # undergo backslash-escape processing, which would rewrite a value such as
    # 'a\nb' on update while the append branch below stores it verbatim.
    if CFG_WRITE_VAL=$val awk -F= -v k="$key" \
        '$1==k { print k "=" ENVIRON["CFG_WRITE_VAL"]; next } { print }' \
        "$CONFIG" > "$tmp" && mv "$tmp" "$CONFIG"; then
      return 0
    fi
    # Rewrite failed: the old config is untouched; do not leak the temp file.
    rm -f "$tmp"
    return 1
  else
    printf '%s=%s\n' "$key" "$val" >> "$CONFIG"
  fi
}

# get_contributor -> prints the GitHub login whose PRs are watched (no newline).
get_contributor() {
  # Precedence: configured value > FM_GH_CONTRIBUTOR env > authenticated gh user.
  # No hardcoded default: a shared tool should poll whoever is logged in.
  local v
  v=$(cfg_read contributor)
  if [ -n "$v" ]; then printf '%s' "$v"; return; fi
  if [ -n "${FM_GH_CONTRIBUTOR:-}" ]; then printf '%s' "$FM_GH_CONTRIBUTOR"; return; fi
  # Best-effort: a gh failure yields empty output rather than a non-zero exit.
  ghc api user -q .login | tr -d '\n' || true
}

# get_filters -> prints the active filters as a comma-separated list.
get_filters() {
  # A configured value (even empty = all filters off) is respected; only a
  # never-configured key falls back to the full default set.
  if cfg_has filters; then
    cfg_read filters
  else
    printf '%s' "$ALL_FILTERS"
  fi
}

# filter_enabled NAME -> 0 if NAME is in the active filter list.
filter_enabled() {
  case ",$(get_filters)," in *",$1,"*) return 0 ;; *) return 1 ;; esac
}

# is_pos_int VALUE -> 0 when VALUE is all digits and greater than zero
# (at least one non-zero digit), else 1.
is_pos_int() {
  case "${1:-}" in
    ''|*[!0-9]*) return 1 ;;
    *[1-9]*) return 0 ;;
    *) return 1 ;;
  esac
}

# get_poll -> prints the daemon poll interval in seconds. A missing, empty,
# non-numeric, or zero poll_interval falls back to DEFAULT_POLL_SECS: a zero
# interval would turn the --daemon loop into `sleep 0`, i.e. back-to-back
# sweeps against the GitHub API.
get_poll() {
  local v
  v=$(cfg_read poll_interval)
  if is_pos_int "$v"; then
    printf '%s' "$v"
  else
    printf '%s' "$DEFAULT_POLL_SECS"
  fi
}

# Max concurrent per-PR workers in a sweep. FM_GH_CONCURRENCY overrides; a
# missing, empty, non-numeric, or zero value falls back to the sane default.
get_concurrency() {
  local v="${FM_GH_CONCURRENCY:-}"
  if is_pos_int "$v"; then
    printf '%s' "$v"
  else
    printf '%s' "$DEFAULT_CONCURRENCY"
  fi
}

# seen_file OWNER REPO PR -> path to that PR's seen-state file
# NOTE(review): the three parts are joined with "-", so owner/repo names that
# themselves contain "-" could in principle map two PRs to the same file
# (a-b/c vs a/b-c) - confirm whether that matters for the watched fleet.
seen_file() { printf '%s/%s-%s-%s\n' "$SEEN_DIR" "$1" "$2" "$3"; }

# seen_get FILE KEY -> value (empty if the file or the key is missing)
seen_get() {
  local f=$1 key=$2
  [ -f "$f" ] || return 0
  awk -F= -v k="$key" '$1==k { sub(/^[^=]*=/, ""); print; exit }' "$f"
}

# ---- comma-list set ops ----

# list_contains "a,b,c" "b" -> 0 if present
list_contains() {
  case ",$1," in *",$2,"*) return 0 ;; *) return 1 ;; esac
}

# list_add "a,b,c" "d" -> "a,b,c,d" (dedup; preserves order)
list_add() {
  local list=$1 item=$2
  if list_contains "$list" "$item"; then
    printf '%s' "$list"
  else
    if [ -z "$list" ]; then printf '%s' "$item"; else printf '%s,%s' "$list" "$item"; fi
  fi
}

# list_remove "a,b,c" "b" -> "a,c"
list_remove() {
  local list=$1 item=$2 out="" i
  local IFS=,
  for i in $list; do
    [ "$i" = "$item" ] && continue
    if [ -z "$out" ]; then out=$i; else out="$out,$i"; fi
  done
  printf '%s' "$out"
}

# ---- discovery + per-PR probes (each fails open: empty output, no crash) ----

# Prints "owner/repo<TAB>number" per open PR by the contributor.
discover_prs() {
  local who
  who=$(get_contributor)
  # Guard against an unresolved contributor (gh missing/unauthed): passing
  # --author="" would act as no author filter at all, matching open PRs across
  # every repo and flooding the seen state. Report "no PRs" instead.
  if [ -z "$who" ]; then
    return 0
  fi
  ghc search prs \
    --author="$who" --state=open --limit 1000 \
    --json repository,number \
    --jq '.[] | [.repository.nameWithOwner, .number] | @tsv'
}

# count_comments OWNER REPO PR CONTRIBUTOR
# Prints how many issue comments on the PR were written by anyone other than
# CONTRIBUTOR (first 100 comments only; the login reaches jq via the
# CONTRIB_WATCH environment variable).
count_comments() {
  local owner=$1 repo=$2 pr=$3 contributor=$4
  CONTRIB_WATCH="$contributor" ghc api \
    "repos/$owner/$repo/issues/$pr/comments?per_page=100" \
    --jq '[.[] | select(.user.login != env.CONTRIB_WATCH)] | length'
}

# count_reviews OWNER REPO PR CONTRIBUTOR
# Same shape as count_comments, but over the PR's reviews: CONTRIBUTOR's own
# (self-)reviews are left out, while maintainer and bot reviews (Greptile,
# coderabbit, etc. have distinct logins) are counted.
count_reviews() {
  local owner=$1 repo=$2 pr=$3 contributor=$4
  CONTRIB_WATCH="$contributor" ghc api \
    "repos/$owner/$repo/pulls/$pr/reviews?per_page=100" \
    --jq '[.[] | select(.user.login != env.CONTRIB_WATCH)] | length'
}

# pr_state OWNER REPO PR -> OPEN|MERGED|CLOSED (empty on failure)
pr_state() {
  local owner=$1 repo=$2 pr=$3
  ghc pr view "$pr" -R "$owner/$repo" --json state -q .state
}

# head_sha OWNER REPO PR -> the PR's head commit OID (empty on failure)
head_sha() {
  local owner=$1 repo=$2 pr=$3
  ghc pr view "$pr" -R "$owner/$repo" --json headRefOid -q .headRefOid
}

# ci_state -> the commit's rolled-up overall CI state:
#   success  every non-neutral check passed (conclusion success/skipped), none still running
#   failure  at least one non-neutral check failed (failure/timed_out/cancelled/action_required/stale)
#   pending  at least one non-neutral check is still queued/in_progress (no conclusion yet)
#   neutral  only neutral check-runs are present
#   (empty)  no check-runs reported yet; the caller carries forward the prior state
# Rolled up from the Checks API so a PR with many staggered checks surfaces a
# single green/red transition instead of one event per check landing. Failure
# beats pending (a red check already settles the PR's outcome), matching
# GitHub's own combined-status precedence.
Check-runs whose NAME matches the +# IGNORE_CHECKS regex (default: the known fork-routing gap #293) are dropped +# before the roll-up, so a PR that fails ONLY that signature check still rolls +# up to green when its real checks pass. The regex is embedded into the jq +# program (escaped for a JSON string literal) because `gh api` has no --arg +# binding for its --jq filter; a malformed regex fails open to empty (carried +# forward), never crashing the poll. +ci_state() { + [ -n "$3" ] || return 0 + local ignore_escaped jq_filter + ignore_escaped=${IGNORE_CHECKS//\\/\\\\} + ignore_escaped=${ignore_escaped//\"/\\\"} + # The regex is embedded into the jq program (escaped for a JSON string + # literal) because `gh api` has no --arg binding for its --jq filter. Every jq + # binding ($ignore/$raw/$all/$rel) is backslash-escaped so the heredoc leaves + # it literal; only $ignore_escaped expands. + # shellcheck disable=SC2016 + jq_filter=$(cat < -> the word printed in a CI event line (success -> green). +ci_label() { + case "${1:-}" in + success) printf 'green' ;; + *) printf '%s' "${1:-unknown}" ;; + esac +} + +# ---- the poll ---- + +# atomic_write — write seen state via temp + rename so a crash +# or a read-only state dir can never leave a partial file. On any failure the +# prior file is left untouched, so the event re-fires next cycle (lossless). +# The temp lives in a hidden .tmp subdir of the seen dir (same filesystem, so +# the rename is atomic) so a crash-leaked temp never matches detect_left_open's +# `"$SEEN_DIR"/*` glob and cause a double-fire. +atomic_write() { + local file=$1 content=$2 tmp stagedir + stagedir="$SEEN_DIR/.tmp" + tmp="$stagedir/$(basename "$file").$$" + mkdir -p "$stagedir" 2>/dev/null || true + # Redirect fd 2 to /dev/null BEFORE the output redirect so a failure to open + # the temp (read-only dir) is reported to /dev/null, not the terminal. 
+ if printf '%s\n' "$content" 2>/dev/null > "$tmp"; then + mv -f "$tmp" "$file" 2>/dev/null || rm -f "$tmp" 2>/dev/null + else + rm -f "$tmp" 2>/dev/null + fi +} + +# build_seen +# Compose the seen-state block: high-water marks for counts, current value for +# ci/state. Fields with no fresh value this cycle are carried forward from the +# prior block, so toggling a filter off never wipes its remembered high-water. +# CI is the rolled-up overall state; it is carried forward across a transiently +# empty fetch (a new commit whose check-runs have not populated yet) so a later +# state transition still fires. +build_seen() { + local sf=$1 owner=$2 repo=$3 pr=$4 c_count=$5 r_count=$6 ci_st=$7 sha=$8 p_state=$9 + local seen_c seen_r seen_ci seen_state new_c new_r ci_val state_val block closed_at_val + seen_c=$(seen_get "$sf" comments) + seen_r=$(seen_get "$sf" reviews) + seen_ci=$(seen_get "$sf" ci) + seen_state=$(seen_get "$sf" state) + new_c=$seen_c; new_r=$seen_r + if is_int "$c_count"; then + if is_int "$seen_c"; then new_c=$((seen_c > c_count ? seen_c : c_count)); else new_c=$c_count; fi + fi + if is_int "$r_count"; then + if is_int "$seen_r"; then new_r=$((seen_r > r_count ? 
seen_r : r_count)); else new_r=$r_count; fi + fi + ci_val=$ci_st + [ -n "$ci_val" ] || ci_val=$seen_ci + state_val=$p_state + [ -n "$state_val" ] || state_val=$seen_state + block=$(printf 'owner=%s\nrepo=%s\npr=%s\nschema=%s\ninitialized=1' "$owner" "$repo" "$pr" "$SEEN_SCHEMA") + is_int "$new_c" && block=$(printf '%s\ncomments=%s' "$block" "$new_c") + is_int "$new_r" && block=$(printf '%s\nreviews=%s' "$block" "$new_r") + [ -n "$ci_val" ] && block=$(printf '%s\nci=%s' "$block" "$ci_val") + [ -n "$sha" ] && block=$(printf '%s\nsha=%s' "$block" "$sha") + [ -n "$state_val" ] && block=$(printf '%s\nstate=%s' "$block" "$state_val") + closed_at_val="" + if [ "$state_val" = "CLOSED" ]; then + closed_at_val=$(seen_get "$sf" closed_at) + [ -n "$closed_at_val" ] || closed_at_val=$(date +%s) + fi + [ -n "$closed_at_val" ] && block=$(printf '%s\nclosed_at=%s' "$block" "$closed_at_val") + printf '%s' "$block" +} + +# process_pr +# Gather fresh data for the enabled filters, EMIT any new events for this PR, +# then advance this PR's seen marker. Per-PR ordering (print before seen) plus +# bash's immediate write() to the capture pipe make this lossless even if the +# poll is killed mid-cycle: an emitted event is already in the pipe, and a PR +# whose marker never advanced simply re-fires next cycle. Runs one worker per +# PR under poll_once's bounded concurrency; each worker writes only this PR's +# own seen file, so concurrent workers never contend on seen state. 
+process_pr() { + local owner=$1 repo=$2 pr=$3 contributor=$4 + local sf c_count r_count p_state sha ci_st + local initialized seen_c seen_r seen_state seen_ci ev="" + sf=$(seen_file "$owner" "$repo" "$pr") + + local api_err=0 + c_count="" r_count="" p_state="" sha="" ci_st="" + if filter_enabled comments; then c_count=$(count_comments "$owner" "$repo" "$pr" "$contributor") || api_err=1; fi + if filter_enabled reviews; then r_count=$(count_reviews "$owner" "$repo" "$pr" "$contributor") || api_err=1; fi + if filter_enabled merge; then p_state=$(pr_state "$owner" "$repo" "$pr") || api_err=1; fi + if filter_enabled ci; then + sha=$(head_sha "$owner" "$repo" "$pr") || api_err=1 + if [ -n "$sha" ]; then ci_st=$(ci_state "$owner" "$repo" "$sha") || api_err=1; fi + fi + # If any enabled probe hit a GitHub API error this cycle, skip the whole PR: + # emit nothing and do not advance seen, so a transient blip can never surface + # as an event (e.g. an error JSON parsed as CI data). The next cycle + # re-evaluates from the same baseline — lossless, never a permanent swallow. + if [ "$api_err" -ne 0 ]; then + printf 'fm-github-watch: skipping %s/%s#%s this cycle (GitHub API error)\n' \ + "$owner" "$repo" "$pr" >&2 + return 0 + fi + + initialized=$(seen_get "$sf" initialized) + # A prior seen file whose schema does not match the current version is treated + # as a first-run baseline: emit nothing this cycle (so deploying a schema + # change never floods as every PR appears to "transition" off the old format) + # and let build_seen rewrite it at the current schema with carried-forward + # values. Only subsequent real transitions fire. + if [ -n "$initialized" ] && [ "$(seen_get "$sf" schema)" = "$SEEN_SCHEMA" ]; then + seen_c=$(seen_get "$sf" comments) + seen_r=$(seen_get "$sf" reviews) + seen_state=$(seen_get "$sf" state) + seen_ci=$(seen_get "$sf" ci) + + # comments (high-water): event on increase only. 
+ if is_int "$c_count" && is_int "$seen_c" && [ "$c_count" -gt "$seen_c" ]; then + ev="${ev}COMMENT: ${owner}/${repo}#${pr} has $((c_count - seen_c)) new comment(s) +" + fi + # reviews (high-water): event on increase only. + if is_int "$r_count" && is_int "$seen_r" && [ "$r_count" -gt "$seen_r" ]; then + ev="${ev}REVIEW: ${owner}/${repo}#${pr} has $((r_count - seen_r)) new review(s) +" + fi + # ci: event on overall-state transition only (debounced). A PR with many + # staggered checks surfaces one event per green/red/pending flip, not one + # per check landing. No event while the rolled-up state is unchanged. + if [ -n "$ci_st" ] && [ -n "$seen_ci" ] && [ "$seen_ci" != "$ci_st" ]; then + ev="${ev}CI: ${owner}/${repo}#${pr} -> $(ci_label "$ci_st") +" + fi + # merge: event on open -> merged/closed transition. + if [ -n "$p_state" ] && [ "$p_state" != "$seen_state" ]; then + case "$p_state" in + MERGED) [ "${seen_state:-OPEN}" = "OPEN" ] && ev="${ev}MERGED: ${owner}/${repo}#${pr} +" ;; + CLOSED) [ "${seen_state:-OPEN}" = "OPEN" ] && ev="${ev}CLOSED: ${owner}/${repo}#${pr} +" ;; + esac + fi + fi + + # --- LOSSLESSNESS BOUNDARY (per-PR) --- + # Emit this PR's events first (bash's printf write()s to the pipe at once), + # then advance its seen marker. A crash between the two leaves the event + # delivered and the marker stale -> a redundant re-detect, never a swallow. + [ -n "$ev" ] && printf '%s' "$ev" + local block + block=$(build_seen "$sf" "$owner" "$repo" "$pr" "$c_count" "$r_count" "$ci_st" "$sha" "$p_state") + atomic_write "$sf" "$block" +} + +# Emit one poll cycle. +poll_once() { + local contributor prs fullname pr owner repo basename + local open_basenames=" " + local max_jobs running + max_jobs=$(get_concurrency) + running=0 + contributor=$(get_contributor) + # If discovery itself failed (transient API blip), abort the cycle: an empty + # result would otherwise make detect_left_open think every open PR merged. 
+ prs=$(discover_prs) || { + printf 'fm-github-watch: PR discovery failed this cycle; skipping\n' >&2 + return 0 + } + + # Parallel per-PR polling. Each worker is a subshell running process_pr; each + # owns its own seen file (seen_file is keyed by owner/repo/pr), so concurrent + # seen writes never collide. Concurrency is bounded by FM_GH_CONCURRENCY + # (default 8) via a counting semaphore so a large fleet can't burst the GitHub + # rate limit. Each worker prints its whole event block in a single printf + # (one write() of a few hundred bytes, atomic under PIPE_BUF, so lines never + # interleave), and only then advances its own seen marker — the losslessness + # invariant (print before seen) holds per-worker exactly as in the serial + # model: a crash/timeout mid-sweep at worst re-detects, never swallows. + while IFS=$'\t' read -r fullname pr; do + [ -n "${fullname:-}" ] || continue + owner=${fullname%%/*} + repo=${fullname#*/} + if [ -z "$owner" ] || [ -z "$repo" ] || [ "$owner" = "$fullname" ] || [ -z "${pr:-}" ]; then + continue + fi + basename=$(seen_file "$owner" "$repo" "$pr"); basename=${basename##*/} + open_basenames="${open_basenames}${basename} " + + # Throttle: at capacity, wait for one worker to finish before launching the + # next. wait -n (bash >= 4.3) blocks until any child exits; the decrement + # keeps the running count honest (it can only under-count finished workers, + # which is conservative — concurrency never exceeds the cap). + while [ "$running" -ge "$max_jobs" ]; do + wait -n 2>/dev/null || wait + running=$((running - 1)) + done + + # reopen->merge still fires, without an unbounded per-cycle API cost as +# closed PRs accumulate. detect_left_open (space-padded: +# " key1 key2 " so the last entry matches too). 
+detect_left_open() {
+  # Sweep seen files whose PR is absent from this cycle's open set and report
+  # merge/close transitions, one "STATE: owner/repo#N" line each on stdout.
+  #   $1  space-padded list of seen-file basenames that are still open
+  # No-op when the merge filter is off or the seen directory does not exist.
+  local open_basenames=$1 f base owner repo pr seen_state p_state block closed_at now stamp
+  filter_enabled merge || return 0
+  [ -d "$SEEN_DIR" ] || return 0
+  now=$(date +%s)
+  for f in "$SEEN_DIR"/*; do
+    [ -e "$f" ] || continue
+    base=${f##*/}
+    case "$base" in *.tmp.*) continue ;; esac
+    case "$open_basenames" in *" $base "*) continue ;; esac
+    [ -n "$(seen_get "$f" initialized)" ] || continue
+    seen_state=$(seen_get "$f" state)
+    [ "$seen_state" = "MERGED" ] && continue # merged is the only terminal state
+    # A CLOSED PR older than the re-probe window is settled: skip the API call
+    # so accumulated closed PRs cannot push the fleet past the rate limit.
+    # The is_int guard keeps a blank or corrupt closed_at out of the arithmetic:
+    # such a record falls through to a re-probe instead of raising an error.
+    if [ "$seen_state" = "CLOSED" ]; then
+      closed_at=$(seen_get "$f" closed_at)
+      if [ -n "$closed_at" ] && is_int "$closed_at" \
+        && [ $((now - closed_at)) -ge "$CLOSE_REPROBE_SECS" ]; then
+        continue
+      fi
+    fi
+    owner=$(seen_get "$f" owner)
+    repo=$(seen_get "$f" repo)
+    pr=$(seen_get "$f" pr)
+    if [ -z "$owner" ] || [ -z "$repo" ] || [ -z "$pr" ]; then continue; fi
+    p_state=$(pr_state "$owner" "$repo" "$pr") || continue
+    [ -n "$p_state" ] || continue # transient gh failure: leave seen state untouched
+    # Migration: a prior seen file whose schema does not match the current
+    # version is silently re-baselined — stamp the current schema + observed
+    # state, emit nothing — so a schema change never floods as every PR appears
+    # to "transition" off the old format. All other fields (closed_at, counts,
+    # ci) are preserved; only schema/state are re-stamped. One exception: a PR
+    # observed CLOSED with no usable closed_at gets one stamped now, because
+    # the age-out guard above can never skip a CLOSED record that lacks it and
+    # the PR would otherwise cost one API call every cycle, forever.
+    if [ "$(seen_get "$f" schema)" != "$SEEN_SCHEMA" ]; then
+      stamp=""
+      closed_at=$(seen_get "$f" closed_at)
+      if [ "$p_state" = "CLOSED" ] && { [ -z "$closed_at" ] || ! is_int "$closed_at"; }; then
+        stamp=$now
+      fi
+      block=$(awk -F= -v sch="$SEEN_SCHEMA" -v s="$p_state" -v stamp="$stamp" '
+        $1 == "schema" || $1 == "state" { next }
+        $1 == "closed_at" && stamp != "" { next }
+        { print }
+        END {
+          print "schema=" sch
+          print "state=" s
+          if (stamp != "") print "closed_at=" stamp
+        }' "$f")
+      atomic_write "$f" "$block"
+      continue
+    fi
+    [ "$p_state" = "$seen_state" ] && continue # unchanged: no event, no rewrite
+    case "$p_state" in
+      MERGED|CLOSED)
+        # Emit, then advance state (same per-PR losslessness ordering).
+        printf '%s: %s/%s#%s\n' "$p_state" "$owner" "$repo" "$pr"
+        ;;
+      *)
+        # Reopened back to OPEN (or unknown): no event, but track the new state
+        # so a later merge still fires from the right baseline.
+        ;;
+    esac
+    # Rewrite state; stamp closed_at when entering CLOSED so the re-probe window
+    # can age it out, and clear it on any other transition.
+    stamp=""
+    [ "$p_state" = "CLOSED" ] && stamp=$now
+    block=$(awk -F= -v s="$p_state" -v stamp="$stamp" \
+      '$1!="state" && $1!="closed_at" { print } END { print "state=" s; if (stamp != "") print "closed_at=" stamp }' "$f")
+    atomic_write "$f" "$block"
+  done
+}
+
+# ---- daemon ----
+
+# Run poll_once forever, pausing get_poll seconds between sweeps; INT/TERM
+# exit 0. The pause runs as a background sleep that the shell waits on: bash
+# defers a trap until the foreground command finishes, so a foreground sleep
+# would delay a TERM by up to one full poll interval.
+poll_daemon() {
+  local interval sleeper=""
+  interval=$(get_poll)
+  trap '[ -n "$sleeper" ] && kill "$sleeper" 2>/dev/null; exit 0' INT TERM
+  while :; do
+    poll_once
+    sleep "$interval" &
+    sleeper=$!
+    wait "$sleeper" || true
+    sleeper=""
+  done
+}
+
+# ---- CLI subcommands ----
+
+cmd_filter() {
+  # filter list            -> show active filters, one per line
+  # filter <name> on|off   -> toggle a filter and persist the new list
+  # Exits 2 on an unknown filter name or a missing/invalid on|off argument.
+  local name="${1:-}" state="${2:-}" f
+  local -a active=()
+  if [ -z "$name" ] || [ "$name" = "list" ]; then
+    # Split on commas with read -a rather than an unquoted expansion, so a
+    # hand-edited config value is never glob-expanded; f stays function-local.
+    IFS=, read -r -a active <<<"$(get_filters)"
+    for f in ${active[@]+"${active[@]}"}; do printf '%s\n' "$f"; done
+    return
+  fi
+  valid_filter "$name" || { echo "error: unknown filter '$name' (comments|ci|reviews|merge)" >&2; exit 2; }
+  case "$state" in
+    on|off) ;;
+    *) echo "usage: fm-github-watch.sh filter [list | <name> on|off]" >&2; exit 2 ;;
+  esac
+  local cur new
+  cur=$(get_filters)
+  if [ "$state" = "on" ]; then
+    new=$(list_add "$cur" "$name")
+  else
+    new=$(list_remove "$cur" "$name")
+  fi
+  cfg_write filters "$new"
+  echo "filters=$new"
+}
+
+# contributor [login]: with an argument, persist it as the configured
+# contributor and echo the new setting; without one, print the resolved value.
+cmd_contributor() {
+  if [ "$#" -gt 0 ]; then
+    cfg_write contributor "$1"
+    echo "contributor=$1"
+  else
+    get_contributor
+  fi
+}
+
+# status: print the resolved contributor, the on/off state of each filter, the
+# poll interval, and how many seen files exist.
+cmd_status() {
+  local contributor filters f on seen_count
+  contributor=$(get_contributor)
+  filters=$(get_filters)
+  printf 'contributor: %s\n' "$contributor"
+  printf 'filters:\n'
+  for f in comments ci reviews merge; do
+    if list_contains "$filters" "$f"; then on=on; else on=off; fi
+    printf '  %s: %s\n' "$f" "$on"
+  done
+  printf 'poll interval: %ss\n' "$(get_poll)"
+  seen_count=0
+  if [ -d "$SEEN_DIR" ]; then
+    # Exclude the .tmp staging subdir so leaked temps never inflate the count.
+    seen_count=$(find "$SEEN_DIR" -type f -not -path '*/.tmp/*' 2>/dev/null | wc -l | tr -d ' ')
+  fi
+  printf 'seen PRs: %s\n' "$seen_count"
+}
+
+usage() {
+  # Print the leading `#` header comment (lines 2..) up to the first non-comment
+  # line, stripping the `# ` prefix. Stops before `set -u` so no code leaks.
+  awk 'NR==1 { next } /^#/ { sub(/^# ?/, ""); print; next } { exit }' "$0"
+}
+
+# ---- entry ----
+
+case "${1:-}" in
+  --help|-h) usage; exit 0 ;;
+  --once|"") poll_once ;;
+  --daemon) poll_daemon ;;
+  filter) shift; cmd_filter "$@" ;;
+  contributor) shift; cmd_contributor "$@" ;;
+  status) cmd_status ;;
+  *)
+    echo "error: unknown command '${1:-}'" >&2
+    usage >&2
+    exit 2
+    ;;
+esac
diff --git a/bin/fm-plugin.sh b/bin/fm-plugin.sh
new file mode 100755
index 00000000..4bcee69a
--- /dev/null
+++ b/bin/fm-plugin.sh
@@ -0,0 +1,124 @@
+#!/usr/bin/env bash
+# Manage durable watcher check plugins.
+#
+# The watcher discovers check scripts via a state/*.check.sh glob, but state/ is
+# gitignored (volatile runtime signals) - fine for per-task checks that fm-pr-check
+# writes at runtime, but a fleet-wide plugin must survive a fresh clone. So a
+# plugin's source lives tracked under bin/check-plugins/<name>.check.sh and is
+# symlinked into state/<name>.check.sh at runtime so the watcher picks it up with
+# no watcher changes. This script owns that lifecycle.
+#
+#   fm-plugin.sh add <name> <script>
+#       Install <script> as plugin <name>: copy its content to the tracked
+#       canonical home bin/check-plugins/<name>.check.sh and point
+#       state/<name>.check.sh at it. If state/<name>.check.sh already exists as a
+#       real file (e.g. it is the source you just named), its content is now held
+#       canonically and the path becomes the symlink.
+#   fm-plugin.sh remove <name>
+#       Drop the state/ symlink and the tracked canonical source.
+#   fm-plugin.sh list
+#       Print installed plugins and whether their state/ symlink is live.
+#   fm-plugin.sh sync
+#       Recreate state/ symlinks for every canonical plugin. Idempotent and
+#       non-fatal; bootstrap calls this so plugins come back alive after a fresh
+#       clone. Never clobbers a real (non-symlink) state file - that may be a live
+#       per-task check.
+set -eu
+
+FM_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
+PLUGINS="$FM_ROOT/bin/check-plugins"
+STATE="$FM_ROOT/state"
+
+# Print a prefixed error on stderr and exit 1.
+die() { printf 'fm-plugin: %s\n' "$*" >&2; exit 1; }
+
+# Succeed only for a non-empty name made of [A-Za-z0-9._-] that does not start
+# with "fm-".
+valid_name() {
+  case "$1" in
+    ''|*[!A-Za-z0-9._-]*) return 1 ;;
+    fm-*) return 1 ;; # reserved for task ids (state/<id>.check.sh)
+  esac
+  return 0
+}
+
+# Path helpers: $1 is a plugin name; each prints the matching absolute path.
+ensure_dirs() { mkdir -p "$PLUGINS" "$STATE"; }
+canonical_for() { printf '%s/%s.check.sh' "$PLUGINS" "$1"; }
+state_link_for() { printf '%s/%s.check.sh' "$STATE" "$1"; }
+
+# add <name> <script>: copy <script> to the canonical plugin path, make it
+# executable, and point the state/ symlink at it. Dies on a bad name or a
+# missing source file.
+cmd_add() {
+  [ $# -eq 2 ] || die "usage: fm-plugin.sh add <name> <script>"
+  local name=$1 src=$2 canon link
+  valid_name "$name" || die "invalid plugin name: '$name' (use [A-Za-z0-9._-], must not start with 'fm-')"
+  [ -f "$src" ] || die "source script not found: $src"
+  ensure_dirs
+  canon="$(canonical_for "$name")"
+  link="$(state_link_for "$name")"
+  # Copy content FIRST: <script> may itself be the state path we are about to
+  # replace with a symlink. Skip the copy when <script> already resolves to the
+  # canonical file (re-adding via the live state symlink or the canonical path
+  # itself): cp refuses to copy a file onto itself, which would abort the
+  # command under set -e even though there is nothing left to do.
+  if [ ! "$src" -ef "$canon" ]; then
+    cp -f -- "$src" "$canon"
+  fi
+  chmod +x "$canon"
+  # If a real (non-symlink) file sits at the state path, drop it - its content now
+  # lives canonically. A pre-existing symlink is just refreshed by ln -sfn.
+  if [ -e "$link" ] && [ ! -L "$link" ]; then rm -f "$link"; fi
+  ln -sfn "$canon" "$link"
+  printf 'added plugin %s\n  canonical: %s\n  state link: %s\n' "$name" "$canon" "$link"
+}
+
+# remove <name>: delete the state/ symlink and the canonical source. Dies when
+# neither exists.
+cmd_remove() {
+  [ $# -eq 1 ] || die "usage: fm-plugin.sh remove <name>"
+  local name=$1 canon link
+  valid_name "$name" || die "invalid plugin name: '$name'"
+  canon="$(canonical_for "$name")"
+  link="$(state_link_for "$name")"
+  { [ -e "$canon" ] || [ -L "$link" ]; } || die "no such plugin: $name"
+  # Only a symlink at the state path belongs to this tool. A real file there
+  # may be a live per-task check that shares the name, so leave it alone - the
+  # same rule cmd_sync follows.
+  if [ -L "$link" ]; then rm -f "$link"; fi
+  rm -f "$canon"
+  printf 'removed plugin %s\n' "$name"
+}
+
+# list: print one "<name>\tlive|stale" line per canonical plugin, or a
+# placeholder line when no plugin is installed.
+cmd_list() {
+  ensure_dirs
+  local f name link found=0
+  for f in "$PLUGINS"/*.check.sh; do
+    [ -e "$f" ] || continue
+    found=1
+    name="$(basename "$f" .check.sh)"
+    link="$(state_link_for "$name")"
+    if [ -L "$link" ] && [ -e "$link" ]; then
+      printf '%s\tlive\n' "$name"
+    else
+      printf '%s\tstale (run: bin/fm-plugin.sh sync)\n' "$name"
+    fi
+  done
+  # Decide "nothing installed" from the plugins actually found, not from the
+  # directory being empty: unrelated files in the directory must not suppress
+  # the placeholder line.
+  if [ "$found" -eq 0 ]; then printf '(no plugins installed)\n'; fi
+}
+
+# sync: (re)create the state/ symlink for every canonical plugin. Always
+# returns 0 - a failure to create the directories or a single link is reported
+# on stderr and skipped, so a bootstrap that calls this is never aborted.
+cmd_sync() {
+  local f name link
+  ensure_dirs || return 0
+  for f in "$PLUGINS"/*.check.sh; do
+    [ -e "$f" ] || continue
+    name="$(basename "$f" .check.sh)"
+    valid_name "$name" || continue
+    link="$(state_link_for "$name")"
+    # Never clobber a real (non-symlink) state file: it may be a live per-task
+    # check that happens to share the name.
+    if [ -e "$link" ] && [ ! -L "$link" ]; then continue; fi
+    ln -sfn "$f" "$link" || printf 'fm-plugin: sync: could not link %s\n' "$name" >&2
+  done
+  return 0
+}
+
+[ $# -ge 1 ] || die "usage: fm-plugin.sh <add|remove|list|sync> ..."
+cmd=$1; shift
+case "$cmd" in
+  add) cmd_add "$@" ;;
+  remove|rm) cmd_remove "$@" ;;
+  list|ls) cmd_list "$@" ;;
+  sync) cmd_sync "$@" ;;
+  -h|--help|help)
+    # Print the whole leading comment block (line 2 up to the first non-comment
+    # line) instead of a fixed line range, so the help text cannot be truncated
+    # when the header grows or shrinks.
+    awk 'NR==1 { next } /^#/ { sub(/^# ?/, ""); print; next } { exit }' "${BASH_SOURCE[0]}"
+    ;;
+  *) die "unknown command: $cmd (use add|remove|list|sync)" ;;
+esac
diff --git a/bin/fm-teardown.sh b/bin/fm-teardown.sh
index e95e4358..dfd97dec 100755
--- a/bin/fm-teardown.sh
+++ b/bin/fm-teardown.sh
@@ -205,7 +205,7 @@ pr_is_merged() {
 # "added".
Returns non-zero when inconclusive (no default ref, or a merge conflict), # so the caller refuses rather than guesses. content_in_default() { - local name ref default_tree merged_tree + local name ref default_tree merged_tree base merge_out name=$(default_branch) || return 1 if git -C "$WT" remote get-url origin >/dev/null 2>&1; then git -C "$WT" fetch --quiet origin "+refs/heads/$name:refs/remotes/origin/$name" >/dev/null 2>&1 || return 1 @@ -217,9 +217,21 @@ content_in_default() { fi default_tree=$(git -C "$WT" rev-parse --quiet --verify "$ref^{tree}" 2>/dev/null) || return 1 [ -n "$default_tree" ] || return 1 - merged_tree=$(git -C "$WT" merge-tree --write-tree "$ref" HEAD 2>/dev/null) || return 1 - merged_tree=$(printf '%s\n' "$merged_tree" | head -1) - [ "$merged_tree" = "$default_tree" ] + # Modern git (>= 2.38) writes the merged tree object on stdout; the merged tree + # equals the default branch's tree exactly when the change already landed. + if merged_tree=$(git -C "$WT" merge-tree --write-tree "$ref" HEAD 2>/dev/null); then + merged_tree=$(printf '%s\n' "$merged_tree" | head -1) + [ "$merged_tree" = "$default_tree" ] + return $? + fi + # Older git (< 2.38) lacks --write-tree; fall back to the legacy 3-way form + # `git merge-tree `, which prints only entries that + # differ from (the default branch). An empty result therefore means + # merging HEAD into the default branch is a no-op - the change landed - while a + # net change or a conflict prints something, so the fail-safe (refuse) holds. 
+ base=$(git -C "$WT" merge-base "$ref" HEAD 2>/dev/null) || return 1 + merge_out=$(git -C "$WT" merge-tree "$base" "$ref" HEAD 2>/dev/null) || return 1 + [ -z "$merge_out" ] } # Has the worktree's committed work actually LANDED, though its commits are not diff --git a/docs/architecture.md b/docs/architecture.md index 317122d0..0edc7478 100644 --- a/docs/architecture.md +++ b/docs/architecture.md @@ -20,6 +20,8 @@ Crew status files are append-only wake-event logs, not current-state fields. `bin/fm-crew-state.sh ` is the cheap current-state read for an actionable heartbeat review: it attributes the matching no-mistakes run, active or terminal, to the crew's own branch and keeps that run-step authoritative even if the pane has closed. Only when no matching run exists does it fall back to the pane busy-signature and then the status log; a dead pane without a run reports unknown instead of trusting a stale log. Optional X mode rides the same check path: bootstrap drops a local `state/x-watch.check.sh` shim only after the user opts in with `FMX_PAIRING_TOKEN`, and non-X homes keep the default watcher behavior. +Durable fleet-wide plugins live tracked under `bin/check-plugins/`, are symlinked into `state/*.check.sh` by `fm-plugin.sh`, and are restored by bootstrap's `sync` so they survive a fresh clone; the bundled `done-crewmate` plugin is a deterministic recurring backstop that wakes firstmate whenever a terminal-status crewmate's window is still alive, so finished work is never left idle instead of being progressed or torn down. +An optional GitHub events watcher (`bin/fm-github-watch.sh`) can be wired in as another check script to surface new comments, rolled-up CI state flips, reviews, and merge/close transitions for the fleet's open PRs. 
Routine re-arms go through `bin/fm-watch-arm.sh`, which forks the watcher as a tracked child, verifies it is genuinely alive with a fresh liveness beacon, and prints exactly one honest status line (`started` / `healthy` / `FAILED`, the last exiting non-zero) - never a false `already running` off a dying process. Its `--restart` mode signals only the watcher recorded in the current home's `state/.watch.lock`, so restarting one home cannot kill sibling secondmate watchers. diff --git a/docs/configuration.md b/docs/configuration.md index 3f605172..8d33a2c4 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -142,6 +142,15 @@ In dry-run, `fm-x-dismiss.sh` records `{request_id, endpoint:"dismiss"}` to the The live answer and follow-up bodies intentionally stay the same shape, including optional `image`; the relay distinguishes them by endpoint, and dismiss stays `{request_id}`. These paths need `jq` to build the JSON payload, but they run before token and network checks, so they need neither `FMX_PAIRING_TOKEN` nor `curl`. +## GitHub events watcher (fm-github-watch.sh) + +`bin/fm-github-watch.sh` is an optional GitHub events watcher you wire in as a check script (`ln -s ../bin/fm-github-watch.sh state/github-events.check.sh`) so the existing watcher sweep surfaces new comments, rolled-up CI state flips, reviews, and merge/close transitions for the fleet's open PRs. +Run `fm-github-watch.sh --daemon` for a self-looping poller, `--once` for a single poll, or use the `filter`/`contributor`/`status` subcommands. +Filter names are `comments`, `ci`, `reviews`, `merge`; the CI filter rolls the Checks API up to a single overall state per PR and fires one event only when that state flips. +Configuration lives in the local `state/.github-watch-config` (key=value lines) and per-PR seen state in `state/.github-watch-seen/`, both gitignored. 
+The contributor is resolved by precedence: the configured value, then `FM_GH_CONTRIBUTOR`, then the authenticated `gh` user; there is no hardcoded default, so a shared tool polls whoever is logged in.
+Comment, review, and check-run counts fetch up to 100 items of each kind per PR, so a single PR with more than 100 of one kind has that count capped at 100 and later items of that kind go unnoticed.
+
 ## Environment variables
 
 Runtime tuning via environment variables (defaults shown):
@@ -199,4 +208,10 @@ FM_CRASH_BACKOFF=60 # seconds to wait after crossing the crash th
 FM_CRASH_NORMAL_SLEEP=5 # seconds to wait after an isolated watcher crash
 FM_LOG_MAX_BYTES=1048576 # daemon log size that triggers trimming
 FM_LOG_KEEP_LINES=2000 # daemon log lines kept when trimming
+# GitHub events watcher (bin/fm-github-watch.sh); config also via state/.github-watch-config
+FM_GH_CONTRIBUTOR= # contributor login to poll; config value wins, else this env value, else authenticated gh user; no default
+FM_GH_POLL_SECS=300 # daemon poll interval between sweeps
+FM_GH_CLOSE_REPROBE_SECS=7200 # seconds after a PR closes to keep re-probing for a close->reopen->merge
+FM_GH_CONCURRENCY=8 # max PRs polled concurrently per sweep; non-numeric/0 falls back to the default
+FM_GH_IGNORE_CHECKS='PR must be raised via no-mistakes' # regex of check-run names dropped from the CI roll-up; empty disables filtering
 ```
diff --git a/docs/scripts.md b/docs/scripts.md
index 078ddc09..77ce4bcb 100644
--- a/docs/scripts.md
+++ b/docs/scripts.md
@@ -35,6 +35,8 @@ Each file also starts with a short header comment.
| `fm-peek.sh` | Print a bounded tail of a crewmate pane |
 | `fm-pr-check.sh` | Record `pr=` and GitHub's `pr_head=` when available for a PR-ready task, then arm the watcher's merge poll |
 | `fm-pr-merge.sh` | Record `pr=` and available `pr_head=` via `fm-pr-check.sh`, then merge the task PR through `gh-axi pr merge` with any extra flags |
+| `fm-plugin.sh` | Manage durable watcher check plugins: track each canonical source under `bin/check-plugins/`, symlink it into `state/<name>.check.sh` so the watcher sweeps it, with `add`/`remove`/`list`/`sync` subcommands (bootstrap calls `sync` so plugins survive a fresh clone) |
+| `fm-github-watch.sh` | GitHub events watcher for the fleet's open PRs: run as a check script to surface new comments, rolled-up CI state flips, reviews, and merge/close transitions as one-line events, with `--once`/`--daemon` modes and `filter`/`contributor`/`status` subcommands |
 | `fm-promote.sh` | Promote a scout task in place so it becomes a protected ship task |
 | `fm-teardown.sh` | Return a clean, landed ship worktree or retire/release a secondmate home; requires scout reports, checks child work, removes firstmate-owned hook artifacts, and prints the backend-aware backlog reminder |
 | `fm-harness.sh` | Detect the running harness; resolve the effective crewmate (`crew`) or secondmate-launch (`secondmate`) harness |
@@ -45,3 +47,11 @@ Each file also starts with a short header comment.
| `fm-x-dismiss.sh` | Dismiss or dry-run preview a skipped X mention without replying by sending `{request_id}` to the relay's `connector/dismiss` endpoint |
 | `fm-x-link.sh` | Link a spawned task to its originating X mention by recording `x_request=` and `x_request_ts=` in `state/.meta` |
 | `fm-x-followup.sh` | Detect, post, and clear the single completion follow-up for an X-linked task, forwarding optional `--image `, enforcing the local 24h window, and retrying only when the relay post fails |
+
+## Durable check plugins (`bin/check-plugins/`)
+
+The watcher discovers check scripts via a `state/*.check.sh` glob, but `state/` is gitignored, so a fleet-wide plugin must survive a fresh clone.
+Each plugin's canonical source lives tracked under `bin/check-plugins/.check.sh` and is symlinked into `state/.check.sh` at runtime (managed by `fm-plugin.sh`, restored by bootstrap's `sync`).
+A plugin obeys the same contract as a per-task check: print one line to wake firstmate, print nothing to keep sleeping.
+
+- `done-crewmate.check.sh` - deterministic recurring backstop that wakes firstmate whenever a terminal-status (`done`/`failed`/`blocked`) crewmate's tmux window is still alive, so finished work is never left idle instead of being progressed or torn down.
diff --git a/tests/fm-github-watch.test.sh b/tests/fm-github-watch.test.sh
new file mode 100755
index 00000000..3091b28c
--- /dev/null
+++ b/tests/fm-github-watch.test.sh
@@ -0,0 +1,873 @@
+#!/usr/bin/env bash
+# Behavior tests for fm-github-watch.sh.
+# A fake `gh` on PATH serves canned, file-driven responses so each test can
+# mutate fixture state between poll cycles and assert on emitted events.
+set -u
+
+# Repository root (the parent of this tests/ directory) and the script under test.
+ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
+GH_WATCH="$ROOT/bin/fm-github-watch.sh"
+
+# fail <message>: print a "not ok" line on stderr and abort the run with exit 1.
+fail() {
+  printf 'not ok - %s\n' "$1" >&2
+  exit 1
+}
+
+# pass <message>: print an "ok" line on stdout.
+pass() {
+  printf 'ok - %s\n' "$1"
+}
+
+# TMP_ROOT starts empty so cleanup is a no-op if the trap fires before mktemp
+# has assigned it.
+TMP_ROOT=
+# EXIT handler: remove the per-run temp tree once it has been created.
+cleanup() {
+  [ -n "${TMP_ROOT:-}" ] && rm -rf "$TMP_ROOT"
+}
+trap cleanup EXIT
+
+TMP_ROOT=$(mktemp -d "${TMPDIR:-/tmp}/fm-ghwatch-tests.XXXXXX")
+
+# make_case <name>: build an isolated case dir under $TMP_ROOT with its own
+# state/, fixture/ and fakebin/gh, and echo its root on stdout.
+# The fake gh reads fixtures from $GH_FIXTURE (one PR's data per set of files).
+# The heredoc delimiter is quoted, so the fake gh script is written out
+# literally with no expansion at generation time.
+make_case() {
+  local name=$1 dir fakebin
+  dir="$TMP_ROOT/$name"
+  fakebin="$dir/fakebin"
+  mkdir -p "$dir/state" "$dir/fixture" "$fakebin"
+  cat > "$fakebin/gh" <<'GH'
+#!/usr/bin/env bash
+# Minimal, file-driven gh stand-in for fm-github-watch tests.
+set -u
+FX="${GH_FIXTURE:?no fixture}"
+emit_default() { :; } # most commands print nothing by default
+
+sub="${1:-}"
+shift || true
+
+case "$sub" in
+  search)
+    # gh search prs ... : print "owner/reponum" lines.
+    [ -f "$FX/prs" ] && cat "$FX/prs"
+    exit 0
+    ;;
+  api)
+    # Injectable transient API error: when $FX/api-error exists, emit a GitHub
+    # error body to stdout and exit non-zero — exactly how real gh behaves on a
+    # 401/5xx (the --jq template is bypassed on error responses). This is the
+    # bug surface: the raw error JSON reached stdout and was parsed as CI data.
+    if [ -f "$FX/api-error" ]; then
+      printf '{"message":"Bad credentials","documentation_url":"https://docs.github.com/rest","status":"401"}\n'
+      exit 1
+    fi
+    # gh api --jq ... : find the repos/... path argument.
+    path=""
+    for a in "$@"; do
+      case "$a" in repos/*) path=$a ;; esac
+    done
+    path="${path%%\?*}" # strip any ?per_page=... query before matching
+    # repos/OWNER/REPO/issues/NUM/comments -> comments-OWNER-REPO-NUM
+    # repos/OWNER/REPO/pulls/NUM/reviews -> reviews-OWNER-REPO-NUM
+    # repos/OWNER/REPO/commits/SHA/check-runs -> ci-SHA
+    case "$path" in
+      */issues/*/comments)
+        rest=${path#repos/} # OWNER/REPO/issues/NUM/comments
+        owner=${rest%%/*}; rest=${rest#*/}
+        repo=${rest%%/*}; rest=${rest#*/}
+        num=${rest#issues/}; num=${num%/comments}
+        f="$FX/comments-$owner-$repo-$num"
+        [ -f "$f" ] && { cat "$f"; exit 0; }
+        echo 0; exit 0
+        ;;
+      */pulls/*/reviews)
+        rest=${path#repos/}
+        owner=${rest%%/*}; rest=${rest#*/}
+        repo=${rest%%/*}; rest=${rest#*/}
+        num=${rest#pulls/}; num=${num%/reviews}
+        f="$FX/reviews-$owner-$repo-$num"
+        [ -f "$f" ] && { cat "$f"; exit 0; }
+        echo 0; exit 0
+        ;;
+      */commits/*/check-runs)
+        sha=${path##*/commits/}; sha=${sha%/check-runs}
+        f="$FX/ci-$sha"
+        [ -f "$f" ] || exit 0
+        # The watcher passes --jq to roll check-runs up into a single overall
+        # state; run that same filter against the JSON fixture so the real
+        # roll-up logic (success/failure/pending/neutral) is exercised, not
+        # just the comparison. Falls back to cat for any caller without --jq.
+        jq_expr=""
+        prev=""
+        for a in "$@"; do
+          if [ "$prev" = "--jq" ]; then jq_expr=$a; fi
+          prev=$a
+        done
+        if [ -n "$jq_expr" ]; then
+          jq -r "$jq_expr" "$f"
+        else
+          cat "$f"
+        fi
+        exit 0
+        ;;
+    esac
+    exit 0
+    ;;
+  pr)
+    # gh pr view -R owner/repo --json -q ...
+    num=""; repo=""; field=""
+    prev=""
+    for a in "$@"; do
+      if [ "$prev" = "-R" ]; then repo=$a; fi
+      if [ "$prev" = "--json" ]; then field=$a; fi
+      case "$a" in [0-9]*) num=$a ;; esac
+      prev=$a
+    done
+    owner=${repo%%/*}; rn=${repo#*/}
+    case "$field" in
+      state)
+        f="$FX/state-$owner-$rn-$num"
+        [ -f "$f" ] && { cat "$f"; exit 0; }
+        echo "OPEN"; exit 0
+        ;;
+      headRefOid)
+        f="$FX/sha-$owner-$rn-$num"
+        [ -f "$f" ] && { cat "$f"; exit 0; }
+        echo "deadbeef"; exit 0
+        ;;
+    esac
+    exit 0
+    ;;
+esac
+exit 0
+GH
+  chmod +x "$fakebin/gh"
+  printf '%s\n' "$dir"
+}
+
+# run_poll <dir>: invoke one poll cycle (--once) with the case's fake gh first
+# on PATH and the case's state/ dir passed via FM_STATE_OVERRIDE.
+# A known contributor is pinned via env so discovery proceeds even though the
+# fake gh does not implement `api user`.
+run_poll() {
+  local dir=$1
+  PATH="$dir/fakebin:$PATH" GH_FIXTURE="$dir/fixture" \
+    FM_GH_CONTRIBUTOR=e-jung \
+    FM_STATE_OVERRIDE="$dir/state" \
+    bash "$GH_WATCH" --once
+}
+
+# seed_prs <dir> [line...]: seed the open-PR list a fake gh search returns.
+# The list file is truncated first; calling with no lines leaves it empty.
+seed_prs() {
+  local dir=$1
+  shift
+  : > "$dir/fixture/prs"
+  local ln
+  for ln in "$@"; do printf '%s\n' "$ln" >> "$dir/fixture/prs"; done
+}
+
+# seed_ci <dir> <sha> <conclusion>... -> write a JSON check-runs fixture the
+# fake gh feeds through the watcher's real --jq roll-up. Each conclusion arg is
+# a Checks-API value ("success","failure","neutral","skipped","timed_out",...)
+# or the literal "pending" for a still-running check (status in_progress,
+# conclusion null). The fake gh runs the watcher's --jq filter on this JSON, so
+# the actual roll-up logic (not just the comparison) is what the tests exercise.
+seed_ci() {
+  # $1 = case dir, $2 = commit sha, remaining args = one conclusion per check-run.
+  local f="$1/fixture/ci-$2"
+  shift 2
+  printf '%s' '{"check_runs":[' > "$f"
+  local first=1 c status conclusion
+  for c in "$@"; do
+    [ "$first" = 1 ] || printf ',' >> "$f"
+    first=0
+    if [ "$c" = "pending" ]; then
+      status="in_progress"; conclusion="null"
+    else
+      status="completed"; conclusion="\"$c\""
+    fi
+    printf '{"status":"%s","conclusion":%s}' "$status" "$conclusion" >> "$f"
+  done
+  printf '%s' ']}' >> "$f"
+}
+
+# seed_ci_named <dir> <sha> <name>=<conclusion>...
+# Like seed_ci but each check-run carries a name, so name-based ignore filters
+# (FM_GH_IGNORE_CHECKS) can be exercised through the real --jq roll-up. The
+# literal "pending" still means a running check (conclusion null). A name is
+# embedded as a JSON string literal (backslash and double-quote escaped).
+seed_ci_named() {
+  local f="$1/fixture/ci-$2"
+  shift 2
+  printf '%s' '{"check_runs":[' > "$f"
+  local first=1 arg name c status conclusion esc
+  for arg in "$@"; do
+    name=${arg%%=*}; c=${arg#*=}
+    [ "$first" = 1 ] || printf ',' >> "$f"
+    first=0
+    if [ "$c" = "pending" ]; then status="in_progress"; conclusion="null"; else status="completed"; conclusion="\"$c\""; fi
+    esc=${name//\\/\\\\}; esc=${esc//\"/\\\"}
+    printf '{"status":"%s","conclusion":%s,"name":"%s"}' "$status" "$conclusion" "$esc" >> "$f"
+  done
+  printf '%s' ']}' >> "$f"
+}
+
+test_filter_toggling() {
+  local dir
+  dir=$(make_case filter-toggle)
+  local cfg="$dir/state/.github-watch-config"
+
+  run_poll "$dir" >/dev/null 2>&1 # ensure default config materializes
+  # Default: all four filters active.
+  FM_STATE_OVERRIDE="$dir/state" bash "$GH_WATCH" filter list > "$dir/list.out"
+  grep -Fxq comments "$dir/list.out" || fail "comments filter not on by default"
+  grep -Fxq ci "$dir/list.out" || fail "ci filter not on by default"
+  grep -Fxq reviews "$dir/list.out" || fail "reviews filter not on by default"
+  grep -Fxq merge "$dir/list.out" || fail "merge filter not on by default"
+
+  # Turn comments off -> persisted in config, absent from list.
+  FM_STATE_OVERRIDE="$dir/state" bash "$GH_WATCH" filter comments off > "$dir/off.out"
+  grep -Eq '^filters=ci,reviews,merge$' "$dir/off.out" || fail "turning comments off gave unexpected result"
+  ! awk -F= '/^filters=/{print $2}' "$cfg" | grep -qw comments \
+    || fail "comments should be absent from filters= when toggled off"
+
+  # Turn comments back on.
+  FM_STATE_OVERRIDE="$dir/state" bash "$GH_WATCH" filter comments on > "$dir/on.out"
+  grep -Eq '^filters=ci,reviews,merge,comments$' "$dir/on.out" || fail "turning comments on gave unexpected result"
+
+  # Disabling then re-enabling is idempotent (no dupes).
+  FM_STATE_OVERRIDE="$dir/state" bash "$GH_WATCH" filter ci off >/dev/null
+  FM_STATE_OVERRIDE="$dir/state" bash "$GH_WATCH" filter ci on >/dev/null
+  FM_STATE_OVERRIDE="$dir/state" bash "$GH_WATCH" filter list > "$dir/list2.out"
+  [ "$(grep -Fc ci "$dir/list2.out")" -eq 1 ] || fail "filter toggling duplicated the ci filter"
+
+  pass "filter on/off toggles persist in config without duplicates"
+}
+
+test_first_run_baselines_silently() {
+  local dir out
+  dir=$(make_case baseline)
+  seed_prs "$dir" $'kunchenguid/firstmate\t30'
+  printf '5\n' > "$dir/fixture/comments-kunchenguid-firstmate-30"
+  printf '2\n' > "$dir/fixture/reviews-kunchenguid-firstmate-30"
+
+  out=$(run_poll "$dir")
+  [ -z "$out" ] || fail "first poll should baseline silently, but printed: $out"
+  # Seen file exists with the baselined high-water marks.
+  local sf="$dir/state/.github-watch-seen/kunchenguid-firstmate-30"
+  [ -f "$sf" ] || fail "baseline seen file was not written"
+  grep -Fxq "comments=5" "$sf" || fail "comments high-water not baselined"
+  grep -Fxq "reviews=2" "$sf" || fail "reviews high-water not baselined"
+  grep -Fxq "initialized=1" "$sf" || fail "initialized marker missing"
+
+  pass "first run for a PR baselines silently with no event"
+}
+
+test_comment_detection_advances_seen_after_print() {
+  local dir out sf
+  dir=$(make_case comment)
+  seed_prs "$dir" $'kunchenguid/firstmate\t30'
+  printf '5\n' > "$dir/fixture/comments-kunchenguid-firstmate-30"
+  sf="$dir/state/.github-watch-seen/kunchenguid-firstmate-30"
+
+  # Cycle 1: baseline.
+  run_poll "$dir" >/dev/null
+  grep -Fxq "comments=5" "$sf" || fail "baseline comments not set"
+
+  # Cycle 2: two new comments.
+  printf '7\n' > "$dir/fixture/comments-kunchenguid-firstmate-30"
+  out=$(run_poll "$dir")
+  printf '%s\n' "$out" | grep -Fq "COMMENT: kunchenguid/firstmate#30 has 2 new comment(s)" \
+    || fail "comment increase did not emit event; got: $out"
+  # Seen marker advanced to the new high-water (after the print).
+  grep -Fxq "comments=7" "$sf" || fail "seen marker not advanced after event"
+
+  # Cycle 3: no change -> silence.
+  out=$(run_poll "$dir")
+  [ -z "$out" ] || fail "steady-state poll should be silent; got: $out"
+
+  pass "comment increase emits event and advances seen after the print"
+}
+
+test_losslessness_redetects_when_seen_write_fails() {
+  local dir out sf
+  # The failing seen write is simulated with a read-only directory, which does
+  # not stop root from writing. Skip as root rather than report a false failure.
+  if [ "$(id -u)" -eq 0 ]; then
+    pass "failed seen write leaves the event re-detectable (skipped: running as root)"
+    return 0
+  fi
+  dir=$(make_case lossless)
+  seed_prs "$dir" $'kunchenguid/firstmate\t30'
+  printf '5\n' > "$dir/fixture/comments-kunchenguid-firstmate-30"
+  sf="$dir/state/.github-watch-seen/kunchenguid-firstmate-30"
+
+  # Cycle 1: baseline (writes the seen file while dir is writable).
+  run_poll "$dir" >/dev/null
+  grep -Fxq "comments=5" "$sf" || fail "baseline did not write seen"
+
+  # New comment arrives.
+  printf '7\n' > "$dir/fixture/comments-kunchenguid-firstmate-30"
+
+  # Simulate a failing seen write: make the seen dir read-only so atomic_write
+  # cannot advance the marker. The event must STILL print this cycle (print
+  # happens before the seen write).
+  chmod a-w "$dir/state/.github-watch-seen"
+  out=$(run_poll "$dir")
+  chmod u+w "$dir/state/.github-watch-seen"
+  printf '%s\n' "$out" | grep -Fq "COMMENT: kunchenguid/firstmate#30 has 2 new comment(s)" \
+    || fail "event did not print when seen write failed; got: $out"
+  # Marker must NOT have advanced (the whole point).
+  grep -Fxq "comments=5" "$sf" || fail "seen marker advanced despite failing write (permanent swallow)"
+
+  # Next cycle (writable again) re-detects the same event: lossless.
+  out=$(run_poll "$dir")
+  printf '%s\n' "$out" | grep -Fq "COMMENT: kunchenguid/firstmate#30 has 2 new comment(s)" \
+    || fail "event was not re-detected after failed seen write; got: $out"
+
+  pass "failed seen write leaves the event re-detectable (lossless)"
+}
+
+test_merge_detection_on_left_open() {
+  local dir out sf
+  dir=$(make_case merge)
+  seed_prs "$dir" $'kunchenguid/firstmate\t42'
+  printf 'OPEN\n' > "$dir/fixture/state-kunchenguid-firstmate-42"
+  sf="$dir/state/.github-watch-seen/kunchenguid-firstmate-42"
+
+  # Cycle 1: baseline the open PR.
+  run_poll "$dir" >/dev/null
+  grep -Fxq "state=OPEN" "$sf" || fail "baseline state not recorded as OPEN"
+
+  # PR merges: it leaves the open search, and its state becomes MERGED.
+  : > "$dir/fixture/prs" # no longer open
+  printf 'MERGED\n' > "$dir/fixture/state-kunchenguid-firstmate-42"
+
+  out=$(run_poll "$dir")
+  printf '%s\n' "$out" | grep -Fq "MERGED: kunchenguid/firstmate#42" \
+    || fail "open->merged transition did not emit event; got: $out"
+  grep -Fxq "state=MERGED" "$sf" || fail "seen state not advanced to MERGED"
+
+  # A later cycle does not re-report the merge (state no longer OPEN).
+  out=$(run_poll "$dir")
+  if printf '%s\n' "$out" | grep -Fq "MERGED"; then fail "merge event re-reported after settling"; fi
+
+  pass "PR leaving the open set as merged emits MERGED once"
+}
+
+test_closed_then_merged_is_not_swallowed() {
+  local dir out sf
+  dir=$(make_case close-merge)
+  seed_prs "$dir" $'kunchenguid/firstmate\t42'
+  printf 'OPEN\n' > "$dir/fixture/state-kunchenguid-firstmate-42"
+  sf="$dir/state/.github-watch-seen/kunchenguid-firstmate-42"
+  run_poll "$dir" >/dev/null # baseline OPEN
+
+  # PR is closed (leaves the open set): emit CLOSED once.
+  : > "$dir/fixture/prs"
+  printf 'CLOSED\n' > "$dir/fixture/state-kunchenguid-firstmate-42"
+  out=$(run_poll "$dir")
+  printf '%s\n' "$out" | grep -Fq "CLOSED: kunchenguid/firstmate#42" \
+    || fail "open->closed did not emit; got: $out"
+
+  # Steady closed: must NOT re-emit CLOSED every cycle.
+  out=$(run_poll "$dir")
+  if printf '%s\n' "$out" | grep -Fq "CLOSED"; then fail "CLOSED re-emitted while settled"; fi
+
+  # Closed -> reopened -> merged all between polls: MERGED must still fire
+  # (CLOSED is not terminal; the watcher re-probes it).
+  printf 'MERGED\n' > "$dir/fixture/state-kunchenguid-firstmate-42"
+  out=$(run_poll "$dir")
+  printf '%s\n' "$out" | grep -Fq "MERGED: kunchenguid/firstmate#42" \
+    || fail "close->merge transition was swallowed; got: $out"
+
+  pass "CLOSED is treated as non-terminal: close->merge still emits MERGED"
+}
+
+test_closed_pr_reprobe_window_is_bounded() {
+  # A closed PR is re-probed only within CLOSE_REPROBE_SECS of closing, so
+  # accumulated closed PRs cannot push the fleet past the rate limit. With a
+  # zero window the PR is settled immediately: a later merge is intentionally
+  # not re-detected (the cost-bound tradeoff). The default window is generous.
+  local dir out sf
+  dir=$(make_case close-window)
+  seed_prs "$dir" $'kunchenguid/firstmate\t42'
+  printf 'OPEN\n' > "$dir/fixture/state-kunchenguid-firstmate-42"
+  sf="$dir/state/.github-watch-seen/kunchenguid-firstmate-42"
+  run_poll "$dir" >/dev/null # baseline OPEN
+  : > "$dir/fixture/prs"
+  printf 'CLOSED\n' > "$dir/fixture/state-kunchenguid-firstmate-42"
+  out=$(run_poll "$dir") # emits CLOSED, stamps closed_at
+  printf '%s\n' "$out" | grep -Fq "CLOSED: kunchenguid/firstmate#42" || fail "close not emitted"
+  grep -Fq "closed_at=" "$sf" || fail "closed_at not stamped on close"
+
+  # Zero window: the aged-out CLOSED PR is not re-probed, so a merge is missed.
+  printf 'MERGED\n' > "$dir/fixture/state-kunchenguid-firstmate-42"
+  out=$(PATH="$dir/fakebin:$PATH" GH_FIXTURE="$dir/fixture" FM_GH_CONTRIBUTOR=e-jung \
+    FM_GH_CLOSE_REPROBE_SECS=0 FM_STATE_OVERRIDE="$dir/state" bash "$GH_WATCH" --once)
+  if printf '%s\n' "$out" | grep -Fq "MERGED"; then
+    fail "aged-out CLOSED PR was re-probed (cost not bounded)"
+  fi
+  pass "closed PR past the re-probe window stops consuming an API call"
+}
+
+test_closed_via_open_search_stamps_closed_at() {
+  # The process_pr close path: a PR that closes while STILL listed in the laggy
+  # open-search index (not emptied first, unlike the detect_left_open test
+  # above). process_pr observes OPEN->CLOSED, emits CLOSED, and build_seen must
+  # stamp closed_at so the later detect_left_open skip guard can age it out.
+  # Without closed_at the skip guard never fires and every cycle re-probes the
+  # settled PR with a gh pr view call, unbounded.
+  local dir out sf
+  dir=$(make_case close-via-open)
+  seed_prs "$dir" $'kunchenguid/firstmate\t42'
+  printf 'OPEN\n' > "$dir/fixture/state-kunchenguid-firstmate-42"
+  sf="$dir/state/.github-watch-seen/kunchenguid-firstmate-42"
+  run_poll "$dir" >/dev/null # baseline OPEN
+  grep -Fxq "state=OPEN" "$sf" || fail "baseline state not OPEN"
+
+  # PR closes but is STILL in the open-search index: process_pr (not
+  # detect_left_open) observes the transition and must stamp closed_at.
+  printf 'CLOSED\n' > "$dir/fixture/state-kunchenguid-firstmate-42"
+  out=$(run_poll "$dir")
+  printf '%s\n' "$out" | grep -Fq "CLOSED: kunchenguid/firstmate#42" \
+    || fail "open->closed via open-search did not emit; got: $out"
+  grep -Fxq "state=CLOSED" "$sf" || fail "state not advanced to CLOSED"
+  grep -Fq "closed_at=" "$sf" \
+    || fail "process_pr close path did not stamp closed_at: $(cat "$sf")"
+
+  # PR leaves the open index. With closed_at now set, a zero re-probe window
+  # ages it out immediately, so a later merge is NOT re-detected and costs no
+  # gh pr view call. Without the fix closed_at stayed empty and the merge WAS
+  # re-detected every cycle (the unbounded cost).
+ : > "$dir/fixture/prs" + printf 'MERGED\n' > "$dir/fixture/state-kunchenguid-firstmate-42" + out=$(PATH="$dir/fakebin:$PATH" GH_FIXTURE="$dir/fixture" FM_GH_CONTRIBUTOR=e-jung \ + FM_GH_CLOSE_REPROBE_SECS=0 FM_STATE_OVERRIDE="$dir/state" bash "$GH_WATCH" --once) + if printf '%s\n' "$out" | grep -Fq "MERGED"; then + fail "aged-out CLOSED PR (closed via open-search) was re-probed (cost not bounded)" + fi + pass "close observed via the open-search index stamps closed_at and bounds re-probes" +} + +test_config_roundtrip() { + local dir + dir=$(make_case config) + FM_STATE_OVERRIDE="$dir/state" bash "$GH_WATCH" contributor captain-ej >/dev/null + FM_STATE_OVERRIDE="$dir/state" bash "$GH_WATCH" filter reviews off >/dev/null + FM_STATE_OVERRIDE="$dir/state" bash "$GH_WATCH" filter ci off >/dev/null + FM_STATE_OVERRIDE="$dir/state" bash "$GH_WATCH" contributor > "$dir/c.out" + [ "$(cat "$dir/c.out")" = "captain-ej" ] || fail "contributor did not roundtrip" + + FM_STATE_OVERRIDE="$dir/state" bash "$GH_WATCH" filter list > "$dir/f.out" + # comments + merge remain; ci + reviews disabled. + grep -Fxq comments "$dir/f.out" || fail "comments should remain on" + grep -Fxq merge "$dir/f.out" || fail "merge should remain on" + ! grep -Fxq ci "$dir/f.out" || fail "ci should be off" + ! grep -Fxq reviews "$dir/f.out" || fail "reviews should be off" + + # status reflects the persisted config. 
+ FM_STATE_OVERRIDE="$dir/state" bash "$GH_WATCH" status > "$dir/s.out" + grep -Eq '^contributor: captain-ej$' "$dir/s.out" || fail "status did not show contributor" + grep -Eq '^ ci: off$' "$dir/s.out" || fail "status did not show ci off" + + pass "config writes round-trip across contributor + filter subcommands" +} + +test_review_detection() { + local dir out sf + dir=$(make_case review) + seed_prs "$dir" $'kunchenguid/no-mistakes\t310' + printf '1\n' > "$dir/fixture/reviews-kunchenguid-no-mistakes-310" + sf="$dir/state/.github-watch-seen/kunchenguid-no-mistakes-310" + + run_poll "$dir" >/dev/null + grep -Fxq "reviews=1" "$sf" || fail "baseline reviews not set" + + printf '3\n' > "$dir/fixture/reviews-kunchenguid-no-mistakes-310" + out=$(run_poll "$dir") + printf '%s\n' "$out" | grep -Fq "REVIEW: kunchenguid/no-mistakes#310 has 2 new review(s)" \ + || fail "review increase did not emit event; got: $out" + grep -Fxq "reviews=3" "$sf" || fail "review high-water not advanced" + + pass "review count increase emits REVIEW event" +} + +test_ci_detection() { + local dir out sf + dir=$(make_case ci) + seed_prs "$dir" $'kunchenguid/no-mistakes\t310' + printf 'abcdef1\n' > "$dir/fixture/sha-kunchenguid-no-mistakes-310" + seed_ci "$dir" abcdef1 success success success + sf="$dir/state/.github-watch-seen/kunchenguid-no-mistakes-310" + + run_poll "$dir" >/dev/null + grep -Fxq "ci=success" "$sf" || fail "baseline ci state not rolled up to success" + + # One check goes red: the overall state flips success -> failure (one event). + seed_ci "$dir" abcdef1 failure success success + out=$(run_poll "$dir") + printf '%s\n' "$out" | grep -Fq "CI: kunchenguid/no-mistakes#310 -> failure" \ + || fail "ci state change did not emit event; got: $out" + grep -Fxq "ci=failure" "$sf" || fail "ci state not advanced to failure" + + # Steady state again: silence. 
+ out=$(run_poll "$dir") + [ -z "$out" ] || fail "steady-state ci poll should be silent; got: $out" + + pass "overall CI state change emits a single CI event" +} + +test_merge_filter_suppresses_merge_event() { + local dir out + dir=$(make_case merge-off) + seed_prs "$dir" $'kunchenguid/firstmate\t42' + printf 'OPEN\n' > "$dir/fixture/state-kunchenguid-firstmate-42" + run_poll "$dir" >/dev/null # baseline + + # Disable the merge filter; the PR then merges (leaves the open set). + FM_STATE_OVERRIDE="$dir/state" bash "$GH_WATCH" filter merge off >/dev/null + : > "$dir/fixture/prs" + printf 'MERGED\n' > "$dir/fixture/state-kunchenguid-firstmate-42" + out=$(run_poll "$dir") + if printf '%s\n' "$out" | grep -Fq "MERGED"; then + fail "merge event fired despite merge filter being off; got: $out" + fi + pass "merge filter off suppresses merge/close events" +} + +test_ci_carry_forward_across_empty_window() { + local dir out sf + dir=$(make_case ci-carry) + seed_prs "$dir" $'kunchenguid/no-mistakes\t310' + printf 'sha1\n' > "$dir/fixture/sha-kunchenguid-no-mistakes-310" + seed_ci "$dir" sha1 success success + sf="$dir/state/.github-watch-seen/kunchenguid-no-mistakes-310" + + # Baseline: CI passing for sha1 (rolled up to success). + run_poll "$dir" >/dev/null + grep -Fxq "ci=success" "$sf" || fail "baseline ci state not recorded" + + # New commit: sha changes, check-runs not populated yet (empty ci_state). + printf 'sha2\n' > "$dir/fixture/sha-kunchenguid-no-mistakes-310" + rm -f "$dir/fixture/ci-sha1" + # No ci-sha2 fixture yet -> ci_state returns empty. + out=$(run_poll "$dir") + [ -z "$out" ] || fail "transient empty ci window should be silent; got: $out" + # seen_ci must be carried forward (not dropped) so a later change still fires. + grep -Fxq "ci=success" "$sf" || fail "ci state was dropped during empty window" + + # CI completes for sha2 and FAILS: state differs from carried-forward success. 
+ seed_ci "$dir" sha2 failure success + out=$(run_poll "$dir") + printf '%s\n' "$out" | grep -Fq "CI: kunchenguid/no-mistakes#310 -> failure" \ + || fail "ci completion after empty window did not fire; got: $out" + + pass "overall CI state carries forward across an empty window and fires on change" +} + +test_all_filters_off_mutes_watcher() { + local dir out + dir=$(make_case all-off) + seed_prs "$dir" $'kunchenguid/firstmate\t30' + printf '5\n' > "$dir/fixture/comments-kunchenguid-firstmate-30" + run_poll "$dir" >/dev/null # baseline + + # Turn every filter off; the persisted config must keep filters empty (not + # fall back to defaults). + for f in comments ci reviews merge; do + FM_STATE_OVERRIDE="$dir/state" bash "$GH_WATCH" filter "$f" off >/dev/null + done + grep -Fxq 'filters=' "$dir/state/.github-watch-config" || fail "all-off should write filters= (empty), not default" + + # A new comment must NOT fire (every filter is muted). + printf '9\n' > "$dir/fixture/comments-kunchenguid-firstmate-30" + out=$(run_poll "$dir") + [ -z "$out" ] || fail "muted watcher emitted events; got: $out" + pass "all filters off (empty filters=) mutes the watcher instead of resetting to defaults" +} + +test_parallel_poll_is_lossless_and_does_not_cross_contaminate() { + # With PRs polled concurrently (bounded by FM_GH_CONCURRENCY), the per-PR + # losslessness invariant (print before seen write) and per-PR seen-file + # independence must both hold. Seed many PRs across distinct repos so several + # parallel waves run, each worker owning its own seen file. 
+ local dir out i sf n=12 + dir=$(make_case parallel) + + local pr_lines=() + for i in $(seq 1 "$n"); do + pr_lines+=( "$(printf 'org/r%d\t1' "$i")" ) + printf '5\n' > "$dir/fixture/comments-org-r$i-1" + done + seed_prs "$dir" "${pr_lines[@]}" + run_poll "$dir" >/dev/null # baseline all n PRs (comments=5 each) + + # Each PR gains a DISTINCT count (PR i -> 5+i) so a worker that crossed wires + # would stamp another PR's count into the wrong seen file. + for i in $(seq 1 "$n"); do + printf '%d\n' "$((5 + i))" > "$dir/fixture/comments-org-r$i-1" + done + + # Losslessness under concurrency: make the seen dir read-only so every + # worker's seen write fails, then poll with concurrency well below n. Every + # PR's event must STILL print this cycle (each worker prints before its seen + # write, independent of the other workers). + chmod a-w "$dir/state/.github-watch-seen" + out=$(FM_GH_CONCURRENCY=4 run_poll "$dir") + chmod u+w "$dir/state/.github-watch-seen" + for i in $(seq 1 "$n"); do + printf '%s\n' "$out" | grep -Fq "COMMENT: org/r$i#1 has $i new comment(s)" \ + || fail "parallel poll did not emit PR r$i before its seen write; out: $out" + done + + # No cross-contamination: after a writable concurrent poll, each PR's seen + # file holds its OWN advanced count and its own identity (never another PR's + # values), even though workers ran concurrently with a shared .tmp stage. 
+ out=$(FM_GH_CONCURRENCY=4 run_poll "$dir") + for i in $(seq 1 "$n"); do + sf="$dir/state/.github-watch-seen/org-r$i-1" + grep -Fxq "comments=$((5 + i))" "$sf" \ + || fail "r$i seen file has wrong count (cross-contamination?): $(cat "$sf")" + grep -Fxq "owner=org" "$sf" || fail "r$i seen file lost owner identity" + grep -Fxq "repo=r$i" "$sf" || fail "r$i seen file has wrong repo (cross-contamination?)" + grep -Fxq "pr=1" "$sf" || fail "r$i seen file lost pr identity" + done + + pass "parallel poll emits before each seen write and never cross-contaminates seen files" +} + +test_ci_debounces_staggered_checks() { + # Reproduces the no-mistakes#312 chatter: a PR whose many check-runs complete + # at staggered times. Under the old per-multiset logic each completion changed + # the signature and fired (one event per check). The roll-up keeps the state + # at "pending" while ANY check is still running, then flips to green exactly + # once when the last one completes. + local dir out sf finished i + dir=$(make_case ci-debounce) + seed_prs "$dir" $'kunchenguid/no-mistakes\t312' + printf 'sha7\n' > "$dir/fixture/sha-kunchenguid-no-mistakes-312" + sf="$dir/state/.github-watch-seen/kunchenguid-no-mistakes-312" + + # Cycle 1: 7 checks, all pending -> baseline (no event, first run). + seed_ci "$dir" sha7 pending pending pending pending pending pending pending + run_poll "$dir" >/dev/null + grep -Fxq "ci=pending" "$sf" || fail "baseline should roll 7 pending checks up to pending" + + # Checks complete a few at a time: state stays pending, so every one of these + # cycles must stay silent (under the old logic each would have fired). 
+ for finished in 1 3 6; do + local args=() + for i in $(seq 1 7); do + if [ "$i" -le "$finished" ]; then args+=(success); else args+=(pending); fi + done + seed_ci "$dir" sha7 "${args[@]}" + out=$(run_poll "$dir") + if printf '%s\n' "$out" | grep -Fq "CI:"; then + fail "fired while still pending after $finished/7 checks done; got: $out" + fi + done + + # Last check completes: pending -> green fires exactly once. + seed_ci "$dir" sha7 success success success success success success success + out=$(run_poll "$dir") + printf '%s\n' "$out" | grep -Fq "CI: kunchenguid/no-mistakes#312 -> green" \ + || fail "pending->success transition did not fire once; got: $out" + # No second fire on the next (steady) cycle. + out=$(run_poll "$dir") + if printf '%s\n' "$out" | grep -Fq "CI:"; then + fail "steady success re-fired; got: $out" + fi + + pass "staggered checks debounce to a single overall-state transition" +} + +test_ci_state_transitions() { + # The three transitions the captain cares about, each firing exactly once: + # pending->green, green->green (silent), green->failure. + local dir out sf + dir=$(make_case ci-trans) + seed_prs "$dir" $'kunchenguid/no-mistakes\t320' + printf 'shat\n' > "$dir/fixture/sha-kunchenguid-no-mistakes-320" + sf="$dir/state/.github-watch-seen/kunchenguid-no-mistakes-320" + + seed_ci "$dir" shat pending + run_poll "$dir" >/dev/null # baseline pending + + # pending -> green fires once. + seed_ci "$dir" shat success + out=$(run_poll "$dir") + printf '%s\n' "$out" | grep -Fq "CI: kunchenguid/no-mistakes#320 -> green" \ + || fail "pending->success did not fire; got: $out" + grep -Fxq "ci=success" "$sf" || fail "state not advanced to success" + + # green -> green does not fire. + out=$(run_poll "$dir") + if printf '%s\n' "$out" | grep -Fq "CI:"; then fail "success->success re-fired; got: $out"; fi + + # green -> failure fires once. 
+ seed_ci "$dir" shat success failure + out=$(run_poll "$dir") + printf '%s\n' "$out" | grep -Fq "CI: kunchenguid/no-mistakes#320 -> failure" \ + || fail "success->failure did not fire; got: $out" + grep -Fxq "ci=failure" "$sf" || fail "state not advanced to failure" + + pass "pending->green fires once, green->green is silent, green->failure fires once" +} + +test_ci_rollup_precedence() { + # The rolled-up state follows GitHub's combined-status precedence: a red check + # settles failure even while others are still pending; neutral checks are + # ignored entirely (never red, never green, never block). + local dir out sf + dir=$(make_case ci-rollup) + seed_prs "$dir" $'kunchenguid/no-mistakes\t321' + printf 'shar\n' > "$dir/fixture/sha-kunchenguid-no-mistakes-321" + sf="$dir/state/.github-watch-seen/kunchenguid-no-mistakes-321" + + # Baseline: a passing check plus a neutral informational check rolls up to + # success (neutral ignored). + seed_ci "$dir" shar success neutral + run_poll "$dir" >/dev/null + grep -Fxq "ci=success" "$sf" || fail "success+neutral should roll up to success" + + # A failure landing while another check is still pending settles failure + # immediately (no transient pending event). + seed_ci "$dir" shar success failure pending + out=$(run_poll "$dir") + printf '%s\n' "$out" | grep -Fq "CI: kunchenguid/no-mistakes#321 -> failure" \ + || fail "failure+pending should roll straight up to failure; got: $out" + grep -Fxq "ci=failure" "$sf" || fail "state not advanced to failure" + + # The pending check then succeeds: state stays failure (no second fire). 
+ seed_ci "$dir" shar success failure success + out=$(run_poll "$dir") + if printf '%s\n' "$out" | grep -Fq "CI:"; then fail "failure->failure re-fired; got: $out"; fi + + pass "roll-up precedence: failure beats pending, neutral checks are ignored" +} + +test_silent_baseline_on_schema_migration() { + # Reproduces the debounce deploy flood: a seen file written by an OLDER + # watcher version (here, an old per-multiset ci signature, no schema= field). + # Without the schema guard, the first poll under the new code sees + # seen_ci="success:success:failure" != ci_st="success" and fires a spurious + # CI transition for EVERY migrated PR at once. The guard treats a schema + # mismatch as a silent re-baseline: write the new schema + correct values, + # emit nothing. A subsequent REAL transition still fires. + local dir out sf + dir=$(make_case ci-migrate) + seed_prs "$dir" $'kunchenguid/no-mistakes\t330' + printf 'sham\n' > "$dir/fixture/sha-kunchenguid-no-mistakes-330" + printf 'OPEN\n' > "$dir/fixture/state-kunchenguid-no-mistakes-330" + sf="$dir/state/.github-watch-seen/kunchenguid-no-mistakes-330" + mkdir -p "$(dirname "$sf")" + + # An old-format seen file: initialized but no schema=, and a stale ci value + # that the new roll-up would read as "different" from the fresh success. + cat > "$sf" <<'OLD' +owner=kunchenguid +repo=no-mistakes +pr=330 +initialized=1 +ci=success:success:failure +state=OPEN +OLD + + # Fresh roll-up is plain success; under the old code this != the stale sig. + seed_ci "$dir" sham success success success + + # First poll after migration: SILENT (no flood), seen rewritten to new schema. + out=$(run_poll "$dir") + [ -z "$out" ] || fail "schema migration should baseline silently; got: $out" + grep -Fxq "schema=2" "$sf" || fail "seen file not stamped with current schema" + grep -Fxq "ci=success" "$sf" || fail "ci not re-baselined to the rolled-up success" + + # A subsequent REAL transition still fires (migration only silenced once). 
+ seed_ci "$dir" sham success failure success + out=$(run_poll "$dir") + printf '%s\n' "$out" | grep -Fq "CI: kunchenguid/no-mistakes#330 -> failure" \ + || fail "post-migration real transition did not fire; got: $out" + + pass "schema mismatch is silently re-baselined; real transitions still fire" +} + +test_ci_ignore_excludes_known_gap_check() { + # A kunchenguid fork-PR whose ONLY failing check is the known fork-routing + # signature gap (#293: "PR must be raised via no-mistakes") must roll up to + # green when its real checks pass, not a false failure. The default + # FM_GH_IGNORE_CHECKS regex drops that name from the roll-up. A REAL failure + # (different name) must still roll up to failure, so the filter is not just + # disabling failure detection. + local dir out sf + dir=$(make_case ci-ignore) + seed_prs "$dir" $'kunchenguid/firstmate\t38' + printf 'sha38\n' > "$dir/fixture/sha-kunchenguid-firstmate-38" + sf="$dir/state/.github-watch-seen/kunchenguid-firstmate-38" + + # 3 real checks pass; the gap check fails by name. run_poll uses the default + # FM_GH_IGNORE_CHECKS, so the gap name is excluded -> rolls up to success. + seed_ci_named "$dir" sha38 \ + "build=success" "test=success" "lint=success" \ + "PR must be raised via no-mistakes=failure" + + run_poll "$dir" >/dev/null # baseline: gap excluded -> success, not failure + grep -Fxq "ci=success" "$sf" \ + || fail "gap-excluded PR should roll up to success, got: $(cat "$sf")" + + # A REAL check failing (different name) must still surface failure despite the + # gap check also failing: the ignore list is not a blanket failure suppressor. 
+ seed_ci_named "$dir" sha38 \ + "build=success" "test=failure" "lint=success" \ + "PR must be raised via no-mistakes=failure" + out=$(run_poll "$dir") + printf '%s\n' "$out" | grep -Fq "CI: kunchenguid/firstmate#38 -> failure" \ + || fail "real check failure should still roll up to failure; got: $out" + + pass "known fork-routing gap check excluded from roll-up; real failures still surface" +} + +test_api_error_skips_pr_without_event() { + # Reproduces the bug: a transient 401 makes `gh api` write the error body + # {"message":"Bad credentials",...} to stdout (bypassing --jq). The old ghc() + # swallowed stderr + the exit code, so the watcher parsed that JSON as CI state + # and fired a bogus "CI: ... -> { \"message\": ... }" event. The fix detects the + # API error (non-zero exit OR an error-body shape) and skips the PR for the + # cycle: no event, no crash, seen left untouched so the next (recovered) cycle + # still fires the real transition (lossless). + local dir out sf + dir=$(make_case api-error) + seed_prs "$dir" $'kunchenguid/no-mistakes\t500' + printf 'sha500\n' > "$dir/fixture/sha-kunchenguid-no-mistakes-500" + seed_ci "$dir" sha500 success + sf="$dir/state/.github-watch-seen/kunchenguid-no-mistakes-500" + + # Baseline: CI green. + run_poll "$dir" >/dev/null + grep -Fxq "ci=success" "$sf" || fail "baseline ci not recorded as success" + + # Inject a transient 401 on every `gh api` call this cycle. + : > "$dir/fixture/api-error" + out=$(run_poll "$dir" 2>/dev/null) + [ -z "$out" ] || fail "transient API error must not surface as an event; got: $out" + # seen must be untouched (ci still the prior success, not the error JSON). + grep -Fxq "ci=success" "$sf" \ + || fail "seen state was clobbered during API error: $(cat "$sf")" + + # Recover: remove the blip and flip CI to failure. The real transition fires. 
+ rm -f "$dir/fixture/api-error" + seed_ci "$dir" sha500 failure + out=$(run_poll "$dir") + printf '%s\n' "$out" | grep -Fq "CI: kunchenguid/no-mistakes#500 -> failure" \ + || fail "post-blip real transition did not fire; got: $out" + + pass "transient GitHub API error skips the PR without emitting an event" +} + +test_filter_toggling +test_first_run_baselines_silently +test_comment_detection_advances_seen_after_print +test_losslessness_redetects_when_seen_write_fails +test_merge_detection_on_left_open +test_closed_then_merged_is_not_swallowed +test_closed_pr_reprobe_window_is_bounded +test_closed_via_open_search_stamps_closed_at +test_config_roundtrip +test_review_detection +test_ci_detection +test_merge_filter_suppresses_merge_event +test_ci_carry_forward_across_empty_window +test_ci_debounces_staggered_checks +test_ci_state_transitions +test_ci_rollup_precedence +test_all_filters_off_mutes_watcher +test_parallel_poll_is_lossless_and_does_not_cross_contaminate +test_silent_baseline_on_schema_migration +test_ci_ignore_excludes_known_gap_check +test_api_error_skips_pr_without_event diff --git a/tests/fm-plugin.test.sh b/tests/fm-plugin.test.sh new file mode 100755 index 00000000..9aee8080 --- /dev/null +++ b/tests/fm-plugin.test.sh @@ -0,0 +1,342 @@ +#!/usr/bin/env bash +# Behavior tests for durable watcher check plugins: the fm-plugin.sh lifecycle +# (add/remove/list/sync) and the shipped done-crewmate.check.sh detector. +# +# A plugin's canonical source lives tracked under bin/check-plugins/.check.sh +# and is symlinked into state/.check.sh so the watcher's state/*.check.sh glob +# sweeps it. state/ is gitignored, so bootstrap calls `fm-plugin.sh sync` to recreate +# those symlinks after a fresh clone. 
These cases pin: +# - add: copies content to the canonical home and points state/ at it via symlink; +# - remove: drops both the symlink and the canonical source; +# - list: reports live vs stale, and '(no plugins installed)' when empty; +# - sync: recreates missing symlinks idempotently, never clobbering a real +# (non-symlink) state file that may be a live per-task check; +# - invalid-name / not-found / usage guards; +# - done-crewmate.check.sh: surfaces terminal-status (done/failed/blocked) crewmates +# whose tmux window is still alive, excludes secondmates and needs-decision, +# stays silent when the window is gone / crew resumed / no window recorded, +# and emits exactly one line listing every offender. +# +# fm-plugin.sh resolves FM_ROOT from its own script location (it operates on its +# own repo), so each case copies the script into a fresh temp FM_ROOT to keep the +# real repo untouched. done-crewmate.check.sh honors FM_ROOT_OVERRIDE, so it is +# pointed at a temp state dir directly. +set -u + +# shellcheck source=tests/lib.sh +. "$(dirname "${BASH_SOURCE[0]}")/lib.sh" + +PLUGIN_SH="$ROOT/bin/fm-plugin.sh" +DONE_CHECK="$ROOT/bin/check-plugins/done-crewmate.check.sh" + +TMP_ROOT=$(fm_test_tmproot fm-plugin) + +# A fresh fake FM_ROOT with bin/check-plugins and state, plus a copy of fm-plugin.sh +# inside bin/ so the script resolves this temp dir as its own FM_ROOT. Echoes the dir. +make_root() { + local name=$1 dir + dir="$TMP_ROOT/$name" + mkdir -p "$dir/bin/check-plugins" "$dir/state" + cp "$PLUGIN_SH" "$dir/bin/fm-plugin.sh" + chmod +x "$dir/bin/fm-plugin.sh" + printf '%s\n' "$dir" +} + +# A trivial source check script that prints a line. Echoes its path. +make_source() { + local f="$TMP_ROOT/source-$1.check.sh" + cat > "$f" <<'SH' +#!/usr/bin/env bash +printf 'sample fired\n' +SH + chmod +x "$f" + printf '%s\n' "$f" +} + +# A fake tmux that lists the windows in FM_FAKE_WINDOWS (one ':'/line). +# Installed into the given fakebin. 
install_fake_tmux() {
  # Writes an executable stub named `tmux` into the directory given as $1.
  # The stub answers `tmux list-windows` by echoing $FM_FAKE_WINDOWS verbatim
  # (the tests pass one 'session:window' entry per line) and exits 0; an
  # empty or unset FM_FAKE_WINDOWS prints nothing. Every other subcommand
  # exits 1.
  local fakebin=$1
  cat > "$fakebin/tmux" <<'SH'
#!/usr/bin/env bash
set -u
if [ "${1:-}" = "list-windows" ]; then
  [ -n "${FM_FAKE_WINDOWS:-}" ] && printf '%s\n' "$FM_FAKE_WINDOWS"
  exit 0
fi
exit 1
SH
  chmod +x "$fakebin/tmux"
}

# Run done-crewmate.check.sh against a temp root with a fake tmux on PATH.
# Args: <root> <fakebin> [FM_FAKE_WINDOWS value]
#   $1 root     exported to the check as FM_ROOT_OVERRIDE
#   $2 fakebin  directory prepended to PATH (holds the fake tmux)
#   $3 windows  optional; exported as FM_FAKE_WINDOWS (defaults to empty)
# Outputs: whatever the check prints; returns the check's exit status.
run_done_check() {
  local root=$1 fakebin=$2
  FM_ROOT_OVERRIDE="$root" PATH="$fakebin:$PATH" FM_FAKE_WINDOWS="${3:-}" "$DONE_CHECK"
}

# --- fm-plugin.sh: add / list / remove --------------------------------------

# add: the canonical copy exists, is executable, matches the source byte for
# byte, and state/sample.check.sh is a symlink resolving to it.
test_add_creates_canonical_and_symlink() {
  local root src canon link
  root=$(make_root add-basic); src=$(make_source add)
  "$root/bin/fm-plugin.sh" add sample "$src" >/dev/null || fail "add failed"
  canon="$root/bin/check-plugins/sample.check.sh"
  link="$root/state/sample.check.sh"
  [ -f "$canon" ] || fail "canonical source was not created"
  [ -L "$link" ] || fail "state link is not a symlink"
  # Compare fully resolved paths so a symlinked temp dir does not cause a
  # spurious mismatch.
  [ "$(readlink -f "$link")" = "$(cd -P "$root/bin/check-plugins" && pwd)/sample.check.sh" ] \
    || fail "state symlink does not point at the canonical source"
  diff -q "$src" "$canon" >/dev/null || fail "canonical content does not match source"
  [ -x "$canon" ] || fail "canonical source is not executable"
  pass "add copies content to the canonical home and points state/ at it via symlink"
}

# list: empty message with no plugins, "live" once added, "stale" once the
# state symlink has been dropped.
test_list_reports_live_stale_and_empty() {
  local root src out
  root=$(make_root list-cases)
  out=$("$root/bin/fm-plugin.sh" list) || fail "list failed on empty"
  assert_contains "$out" "(no plugins installed)" "empty list message"
  src=$(make_source list)
  # Setup step: fail loudly here rather than through a misleading assertion.
  "$root/bin/fm-plugin.sh" add sample "$src" >/dev/null || fail "setup add failed (list)"
  out=$("$root/bin/fm-plugin.sh" list)
  assert_contains "$out" $'sample\tlive' "live plugin not reported"
  rm -f "$root/state/sample.check.sh" # simulate a fresh clone / dropped symlink
  out=$("$root/bin/fm-plugin.sh" list)
  assert_contains "$out" "sample" "stale plugin name missing"
  assert_contains "$out" "stale" "stale plugin not marked stale"
  pass "list reports '(no plugins installed)', live, and stale states"
}

# remove: both the canonical source and the state symlink are gone afterwards.
test_remove_drops_symlink_and_canonical() {
  local root src
  root=$(make_root remove-case); src=$(make_source rm)
  "$root/bin/fm-plugin.sh" add sample "$src" >/dev/null || fail "setup add failed (remove)"
  "$root/bin/fm-plugin.sh" remove sample >/dev/null || fail "remove failed"
  [ ! -e "$root/bin/check-plugins/sample.check.sh" ] || fail "canonical source not removed"
  [ ! -e "$root/state/sample.check.sh" ] || fail "state symlink not removed"
  pass "remove drops both the state symlink and the canonical source"
}

# --- fm-plugin.sh: sync (the bootstrap fresh-clone path) --------------------

# sync: a deleted state symlink is recreated with the right target, and a
# second sync still succeeds.
test_sync_recreates_missing_symlink() {
  local root src link
  root=$(make_root sync-restore); src=$(make_source sync)
  "$root/bin/fm-plugin.sh" add sample "$src" >/dev/null || fail "setup add failed (sync)"
  link="$root/state/sample.check.sh"
  rm -f "$link" # fresh clone: state/ is gitignored and empty
  "$root/bin/fm-plugin.sh" sync || fail "sync failed"
  [ -L "$link" ] || fail "sync did not recreate the symlink"
  [ "$(readlink -f "$link")" = "$(cd -P "$root/bin/check-plugins" && pwd)/sample.check.sh" ] \
    || fail "sync recreated symlink points at the wrong target"
  # Idempotent: a second sync is a no-op.
  "$root/bin/fm-plugin.sh" sync || fail "second sync failed"
  pass "sync recreates a missing plugin symlink after a fresh clone (idempotent)"
}

# sync: a regular file already sitting at the state path keeps both its type
# and its content.
test_sync_never_clobbers_a_real_state_file() {
  local root src link
  root=$(make_root sync-noclobber); src=$(make_source sync2)
  "$root/bin/fm-plugin.sh" add sample "$src" >/dev/null || fail "setup add failed (sync no-clobber)"
  link="$root/state/sample.check.sh"
  rm -f "$link"
  # A real (non-symlink) file at the state path may be a live per-task check that
  # happens to share the name; sync must leave it untouched.
  cat > "$link" <<'SH'
#!/usr/bin/env bash
echo "i am a live per-task check"
SH
  "$root/bin/fm-plugin.sh" sync || fail "sync failed"
  [ ! -L "$link" ] || fail "sync clobbered a real per-task check file (turned it into a symlink)"
  grep -Fq "live per-task check" "$link" || fail "sync altered the real file's content"
  pass "sync never clobbers a real (non-symlink) state file that may be a live per-task check"
}

# --- fm-plugin.sh: guards ---------------------------------------------------

# add must exit non-zero for an fm- prefixed name, a name with invalid
# characters, and an empty name.
test_invalid_names_rejected() {
  local root src
  root=$(make_root invalid-names); src=$(make_source inv)
  if "$root/bin/fm-plugin.sh" add "fm-task1" "$src" 2>/dev/null; then
    fail "fm- prefix (reserved for task ids) was accepted"
  fi
  if "$root/bin/fm-plugin.sh" add "bad name!" "$src" 2>/dev/null; then
    fail "a name with invalid characters was accepted"
  fi
  if "$root/bin/fm-plugin.sh" add "" "$src" 2>/dev/null; then
    fail "an empty name was accepted"
  fi
  pass "invalid plugin names (fm- prefix, bad chars, empty) are rejected"
}

# remove must exit non-zero when the named plugin does not exist.
test_remove_unknown_fails() {
  local root
  root=$(make_root rm-unknown)
  if "$root/bin/fm-plugin.sh" remove nosuch 2>/dev/null; then
    fail "remove of a non-existent plugin succeeded"
  fi
  pass "remove of an unknown plugin fails"
}

test_add_accepts_state_path_as_source() {
  # The doc contract: if state/<name>.check.sh already exists as a real file (it
  # is the source you just named), add holds its content canonically and replaces
  # the path with the symlink. Copy content first, then symlink, so the source
  # survives.
  local root canon link
  root=$(make_root add-from-state)
  cat > "$root/state/sample.check.sh" <<'SH'
#!/usr/bin/env bash
echo "promoted from a real state file"
SH
  "$root/bin/fm-plugin.sh" add sample "$root/state/sample.check.sh" >/dev/null || fail "add failed"
  canon="$root/bin/check-plugins/sample.check.sh"
  link="$root/state/sample.check.sh"
  [ -f "$canon" ] || fail "canonical source not created from the real state file"
  [ -L "$link" ] || fail "the real state file was not replaced by a symlink"
  grep -Fq "promoted from a real state file" "$canon" \
    || fail "canonical content did not capture the original state file content"
  pass "add promotes a real state file to the canonical source + symlink"
}

# --- done-crewmate.check.sh: detection --------------------------------------

# A status file whose last line is done: plus a window the fake tmux still
# lists must produce output that names the task.
test_done_crewmate_with_live_window_surfaces() {
  # `out` is declared local so it cannot leak into later tests.
  local root fakebin out
  root=$(make_root dc-done); fakebin="$root/fakebin"; mkdir -p "$fakebin"
  install_fake_tmux "$fakebin"
  printf 'window=firstmate:fm-task-aaa\nkind=ship\n' > "$root/state/task-aaa.meta"
  printf 'working: step 1\ndone: PR https://example.test/pr/9\n' > "$root/state/task-aaa.status"
  out=$(run_done_check "$root" "$fakebin" "firstmate:fm-task-aaa")
  [ -n "$out" ] || fail "a done crewmate with a live window was not reported"
  assert_contains "$out" "task-aaa" "done offender not named in the wake line"
  pass "a terminal (done:) crewmate whose window is alive is reported"
}

# failed: and blocked: are reported just like done:.
test_terminal_verbes_failed_and_blocked_count() {
  local root fakebin out
  root=$(make_root dc-verbs); fakebin="$root/fakebin"; mkdir -p "$fakebin"
  install_fake_tmux "$fakebin"
  printf 'window=firstmate:fm-c1\nkind=ship\n' > "$root/state/c1.meta"
  printf 'failed: tests blew up\n' > "$root/state/c1.status"
  printf 'window=firstmate:fm-c2\nkind=ship\n' > "$root/state/c2.meta"
  printf 'blocked: waiting on auth\n' > "$root/state/c2.status"
  out=$(run_done_check "$root" "$fakebin" $'firstmate:fm-c1\nfirstmate:fm-c2')
  assert_contains "$out" "c1" "failed: offender missed"
  assert_contains "$out" "c2" "blocked: offender missed"
  pass "failed: and blocked: terminal statuses are both reported"
}

test_needs_decision_excluded() {
  # needs-decision escalates immediately through the signal layer on write, so it
  # never needs this recurring backstop.
  local root fakebin out
  root=$(make_root dc-nd); fakebin="$root/fakebin"; mkdir -p "$fakebin"
  install_fake_tmux "$fakebin"
  printf 'window=firstmate:fm-nd\nkind=ship\n' > "$root/state/nd.meta"
  printf 'needs-decision: pick A or B\n' > "$root/state/nd.status"
  out=$(run_done_check "$root" "$fakebin" "firstmate:fm-nd")
  [ -z "$out" ] || fail "needs-decision was reported (should be excluded): $out"
  pass "needs-decision is excluded (it escalates on write, not via this backstop)"
}

# A meta file with kind=secondmate must stay silent even when its status is
# terminal and its window is listed.
test_secondmate_skipped() {
  local root fakebin out
  root=$(make_root dc-sm); fakebin="$root/fakebin"; mkdir -p "$fakebin"
  install_fake_tmux "$fakebin"
  printf 'window=firstmate:fm-domain\nkind=secondmate\nhome=%s/h\n' "$root" > "$root/state/domain.meta"
  printf 'done: routine charter\n' > "$root/state/domain.status"
  out=$(run_done_check "$root" "$fakebin" "firstmate:fm-domain")
  [ -z "$out" ] || fail "a terminal secondmate was reported (should be skipped): $out"
  pass "kind=secondmate is skipped even with a terminal status"
}

# A done: task whose window the fake tmux no longer lists must stay silent.
test_window_gone_silent() {
  local root fakebin out
  root=$(make_root dc-gone); fakebin="$root/fakebin"; mkdir -p "$fakebin"
  install_fake_tmux "$fakebin"
  printf 'window=firstmate:fm-task-aaa\nkind=ship\n' > "$root/state/task-aaa.meta"
  printf 'done: PR https://example.test/pr/9\n' > "$root/state/task-aaa.status"
  out=$(run_done_check "$root" "$fakebin" "") # tmux reports no fm windows
  [ -z "$out" ] || fail "a done crewmate whose window is gone was reported: $out"
  pass "a done crewmate whose window is already gone (progressed/torn down) is silent"
}

test_resumed_crew_silent() {
  # A later non-terminal line (working:) means the crew resumed after a done:, so it
  # is not idle-done. The current state is the LAST non-empty status line.
  local root fakebin out
  root=$(make_root dc-resumed); fakebin="$root/fakebin"; mkdir -p "$fakebin"
  install_fake_tmux "$fakebin"
  printf 'window=firstmate:fm-task-aaa\nkind=ship\n' > "$root/state/task-aaa.meta"
  printf 'done: PR x\nworking: fixing review nits\n' > "$root/state/task-aaa.status"
  out=$(run_done_check "$root" "$fakebin" "firstmate:fm-task-aaa")
  [ -z "$out" ] || fail "a resumed crew (last line working:) was reported: $out"
  pass "a crew whose last status line is non-terminal (resumed) is silent"
}

# A meta file without a window= entry cannot be matched against the window list,
# so that crewmate must not appear in the output even with a terminal status.
test_no_window_recorded_silent() {
  local root fakebin out
  root=$(make_root dc-nowin); fakebin="$root/fakebin"; mkdir -p "$fakebin"
  install_fake_tmux "$fakebin"
  printf 'kind=ship\n' > "$root/state/nowin.meta" # no window= recorded
  printf 'done: x\n' > "$root/state/nowin.status"
  out=$(run_done_check "$root" "$fakebin" "firstmate:fm-other")
  assert_not_contains "$out" "nowin" "a crew with no window= was reported"
  pass "a crew with no window= recorded is skipped (cannot cross-reference tmux)"
}

# Three terminal crewmates (done / failed / blocked) with live windows must all
# be named, and the whole report must be exactly one line.
test_all_offenders_in_one_line() {
  # The ids are deliberately multi-character and distinctive. A one-letter id
  # such as "a" is a substring of almost any wake line, so a per-offender
  # assert_contains on it could never fail. The status texts avoid repeating the
  # ids for the same reason.
  local root fakebin out nlines
  root=$(make_root dc-oneline); fakebin="$root/fakebin"; mkdir -p "$fakebin"
  install_fake_tmux "$fakebin"
  printf 'window=firstmate:fm-crew-alpha\nkind=ship\n' > "$root/state/crew-alpha.meta"
  printf 'done: first\n' > "$root/state/crew-alpha.status"
  printf 'window=firstmate:fm-crew-bravo\nkind=ship\n' > "$root/state/crew-bravo.meta"
  printf 'failed: second\n' > "$root/state/crew-bravo.status"
  printf 'window=firstmate:fm-crew-charlie\nkind=ship\n' > "$root/state/crew-charlie.meta"
  printf 'blocked: third\n' > "$root/state/crew-charlie.status"
  out=$(run_done_check "$root" "$fakebin" \
    $'firstmate:fm-crew-alpha\nfirstmate:fm-crew-bravo\nfirstmate:fm-crew-charlie')
  # Empty output would still count as "1 line" below (printf adds the newline),
  # so rule it out explicitly before counting.
  [ -n "$out" ] || fail "three terminal crewmates with live windows produced no wake line"
  nlines=$(printf '%s\n' "$out" | wc -l | tr -d ' ')
  [ "$nlines" = "1" ] || fail "multiple offenders produced $nlines lines instead of one: $out"
  assert_contains "$out" "crew-alpha" "offender crew-alpha missing"
  assert_contains "$out" "crew-bravo" "offender crew-bravo missing"
  assert_contains "$out" "crew-charlie" "offender crew-charlie missing"
  pass "every offender is listed in a single wake line"
}

# --- bootstrap integration: sync is wired into bootstrap --------------------

test_bootstrap_invokes_plugin_sync() {
  # Bootstrap's final step must call fm-plugin.sh sync so plugins come back alive
  # after a fresh clone (state/ is gitignored). Assert the call is present and
  # guarded so a missing executable or a sync failure never aborts bootstrap.
  # This is a static text check of the bootstrap script; bootstrap is not run.
  local boot="$ROOT/bin/fm-bootstrap.sh"
  grep -F 'fm-plugin.sh' "$boot" >/dev/null \
    || fail "bootstrap does not reference fm-plugin.sh"
  # shellcheck disable=SC2016 # single quotes are deliberate: literal source string
  grep -F '[ -x "$FM_ROOT/bin/fm-plugin.sh" ] && "$FM_ROOT/bin/fm-plugin.sh" sync' "$boot" >/dev/null \
    || fail "bootstrap does not invoke the sync subcommand with the documented guard"
  pass "bootstrap invokes 'fm-plugin.sh sync' (guarded, best-effort) as its final step"
}

# Run every test in definition order.
test_add_creates_canonical_and_symlink
test_list_reports_live_stale_and_empty
test_remove_drops_symlink_and_canonical
test_sync_recreates_missing_symlink
test_sync_never_clobbers_a_real_state_file
test_invalid_names_rejected
test_remove_unknown_fails
test_add_accepts_state_path_as_source
test_done_crewmate_with_live_window_surfaces
test_terminal_verbes_failed_and_blocked_count
test_needs_decision_excluded
test_secondmate_skipped
test_window_gone_silent
test_resumed_crew_silent
test_no_window_recorded_silent
test_all_offenders_in_one_line
test_bootstrap_invokes_plugin_sync

printf 'all fm-plugin / done-crewmate tests passed\n'