diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f8b0afcb..df5000f6 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -15,7 +15,7 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v6 - - run: shellcheck bin/*.sh tests/*.sh + - run: shellcheck -x -P SCRIPTDIR bin/*.sh tests/*.sh tests: name: Behavior tests @@ -36,7 +36,7 @@ jobs: - run: | set -eu for test_script in tests/*.test.sh; do - "$test_script" + bash "$test_script" done invariants: diff --git a/AGENTS.md b/AGENTS.md index 855d25de..0feb32be 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -511,7 +511,7 @@ On wake, in order of cheapness: 5. `heartbeat:` a heartbeat wake now reaches you only when the watcher's bash fleet-scan caught a captain-relevant status the per-wake path missed (no-change heartbeats are absorbed in bash, never surfaced), so treat it as "something turned up" and review the whole fleet: read each crewmate's current state with `bin/fm-crew-state.sh ` (the cheap first read - it reconciles the authoritative run-step over a possibly-stale status-log line, so a crewmate whose gate you already resolved no longer reads as still parked), peek panes that look off, check PR-ready tasks for merge, reconcile data/backlog.md, then re-arm the watcher. Do not report that the fleet is unchanged. -When the picture is unclear or a display surface needs the shared decision model, run `bin/fm-supervise.sh` for a read-only checklist or `bin/fm-supervise.sh --json` for the `firstmate.supervision.v1` model. The command may report watcher proof as `unknown` when the current sandbox cannot see the watcher process; prove liveness with `bin/fm-watch-arm.sh` or `bin/fm-watch-session.sh --status` before treating that as an actual down watcher. +When the picture is unclear or a display surface needs the shared decision model, run `bin/fm-supervise.sh` for a read-only checklist or `bin/fm-supervise.sh --json` for the `firstmate.supervision.v1` model. For PRs, its `ci_state` combines GitHub commit status and check-runs; failing, cancelled, timed-out, action-required, startup-failure, or stale check-runs are not green. The command may report watcher proof as `unknown` when the current sandbox cannot see the watcher process; prove liveness with `bin/fm-watch-arm.sh` or `bin/fm-watch-session.sh --status` before treating that as an actual down watcher. Heartbeats back off exponentially while they are the only wakes firing (600s doubling to a 2h cap - an idle fleet stops burning turns); any signal, stale, or check wake resets the cadence to the base interval. Due per-task checks run before signal scanning so chatty crewmate status updates cannot starve slow polls like merge detection. @@ -632,6 +632,7 @@ Map firstmate's real backlog operations to the approved commands: - Manage dependencies: `tasks-axi block --by ` and `tasks-axi unblock --by `, then `tasks-axi ready` to list queued work with no unresolved blockers. This is a dependency check only; future-dated items still stay queued until their date arrives. - Read an item's full notes: `tasks-axi show --full`. +- Do not invent undocumented flags such as `tasks-axi list --json` or `tasks-axi ready --json`; use each command's `--help` before adding flags, because not every verb supports JSON output. - Hand a task off to a secondmate home: keep using `bin/fm-backlog-handoff.sh ...`; do not call bare `tasks-axi mv` for this path, because the helper resolves and validates the secondmate home before moving anything. - Normalize the file: `tasks-axi render` rewrites every id'd task in canonical form and leaves free-form lines untouched. diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index a907487a..00e04dc1 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -41,7 +41,7 @@ See the [no-mistakes quick start](https://kunchenguid.github.io/no-mistakes/star - Helper scripts in `bin/` are plain bash. Each starts with a usage header comment; keep it accurate when you change behavior. Test scripts and helpers in `tests/` are plain bash too. - `shellcheck bin/*.sh tests/*.sh` must pass, and CI enforces it. + `shellcheck -x -P SCRIPTDIR bin/*.sh tests/*.sh` must pass, and CI enforces it. - Changes to harness adapters (launch templates in `bin/fm-spawn.sh`, facts in `.agents/skills/harness-adapters/SKILL.md`) must be verified empirically against the real harness, never written from documentation alone. - In Markdown, put each full sentence on its own line. @@ -57,7 +57,7 @@ Check and test the toolbelt before pushing: ```sh bash -n bin/*.sh # syntax-check the toolbelt -shellcheck bin/*.sh tests/*.sh # lint the toolbelt and behavior tests; CI enforces this +shellcheck -x -P SCRIPTDIR bin/*.sh tests/*.sh # lint the toolbelt and behavior tests; CI enforces this for test_script in tests/*.test.sh; do "$test_script"; done # behavior tests, matching CI tests/fm-wake-queue.test.sh # durable wake queue losslessness, catch-up, double-drain, duplicate-collapse, and drain liveness guard tests tests/fm-watcher-lock.test.sh # watcher singleton, lock-race, watch-arm liveness, and guard-warning tests @@ -71,15 +71,28 @@ tests/fm-composer-ghost.test.sh # dim-ghost stripping, ghost-only comp tests/fm-afk-inject-e2e.test.sh # private-socket end-to-end test of the afk injection path (partial-input deferral, swallowed-Enter retry) tests/fm-bootstrap.test.sh # bootstrap dependency and feature-probe tests tests/fm-fleet-sync.test.sh # project clone refresh: safe detached recovery, STUCK drift reports, benign skips, and bootstrap relay +tests/fm-backlog-audit.test.sh # read-only backlog/state drift audit findings and no-change contract +tests/fm-route.test.sh # deterministic route profiles, overrides, risk flags, and downgrade handling tests/fm-x-mode.test.sh # X-mode poll, inbox context round-trip, reply threading, dry-run preview, and .env-presence activation tests +tests/fm-memory-lookup.test.sh # manual Cognee memory lookup fallback, source-path verification, and optional brief append +tests/fm-cognee-lookup-gate.test.sh # fail-closed Cognee automatic/manual gate markers and unsafe-evidence rejection +tests/fm-cognee-lookup.test.sh # Cognee dry-run/live lookup wrapper, redacted telemetry, retry, and source verification behavior +tests/fm-cognee-session-cost-probe.test.sh # disabled Cognee session/cost probe planner, endpoint allowlist, and redacted JSONL output +tests/fm-cognee-source-verify.test.sh # Cognee answer reference parsing, manifest matching, local source reopen, and telemetry +tests/fm-cognee-telemetry.test.sh # secret-safe Cognee telemetry schema, redaction flags, IDs, and env-file loading +tests/fm-cognee-brief-rules.test.sh # generated briefs include the trial-only, hint-only Cognee memory rules tests/fm-tangle-guard.test.sh # primary-checkout tangle detection and spawn/brief isolation tests tests/fm-spawn-batch.test.sh # batch dispatch and FM_HOME project-path scoping tests +tests/fm-spawn-route.test.sh # spawn records route profile/model/effort metadata without changing launch behavior tests/fm-update.test.sh # fast-forward-only self-update, reread, nudge, dedup, and skip-safety tests tests/fm-secondmate-sync.test.sh # local-HEAD secondmate sync, no-fetch, bootstrap nudge gating, and spawn hook tests tests/fm-secondmate-lifecycle-e2e.test.sh # persistent secondmate routing, seeding, backlog handoff, spawn, recovery, teardown, and FM_HOME flow tests tests/fm-secondmate-safety.test.sh # secondmate home safety, idle charter, handoff validation, and teardown boundary tests tests/fm-teardown.test.sh # fm-teardown.sh landed-work safety and reminder checks: fork-remote allow, squash/content landings, dirty and unlanded refusals, PR-head metadata, tasks-axi reminder, --force override tests/fm-crew-state.test.sh # fm-crew-state.sh current-state reconciliation: run-step authority including closed panes, stale needs-decision/blocked superseded by a resumed run, genuine-parked, cross-branch attribution, pane/status-log fallback, scout skip, torn-down/missing-meta graceful +tests/fm-task-identity.test.sh # task branch/meta identity guard for PR check, diff review, and teardown helpers +tests/fm-watch-session.test.sh # durable home-scoped watcher tmux runner start, status, stop, restart, and AFK behavior +tests/fm-supervision-model.test.sh # read-only supervision checklist and `firstmate.supervision.v1` JSON/schema output [ "$(readlink CLAUDE.md)" = "AGENTS.md" ] [ "$(readlink .claude/skills)" = "../.agents/skills" ] tmp=$(mktemp -d) && printf 'done: smoke\n' > "$tmp/smoke.status" && FM_STATE_OVERRIDE="$tmp" FM_SIGNAL_GRACE=1 FM_POLL=1 FM_HEARTBEAT=999999 bin/fm-watch-arm.sh # watcher re-arm smoke test (prints arm status, then an actionable signal) diff --git a/README.md b/README.md index c60005a4..83f884e3 100644 --- a/README.md +++ b/README.md @@ -111,6 +111,7 @@ Outside tmux, crewmates land in a detached `firstmate` session you can attach to You chat with the first mate. It routes each request to a crewmate in its own tmux window and git worktree, supervises the fleet with a zero-token event-driven watcher, and brings you finished PRs, approved local merges, or investigation reports. When the current fleet state is unclear, `bin/fm-supervise.sh` gives a passive read-only checklist, and `bin/fm-supervise.sh --json` exposes the same shared model for display tools such as Radar. +For PRs, that model combines GitHub commit status and check-runs before deciding whether CI is green, pending, failed, absent, or unknown. Persistent secondmate homes are linked firstmate worktrees; startup syncs live ones and secondmate launch syncs the target home to the primary default-branch commit without fetching from origin when it is safe. When a routed request goes to a secondmate, firstmate marks it so the answer returns through status or a document pointer; direct typing into that secondmate window stays conversational. A presence-gated sub-supervisor (`/afk`) can self-handle routine events and batch only what matters while you step away. @@ -139,6 +140,7 @@ Agent-only reference skills live under `.agents/skills/` and are loaded by first - [docs/architecture.md](docs/architecture.md) - how the crew, supervision, worktrees, secondmates, and project modes work. - [docs/configuration.md](docs/configuration.md) - environment variables, `FM_HOME`, optional X mode, the files you set, and harness support. +- [docs/cognee-policy.md](docs/cognee-policy.md) - the trial-only, hint-only Cognee memory policy and production gates. - [docs/scripts.md](docs/scripts.md) - the `bin/` toolbelt reference. - [`AGENTS.md`](AGENTS.md) - firstmate's full operating manual for the orchestrator agent. - [CONTRIBUTING.md](CONTRIBUTING.md) - how to contribute, including the dev/test commands. diff --git a/bin/fm-brief.sh b/bin/fm-brief.sh index 3a6ac084..01ee16e4 100755 --- a/bin/fm-brief.sh +++ b/bin/fm-brief.sh @@ -64,6 +64,7 @@ STATUS_FILE=$(shell_quote "$STATE/$ID.status") COGNEE_BRIEF_RULES=$(cat <<'EOF' # Cognee memory hints Cognee is memory/context only. It is not proof, source of truth, durable archive, or action authority. +Official docs expose raw readback and session/model cost surfaces, but Firstmate still treats raw retention/source-authority guarantees and per-wrapper-call cost correlation as unproven. Do not run automatic Cognee lookup for every task. Use a Cognee hint only when this brief says all of these are true: - Firstmate manually performed the lookup. diff --git a/bin/fm-cognee-lookup-gate.sh b/bin/fm-cognee-lookup-gate.sh index 18480030..7b15cd42 100755 --- a/bin/fm-cognee-lookup-gate.sh +++ b/bin/fm-cognee-lookup-gate.sh @@ -25,6 +25,11 @@ Evidence reports must contain these exact gate markers: Trial-only evidence is recognized but still blocks automatic lookup: FM_COGNEE_GATE_COST_USAGE_EVIDENCE=session_window_only + +Official Cognee docs expose raw readback and session/model cost surfaces. Those +surfaces are not enough for automatic lookup unless the local evidence set also +proves production raw source-authority guarantees and safe per-wrapper-call +cost correlation. EOF } diff --git a/bin/fm-cognee-lookup.sh b/bin/fm-cognee-lookup.sh index 1247c995..007af38b 100755 --- a/bin/fm-cognee-lookup.sh +++ b/bin/fm-cognee-lookup.sh @@ -12,11 +12,13 @@ usage() { usage: fm-cognee-lookup.sh [--dry-run] --query [--manifest --answer-file ] fm-cognee-lookup.sh -Live mode uses only already-exported environment variables: +Live mode uses already-exported environment variables, plus allowlisted names +from FM_COGNEE_ENV_FILE when set: COGNEE_BASE_URL COGNEE_API_KEY COGNEE_DATASET_ID or FM_COGNEE_DATASET_ALIAS FM_COGNEE_MANIFEST or --manifest + FM_COGNEE_TIMEOUT_MS defaults to 30000 and sets connect/request timeouts It can be used through: FM_COGNEE_LOOKUP_CMD=/absolute/path/to/bin/fm-cognee-lookup.sh @@ -60,6 +62,17 @@ dataset_id_hash() { fi } +fm_cognee_timeout_ms() { + local value=${FM_COGNEE_TIMEOUT_MS:-30000} + case "$value" in ''|*[!0-9]*) value=30000 ;; esac + [ "$value" -ge 1 ] || value=30000 + printf '%s' "$value" +} + +fm_cognee_timeout_seconds() { + awk -v ms="$(fm_cognee_timeout_ms)" 'BEGIN { printf "%.3f", ms / 1000 }' +} + has_live_dataset_selector() { if [ -n "${COGNEE_DATASET_ID:-}" ] && is_uuid "$COGNEE_DATASET_ID"; then return 0 @@ -91,7 +104,7 @@ live_telemetry_log() { fm_cognee_telemetry_log_api_attempt \ search POST /api/v1/search false "$status" "$error_class" "$http_status" "$retryable" \ "$attempt_number" "${FM_COGNEE_MAX_ATTEMPTS:-3}" "$is_retry" "$retry_reason" \ - "$latency_ms" "${FM_COGNEE_TIMEOUT_MS:-}" "$verification_outcome" true "$parsed_source_count" \ + "$latency_ms" "$(fm_cognee_timeout_ms)" "$verification_outcome" true "$parsed_source_count" \ "" unknown missing_vendor_metadata false "$FM_COGNEE_RUN_ID" "$request_id" "$FM_COGNEE_LOGICAL_SEARCH_ID" \ "$dataset_alias_value" "$dataset_id_hash_value" "${FM_COGNEE_SEARCH_TYPE:-RAG_COMPLETION}" "$top_k" true \ "$request_body_bytes" "$response_body_bytes" "$parsed_source_count" "$final_attempt" @@ -281,6 +294,7 @@ if ! "$DRY_RUN"; then fi TMP_DIR=$(mktemp -d "${TMPDIR:-/tmp}/fm-cognee-live.XXXXXX") + # shellcheck disable=SC2317 # Invoked by trap. cleanup_live() { rm -rf "$TMP_DIR"; } trap cleanup_live EXIT PAYLOAD="$TMP_DIR/search.json" @@ -300,6 +314,7 @@ if ! "$DRY_RUN"; then http_status=0 retryable=false curl_rc=0 + timeout_seconds=$(fm_cognee_timeout_seconds) request_body_bytes=$(wc -c < "$PAYLOAD" | tr -d ' ') response_body_bytes=0 attempt_latency=0 @@ -311,6 +326,8 @@ if ! "$DRY_RUN"; then attempt_start_ms=$(fm_cognee_telemetry_now_ms) set +e http_status=$(curl -sS -o "$BODY" -w '%{http_code}' \ + --connect-timeout "$timeout_seconds" \ + --max-time "$timeout_seconds" \ -X POST "$endpoint" \ -H "X-Api-Key: $COGNEE_API_KEY" \ -H "Content-Type: application/json" \ @@ -405,6 +422,7 @@ fi [ -n "$ANSWER_FILE" ] || die "--answer-file is required when --manifest is used" TMP_OUT=$(mktemp "${TMPDIR:-/tmp}/fm-cognee-lookup.XXXXXX") +# shellcheck disable=SC2317 # Invoked by trap. cleanup_lookup() { rm -f "$TMP_OUT"; } trap cleanup_lookup EXIT diff --git a/bin/fm-cognee-manifest-check.sh b/bin/fm-cognee-manifest-check.sh index f1ba15db..7b372bdd 100755 --- a/bin/fm-cognee-manifest-check.sh +++ b/bin/fm-cognee-manifest-check.sh @@ -112,7 +112,7 @@ for field in $required_fields; do done field_value() { - local name=$1 idx=${IDX[$1]} + local idx=${IDX[$1]} printf '%s' "${COLS[$idx]:-}" } @@ -176,7 +176,7 @@ validate_current_row() { source_path=$(field_value source_path) source_path_lc=$(printf '%s' "$source_path" | tr '[:upper:]' '[:lower:]') case "$source_path_lc" in - *secret*|*token*|*api_key*|*password*|*credential*|*auth*|*bearer*|*cookie*|*private_key*|*.env*|*session*|*oauth*|*signed*) + *secret*|*token*|*api_key*|*password*|*credential*|*auth*|*bearer*|*cookie*|*private_key*|*.env*|*session*|*signed*) TELEMETRY_STATUS=blocked TELEMETRY_ERROR_CLASS=path_risk_scan_failed TELEMETRY_SOURCE_OUTCOME=path_risk_scan_failed diff --git a/bin/fm-cognee-session-cost-probe.sh b/bin/fm-cognee-session-cost-probe.sh index 665f725d..b1e5a9cb 100755 --- a/bin/fm-cognee-session-cost-probe.sh +++ b/bin/fm-cognee-session-cost-probe.sh @@ -114,7 +114,9 @@ done [ -n "$WINDOW_START_UTC" ] || die missing_required_args [ -n "$WINDOW_END_UTC" ] || die missing_required_args [ -n "$OUTPUT_JSONL" ] || die missing_required_args -[ -r "$TELEMETRY" ] && [ ! -d "$TELEMETRY" ] || die telemetry_unreadable +if [ ! -r "$TELEMETRY" ] || [ -d "$TELEMETRY" ]; then + die telemetry_unreadable +fi case "$MAX_SESSIONS" in ''|*[!0-9]*) die invalid_max_sessions ;; @@ -151,7 +153,7 @@ validate_endpoint() { fi [ "$method" = GET ] || return 1 case "$path" in - /health|/openapi.json|/api/v1/sessions|/api/v1/sessions/{session_id}|/api/v1/sessions/cost-by-model) + /health|/openapi.json|/api/v1/sessions|"/api/v1/sessions/{session_id}"|/api/v1/sessions/cost-by-model) ;; *) return 1 diff --git a/bin/fm-cognee-telemetry-lib.sh b/bin/fm-cognee-telemetry-lib.sh index 8f9d0f36..baa3bc0c 100755 --- a/bin/fm-cognee-telemetry-lib.sh +++ b/bin/fm-cognee-telemetry-lib.sh @@ -1,9 +1,11 @@ #!/usr/bin/env bash # Secret-safe local JSONL telemetry helpers for Cognee wrapper operations. # -# Callers pass only labels, counters, timings, and cost classifications. This -# helper never receives prompt text, answer bodies, source bodies, auth headers, -# API keys, cookies, signed URLs, bearer tokens, or secret values. +# Telemetry callers pass only labels, counters, timings, and cost classifications. +# This helper's safe env-file loader may read allowlisted Cognee connection names, +# but telemetry events never receive or write prompt text, answer bodies, source +# bodies, auth headers, API keys, cookies, signed URLs, bearer tokens, base URLs, +# or secret values. fm_cognee_env_trim() { local value=$1 @@ -63,7 +65,9 @@ fm_cognee_load_env_file() { value=$(fm_cognee_env_trim "$value") case "$key" in ''|[!A-Za-z_]*|*[!A-Za-z0-9_]*) + # shellcheck disable=SC2034 # Read by callers after fm_cognee_load_env_file returns. FM_COGNEE_ENV_FILE_LOAD_ERROR=env_file_malformed + # shellcheck disable=SC2034 # Read by callers after fm_cognee_load_env_file returns. FM_COGNEE_ENV_FILE_LOAD_LINE=$line_no return 1 ;; @@ -75,8 +79,11 @@ fm_cognee_load_env_file() { last=${value#"${value%?}"} if [ "$first" = "'" ] || [ "$first" = '"' ]; then if [ "$last" != "$first" ] || [ "${#value}" -lt 2 ]; then + # shellcheck disable=SC2034 # Read by callers after fm_cognee_load_env_file returns. FM_COGNEE_ENV_FILE_LOAD_ERROR=env_file_malformed + # shellcheck disable=SC2034 # Read by callers after fm_cognee_load_env_file returns. FM_COGNEE_ENV_FILE_LOAD_LINE=$line_no + # shellcheck disable=SC2034 # Read by callers after fm_cognee_load_env_file returns. FM_COGNEE_ENV_FILE_LOAD_KEY=$key return 1 fi @@ -87,7 +94,7 @@ fm_cognee_load_env_file() { if [ -z "${!key+x}" ] || [ -z "${!key}" ]; then printf -v "$key" '%s' "$value" - export "$key" + export "${key?}" fi done < "$env_file" return 0 diff --git a/bin/fm-cognee-verify-source.sh b/bin/fm-cognee-verify-source.sh index 15e0c05b..576aaa6a 100755 --- a/bin/fm-cognee-verify-source.sh +++ b/bin/fm-cognee-verify-source.sh @@ -9,7 +9,8 @@ set -eu SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" . "$SCRIPT_DIR/fm-cognee-telemetry-lib.sh" export FM_COGNEE_TELEMETRY_FILE="${FM_COGNEE_TELEMETRY_FILE:-$(fm_cognee_telemetry_default_path)}" -export FM_COGNEE_TELEMETRY_START_MS="$(fm_cognee_telemetry_now_ms)" +FM_COGNEE_TELEMETRY_START_MS=$(fm_cognee_telemetry_now_ms) +export FM_COGNEE_TELEMETRY_START_MS usage() { echo "usage: fm-cognee-verify-source.sh --manifest --answer " >&2 diff --git a/bin/fm-memory-lookup.sh b/bin/fm-memory-lookup.sh index 4a867581..884f1555 100755 --- a/bin/fm-memory-lookup.sh +++ b/bin/fm-memory-lookup.sh @@ -12,7 +12,6 @@ set -eu SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -FM_ROOT="${FM_ROOT_OVERRIDE:-$(cd "$SCRIPT_DIR/.." && pwd)}" . "$SCRIPT_DIR/fm-cognee-telemetry-lib.sh" TELEMETRY_START_MS=$(fm_cognee_telemetry_now_ms) @@ -150,6 +149,7 @@ append_brief_section() { } TMP_DIR=$(mktemp -d "${TMPDIR:-/tmp}/fm-memory-lookup.XXXXXX") +# shellcheck disable=SC2317 # Invoked by trap. cleanup() { rm -rf "$TMP_DIR"; } trap cleanup EXIT diff --git a/bin/fm-route.sh b/bin/fm-route.sh index 5dd1ebe1..25ca73de 100755 --- a/bin/fm-route.sh +++ b/bin/fm-route.sh @@ -77,7 +77,7 @@ contains_git_danger() { } join_reasons() { - local result= part + local result="" part for part in "$@"; do [ -n "$part" ] || continue if [ -z "$result" ]; then diff --git a/bin/fm-supervision-model.sh b/bin/fm-supervision-model.sh index af81ccc9..eea325d7 100755 --- a/bin/fm-supervision-model.sh +++ b/bin/fm-supervision-model.sh @@ -322,8 +322,93 @@ fm_supervision_gh_api_get() { fi } +fm_supervision_check_runs_state() { + local text + text=$(cat) + FM_SUPERVISION_CHECK_RUNS_TEXT=$text \ + python3 - <<'PY' +import json +import os +import re +import sys + +text = os.environ.get("FM_SUPERVISION_CHECK_RUNS_TEXT", "") +if not text.strip(): + print("unknown\tunknown") + raise SystemExit(0) + +runs = [] +total_count = None +try: + data = json.loads(text) + total_count = data.get("total_count") + runs = data.get("check_runs") or [] +except json.JSONDecodeError: + match = re.search(r"(?m)^\s*total_count:\s*([0-9]+)\s*$", text) + if match: + total_count = int(match.group(1)) + statuses = re.findall(r"(?m)^\s*-?\s*status:\s*(\S+)\s*$", text) + conclusions = re.findall(r"(?m)^\s*-?\s*conclusion:\s*(\S+)\s*$", text) + for index, status in enumerate(statuses): + conclusion = conclusions[index] if index < len(conclusions) else "" + runs.append({"status": status, "conclusion": conclusion}) + +try: + total = int(total_count) +except (TypeError, ValueError): + total = len(runs) if runs else None + +if total == 0: + print("none\t0") + raise SystemExit(0) +if not runs: + print("unknown\t{}".format(total if total is not None else "unknown")) + raise SystemExit(0) + +failure_conclusions = {"failure", "cancelled", "timed_out", "action_required", "startup_failure", "stale"} +pending = False +for run in runs: + status = str(run.get("status") or "").lower() + conclusion = run.get("conclusion") + conclusion = "" if conclusion is None else str(conclusion).lower() + if conclusion in failure_conclusions: + print("failure\t{}".format(total if total is not None else len(runs))) + raise SystemExit(0) + if status != "completed" or conclusion in {"", "null"}: + pending = True + +if pending: + print("pending\t{}".format(total if total is not None else len(runs))) +else: + print("success\t{}".format(total if total is not None else len(runs))) +PY +} + +fm_supervision_merge_ci_state() { + local status_state=$1 status_count=$2 check_state=$3 check_count=$4 total + if printf '%s\n%s\n' "$status_state" "$check_state" | grep -Eq '^(failure|error)$'; then + printf 'failure\t%s' "$(fm_supervision_ci_total "$status_count" "$check_count")" + elif printf '%s\n%s\n' "$status_state" "$check_state" | grep -Eq '^pending$'; then + printf 'pending\t%s' "$(fm_supervision_ci_total "$status_count" "$check_count")" + elif printf '%s\n%s\n' "$status_state" "$check_state" | grep -Eq '^unknown$'; then + printf 'unknown\t%s' "$(fm_supervision_ci_total "$status_count" "$check_count")" + elif printf '%s\n%s\n' "$status_state" "$check_state" | grep -Eq '^success$'; then + printf 'success\t%s' "$(fm_supervision_ci_total "$status_count" "$check_count")" + else + total=$(fm_supervision_ci_total "$status_count" "$check_count") + [ "$total" = 0 ] && printf 'none\t0' || printf 'unknown\t%s' "$total" + fi +} + +fm_supervision_ci_total() { + local a=$1 b=$2 total=0 saw=false + case "$a" in ''|unknown) : ;; *[!0-9]*) : ;; *) total=$((total + a)); saw=true ;; esac + case "$b" in ''|unknown) : ;; *[!0-9]*) : ;; *) total=$((total + b)); saw=true ;; esac + "$saw" && printf '%s' "$total" || printf 'unknown' +} + fm_supervision_gh_pr() { - local url=$1 parsed repo number out state merged mergeable_state sha status_out ci_state total_count + local url=$1 parsed repo number out state merged mergeable_state sha status_out checks_out checks_data ci_data ci_state total_count status_state status_count check_state check_count parsed=$(fm_supervision_pr_from_url "$url") || return 1 [ -n "$parsed" ] || return 1 repo=${parsed% *} @@ -339,17 +424,47 @@ fm_supervision_gh_pr() { ci_state=unknown total_count=unknown if [ -n "$sha" ]; then + status_state=unknown + status_count=unknown status_out=$(fm_supervision_gh_api_get "/repos/$repo/commits/$sha/status") || status_out= if [ -n "$status_out" ]; then - ci_state=$(printf '%s\n' "$status_out" | fm_supervision_yaml_value state) - total_count=$(printf '%s\n' "$status_out" | fm_supervision_yaml_value total_count) - [ -n "$ci_state" ] || ci_state=unknown - [ "$total_count" = 0 ] && ci_state=none + status_state=$(printf '%s\n' "$status_out" | fm_supervision_yaml_value state) + status_count=$(printf '%s\n' "$status_out" | fm_supervision_yaml_value total_count) + [ -n "$status_state" ] || status_state=unknown + [ -n "$status_count" ] || status_count=unknown + [ "$status_count" = 0 ] && status_state=none fi + check_state=unknown + check_count=unknown + checks_out=$(fm_supervision_gh_api_get "/repos/$repo/commits/$sha/check-runs") || checks_out= + if [ -n "$checks_out" ]; then + checks_data=$(printf '%s\n' "$checks_out" | fm_supervision_check_runs_state) + check_state=$(printf '%s' "$checks_data" | awk -F '\t' '{ print $1 }') + check_count=$(printf '%s' "$checks_data" | awk -F '\t' '{ print $2 }') + fi + ci_data=$(fm_supervision_merge_ci_state "$status_state" "$status_count" "$check_state" "$check_count") + ci_state=$(printf '%s' "$ci_data" | awk -F '\t' '{ print $1 }') + total_count=$(printf '%s' "$ci_data" | awk -F '\t' '{ print $2 }') fi printf '%s\t%s\t%s\t%s\t%s\t%s\n' "$state" "$ci_state" "${mergeable_state:-unknown}" "$repo" "${sha:-unknown}" "$total_count" } +fm_supervision_project_path() { + local project=$1 candidate + [ -n "$project" ] || return 1 + case "$project" in + /*) + [ -d "$project" ] || return 1 + printf '%s' "$project" + ;; + *) + candidate="$FM_SUPERVISION_PROJECTS/$project" + [ -d "$candidate" ] || return 1 + printf '%s' "$candidate" + ;; + esac +} + fm_supervision_path_age() { local path=$1 m [ -e "$path" ] || { @@ -526,11 +641,11 @@ fm_supervision_checklist_record() { fm_supervision_collect() { fm_supervision_paths - local records= source_records= checklist_records= task_records= worktree_records= external_records= + local records="" source_records="" checklist_records="" task_records="" worktree_records="" external_records="" local state_ok=true backlog_ok=true tmux_ok=true treehouse_ok=true git_ok=true github_ok=true github_detail="gh-axi api GET only" local task_count=0 checklist_count=0 high_count=0 medium_count=0 github_state=ok watcher_state=skipped watcher_ok=true watcher_detail= local referenced_worktrees="|" - local meta id project kind mode yolo harness route_profile route_harness route_model route_effort window worktree recorded_branch branch dirty_count last_status turn_ended pr_url pr_data pr_state ci_state mergeable_state + local meta id project project_status_path kind mode yolo harness route_profile route_harness route_model route_effort window worktree recorded_branch branch dirty_count last_status turn_ended pr_url pr_data pr_state ci_state mergeable_state local class_data classification severity owner action why evidence line status_pr window_live treehouse_failed=false [ -d "$FM_SUPERVISION_STATE" ] || state_ok=false @@ -588,8 +703,9 @@ fm_supervision_collect() { github_detail="one or more PR reads failed; affected PR states unknown" fi fi - if [ -n "$project" ] && [ -d "$FM_SUPERVISION_PROJECTS/$project" ]; then - if ! fm_supervision_treehouse_status "$FM_SUPERVISION_PROJECTS/$project"; then + project_status_path=$(fm_supervision_project_path "$project" 2>/dev/null || true) + if [ -n "$project_status_path" ]; then + if ! fm_supervision_treehouse_status "$project_status_path"; then treehouse_failed=true treehouse_ok=false fi @@ -608,7 +724,7 @@ fm_supervision_collect() { why="treehouse status failed for the project." fi evidence="meta=$(basename "$meta"); status=${last_status:-none}; window_live=$window_live; pr_state=$pr_state; ci_state=$ci_state; mergeable_state=$mergeable_state" - line=$(printf 'task\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s' \ + line=$(printf 'task\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s' \ "$(fm_supervision_field "$id")" "$(fm_supervision_field "$project")" "$(fm_supervision_field "$kind")" \ "$(fm_supervision_field "$mode")" "$(fm_supervision_field "$yolo")" "$(fm_supervision_field "$harness")" \ "$(fm_supervision_field "$route_profile")" "$(fm_supervision_field "$route_harness")" "$(fm_supervision_field "$route_model")" "$(fm_supervision_field "$route_effort")" \ @@ -744,7 +860,7 @@ EOF fm_supervision_emit_json() { fm_supervision_paths - local generated_at source_lines= task_lines= worktree_lines= external_lines= checklist_lines= summary_line= line kind + local generated_at source_lines="" task_lines="" worktree_lines="" external_lines="" checklist_lines="" summary_line="" line kind generated_at=$(date -u +%Y-%m-%dT%H:%M:%SZ) while IFS= read -r line; do [ -n "$line" ] || continue @@ -850,7 +966,7 @@ EOF } fm_supervision_emit_text() { - local generated_at include_ok checklist_lines= source_lines= task_lines= summary_line= line kind + local generated_at include_ok checklist_lines="" source_lines="" task_lines="" summary_line="" line kind generated_at=$(date -u +%Y-%m-%dT%H:%M:%SZ) include_ok=${FM_SUPERVISE_INCLUDE_OK:-0} while IFS= read -r line; do @@ -886,7 +1002,7 @@ EOF printf 'No immediate action items.\n\n' fi - local gh_ok=true gh_detail= watcher_ok=true watcher_detail= + local gh_ok=true gh_detail="" watcher_ok=true watcher_detail="" while IFS=$'\t' read -r _ name ok detail; do case "$name" in github) gh_ok=$ok; gh_detail=$detail ;; diff --git a/bin/fm-watch-session.sh b/bin/fm-watch-session.sh index 290fca45..93c4fa71 100755 --- a/bin/fm-watch-session.sh +++ b/bin/fm-watch-session.sh @@ -19,7 +19,7 @@ SESSION_DIR="$STATE/.watch-session" ENV_FILE="$SESSION_DIR/env.sh" RUNNER_FILE="$SESSION_DIR/runner.sh" STOP_FILE="$SESSION_DIR/stop" -RETRY_DELAY=${FM_WATCH_SESSION_RETRY_DELAY:-5} +RETRY_DELAY=${FM_WATCH_SESSION_REARM_DELAY:-${FM_WATCH_SESSION_RETRY_DELAY:-1}} AFK_DELAY=${FM_WATCH_SESSION_AFK_DELAY:-15} usage() { @@ -52,10 +52,12 @@ write_runner_files() { printf 'rm -f %s\n' "$(shell_quote "$STOP_FILE")" printf 'while :; do\n' printf ' [ -e %s ] && exit 0\n' "$(shell_quote "$STOP_FILE")" + # shellcheck disable=SC2016 # Generated runner expands FM_STATE_OVERRIDE at runtime. printf ' if [ -e "$FM_STATE_OVERRIDE/.afk" ]; then sleep %s; continue; fi\n' "$AFK_DELAY" printf ' %s/fm-watch-arm.sh\n' "$(shell_quote "$SCRIPT_DIR")" printf ' rc=$?\n' printf ' [ -e %s ] && exit 0\n' "$(shell_quote "$STOP_FILE")" + # shellcheck disable=SC2016 # Generated runner expands rc at runtime. printf ' if [ "$rc" -ne 0 ]; then sleep %s; else sleep 1; fi\n' "$RETRY_DELAY" printf 'done\n' } > "$RUNNER_FILE" diff --git a/docs/architecture.md b/docs/architecture.md index ecce91c6..025b16be 100644 --- a/docs/architecture.md +++ b/docs/architecture.md @@ -21,8 +21,9 @@ Optional X mode rides the same check path: bootstrap drops a local `state/x-watc Routine re-arms go through `bin/fm-watch-arm.sh`, which forks the watcher as a tracked child, verifies it is genuinely alive with a fresh liveness beacon, and prints exactly one honest status line (`started` / `healthy` / `FAILED`, the last exiting non-zero) - never a false `already running` off a dying process. Its `--restart` mode signals only the watcher recorded in the current home's `state/.watch.lock`, so restarting one home cannot kill sibling secondmate watchers. -For harnesses where a tracked background call is not durable enough, `bin/fm-watch-session.sh` provides a home-scoped runner that repeatedly arms the normal watcher from a persistent process, reports status from `state/.watch-session.lock`, and stops only the runner recorded for the current `FM_HOME`. +For harnesses where a tracked background call is not durable enough, `bin/fm-watch-session.sh` provides a home-scoped tmux runner that repeatedly arms the normal watcher from a persistent process, reports status from the derived `firstmate-watch:fm-watch-` window, and stops only that current-home runner window. A pull-based guard (`bin/fm-guard.sh`) warns through supervision tool output if the primary checkout is tangled, queued wakes are waiting to be drained, or tasks are in flight and watcher liveness is not proved by both a fresh beacon and a live `state/.watch.lock` for this same home/path. +The read-only supervision model combines GitHub commit status and check-runs when classifying PR CI, so Actions failures such as stale or failed check-runs are not treated as green just because legacy commit status is empty. The drain script calls that guard after emptying the queue, which avoids repeating the queued-wakes warning for records it just consumed while still warning on stale watcher liveness. It leads with prominent bordered banners for the tangle and no-watcher cases so they cannot be skimmed past. diff --git a/docs/cognee-policy.md b/docs/cognee-policy.md new file mode 100644 index 00000000..97306a2b --- /dev/null +++ b/docs/cognee-policy.md @@ -0,0 +1,69 @@ +# Cognee Policy + +Cognee is a trial memory layer for Firstmate. It can provide context hints, but +it is not a source of truth, durable archive, proof system, or action authority. + +## Current Contract + +- Use Cognee only through explicit, read-only, manual lookup flows unless the + automatic gate passes. +- Treat every Cognee answer as a hint until a local source file is reopened and + verified through the local manifest/source-verification path. +- Never use Cognee output to authorize merges, deploys, refreshes, imports, + deletes, purchases, vendor/customer actions, or any other external action. +- Do not log raw answer bodies, question bodies, context bodies, auth headers, + base URLs, secret values, or raw session JSON. + +## Docs Audit Update + +The June 2026 official-docs audit clarified two important points: + +- Cognee documents raw data listing and raw data download endpoints, and + `memory_only=True` deletion preserves raw files/records for reprocessing. +- Cognee documents Cloud pricing and session/model cost surfaces. + +Those facts improve the trial picture, but they do not promote Cognee to +production memory for Firstmate. + +## Still Blocked + +Automatic lookup still requires all existing gate evidence plus: + +- a vendor- or docs-backed raw retention, durability, health, restore, and + source-authority guarantee strong enough for Firstmate; +- safe per-wrapper-call cost correlation from one Firstmate lookup to one + Cognee request/session/QA/cost record without reading or storing sensitive + answer, question, or context bodies. + +Until both are proven, use this wording: + +- Raw readback is documented, but Cloud raw retention/source-of-truth + guarantees remain unproven for Firstmate. +- Pricing and session/model cost surfaces are documented, but safe + per-wrapper-call cost correlation remains unproven for Firstmate. + +## API Posture + +Cognee v1.0 presents `remember`, `recall`, `improve`, and `forget` as the main +memory lifecycle. Firstmate currently uses the documented lower-level +`POST /api/v1/search` path intentionally because the pilot needs explicit, +read-only search control and local source verification. + +## Operational Helpers + +- `fm-cognee-lookup-gate.sh manual-verified` prints the manual contract: + read-only, hint-only, fail-closed, local-source-required, and no external + action authority. +- `fm-cognee-lookup-gate.sh automatic` fails closed unless + `FM_COGNEE_AUTO_LOOKUP=1` and the local evidence set proves every gate marker, + including `FM_COGNEE_GATE_COST_USAGE_EVIDENCE=per_wrapper_call` and + `FM_COGNEE_GATE_RAW_DURABILITY_SOURCE_AUTHORITY=pass`. +- `fm-memory-lookup.sh` is the manual pre-dispatch helper. + It is not wired into automatic dispatch, and missing or failing lookup does + not block work. +- `fm-cognee-lookup.sh`, `fm-cognee-manifest-check.sh`, and + `fm-cognee-verify-source.sh` keep Cognee answers advisory by reopening local + source references before any hint can be attached. +- `fm-cognee-session-cost-probe.sh` is disabled-by-design planning support for + a separately approved live probe lane; it writes redacted local JSONL probe + plans and makes no network calls itself. diff --git a/docs/configuration.md b/docs/configuration.md index 0f26a973..ecfaffbe 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -87,6 +87,22 @@ Truthy means anything except unset, empty, `0`, `false`, `no`, or `off`; an expl In dry-run, `fm-x-reply.sh` records the full would-be payload to `state/x-outbox/.json`, including `texts` for a thread, prints a `DRY RUN` summary to stderr, echoes the `request_id`, and exits 0. This path needs `jq` to build the JSON payload, but it runs before token and network checks, so it needs neither `FMX_PAIRING_TOKEN` nor `curl`. +## Cognee trial memory + +Cognee is trial-only memory context for Firstmate. +It is not a source of truth, durable archive, proof system, or action authority; [cognee-policy.md](cognee-policy.md) owns the operating policy. + +Manual lookup is configured with `FM_COGNEE_LOOKUP_CMD`, `FM_COGNEE_MANIFEST`, and the already-exported Cognee read-only credentials. +`fm-memory-lookup.sh` runs the backend only when invoked by hand, treats output as a hint, and attaches only local source paths it can reopen. +Without `FM_COGNEE_LOOKUP_CMD`, it exits 0 with a memory-unavailable note so dispatch continues without Cognee. + +Live lookup through `fm-cognee-lookup.sh` requires `COGNEE_BASE_URL`, `COGNEE_API_KEY`, a dataset selector (`COGNEE_DATASET_ID` or `FM_COGNEE_DATASET_ALIAS`), and a manifest path (`FM_COGNEE_MANIFEST` or `--manifest`). +`FM_COGNEE_ENV_FILE` may load only the allowlisted Cognee names from an env-style file; malformed or unreadable files fail closed without shell-sourcing the file. +The live wrapper calls only `POST /api/v1/search`, records secret-safe telemetry, and still delegates proof to local manifest/source verification. + +Automatic lookup remains disabled unless `FM_COGNEE_AUTO_LOOKUP=1` and the local evidence under `FM_COGNEE_EVIDENCE_ROOT` proves every gate marker, including `FM_COGNEE_GATE_COST_USAGE_EVIDENCE=per_wrapper_call` and `FM_COGNEE_GATE_RAW_DURABILITY_SOURCE_AUTHORITY=pass`. +`session_window_only` cost evidence is accepted only as trial monitoring evidence and still blocks automatic promotion. + ## Environment variables Runtime tuning via environment variables (defaults shown): @@ -115,6 +131,10 @@ FM_GUARD_GRACE=300 # seconds before guard warnings and arm health checks tr FM_ARM_CONFIRM_TIMEOUT=10 # seconds fm-watch-arm waits to confirm a fresh watcher before reporting FAILED FM_WATCHER_STALE_GRACE=300 # defaults to FM_GUARD_GRACE; seconds a live watcher lock may have a stale beacon before re-arm errors FM_WATCH_SESSION_REARM_DELAY=1 # seconds the durable watch-session runner waits before re-arming after a watcher exit +FM_WATCH_SESSION_RETRY_DELAY= # legacy alias for FM_WATCH_SESSION_REARM_DELAY +FM_WATCH_SESSION_AFK_DELAY=15 # seconds watch-session sleeps while the AFK daemon owns supervision +FM_WATCH_SESSION_TMUX_SESSION=firstmate-watch # tmux session name for durable watch-session runner windows +FM_WATCH_SESSION_TMUX_WINDOW= # optional tmux window name; default is fm-watch- FM_SIGNAL_GRACE=30 # seconds to coalesce nearby status and turn-end signals into one wake FM_CAPTAIN_RE='done:|needs-decision:|blocked:|failed:|PR ready|checks green|ready in branch|merged' # status regex that makes watcher and daemon signal/stale/scan output captain-relevant FM_STALE_ESCALATE_SECS=240 # idle seconds before a non-terminal stale pane escalates as a possible wedge @@ -126,6 +146,25 @@ FM_COMPOSER_IDLE_RE= # optional empty-composer regex, applied after dim-ghost FM_SEND_RETRIES=3 # fm-send Enter-retry attempts after typing the line once FM_SEND_SLEEP=0.4 # seconds between fm-send submit checks FM_SEND_SETTLE=1 # seconds fm-send waits after a successful text submit; 0 disables +# read-only supervision view (bin/fm-supervise.sh) +FM_SUPERVISE_TREEHOUSE_TIMEOUT=5 # seconds allowed per treehouse status read +FM_SUPERVISE_GH_TIMEOUT=5 # seconds allowed per gh-axi GitHub read +# Cognee trial memory and local verification +FM_COGNEE_LOOKUP_CMD= # executable backend path for manual memory lookup, usually bin/fm-cognee-lookup.sh +FM_MEMORY_LOOKUP_MAX_HINT_LINES=40 # maximum hint lines printed from a manual memory lookup +COGNEE_BASE_URL= # Cognee Cloud/API base URL for explicit live lookup +COGNEE_API_KEY= # Cognee API key for explicit live lookup +COGNEE_DATASET_ID= # UUID dataset selector; logged only as a sha256 hash +FM_COGNEE_DATASET_ALIAS= # alternate dataset selector when COGNEE_DATASET_ID is absent +FM_COGNEE_MANIFEST= # local manifest used for Cognee answer/source verification +FM_COGNEE_ENV_FILE= # optional env-style file; only allowlisted Cognee names are loaded +FM_COGNEE_SEARCH_TYPE=RAG_COMPLETION # searchType sent to POST /api/v1/search +FM_COGNEE_TOP_K=8 # topK sent to POST /api/v1/search +FM_COGNEE_MAX_ATTEMPTS=3 # live lookup attempts before fail-closed exit +FM_COGNEE_TIMEOUT_MS=30000 # connect and request timeout budget for live lookup +FM_COGNEE_TELEMETRY_FILE= # default: $FM_HOME/data/cognee/telemetry.jsonl +FM_COGNEE_EVIDENCE_ROOT=/root/firstmate/data # local evidence root for fm-cognee-lookup-gate.sh +FM_COGNEE_AUTO_LOOKUP=0 # must be 1 plus all evidence markers before automatic lookup is allowed # sub-supervisor (bin/fm-supervise-daemon.sh); presence-gated via /afk FM_SUPERVISOR_TARGET=firstmate:0 # supervisor tmux target (override; auto-discovers from $TMUX_PANE) FM_INJECT_SKIP=heartbeat # |-prefixes force-self-handled bypassing classification; empty disables diff --git a/docs/scripts.md b/docs/scripts.md index f07749a8..c5295258 100644 --- a/docs/scripts.md +++ b/docs/scripts.md @@ -23,6 +23,9 @@ Each file also starts with a short header comment. | `fm-review-diff.sh` | Review a crewmate branch against the authoritative base, with optional `--stat` output | | `fm-cognee-lookup.sh` | Read-only Cognee lookup wrapper with dry-run fixtures and guarded live `POST /api/v1/search`; treats answers as hints and delegates source proof to local manifest/source verification | | `fm-cognee-manifest-check.sh` | Validate TSV Cognee manifest rows and verify `SOURCE_ID`, `SOURCE_PATH`, or `SEED_FILE` answer references against reopened local files | +| `fm-cognee-session-cost-probe.sh` | Disabled metadata-only planner for approved future Cognee session/cost probes; validates GET-only endpoint templates and writes redacted local JSONL probe-plan events without network calls | +| `fm-cognee-telemetry-lib.sh` | Secret-safe JSONL telemetry helper for Cognee wrappers; records labels, timings, counts, cost classifications, and hashed identifiers without raw prompts, answers, headers, URLs, or secrets | +| `fm-cognee-verify-source.sh` | Local-only verifier for Cognee hint text against JSONL manifests; reopens referenced source files and emits source-verification JSON plus secret-safe telemetry | | `fm-marker-lib.sh` | Shared from-firstmate request marker and detector sourced by `fm-send.sh`, `fm-brief.sh`, and tests | | `fm-watch-arm.sh` | Verified per-home watcher re-arm; reports `started`, `healthy`, or `FAILED`; `--restart` relaunches only this home's watcher | | `fm-watch-session.sh` | Durable home-scoped tmux runner that loops through `fm-watch-arm.sh` for harness lanes without reliable tracked background tasks | @@ -33,6 +36,7 @@ Each file also starts with a short header comment. | `fm-crew-state.sh` | Print one stable current-state line for a crew by reconciling its matching no-mistakes run-step, even when the pane has closed, with pane and status-log fallback | | `fm-tangle-lib.sh` | Shared default-branch resolution and primary-checkout tangle classification sourced by bootstrap and guard | | `fm-ff-lib.sh` | Shared guarded fast-forward helper for `/updatefirstmate` origin pulls and no-fetch local secondmate syncs | +| `fm-task-identity-lib.sh` | Shared branch/meta identity guard for helpers that must refuse when a ship task's worktree is not on `fm/` | | `fm-tasks-axi-lib.sh` | Shared `tasks-axi` compatibility probe sourced by bootstrap and teardown | | `fm-wake-drain.sh` | Atomically drain queued watcher wakes before handling supervision work, then run the watcher-liveness guard | | `fm-wake-lib.sh` | Shared durable wake queue and portable lock helpers sourced by the watcher, drain, arm, guard, and daemon | @@ -49,4 +53,6 @@ Each file also starts with a short header comment. | `fm-x-poll.sh` | Do one bounded X relay poll; without `FMX_PAIRING_TOKEN` it is silent, with a pending mention it stashes the full inbox JSON, including `in_reply_to`, and prints `x-mention ` | | `fm-x-reply.sh` | Post or dry-run preview a composed public-safe X reply, auto-splitting long text into `{request_id,text,texts}` threads; reads text from an argument, stdin, or `--text-file` | -Cognee automatic lookup needs per-wrapper-call cost evidence: `FM_COGNEE_GATE_COST_USAGE_EVIDENCE=per_wrapper_call`. Current `session_window_only` evidence is accepted only as trial monitoring evidence and still blocks automatic promotion because there is no safe per-wrapper-call cost/request/session/QA id bridge. Manual verified lookup remains read-only, hint-only, fail-closed, and local-source-verified. +Cognee policy lives in [cognee-policy.md](cognee-policy.md). Automatic lookup needs per-wrapper-call cost evidence: `FM_COGNEE_GATE_COST_USAGE_EVIDENCE=per_wrapper_call`. Current `session_window_only` evidence is accepted only as trial monitoring evidence and still blocks automatic promotion because there is no safe per-wrapper-call cost/request/session/QA id bridge. Manual verified lookup remains read-only, hint-only, fail-closed, and local-source-verified. + +The official docs now show raw data readback and session/model cost surfaces. That does not satisfy Firstmate's production gates by itself: raw retention/source-authority guarantees and safe per-wrapper-call cost correlation remain unproven. diff --git a/tests/fm-cognee-brief-rules.test.sh b/tests/fm-cognee-brief-rules.test.sh index 161c5a27..a8fef934 100755 --- a/tests/fm-cognee-brief-rules.test.sh +++ b/tests/fm-cognee-brief-rules.test.sh @@ -22,6 +22,7 @@ assert_cognee_rules() { "brief does not require reopening the local source path" assert_grep 'The hint includes stale-risk and says live state still needs verification.' "$brief" \ "brief does not require stale-risk and live verification wording" + # shellcheck disable=SC2016 # Backticks are literal expected brief text. assert_grep '`external_action_authorized=false`.' "$brief" \ "brief does not pin external_action_authorized=false" assert_grep 'Never use raw Cognee answer text as proof.' "$brief" \ diff --git a/tests/fm-cognee-lookup.test.sh b/tests/fm-cognee-lookup.test.sh index c07bca11..77a124b6 100755 --- a/tests/fm-cognee-lookup.test.sh +++ b/tests/fm-cognee-lookup.test.sh @@ -199,6 +199,54 @@ PY pass "retry attempts share logical search id and rotate request id" } +test_live_request_applies_transport_timeout() { + local dir source manifest fakebin out code sha telemetry secret + dir="$TMP_ROOT/live-timeout" + mkdir -p "$dir" + source="$dir/source.md" + manifest="$dir/manifest.tsv" + fakebin=$(fm_fakebin "$dir") + telemetry="$dir/telemetry.jsonl" + secret="SECRET_TIMEOUT_API_KEY_DO_NOT_LOG" + printf 'local source truth from timeout search\n' > "$source" + sha=$(sha256sum "$source" | awk '{print $1}') + write_manifest "$manifest" timeout-live-01 "$source" "$sha" + cat > "$fakebin/curl" < "\$out" <&1) + code=$? + set -e + expect_code 0 "$code" "live lookup should pass timeout options to curl" + assert_contains "$out" "label=verified_local_source" "timeout lookup should still verify" + assert_not_contains "$out" "$secret" "timeout output must not print API key" + assert_contains "$(cat "$telemetry")" '"timeout_ms": 2500' "telemetry should keep timeout budget" + pass "live request applies transport timeout" +} + test_live_env_file_loads_allowlisted_names_safely() { local dir source manifest fakebin envfile out code sha telemetry marker complex_secret dir="$TMP_ROOT/live-env-file" @@ -209,6 +257,7 @@ test_live_env_file_loads_allowlisted_names_safely() { fakebin=$(fm_fakebin "$dir") telemetry="$dir/telemetry.jsonl" marker="$dir/should-not-exist" + # shellcheck disable=SC2016 # Command substitution text must stay literal for env-loader safety coverage. complex_secret='value with spaces @ dollar$ backtick` double" single'\'' semi;colon $(touch '"$marker"')' printf 'local source truth from env-file lookup\n' > "$source" sha=$(sha256sum "$source" | awk '{print $1}') @@ -219,6 +268,7 @@ test_live_env_file_loads_allowlisted_names_safely() { printf 'COGNEE_API_KEY=%s\n' "$complex_secret" printf 'FM_COGNEE_DATASET_ALIAS=%s\n' 'firstmate-curated-memory-0629' printf 'FM_COGNEE_MANIFEST=%s\n' "$manifest" + # shellcheck disable=SC2016 # Command substitution text must stay literal for env-loader safety coverage. printf 'UNSAFE_UNKNOWN=%s\n' '$(touch '"$marker"')' } > "$envfile" cat > "$fakebin/curl" <<'SH' @@ -283,6 +333,7 @@ test_live_env_file_ignores_unknown_names() { printf 'FM_COGNEE_DATASET_ALIAS=%s\n' 'firstmate-curated-memory-0629' printf 'FM_COGNEE_MANIFEST=%s\n' "$manifest" printf 'FM_COGNEE_TELEMETRY_FILE=%s\n' "$dir/ignored-telemetry.jsonl" + # shellcheck disable=SC2016 # Command substitution text must stay literal for env-loader safety coverage. printf 'UNKNOWN_NAME=%s\n' '$(touch '"$marker"')' printf '%s\n' "UNKNOWN_QUOTED='ignored" } > "$envfile" @@ -703,6 +754,7 @@ test_live_payload_uses_dataset_alias_selector_without_uuid test_live_payload_uses_dataset_id_selector_when_uuid_is_set test_live_fake_search_parses_verifies_and_writes_redacted_telemetry test_live_retry_attempts_keep_logical_id_and_rotate_request_id +test_live_request_applies_transport_timeout test_live_env_file_loads_allowlisted_names_safely test_live_env_file_ignores_unknown_names test_live_env_file_malformed_fails_closed_without_values diff --git a/tests/fm-cognee-session-cost-probe.test.sh b/tests/fm-cognee-session-cost-probe.test.sh index e3144092..e6dfed39 100755 --- a/tests/fm-cognee-session-cost-probe.test.sh +++ b/tests/fm-cognee-session-cost-probe.test.sh @@ -122,11 +122,13 @@ test_env_file_values_load_safely_without_source_or_eval() { output="$dir/probe.jsonl" envfile="$dir/cognee.env" marker="$dir/should-not-exist" + # shellcheck disable=SC2016 # Command substitution text must stay literal for env-loader safety coverage. secret='safe value with spaces $(touch '"$marker"')' : > "$telemetry" { printf 'COGNEE_BASE_URL=%s\n' 'https://env-file.invalid' printf 'COGNEE_API_KEY=%s\n' "$secret" + # shellcheck disable=SC2016 # Command substitution text must stay literal for env-loader safety coverage. printf 'UNSAFE_UNKNOWN=%s\n' '$(touch '"$marker"')' } > "$envfile" diff --git a/tests/fm-secondmate-safety.test.sh b/tests/fm-secondmate-safety.test.sh index 905b0c84..82fc0a97 100755 --- a/tests/fm-secondmate-safety.test.sh +++ b/tests/fm-secondmate-safety.test.sh @@ -14,7 +14,7 @@ TMP_ROOT=$(fm_test_tmproot fm-secondmate-safety) test_fm_home_parameterization() { - local brief home_one home_two out + local brief fakebin home_one home_two out repo wt home_one="$TMP_ROOT/home one" home_two="$TMP_ROOT/home-two" mkdir -p "$home_one/data" "$home_one/state" "$home_two/data" "$home_two/state" @@ -39,8 +39,25 @@ test_fm_home_parameterization() { brief="$home_one/data/task-c/brief.md" grep -F ">> '$home_one/state/task-c.status'" "$brief" >/dev/null || fail "secondmate brief did not shell-quote FM_HOME state path" - printf 'project=x\n' > "$home_one/state/task-a.meta" - FM_HOME="$home_one" FM_GUARD_GRACE=999999 "$ROOT/bin/fm-pr-check.sh" task-a https://github.com/example/repo/pull/1 >/dev/null 2>/dev/null \ + repo="$TMP_ROOT/pr-check-project" + wt="$TMP_ROOT/pr-check-wt" + fm_git_worktree "$repo" "$wt" "fm/task-a" + fakebin=$(fm_fakebin "$TMP_ROOT/pr-check-fakebin") + cat > "$fakebin/gh" </dev/null 2>/dev/null \ || fail "fm-pr-check failed under FM_HOME" [ -f "$home_one/state/task-a.check.sh" ] || fail "pr check was not written under FM_HOME/state" [ ! -e "$home_two/state/task-a.check.sh" ] || fail "pr check leaked into another home" diff --git a/tests/fm-spawn-route.test.sh b/tests/fm-spawn-route.test.sh index 1e23f506..64382981 100644 --- a/tests/fm-spawn-route.test.sh +++ b/tests/fm-spawn-route.test.sh @@ -58,7 +58,7 @@ run_spawn_case() { } test_ordinary_spawn_records_route_fields() { - local setup home proj wt fakebin id out status meta brief + local home proj wt fakebin id out status meta brief IFS='|' read -r home proj wt fakebin < "$out" \ + || fail "watch-session did not start with documented rearm delay" + runner="$state/.watch-session/runner.sh" + assert_present "$runner" "watch-session should write runner file" + assert_grep 'then sleep 9; else sleep 1; fi' "$runner" "runner should use documented rearm delay" + pass "watch-session uses documented rearm delay" +} + test_watch_session_start_status_stop_are_home_scoped +test_watch_session_uses_documented_rearm_delay diff --git a/tests/wake-helpers.sh b/tests/wake-helpers.sh index d29f7aaa..0dbaa731 100644 --- a/tests/wake-helpers.sh +++ b/tests/wake-helpers.sh @@ -231,7 +231,9 @@ dead_pid() { fm_test_cleanup_watch_processes() { local d f pid pgid for d in "${FM_TEST_CLEANUP_DIRS[@]:-}"; do - [ -n "$d" ] && [ -d "$d" ] || continue + if [ -z "$d" ] || [ ! -d "$d" ]; then + continue + fi while IFS= read -r f; do pid=$(cat "$f" 2>/dev/null || true) case "$pid" in @@ -246,7 +248,9 @@ EOF done sleep 0.2 for d in "${FM_TEST_CLEANUP_DIRS[@]:-}"; do - [ -n "$d" ] && [ -d "$d" ] || continue + if [ -z "$d" ] || [ ! -d "$d" ]; then + continue + fi while IFS= read -r f; do pid=$(cat "$f" 2>/dev/null || true) case "$pid" in