diff --git a/.claude/hooks/README.md b/.claude/hooks/README.md index 25e08aa..582aa03 100644 --- a/.claude/hooks/README.md +++ b/.claude/hooks/README.md @@ -86,16 +86,29 @@ documented per event in the official Claude Code docs. ## Hook inventory -| Hook | Event | Blocking? | Purpose | -|------|-------|-----------|---------| -| `safety-guardrails.py` | `PreToolUse` (Edit/Write/Read/MultiEdit/Bash) | Yes (JSON deny) | Block sensitive files, dangerous commands | -| `workflow-gate.py` | `PreToolUse` (Edit/Write/MultiEdit) | Yes (JSON deny) | Enforce Actor+Monitor workflow before edits | -| `workflow-context-injector.py` | `PreToolUse` (Edit/Write/Bash) | No | Inject MAP workflow reminder | -| `ralph-iteration-logger.py` | `PostToolUse` | No | Log iterations, detect file thrashing | -| `ralph-context-pruner.py` | `PreCompact` | No | Save restore point, prune logs | -| `pre-compact-save-transcript.py` | `PreCompact` | No | Save full conversation transcript | -| `post-compact-context.py` | `SessionStart` (compact) | No | Inject restore-point context | -| `end-of-turn.sh` | `Stop` | No | Auto-fix lint/format silently | -| `detect-clarification-triggers.py` | `UserPromptSubmit` | No | Detect "ask if unclear" + async/durability language | - -Last reviewed: 2026-04-28. +All 11 hooks (10 `.py` + `end-of-turn.sh`) are classified against the +`MAP_INVOKED_BY` recursion-guard contract. **REQUIRE_GUARD** hooks early-exit +when MAP spawns a nested subprocess; **FORBID_GUARD** hooks must always fire +and may not carry the guard. Full contract and per-hook rationale: +[`../references/hook-patterns.md`](../references/hook-patterns.md). The +classification is enforced by `scripts/lint-hooks.py` (in `make lint` / +`make check`). + +| Hook | Event | Blocking? | Class | Purpose | +|------|-------|-----------|-------|---------| +| `safety-guardrails.py` | `PreToolUse` (Edit/Write/Read/MultiEdit/Bash) | Yes (JSON deny) | FORBID_GUARD | Block sensitive files, dangerous commands | +| `workflow-gate.py` | `PreToolUse` (Edit/Write/MultiEdit) | Yes (JSON deny) | FORBID_GUARD | Enforce Actor+Monitor workflow before edits | +| `post-compact-context.py` | `SessionStart` (compact) | No | FORBID_GUARD | Inject restore-point context (re-prime after compaction) | +| `context-meter.py` | `UserPromptSubmit` | No | REQUIRE_GUARD | Nudge `/compact ` when the token threshold is crossed | +| `map-token-meter.py` | `SubagentStop` + `Stop` | No | REQUIRE_GUARD | Attribute per-turn token usage to the active MAP subtask | +| `workflow-context-injector.py` | `PreToolUse` (Edit/Write/Bash) | No | REQUIRE_GUARD | Inject MAP workflow reminder | +| `ralph-iteration-logger.py` | `PostToolUse` | No | REQUIRE_GUARD | Log iterations, detect file thrashing | +| `ralph-context-pruner.py` | `PreCompact` | No | REQUIRE_GUARD | Save restore point, prune logs | +| `pre-compact-save-transcript.py` | `PreCompact` | No | REQUIRE_GUARD | Save full conversation transcript | +| `detect-clarification-triggers.py` | `UserPromptSubmit` | No | REQUIRE_GUARD | Detect "ask if unclear" + async/durability language | +| `end-of-turn.sh` | `Stop` | No | REQUIRE_GUARD | Auto-fix lint/format silently | + +> The Codex twin `.codex/hooks/workflow-gate.py` is FORBID_GUARD like its +> Claude counterpart; this inventory covers `.claude/hooks/` only. + +Last reviewed: 2026-05-29. diff --git a/.claude/hooks/context-meter.py b/.claude/hooks/context-meter.py index ccb1745..ecf57da 100755 --- a/.claude/hooks/context-meter.py +++ b/.claude/hooks/context-meter.py @@ -87,6 +87,8 @@ def _silent() -> None: def main() -> None: + if os.environ.get("MAP_INVOKED_BY"): + sys.exit(0) # Read input strictly as JSON. Anything malformed -> silent no-op. try: input_data = json.load(sys.stdin) diff --git a/.claude/hooks/detect-clarification-triggers.py b/.claude/hooks/detect-clarification-triggers.py index 2fe53b8..bf73335 100755 --- a/.claude/hooks/detect-clarification-triggers.py +++ b/.claude/hooks/detect-clarification-triggers.py @@ -29,6 +29,7 @@ from __future__ import annotations import json +import os import re import sys @@ -149,6 +150,8 @@ def build_message(clar: bool, dura: bool) -> str: def main() -> int: + if os.environ.get("MAP_INVOKED_BY"): + sys.exit(0) try: payload = json.load(sys.stdin) except Exception: diff --git a/.claude/hooks/end-of-turn.sh b/.claude/hooks/end-of-turn.sh index 8210ea2..e28b267 100755 --- a/.claude/hooks/end-of-turn.sh +++ b/.claude/hooks/end-of-turn.sh @@ -21,6 +21,9 @@ set -euo pipefail +# Recursion guard: no-op when MAP spawned this subprocess (MAP_INVOKED_BY set) +[ -n "${MAP_INVOKED_BY:-}" ] && exit 0 + # ----------------------------------------------------------------------------- # Configuration # ----------------------------------------------------------------------------- diff --git a/.claude/hooks/map-token-meter.py b/.claude/hooks/map-token-meter.py index 5920a28..255fff1 100755 --- a/.claude/hooks/map-token-meter.py +++ b/.claude/hooks/map-token-meter.py @@ -74,6 +74,8 @@ def _silent() -> None: def main() -> None: + if os.environ.get("MAP_INVOKED_BY"): + sys.exit(0) try: input_data = json.load(sys.stdin) except (json.JSONDecodeError, ValueError): diff --git a/.claude/hooks/pre-compact-save-transcript.py b/.claude/hooks/pre-compact-save-transcript.py index 30698a2..bbe085a 100755 --- a/.claude/hooks/pre-compact-save-transcript.py +++ b/.claude/hooks/pre-compact-save-transcript.py @@ -130,6 +130,8 @@ def parse_transcript(transcript_path: Path) -> str: def main() -> None: + if os.environ.get("MAP_INVOKED_BY"): + sys.exit(0) try: input_data = json.load(sys.stdin) except json.JSONDecodeError: diff --git a/.claude/hooks/ralph-context-pruner.py b/.claude/hooks/ralph-context-pruner.py index 4a74a45..2847f13 100755 --- a/.claude/hooks/ralph-context-pruner.py +++ b/.claude/hooks/ralph-context-pruner.py @@ -212,6 +212,8 @@ def format_recovery_message(state: Dict[str, Any], branch: str) -> str: def main() -> None: """Main hook execution logic.""" + if os.environ.get("MAP_INVOKED_BY"): + sys.exit(0) # Read stdin (required by hook protocol) try: json.load(sys.stdin) diff --git a/.claude/hooks/ralph-iteration-logger.py b/.claude/hooks/ralph-iteration-logger.py index 7659527..1a7fc3a 100755 --- a/.claude/hooks/ralph-iteration-logger.py +++ b/.claude/hooks/ralph-iteration-logger.py @@ -212,6 +212,8 @@ def detect_thrashing(log_file: Path) -> Optional[dict]: def main() -> None: """Main hook execution logic.""" + if os.environ.get("MAP_INVOKED_BY"): + sys.exit(0) try: input_data = json.load(sys.stdin) except json.JSONDecodeError: diff --git a/.claude/hooks/workflow-context-injector.py b/.claude/hooks/workflow-context-injector.py index d24d3b1..e653ad3 100755 --- a/.claude/hooks/workflow-context-injector.py +++ b/.claude/hooks/workflow-context-injector.py @@ -147,7 +147,7 @@ def read_step_state(branch: str) -> tuple[dict | None, str | None]: def load_step_state(branch: str) -> dict | None: """Load step state from .map//step_state.json.""" - state, _reason = read_step_state(branch) + state, _ = read_step_state(branch) return state @@ -282,7 +282,7 @@ def record_hook_injection_status( def record_skip_if_state_available(branch: str, reason: str, tool_name: str) -> None: """Persist a skipped hook outcome only when existing state is safe to update.""" - state, _state_error = read_step_state(branch) + state, _ = read_step_state(branch) if state is not None: record_hook_injection_status(branch, state, "skipped", reason, tool_name) @@ -335,7 +335,7 @@ def state_string(state: dict, key: str, default: str = "") -> str: return default -def required_action_for_step(step_id: str, step_phase: str, state: dict) -> str | None: +def required_action_for_step(step_id: str, step_phase: str) -> str | None: """Return a short required-next-action hint for common steps.""" if step_id == "1.55": return "Approve plan (set_plan_approved true)" @@ -521,7 +521,7 @@ def format_reminder( wave_hint += f" ({', '.join(str(item) for item in current_wave)})" mode = "batch:parallel" - required = required_action_for_step(step_id, step_phase, state) + required = required_action_for_step(step_id, step_phase) diag_hint = "" diag_file = ( @@ -602,6 +602,8 @@ def format_reminder( def main() -> None: + if os.environ.get("MAP_INVOKED_BY"): + sys.exit(0) branch = get_branch_name() try: input_data = json.load(sys.stdin) @@ -664,7 +666,7 @@ def main() -> None: sys.exit(0) # Load and format workflow step state - state, _state_error = read_step_state(branch) + state, _ = read_step_state(branch) if state is None: print("{}") diff --git a/.claude/references/hook-patterns.md b/.claude/references/hook-patterns.md new file mode 100644 index 0000000..751a7c0 --- /dev/null +++ b/.claude/references/hook-patterns.md @@ -0,0 +1,157 @@ +# Hook Patterns — The `MAP_INVOKED_BY` Recursion Guard + +This document is the authoritative contract for the recursion guard that every +MAP hook is classified against. It is enforced mechanically by +`scripts/lint-hooks.py` (wired into `make lint` / `make check`) and proven by +`tests/test_hook_patterns.py`. The classification list here and in +`lint-hooks.py` must agree; a hook that is unclassified fails the linter. + +## Why a recursion guard exists + +A MAP workflow routinely spawns a nested Claude/Codex subprocess (a nested +Actor, Monitor, or — in Phase E — a memory-flush `claude -p` launched from a +hook). When it does, it sets the reserved environment variable +`MAP_INVOKED_BY` (see `.claude/references/host-paths.md` for the reserved +`MAP_*` namespace). + +The nested subprocess re-fires the **entire hook chain**. Hooks that do +orchestration-, session-, or telemetry-level work belong to the *top-level* +session; re-running them inside a nested Actor is at best noise (duplicate +context injection, double-counted tokens) and at worst recursive (a hook that +spawns child tooling which itself re-enters the hook chain). The guard makes +those hooks no-op when `MAP_INVOKED_BY` is set. + +The guard is **not** a blanket "exit everywhere" switch. A subset of hooks — +the deny gates and the post-compaction re-prime — MUST always fire, even +inside a nested invocation. Applying the guard to them would be a security +regression (a nested Actor doing real edits would no longer be gated) or a +correctness regression (a nested Actor whose context was just compacted would +lose its workflow re-prime). Those hooks are therefore guard-**forbidden**. + +## The two classes + +Every hook is in exactly one class. + +### REQUIRE_GUARD — recursion-suppressed (early-exit on `MAP_INVOKED_BY`) + +These only emit context / nudges / telemetry / transcript saves that belong to +the top-level session. They early-exit when the flag is set. + +| Hook | Event | Blocking? | Rationale for suppression | +|------|-------|-----------|---------------------------| +| `context-meter.py` | `UserPromptSubmit` | No | `/compact` nudge is a top-level session concern; meaningless inside a nested turn | +| `map-token-meter.py` | `SubagentStop` + `Stop` | No | Token attribution is owned by the parent run; nested re-entry double-counts and can spawn child tooling | +| `workflow-context-injector.py` | `PreToolUse` (Edit/Write/Bash) | No | The MAP reminder targets the top-level operator, not a nested Actor that already has its subtask context | +| `detect-clarification-triggers.py` | `UserPromptSubmit` | No | Clarification nudges apply to the human-facing prompt, not nested machine turns | +| `ralph-iteration-logger.py` | `PostToolUse` | No | Iteration/thrashing logging is a parent-run concern; the orchestrator runs its own Monitor on the subtask diff | +| `ralph-context-pruner.py` | `PreCompact` | No | Restore-point/pruning belongs to the top-level transcript | +| `pre-compact-save-transcript.py` | `PreCompact` | No | Saving the parent transcript; a nested run has its own short-lived transcript | +| `end-of-turn.sh` | `Stop` | No | Auto-format could edit files outside a nested Actor's `affected_files`; lint surfacing is the orchestrator's job | + +> **Intentional consequence:** suppressing `end-of-turn.sh` and +> `ralph-iteration-logger.py` in nested runs means a nested Actor's lint +> errors / tool calls are not surfaced or logged at the *parent* level. This +> is by design — the orchestrator runs its own Monitor and `make check` on the +> subtask diff. It is documented here, not a defect. + +### FORBID_GUARD — must always fire (guard is forbidden) + +These either enforce a safety/workflow boundary or recover context. The linter +forbids a `MAP_INVOKED_BY`-conditioned early-exit in them, in both directions, +so a future contributor cannot "helpfully" disable the gate for every +MAP-spawned subagent. + +| Hook | Event | Blocking? | Rationale for always-fire | +|------|-------|-----------|---------------------------| +| `safety-guardrails.py` | `PreToolUse` (Edit/Write/Read/MultiEdit/Bash) | Yes (JSON deny) | A nested Actor doing real edits MUST still be blocked from sensitive files / dangerous commands | +| `workflow-gate.py` | `PreToolUse` (Edit/Write/MultiEdit) | Yes (JSON deny) | The Actor+Monitor phase gate must enforce on nested edits exactly as on top-level edits | +| `workflow-gate.py` (Codex) | `PreToolUse` | Yes (JSON deny) | Codex twin of the above (`.codex/hooks/` + `src/mapify_cli/templates/codex/hooks/`); same rule | +| `post-compact-context.py` | `SessionStart` (compact) | No | A nested Actor whose context was just compacted needs the MAP re-prime *more*, not less; SessionStart cannot be self-triggered by a hook, so it is not a recursion source | + +> **Load-bearing security property (INV-A1):** A FORBID_GUARD hook's +> decision/recovery path is byte-identical whether or not `MAP_INVOKED_BY` is +> set. This mirrors the learned rule *"never structurally bypass the +> blocklist."* The deny gates read no env flag at all. + +## The guard idiom and its position + +### Position rule (INV-A2) + +Presence is not enough — **position** is enforced. + +- **Python REQUIRE_GUARD hooks:** the guard MUST be the **first statement of + the entry function** (`main()` or equivalent), after the function docstring + (if any) but before any `stdin` read or other I/O. If a hook has no `main()` + and executes at module scope, the guard MUST be the first statement at module + scope after the import block and constant definitions. +- **Shell REQUIRE_GUARD hooks (`end-of-turn.sh`):** the guard MUST appear + before the first command that reads input or runs tooling. + +`scripts/lint-hooks.py` AST-walks each `.py` hook and regex-checks each `.sh` +hook to verify the class-appropriate guard *and* its position; a guard placed +after a side-effecting statement fails the linter, not just an absent one. + +### Canonical idiom (SC-1 — byte-identical across all REQUIRE_GUARD hooks) + +Python: + +```python +def main() -> None: + if os.environ.get("MAP_INVOKED_BY"): + sys.exit(0) + ... +``` + +Shell (`set -euo pipefail` safe — the `:-` default avoids tripping `nounset`): + +```bash +set -euo pipefail + +# Recursion guard: no-op when MAP spawned this subprocess (MAP_INVOKED_BY set) +[ -n "${MAP_INVOKED_BY:-}" ] && exit 0 +``` + +`MAP_INVOKED_BY` set to the empty string counts as "not invoked": both +`os.environ.get(...)` (falsy on `""`) and the shell `-n "${MAP_INVOKED_BY:-}"` +test treat empty as unset. + +## Pointer: the `LockState` marker enum + +Hook serialization across processes is governed by the lock-state marker +contract, **not** by this env-flag guard. The authoritative marker enum is +`LockState` in `src/mapify_cli/_locking.py` (a closed `StrEnum`: +`in_progress`, `created`, `updated`, `skipped`, `timeout`, `error`), written to +the sidecar at `~/.map/locks/.state.json` by `flock_with_state`. See +`.claude/references/host-paths.md` §(f)/(g) for the marker contract and the +`~/.map/locks/` protocol. + +Phase A deliberately does **not** call `flock_with_state` for hook +serialization — there is no current recursion-by-concurrency case, so the +env-flag guard above is sufficient. The lock-state contract is referenced here +only so the two mechanisms are not confused. + +## Phase E forward reference — not used by any current hook + +> **The pattern below is documented for forward compatibility only. No current +> hook implements it.** It is recorded here so it is not mistaken for an +> active convention. + +Phase E will let a hook spawn a fully detached background process (e.g. a +memory-flush `claude -p`) that outlives the hook without re-entering the hook +chain on the parent's stdin. The contract for that detached spawn is: + +```python +import subprocess + +subprocess.Popen( + [...], + start_new_session=True, # detach from the parent process group + stdin=subprocess.DEVNULL, # never inherit / block on the parent's stdin + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, +) +``` + +The detached child sets `MAP_INVOKED_BY` in its own environment so that any +hooks it triggers honor the REQUIRE_GUARD early-exit above. Until Phase E +lands, treat this section as design intent, not implemented behavior. diff --git a/.claude/rules/learned/architecture-patterns.md b/.claude/rules/learned/architecture-patterns.md index 38be885..9ca2d17 100644 --- a/.claude/rules/learned/architecture-patterns.md +++ b/.claude/rules/learned/architecture-patterns.md @@ -111,3 +111,22 @@ def get_result(self, run_id): return self.db.fetch_one("runs", run_id=run_id) ``` + +- **CLI Gate Reading From stdin Must Distinguish "No Input Piped" From "Invalid Content"** (2026-05-29): When a MANDATORY gate CLI reads its subject from stdin (truncation detector, validator), empty stdin and valid-but-failing content are different failure modes that need different exit behavior. In a Task/Agent flow a bare call with nothing piped means the caller forgot to pipe — a caller error, not a gate verdict. Returning `truncated:true` / nonzero on empty stdin turns every bare invocation into a false-positive hard stop, silently making the gate non-functional (operators learn to ignore the always-red signal). Add a distinct non-blocking `status:"no_input"` (exit 0) for empty stdin; keep the pure function strict (empty→invalid) for programmatic/library callers; and fix the skill docs to actually pipe the captured response. [workflow: map-efficient] + ```python + # WRONG — CLI: empty stdin == truncated content == hard stop on every bare call + text = sys.stdin.read() + report = detect_truncated(text) # "" -> {"truncated": True, "reasons": ["empty response"]} + print(json.dumps(report)) + + # CORRECT — CLI distinguishes caller-error from content failure; pure fn stays strict + text = sys.stdin.read() + if not text.strip(): + print(json.dumps({"truncated": False, "status": "no_input", + "reasons": ["no response on stdin — pipe the captured response"]})) + sys.exit(0) # bare call is non-applicable, not a failure + report = detect_truncated(text) # only runs on real content + print(json.dumps({**report, "status": "ok"})) + ``` + +- **Always-Loaded Skill Body Has a Hard Line Budget — Put Detail in the Reference File** (2026-05-29): An always-loaded active skill body (e.g. `SKILL.md`) is guarded by a CI test enforcing a max line count (it loads on every invocation and costs context). Adding even correct, useful prose to it can silently push it over budget and break the test. Architectural rule: the active body holds only a short pointer; detail lives in the bundled reference file (e.g. `efficient-reference.md`), which is not budget-gated. If the budget itself is wrong, change the test and the budget together in a deliberate commit — never grow the active body past it by accident. [workflow: map-efficient] diff --git a/.claude/rules/learned/implementation-patterns.md b/.claude/rules/learned/implementation-patterns.md index beffbbf..b6ba0ef 100644 --- a/.claude/rules/learned/implementation-patterns.md +++ b/.claude/rules/learned/implementation-patterns.md @@ -100,3 +100,16 @@ paths: del _args, _kwargs return mock_result ``` + +- **Blast-Radius / "Validate Callers" Detectors Must Exclude Generic Process-Entrypoint Names** (2026-05-29): When a static-analysis detector flags a changed module-level symbol and recommends validating its external callers, exclude generic process-entrypoint names (`main`, and by extension `run`/`cli`/`app` if a project uses them that way) in the SAME predicate that already excludes dunders and too-short names. These names are invoked by convention (`if __name__ == "__main__"`, `python -m`, entry_points), never imported as shared helpers, so they have no true import-callers — but the literal word matches prose in docs/config. A changed `def main()` matched "main" in ~168 SKILL.md / settings.json lines and recommended `validate_callers` on every entrypoint edit. Centralize the exclusion in one `_is_reportable_symbol` predicate so every consumer inherits it; meaningful-symbol callers in markdown stay flagged by design. [workflow: map-efficient] + ```python + _GENERIC_ENTRYPOINT_NAMES = frozenset({"main"}) # add run/cli/app only if used that way + + def _is_reportable_symbol(name: str) -> bool: + return ( + bool(name) + and not (name.startswith("__") and name.endswith("__")) # dunders + and len(name) >= 3 # too-short + and name not in _GENERIC_ENTRYPOINT_NAMES # convention-called entrypoints + ) + ``` diff --git a/.claude/rules/learned/testing-strategies.md b/.claude/rules/learned/testing-strategies.md index 6f44a79..b428d5d 100644 --- a/.claude/rules/learned/testing-strategies.md +++ b/.claude/rules/learned/testing-strategies.md @@ -82,3 +82,34 @@ paths: result = run_command() assert result.exit_code == 0 ``` + +- **Integration-Test Framework Gates via Real Invocation, Not Just the Pure Function** (2026-05-29): When a framework ships a gate (truncation detector, linter, validator) used both as a library function AND as a CLI invoked by a skill/CI, prove it fires on BOTH paths with real invocation artifacts — a unit test of the pure function is not enough. The contract that breaks silently lives at the integration boundary (stdin pipe, process exit code, classification scope), exactly where the unit test does not reach. In Phase A the truncation gate's pure function was unit-tested and correct, yet the CLI was non-functional in every Task/Agent call because nothing was piped. Add a subprocess test that runs the actual CLI entrypoint with empty stdin, with piped-valid, and with piped-invalid input and asserts the exit/status of each. [workflow: map-efficient] + ```python + def _run_gate_cli(stdin_text: str) -> dict: + proc = subprocess.run( + [sys.executable, str(SCRIPTS_PATH / "tool.py"), "detect", "--agent", "actor"], + input=stdin_text, capture_output=True, text=True, + ) + assert proc.returncode == 0, proc.stderr + return json.loads(proc.stdout) + + def test_cli_no_input_is_not_a_failure(self): + assert _run_gate_cli("")["status"] == "no_input" # bare call ≠ hard stop + def test_cli_piped_prose_is_flagged(self): + assert _run_gate_cli("shipping now")["truncated"] is True + ``` + +- **Parametrized Tests That Discover Cases From the Filesystem Need a Non-Empty Discovery Guard** (2026-05-29): When a `@pytest.mark.parametrize` list is built by globbing the filesystem (hook files, both dev+template trees, Codex+Claude copies), an empty discovery — from a path typo, missing dir, or accidental exclusion — silently produces ZERO cases and the suite reports green. The invariant is then completely untested while looking covered. Add a standalone sentinel test asserting the discovered list meets a minimum count (and, for multi-tree coverage, that EACH tree contributes), so a vacuous pass becomes a hard failure. [workflow: map-efficient] + ```python + HOOK_FILES = glob.glob(".claude/hooks/*.py") + glob.glob(".codex/hooks/*.py") + + def test_hook_discovery_non_empty(): # fails loudly if a glob silently returns [] + claude = [p for p in HOOK_FILES if "/.claude/" in p] + codex = [p for p in HOOK_FILES if "/.codex/" in p] + assert claude and codex, f"empty discovery — path typo? {HOOK_FILES}" + + @pytest.mark.parametrize("hook_path", HOOK_FILES) # would pass vacuously on [] + def test_hook_has_guard(hook_path): ... + ``` + +- **A Linter That Enforces Gate Invariants Must Ship a `--self-test` Covering Every Failure Mode, Wired Into CI** (2026-05-29): A lint/gate tool that claims to detect violations (missing guard, misplaced guard, forbidden guard, unclassified file) must include a `--self-test` mode that synthesizes one input per failure mode and asserts each exits nonzero, plus a conformant input that exits zero. Without it, the happy-path CI run (no violations present → exit 0) never exercises the detection logic, so a reviewer can only verify enforcement by reading code. Wire the self-test into `make check` or invoke it from pytest via importlib. In Phase A, Monitor caught two uncovered failure modes (FORBID indirect-variable bypass, shell inline-comment) that a self-test would have caught mechanically. [workflow: map-efficient] diff --git a/.claude/skills/map-efficient/SKILL.md b/.claude/skills/map-efficient/SKILL.md index 94de6ea..9331d20 100644 --- a/.claude/skills/map-efficient/SKILL.md +++ b/.claude/skills/map-efficient/SKILL.md @@ -296,8 +296,8 @@ Return files_changed, tests_run, validation_notes, and any blocker. ### Actor truncated-response gate (MANDATORY — pre-MONITOR) -Before invoking Monitor, validate Actor's response via -`python3 .map/scripts/map_step_runner.py detect_truncated_agent_output --agent actor`. +Before invoking Monitor, **pipe** Actor's captured response in — the detector reads stdin; a bare call returns `status:"no_input"`, NOT a pass: +`printf '%s' "$ACTOR_RESPONSE" | python3 .map/scripts/map_step_runner.py detect_truncated_agent_output --agent actor`. If `truncated: true`, log via `python3 .map/scripts/map_step_runner.py log_agent_failure` and re-invoke ONCE using the prompt from `python3 .map/scripts/map_step_runner.py build_json_retry_prompt --agent actor --errors ''`; @@ -339,8 +339,8 @@ Return JSON with valid, summary, issues, files_changed, tests_run, and escalatio # After Monitor returns: - **Truncated-response gate (MANDATORY — pre-verdict):** Before reading - `valid`/`recommendation`, run - `python3 .map/scripts/map_step_runner.py detect_truncated_agent_output --agent monitor` + `valid`/`recommendation`, **pipe** Monitor's response in (bare call → `status:"no_input"`, NOT a pass): + `printf '%s' "$MONITOR_RESPONSE" | python3 .map/scripts/map_step_runner.py detect_truncated_agent_output --agent monitor` (JSON with `valid`, `summary`, `issues`, ends `}`). On truncation: log via `python3 .map/scripts/map_step_runner.py log_agent_failure` and re-invoke Monitor ONCE using the prompt from `python3 .map/scripts/map_step_runner.py build_json_retry_prompt --agent monitor --errors ''`; diff --git a/.claude/skills/map-efficient/efficient-reference.md b/.claude/skills/map-efficient/efficient-reference.md index 1d00a05..802d11c 100644 --- a/.claude/skills/map-efficient/efficient-reference.md +++ b/.claude/skills/map-efficient/efficient-reference.md @@ -89,8 +89,11 @@ Never `--no-verify`. Never amend a published commit. ### Monitor truncated-response gate (full) Before reading `valid`/`recommendation`, confirm Monitor returned a complete -JSON envelope (`valid`, `summary`, `issues`). Detect via -`detect_truncated_agent_output --agent monitor`; if truncated, log via +JSON envelope (`valid`, `summary`, `issues`). Pipe the captured response in +(the detector reads stdin): +`printf '%s' "$MONITOR_OUTPUT" | python3 .map/scripts/map_step_runner.py detect_truncated_agent_output --agent monitor`. +A bare call with nothing piped returns `status: "no_input"` (`truncated: false`) +— that means the response was not piped, not that it passed. If truncated, log via `log_agent_failure --agent monitor --phase post-invoke --failure-label truncated --reasons ''` and re-invoke ONCE using the prompt from `build_json_retry_prompt --agent monitor --errors ''`; if still diff --git a/.map/scripts/map_step_runner.py b/.map/scripts/map_step_runner.py index cce7036..773250c 100755 --- a/.map/scripts/map_step_runner.py +++ b/.map/scripts/map_step_runner.py @@ -7547,6 +7547,28 @@ def detect_actor_files_changed_mismatch( # than evidence that no external callers exist. _GREP_ERROR_SENTINEL = [{"symbol": "*", "file": "", "line": 0, "note": "grep_error"}] +# Generic process-entrypoint names excluded from blast-radius analysis. A +# function named ``main`` is invoked by convention (``if __name__ == "__main__"`` +# inside its own file, or by the harness via a file path) — never imported as a +# shared helper. Treating it as a changed symbol matches the literal word "main" +# in every SKILL.md / settings.json and floods the gate with false callers. +_GENERIC_ENTRYPOINT_NAMES = frozenset({"main"}) + + +def _is_reportable_symbol(name: str) -> bool: + """Whether a module-level name is worth blast-radius caller analysis. + + Excludes dunders (``__x__``), names shorter than 3 characters, and generic + process entrypoints (:data:`_GENERIC_ENTRYPOINT_NAMES`). Leading-underscore + names such as ``_MONITOR_REQUIRED_KEYS`` are intentionally kept. + """ + return ( + bool(name) + and not (name.startswith("__") and name.endswith("__")) + and len(name) >= 3 + and name not in _GENERIC_ENTRYPOINT_NAMES + ) + def _changed_line_numbers_by_file(diff_text: str) -> dict[str, set[int]]: """Parse a unified diff and return new-file line numbers of added lines per path. @@ -7607,9 +7629,10 @@ def _enclosing_changed_symbols( Recognises ``FunctionDef``, ``AsyncFunctionDef``, ``ClassDef``, ``Assign`` with ``Name`` targets, and ``AnnAssign`` with a ``Name`` target. - Excludes dunder names (start AND end with ``__``) and names shorter than 3 - characters. Leading-underscore names such as ``_MONITOR_REQUIRED_KEYS`` are - intentionally kept. + Excludes dunder names (start AND end with ``__``), names shorter than 3 + characters, and generic process entrypoints (``main``) via + :func:`_is_reportable_symbol`. Leading-underscore names such as + ``_MONITOR_REQUIRED_KEYS`` are intentionally kept. Returns ``None`` on ``SyntaxError`` or ``OSError`` (caller must treat this as a fail-safe / unknown signal). @@ -7634,7 +7657,7 @@ def _enclosing_changed_symbols( start = min([node.lineno] + decorator_lines) end = node.end_lineno or node.lineno - if name and not (name.startswith("__") and name.endswith("__")) and len(name) >= 3: + if _is_reportable_symbol(name): if any(start <= ln <= end for ln in changed_lines): symbols.add(name) @@ -7644,11 +7667,8 @@ def _enclosing_changed_symbols( for target in node.targets: if isinstance(target, ast.Name): tname = target.id - if ( - tname - and not (tname.startswith("__") and tname.endswith("__")) - and len(tname) >= 3 - and any(start <= ln <= end for ln in changed_lines) + if _is_reportable_symbol(tname) and any( + start <= ln <= end for ln in changed_lines ): symbols.add(tname) @@ -7657,11 +7677,8 @@ def _enclosing_changed_symbols( tname = node.target.id start = node.lineno end = node.end_lineno or node.lineno - if ( - tname - and not (tname.startswith("__") and tname.endswith("__")) - and len(tname) >= 3 - and any(start <= ln <= end for ln in changed_lines) + if _is_reportable_symbol(tname) and any( + start <= ln <= end for ln in changed_lines ): symbols.add(tname) @@ -9078,16 +9095,36 @@ def _opt_value(flag: str) -> str: sys.exit(0 if is_ack else 1) elif func_name == "detect_truncated_agent_output": - # CLI: detect_truncated_agent_output [--agent monitor|actor|...] - # Reads candidate agent response from stdin, prints JSON report. + # CLI: | detect_truncated_agent_output [--agent monitor|actor|...] + # Reads the candidate agent response from stdin, prints JSON report. # Exit code 0 always (callers parse `truncated` field) — no stderr # for a clean response, so shell pipelines can branch on it. + # + # IMPORTANT: the captured agent response MUST be piped in. A bare call + # with nothing on stdin is NOT a truncated response — it means the + # caller forgot to pipe. We surface that as a distinct, non-blocking + # `status: "no_input"` so it can't masquerade as a hard-stop + # truncation on every subtask (an empty stdin would otherwise read as + # `truncated: true / "empty response"`). agent_kind_arg = "monitor" if "--agent" in sys.argv: agent_idx = sys.argv.index("--agent") if agent_idx + 1 < len(sys.argv): agent_kind_arg = sys.argv[agent_idx + 1] text_in = sys.stdin.read() + if not text_in.strip(): + print(json.dumps({ + "truncated": False, + "status": "no_input", + "reasons": [ + "no agent response on stdin — pipe the captured response, " + "e.g. printf '%s' \"$RESPONSE\" | python3 " + ".map/scripts/map_step_runner.py " + "detect_truncated_agent_output --agent " + agent_kind_arg + ], + "agent_kind": agent_kind_arg, + }, indent=2)) + sys.exit(0) report = detect_truncated_agent_output( text_in, agent_kind=agent_kind_arg ) @@ -9095,6 +9132,7 @@ def _opt_value(flag: str) -> str: # original text if they want it); keep the report shape small. report_summary = { "truncated": report["truncated"], + "status": "ok", "reasons": report["reasons"], "agent_kind": report["agent_kind"], } diff --git a/Makefile b/Makefile index a713b50..1c8ffa2 100644 --- a/Makefile +++ b/Makefile @@ -50,6 +50,7 @@ lint: ruff check src/ tests/ mypy src/ pyright src/ + python3 scripts/lint-hooks.py format: black src/ tests/ @@ -68,14 +69,14 @@ clean: find . -type f -name "*.pyc" -delete build: clean - python -m build + python3 -m build release: build @echo "Ready to upload to PyPI with: twine upload dist/*" - @echo "Don't forget to tag the release: git tag -a v$(shell python -c "import tomli; print(tomli.load(open('pyproject.toml', 'rb'))['project']['version'])") -m 'Release version ...'" + @echo "Don't forget to tag the release: git tag -a v$(shell python3 -c "import tomli; print(tomli.load(open('pyproject.toml', 'rb'))['project']['version'])") -m 'Release version ...'" # Quick test of the CLI test-cli: @echo "Testing CLI installation..." - python -m mapify_cli --version - python -m mapify_cli check + python3 -m mapify_cli --version + python3 -m mapify_cli check diff --git a/scripts/lint-hooks.py b/scripts/lint-hooks.py new file mode 100755 index 0000000..2b58a64 --- /dev/null +++ b/scripts/lint-hooks.py @@ -0,0 +1,568 @@ +#!/usr/bin/env python3 +""" +MAP Hook Recursion-Guard Linter + +Enforces the ``MAP_INVOKED_BY`` recursion-guard contract documented in +``.claude/references/hook-patterns.md``. Every hook under the scanned roots is +classified into exactly one class: + + - REQUIRE_GUARD: must early-exit on ``MAP_INVOKED_BY`` as the FIRST statement + of its entry function (Python) or before any input/tooling (shell). + - FORBID_GUARD: must NOT contain a ``MAP_INVOKED_BY``-conditioned early-exit + anywhere (a guard here would silently disable a safety/workflow gate). + +A hook with no classification entry is a hard error (forces the author to +classify it). Position is verified, not just presence: a guard placed after a +side-effecting statement fails — including a module-scope assignment whose RHS +performs I/O (e.g. ``data = sys.stdin.read()``) and a shell ``VAR=$(...)`` +command substitution before the guard. + +Anti-obfuscation hardening: a FORBID_GUARD hook must reference ``MAP_INVOKED_BY`` +exactly zero times at the RAW SOURCE level (a plain substring scan, including +comments and substrings of a larger string), so an indirect guard such as +``flag = "MAP_INVOKED_BY"; if os.environ.get(flag): sys.exit(0)``, a stray +comment, or an embedded mention cannot slip past — and the linter agrees +byte-for-byte with ``tests/test_hook_patterns.py``. For REQUIRE_GUARD shell +hooks, guard *detection* still strips inline comments first, so a prose +``# ... MAP_INVOKED_BY ...`` note is never mistaken for a real guard. + +Scans BOTH dev roots, including Codex (INV-A4): + - .claude/hooks/ + - .codex/hooks/ + +Template copies (src/mapify_cli/templates/) are validated by +``make sync-templates`` + ``tests/test_hook_patterns.py`` over both trees, so +this dev-only tool intentionally does not scan them and is itself not synced. + +Usage: + python scripts/lint-hooks.py [--root DIR ...] [--self-test] + +Exit codes: + 0 all scanned hooks conform + 1 one or more violations (missing/misplaced/forbidden guard, or + unclassified hook) +""" + +from __future__ import annotations + +import argparse +import ast +import re +import sys +from pathlib import Path + +# Repo root = parent of scripts/ +REPO_ROOT = Path(__file__).resolve().parent.parent + +# Default dev roots to scan (INV-A4: both, including Codex). +DEFAULT_ROOTS = [ + REPO_ROOT / ".claude" / "hooks", + REPO_ROOT / ".codex" / "hooks", +] + +# Classification, keyed by file BASENAME so the Codex twin resolves identically. +REQUIRE_GUARD = { + "context-meter.py", + "map-token-meter.py", + "workflow-context-injector.py", + "detect-clarification-triggers.py", + "ralph-iteration-logger.py", + "ralph-context-pruner.py", + "pre-compact-save-transcript.py", + "end-of-turn.sh", +} +FORBID_GUARD = { + "safety-guardrails.py", + "workflow-gate.py", + "post-compact-context.py", +} + +# Files in a hooks dir that are not themselves hooks. +IGNORED_BASENAMES = {"README.md", "__init__.py"} + +ENV_FLAG = "MAP_INVOKED_BY" + + +class Colors: + # Only the codes this linter actually emits are kept (no dead YELLOW/BLUE). + RED = "\033[91m" + GREEN = "\033[92m" + BOLD = "\033[1m" + END = "\033[0m" + + +# --------------------------------------------------------------------------- # +# Python AST helpers +# --------------------------------------------------------------------------- # +def _test_references_flag(test: ast.expr) -> bool: + """True if the If-test references the MAP_INVOKED_BY env flag.""" + return any( + isinstance(node, ast.Constant) and node.value == ENV_FLAG + for node in ast.walk(test) + ) + + +def _stmt_exits(stmt: ast.stmt) -> bool: + """True if a statement (or nested node) exits/returns the process/function.""" + for node in ast.walk(stmt): + if isinstance(node, ast.Return): + return True + if isinstance(node, ast.Call): + func = node.func + # sys.exit(...) / os._exit(...) + if isinstance(func, ast.Attribute) and func.attr in {"exit", "_exit"}: + return True + # bare exit(...) / quit(...) + if isinstance(func, ast.Name) and func.id in {"exit", "quit"}: + return True + if isinstance(node, ast.Raise): + return True + return False + + +def is_recursion_guard(node: ast.AST) -> bool: + """True if ``node`` is an ``if : `` guard. + + The exit/return may live in either branch — ``if FLAG: sys.exit(0)`` and the + inverted ``if not FLAG: ... else: sys.exit(0)`` are both guards. Checking + ``orelse`` as well as ``body`` (a) recognises the inverted REQUIRE idiom + instead of mis-reporting it as MISSING, and (b) strengthens the FORBID scan + so an else-branch bypass cannot slip past. + """ + if not isinstance(node, ast.If): + return False + if not _test_references_flag(node.test): + return False + return any(_stmt_exits(s) for s in node.body) or any( + _stmt_exits(s) for s in node.orelse + ) + + +def _strip_leading_docstring(body: list[ast.stmt]) -> list[ast.stmt]: + """Return body without a leading docstring Expr, if present.""" + if ( + body + and isinstance(body[0], ast.Expr) + and isinstance(body[0].value, ast.Constant) + and isinstance(body[0].value.value, str) + ): + return body[1:] + return body + + +def _is_side_effect_free_assignment(stmt: ast.stmt) -> bool: + """True if a module-level assignment is a plain constant definition. + + Only literal/constant RHS values count as "constant definitions" the guard + may follow. An assignment whose RHS performs I/O or runs tooling + (``data = sys.stdin.read()``) is NOT skippable — the guard must precede it, + so we stop the module-scope scan there. Detected by the presence of any + ``Call``/``Await``/``Yield`` node in the RHS. + """ + if not isinstance(stmt, (ast.Assign, ast.AnnAssign)): + return False + value = stmt.value # AnnAssign may be annotation-only (value is None) + if value is None: + return True + return not any( + isinstance(node, (ast.Call, ast.Await, ast.Yield, ast.YieldFrom)) + for node in ast.walk(value) + ) + + +def _find_entry_body(tree: ast.Module) -> tuple[list[ast.stmt], str]: + """Return (entry-function body without docstring, label). + + Prefers a top-level ``def main(...)`` / ``async def main(...)``. Falls back + to module scope after the import/constant block when there is no main() + (Decision 4 in the spec). + """ + for node in tree.body: + if ( + isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)) + and node.name == "main" + ): + return _strip_leading_docstring(node.body), "main()" + # Module-scope fallback: skip the leading docstring, imports and plain + # constant assignments — the guard must be the first executable statement. + # A side-effecting assignment RHS (e.g. a stdin read) is NOT a constant and + # stops the scan, so a guard placed after it is correctly flagged misplaced. + body = _strip_leading_docstring(tree.body) + idx = 0 + for stmt in body: + if isinstance(stmt, (ast.Import, ast.ImportFrom)): + idx += 1 + continue + if _is_side_effect_free_assignment(stmt): + idx += 1 + continue + break + return body[idx:], "module scope" + + +# --------------------------------------------------------------------------- # +# Shell helpers +# --------------------------------------------------------------------------- # +# Lines permitted BEFORE the shell guard: shebang, comments, blanks, `set ...`, +# and simple VAR=value assignments. Anything else is an executable command and +# means the guard is misplaced (runs after input/tooling). +# +# The VAR=value branch deliberately forbids command substitution (``$(...)`` and +# backticks): ``OUT=$(curl ...)`` / ``RESPONSE=$(cat /dev/stdin)`` RUN a +# subprocess (I/O / tooling) before the guard, so they are NOT harmless +# assignments. Variable expansion (``$VAR``, ``${VAR}``) stays allowed. The +# value charset is "anything but a backtick/newline, or a ``$`` not followed by +# ``(``". +_SH_PREAMBLE_RE = re.compile( + r"""^\s*( + \#.* # comment / shebang + | set\s.* # set -euo pipefail etc. + | [A-Za-z_][A-Za-z0-9_]*=(?:[^`\n$]|\$(?!\())* # VAR=value, no command substitution + )?\s*$""", + re.VERBOSE, +) +_SH_GUARD_LINE_RE = re.compile(rf"{ENV_FLAG}.*\bexit\b|\bexit\b.*{ENV_FLAG}") + + +def _sh_code(line: str) -> str: + """Return the executable portion of a shell line (strip inline comments). + + Prevents a prose comment like ``exit 0 # MAP_INVOKED_BY note`` from being + mistaken for a real guard. + """ + return line.split("#", 1)[0] + + +def _first_flag_lineno(source: str) -> int: + """1-based line of the first raw ``ENV_FLAG`` occurrence (0 if none).""" + return next( + (i for i, ln in enumerate(source.splitlines(), 1) if ENV_FLAG in ln), 0 + ) + + +# --------------------------------------------------------------------------- # +# Linter +# --------------------------------------------------------------------------- # +class HookLinter: + def __init__(self) -> None: + self.errors: list[tuple[str, str]] = [] # (relpath, message) + self.checked = 0 + + def error(self, path: Path, message: str) -> None: + rel = self._rel(path) + self.errors.append((rel, message)) + + @staticmethod + def _rel(path: Path) -> str: + try: + return str(path.relative_to(REPO_ROOT)) + except ValueError: + return str(path) + + # -- per-file checks ---------------------------------------------------- # + def check_python(self, path: Path, klass: str) -> None: + source = path.read_text(encoding="utf-8") + try: + tree = ast.parse(source, filename=str(path)) + except SyntaxError as exc: # pragma: no cover - defensive + self.error(path, f"could not parse: {exc}") + return + + if klass == "FORBID_GUARD": + flagged = False + for node in ast.walk(tree): + if isinstance(node, ast.If) and is_recursion_guard(node): + self.error( + path, + f"FORBID_GUARD hook must NOT contain a {ENV_FLAG} " + f"early-exit (line {node.lineno}) — it would disable " + f"the gate for MAP-spawned subagents.", + ) + flagged = True + # Catch-all (spec AC-2: zero matches), aligned BYTE-FOR-BYTE with + # tests/test_hook_patterns.py::test_forbid_hook_has_zero_flag_references + # (``ENV_FLAG not in source``): a raw-source substring scan catches an + # indirect-variable guard, a comment mention, AND the flag embedded in + # a larger string constant — all of which the AST If-test / exact + # ``Constant == ENV_FLAG`` scan misses. Without this, the linter and + # the pytest gate could disagree on the same FORBID hook. + if not flagged and ENV_FLAG in source: + lineno = _first_flag_lineno(source) + self.error( + path, + f"FORBID_GUARD hook must contain zero {ENV_FLAG} " + f"references; found one at line {lineno} (comment, stray " + f"reference, or possible indirect/obfuscated guard).", + ) + return + + # REQUIRE_GUARD: guard must be the first statement of the entry point. + entry_body, label = _find_entry_body(tree) + if not entry_body: + self.error(path, f"REQUIRE_GUARD hook has empty {label}; no guard.") + return + first = entry_body[0] + if not is_recursion_guard(first): + # Distinguish "missing entirely" from "present but misplaced". + if any(is_recursion_guard(n) for n in ast.walk(tree)): + self.error( + path, + f"REQUIRE_GUARD guard is present but MISPLACED — it must be " + f"the first statement of {label} (after the docstring, " + f"before any I/O), found a {type(first).__name__} first.", + ) + else: + self.error( + path, + f"REQUIRE_GUARD hook is MISSING the {ENV_FLAG} early-exit as " + f"the first statement of {label}.", + ) + + def check_shell(self, path: Path, klass: str) -> None: + source = path.read_text(encoding="utf-8") + lines = source.splitlines() + + if klass == "FORBID_GUARD": + # Same raw-source substring rule as check_python's FORBID branch and + # the pytest gate: zero textual references (incl. comments). + if ENV_FLAG in source: + self.error( + path, + f"FORBID_GUARD shell hook must contain zero {ENV_FLAG} " + f"references; found one at line {_first_flag_lineno(source)}.", + ) + return + + guard_idx = next( + (i for i, ln in enumerate(lines) if _SH_GUARD_LINE_RE.search(_sh_code(ln))), + None, + ) + + # REQUIRE_GUARD shell hook. + if guard_idx is None: + self.error( + path, + f"REQUIRE_GUARD shell hook is MISSING the {ENV_FLAG} early-exit.", + ) + return + for i in range(guard_idx): + if not _SH_PREAMBLE_RE.match(lines[i]): + self.error( + path, + f"REQUIRE_GUARD shell guard is MISPLACED — executable line " + f'{i + 1} ("{lines[i].strip()}") runs before the guard ' + f"(line {guard_idx + 1}); the guard must precede all " + f"input/tooling.", + ) + return + + def check_file(self, path: Path) -> None: + name = path.name + if name in IGNORED_BASENAMES: + return + if path.suffix not in {".py", ".sh"}: + return + + if name in REQUIRE_GUARD: + klass = "REQUIRE_GUARD" + elif name in FORBID_GUARD: + klass = "FORBID_GUARD" + else: + self.error( + path, + "UNCLASSIFIED hook — add it to REQUIRE_GUARD or FORBID_GUARD in " + "scripts/lint-hooks.py (and hook-patterns.md).", + ) + return + + self.checked += 1 + if path.suffix == ".py": + self.check_python(path, klass) + else: + self.check_shell(path, klass) + + def run(self, roots: list[Path]) -> int: + for root in roots: + if not root.exists(): + continue + for path in sorted(root.iterdir()): + if path.is_file(): + self.check_file(path) + + if self.errors: + print( + f"{Colors.RED}{Colors.BOLD}Hook lint failed " + f"({len(self.errors)} violation(s)):{Colors.END}" + ) + for rel, message in self.errors: + print(f" {Colors.RED}✗{Colors.END} {rel} — {message}") + return 1 + + print( + f"{Colors.GREEN}✓ All {self.checked} hooks conform to the " + f"{ENV_FLAG} recursion-guard contract.{Colors.END}" + ) + return 0 + + +# --------------------------------------------------------------------------- # +# Self-test: prove every failure mode exits non-zero (AC-5 / VC2 fixtures) +# --------------------------------------------------------------------------- # +def _self_test() -> int: + import tempfile + + cases: list[tuple[str, str, str]] = [ + # (basename, content, why-it-must-fail) + ( + "context-meter.py", # REQUIRE_GUARD but guard missing + 'import os, sys\n\ndef main() -> None:\n print("io")\n', + "REQUIRE_GUARD missing guard", + ), + ( + "map-token-meter.py", # REQUIRE_GUARD but guard misplaced (after IO) + 'import os, sys\n\ndef main() -> None:\n print("io")\n' + ' if os.environ.get("MAP_INVOKED_BY"):\n sys.exit(0)\n', + "REQUIRE_GUARD misplaced guard", + ), + ( + "safety-guardrails.py", # FORBID_GUARD but contains a guard + 'import os, sys\n\ndef main() -> None:\n' + ' if os.environ.get("MAP_INVOKED_BY"):\n sys.exit(0)\n', + "FORBID_GUARD contains guard", + ), + ( + "brand-new-hook.py", # unclassified + 'def main() -> None:\n pass\n', + "unclassified hook", + ), + ( + "end-of-turn.sh", # REQUIRE_GUARD shell, guard after a command + '#!/usr/bin/env bash\nset -euo pipefail\n' + 'git status\n[ -n "${MAP_INVOKED_BY:-}" ] && exit 0\n', + "shell guard misplaced", + ), + ( + "workflow-gate.py", # FORBID_GUARD with an INDIRECT/obfuscated guard + 'import os, sys\n\nFLAG = "MAP_INVOKED_BY"\n\ndef main() -> None:\n' + " if os.environ.get(FLAG):\n sys.exit(0)\n", + "FORBID_GUARD indirect-variable guard", + ), + ( + "end-of-turn.sh", # REQUIRE_GUARD shell: only flag mention is a comment + '#!/usr/bin/env bash\nset -euo pipefail\n' + "# exit 0 here if MAP_INVOKED_BY (note only — not a real guard)\n" + "git status\n", + "shell guard present only in a comment", + ), + ( + "end-of-turn.sh", # REQUIRE_GUARD shell: command substitution before guard + '#!/usr/bin/env bash\nset -euo pipefail\n' + 'RESPONSE=$(cat /dev/stdin)\n' + '[ -n "${MAP_INVOKED_BY:-}" ] && exit 0\n', + "shell guard after a VAR=$(...) command substitution", + ), + ( + "context-meter.py", # REQUIRE_GUARD module-scope: stdin read before guard + 'import os, sys\n\n' + 'data = sys.stdin.read()\n' + 'if os.environ.get("MAP_INVOKED_BY"):\n sys.exit(0)\n' + 'print(data)\n', + "module-scope guard after a side-effecting (stdin-read) assignment", + ), + ] + ok = True + with tempfile.TemporaryDirectory() as tmp: + root = Path(tmp) / "hooks" + for basename, content, why in cases: + root.mkdir(parents=True, exist_ok=True) + f = root / basename + f.write_text(content, encoding="utf-8") + linter = HookLinter() + rc = linter.run([root]) + f.unlink() + status = "PASS" if rc != 0 else "FAIL" + if rc == 0: + ok = False + color = Colors.GREEN if rc != 0 else Colors.RED + print(f" {color}[{status}]{Colors.END} expected-fail: {why} (rc={rc})") + + # And conformant fixtures must pass (rc == 0) — one per accepted shape, + # so the happy path of every branch (sync main, async main, module + # scope after a constant, and shell) is exercised, not just the + # failure modes. + good_cases: list[tuple[str, str, str]] = [ + ( + "context-meter.py", + 'import os, sys\n\ndef main() -> None:\n' + ' if os.environ.get("MAP_INVOKED_BY"):\n sys.exit(0)\n' + ' print("io")\n', + "conformant sync main()", + ), + ( + "map-token-meter.py", + 'import asyncio, os, sys\n\nasync def main() -> None:\n' + ' if os.environ.get("MAP_INVOKED_BY"):\n sys.exit(0)\n' + ' print("io")\n\nasyncio.run(main())\n', + "conformant async main()", + ), + ( + "ralph-context-pruner.py", + 'import os, sys\n\nTHRESHOLD = 1000\n' + 'if os.environ.get("MAP_INVOKED_BY"):\n sys.exit(0)\n' + 'print(sys.stdin.read())\n', + "conformant module-scope guard after a constant assignment", + ), + ( + "end-of-turn.sh", + '#!/usr/bin/env bash\nset -euo pipefail\n' + '[ -n "${MAP_INVOKED_BY:-}" ] && exit 0\ngit status\n', + "conformant shell guard before any tooling", + ), + ] + for basename, content, why in good_cases: + root.mkdir(parents=True, exist_ok=True) + good = root / basename + good.write_text(content, encoding="utf-8") + rc = HookLinter().run([root]) + good.unlink() + if rc != 0: + ok = False + color = Colors.GREEN if rc == 0 else Colors.RED + status = "PASS" if rc == 0 else "FAIL" + print(f" {color}[{status}]{Colors.END} expected-pass: {why} (rc={rc})") + + if ok: + print(f"{Colors.GREEN}✓ self-test: all failure modes detected.{Colors.END}") + return 0 + print(f"{Colors.RED}✗ self-test FAILED.{Colors.END}") + return 1 + + +def main() -> int: + parser = argparse.ArgumentParser( + description="Lint MAP hooks against the MAP_INVOKED_BY recursion-guard contract." + ) + parser.add_argument( + "--root", + action="append", + type=Path, + dest="roots", + help="Hook directory to scan (repeatable). Defaults to .claude/hooks " + "and .codex/hooks.", + ) + parser.add_argument( + "--self-test", + action="store_true", + help="Run built-in fixtures proving every failure mode exits non-zero.", + ) + args = parser.parse_args() + + if args.self_test: + return _self_test() + + roots = args.roots if args.roots else DEFAULT_ROOTS + return HookLinter().run(roots) + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/src/mapify_cli/templates/hooks/README.md b/src/mapify_cli/templates/hooks/README.md index 25e08aa..582aa03 100644 --- a/src/mapify_cli/templates/hooks/README.md +++ b/src/mapify_cli/templates/hooks/README.md @@ -86,16 +86,29 @@ documented per event in the official Claude Code docs. ## Hook inventory -| Hook | Event | Blocking? | Purpose | -|------|-------|-----------|---------| -| `safety-guardrails.py` | `PreToolUse` (Edit/Write/Read/MultiEdit/Bash) | Yes (JSON deny) | Block sensitive files, dangerous commands | -| `workflow-gate.py` | `PreToolUse` (Edit/Write/MultiEdit) | Yes (JSON deny) | Enforce Actor+Monitor workflow before edits | -| `workflow-context-injector.py` | `PreToolUse` (Edit/Write/Bash) | No | Inject MAP workflow reminder | -| `ralph-iteration-logger.py` | `PostToolUse` | No | Log iterations, detect file thrashing | -| `ralph-context-pruner.py` | `PreCompact` | No | Save restore point, prune logs | -| `pre-compact-save-transcript.py` | `PreCompact` | No | Save full conversation transcript | -| `post-compact-context.py` | `SessionStart` (compact) | No | Inject restore-point context | -| `end-of-turn.sh` | `Stop` | No | Auto-fix lint/format silently | -| `detect-clarification-triggers.py` | `UserPromptSubmit` | No | Detect "ask if unclear" + async/durability language | - -Last reviewed: 2026-04-28. +All 11 hooks (10 `.py` + `end-of-turn.sh`) are classified against the +`MAP_INVOKED_BY` recursion-guard contract. **REQUIRE_GUARD** hooks early-exit +when MAP spawns a nested subprocess; **FORBID_GUARD** hooks must always fire +and may not carry the guard. Full contract and per-hook rationale: +[`../references/hook-patterns.md`](../references/hook-patterns.md). The +classification is enforced by `scripts/lint-hooks.py` (in `make lint` / +`make check`). + +| Hook | Event | Blocking? | Class | Purpose | +|------|-------|-----------|-------|---------| +| `safety-guardrails.py` | `PreToolUse` (Edit/Write/Read/MultiEdit/Bash) | Yes (JSON deny) | FORBID_GUARD | Block sensitive files, dangerous commands | +| `workflow-gate.py` | `PreToolUse` (Edit/Write/MultiEdit) | Yes (JSON deny) | FORBID_GUARD | Enforce Actor+Monitor workflow before edits | +| `post-compact-context.py` | `SessionStart` (compact) | No | FORBID_GUARD | Inject restore-point context (re-prime after compaction) | +| `context-meter.py` | `UserPromptSubmit` | No | REQUIRE_GUARD | Nudge `/compact ` when the token threshold is crossed | +| `map-token-meter.py` | `SubagentStop` + `Stop` | No | REQUIRE_GUARD | Attribute per-turn token usage to the active MAP subtask | +| `workflow-context-injector.py` | `PreToolUse` (Edit/Write/Bash) | No | REQUIRE_GUARD | Inject MAP workflow reminder | +| `ralph-iteration-logger.py` | `PostToolUse` | No | REQUIRE_GUARD | Log iterations, detect file thrashing | +| `ralph-context-pruner.py` | `PreCompact` | No | REQUIRE_GUARD | Save restore point, prune logs | +| `pre-compact-save-transcript.py` | `PreCompact` | No | REQUIRE_GUARD | Save full conversation transcript | +| `detect-clarification-triggers.py` | `UserPromptSubmit` | No | REQUIRE_GUARD | Detect "ask if unclear" + async/durability language | +| `end-of-turn.sh` | `Stop` | No | REQUIRE_GUARD | Auto-fix lint/format silently | + +> The Codex twin `.codex/hooks/workflow-gate.py` is FORBID_GUARD like its +> Claude counterpart; this inventory covers `.claude/hooks/` only. + +Last reviewed: 2026-05-29. diff --git a/src/mapify_cli/templates/hooks/context-meter.py b/src/mapify_cli/templates/hooks/context-meter.py index ccb1745..ecf57da 100755 --- a/src/mapify_cli/templates/hooks/context-meter.py +++ b/src/mapify_cli/templates/hooks/context-meter.py @@ -87,6 +87,8 @@ def _silent() -> None: def main() -> None: + if os.environ.get("MAP_INVOKED_BY"): + sys.exit(0) # Read input strictly as JSON. Anything malformed -> silent no-op. try: input_data = json.load(sys.stdin) diff --git a/src/mapify_cli/templates/hooks/detect-clarification-triggers.py b/src/mapify_cli/templates/hooks/detect-clarification-triggers.py index 2fe53b8..bf73335 100755 --- a/src/mapify_cli/templates/hooks/detect-clarification-triggers.py +++ b/src/mapify_cli/templates/hooks/detect-clarification-triggers.py @@ -29,6 +29,7 @@ from __future__ import annotations import json +import os import re import sys @@ -149,6 +150,8 @@ def build_message(clar: bool, dura: bool) -> str: def main() -> int: + if os.environ.get("MAP_INVOKED_BY"): + sys.exit(0) try: payload = json.load(sys.stdin) except Exception: diff --git a/src/mapify_cli/templates/hooks/end-of-turn.sh b/src/mapify_cli/templates/hooks/end-of-turn.sh index 8210ea2..e28b267 100755 --- a/src/mapify_cli/templates/hooks/end-of-turn.sh +++ b/src/mapify_cli/templates/hooks/end-of-turn.sh @@ -21,6 +21,9 @@ set -euo pipefail +# Recursion guard: no-op when MAP spawned this subprocess (MAP_INVOKED_BY set) +[ -n "${MAP_INVOKED_BY:-}" ] && exit 0 + # ----------------------------------------------------------------------------- # Configuration # ----------------------------------------------------------------------------- diff --git a/src/mapify_cli/templates/hooks/map-token-meter.py b/src/mapify_cli/templates/hooks/map-token-meter.py index 5920a28..255fff1 100755 --- a/src/mapify_cli/templates/hooks/map-token-meter.py +++ b/src/mapify_cli/templates/hooks/map-token-meter.py @@ -74,6 +74,8 @@ def _silent() -> None: def main() -> None: + if os.environ.get("MAP_INVOKED_BY"): + sys.exit(0) try: input_data = json.load(sys.stdin) except (json.JSONDecodeError, ValueError): diff --git a/src/mapify_cli/templates/hooks/pre-compact-save-transcript.py b/src/mapify_cli/templates/hooks/pre-compact-save-transcript.py index 30698a2..bbe085a 100755 --- a/src/mapify_cli/templates/hooks/pre-compact-save-transcript.py +++ b/src/mapify_cli/templates/hooks/pre-compact-save-transcript.py @@ -130,6 +130,8 @@ def parse_transcript(transcript_path: Path) -> str: def main() -> None: + if os.environ.get("MAP_INVOKED_BY"): + sys.exit(0) try: input_data = json.load(sys.stdin) except json.JSONDecodeError: diff --git a/src/mapify_cli/templates/hooks/ralph-context-pruner.py b/src/mapify_cli/templates/hooks/ralph-context-pruner.py index 4a74a45..2847f13 100755 --- a/src/mapify_cli/templates/hooks/ralph-context-pruner.py +++ b/src/mapify_cli/templates/hooks/ralph-context-pruner.py @@ -212,6 +212,8 @@ def format_recovery_message(state: Dict[str, Any], branch: str) -> str: def main() -> None: """Main hook execution logic.""" + if os.environ.get("MAP_INVOKED_BY"): + sys.exit(0) # Read stdin (required by hook protocol) try: json.load(sys.stdin) diff --git a/src/mapify_cli/templates/hooks/ralph-iteration-logger.py b/src/mapify_cli/templates/hooks/ralph-iteration-logger.py index 7659527..1a7fc3a 100755 --- a/src/mapify_cli/templates/hooks/ralph-iteration-logger.py +++ b/src/mapify_cli/templates/hooks/ralph-iteration-logger.py @@ -212,6 +212,8 @@ def detect_thrashing(log_file: Path) -> Optional[dict]: def main() -> None: """Main hook execution logic.""" + if os.environ.get("MAP_INVOKED_BY"): + sys.exit(0) try: input_data = json.load(sys.stdin) except json.JSONDecodeError: diff --git a/src/mapify_cli/templates/hooks/workflow-context-injector.py b/src/mapify_cli/templates/hooks/workflow-context-injector.py index d24d3b1..e653ad3 100755 --- a/src/mapify_cli/templates/hooks/workflow-context-injector.py +++ b/src/mapify_cli/templates/hooks/workflow-context-injector.py @@ -147,7 +147,7 @@ def read_step_state(branch: str) -> tuple[dict | None, str | None]: def load_step_state(branch: str) -> dict | None: """Load step state from .map//step_state.json.""" - state, _reason = read_step_state(branch) + state, _ = read_step_state(branch) return state @@ -282,7 +282,7 @@ def record_hook_injection_status( def record_skip_if_state_available(branch: str, reason: str, tool_name: str) -> None: """Persist a skipped hook outcome only when existing state is safe to update.""" - state, _state_error = read_step_state(branch) + state, _ = read_step_state(branch) if state is not None: record_hook_injection_status(branch, state, "skipped", reason, tool_name) @@ -335,7 +335,7 @@ def state_string(state: dict, key: str, default: str = "") -> str: return default -def required_action_for_step(step_id: str, step_phase: str, state: dict) -> str | None: +def required_action_for_step(step_id: str, step_phase: str) -> str | None: """Return a short required-next-action hint for common steps.""" if step_id == "1.55": return "Approve plan (set_plan_approved true)" @@ -521,7 +521,7 @@ def format_reminder( wave_hint += f" ({', '.join(str(item) for item in current_wave)})" mode = "batch:parallel" - required = required_action_for_step(step_id, step_phase, state) + required = required_action_for_step(step_id, step_phase) diag_hint = "" diag_file = ( @@ -602,6 +602,8 @@ def format_reminder( def main() -> None: + if os.environ.get("MAP_INVOKED_BY"): + sys.exit(0) branch = get_branch_name() try: input_data = json.load(sys.stdin) @@ -664,7 +666,7 @@ def main() -> None: sys.exit(0) # Load and format workflow step state - state, _state_error = read_step_state(branch) + state, _ = read_step_state(branch) if state is None: print("{}") diff --git a/src/mapify_cli/templates/map/scripts/map_step_runner.py b/src/mapify_cli/templates/map/scripts/map_step_runner.py index cce7036..773250c 100755 --- a/src/mapify_cli/templates/map/scripts/map_step_runner.py +++ b/src/mapify_cli/templates/map/scripts/map_step_runner.py @@ -7547,6 +7547,28 @@ def detect_actor_files_changed_mismatch( # than evidence that no external callers exist. _GREP_ERROR_SENTINEL = [{"symbol": "*", "file": "", "line": 0, "note": "grep_error"}] +# Generic process-entrypoint names excluded from blast-radius analysis. A +# function named ``main`` is invoked by convention (``if __name__ == "__main__"`` +# inside its own file, or by the harness via a file path) — never imported as a +# shared helper. Treating it as a changed symbol matches the literal word "main" +# in every SKILL.md / settings.json and floods the gate with false callers. +_GENERIC_ENTRYPOINT_NAMES = frozenset({"main"}) + + +def _is_reportable_symbol(name: str) -> bool: + """Whether a module-level name is worth blast-radius caller analysis. + + Excludes dunders (``__x__``), names shorter than 3 characters, and generic + process entrypoints (:data:`_GENERIC_ENTRYPOINT_NAMES`). Leading-underscore + names such as ``_MONITOR_REQUIRED_KEYS`` are intentionally kept. + """ + return ( + bool(name) + and not (name.startswith("__") and name.endswith("__")) + and len(name) >= 3 + and name not in _GENERIC_ENTRYPOINT_NAMES + ) + def _changed_line_numbers_by_file(diff_text: str) -> dict[str, set[int]]: """Parse a unified diff and return new-file line numbers of added lines per path. @@ -7607,9 +7629,10 @@ def _enclosing_changed_symbols( Recognises ``FunctionDef``, ``AsyncFunctionDef``, ``ClassDef``, ``Assign`` with ``Name`` targets, and ``AnnAssign`` with a ``Name`` target. - Excludes dunder names (start AND end with ``__``) and names shorter than 3 - characters. Leading-underscore names such as ``_MONITOR_REQUIRED_KEYS`` are - intentionally kept. + Excludes dunder names (start AND end with ``__``), names shorter than 3 + characters, and generic process entrypoints (``main``) via + :func:`_is_reportable_symbol`. Leading-underscore names such as + ``_MONITOR_REQUIRED_KEYS`` are intentionally kept. Returns ``None`` on ``SyntaxError`` or ``OSError`` (caller must treat this as a fail-safe / unknown signal). @@ -7634,7 +7657,7 @@ def _enclosing_changed_symbols( start = min([node.lineno] + decorator_lines) end = node.end_lineno or node.lineno - if name and not (name.startswith("__") and name.endswith("__")) and len(name) >= 3: + if _is_reportable_symbol(name): if any(start <= ln <= end for ln in changed_lines): symbols.add(name) @@ -7644,11 +7667,8 @@ def _enclosing_changed_symbols( for target in node.targets: if isinstance(target, ast.Name): tname = target.id - if ( - tname - and not (tname.startswith("__") and tname.endswith("__")) - and len(tname) >= 3 - and any(start <= ln <= end for ln in changed_lines) + if _is_reportable_symbol(tname) and any( + start <= ln <= end for ln in changed_lines ): symbols.add(tname) @@ -7657,11 +7677,8 @@ def _enclosing_changed_symbols( tname = node.target.id start = node.lineno end = node.end_lineno or node.lineno - if ( - tname - and not (tname.startswith("__") and tname.endswith("__")) - and len(tname) >= 3 - and any(start <= ln <= end for ln in changed_lines) + if _is_reportable_symbol(tname) and any( + start <= ln <= end for ln in changed_lines ): symbols.add(tname) @@ -9078,16 +9095,36 @@ def _opt_value(flag: str) -> str: sys.exit(0 if is_ack else 1) elif func_name == "detect_truncated_agent_output": - # CLI: detect_truncated_agent_output [--agent monitor|actor|...] - # Reads candidate agent response from stdin, prints JSON report. + # CLI: | detect_truncated_agent_output [--agent monitor|actor|...] + # Reads the candidate agent response from stdin, prints JSON report. # Exit code 0 always (callers parse `truncated` field) — no stderr # for a clean response, so shell pipelines can branch on it. + # + # IMPORTANT: the captured agent response MUST be piped in. A bare call + # with nothing on stdin is NOT a truncated response — it means the + # caller forgot to pipe. We surface that as a distinct, non-blocking + # `status: "no_input"` so it can't masquerade as a hard-stop + # truncation on every subtask (an empty stdin would otherwise read as + # `truncated: true / "empty response"`). agent_kind_arg = "monitor" if "--agent" in sys.argv: agent_idx = sys.argv.index("--agent") if agent_idx + 1 < len(sys.argv): agent_kind_arg = sys.argv[agent_idx + 1] text_in = sys.stdin.read() + if not text_in.strip(): + print(json.dumps({ + "truncated": False, + "status": "no_input", + "reasons": [ + "no agent response on stdin — pipe the captured response, " + "e.g. printf '%s' \"$RESPONSE\" | python3 " + ".map/scripts/map_step_runner.py " + "detect_truncated_agent_output --agent " + agent_kind_arg + ], + "agent_kind": agent_kind_arg, + }, indent=2)) + sys.exit(0) report = detect_truncated_agent_output( text_in, agent_kind=agent_kind_arg ) @@ -9095,6 +9132,7 @@ def _opt_value(flag: str) -> str: # original text if they want it); keep the report shape small. report_summary = { "truncated": report["truncated"], + "status": "ok", "reasons": report["reasons"], "agent_kind": report["agent_kind"], } diff --git a/src/mapify_cli/templates/references/hook-patterns.md b/src/mapify_cli/templates/references/hook-patterns.md new file mode 100644 index 0000000..751a7c0 --- /dev/null +++ b/src/mapify_cli/templates/references/hook-patterns.md @@ -0,0 +1,157 @@ +# Hook Patterns — The `MAP_INVOKED_BY` Recursion Guard + +This document is the authoritative contract for the recursion guard that every +MAP hook is classified against. It is enforced mechanically by +`scripts/lint-hooks.py` (wired into `make lint` / `make check`) and proven by +`tests/test_hook_patterns.py`. The classification list here and in +`lint-hooks.py` must agree; a hook that is unclassified fails the linter. + +## Why a recursion guard exists + +A MAP workflow routinely spawns a nested Claude/Codex subprocess (a nested +Actor, Monitor, or — in Phase E — a memory-flush `claude -p` launched from a +hook). When it does, it sets the reserved environment variable +`MAP_INVOKED_BY` (see `.claude/references/host-paths.md` for the reserved +`MAP_*` namespace). + +The nested subprocess re-fires the **entire hook chain**. Hooks that do +orchestration-, session-, or telemetry-level work belong to the *top-level* +session; re-running them inside a nested Actor is at best noise (duplicate +context injection, double-counted tokens) and at worst recursive (a hook that +spawns child tooling which itself re-enters the hook chain). The guard makes +those hooks no-op when `MAP_INVOKED_BY` is set. + +The guard is **not** a blanket "exit everywhere" switch. A subset of hooks — +the deny gates and the post-compaction re-prime — MUST always fire, even +inside a nested invocation. Applying the guard to them would be a security +regression (a nested Actor doing real edits would no longer be gated) or a +correctness regression (a nested Actor whose context was just compacted would +lose its workflow re-prime). Those hooks are therefore guard-**forbidden**. + +## The two classes + +Every hook is in exactly one class. + +### REQUIRE_GUARD — recursion-suppressed (early-exit on `MAP_INVOKED_BY`) + +These only emit context / nudges / telemetry / transcript saves that belong to +the top-level session. They early-exit when the flag is set. + +| Hook | Event | Blocking? | Rationale for suppression | +|------|-------|-----------|---------------------------| +| `context-meter.py` | `UserPromptSubmit` | No | `/compact` nudge is a top-level session concern; meaningless inside a nested turn | +| `map-token-meter.py` | `SubagentStop` + `Stop` | No | Token attribution is owned by the parent run; nested re-entry double-counts and can spawn child tooling | +| `workflow-context-injector.py` | `PreToolUse` (Edit/Write/Bash) | No | The MAP reminder targets the top-level operator, not a nested Actor that already has its subtask context | +| `detect-clarification-triggers.py` | `UserPromptSubmit` | No | Clarification nudges apply to the human-facing prompt, not nested machine turns | +| `ralph-iteration-logger.py` | `PostToolUse` | No | Iteration/thrashing logging is a parent-run concern; the orchestrator runs its own Monitor on the subtask diff | +| `ralph-context-pruner.py` | `PreCompact` | No | Restore-point/pruning belongs to the top-level transcript | +| `pre-compact-save-transcript.py` | `PreCompact` | No | Saving the parent transcript; a nested run has its own short-lived transcript | +| `end-of-turn.sh` | `Stop` | No | Auto-format could edit files outside a nested Actor's `affected_files`; lint surfacing is the orchestrator's job | + +> **Intentional consequence:** suppressing `end-of-turn.sh` and +> `ralph-iteration-logger.py` in nested runs means a nested Actor's lint +> errors / tool calls are not surfaced or logged at the *parent* level. This +> is by design — the orchestrator runs its own Monitor and `make check` on the +> subtask diff. It is documented here, not a defect. + +### FORBID_GUARD — must always fire (guard is forbidden) + +These either enforce a safety/workflow boundary or recover context. The linter +forbids a `MAP_INVOKED_BY`-conditioned early-exit in them, in both directions, +so a future contributor cannot "helpfully" disable the gate for every +MAP-spawned subagent. + +| Hook | Event | Blocking? | Rationale for always-fire | +|------|-------|-----------|---------------------------| +| `safety-guardrails.py` | `PreToolUse` (Edit/Write/Read/MultiEdit/Bash) | Yes (JSON deny) | A nested Actor doing real edits MUST still be blocked from sensitive files / dangerous commands | +| `workflow-gate.py` | `PreToolUse` (Edit/Write/MultiEdit) | Yes (JSON deny) | The Actor+Monitor phase gate must enforce on nested edits exactly as on top-level edits | +| `workflow-gate.py` (Codex) | `PreToolUse` | Yes (JSON deny) | Codex twin of the above (`.codex/hooks/` + `src/mapify_cli/templates/codex/hooks/`); same rule | +| `post-compact-context.py` | `SessionStart` (compact) | No | A nested Actor whose context was just compacted needs the MAP re-prime *more*, not less; SessionStart cannot be self-triggered by a hook, so it is not a recursion source | + +> **Load-bearing security property (INV-A1):** A FORBID_GUARD hook's +> decision/recovery path is byte-identical whether or not `MAP_INVOKED_BY` is +> set. This mirrors the learned rule *"never structurally bypass the +> blocklist."* The deny gates read no env flag at all. + +## The guard idiom and its position + +### Position rule (INV-A2) + +Presence is not enough — **position** is enforced. + +- **Python REQUIRE_GUARD hooks:** the guard MUST be the **first statement of + the entry function** (`main()` or equivalent), after the function docstring + (if any) but before any `stdin` read or other I/O. If a hook has no `main()` + and executes at module scope, the guard MUST be the first statement at module + scope after the import block and constant definitions. +- **Shell REQUIRE_GUARD hooks (`end-of-turn.sh`):** the guard MUST appear + before the first command that reads input or runs tooling. + +`scripts/lint-hooks.py` AST-walks each `.py` hook and regex-checks each `.sh` +hook to verify the class-appropriate guard *and* its position; a guard placed +after a side-effecting statement fails the linter, not just an absent one. + +### Canonical idiom (SC-1 — byte-identical across all REQUIRE_GUARD hooks) + +Python: + +```python +def main() -> None: + if os.environ.get("MAP_INVOKED_BY"): + sys.exit(0) + ... +``` + +Shell (`set -euo pipefail` safe — the `:-` default avoids tripping `nounset`): + +```bash +set -euo pipefail + +# Recursion guard: no-op when MAP spawned this subprocess (MAP_INVOKED_BY set) +[ -n "${MAP_INVOKED_BY:-}" ] && exit 0 +``` + +`MAP_INVOKED_BY` set to the empty string counts as "not invoked": both +`os.environ.get(...)` (falsy on `""`) and the shell `-n "${MAP_INVOKED_BY:-}"` +test treat empty as unset. + +## Pointer: the `LockState` marker enum + +Hook serialization across processes is governed by the lock-state marker +contract, **not** by this env-flag guard. The authoritative marker enum is +`LockState` in `src/mapify_cli/_locking.py` (a closed `StrEnum`: +`in_progress`, `created`, `updated`, `skipped`, `timeout`, `error`), written to +the sidecar at `~/.map/locks/.state.json` by `flock_with_state`. See +`.claude/references/host-paths.md` §(f)/(g) for the marker contract and the +`~/.map/locks/` protocol. + +Phase A deliberately does **not** call `flock_with_state` for hook +serialization — there is no current recursion-by-concurrency case, so the +env-flag guard above is sufficient. The lock-state contract is referenced here +only so the two mechanisms are not confused. + +## Phase E forward reference — not used by any current hook + +> **The pattern below is documented for forward compatibility only. No current +> hook implements it.** It is recorded here so it is not mistaken for an +> active convention. + +Phase E will let a hook spawn a fully detached background process (e.g. a +memory-flush `claude -p`) that outlives the hook without re-entering the hook +chain on the parent's stdin. The contract for that detached spawn is: + +```python +import subprocess + +subprocess.Popen( + [...], + start_new_session=True, # detach from the parent process group + stdin=subprocess.DEVNULL, # never inherit / block on the parent's stdin + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, +) +``` + +The detached child sets `MAP_INVOKED_BY` in its own environment so that any +hooks it triggers honor the REQUIRE_GUARD early-exit above. Until Phase E +lands, treat this section as design intent, not implemented behavior. diff --git a/src/mapify_cli/templates/skills/map-efficient/SKILL.md b/src/mapify_cli/templates/skills/map-efficient/SKILL.md index 94de6ea..9331d20 100644 --- a/src/mapify_cli/templates/skills/map-efficient/SKILL.md +++ b/src/mapify_cli/templates/skills/map-efficient/SKILL.md @@ -296,8 +296,8 @@ Return files_changed, tests_run, validation_notes, and any blocker. ### Actor truncated-response gate (MANDATORY — pre-MONITOR) -Before invoking Monitor, validate Actor's response via -`python3 .map/scripts/map_step_runner.py detect_truncated_agent_output --agent actor`. +Before invoking Monitor, **pipe** Actor's captured response in — the detector reads stdin; a bare call returns `status:"no_input"`, NOT a pass: +`printf '%s' "$ACTOR_RESPONSE" | python3 .map/scripts/map_step_runner.py detect_truncated_agent_output --agent actor`. If `truncated: true`, log via `python3 .map/scripts/map_step_runner.py log_agent_failure` and re-invoke ONCE using the prompt from `python3 .map/scripts/map_step_runner.py build_json_retry_prompt --agent actor --errors ''`; @@ -339,8 +339,8 @@ Return JSON with valid, summary, issues, files_changed, tests_run, and escalatio # After Monitor returns: - **Truncated-response gate (MANDATORY — pre-verdict):** Before reading - `valid`/`recommendation`, run - `python3 .map/scripts/map_step_runner.py detect_truncated_agent_output --agent monitor` + `valid`/`recommendation`, **pipe** Monitor's response in (bare call → `status:"no_input"`, NOT a pass): + `printf '%s' "$MONITOR_RESPONSE" | python3 .map/scripts/map_step_runner.py detect_truncated_agent_output --agent monitor` (JSON with `valid`, `summary`, `issues`, ends `}`). On truncation: log via `python3 .map/scripts/map_step_runner.py log_agent_failure` and re-invoke Monitor ONCE using the prompt from `python3 .map/scripts/map_step_runner.py build_json_retry_prompt --agent monitor --errors ''`; diff --git a/src/mapify_cli/templates/skills/map-efficient/efficient-reference.md b/src/mapify_cli/templates/skills/map-efficient/efficient-reference.md index 1d00a05..802d11c 100644 --- a/src/mapify_cli/templates/skills/map-efficient/efficient-reference.md +++ b/src/mapify_cli/templates/skills/map-efficient/efficient-reference.md @@ -89,8 +89,11 @@ Never `--no-verify`. Never amend a published commit. ### Monitor truncated-response gate (full) Before reading `valid`/`recommendation`, confirm Monitor returned a complete -JSON envelope (`valid`, `summary`, `issues`). Detect via -`detect_truncated_agent_output --agent monitor`; if truncated, log via +JSON envelope (`valid`, `summary`, `issues`). Pipe the captured response in +(the detector reads stdin): +`printf '%s' "$MONITOR_OUTPUT" | python3 .map/scripts/map_step_runner.py detect_truncated_agent_output --agent monitor`. +A bare call with nothing piped returns `status: "no_input"` (`truncated: false`) +— that means the response was not piped, not that it passed. If truncated, log via `log_agent_failure --agent monitor --phase post-invoke --failure-label truncated --reasons ''` and re-invoke ONCE using the prompt from `build_json_retry_prompt --agent monitor --errors ''`; if still diff --git a/tests/test_hook_patterns.py b/tests/test_hook_patterns.py new file mode 100644 index 0000000..27cfdcb --- /dev/null +++ b/tests/test_hook_patterns.py @@ -0,0 +1,214 @@ +"""Tests for the MAP_INVOKED_BY recursion-guard contract (Phase A). + +Parametrized per-hook over BOTH the dev trees (.claude/hooks, .codex/hooks) and +the shipped template trees (src/mapify_cli/templates/hooks, .../codex/hooks), so +the guard (or its documented absence) cannot drift between copies. + +Proves: + - INV-A2: every REQUIRE_GUARD hook has a correctly-positioned MAP_INVOKED_BY + early-exit (first entry-function statement, before any I/O). + - INV-A1: every FORBID_GUARD hook contains NO such guard and, behaviorally, + still denies a dangerous command when MAP_INVOKED_BY is set. + +Classification and guard-detection logic are imported from scripts/lint-hooks.py +so the contract has a single source of truth. +""" + +from __future__ import annotations + +import importlib.util +import json +import os +import re +import subprocess +import sys +from pathlib import Path + +import pytest + +REPO_ROOT = Path(__file__).resolve().parent.parent + +# Hook roots to validate: dev (Claude + Codex) and shipped templates (Claude + Codex). +HOOK_ROOTS = [ + REPO_ROOT / ".claude" / "hooks", + REPO_ROOT / ".codex" / "hooks", + REPO_ROOT / "src" / "mapify_cli" / "templates" / "hooks", + REPO_ROOT / "src" / "mapify_cli" / "templates" / "codex" / "hooks", +] + + +def _load_lint_hooks(): + """Import scripts/lint-hooks.py (hyphenated filename) as a module.""" + path = REPO_ROOT / "scripts" / "lint-hooks.py" + spec = importlib.util.spec_from_file_location("lint_hooks", path) + assert spec is not None and spec.loader is not None + module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(module) + return module + + +lh = _load_lint_hooks() + + +def _hook_files(root: Path) -> list[Path]: + if not root.exists(): + return [] + return sorted( + p + for p in root.iterdir() + if p.is_file() + and p.suffix in {".py", ".sh"} + and p.name not in lh.IGNORED_BASENAMES + ) + + +# Flat list of every classified hook file across every tree. +ALL_HOOKS = [path for root in HOOK_ROOTS for path in _hook_files(root)] +ALL_HOOK_IDS = [str(p.relative_to(REPO_ROOT)) for p in ALL_HOOKS] + +FORBID_HOOKS = [p for p in ALL_HOOKS if p.name in lh.FORBID_GUARD] +FORBID_HOOK_IDS = [str(p.relative_to(REPO_ROOT)) for p in FORBID_HOOKS] + + +def test_hooks_were_discovered() -> None: + """Guard against an empty parametrization silently passing.""" + assert ALL_HOOKS, "no hook files discovered in any tree" + # Every tree — dev and shipped, Claude and Codex — must contribute hooks, + # so a wiped tree cannot turn a parametrized check into a vacuous pass. + roots_with_hooks = {p.parent for p in ALL_HOOKS} + for root in HOOK_ROOTS: + assert root in roots_with_hooks, f"no hooks discovered under {root}" + + +@pytest.mark.parametrize("hook_path", ALL_HOOKS, ids=ALL_HOOK_IDS) +def test_hook_conforms_to_guard_contract(hook_path: Path) -> None: + """Every hook satisfies its class contract (INV-A1 / INV-A2) in every tree.""" + rel = hook_path.relative_to(REPO_ROOT) + assert hook_path.name in (lh.REQUIRE_GUARD | lh.FORBID_GUARD), ( + f"{rel} is unclassified — add it to REQUIRE_GUARD/FORBID_GUARD in " + f"scripts/lint-hooks.py" + ) + linter = lh.HookLinter() + linter.check_file(hook_path) + assert not linter.errors, ( + f"{rel} violates the recursion-guard contract: " + + "; ".join(msg for _, msg in linter.errors) + ) + + +@pytest.mark.parametrize("hook_path", FORBID_HOOKS, ids=FORBID_HOOK_IDS) +def test_forbid_hook_has_zero_flag_references(hook_path: Path) -> None: + """INV-A1: a FORBID_GUARD hook references MAP_INVOKED_BY exactly zero times.""" + source = hook_path.read_text(encoding="utf-8") + assert lh.ENV_FLAG not in source, ( + f"{hook_path.relative_to(REPO_ROOT)} must contain zero {lh.ENV_FLAG} " + f"references — a guard here would disable the gate for MAP-spawned subagents." + ) + + +# safety-guardrails.py copies across all trees that contain it. +_SAFETY_HOOKS = [p for p in ALL_HOOKS if p.name == "safety-guardrails.py"] +_SAFETY_IDS = [str(p.relative_to(REPO_ROOT)) for p in _SAFETY_HOOKS] + + +@pytest.mark.parametrize("hook_path", _SAFETY_HOOKS, ids=_SAFETY_IDS) +@pytest.mark.parametrize("flag_set", [False, True], ids=["flag_unset", "flag_set"]) +def test_deny_still_fires_with_flag(hook_path: Path, flag_set: bool) -> None: + """INV-A1 (behavioral): the deny gate fires whether or not MAP_INVOKED_BY is set.""" + # Build the dangerous command from fragments so the parent session's own + # safety hook does not block this test file / process. + dangerous = "rm" + " -" + "rf" + " /" + payload = {"tool_name": "Bash", "tool_input": {"command": dangerous}} + + env = dict(os.environ) + if flag_set: + env["MAP_INVOKED_BY"] = "nested-actor" + else: + env.pop("MAP_INVOKED_BY", None) + + result = subprocess.run( + [sys.executable, str(hook_path)], + input=json.dumps(payload), + capture_output=True, + text=True, + env=env, + timeout=15, + ) + blob = (result.stdout or "") + (result.stderr or "") + assert '"permissionDecision": "deny"' in blob, ( + f"{hook_path.relative_to(REPO_ROOT)} did not deny a dangerous command " + f"with MAP_INVOKED_BY {'set' if flag_set else 'unset'}: {blob!r}" + ) + + +# --------------------------------------------------------------------------- # +# Doc/classification drift guard +# --------------------------------------------------------------------------- # +# The classification (REQUIRE_GUARD/FORBID_GUARD sets in scripts/lint-hooks.py) +# is the single machine-readable source of truth, but the same per-hook class is +# independently restated in prose tables across four shipped docs. Nothing else +# checks those tables against the sets, so a reclassified/added/dropped hook can +# silently drift. These tests fail loudly on any divergence. +HOOK_DOC_FILES = [ + REPO_ROOT / ".claude" / "hooks" / "README.md", + REPO_ROOT / ".claude" / "references" / "hook-patterns.md", + REPO_ROOT / "src" / "mapify_cli" / "templates" / "hooks" / "README.md", + REPO_ROOT / "src" / "mapify_cli" / "templates" / "references" / "hook-patterns.md", +] +HOOK_DOC_IDS = [str(p.relative_to(REPO_ROOT)) for p in HOOK_DOC_FILES] + + +def _doc_classification(text: str) -> dict[str, set[str]]: + """Map each hook basename mentioned in a doc TABLE to the class(es) it is + shown under. + + Handles both doc shapes: README has a per-row ``Class`` column + (``| `foo.py` | ... | REQUIRE_GUARD | ... |``); hook-patterns.md groups + hooks under ``### REQUIRE_GUARD`` / ``### FORBID_GUARD`` section headings. + Only ``| ...`` table rows with backtick-wrapped ``*.py``/``*.sh`` names + count, so prose mentions elsewhere never pollute the map. + """ + found: dict[str, set[str]] = {} + section: str | None = None + for line in text.splitlines(): + stripped = line.strip() + if stripped.startswith("#"): + if "REQUIRE_GUARD" in stripped: + section = "REQUIRE_GUARD" + elif "FORBID_GUARD" in stripped: + section = "FORBID_GUARD" + elif stripped.startswith("## "): + section = None # left the classification sections + continue + if not stripped.startswith("|"): + continue + if "REQUIRE_GUARD" in line: + row_class: str | None = "REQUIRE_GUARD" + elif "FORBID_GUARD" in line: + row_class = "FORBID_GUARD" + else: + row_class = section + if row_class is None: + continue + for token in re.findall(r"`([^`]+)`", line): + if token.endswith((".py", ".sh")): + found.setdefault(token, set()).add(row_class) + return found + + +@pytest.mark.parametrize("doc_path", HOOK_DOC_FILES, ids=HOOK_DOC_IDS) +def test_doc_tables_match_classification(doc_path: Path) -> None: + """Every doc table classifies each hook with EXACTLY its lint-hooks class.""" + assert doc_path.exists(), f"missing doc file {doc_path.relative_to(REPO_ROOT)}" + found = _doc_classification(doc_path.read_text(encoding="utf-8")) + rel = doc_path.relative_to(REPO_ROOT) + for name in sorted(lh.REQUIRE_GUARD | lh.FORBID_GUARD): + expected = "REQUIRE_GUARD" if name in lh.REQUIRE_GUARD else "FORBID_GUARD" + assert name in found, ( + f"{rel}: hook '{name}' is classified in scripts/lint-hooks.py but " + f"absent from this doc's tables (classification drift)." + ) + assert found[name] == {expected}, ( + f"{rel}: hook '{name}' is listed as {sorted(found[name])} but " + f"scripts/lint-hooks.py classifies it as {expected} (drift)." + ) diff --git a/tests/test_map_step_runner.py b/tests/test_map_step_runner.py index 0bd65e9..b10e45d 100644 --- a/tests/test_map_step_runner.py +++ b/tests/test_map_step_runner.py @@ -3286,6 +3286,50 @@ def test_empty_response_is_truncated(self): assert report["truncated"] is True assert report["reasons"] == ["empty response"] + def _run_truncation_cli(self, stdin_text: str, agent: str = "actor") -> dict: + proc = subprocess.run( + [ + sys.executable, + str(SCRIPTS_PATH / "map_step_runner.py"), + "detect_truncated_agent_output", + "--agent", + agent, + ], + input=stdin_text, + capture_output=True, + text=True, + ) + assert proc.returncode == 0, proc.stderr + return json.loads(proc.stdout) + + def test_cli_no_input_is_not_truncated(self): + """Regression: a bare CLI call (nothing piped) must NOT report a + hard-stop truncation. It returns status=='no_input' / truncated False + so it can't masquerade as an 'empty response' truncation on every + subtask when the caller forgot to pipe the agent response. + """ + report = self._run_truncation_cli("") + assert report["truncated"] is False, report + assert report["status"] == "no_input", report + + def test_cli_piped_prose_is_truncated(self): + """A genuinely-piped prose (non-JSON) response is still flagged.""" + report = self._run_truncation_cli("All good, shipping now.", agent="monitor") + assert report["truncated"] is True, report + assert report["status"] == "ok", report + + def test_cli_piped_valid_envelope_is_ok(self): + """A piped complete envelope passes (truncated False, status ok).""" + envelope = json.dumps({ + "files_changed": ["a.py"], + "tests_run": ["pytest"], + "validation_notes": "ok", + "blocker": "", + }) + report = self._run_truncation_cli(envelope) + assert report["truncated"] is False, report + assert report["status"] == "ok", report + def test_predictor_full_output_not_truncated(self): """POSITIVE: full valid predictor JSON is not flagged as truncated.""" text = json.dumps({ @@ -7173,6 +7217,42 @@ def test_vc3_missing_file_returns_none(self, tmp_path: Path) -> None: result = map_step_runner._enclosing_changed_symbols(missing, {1}) assert result is None + def test_is_reportable_symbol_excludes_main(self) -> None: + """Regression: generic process entrypoint `main` is not a reportable symbol. + + A `def main()` is invoked by convention (``if __name__ == "__main__"``) + and matches the literal word "main" in every SKILL.md / settings.json, + flooding the blast-radius gate with false external callers. + """ + assert map_step_runner._is_reportable_symbol("main") is False + # Real shared helpers and private constants stay reportable. + assert map_step_runner._is_reportable_symbol("shared_fn") is True + assert map_step_runner._is_reportable_symbol("_MONITOR_REQUIRED_KEYS") is True + # Existing exclusions still hold. + assert map_step_runner._is_reportable_symbol("__all__") is False + assert map_step_runner._is_reportable_symbol("ab") is False + + def test_generic_main_excluded_from_changed_symbols(self, tmp_path: Path) -> None: + """Regression: a changed `def main()` body does not surface `main`.""" + src = tmp_path / "hook.py" + src.write_text( + "import os, sys\n" + "\n" + "def main() -> None:\n" + " if os.environ.get('MAP_INVOKED_BY'):\n" + " sys.exit(0)\n" + " print('work')\n" + "\n" + "def shared_helper(x):\n" + " return x + 1\n", + encoding="utf-8", + ) + # Line 4 is inside main(); line 9 is inside shared_helper(). + result = map_step_runner._enclosing_changed_symbols(src, {4, 9}) + assert result is not None + assert "main" not in result + assert "shared_helper" in result + class TestDetectSymbolBlastRadius: """VC2-VC4: detect_symbol_blast_radius end-to-end with a real temp git repo.