jim4226 · jim4226 · May 25, 2026
diff --git a/csis/verification/critic_stack.py b/csis/verification/critic_stack.py
@@ -10,18 +10,60 @@
 returns a recent precision metric; if it drops below floor, promotions
 freeze. This is a separate signal from the verified-gain rate so the
 Critic can't be tuned-into-quietude.
+
+``CriticEffortLevel`` parameterises review depth, mirroring the effort tiers
+shipped in Claude Code v2.1.147 (``/code-review low|medium|high|max``). The
+Coordinator picks ``low`` for cheap sanity-checks inside a tight budget and
+``max`` for pre-promotion gates where a missed flaw is costlier than latency.
+The mapping is intentionally coarse — the point is that the Coordinator can
+express intent without hard-coding token counts in call sites.
 """
 from __future__ import annotations
 
 import json
 import re
 from dataclasses import dataclass, field
+from enum import Enum
 from typing import Iterable
 
 from csis.backends.base import LLMBackend, LLMRequest
 from csis.contracts import Artifact, CriticFinding, GraderResult, Plan
 
 
+class CriticEffortLevel(str, Enum):
+    """How deeply should the critic look?
+
+    Maps to (min_attempts, max_tokens) in EFFORT_PARAMS.
+
+    low   — quick sanity check; 1 attempt, 500 tokens. Use inside tight
+             per-iteration budgets (e.g. rapid Researcher → Builder loops).
+    medium — default; 3 attempts, 2 000 tokens. Matches the historical
+             Phase-0 baseline.
+    high  — thorough; 5 attempts, 4 000 tokens. Use before Verifier cert.
+    max   — exhaustive; 8 attempts, 8 000 tokens. Use at promotion gates
+             where a missed flaw is more costly than extra latency.
+    """
+
+    low = "low"
+    medium = "medium"
+    high = "high"
+    max = "max"
+
+
+@dataclass(frozen=True)
+class _EffortParams:
+    min_attempts: int
+    max_tokens: int
+
+
+EFFORT_PARAMS: dict[CriticEffortLevel, _EffortParams] = {
+    CriticEffortLevel.low:    _EffortParams(min_attempts=1, max_tokens=500),
+    CriticEffortLevel.medium: _EffortParams(min_attempts=3, max_tokens=2000),
+    CriticEffortLevel.high:   _EffortParams(min_attempts=5, max_tokens=4000),
+    CriticEffortLevel.max:    _EffortParams(min_attempts=8, max_tokens=8000),
+}
+
+
 CRITIC_SYSTEM_PROMPT = """You are the Critic in CSIS Phase-0.
 
 Your one job is to falsify the claim that the artifact passes the plan
@@ -84,14 +126,24 @@ def run_critic(
     plan: Plan,
     artifact: Artifact,
     grader_results: list[GraderResult],
-    min_attempts: int = 3,
+    min_attempts: int | None = None,
+    effort: CriticEffortLevel = CriticEffortLevel.medium,
 ) -> list[CriticFinding]:
+    """Run the V2 critic at the requested effort level.
+
+    ``effort`` selects the (min_attempts, max_tokens) pair from EFFORT_PARAMS.
+    If ``min_attempts`` is supplied explicitly it overrides the effort-derived
+    value; this preserves backward compatibility for callers that hard-code a
+    count (e.g. the F7 seeded-flaw evaluator, which always wants exactly 3).
+    """
+    params = EFFORT_PARAMS[effort]
+    effective_min = min_attempts if min_attempts is not None else params.min_attempts
     req = LLMRequest(
         role="critic",
         checkpoint_id=checkpoint_id,
         system=CRITIC_SYSTEM_PROMPT,
-        prompt=build_critic_prompt(plan, artifact, grader_results, min_attempts),
-        max_tokens=2000,
+        prompt=build_critic_prompt(plan, artifact, grader_results, effective_min),
+        max_tokens=params.max_tokens,
     )
     resp = backend.complete(req)
     return parse_critic_output(resp.text)

diff --git a/tests/test_verification.py b/tests/test_verification.py
@@ -4,6 +4,7 @@
   F1 — mock-vs-mock cross-checkpoint must be structural, not decorative
   F6 — corrupted grader: pinned source hash check
   F7 — critic incentive: seeded synthetic flaws + minimum attempts
+  CriticEffortLevel — effort tiers produce correct min_attempts + max_tokens
 """
 from __future__ import annotations
 
@@ -22,6 +23,8 @@
     build_certificate,
 )
 from csis.verification.critic_stack import (
+    EFFORT_PARAMS,
+    CriticEffortLevel,
     CriticEvaluator,
     SeededFlaw,
     parse_critic_output,
@@ -202,3 +205,95 @@ def test_seeded_flaw_evaluator_tracks_catch_rate() -> None:
     caught_2 = evaluator.submit_seeded(backend=backend, checkpoint_id="beta", flaw=flaw)
     assert caught_1 and not caught_2
     assert evaluator.catch_rate() == 0.5
+
+
+# ---- CriticEffortLevel ------------------------------------------------------
+
+
+def test_effort_params_ordering() -> None:
+    """Higher effort levels must demand more attempts and allow more tokens.
+
+    This test fails if the EFFORT_PARAMS table is mis-ordered, which would
+    let a low-effort critic silently substitute for a high-effort gate.
+    """
+    levels = [CriticEffortLevel.low, CriticEffortLevel.medium, CriticEffortLevel.high, CriticEffortLevel.max]
+    for i in range(len(levels) - 1):
+        lo = EFFORT_PARAMS[levels[i]]
+        hi = EFFORT_PARAMS[levels[i + 1]]
+        assert lo.min_attempts < hi.min_attempts, (
+            f"{levels[i]} min_attempts={lo.min_attempts} must be < "
+            f"{levels[i+1]} min_attempts={hi.min_attempts}"
+        )
+        assert lo.max_tokens < hi.max_tokens, (
+            f"{levels[i]} max_tokens={lo.max_tokens} must be < "
+            f"{levels[i+1]} max_tokens={hi.max_tokens}"
+        )
+
+
+def test_run_critic_effort_low_uses_fewer_attempts_than_high() -> None:
+    """run_critic at 'low' effort sends a smaller min_attempts prompt than 'high'."""
+    captured: list[str] = []
+
+    class CapturingBackend(MockBackend):
+        def complete(self, req):  # type: ignore[override]
+            captured.append(req.prompt)
+            return super().complete(req)
+
+    backend_low = CapturingBackend()
+    # Return enough falsification attempts for any effort level.
+    big_response = (
+        '[{"attempt":"a","falsified":false},'
+        '{"attempt":"b","falsified":false},'
+        '{"attempt":"c","falsified":false},'
+        '{"attempt":"d","falsified":false},'
+        '{"attempt":"e","falsified":false},'
+        '{"attempt":"f","falsified":false},'
+        '{"attempt":"g","falsified":false},'
+        '{"attempt":"h","falsified":false}]'
+    )
+    backend_low.script("critic", "beta", big_response)
+    run_critic(backend=backend_low, checkpoint_id="beta", plan=_plan(),
+               artifact=_artifact(), grader_results=[], effort=CriticEffortLevel.low)
+
+    backend_max = CapturingBackend()
+    backend_max.script("critic", "beta", big_response)
+    run_critic(backend=backend_max, checkpoint_id="beta", plan=_plan(),
+               artifact=_artifact(), grader_results=[], effort=CriticEffortLevel.max)
+
+    low_prompt, max_prompt = captured[0], captured[1]
+    # The prompt embeds the min_attempts number; max must request more.
+    assert "1" in low_prompt   # low: at least 1
+    assert "8" in max_prompt   # max: at least 8
+
+
+def test_run_critic_explicit_min_attempts_overrides_effort() -> None:
+    """Explicit min_attempts= kwarg overrides the effort-derived value.
+
+    This preserves backward compatibility for the F7 seeded-flaw evaluator
+    and any other caller that hard-codes a count.
+    """
+    backend = MockBackend()
+    backend.script("critic", "beta",
+                   '[{"attempt":"x","falsified":false},{"attempt":"y","falsified":false}]')
+    # Pass effort=low (which gives 1 attempt) but override to 2 explicitly.
+    findings = run_critic(
+        backend=backend,
+        checkpoint_id="beta",
+        plan=_plan(),
+        artifact=_artifact(),
+        grader_results=[],
+        effort=CriticEffortLevel.low,
+        min_attempts=2,
+    )
+    assert len(findings) == 2
+
+
+def test_effort_medium_matches_historical_baseline() -> None:
+    """CriticEffortLevel.medium must keep min_attempts=3, max_tokens=2000.
+
+    These values match the Phase-0 baseline so existing certs and the F7
+    seeded-flaw floor remain calibrated after the effort-level refactor.
+    """
+    params = EFFORT_PARAMS[CriticEffortLevel.medium]
+    assert params.min_attempts == 3
+    assert params.max_tokens == 2000