AVADSA25 · AVADSA25 · May 29, 2026 · May 29, 2026 · May 29, 2026 · May 29, 2026
diff --git a/codec_agent_plan.py b/codec_agent_plan.py
@@ -290,6 +290,21 @@ def set_grants_hash(agent_id: str) -> str:
     return h
 
 
+def grants_lock(agent_id: str):
+    """C5 (Fix #5): cross-process flock around the per-agent grants.json
+    read-modify-write (mirrors _status_lock for the manifest CAS). The /grant
+    endpoint holds this across load_grants -> modify -> save_grants ->
+    set_grants_hash so two concurrent grants can't clobber each other. Falls
+    back to a no-op context if codec_jsonstore is unavailable (headless/CI) —
+    never breaks grant_permission."""
+    try:
+        import codec_jsonstore
+        return codec_jsonstore.file_lock(_agent_dir(agent_id) / "grants.json")
+    except Exception:
+        import contextlib
+        return contextlib.nullcontext()
+
+
 # ── Skill-registry validation ─────────────────────────────────────────────────
 def validate_plan_skills(plan: Plan, registry=None) -> Tuple[bool, List[str]]:
     """Walk every checkpoint's skills_needed; return (ok, missing_skills).

diff --git a/codec_agents.py b/codec_agents.py
@@ -180,7 +180,26 @@ def _web_search(query: str) -> str:
 
 def _web_fetch(url: str) -> str:
     try:
-        r = _sync_http.get(url.strip())
+        # Fix #7 (H1) + re-audit N3: SSRF guard BEFORE the request AND on every
+        # redirect hop. The fetched text is returned to the agent/LLM, so a read
+        # of an internal/metadata host is an exfil path; _sync_http defaults to
+        # follow_redirects=True, which would reach an internal target via a 302
+        # the guard never saw — so we follow redirects manually here.
+        import codec_ssrf
+        from urllib.parse import urljoin
+        cur = url.strip()
+        try:
+            for _ in range(6):  # initial request + up to 5 redirects
+                codec_ssrf.validate_url(cur)
+                r = _sync_http.get(cur, follow_redirects=False)
+                if r.is_redirect and r.headers.get("location"):
+                    cur = urljoin(cur, r.headers["location"])
+                    continue
+                break
+            else:
+                return "Fetch error: blocked URL (too many redirects)"
+        except codec_ssrf.SSRFError as e:
+            return f"Fetch error: blocked URL ({e})"
         if r.status_code in (401, 403):
             return f"Blocked by site (HTTP {r.status_code}). Site requires JavaScript or blocks automated access."
         if r.status_code >= 400:

diff --git a/codec_ask_user.py b/codec_ask_user.py
@@ -201,7 +201,12 @@ def _write_question_notification(record: dict) -> None:
     surface; failures here log + continue.
     """
     try:
-        with _FILE_LOCK:
+        # C5 (Fix #5): hold the cross-process file_lock across the whole
+        # read-modify-write so concurrent daemons (dashboard / voice /
+        # agent-runner) can't clobber each other's append. _FILE_LOCK is the
+        # in-process guard; codec_jsonstore.file_lock is the cross-process one —
+        # same pairing the PENDING_QUESTIONS read-modify-write already uses.
+        with _FILE_LOCK, codec_jsonstore.file_lock(NOTIFICATIONS_PATH):
             try:
                 with open(NOTIFICATIONS_PATH) as f:
                     notifs = json.load(f)
@@ -648,7 +653,10 @@ def submit_answer(qid: str, answer: str, *, answered_via: str = "pwa") -> dict:
 
     # Mark the matching notification as read.
     try:
-        with _FILE_LOCK:
+        # C5 (Fix #5): same cross-process file_lock as the question-write path
+        # above, so a concurrent _write_question_notification and this
+        # mark-read can't clobber each other on notifications.json.
+        with _FILE_LOCK, codec_jsonstore.file_lock(NOTIFICATIONS_PATH):
             try:
                 with open(NOTIFICATIONS_PATH) as f:
                     notifs = json.load(f)

diff --git a/codec_concurrency.py b/codec_concurrency.py
@@ -0,0 +1,72 @@
+"""codec_concurrency — small, dependency-free concurrency helpers.
+
+`run_with_timeout` runs a callable in a daemon thread with a hard wall-clock
+timeout that ACTUALLY bounds wall-clock time.
+
+Motivation (audit C4): the common idiom
+
+    with concurrent.futures.ThreadPoolExecutor(max_workers=1) as ex:
+        fut = ex.submit(fn)
+        return fut.result(timeout=T)
+
+defeats its own timeout. When `fut.result(timeout=T)` raises TimeoutError,
+the `with` block's __exit__ calls `executor.shutdown(wait=True)`, which BLOCKS
+until the runaway task finishes. So a 50ms "timeout" on a 5s task takes ~5s —
+the MCP tool dispatch (codec_mcp) and the observer OCR call (codec_observer)
+could hang on a slow skill / screencapture popup.
+
+This helper uses a daemon thread + `join(timeout=...)` and never calls
+shutdown(wait=True): on timeout it abandons the still-running worker and
+raises promptly. daemon=True means an abandoned worker never blocks process
+shutdown. Same shape as the proven `codec_hooks._run_hook_with_timeout`.
+"""
+import queue
+import threading
+from typing import Any, Callable
+
+__all__ = ["run_with_timeout"]
+
+
+def run_with_timeout(
+    fn: Callable[..., Any],
+    timeout: float,
+    *args: Any,
+    **kwargs: Any,
+) -> Any:
+    """Run ``fn(*args, **kwargs)`` in a daemon thread, bounded by ``timeout`` seconds.
+
+    Returns ``fn``'s return value on success. Re-raises, in the calling thread,
+    any exception ``fn`` raised (original type, message, and instance preserved).
+    Raises ``TimeoutError`` if ``fn`` does not complete within ``timeout`` —
+    promptly, without waiting for the (abandoned, still-running) worker to finish.
+
+    On Python 3.11+ ``concurrent.futures.TimeoutError`` is an alias of the
+    builtin ``TimeoutError``, so call sites catching either are satisfied.
+    """
+    result_q: "queue.Queue[Any]" = queue.Queue(maxsize=1)
+    exc_q: "queue.Queue[BaseException]" = queue.Queue(maxsize=1)
+
+    def _runner() -> None:
+        try:
+            result_q.put(fn(*args, **kwargs))
+        except BaseException as e:  # noqa: BLE001 — propagate ANY error to the caller
+            try:
+                exc_q.put(e)
+            except Exception:
+                pass
+
+    t = threading.Thread(target=_runner, daemon=True, name="codec-run-with-timeout")
+    t.start()
+    t.join(timeout=timeout)
+
+    if t.is_alive():
+        # Abandon the worker — daemon=True so it never blocks shutdown. No
+        # shutdown(wait=True), so we return control to the caller immediately.
+        raise TimeoutError(f"operation exceeded {timeout}s timeout")
+    if not exc_q.empty():
+        raise exc_q.get_nowait()
+    if not result_q.empty():
+        return result_q.get_nowait()
+    # Thread finished without putting a result or exception — only reachable if
+    # the result put itself failed. Treat as a None return.
+    return None
diff --git a/codec_config.py b/codec_config.py
@@ -714,6 +714,10 @@ def is_dangerous(cmd):
 _SKILL_DANGEROUS_MODULES = {
     "os", "subprocess", "ctypes", "shutil", "importlib", "signal", "pty",
     "socket",
+    # re-audit N19: `import builtins; builtins.exec(src)` bypassed the gate —
+    # builtins.exec/eval are ast.Attribute calls, not the bare-name Calls the
+    # _SKILL_DANGEROUS_CALLS check catches. Blocking the import closes it.
+    "builtins",
 }
 _SKILL_SAFE_MODULES = {
     "json", "re", "math", "datetime", "collections", "itertools", "functools",

diff --git a/codec_consent.py b/codec_consent.py
@@ -0,0 +1,102 @@
+"""codec_consent — strict-consent gate for the chat + MCP skill paths.
+
+Re-audit (red-team CHAIN-001/002/006): the Step-3 consent gate
+(docs/PHASE1-STEP3-DESIGN.md §1.7) was wired ONLY into codec_agent_runner. The
+chat ([SKILL:] tag + pre-LLM hijack → codec_dispatch.run_skill) and MCP
+(codec_mcp.tool_fn) paths could reach high-power skills with only the
+`is_dangerous` heuristic / path blocklists. This module is the shared
+classifier + per-transport policy:
+
+  - MCP  → hard-refuse destructive skills (claude.ai can't consent at the
+           operator tier; consistent with the _HTTP_BLOCKED principle).
+  - chat → require explicit confirmation (the handler returns consent_required;
+           the user confirms; re-dispatch carries a token).
+  - voice/agent → existing ask_user announce-and-listen (unchanged).
+
+A skill is "destructive" if it declares `SKILL_DESTRUCTIVE = True`
+(registry-AST-extracted — the extensible per-skill path, Decision C), OR is in
+`codec_config._HTTP_BLOCKED`, OR is one of the known high-power built-ins below
+(so coverage doesn't depend on regenerating the hash-pinned skill manifest).
+
+Kill switch: `CONSENT_GATE_ENABLED=false`.
+"""
+import os
+
+__all__ = ["gate_enabled", "is_destructive_skill", "chat_consent_ok", "mcp_refuse_message"]
+
+# Known high-power built-ins that are destructive but NOT in _HTTP_BLOCKED.
+# (terminal / python_exec / process_manager / pm2_control / ax_control are
+# already covered by the _HTTP_BLOCKED backstop.)
+_DESTRUCTIVE_BUILTINS = frozenset({
+    "file_ops",       # write/append/delete to the filesystem
+    "file_write",     # writes files
+    "imessage_send",  # sends messages as the user
+    "pilot",          # drives a real browser session
+    "skill_forge",    # writes a skill to disk (no review gate)
+})
+
+
+def gate_enabled() -> bool:
+    """Consent gate on by default; CONSENT_GATE_ENABLED=false disables it."""
+    return os.environ.get("CONSENT_GATE_ENABLED", "true").lower() != "false"
+
+
+def is_destructive_skill(tool_name, registry=None) -> bool:
+    """True if `tool_name` is a high-power/destructive skill needing consent
+    (chat) or refusal (MCP). Never raises."""
+    if not tool_name:
+        return False
+    # 1) per-skill SKILL_DESTRUCTIVE flag (extensible — user skills opt in)
+    try:
+        reg = registry
+        if reg is None:
+            from codec_dispatch import registry as reg  # the singleton
+        if reg is not None and reg.get_destructive(tool_name):
+            return True
+    except Exception:
+        pass
+    # 2) _HTTP_BLOCKED backstop (terminal, python_exec, process_manager, …)
+    try:
+        from codec_config import _HTTP_BLOCKED
+        if tool_name in _HTTP_BLOCKED:
+            return True
+    except Exception:
+        pass
+    # 3) known high-power built-ins
+    return tool_name in _DESTRUCTIVE_BUILTINS
+
+
+def chat_consent_ok(tool_name, query, *, registry=None) -> bool:
+    """Chat path (A2): a destructive skill requires explicit consent via the
+    existing AskUserQuestion PWA panel (Phase 1 Step 3 §1.7 — literal verb-match;
+    generic yes/ok rejected). Returns True if the skill may run (non-destructive,
+    gate disabled, or consent granted); False if blocked (declined / timeout /
+    ask_user unavailable). BLOCKS the worker thread on ask_user until answered —
+    the chat handler invokes this via asyncio.to_thread, so the event loop isn't
+    blocked. Fail-closed: any error → False (a destructive skill never
+    auto-runs)."""
+    if not gate_enabled() or not is_destructive_skill(tool_name, registry=registry):
+        return True
+    try:
+        import codec_ask_user
+        answer = codec_ask_user.ask(
+            f"CODEC wants to run the '{tool_name}' skill — a destructive / "
+            f"high-power operation — for: {(query or '')[:200]}",
+            destructive=True,
+            asked_from="chat",
+            tool_name=tool_name,
+        )
+        return answer not in (
+            codec_ask_user.TIMEOUT_SENTINEL,
+            codec_ask_user.DISABLED_SENTINEL,
+        )
+    except Exception:
+        return False
+
+
+def mcp_refuse_message(tool_name) -> str:
+    return (
+        f"Skill '{tool_name}' is a destructive/high-power operation and is not "
+        "permitted over MCP. Run it locally (chat or voice), where the operator "
+        "can confirm it."
+    )