From 0318e0607bad8960fd750fd52c2005baa86e5aad Mon Sep 17 00:00:00 2001
From: Mike Abernathy <mabernathy87@gmail.com>
Date: Fri, 17 Apr 2026 14:37:02 -0600
Subject: [PATCH] feat(architect): two-phase read-only tool access (#193 PR 3)

Add optional Phase 2 tool loop for the Architect when Phase 1 output
is insufficient. Phase 1 preserves existing no-tools behavior; Phase 2
binds READONLY_TOOLS (filesystem_list/read, github_read_diff) via
_run_tool_loop, capped at MAX_ARCHITECT_TOOL_TURNS (default 4), then
reparses the Blueprint. On Phase 2 parse failure, falls back to the
Phase 1 Blueprint (when Phase 1 parsed) so the retry loop can still make
progress; if neither phase yields a parseable Blueprint, the run is
marked FAILED.

Sufficiency gate (_blueprint_is_sufficient) rejects empty target_files,
blank entries, placeholder paths ("path/to/...", angle-bracketed,
TODO/TBD/unknown), and overly short instructions (<20 chars after
stripping whitespace).

New tests (16) cover sufficiency heuristics, both-phase behavior,
max-turn exhaustion, and a Fix-issue-#113 integration flow that exercises
Planner-prefetched issue context through the two-phase architect.

Refs #193

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 dev-suite/src/orchestrator.py               | 155 +++++-
 dev-suite/tests/test_architect_two_phase.py | 525 ++++++++++++++++++++
 2 files changed, 672 insertions(+), 8 deletions(-)
 create mode 100644 dev-suite/tests/test_architect_two_phase.py

diff --git a/dev-suite/src/orchestrator.py b/dev-suite/src/orchestrator.py
index 9f943e1..9c6cff7 100644
--- a/dev-suite/src/orchestrator.py
+++ b/dev-suite/src/orchestrator.py
@@ -64,6 +64,7 @@
     validate_paths_for_workspace,
 )
 from .tools.github_fetch import extract_github_refs, fetch_issue_or_pr
+from .tools.mcp_bridge import READONLY_TOOLS
 from .tracing import add_trace_event, create_trace_config
 
 load_dotenv()
@@ -82,6 +83,11 @@ def _safe_int(env_key: str, default: int) -> int:
 MAX_RETRIES = _safe_int("MAX_RETRIES", 3)
 TOKEN_BUDGET = _safe_int("TOKEN_BUDGET", 50000)
 MAX_TOOL_TURNS = _safe_int("MAX_TOOL_TURNS", 10)
+# Issue #193 PR 3: Architect's Phase 2 tool-loop cap. Architect only
+# needs a handful of read-only probes (list → read → maybe read again)
+# to disambiguate target_files, so we keep this tight vs. Developer's
+# MAX_TOOL_TURNS.
+MAX_ARCHITECT_TOOL_TURNS = _safe_int("MAX_ARCHITECT_TOOL_TURNS", 4)
 MAX_RETRY_FILE_CHARS = _safe_int("MAX_RETRY_FILE_CHARS", 30000)
 CONTEXT_BUDGET_CHARS = _safe_int("CONTEXT_BUDGET_CHARS", 120000)  # ~30k tokens
 CONTEXT_FILE_MAX_LINES = _safe_int("CONTEXT_FILE_MAX_LINES", 500)
@@ -958,7 +964,40 @@ async def decompose_task_node(state: GraphState) -> dict:
 # CRITICAL: All graph nodes MUST be async def on Python 3.13+.
 # Sync nodes cause "generator didn't stop after throw()" under astream().
 
-async def architect_node(state: GraphState) -> dict:
+def _blueprint_is_sufficient(blueprint: Blueprint) -> bool:
+    """Heuristic: is the Architect's Phase-1 Blueprint good enough to build?
+
+    Issue #193 PR 3 — When the Architect has no tool access, it sometimes
+    emits placeholder paths ("path/to/file.py") or an empty target_files
+    because it can't verify the codebase layout. In those cases we escalate
+    to Phase 2 (read-only tools). Otherwise the Phase-1 Blueprint is used
+    as-is, matching the pre-#193 behavior and avoiding unnecessary tool
+    cost.
+    """
+    if not blueprint.target_files:
+        return False
+    # Reject blank or placeholder-looking paths: angle brackets, a "path/to"
+    # prefix, or a bare TODO/TBD/unknown, which LLMs emit when guessing.
+    for tf in blueprint.target_files:
+        stripped = tf.strip()
+        if not stripped:
+            return False
+        lower = stripped.lower()
+        if "<" in stripped or ">" in stripped:
+            return False
+        if lower.startswith("path/to") or lower in {"todo", "tbd", "unknown"}:
+            return False
+    # Require non-trivial instructions so an empty/handwave Blueprint
+    # doesn't squeak through.
+    if len(blueprint.instructions.strip()) < 20:
+        return False
+    return True
+
+
+async def architect_node(
+    state: GraphState,
+    config: RunnableConfig | None = None,
+) -> dict:
     trace = list(state.get("trace", []))
     trace.append("architect: starting planning")
     retry_count = state.get("retry_count", 0)
@@ -1041,19 +1080,119 @@ async def architect_node(state: GraphState) -> dict:
         user_msg += f"Recommendation: {failure_report.recommendation}\n"
         user_msg += "\nGenerate a COMPLETELY NEW Blueprint. Do not patch the old one. The previous target_files or approach was wrong."
     llm = _get_architect_llm()
-    response = await llm.ainvoke([SystemMessage(content=system_prompt), HumanMessage(content=user_msg)])
+    # Phase 1: no tools. Keep pre-#193 behavior intact — the Architect
+    # produces a Blueprint from memory + gathered_context alone. This is
+    # cheap and handles the common case where gather_context_node already
+    # surfaced the right files.
+    phase1_messages = [SystemMessage(content=system_prompt), HumanMessage(content=user_msg)]
+    response = await llm.ainvoke(phase1_messages)
+    tokens_used += _extract_token_count(response)
     try:
         raw = _extract_text_content(response.content)
         blueprint_data = _extract_json(raw)
         blueprint = Blueprint(**blueprint_data)
     except (json.JSONDecodeError, Exception) as e:
-        trace.append(f"architect: failed to parse blueprint: {e}")
-        logger.error("[ARCH] Blueprint parse failed: %s", e)
-        return {"status": WorkflowStatus.FAILED, "error_message": f"Architect failed to produce valid Blueprint: {e}", "trace": trace, "memory_context": memory_context}
-    trace.append(f"architect: blueprint created for {len(blueprint.target_files)} files")
-    tokens_used = tokens_used + _extract_token_count(response)
+        trace.append(f"architect: phase 1 failed to parse blueprint: {e}")
+        logger.error("[ARCH] Phase 1 Blueprint parse failed: %s", e)
+        blueprint = None
+
+    # Phase 2 gate (Issue #193 PR 3): escalate to read-only tools if the
+    # Phase-1 Blueprint is missing/insufficient AND tools are available.
+    # When no tools are configured (e.g. single-shot mode, tests without
+    # a provider) we fall through with whatever Phase 1 produced.
+    tool_calls_log: list[dict] = []
+    phase2_attempted = False
+    if blueprint is None or not _blueprint_is_sufficient(blueprint):
+        readonly_tools = _get_agent_tools(config, allowed_names=READONLY_TOOLS)
+        if readonly_tools:
+            phase2_attempted = True
+            reason = (
+                "phase 1 produced no parseable blueprint"
+                if blueprint is None
+                else f"phase 1 blueprint insufficient (target_files={blueprint.target_files})"
+            )
+            trace.append(f"architect: escalating to phase 2 -- {reason}")
+            logger.info(
+                "[ARCH] Phase 2 start: %d read-only tool(s) bound, reason=%s",
+                len(readonly_tools), reason,
+            )
+            # Carry Phase 1 forward as context and nudge the LLM to use
+            # the tools to verify target_files before re-emitting JSON.
+            escalation_prompt = (
+                "The previous Blueprint was insufficient -- either empty "
+                "target_files, placeholder paths, or missing instructions. "
+                "Use the read-only tools (filesystem_list, filesystem_read, "
+                "github_read_diff) to inspect the codebase, verify which "
+                "files exist, and then emit a CORRECTED JSON Blueprint "
+                "matching the schema above. Respond with ONLY the JSON "
+                "when done."
+            )
+            phase2_messages = list(phase1_messages)
+            phase2_messages.append(response)
+            phase2_messages.append(HumanMessage(content=escalation_prompt))
+            llm_with_tools = llm.bind_tools(readonly_tools)
+            response, tokens_used, new_tool_log = await _run_tool_loop(
+                llm_with_tools,
+                phase2_messages,
+                readonly_tools,
+                max_turns=MAX_ARCHITECT_TOOL_TURNS,
+                tokens_used=tokens_used,
+                trace=trace,
+                agent_name="architect",
+            )
+            tool_calls_log.extend(new_tool_log)
+            try:
+                raw = _extract_text_content(response.content)
+                blueprint_data = _extract_json(raw)
+                blueprint = Blueprint(**blueprint_data)
+                trace.append(
+                    f"architect: phase 2 blueprint parsed for "
+                    f"{len(blueprint.target_files)} file(s)"
+                )
+            except (json.JSONDecodeError, Exception) as e:
+                trace.append(f"architect: phase 2 failed to parse blueprint: {e}")
+                logger.warning("[ARCH] Phase 2 Blueprint parse failed: %s", e)
+                # If Phase 1 already gave us *something* parseable, keep it;
+                # otherwise this is a hard failure.
+                if blueprint is None:
+                    return {
+                        "status": WorkflowStatus.FAILED,
+                        "error_message": (
+                            f"Architect failed to produce valid Blueprint "
+                            f"(phase 1 + phase 2 both failed): {e}"
+                        ),
+                        "trace": trace,
+                        "memory_context": memory_context,
+                        "tool_calls_log": tool_calls_log,
+                    }
+
+    if blueprint is None:
+        # Phase 1 failed and no tools to escalate with.
+        return {
+            "status": WorkflowStatus.FAILED,
+            "error_message": "Architect failed to produce valid Blueprint",
+            "trace": trace,
+            "memory_context": memory_context,
+        }
+
+    if not phase2_attempted:
+        trace.append(
+            f"architect: phase 1 blueprint created for "
+            f"{len(blueprint.target_files)} files"
+        )
     logger.info("[ARCH] done. tokens_used now=%d", tokens_used)
-    return {"blueprint": blueprint, "status": WorkflowStatus.BUILDING, "tokens_used": tokens_used, "trace": trace, "memory_context": memory_context}
+    result = {
+        "blueprint": blueprint,
+        "status": WorkflowStatus.BUILDING,
+        "tokens_used": tokens_used,
+        "trace": trace,
+        "memory_context": memory_context,
+    }
+    # Only touch tool_calls_log if Phase 2 recorded at least one tool call,
+    # so runs without tool activity do not emit the key at all.
+    if tool_calls_log:
+        result["tool_calls_log"] = list(state.get("tool_calls_log", [])) + tool_calls_log
+    return result
 
 
 async def developer_node(state: GraphState, config: RunnableConfig | None = None) -> dict:
diff --git a/dev-suite/tests/test_architect_two_phase.py b/dev-suite/tests/test_architect_two_phase.py
new file mode 100644
index 0000000..3b4d453
--- /dev/null
+++ b/dev-suite/tests/test_architect_two_phase.py
@@ -0,0 +1,525 @@
+"""Architect two-phase tool-access tests (Issue #193 PR 3).
+
+Phase 1 keeps the pre-#193 behavior intact: the Architect generates a
+Blueprint from memory + gathered_context without any tool access. When
+that Blueprint is empty or placeholder-shaped, and READONLY_TOOLS are
+available in the RunnableConfig, the Architect re-runs with tools bound
+via `_run_tool_loop` (max = MAX_ARCHITECT_TOOL_TURNS) and re-parses.
+
+These tests mock `_get_architect_llm` so no API calls are made.
+"""
+
+from __future__ import annotations
+
+from typing import Any
+from unittest.mock import AsyncMock, MagicMock, patch
+
+import pytest
+from langchain_core.messages import AIMessage
+from langchain_core.tools import Tool
+
+from src.agents.architect import Blueprint
+from src.orchestrator import (
+    MAX_ARCHITECT_TOOL_TURNS,
+    WorkflowStatus,
+    _blueprint_is_sufficient,
+    architect_node,
+)
+
+# ---------------------------------------------------------------------------
+# _blueprint_is_sufficient heuristic
+# ---------------------------------------------------------------------------
+
+
+def _bp(**kwargs: Any) -> Blueprint:
+    """Build a Blueprint with sensible defaults."""
+    return Blueprint(
+        task_id=kwargs.get("task_id", "t"),
+        target_files=kwargs.get("target_files", ["src/app.py"]),
+        instructions=kwargs.get(
+            "instructions",
+            "Implement the login fix per the QA spec (non-trivial body).",
+        ),
+        constraints=kwargs.get("constraints", []),
+        acceptance_criteria=kwargs.get("acceptance_criteria", []),
+        summary=kwargs.get("summary", "Fix login"),
+    )
+
+
+class TestBlueprintSufficiency:
+    def test_valid_blueprint_is_sufficient(self):
+        assert _blueprint_is_sufficient(_bp()) is True
+
+    def test_empty_target_files_is_insufficient(self):
+        assert _blueprint_is_sufficient(_bp(target_files=[])) is False
+
+    def test_placeholder_path_is_insufficient(self):
+        assert _blueprint_is_sufficient(
+            _bp(target_files=["path/to/file.py"])
+        ) is False
+
+    def test_angle_bracket_placeholder_is_insufficient(self):
+        assert _blueprint_is_sufficient(
+            _bp(target_files=["<unknown>"])
+        ) is False
+        assert _blueprint_is_sufficient(
+            _bp(target_files=["src/<something>.py"])
+        ) is False
+
+    def test_tbd_style_placeholder_is_insufficient(self):
+        assert _blueprint_is_sufficient(_bp(target_files=["TODO"])) is False
+        assert _blueprint_is_sufficient(_bp(target_files=["TBD"])) is False
+
+    def test_blank_entry_is_insufficient(self):
+        assert _blueprint_is_sufficient(_bp(target_files=[""])) is False
+        assert _blueprint_is_sufficient(_bp(target_files=["  "])) is False
+
+    def test_short_instructions_is_insufficient(self):
+        assert _blueprint_is_sufficient(_bp(instructions="Fix it")) is False
+
+    def test_exact_20_char_boundary(self):
+        # 19 chars — should fail
+        assert _blueprint_is_sufficient(
+            _bp(instructions="x" * 19)
+        ) is False
+        # 20 chars — should pass
+        assert _blueprint_is_sufficient(
+            _bp(instructions="x" * 20)
+        ) is True
+
+
+# ---------------------------------------------------------------------------
+# architect_node two-phase behavior
+# ---------------------------------------------------------------------------
+
+
+def _make_response(text: str, *, tokens: int = 100) -> AIMessage:
+    """Build an AIMessage shaped like langchain LLM output, with token meta."""
+    msg = AIMessage(content=text)
+    # Match what _extract_token_count expects — usage_metadata with total_tokens
+    msg.usage_metadata = {
+        "input_tokens": tokens // 2,
+        "output_tokens": tokens // 2,
+        "total_tokens": tokens,
+    }
+    return msg
+
+
+def _fake_filesystem_list_tool() -> Tool:
+    """A fake filesystem_list tool returning a fixed listing."""
+    async def _arun(*_args, **_kwargs):
+        return "src/auth.py\nsrc/login.py\ntests/test_auth.py"
+
+    return Tool.from_function(
+        name="filesystem_list",
+        description="List files in a directory",
+        func=lambda *_a, **_kw: "src/auth.py\nsrc/login.py",
+        coroutine=_arun,
+    )
+
+
+def _fake_filesystem_read_tool() -> Tool:
+    """A fake filesystem_read tool returning a fixed file body."""
+    async def _arun(*_args, **_kwargs):
+        return "def login():\n    pass  # broken"
+
+    return Tool.from_function(
+        name="filesystem_read",
+        description="Read a file",
+        func=lambda *_a, **_kw: "def login(): pass",
+        coroutine=_arun,
+    )
+
+
+@pytest.mark.asyncio
+class TestArchitectTwoPhase:
+    @patch("src.orchestrator._fetch_memory_context", return_value=[])
+    @patch("src.orchestrator._get_architect_llm")
+    async def test_phase1_sufficient_skips_phase2(self, mock_llm, _mock_mem):
+        """Sufficient Phase-1 blueprint → no tools called even if available."""
+        good_blueprint = _bp(task_id="p1-good")
+        mock_llm.return_value.ainvoke = AsyncMock(
+            return_value=_make_response(good_blueprint.model_dump_json(), tokens=500)
+        )
+        # Guard: fail loudly if the Phase 2 bind_tools branch is ever reached
+        mock_llm.return_value.bind_tools = MagicMock(
+            side_effect=AssertionError("bind_tools should not be called")
+        )
+
+        state = {
+            "task_description": "Fix login",
+            "trace": [],
+            "tokens_used": 0,
+            "retry_count": 0,
+        }
+        config = {
+            "configurable": {
+                "tools": [_fake_filesystem_list_tool(), _fake_filesystem_read_tool()],
+            }
+        }
+        result = await architect_node(state, config)
+
+        assert result["status"] == WorkflowStatus.BUILDING
+        assert result["blueprint"].task_id == "p1-good"
+        # Phase 1 path emits the "phase 1 blueprint created" trace, no tool log
+        assert any("phase 1 blueprint created" in t for t in result["trace"])
+        assert "tool_calls_log" not in result
+
+    @patch("src.orchestrator._fetch_memory_context", return_value=[])
+    @patch("src.orchestrator._get_architect_llm")
+    async def test_phase1_insufficient_no_tools_falls_through(
+        self, mock_llm, _mock_mem
+    ):
+        """Insufficient Phase-1 Blueprint but no tools → returns Phase-1 anyway.
+
+        Keeps single-shot (test) mode and MCP-disabled deployments working —
+        we don't fail the run just because we can't escalate.
+        """
+        weak_bp = _bp(task_id="p1-weak", target_files=["path/to/file.py"])
+        mock_llm.return_value.ainvoke = AsyncMock(
+            return_value=_make_response(weak_bp.model_dump_json())
+        )
+
+        state = {
+            "task_description": "Fix something",
+            "trace": [],
+            "tokens_used": 0,
+            "retry_count": 0,
+        }
+        # No tools in config → no escalation
+        config = {"configurable": {"tools": []}}
+        result = await architect_node(state, config)
+
+        assert result["status"] == WorkflowStatus.BUILDING
+        assert result["blueprint"].task_id == "p1-weak"
+        assert not any("phase 2" in t for t in result["trace"])
+
+    @patch("src.orchestrator._fetch_memory_context", return_value=[])
+    @patch("src.orchestrator._get_architect_llm")
+    async def test_phase1_insufficient_escalates_to_phase2(
+        self, mock_llm, _mock_mem
+    ):
+        """Empty target_files + tools available → Phase 2 corrects."""
+        # Phase 1: empty target_files (insufficient)
+        weak_bp = _bp(task_id="p1-empty", target_files=[])
+        # Phase 2: corrected blueprint
+        good_bp = _bp(task_id="p2-corrected", target_files=["src/login.py"])
+
+        llm_instance = MagicMock()
+        # Phase 1 (plain llm.ainvoke) returns the weak Blueprint. Phase 2 runs
+        # through `llm_with_tools.ainvoke`, which we mock by having bind_tools
+        # return a separate mock that yields the corrected Blueprint.
+        llm_instance.ainvoke = AsyncMock(
+            return_value=_make_response(weak_bp.model_dump_json())
+        )
+
+        tools_llm = MagicMock()
+        # First tool-loop turn: return final answer directly with no tool_calls
+        tools_llm.ainvoke = AsyncMock(
+            return_value=_make_response(good_bp.model_dump_json(), tokens=300)
+        )
+        llm_instance.bind_tools = MagicMock(return_value=tools_llm)
+        mock_llm.return_value = llm_instance
+
+        state = {
+            "task_description": "Please fix issue #113",
+            "trace": [],
+            "tokens_used": 0,
+            "retry_count": 0,
+        }
+        config = {
+            "configurable": {
+                "tools": [_fake_filesystem_list_tool(), _fake_filesystem_read_tool()],
+            }
+        }
+        result = await architect_node(state, config)
+
+        assert result["status"] == WorkflowStatus.BUILDING
+        assert result["blueprint"].task_id == "p2-corrected"
+        assert result["blueprint"].target_files == ["src/login.py"]
+        assert any("escalating to phase 2" in t for t in result["trace"])
+        assert any("phase 2 blueprint parsed" in t for t in result["trace"])
+
+    @patch("src.orchestrator._fetch_memory_context", return_value=[])
+    @patch("src.orchestrator._get_architect_llm")
+    async def test_phase2_tool_call_is_logged(self, mock_llm, _mock_mem):
+        """Phase 2 tool invocations show up in tool_calls_log under 'architect'."""
+        weak_bp = _bp(target_files=[])
+        good_bp = _bp(task_id="p2-with-tool", target_files=["src/app.py"])
+
+        llm_instance = MagicMock()
+        llm_instance.ainvoke = AsyncMock(
+            return_value=_make_response(weak_bp.model_dump_json())
+        )
+
+        # First tool-loop turn: emit a tool call to filesystem_list.
+        # Second tool-loop turn: no tool calls → final blueprint.
+        first_turn = _make_response("", tokens=50)
+        first_turn.tool_calls = [
+            {"name": "filesystem_list", "args": {"path": "."}, "id": "call-1"},
+        ]
+        second_turn = _make_response(good_bp.model_dump_json(), tokens=80)
+        tools_llm = MagicMock()
+        tools_llm.ainvoke = AsyncMock(side_effect=[first_turn, second_turn])
+        llm_instance.bind_tools = MagicMock(return_value=tools_llm)
+        mock_llm.return_value = llm_instance
+
+        state = {
+            "task_description": "Fix things",
+            "trace": [],
+            "tokens_used": 0,
+            "retry_count": 0,
+            "tool_calls_log": [],
+        }
+        config = {
+            "configurable": {
+                "tools": [_fake_filesystem_list_tool(), _fake_filesystem_read_tool()],
+            }
+        }
+        result = await architect_node(state, config)
+
+        assert result["status"] == WorkflowStatus.BUILDING
+        assert result["blueprint"].task_id == "p2-with-tool"
+        log = result.get("tool_calls_log", [])
+        assert any(
+            entry.get("agent") == "architect"
+            and entry.get("tool") == "filesystem_list"
+            for entry in log
+        )
+
+    @patch("src.orchestrator._fetch_memory_context", return_value=[])
+    @patch("src.orchestrator._get_architect_llm")
+    async def test_phase1_unparseable_phase2_rescues(self, mock_llm, _mock_mem):
+        """Phase 1 returns garbage JSON; Phase 2 produces a valid Blueprint."""
+        good_bp = _bp(task_id="p2-rescue", target_files=["src/app.py"])
+        llm_instance = MagicMock()
+        llm_instance.ainvoke = AsyncMock(
+            return_value=_make_response("not json at all")
+        )
+        tools_llm = MagicMock()
+        tools_llm.ainvoke = AsyncMock(
+            return_value=_make_response(good_bp.model_dump_json())
+        )
+        llm_instance.bind_tools = MagicMock(return_value=tools_llm)
+        mock_llm.return_value = llm_instance
+
+        state = {
+            "task_description": "Fix a thing",
+            "trace": [],
+            "tokens_used": 0,
+            "retry_count": 0,
+        }
+        config = {
+            "configurable": {
+                "tools": [_fake_filesystem_list_tool()],
+            }
+        }
+        result = await architect_node(state, config)
+
+        assert result["status"] == WorkflowStatus.BUILDING
+        assert result["blueprint"].task_id == "p2-rescue"
+        assert any("phase 1 failed to parse" in t for t in result["trace"])
+
+    @patch("src.orchestrator._fetch_memory_context", return_value=[])
+    @patch("src.orchestrator._get_architect_llm")
+    async def test_both_phases_fail_returns_failed_status(
+        self, mock_llm, _mock_mem
+    ):
+        """Phase 1 and Phase 2 both return garbage → WorkflowStatus.FAILED."""
+        llm_instance = MagicMock()
+        llm_instance.ainvoke = AsyncMock(
+            return_value=_make_response("nope not json")
+        )
+        tools_llm = MagicMock()
+        tools_llm.ainvoke = AsyncMock(
+            return_value=_make_response("still not json")
+        )
+        llm_instance.bind_tools = MagicMock(return_value=tools_llm)
+        mock_llm.return_value = llm_instance
+
+        state = {
+            "task_description": "Something",
+            "trace": [],
+            "tokens_used": 0,
+            "retry_count": 0,
+        }
+        config = {
+            "configurable": {
+                "tools": [_fake_filesystem_list_tool()],
+            }
+        }
+        result = await architect_node(state, config)
+
+        assert result["status"] == WorkflowStatus.FAILED
+        assert "phase 1 + phase 2 both failed" in result["error_message"]
+
+    @patch("src.orchestrator._fetch_memory_context", return_value=[])
+    @patch("src.orchestrator._get_architect_llm")
+    async def test_phase2_respects_max_tool_turns(self, mock_llm, _mock_mem):
+        """Phase 2 tool-loop caps at MAX_ARCHITECT_TOOL_TURNS.
+
+        When Phase 2 hits max turns without producing a parseable Blueprint
+        AND Phase 1 already gave us *some* Blueprint (even a weak one), we
+        fall back to Phase 1 so the downstream retry loop gets a chance
+        rather than hard-failing the run.
+        """
+        weak_bp = _bp(task_id="weak-fallback", target_files=[])
+        llm_instance = MagicMock()
+        llm_instance.ainvoke = AsyncMock(
+            return_value=_make_response(weak_bp.model_dump_json())
+        )
+
+        # Every turn emits a tool call — the loop must still terminate.
+        call_count = 0
+
+        async def never_stops(*_args, **_kwargs):
+            nonlocal call_count
+            call_count += 1
+            msg = _make_response("", tokens=10)
+            msg.tool_calls = [
+                {"name": "filesystem_list", "args": {"path": "."}, "id": f"c-{call_count}"}
+            ]
+            return msg
+
+        tools_llm = MagicMock()
+        tools_llm.ainvoke = never_stops
+        llm_instance.bind_tools = MagicMock(return_value=tools_llm)
+        mock_llm.return_value = llm_instance
+
+        state = {
+            "task_description": "Endless",
+            "trace": [],
+            "tokens_used": 0,
+            "retry_count": 0,
+        }
+        config = {
+            "configurable": {
+                "tools": [_fake_filesystem_list_tool()],
+            }
+        }
+        result = await architect_node(state, config)
+
+        # The loop must call the LLM at most MAX_ARCHITECT_TOOL_TURNS times.
+        assert call_count <= MAX_ARCHITECT_TOOL_TURNS
+        # Fallback to Phase 1 blueprint (even though insufficient), so the
+        # broader retry + QA loop gets its shot.
+        assert result["status"] == WorkflowStatus.BUILDING
+        assert result["blueprint"].task_id == "weak-fallback"
+        # Trace must note the tool-loop hit its cap
+        assert any("hit max turns" in t for t in result["trace"])
+
+
+# ---------------------------------------------------------------------------
+# "Fix issue #113" integration-style flow (Issue #193 acceptance)
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+class TestFixIssue113Integration:
+    """Simulates the canonical self-dev gate flow end-to-end from the
+    Architect's perspective.
+
+    The Planner has pre-fetched issue #113 (`github://Abernaughty/agent-dev/issues/113`)
+    into gathered_context. The Architect's Phase 1 produces an empty
+    Blueprint because it can't tell which file to touch. Phase 2 uses
+    filesystem_list/read to orient, then emits a proper Blueprint.
+    """
+
+    @patch("src.orchestrator._fetch_memory_context", return_value=[])
+    @patch("src.orchestrator._get_architect_llm")
+    async def test_planner_prefetched_context_flows_into_architect(
+        self, mock_llm, _mock_mem
+    ):
+        # Planner's pre-fetch (PR 2) already put the issue body on state.
+        planner_prefetched = [
+            {
+                "path": "github://Abernaughty/agent-dev/issues/113",
+                "content": (
+                    "# Issue #113: Self-dev gate test\n\n"
+                    "State: open\n\n"
+                    "Body: Run the end-to-end gate test; the orchestrator "
+                    "should resolve its own task."
+                ),
+                "truncated": False,
+                "source": "github_issue",
+            }
+        ]
+
+        # Phase 1: architect has the issue body but no code to ground on;
+        # emits empty target_files.
+        phase1_bp = _bp(
+            task_id="gate-p1",
+            target_files=[],
+            instructions=(
+                "Need to locate the gate test runner before I can specify "
+                "target_files. Please provide codebase access."
+            ),
+        )
+        # Phase 2: after poking filesystem_list, architect commits.
+        phase2_bp = _bp(
+            task_id="gate-p2",
+            target_files=["scripts/smoke-test.sh"],
+            instructions=(
+                "Extend smoke-test.sh stage 2 to cover the self-dev gate "
+                "scenario described in the issue."
+            ),
+            acceptance_criteria=["Stage 2 of smoke-test.sh exercises the gate scenario"],
+        )
+
+        llm_instance = MagicMock()
+        llm_instance.ainvoke = AsyncMock(
+            return_value=_make_response(phase1_bp.model_dump_json())
+        )
+
+        # Tool-loop: one list → one final blueprint
+        turn1 = _make_response("", tokens=40)
+        turn1.tool_calls = [
+            {"name": "filesystem_list", "args": {"path": "scripts"}, "id": "t1"},
+        ]
+        turn2 = _make_response(phase2_bp.model_dump_json(), tokens=120)
+        tools_llm = MagicMock()
+        tools_llm.ainvoke = AsyncMock(side_effect=[turn1, turn2])
+        llm_instance.bind_tools = MagicMock(return_value=tools_llm)
+        mock_llm.return_value = llm_instance
+
+        # The state captures the full flow as it would arrive after
+        # gather_context_node has folded prefetched items into gathered_context.
+        state = {
+            "task_description": (
+                "Please address issue #113 — the self-dev gate test is blocking "
+                "the roadmap."
+            ),
+            "trace": [],
+            "tokens_used": 0,
+            "retry_count": 0,
+            "gathered_context": planner_prefetched,
+        }
+        config = {
+            "configurable": {
+                "tools": [_fake_filesystem_list_tool(), _fake_filesystem_read_tool()],
+            }
+        }
+        result = await architect_node(state, config)
+
+        assert result["status"] == WorkflowStatus.BUILDING
+        assert result["blueprint"].task_id == "gate-p2"
+        assert result["blueprint"].target_files == ["scripts/smoke-test.sh"]
+
+        # Trace shows both phases ran in order
+        trace_str = "\n".join(result["trace"])
+        assert "escalating to phase 2" in trace_str
+        assert "phase 2 blueprint parsed" in trace_str
+
+        # Phase 1 system prompt must have embedded the prefetched GitHub body.
+        # Inspect the first LLM call's messages.
+        phase1_call = llm_instance.ainvoke.await_args
+        messages = phase1_call.args[0]
+        system_content = messages[0].content
+        assert "github://Abernaughty/agent-dev/issues/113" in system_content
+        assert "Self-dev gate test" in system_content
+
+        # tool_calls_log records the Architect using filesystem_list
+        log = result.get("tool_calls_log", [])
+        assert log, "Phase 2 must append tool calls to the log"
+        assert log[0]["agent"] == "architect"
+        assert log[0]["tool"] == "filesystem_list"