Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 39 additions & 10 deletions dev-suite/src/agents/planner.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@

from pydantic import BaseModel, Field

from ..tools.github_fetch import fetch_refs_as_context_items
from ..tools.github_fetch import extract_github_refs, fetch_refs_as_context_items

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -501,8 +501,14 @@ def create_planner_session(
create a clear, complete task specification before it's sent to the Architect \
agent for blueprint generation.

You have ZERO tool access — no filesystem, no GitHub, no sandbox. You only \
work with information the user provides and any auto-detected project context.
You do not call tools directly. However, when the user references a GitHub \
issue or PR (e.g. "fix #42", "review issue #113"), the orchestrator \
deterministically pre-fetches the issue/PR body and injects it as a \
message labelled "=== PRE-FETCHED GITHUB CONTEXT ===" below. Treat that \
block as authoritative source material — never tell the user you cannot \
access GitHub when that block is present; summarise its contents and move \
the task forward. You also work with information the user provides and \
any auto-detected project context.

Your responsibilities:
1. Understand the user's objective
Expand Down Expand Up @@ -575,10 +581,12 @@ def _format_github_context_block(github_context: list[dict]) -> str:
if not github_context:
return ""
sections: list[str] = [
"Pre-fetched GitHub references (from the user's message; "
"use these to orient the task, do not ask the user to paste "
"them):",
"=== PRE-FETCHED GITHUB CONTEXT ===",
"The orchestrator already fetched the GitHub issue/PR the user "
"mentioned. Use these summaries to orient the task. Do not tell "
"the user you cannot access GitHub — the content is below.",
]
body_count = 0
for item in github_context:
if not isinstance(item, dict):
continue
Expand All @@ -587,8 +595,10 @@ def _format_github_context_block(github_context: list[dict]) -> str:
if not content:
continue
sections.append(f"--- {path} ---\n{content}")
if len(sections) == 1:
body_count += 1
if body_count == 0:
return ""
sections.append("=== END PRE-FETCHED GITHUB CONTEXT ===")
return "\n\n".join(sections)


Expand Down Expand Up @@ -631,13 +641,26 @@ async def _prefetch_github_refs_for_message(
messages that mention the same issue don't re-fetch). Best-effort:
missing GITHUB_TOKEN or network errors quietly return no new items.
"""
default_owner = os.getenv("GITHUB_OWNER", "")
default_repo = os.getenv("GITHUB_REPO", "")

detected_refs = extract_github_refs(
user_message,
default_owner=default_owner,
default_repo=default_repo,
max_refs=PLANNER_MAX_GITHUB_REFS,
)

token = os.getenv("GITHUB_TOKEN", "")
if not token:
if detected_refs:
logger.warning(
"[PLANNER] Detected %d GitHub ref(s) in session %s but "
"GITHUB_TOKEN is not set — pre-fetch skipped",
len(detected_refs), session.session_id,
)
return []

default_owner = os.getenv("GITHUB_OWNER", "")
default_repo = os.getenv("GITHUB_REPO", "")

existing_paths = {
item.get("path") for item in session.task_spec.github_context
if isinstance(item, dict) and item.get("path")
Expand All @@ -651,6 +674,12 @@ async def _prefetch_github_refs_for_message(
max_refs=PLANNER_MAX_GITHUB_REFS,
max_chars=PLANNER_GITHUB_REF_MAX_CHARS,
)
if detected_refs and not items:
logger.warning(
"[PLANNER] Detected %d GitHub ref(s) in session %s but "
"fetch returned 0 items (bad token, 404, or network error)",
len(detected_refs), session.session_id,
)
new_items = [
item for item in items
if item.get("path") and item["path"] not in existing_paths
Expand Down
134 changes: 134 additions & 0 deletions dev-suite/tests/test_planner.py
Original file line number Diff line number Diff line change
Expand Up @@ -741,3 +741,137 @@ async def fake_llm(model_name, messages):

assert response.task_spec.objective == "Fix"
assert session.task_spec.github_context == []


# =========================================================================
# System-prompt reconciliation with pre-fetched GitHub context (Issue #193)
# =========================================================================


class TestPlannerSystemPromptReconciliation:
"""Regression tests for the bug where the Planner's system prompt
told the LLM it had "ZERO tool access" even when the orchestrator
had pre-fetched GitHub issue/PR bodies. See issue #193 AC #3.
"""

@pytest.mark.asyncio
async def test_prompt_does_not_claim_zero_tool_access_when_context_present(
self, monkeypatch,
):
"""System messages sent to the LLM must not tell it it has ZERO
tool access once github_context is populated, and must include
the PRE-FETCHED GITHUB CONTEXT marker so the LLM knows to use
the injected data instead of apologizing about missing tools.
"""
from src.agents.planner import create_planner_session, send_planner_message

monkeypatch.setenv("GITHUB_TOKEN", "fake-token")
monkeypatch.setenv("GITHUB_OWNER", "Abernaughty")
monkeypatch.setenv("GITHUB_REPO", "agent-dev")

session = create_planner_session(workspace="/proj", languages=["Python"])

fetched = {
"path": "github://Abernaughty/agent-dev/issues/113",
"content": (
"Issue #113: Fix the planner\n\nState: open\n\n"
"Body: the planner needs real GitHub context"
),
"truncated": False,
"source": "github_issue",
}
captured: list[list[dict]] = []

async def fake_fetch(*args, **kwargs):
return [fetched]

async def fake_llm(model_name, messages):
captured.append(messages)
return '```json\n{"objective": "Fix the planner"}\n```'

with patch(
"src.agents.planner.fetch_refs_as_context_items",
side_effect=fake_fetch,
), patch(
"src.agents.planner._call_planner_llm",
side_effect=fake_llm,
):
await send_planner_message(
session,
"Review and implement the fix for GitHub Issue #113.",
)

assert captured, "LLM must have been called"
system_content = "\n".join(
m["content"] for m in captured[0] if m["role"] == "system"
)

# The self-denial phrasing is gone.
assert "ZERO tool access" not in system_content

# The LLM is explicitly told not to claim no GitHub access when
# context is present, and the pre-fetched block is clearly marked.
assert "PRE-FETCHED GITHUB CONTEXT" in system_content
assert "github://Abernaughty/agent-dev/issues/113" in system_content

def test_context_block_has_start_and_end_markers(self):
"""The pre-fetched block uses explicit start/end markers that
match the phrasing the system prompt tells the LLM to look for.
"""
from src.agents.planner import _format_github_context_block

block = _format_github_context_block([
{
"path": "github://acme/widgets/issues/42",
"content": "Body text",
"source": "github_issue",
}
])

assert "=== PRE-FETCHED GITHUB CONTEXT ===" in block
assert "=== END PRE-FETCHED GITHUB CONTEXT ===" in block
assert "github://acme/widgets/issues/42" in block
assert "Body text" in block

def test_context_block_empty_when_no_items_have_content(self):
"""Empty or malformed context items produce no block at all — we
don't want to emit markers with no body and confuse the LLM.
"""
from src.agents.planner import _format_github_context_block

assert _format_github_context_block([]) == ""
assert _format_github_context_block([{"path": "x", "content": ""}]) == ""
assert _format_github_context_block(["not-a-dict"]) == "" # type: ignore[list-item]

@pytest.mark.asyncio
async def test_warns_when_refs_detected_but_no_token(
self, monkeypatch, caplog,
):
"""Missing GITHUB_TOKEN should log a warning when the user's
message actually contained refs, so operators can diagnose why
pre-fetch silently returned nothing.
"""
import logging as _logging

from src.agents.planner import create_planner_session, send_planner_message

monkeypatch.delenv("GITHUB_TOKEN", raising=False)
monkeypatch.setenv("GITHUB_OWNER", "Abernaughty")
monkeypatch.setenv("GITHUB_REPO", "agent-dev")

session = create_planner_session(workspace="/proj", languages=["Python"])

async def fake_llm(model_name, messages):
return '```json\n{}\n```'

with caplog.at_level(_logging.WARNING, logger="src.agents.planner"):
with patch(
"src.agents.planner._call_planner_llm",
side_effect=fake_llm,
):
await send_planner_message(session, "Fix issue #113")

assert any(
"GITHUB_TOKEN is not set" in record.message
for record in caplog.records
)
Loading