From 4c75874016984e8de94d9e4905e5b221bd958a2a Mon Sep 17 00:00:00 2001 From: Mike Abernathy Date: Fri, 17 Apr 2026 15:47:39 -0600 Subject: [PATCH] fix(planner): plumb github_repo through API + auto-detect from git remote (refs #193) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After PR #197 landed the system-prompt reconciliation, manual smoke revealed the pre-fetch still didn't run because the default owner/repo was never actually resolvable: `extract_github_refs` drops same-repo refs like "Issue #113" when default_owner/default_repo are empty, and the Planner only read them from `GITHUB_OWNER`/`GITHUB_REPO` env vars (undocumented). The dashboard's REMOTE repo picker never reached the Planner either — `PlannerStartRequest` had no `github_repo` field. End-to-end plumbing: - `PlannerStartRequest` now accepts `github_repo` + `workspace_type`. - Dashboard `startSession` sends both on REMOTE and LOCAL starts. - `create_planner_session` stores `github_repo` on the session. - When `github_repo` is not passed, the server auto-detects it by parsing `remote.origin.url` from the workspace's `.git/config` (SSH and HTTPS, with/without `.git` suffix, origin-only). - `_prefetch_github_refs_for_message` prefers session.github_repo, falling back to env vars for CLI/headless use. Anti-hallucination + operator ergonomics: - System prompt now explicitly forbids inventing issue/PR contents when the "=== PRE-FETCHED GITHUB CONTEXT ===" block is absent — the LLM must tell the user the context is missing instead of fabricating plausible-sounding bodies. - New loose `#\d+` heuristic fires a warning log when the message contains `#N` references but no default repo is resolvable, so operators get a breadcrumb instead of a silent drop. Tests: 12 new tests covering explicit-repo plumbing, git-remote auto-detect (SSH/HTTPS/non-origin/non-github), session-repo used for pre-fetch, env-var fallback, silent-drop warning, and the anti-hallucination system prompt. 73/73 planner tests pass; 205/205 related tests pass. Closes amended ACs #3a-#3e in #193. AC #3f (manual smoke on LOCAL and REMOTE) follows post-merge. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../src/lib/components/views/ChatView.svelte | 20 +- dashboard/src/lib/stores/planner.svelte.ts | 11 +- dev-suite/src/agents/planner.py | 110 ++++++- dev-suite/src/api/main.py | 9 +- dev-suite/src/api/models.py | 17 ++ dev-suite/tests/test_planner.py | 268 ++++++++++++++++++ 6 files changed, 425 insertions(+), 10 deletions(-) diff --git a/dashboard/src/lib/components/views/ChatView.svelte b/dashboard/src/lib/components/views/ChatView.svelte index 98b4a49..22fe9f8 100644 --- a/dashboard/src/lib/components/views/ChatView.svelte +++ b/dashboard/src/lib/components/views/ChatView.svelte @@ -209,9 +209,23 @@ if (!workspaceReady) return; if (plannerStore.phase === 'idle' || plannerStore.phase === 'submitted') { - const options = workspacesStore.isSelectedProtected && workspacesStore.verifiedPin - ? { pin: workspacesStore.verifiedPin } - : undefined; + // Issue #193: forward workspace_type + github_repo on start + // (not just on submit) so the Planner's pre-fetch can + // resolve same-repo issue/PR refs during the conversation. + const options: { + pin?: string; + workspace_type?: string; + github_repo?: string | null; + } = { + workspace_type: workspacesStore.workspaceType, + github_repo: + workspacesStore.workspaceType === 'github' + ? workspacesStore.githubRepo + : null, + }; + if (workspacesStore.isSelectedProtected && workspacesStore.verifiedPin) { + options.pin = workspacesStore.verifiedPin; + } if (plannerStore.phase === 'submitted') { plannerStore.reset(); diff --git a/dashboard/src/lib/stores/planner.svelte.ts b/dashboard/src/lib/stores/planner.svelte.ts index 12922f3..319f1d4 100644 --- a/dashboard/src/lib/stores/planner.svelte.ts +++ b/dashboard/src/lib/stores/planner.svelte.ts @@ -115,7 +115,11 @@ export const plannerStore = { */ async startSession( workspace: string, - options?: { pin?: string } + options?: { + pin?: string; + workspace_type?: string; + github_repo?: string | null; + } ): Promise { const generation = ++requestGeneration; error = null; @@ -130,8 +134,13 @@ export const plannerStore = { submittedTaskId = null; try { + // Issue #193: forward workspace_type + github_repo so the + // Planner can resolve same-repo refs like "Issue #113" in + // user messages using the correct default owner/repo. const payload: Record = { workspace }; if (options?.pin) payload.pin = options.pin; + if (options?.workspace_type) payload.workspace_type = options.workspace_type; + if (options?.github_repo) payload.github_repo = options.github_repo; const res = await fetch('/api/planner', { method: 'POST', diff --git a/dev-suite/src/agents/planner.py b/dev-suite/src/agents/planner.py index 1eb4415..827c038 100644 --- a/dev-suite/src/agents/planner.py +++ b/dev-suite/src/agents/planner.py @@ -61,6 +61,12 @@ # Planner only needs a quick orientation, not full issue bodies. PLANNER_GITHUB_REF_MAX_CHARS = 1200 +# Issue #193: loose heuristic for "the user mentioned a `#N` ref but we +# couldn't resolve it." Used only for warning diagnostics when the +# precise `extract_github_refs` returns nothing because no default +# owner/repo is configured. Intentionally wider than the real pattern. +_LOOSE_HASH_REF_RE = re.compile(r"(? bool: @@ -438,15 +450,67 @@ def build_checklist(task_spec: TaskSpec) -> ChecklistStatus: # --------------------------------------------------------------------------- +_GIT_REMOTE_GITHUB_RE = re.compile( + r""" + (?:git@github\.com:|https?://(?:[^@/]+@)?github\.com/) # ssh or https prefix + (?P[\w.-]+)/(?P[\w.-]+?) # owner/repo + (?:\.git)?/?\s*$ # optional .git + trailing slash + """, + re.VERBOSE | re.IGNORECASE, +) + + +def _detect_github_repo_from_workspace(workspace: str) -> str | None: + """Best-effort parse of `.git/config` for the origin GitHub repo. + + Returns "owner/repo" or None. Handles both SSH (`git@github.com:o/r.git`) + and HTTPS (`https://github.com/o/r(.git)`) remote formats. Never raises. + """ + if not workspace: + return None + try: + config_path = Path(workspace) / ".git" / "config" + if not config_path.is_file(): + return None + text = config_path.read_text(encoding="utf-8", errors="replace") + except OSError: + return None + + in_origin = False + for raw in text.splitlines(): + line = raw.strip() + if line.startswith("[") and line.endswith("]"): + in_origin = line.lower() == '[remote "origin"]' + continue + if not in_origin: + continue + if line.lower().startswith("url"): + _, _, value = line.partition("=") + match = _GIT_REMOTE_GITHUB_RE.search(value.strip()) + if match: + owner = match.group("owner") + repo = match.group("repo") + if repo.endswith(".git"): + repo = repo[:-4] + return f"{owner}/{repo}" + return None + + def create_planner_session( workspace: str, languages: list[str] | None = None, frameworks: list[str] | None = None, + github_repo: str | None = None, ) -> PlannerSession: """Create a new planner session with optional pre-populated fields. The workspace is always set. Languages and frameworks can be pre-populated from auto-inference (infer_workspace_stack). + + If ``github_repo`` is not provided, attempts to auto-detect it by + parsing ``remote.origin.url`` from the workspace's ``.git/config``. + The resolved repo is used as the default owner/repo when the Planner + pre-fetches issue/PR refs like "Issue #113" from user messages. """ task_spec = TaskSpec( workspace=workspace, @@ -455,9 +519,12 @@ def create_planner_session( ) checklist = build_checklist(task_spec) + resolved_repo = github_repo or _detect_github_repo_from_workspace(workspace) + session = PlannerSession( task_spec=task_spec, checklist=checklist, + github_repo=resolved_repo, ) # System message with pre-populated context — formatted with clear @@ -507,8 +574,18 @@ def create_planner_session( message labelled "=== PRE-FETCHED GITHUB CONTEXT ===" below. Treat that \ block as authoritative source material — never tell the user you cannot \ access GitHub when that block is present; summarise its contents and move \ -the task forward. You also work with information the user provides and \ -any auto-detected project context. +the task forward. + +CRITICAL anti-hallucination rule: if NO "=== PRE-FETCHED GITHUB CONTEXT ===" \ +block is present below, the pre-fetch did not run (missing token, wrong \ +repo configured, or network error). In that case you MUST NOT invent, \ +guess, or describe the issue/PR contents — doing so produces confidently \ +wrong task specs. Instead, briefly tell the user the context wasn't \ +injected and ask them to paste the issue title and body, or confirm the \ +repo configuration. + +You also work with information the user provides and any auto-detected \ +project context. Your responsibilities: 1. Understand the user's objective @@ -641,8 +718,18 @@ async def _prefetch_github_refs_for_message( messages that mention the same issue don't re-fetch). Best-effort: missing GITHUB_TOKEN or network errors quietly return no new items. """ - default_owner = os.getenv("GITHUB_OWNER", "") - default_repo = os.getenv("GITHUB_REPO", "") + # Resolve default owner/repo for same-repo refs ("Issue #113" without + # an owner/repo prefix). Session-level value wins — that's what the + # dashboard repo picker or the git-remote auto-detect stored. + # Env vars are a fallback for CLI/testing scenarios. + default_owner = "" + default_repo = "" + source_repo = session.github_repo or "" + if source_repo and "/" in source_repo: + default_owner, _, default_repo = source_repo.partition("/") + if not default_owner or not default_repo: + default_owner = os.getenv("GITHUB_OWNER", "") or default_owner + default_repo = os.getenv("GITHUB_REPO", "") or default_repo detected_refs = extract_github_refs( user_message, @@ -651,6 +738,21 @@ async def _prefetch_github_refs_for_message( max_refs=PLANNER_MAX_GITHUB_REFS, ) + # Loose heuristic: catch the case where the user clearly referenced + # something like "Issue #113" but we had no default repo configured + # to resolve it. `extract_github_refs` drops same-repo refs silently + # when default_owner/repo are empty, so we'd otherwise have no + # breadcrumb in the logs pointing at the missing config. + if not detected_refs and (not default_owner or not default_repo): + if _LOOSE_HASH_REF_RE.search(user_message): + logger.warning( + "[PLANNER] Session %s message contains '#N' refs but no " + "GitHub repo is configured — pre-fetch skipped. Set the " + "dashboard repo picker, ensure the workspace has a GitHub " + "`remote.origin.url`, or set GITHUB_OWNER/GITHUB_REPO.", + session.session_id, + ) + token = os.getenv("GITHUB_TOKEN", "") if not token: if detected_refs: diff --git a/dev-suite/src/api/main.py b/dev-suite/src/api/main.py index 4773cb9..973876e 100644 --- a/dev-suite/src/api/main.py +++ b/dev-suite/src/api/main.py @@ -363,11 +363,14 @@ async def start_planner_session( logger.warning("Auto-inference failed for %s: %s", body.workspace, e) stack = {"languages": [], "frameworks": []} - # Create session + # Create session (Issue #193: pass github_repo through so the + # Planner's deterministic pre-fetch can resolve same-repo refs + # like "Issue #113" in user messages). session = create_planner_session( workspace=body.workspace, languages=stack["languages"], frameworks=stack["frameworks"], + github_repo=body.github_repo, ) planner_sessions.create(session) @@ -376,11 +379,13 @@ async def start_planner_session( resp.message = session.messages[0].content if session.messages else "" logger.info( - "Planner session started: %s (workspace=%s, languages=%s, frameworks=%s)", + "Planner session started: %s (workspace=%s, languages=%s, " + "frameworks=%s, github_repo=%s)", session.session_id, body.workspace, stack["languages"], stack["frameworks"], + session.github_repo, ) return _ok(resp) diff --git a/dev-suite/src/api/models.py b/dev-suite/src/api/models.py index 1267cf7..0e4eef1 100644 --- a/dev-suite/src/api/models.py +++ b/dev-suite/src/api/models.py @@ -438,6 +438,23 @@ class PlannerStartRequest(BaseModel): default=None, description="Admin PIN for protected workspaces.", ) + # Issue #193: let the dashboard tell the Planner which GitHub repo + # to use when resolving same-repo refs like "Issue #113" in user + # messages. If omitted, the server auto-detects from the workspace's + # `.git/config` (LOCAL) or falls back to env vars. + workspace_type: str = Field( + default="local", + description="Workspace kind: 'local' or 'github'.", + ) + github_repo: str | None = Field( + default=None, + description=( + "GitHub repository in 'owner/repo' format. Optional — used " + "as the default owner/repo when pre-fetching issue/PR refs " + "mentioned in user messages. Sent by REMOTE mode, " + "auto-detected from `.git/config` in LOCAL mode." + ), + ) class PlannerMessageRequest(BaseModel): diff --git a/dev-suite/tests/test_planner.py b/dev-suite/tests/test_planner.py index ea7c40c..f7a7be2 100644 --- a/dev-suite/tests/test_planner.py +++ b/dev-suite/tests/test_planner.py @@ -875,3 +875,271 @@ async def fake_llm(model_name, messages): "GITHUB_TOKEN is not set" in record.message for record in caplog.records ) + + +# ========================================================================= +# Session-level github_repo plumbing + git-remote auto-detect (Issue #193) +# ========================================================================= + + +class TestPlannerGithubRepoPlumbing: + """Issue #193 AC #3a-#3c: the Planner must resolve same-repo refs + like "Issue #113" using either an explicitly-passed github_repo + (from the dashboard repo picker) or one auto-detected by parsing + the workspace's `.git/config` — not just env vars. + """ + + def test_explicit_github_repo_stored_on_session(self, tmp_path): + """Passing github_repo to create_planner_session sticks it on + the session for downstream pre-fetch resolution. + """ + from src.agents.planner import create_planner_session + + session = create_planner_session( + workspace=str(tmp_path), + github_repo="Abernaughty/agent-dev", + ) + assert session.github_repo == "Abernaughty/agent-dev" + + def test_explicit_repo_wins_over_auto_detect(self, tmp_path): + """Explicit github_repo must win over the auto-detect so users + can override what git says (fork workflows, monorepos, etc.). + """ + from src.agents.planner import create_planner_session + + git_dir = tmp_path / ".git" + git_dir.mkdir() + (git_dir / "config").write_text( + '[remote "origin"]\n' + '\turl = https://github.com/wrong/repo.git\n', + encoding="utf-8", + ) + session = create_planner_session( + workspace=str(tmp_path), + github_repo="right/repo", + ) + assert session.github_repo == "right/repo" + + def test_auto_detect_https_remote(self, tmp_path): + """HTTPS `remote.origin.url` with .git suffix parses cleanly.""" + from src.agents.planner import create_planner_session + + git_dir = tmp_path / ".git" + git_dir.mkdir() + (git_dir / "config").write_text( + '[core]\n' + '\trepositoryformatversion = 0\n' + '[remote "origin"]\n' + '\turl = https://github.com/Abernaughty/agent-dev.git\n' + '\tfetch = +refs/heads/*:refs/remotes/origin/*\n', + encoding="utf-8", + ) + session = create_planner_session(workspace=str(tmp_path)) + assert session.github_repo == "Abernaughty/agent-dev" + + def test_auto_detect_ssh_remote(self, tmp_path): + """SSH `git@github.com:owner/repo.git` parses cleanly too.""" + from src.agents.planner import create_planner_session + + git_dir = tmp_path / ".git" + git_dir.mkdir() + (git_dir / "config").write_text( + '[remote "origin"]\n' + '\turl = git@github.com:Abernaughty/agent-dev.git\n', + encoding="utf-8", + ) + session = create_planner_session(workspace=str(tmp_path)) + assert session.github_repo == "Abernaughty/agent-dev" + + def test_auto_detect_handles_no_dot_git_suffix(self, tmp_path): + """Some clones omit the `.git` suffix in the remote URL.""" + from src.agents.planner import create_planner_session + + git_dir = tmp_path / ".git" + git_dir.mkdir() + (git_dir / "config").write_text( + '[remote "origin"]\n' + '\turl = https://github.com/Abernaughty/agent-dev\n', + encoding="utf-8", + ) + session = create_planner_session(workspace=str(tmp_path)) + assert session.github_repo == "Abernaughty/agent-dev" + + def test_auto_detect_ignores_non_github_remotes(self, tmp_path): + """Self-hosted Gitea/GitLab remotes don't trigger false positives.""" + from src.agents.planner import create_planner_session + + git_dir = tmp_path / ".git" + git_dir.mkdir() + (git_dir / "config").write_text( + '[remote "origin"]\n' + '\turl = https://gitlab.example.com/team/proj.git\n', + encoding="utf-8", + ) + session = create_planner_session(workspace=str(tmp_path)) + assert session.github_repo is None + + def test_auto_detect_skips_non_origin_remote(self, tmp_path): + """Only the `origin` remote matters; other remotes are ignored.""" + from src.agents.planner import create_planner_session + + git_dir = tmp_path / ".git" + git_dir.mkdir() + (git_dir / "config").write_text( + '[remote "upstream"]\n' + '\turl = https://github.com/Abernaughty/agent-dev.git\n' + '[remote "origin"]\n' + '\turl = https://github.com/forkuser/agent-dev.git\n', + encoding="utf-8", + ) + session = create_planner_session(workspace=str(tmp_path)) + assert session.github_repo == "forkuser/agent-dev" + + def test_auto_detect_returns_none_when_no_git_dir(self, tmp_path): + """Plain directories without a .git folder just yield None.""" + from src.agents.planner import create_planner_session + + session = create_planner_session(workspace=str(tmp_path)) + assert session.github_repo is None + + @pytest.mark.asyncio + async def test_session_github_repo_used_for_same_repo_refs( + self, tmp_path, monkeypatch, + ): + """The session's github_repo resolves "Issue #113" even when + GITHUB_OWNER/GITHUB_REPO env vars are unset. + """ + from src.agents.planner import create_planner_session, send_planner_message + + monkeypatch.setenv("GITHUB_TOKEN", "fake-token") + monkeypatch.delenv("GITHUB_OWNER", raising=False) + monkeypatch.delenv("GITHUB_REPO", raising=False) + + session = create_planner_session( + workspace=str(tmp_path), + github_repo="Abernaughty/agent-dev", + ) + + captured_kwargs: dict = {} + + async def fake_fetch(*args, **kwargs): + captured_kwargs.update(kwargs) + return [{ + "path": "github://Abernaughty/agent-dev/issues/113", + "content": "Issue #113 body", + "truncated": False, + "source": "github_issue", + }] + + async def fake_llm(model_name, messages): + return '```json\n{}\n```' + + with patch( + "src.agents.planner.fetch_refs_as_context_items", + side_effect=fake_fetch, + ), patch( + "src.agents.planner._call_planner_llm", + side_effect=fake_llm, + ): + await send_planner_message( + session, "Review and implement the fix for Issue #113." + ) + + assert captured_kwargs.get("default_owner") == "Abernaughty" + assert captured_kwargs.get("default_repo") == "agent-dev" + assert any( + item["path"] == "github://Abernaughty/agent-dev/issues/113" + for item in session.task_spec.github_context + ) + + @pytest.mark.asyncio + async def test_env_vars_are_fallback_when_session_repo_missing( + self, tmp_path, monkeypatch, + ): + """Env vars still work as a fallback for CLI / headless use.""" + from src.agents.planner import create_planner_session, send_planner_message + + monkeypatch.setenv("GITHUB_TOKEN", "fake-token") + monkeypatch.setenv("GITHUB_OWNER", "envowner") + monkeypatch.setenv("GITHUB_REPO", "envrepo") + + session = create_planner_session(workspace=str(tmp_path)) + assert session.github_repo is None # no git dir, no explicit + + captured_kwargs: dict = {} + + async def fake_fetch(*args, **kwargs): + captured_kwargs.update(kwargs) + return [] + + async def fake_llm(model_name, messages): + return '```json\n{}\n```' + + with patch( + "src.agents.planner.fetch_refs_as_context_items", + side_effect=fake_fetch, + ), patch( + "src.agents.planner._call_planner_llm", + side_effect=fake_llm, + ): + await send_planner_message(session, "Fix issue #7") + + assert captured_kwargs.get("default_owner") == "envowner" + assert captured_kwargs.get("default_repo") == "envrepo" + + @pytest.mark.asyncio + async def test_warns_when_hash_ref_present_but_no_repo_configured( + self, tmp_path, monkeypatch, caplog, + ): + """The loose `#N` heuristic catches the silent-drop case so + operators see a breadcrumb pointing at the missing config. + """ + import logging as _logging + + from src.agents.planner import create_planner_session, send_planner_message + + monkeypatch.setenv("GITHUB_TOKEN", "fake-token") + monkeypatch.delenv("GITHUB_OWNER", raising=False) + monkeypatch.delenv("GITHUB_REPO", raising=False) + + session = create_planner_session(workspace=str(tmp_path)) + assert session.github_repo is None + + async def fake_llm(model_name, messages): + return '```json\n{}\n```' + + with caplog.at_level(_logging.WARNING, logger="src.agents.planner"): + with patch( + "src.agents.planner._call_planner_llm", + side_effect=fake_llm, + ): + # "Issue #113" — extract_github_refs returns [] because + # no default_owner/repo, but the loose heuristic fires. + await send_planner_message(session, "Please look at Issue #113") + + assert any( + "no GitHub repo is configured" in record.message + for record in caplog.records + ) + + +# ========================================================================= +# Anti-hallucination system prompt (Issue #193 AC #3d) +# ========================================================================= + + +class TestPlannerAntiHallucination: + def test_system_prompt_forbids_inventing_issue_contents(self): + """The prompt must explicitly tell the LLM not to invent issue + contents when the pre-fetched block is absent — this was the + failure mode from the first manual smoke where the LLM made up + a fake #113 body. + """ + from src.agents.planner import _PLANNER_SYSTEM_PROMPT + + prompt = _PLANNER_SYSTEM_PROMPT.lower() + # Mentions the marker so the LLM can check for it + assert "pre-fetched github context" in prompt + # Explicitly forbids invention + assert "must not invent" in prompt or "must not" in prompt + assert "invent" in prompt or "guess" in prompt