From 75c1ae6f57517fa9f05f6189a7a6815f9f8457c2 Mon Sep 17 00:00:00 2001 From: lbx154 Date: Thu, 19 Mar 2026 09:35:30 +0000 Subject: [PATCH] fix: prevent BTW from returning unrelated files for normal questions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The file-request detection in btw_skills.py was too aggressive — broad keywords like "看看", "show me", ".py", ".md", "readme" caused almost every /btw question to be misclassified as a file request, skipping the LLM answer entirely and returning an unrelated README instead. Three changes: 1. Trim fileRequestKeywords in config.json to intent-clear words only 2. Add _has_file_intent_context gate so broad keywords alone no longer trigger file search on longer conversational questions 3. Only give README the +50 score boost when the user explicitly mentions "readme" in the question Co-Authored-By: Claude Opus 4.6 (1M context) --- codex_autoloop/btw_skills.py | 30 ++++++++++- skills/btw-file-return/config.json | 16 +----- tests/test_btw_skills.py | 81 ++++++++++++++++++++++++++++++ 3 files changed, 110 insertions(+), 17 deletions(-) diff --git a/codex_autoloop/btw_skills.py b/codex_autoloop/btw_skills.py index 4a42bf0..c7e8a63 100644 --- a/codex_autoloop/btw_skills.py +++ b/codex_autoloop/btw_skills.py @@ -39,7 +39,10 @@ def resolve_btw_skill_result(*, working_dir: str, question: str, max_attachments lowered = normalized.lower() explicit_names = _extract_explicit_file_names(normalized) image_request = any(keyword in lowered for keyword in config.image_request_keywords) or "图" in normalized - file_request = image_request or explicit_names or any(keyword in lowered for keyword in config.file_request_keywords) + file_request = image_request or bool(explicit_names) or ( + any(keyword in lowered for keyword in config.file_request_keywords) + and _has_file_intent_context(normalized) + ) if not file_request: return BtwSkillResult(is_file_request=False, attachments=[], summary_lines=[]) @@ -169,7 +172,7 @@ def _score_candidate( if path.suffix.lower() in config.textual_file_extensions: score += 20 reasons.append("document/code extension") - if name_lower in {"readme.md", "readme.txt"}: + if name_lower in {"readme.md", "readme.txt"} and any("readme" in n for n in explicit_names): score += 50 reasons.append("README priority") @@ -190,6 +193,29 @@ def _iter_files(root: Path, *, skip_dir_names: set[str]): yield current_path / name +def _has_file_intent_context(question: str) -> bool: + """Return True when the question looks like a file request, not a general question that happens to contain a keyword.""" + lowered = question.lower() + # Short queries with a file keyword are likely file requests + if len(question.split()) <= 4: + return True + # Patterns that strongly suggest "give me a file" + intent_patterns = [ + r"发我\s*\S+", + r"给我\s*\S+", + r"send\s+(me\s+)?the\s+", + r"send\s+(me\s+)?\S+\.\S+", + r"上传\s*\S+", + r"把\s*\S+.*发", + r"file\s+for\b", + r"files?\s+named\b", + ] + for pat in intent_patterns: + if re.search(pat, lowered): + return True + return False + + def _extract_explicit_file_names(question: str) -> list[str]: matches = re.findall(r"([A-Za-z0-9_\-./\\]+\.[A-Za-z0-9]{1,8})", question) normalized: list[str] = [] diff --git a/skills/btw-file-return/config.json b/skills/btw-file-return/config.json index 4acb0b2..9cc3ffe 100644 --- a/skills/btw-file-return/config.json +++ b/skills/btw-file-return/config.json @@ -50,24 +50,10 @@ "文件", "发我", "给我", - "看看", "上传", "file", "files", - "send", - "show me", - "open", - "readme", - ".md", - ".png", - ".jpg", - ".jpeg", - ".pdf", - ".json", - ".yaml", - ".yml", - ".txt", - ".py" + "send" ], "preferredImageHints": [ "effect", diff --git a/tests/test_btw_skills.py b/tests/test_btw_skills.py index 73aa9eb..c13310e 100644 --- a/tests/test_btw_skills.py +++ b/tests/test_btw_skills.py @@ -1,5 +1,7 @@ from pathlib import Path +import pytest + from codex_autoloop.btw_skills import load_btw_file_return_skill_config, resolve_btw_skill_result @@ -35,3 +37,82 @@ def test_resolve_btw_skill_result_finds_repo_image_fixture() -> None: assert result.is_file_request is True assert result.attachments assert any(item.path.endswith("argusbot-preview.png") for item in result.attachments) + + +# --- Tests for false-positive prevention --- + + +@pytest.mark.parametrize( + "question", + [ + "帮我看看这个函数怎么用", + "show me how the loop works", + "can you explain the readme structure", + "open a discussion about the design", + "看看这段代码有什么问题", + "what does this .py module do", + "tell me about the .md format", + ], +) +def test_normal_questions_not_treated_as_file_request(tmp_path: Path, question: str) -> None: + """Normal questions should NOT be detected as file requests.""" + readme = tmp_path / "README.md" + readme.write_text("# hello") + result = resolve_btw_skill_result(working_dir=str(tmp_path), question=question) + assert result.is_file_request is False + + +def test_explicit_file_request_still_works(tmp_path: Path) -> None: + """Explicit file name in question should still trigger file request.""" + config_file = tmp_path / "config.json" + config_file.write_text("{}") + result = resolve_btw_skill_result( + working_dir=str(tmp_path), + question="send me config.json", + ) + assert result.is_file_request is True + assert result.attachments + assert result.attachments[0].path.endswith("config.json") + + +def test_explicit_readme_request_returns_readme(tmp_path: Path) -> None: + """Asking for README.md explicitly should return it with priority boost.""" + readme = tmp_path / "README.md" + readme.write_text("# project") + other = tmp_path / "notes.md" + other.write_text("notes") + result = resolve_btw_skill_result( + working_dir=str(tmp_path), + question="发我 README.md", + ) + assert result.is_file_request is True + assert result.attachments + assert result.attachments[0].path.endswith("README.md") + + +def test_readme_not_returned_for_generic_file_keyword(tmp_path: Path) -> None: + """Generic file keyword should not return README when user didn't ask for it.""" + readme = tmp_path / "README.md" + readme.write_text("# project") + result = resolve_btw_skill_result( + working_dir=str(tmp_path), + question="给我文件", + ) + # Even if is_file_request is True, README should not get priority boost + if result.attachments: + for att in result.attachments: + assert "README priority" not in att.reason + + +def test_image_request_still_works(tmp_path: Path) -> None: + """Image requests via imageRequestKeywords should still work.""" + assets = tmp_path / "assets" + assets.mkdir() + img = assets / "demo.png" + img.write_bytes(b"png") + result = resolve_btw_skill_result( + working_dir=str(tmp_path), + question="我要看看效果图", + ) + assert result.is_file_request is True + assert result.attachments