Fix robustness issues raised in post-review of the test generation flow

SummerOneTwo · SummerOneTwo · commit 4e89ec4293ec · 2026-04-29T03:33:10.000+08:00
Tighten answer_ext normalization and retry semantics, clear stale generated outputs before final writes, and make cleanup/semantic checks more resilient across platforms and generator branch styles.

Made-with: Cursor
diff --git a/src/autocode_mcp/tools/generator.py b/src/autocode_mcp/tools/generator.py
@@ -142,18 +142,19 @@ async def execute(
         )
 
     def _check_type34_semantics(self, code: str) -> dict:
-        has_type3 = bool(re.search(r"type\s*==\s*3", code))
-        has_type4 = bool(re.search(r"type\s*==\s*4", code))
+        type3_blocks = self._extract_type_branch_snippets(code, 3)
+        type4_blocks = self._extract_type_branch_snippets(code, 4)
+        has_type3 = bool(type3_blocks)
+        has_type4 = bool(type4_blocks)
         if not has_type3 or not has_type4:
             return {
                 "enabled": True,
-                "passed": False,
-                "reason": "generator lacks explicit type==3/type==4 branches",
-                "hint": "需要给 type=3/type=4 设计不同逻辑，避免仅靠参数放大",
+                "passed": True,
+                "advisory": True,
+                "reason": "semantic check could not reliably detect both type=3/type=4 branches",
+                "hint": "请人工确认 type=3/type=4 分支存在且有实质差异",
             }
 
-        type3_blocks = re.findall(r"type\s*==\s*3[\s\S]{0,240}", code)
-        type4_blocks = re.findall(r"type\s*==\s*4[\s\S]{0,240}", code)
         norm3 = " ".join(type3_blocks).replace(" ", "")
         norm4 = " ".join(type4_blocks).replace(" ", "")
         output_lines = [line.strip() for line in code.splitlines() if "cout" in line or "printf" in line]
@@ -166,6 +167,18 @@ def _check_type34_semantics(self, code: str) -> dict:
             "hint": "为 type=4 增加针对性卡法，而不仅是 n_max/t_max 取最大值",
         }
 
+    def _extract_type_branch_snippets(self, code: str, type_value: int) -> list[str]:
+        patterns = [
+            rf"type\s*==\s*{type_value}\b",
+            rf"\b{type_value}\s*==\s*type\b",
+            rf"case\s+{type_value}\s*:",
+        ]
+        snippets: list[str] = []
+        for pattern in patterns:
+            for match in re.finditer(pattern, code):
+                snippets.append(code[match.start(): match.start() + 240])
+        return snippets
+
 
 class GeneratorRunTool(Tool):
     """运行多策略数据生成器。"""
diff --git a/src/autocode_mcp/tools/problem.py b/src/autocode_mcp/tools/problem.py
@@ -563,6 +563,11 @@ async def execute(
         else:
             final_tests = candidates
 
+        # 最终写盘前清理历史生成产物，防止 resume 场景残留旧编号样例。
+        clear_before_write_error = self._clear_generated_tests(tests_dir, normalized_answer_ext)
+        if clear_before_write_error:
+            return clear_before_write_error
+
         # 写入文件
         generated_tests = []
         test_manifest: list[dict[str, str | int]] = []
@@ -721,6 +726,8 @@ def _normalize_answer_ext(self, answer_ext: str) -> tuple[str | None, ToolResult
             return None, ToolResult.fail("answer_ext cannot be empty")
         if not ext.startswith("."):
             ext = f".{ext}"
+        if not any(ch != "." for ch in ext[1:]):
+            return None, ToolResult.fail("answer_ext must contain non-dot characters")
         if any(ch in ext for ch in ('/', '\\', ':', '*', '?', '"', "<", ">", "|")):
             return None, ToolResult.fail("answer_ext contains illegal characters")
         if ext == ".in":
@@ -758,7 +765,7 @@ def _on_start(pid: int) -> None:
                 # 取消路径保留 PID 到状态文件，供 cleanup 精准回收。
                 if started_pid is not None and not cancelled:
                     active_pids.discard(started_pid)
-            if not getattr(last_result, "error", None):
+            if last_result.success:
                 return last_result
             await asyncio.sleep(0.1 * (2**attempt))
         if last_result is not None:
@@ -1076,7 +1083,13 @@ async def execute(self, problem_dir: str, kill_all_generators: bool = False) ->
         if os.path.exists(state_path) and not pids:
             os.remove(state_path)
             removed_files.append(state_path)
-        return ToolResult.ok(removed_files=removed_files, message="Cleanup finished")
+        return ToolResult.ok(
+            removed_files=removed_files,
+            killed_pids=[],
+            failed_pids=[],
+            warning="PID termination is only supported on Windows" if kill_all_generators and os.name != "nt" else "",
+            message="Cleanup finished",
+        )
 
     def _load_cleanup_state(self, state_path: str) -> dict | None:
         if not os.path.exists(state_path):
diff --git a/src/autocode_mcp/tools/test_verify.py b/src/autocode_mcp/tools/test_verify.py
@@ -493,16 +493,33 @@ def _check_limit_semantics(self, tests_dir: str) -> dict:
         }
 
     def _resolve_answer_ext(self, tests_dir: str, answer_ext: str | None) -> str:
-        if answer_ext:
-            return answer_ext if answer_ext.startswith(".") else f".{answer_ext}"
+        normalized = self._normalize_answer_ext(answer_ext)
+        if normalized:
+            return normalized
         manifest_path = os.path.join(tests_dir, _TEST_MANIFEST_FILENAME)
         if os.path.exists(manifest_path):
             try:
                 with open(manifest_path, encoding="utf-8") as f:
                     manifest = json.load(f)
-                ext = manifest.get("answer_ext")
-                if isinstance(ext, str) and ext:
-                    return ext if ext.startswith(".") else f".{ext}"
+                ext = self._normalize_answer_ext(manifest.get("answer_ext"))
+                if ext:
+                    return ext
             except (json.JSONDecodeError, OSError):
                 pass
         return ".ans"
+
+    def _normalize_answer_ext(self, answer_ext: str | None) -> str | None:
+        if not isinstance(answer_ext, str):
+            return None
+        ext = answer_ext.strip()
+        if not ext:
+            return None
+        if not ext.startswith("."):
+            ext = f".{ext}"
+        if not any(ch != "." for ch in ext[1:]):
+            return None
+        if any(ch in ext for ch in ('/', '\\', ':', '*', '?', '"', "<", ">", "|")):
+            return None
+        if ext == ".in":
+            return None
+        return ext