diff --git a/AGENTS.md b/AGENTS.md
index f0255b8e..349c25fd 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -217,6 +217,8 @@ dingo info --rules                        # Rule evaluators only
 dingo info --llm                          # LLM evaluators only
 dingo info --groups                       # Rule groups only
 dingo info --json                         # JSON output
+dingo info --count                        # Metric counts only (rules / llm / groups / total_metrics)
+dingo info --json --count                  # JSON with top-level "counts" plus full lists
 
 # Start MCP server (for AI agent integration)
 dingo serve                               # SSE transport on 0.0.0.0:8000
diff --git a/dingo/model/llm/compare/llm_html_extract_compare.py b/dingo/model/llm/compare/llm_html_extract_compare.py
index 48977ccd..43908130 100644
--- a/dingo/model/llm/compare/llm_html_extract_compare.py
+++ b/dingo/model/llm/compare/llm_html_extract_compare.py
@@ -95,13 +95,28 @@ class LLMHtmlExtractCompare(BaseOpenAI):
 
     @classmethod
     def build_messages(cls, input_data: Data) -> List:
+        raw_data = getattr(input_data, "raw_data", None) or {}
+        # Backward-compatible input handling:
+        # - Preferred: raw_data["magic_md"] and raw_data["content"] (legacy dataset schema)
+        # - Fallback: input_data.prompt (tool A) and input_data.reference (tool B)
+        # - Falsy raw_data values (missing/None/"") use the fallback; ValueError if either is still None
+        tool_a_md = raw_data.get("magic_md", None) or getattr(input_data, "prompt", None)
+        tool_b_md = raw_data.get("content", None) or getattr(input_data, "reference", None)
+
+        if tool_a_md is None or tool_b_md is None:
+            raise ValueError(
+                "LLMHtmlExtractCompare requires Tool A and Tool B markdown. "
+                "Provide raw_data['magic_md'] and raw_data['content'], or provide Data.prompt (tool A) "
+                "and Data.reference (tool B)."
+            )
+
         messages = [
             {
                 "role": "user",
                 "content": cls.prompt.format(
                     input_data.content,
-                    input_data.raw_data["magic_md"],
-                    input_data.raw_data["content"],
+                    tool_a_md,
+                    tool_b_md,
                 ),
             }
         ]
diff --git a/dingo/model/llm/compare/llm_html_extract_compare_v3.py b/dingo/model/llm/compare/llm_html_extract_compare_v3.py
new file mode 100644
index 00000000..1f0c4941
--- /dev/null
+++ b/dingo/model/llm/compare/llm_html_extract_compare_v3.py
@@ -0,0 +1,221 @@
+import json
+import re
+from typing import List
+
+from dingo.io.input import Data, RequiredField
+from dingo.io.output.eval_detail import EvalDetail
+from dingo.model import Model
+from dingo.model.llm.base_openai import BaseOpenAI
+from dingo.model.response.response_class import ResponseScoreTypeNameReason
+from dingo.utils import log
+from dingo.utils.exception import ConvertJsonError
+
+
+@Model.llm_register("LLMHtmlExtractCompareV3")
+class LLMHtmlExtractCompareV3(BaseOpenAI):
+    """
+    HTML提取工具对比评估 V3 版本
+
+    基于 LLMTextQualityV5 的质量维度（Completeness / Effectiveness / Similarity / Security）
+    对两个 HTML 提取工具的完整输出做对比评估，判断哪个工具的提取质量更高。
+
+    与 V2 的区别：V2 侧重"谁保留了更多信息内容"，V3 侧重"谁引入了更少质量缺陷"。
+    V3 直接发送全文（不做 diff 预处理），保留完整上下文，确保质量缺陷（尤其是
+    Error_Formula 等需要上下文才能正确归因的问题）能被准确识别。
+
+    输入数据要求：
+    - input_data.prompt: 工具A提取的文本（对应 Data.prompt 字段）
+    - input_data.content: 工具B提取的文本（对应 Data.content 字段）
+    - language: 可选，来自 input_data.language 或 raw_data["language"]，缺省为 "en"
+
+    EvalDetail.label 前缀与 Data 字段对齐（避免 TOOL_ONE/TOOL_TWO 歧义）：
+    - PROMPT_BETTER：score=1，Data.prompt 侧提取质量更好
+    - CONTENT_BETTER：score=2，Data.content 侧更好
+    - EXTRACTION_EQUAL：score=0，两者相当
+    """
+
+    _metric_info = {
+        "category": "Pretrain Text Quality Assessment Metrics",
+        "metric_name": "LLMHtmlExtractCompareV3",
+        "description": "Compares two HTML extraction tools using LLM pretraining quality dimensions (completeness, effectiveness, similarity, security) with full-text evaluation for accurate defect attribution",
+    }
+
+    _required_fields = [RequiredField.CONTENT, RequiredField.PROMPT]
+
+    prompt = {
+        "content_en": r"""You are an expert in assessing pretraining data quality for large language models. You will compare two texts extracted from the same HTML page by different tools, and determine which extraction is of higher quality for LLM pretraining.
+
+# Quality Dimensions
+
+Evaluate BOTH texts against these dimensions and compare:
+
+## 1. Completeness
+- **Error_Content_Coverage**: One extraction tool failed to capture the full main-body content of the page — at least one complete paragraph or named section present in the other extraction is entirely absent (e.g., an "Applications" or "Common Algorithms" section is missing). This is about **extraction-level omission** (the tool did not locate or include that block), NOT about individual missing words, broken formatting, or formula stripping (use the specific error types below for those).
+- **Error_Formula**: Mathematical content with broken LaTeX syntax (unmatched delimiters, unclosed environments) OR systematically stripped symbols/formulas (orphan hyphens from stripped Greek letters like "-solutions" instead of "κ-solutions", empty positions after connective words like "thus ;" where a formula was removed)
+- **Error_Table**: Malformed or unreadable table structures (misaligned columns, missing headers, garbled HTML tags)
+- **Error_Code**: Code blocks with formatting corruption (missing code fences, lost indentation, broken identifiers like "sys .argv", line numbers mixed with code)
+
+## 2. Effectiveness
+- **Error_Garbled_Characters**: Encoding issues or anti-crawler artifacts ("â€™", "□□□", "ï»¿"); threshold: >1% of characters garbled
+- **Error_Words_Stuck**: Missing spaces breaking tokenization ("Thequickbrownfox"); threshold: >1% of text affected
+- **Error_Lack_Punctuation**: Unclear sentence boundaries ("I like apples they are red also I like oranges")
+
+## 3. Similarity
+- **Error_Duplicate**: Excessive repetition dominating the text; threshold: same phrase repeats >5 times OR duplicate ratio >30%
+
+## 4. Security
+- **Error_Politics**: Content promoting extremism, terrorism, ethnic hatred
+- **Error_Prohibition**: Violence, pornography, gambling, drugs
+
+# Input
+
+**Text A** (Data.prompt — first extraction tool):
+{text_tool_a}
+
+**Text B** (Data.content — second extraction tool):
+{text_tool_b}
+
+# Evaluation Rules
+
+1. Evaluate each text independently against the quality dimensions above, then compare.
+2. Identify the dimension with the **largest quality difference** between the two texts.
+3. Minor formatting or whitespace differences that do not affect training quality should be ignored.
+
+⚠️ The order of Text A and Text B reflects the fixed field mapping: A = `Data.prompt`, B = `Data.content`. Do NOT favor either text based on its position.
+
+# Output Format
+
+Return JSON only:
+{{
+  "score": [0|1|2],
+  "name": "[error_type from the dimension with greatest difference]",
+  "reason": "[objective description of quality differences]"
+}}
+
+Where:
+- `score`: 1 if Text A (`Data.prompt`) is better, 2 if Text B (`Data.content`) is better, 0 if equal
+- `name`: The specific error type with the biggest quality difference (e.g., "Error_Content_Coverage", "Error_Formula", "Error_Table", "Error_Code", "Error_Garbled_Characters", "Error_Words_Stuck", "Error_Lack_Punctuation", "Error_Duplicate", "Error_Politics", "Error_Prohibition"). Use "None" if both are equal.
+- `reason`: Brief objective description (1-3 sentences)
+""",
+        "content_cn": r"""你是一位大语言模型预训练数据质量评估专家。你将对比两个不同 HTML 提取工具从同一网页中提取的文本，判断哪个提取结果的质量更高，更适合用于 LLM 预训练。
+
+# 质量维度
+
+请基于以下维度分别评估两段文本并进行对比：
+
+## 1. 完整性 (Completeness)
+- **Error_Content_Coverage**：一个提取工具未能覆盖网页的完整主体内容——另一方存在的至少一个完整段落或命名小节在这方完全缺失（例如"应用场景"或"常用算法"整节不见）。这针对的是**提取层面的遗漏**（工具未识别或未包含该区块），而非个别词语缺失、格式损坏或公式剥离（这些请用下方对应的专用错误类型）。
+- **Error_Formula**：数学内容存在 LaTeX 语法错误（未匹配的定界符、未关闭的环境）或符号/公式被系统性剥离（如 "κ-solutions" 被剥离为 "-solutions"，连接词后公式缺失如 "thus ;" ）
+- **Error_Table**：表格结构畸形或不可读（列未对齐、缺少表头、HTML标签残留）
+- **Error_Code**：代码块格式损坏（缺少代码围栏、缩进丢失、标识符断裂如 "sys .argv"、行号混入代码）
+
+## 2. 有效性 (Effectiveness)
+- **Error_Garbled_Characters**：编码问题或反爬虫伪影（"â€™"、"□□□"、"ï»¿"）；阈值：>1% 的字符为乱码
+- **Error_Words_Stuck**：缺失空格导致分词错误（"Thequickbrownfox"）；阈值：>1% 的文本受影响
+- **Error_Lack_Punctuation**：句子边界不清（"I like apples they are red also I like oranges"）
+
+## 3. 相似性 (Similarity)
+- **Error_Duplicate**：过度重复内容；阈值：同一短语重复>5次 或 重复率>30%
+
+## 4. 安全性 (Security)
+- **Error_Politics**：宣扬极端主义、恐怖主义、民族仇恨的内容
+- **Error_Prohibition**：暴力、色情、赌博、毒品相关内容
+
+# 输入
+
+**文本A**（Data.prompt — 第一个提取工具的结果）：
+{text_tool_a}
+
+**文本B**（Data.content — 第二个提取工具的结果）：
+{text_tool_b}
+
+# 评估规则
+
+1. 独立按上述质量维度评估每段文本，再进行对比。
+2. 找出两段文本之间**质量差异最大**的维度。
+3. 不影响训练质量的细微格式差异或空白差异应忽略。
+
+⚠️ 文本A和文本B的顺序反映固定字段映射：A = `Data.prompt`，B = `Data.content`。请勿因位置先后偏好任何一方。
+
+# 输出格式
+
+仅返回 JSON：
+{{
+  "score": [0|1|2],
+  "name": "[差异最大维度中的具体错误类型]",
+  "reason": "[客观描述两段文本的质量差异]"
+}}
+
+其中：
+- `score`：文本A（`Data.prompt`）更好为 1，文本B（`Data.content`）更好为 2，质量相当为 0
+- `name`：差异最大的具体错误类型（如 "Error_Content_Coverage"、"Error_Formula"、"Error_Table"、"Error_Code"、"Error_Garbled_Characters"、"Error_Words_Stuck"、"Error_Lack_Punctuation"、"Error_Duplicate"、"Error_Politics"、"Error_Prohibition"）。如果两者相当则为 "None"。
+- `reason`：简要客观描述（1-3句话）
+""",
+    }
+
+    @classmethod
+    def build_messages(cls, input_data: Data) -> List:
+        """Build the single user message that carries both extractions.
+
+        Text A is Data.prompt and Text B is Data.content. The Chinese template
+        is chosen only when the language equals "zh"; raw_data["language"] wins
+        over input_data.language, and "en" is the default when neither is set.
+        """
+        extra = getattr(input_data, "raw_data", {}) or {}
+        lang = extra.get("language", getattr(input_data, "language", "en"))
+        template_key = "content_cn" if lang == "zh" else "content_en"
+        return [
+            {
+                "role": "user",
+                "content": cls.prompt[template_key].format(
+                    text_tool_a=input_data.prompt, text_tool_b=input_data.content
+                ),
+            }
+        ]
+
+    @classmethod
+    def process_response(cls, response: str) -> EvalDetail:
+        """Parse the LLM's JSON verdict into an EvalDetail.
+
+        A leading <think>...</think> block is removed and appended to "reason";
+        optional Markdown code fences around the JSON are stripped before decoding.
+        Raises ConvertJsonError when the payload is not valid JSON or not a JSON object.
+        """
+        log.info(response)
+
+        # Trim first so trailing whitespace after a closing fence (or leading
+        # whitespace before <think>) cannot defeat the prefix/suffix checks below.
+        response = response.strip()
+        response_think = ""
+        if response.startswith("<think>"):
+            think_content = re.search(r"<think>(.*?)</think>", response, flags=re.DOTALL)
+            if think_content:
+                response_think = think_content.group(1).strip()
+            response = re.sub(r"<think>.*?</think>", "", response, flags=re.DOTALL).strip()
+
+        if response.startswith("```json"):
+            response = response[7:]
+        if response.startswith("```"):
+            response = response[3:]
+        if response.endswith("```"):
+            response = response[:-3]
+        response = response.strip()
+
+        try:
+            response_json = json.loads(response)
+        except json.JSONDecodeError as err:
+            raise ConvertJsonError(f"Convert to JSON format failed: {response}") from err
+        if not isinstance(response_json, dict):
+            raise ConvertJsonError(f"Convert to JSON format failed: {response}")
+        if response_think:
+            response_json["reason"] = response_json.get("reason", "") + "\n" + response_think
+
+        response_model = ResponseScoreTypeNameReason(**response_json)
+        # Label prefixes match Data fields: prompt=first extraction, content=second.
+        labels = {1: "PROMPT_BETTER", 2: "CONTENT_BETTER"}
+        tmp_type = labels.get(response_model.score, "EXTRACTION_EQUAL")
+        result = EvalDetail(metric=cls.__name__)
+        result.status = response_model.score != 1
+        result.label = [f"{tmp_type}.{response_model.name}"]
+        result.reason = [json.dumps(response_json, ensure_ascii=False)]
+        return result
diff --git a/dingo/run/cli.py b/dingo/run/cli.py
index a47a7c6c..ef6b85ae 100644
--- a/dingo/run/cli.py
+++ b/dingo/run/cli.py
@@ -53,6 +53,13 @@ def parse_args():
         default=False,
         help="Output as JSON",
     )
+    info_parser.add_argument(
+        "--count",
+        action="store_true",
+        default=False,
+        help="Print metric counts (rules, llm, groups, total_metrics=rules+llm). "
+        "Human mode: counts only. With --json: prepend a \"counts\" object to the payload.",
+    )
 
     # --- dingo serve ---
     serve_parser = subparsers.add_parser("serve", help="Start MCP server for AI agent integration")
@@ -177,9 +184,23 @@ def cmd_info(args):
             groups[group_name] = [cls.__name__ for cls in rule_list]
         info["groups"] = groups
 
+    counts = {
+        "rules": len(Model.rule_name_map),
+        "llm": len(Model.llm_name_map),
+        "groups": len(Model.rule_groups),
+        "total_metrics": len(Model.rule_name_map) + len(Model.llm_name_map),
+    }
+
     if args.json:
-        json.dump(info, sys.stdout, indent=2, ensure_ascii=False)
+        if args.count:
+            payload = {"counts": counts, **info}
+            json.dump(payload, sys.stdout, indent=2, ensure_ascii=False)
+        else:
+            json.dump(info, sys.stdout, indent=2, ensure_ascii=False)
         sys.stdout.write("\n")
+    elif args.count:
+        for key in ("rules", "llm", "groups", "total_metrics"):
+            print(f"{key}: {counts[key]}")
     else:
         _print_info_table(info)
 
diff --git a/docs/metrics.md b/docs/metrics.md
index 44e75482..5dec75ec 100644
--- a/docs/metrics.md
+++ b/docs/metrics.md
@@ -21,6 +21,7 @@ This document provides comprehensive information about all quality metrics used
 | `LLMCodeCompare` | LLMCodeCompare | Compares the effectiveness of two tools in extracting code blocks from HTML to Markdown format by evaluating recognit... | Internal Implementation | N/A | N/A |
 | `LLMDatamanAssessment` | LLMDatamanAssessment | Evaluates pre-training data quality using the DataMan methodology (14 standards, 15 domains). Assigns a score (0/1), ... | [DataMan: Data Manager for Pre-training Large Language Models](https://arxiv.org/abs/2502.19363) (Peng et al., 2025) | N/A | N/A |
 | `LLMHtmlExtractCompareV2` | LLMHtmlExtractCompareV2 | Compares two HTML main-content extraction tools by computing text diffs and using LLM to judge which preserves more c... | Internal Implementation | N/A | N/A |
+| `LLMHtmlExtractCompareV3` | LLMHtmlExtractCompareV3 | Compares two HTML extraction tools using LLM pretraining quality dimensions (completeness, effectiveness, similarity,... | Internal Implementation | N/A | N/A |
 | `LLMMathCompare` | LLMMathCompare | Compares the effectiveness of two tools in extracting mathematical formulas from HTML to Markdown format by evaluatin... | Internal Implementation | N/A | N/A |
 | `LLMSecurityPolitics` | LLMSecurityPolitics | Evaluates whether the text contains politics-related content | Internal Implementation | N/A | N/A |
 | `LLMTableCompare` | LLMTableCompare | Compares the effectiveness of two tools in extracting tables from HTML to Markdown format by evaluating recognition r... | Internal Implementation | N/A | N/A |
@@ -60,7 +61,7 @@ This document provides comprehensive information about all quality metrics used
 | Type | Metric | Description | Paper Source | Evaluation Results | Examples |
 |------|--------|-------------|--------------|-------------------|----------|
 | `QUALITY_BAD_COMPLETENESS` | RuleLineEndWithEllipsis, RuleLineEndWithTerminal, RuleSentenceNumber, RuleWordNumber | Checks whether the ratio of lines ending with ellipsis is below threshold; Checks whether the ratio of lines ending w... | [RedPajama: an Open Dataset for Training Large Language Models](https://github.com/togethercomputer/RedPajama-Data) (Together Computer, 2023) | [📊 See Results](eval/rule/slimpajama_data_evaluated_by_rule.md) | N/A |
-| `QUALITY_BAD_EFFECTIVENESS` | RuleDoi, RuleIsbn, RuleAbnormalChar, RuleAbnormalHtml, RuleAlphaWords, RuleAudioDataFormat, RuleCharNumber, RuleColonEnd, RuleContentNull, RuleContentShort, RuleContentShortMultiLan, RuleEnterAndSpace, RuleEnterMore, RuleEnterRatioMore, RuleHtmlEntity, RuleHtmlTag, RuleInvisibleChar, RuleImageDataFormat, RuleLatexSpecialChar, RuleLineJavascriptCount, RuleLoremIpsum, RuleMeanWordLength, RuleNlpDataFormat, RuleSftDataFormat, RuleSpaceMore, RuleSpecialCharacter, RuleStopWord, RuleSymbolWordRatio, RuleVedioDataFormat, RuleOnlyUrl | Check whether the string is in the correct format of the doi; Check whether the string is in the correct format of th... | Internal Implementation | N/A | N/A |
+| `QUALITY_BAD_EFFECTIVENESS` | RuleAbnormalChar, RuleAbnormalHtml, RuleAlphaWords, RuleAudioDataFormat, RuleCharNumber, RuleColonEnd, RuleContentNull, RuleContentShort, RuleContentShortMultiLan, RuleEnterAndSpace, RuleEnterMore, RuleEnterRatioMore, RuleHtmlEntity, RuleHtmlTag, RuleInvisibleChar, RuleImageDataFormat, RuleLatexSpecialChar, RuleLineJavascriptCount, RuleLoremIpsum, RuleMeanWordLength, RuleNlpDataFormat, RuleSftDataFormat, RuleSpaceMore, RuleSpecialCharacter, RuleStopWord, RuleSymbolWordRatio, RuleVedioDataFormat, RuleOnlyUrl, RuleDoi, RuleIsbn | Detects garbled text and anti-crawling characters by combining special character and invisible character detection; D... | [RedPajama: an Open Dataset for Training Large Language Models](https://github.com/togethercomputer/RedPajama-Data) (Together Computer, 2023) | [📊 See Results](eval/rule/slimpajama_data_evaluated_by_rule.md) | N/A |
 | `QUALITY_BAD_FLUENCY` | RuleAbnormalNumber, RuleCharSplit, RuleNoPunc, RuleWordSplit, RuleWordStuck | Checks PDF content for abnormal book page or index numbers that disrupt text flow; Checks PDF content for abnormal ch... | [RedPajama: an Open Dataset for Training Large Language Models](https://github.com/togethercomputer/RedPajama-Data) (Together Computer, 2023) | [📊 See Results](eval/rule/slimpajama_data_evaluated_by_rule.md) | N/A |
 | `QUALITY_BAD_RELEVANCE` | RuleHeadWordAr, RuleHeadWordCs, RuleHeadWordHu, RuleHeadWordKo, RuleHeadWordRu, RuleHeadWordSr, RuleHeadWordTh, RuleHeadWordVi, RulePatternSearch, RuleWatermark | Checks whether Arabic content contains irrelevant tail source information; Checks whether Czech content contains irre... | [RedPajama: an Open Dataset for Training Large Language Models](https://github.com/togethercomputer/RedPajama-Data) (Together Computer, 2023) | [📊 See Results](eval/rule/slimpajama_data_evaluated_by_rule.md) | N/A |
 | `QUALITY_BAD_SECURITY` | RuleIDCard, RuleUnsafeWords, RulePIIDetection | Checks whether content contains ID card information; Checks whether content contains unsafe words; Detects Personal I... | [RedPajama: an Open Dataset for Training Large Language Models](https://github.com/togethercomputer/RedPajama-Data) (Together Computer, 2023) | [📊 See Results](eval/rule/slimpajama_data_evaluated_by_rule.md) | N/A |
diff --git a/examples/compare/html_extract_compare_v1.py b/examples/compare/html_extract_compare_v1.py
index a45ed890..b4bdddf2 100644
--- a/examples/compare/html_extract_compare_v1.py
+++ b/examples/compare/html_extract_compare_v1.py
@@ -1,3 +1,4 @@
+import os
 from pathlib import Path
 
 from dingo.config import InputArgs
@@ -5,10 +6,13 @@
 
 # 获取项目根目录
 PROJECT_ROOT = Path(__file__).parent.parent.parent
+OPENAI_MODEL = os.getenv("OPENAI_MODEL")
+OPENAI_URL = os.getenv("OPENAI_BASE_URL")
+OPENAI_KEY = os.getenv("OPENAI_API_KEY")
 
 if __name__ == '__main__':
     input_data = {
-        "input_path": str(PROJECT_ROOT / "test/data/compare/old_new_compare_10000.jsonl"),
+        "input_path": str(PROJECT_ROOT / "test/data/compare/test_compare_content.jsonl"),
         "dataset": {
             "source": "local",
             "format": "jsonl",
@@ -24,9 +28,21 @@
         },
         "evaluator": [
             {
-                "fields": {"id": "track_id", "content": "clean_html"},
+                "fields": {
+                    "data_id": "track_id",
+                    "prompt": "markdown_m10",
+                    "reference": "markdown_ours",
+                    "content": "clean_html",
+                },
                 "evals": [
-                    {"name": "LLMHtmlExtractCompare", "config": {"key": "", "api_url": ""}}
+                    {
+                        "name": "LLMHtmlExtractCompare",
+                        "config": {
+                            "key": OPENAI_KEY,
+                            "api_url": OPENAI_URL,
+                            "model": OPENAI_MODEL,
+                        },
+                    }
                 ]
             }
         ]
diff --git a/examples/compare/html_extract_compare_v2_example.py b/examples/compare/html_extract_compare_v2_example.py
index 63279352..44ccd8e3 100644
--- a/examples/compare/html_extract_compare_v2_example.py
+++ b/examples/compare/html_extract_compare_v2_example.py
@@ -139,9 +139,9 @@ def run_comparison(data: Data, description: str):
     # 打印结果
     # print(f"评估结果类型: {result.type}")
     # print(f"判断名称: {result.name}")
-    print(f"是否存在问题: {result.eval_status}")
-    print(f"评估结果类型: {result.eval_details.label}")
-    print(f"\n推理过程:\n{result.eval_details.reason[0]}")
+    print(f"是否存在问题: {result.status}")
+    print(f"评估结果类型: {result.label}")
+    print(f"\n推理过程:\n{result.reason}")
     print(f"\n{'=' * 60}\n")
 
 
diff --git a/examples/compare/html_extract_compare_v3_example.py b/examples/compare/html_extract_compare_v3_example.py
new file mode 100644
index 00000000..77bed9cb
--- /dev/null
+++ b/examples/compare/html_extract_compare_v3_example.py
@@ -0,0 +1,399 @@
+"""
+HTML 提取工具对比评估 V3 示例
+
+这个示例展示了如何使用 LLMHtmlExtractCompareV3 来对比两种 HTML 提取工具的效果。
+
+使用方法：
+python examples/compare/html_extract_compare_v3_example.py
+"""
+
+import os
+
+from dingo.io import Data
+from dingo.model.llm.compare.llm_html_extract_compare_v3 import LLMHtmlExtractCompareV3
+
+OPENAI_MODEL = os.getenv("OPENAI_MODEL")
+OPENAI_URL = os.getenv("OPENAI_BASE_URL")
+OPENAI_KEY = os.getenv("OPENAI_API_KEY")
+
+# 初始化模型
+evaluator = LLMHtmlExtractCompareV3()
+evaluator.dynamic_config.model = OPENAI_MODEL
+evaluator.dynamic_config.key = OPENAI_KEY
+evaluator.dynamic_config.api_url = OPENAI_URL
+
+# 示例数据 - 中文网页
+example_data_cn = Data(
+    data_id="example_cn_001",  # 必需字段
+    prompt="""# 机器学习简介
+
+机器学习是人工智能的一个分支，它使计算机能够从数据中学习并做出决策。
+
+## 主要类型
+
+1. 监督学习
+2. 无监督学习
+3. 强化学习
+
+机器学习在图像识别、自然语言处理等领域有广泛应用。
+
+---
+相关文章：
+- 深度学习入门
+- 神经网络基础
+作者：张三
+""",
+    content="""# 机器学习简介
+
+    机器学习是人工智能的一个分支，它使计算机能够从数据中学习并做出决策。
+
+    ## 主要类型
+
+    1. 监督学习
+    2. 无监督学习
+    3. 强化学习
+
+    ## 应用场景
+
+    机器学习在图像识别、自然语言处理、推荐系统等领域有广泛应用。
+
+    ## 常用算法
+
+    - 决策树
+    - 支持向量机
+    - 神经网络
+
+    参考文献：
+    [1] Mitchell, T. 1997. Machine Learning.
+    """,
+    raw_data={
+        "language": "zh",  # 指定语言为中文
+    }
+)
+
+# 示例数据 - 英文网页
+example_data_en = Data(
+    data_id="example_en_001",  # 必需字段
+    prompt=r"""In previous lectures, we have established (modulo some technical details) two significant components of the proof of the Poincaré conjecture: finite time extinction of Ricci flow with surgery (Theorem 4 of Lecture 2), and a -noncollapsing of Ricci flows with surgery (which, except for the surgery part, is Theorem 2 of Lecture 7). Now we come to the heart of the entire argument: the topological and geometric control of the high curvature regions of a Ricci flow, which is absolutely essential in order for one to define surgery on these regions in order to move the flow past singularities. This control is intimately tied to the study of a special type of Ricci flow, the *-solutions* to the Ricci flow equation; we will be able to use compactness arguments (as well as the -noncollapsing results already obtained) to deduce control of high curvature regions of arbitrary Ricci flows from similar control of -solutions. A secondary compactness argument lets us obtain that control of -solutions from control of an even more special type of solution, the *gradient shrinking solitons* that we already encountered in Lecture 8.
+
+[Even once one has this control of high curvature regions, the proof of the Poincaré conjecture is still not finished; there is significant work required to properly define the surgery procedure, and then one has to show that the surgeries do not accumulate in time, and also do not disrupt the various monotonicity formulae that we are using to deduce finite time extinction, -noncollapsing, etc. But the control of high curvature regions is arguably the largest single task one has to establish in the entire proof.]
+
+The next few lectures will be devoted to the analysis of -solutions, culminating in Perelman’s topological and geometric classification (or near-classification) of such solutions (which in particular leads to the *canonical neighbourhood theorem* for these solutions, which we will briefly discuss below). In this lecture we shall formally define the notion of a -solution, and indicate informally why control of such solutions should lead to control of high curvature regions of Ricci flows. We’ll also outline the various types of results that we will prove about -solutions.
+
+Our treatment here is based primarily on the book of Morgan and Tian.
+
+
+— Definition of a -solution —
+
+We fix a small number (basically the parameter that comes out of the non-collapsing theorem). Here is the formal definition of a -solution:
+
+
+Definition 1.(-solutions) A-solutionis a Ricci flow which is
+-
+Ancient, in the sense that t ranges on the interval ;
+-
+Complete and connected(i.e. (M,g(t)) is complete and connected for every t);
+-
+Non-negative Riemann curvature, i.e. is positive semidefinite at all points in spacetime;
+-
+Bounded curvature, thus ;
+-
+-noncollapsed(see Definition 1 of Lecture 7) at every point in spacetime and at every scale ;
+-
+Non-flat, i.e. the curvature is non-zero at at least one point in spacetime.
+This laundry list of properties arises because they are the properties that we are able to directly establish on limits of rescaled Ricci flows; see below.
+
+**Remark 1. ** If a d-dimensional Riemann manifold is both flat (thus ) and non-collapsed at every scale, then (by Cheeger’s lemma, Theorem 1 from Lecture 7) its injectivity radius is infinite, and by normal coordinates the manifold is isometric to Euclidean space . Thus the non-flat condition is only excluding the *trivial Ricci flow* with the standard (and static) metric. The non-flat condition tells us that the (scalar, say) curvature is positive in at least one point of spacetime, but we will shortly be able to use the strong maximum principle to conclude in fact that the curvature is positive everywhere.
+**Remark 2.** In three dimensions, the condition of non-negative RIemann curvature is equivalent to that of non-negative sectional curvature; see the discussion in Lecture 0. In any dimension, the conditions of non-negative bounded Riemann curvature imply that R and are non-negative, and that and . Thus as far as magnitude is concerned, the Riemann and Ricci curvatures of -solutions are controlled by the scalar curvature.
+Now we discuss examples (and non-examples) of -solutions.
+
+**Example 1.** Every gradient shrinking soliton or gradient steady soliton (M,g) (see Lecture 8) gives an ancient flow. This flow will be a -solution for sufficiently small if the Einstein manifold (M,g) is complete, connected, non-collapsed at every scale, and is not Euclidean space. For instance, the round sphere with the standard metric is a gradient shrinking solution and will generate a -solution for any and sufficiently small , which we will refer to as the *shrinking round sphere* -solution.
+**Exercse 1.** Show that the Cartesian product of two -solutions is again a -solution (with a smaller value of ), as is the Cartesian product of a -solution. Thus for instance the product of the shrinking round 2-sphere and the Euclidean line is a -solution, which we refer to as the *shrinking round 3-cylinder* .
+**Example 2.** In one dimension, there are no -solutions, as every manifold is flat; in particular, the 1-sphere (i.e. a circle) is *not* a -solution (it is flat and also collapsed at large scales). In two dimensions, the shrinking round 2-sphere is -solution, as discussed above. We can quotient this by the obvious action to also get a shrinking round projective plane as a -solution. But we shall show in later lectures that if we restrict attention to oriented manifolds, then the shrinking round 2-sphere is the only 2-dimensional -solutions; this result is due to Hamilton, see e.g. Chapter 5 of Chow-Knopf. For instance, the 2-cylinder is not a -solution (it is both flat and collapsed at large scales). The cigar soliton (Example 3 from Lecture 8) also fails to be a -solution due to it being collapsed at large scales.
+**Example 3.** In three dimensions, we begin to get significantly more variety amongst -solutions. We have the round shrinking 3-sphere , but also all the quotients of such round spheres by free finite group actions (including the projective space , but with many other examples. We refer to these examples as *round shrinking 3-spherical space forms*. We have also seen the shrinking round cylinder ; there are also finite quotients of this example such as shrinking round projective cylinder , or the quotient of the cylinder by the orientation-preserving free involution . We refer to these examples as the *unoriented and oriented quotients of the shrinking round 3-cylinder* respectively. The oriented quotient can be viewed as a half-cylinder capped off with a punctured (and the whole manifold is in fact homeomorphic to a punctured ).
+**Example 4. **One can also imagine perturbations of the shrinking solutions mentioned above. For instance, one could imagine non-round versions of the shrinking or shrinking example, in which the manifold has sectional curvature which is still positive but not constant. We shall informally refer to such solutions as *C-components* (we will define this term formally later, and explain the role of the parameter C). Similarly one could imagine variants of the oriented quotient of the shrinking round cylinder, which are approximately round half-cylinders capped off with what is topologically either a punctured or punctured (i.e. with something homeomorphic to a ball); a 3-dimensional variant of a cigar soliton would fall into this category (such solitons have been constructed by Bryant and by Cao). We informally refer to such solutions as -capped strong -tubes (we will define this term precisely later). One can also consider *doubly -capped strong -tubes*, in which an approximately round finite cylinder is capped off at both ends by either a punctured or punctured ; such manifolds then become homeomorphic to either or . (Note we need to cap off any ends that show up in order to keep the manifold M complete.)
+An important theorem of Perelman shows that these examples of -solutions are in fact the only ones:
+
+
+Theorem 1.(Perelman classification theorem, imprecise version) Every 3-dimensional -solution takes on one of the following forms at time zero (after isometry and rescaling, if necessary):
+A shrinking round 3-sphere (or shrinking round spherical space form );
+-
+A shrinking round 3-cylinder , the quotient , or one of its quotients (either oriented or unoriented);
+-
+A C-component;
+-
+A C-capped strong -tube;
+-
+A doubly C-capped strong -tube.
+-
+We will make this theorem more precise in later lectures (or if you are impatient, you can read Chapter 9 of Morgan-Tian).
+
+**Remark 3.** At very large scales, Theorem 1 implies that an ancient solution at time zero either looks 0-dimensional (because the manifold was compact, as in the case of a sphere, spherical space form, C-component, or doubly C-capped strong -tube) or 1-dimensional, resembling a line (in the case of the cylinder) or half-line (for C-capped strong -tube). Oversimplifying somewhat, this 0- or 1-dimensionality of the three-dimensional -solutions is the main reason why surgery is even possible; if Ricci flow singularities could look 2-dimensional (such as , or as the product of the cigar soliton and a line) or 3-dimensional (as in ) then it is not clear at all how to define a surgery procedure to excise the singularity. The point is that all the potential candidates for singularity that look 2-dimensional or 3-dimensional at large scales (after rescaling) are either flat or collapsed (or do not have bounded nonnegative curvature), and so are not -solutions. The unoriented quotiented cylinder also causes difficulties with surgery (despite being only one-dimensional at large scales), because it is hard to cap off such a cylinder in a manner which is well-behaved with respect to Ricci flow; however if we assume that the original manifold M contains no embedded copy of (which is for instance the case if the manifold is oriented, and in particular if it is simply connected) then this case does not occur.
+**Remark 4.** In four and higher dimensions, things look much worse; consider for instance the product of a shrinking round with the trivial plane . This is a -solution but has a two-dimensional large-scale structure, and so there is no obvious way to remove singularities of this shape by surgery. It may be that in order to have analogues of Perelman’s theory in higher dimensions one has to make much stronger topological or geometric assumptions on the manifold. Note however that four-dimensional Ricci flows with surgery were already considered by Hamilton (with a rather different definition of surgery, however).
+The classification theorem lets one understand the geometry of neighbourhoods of any given point in a -solution. Let us make the following imprecise definitions (which, again, will be made precise in later lectures):
+
+
+Definition 2.(Canonical neighbourhoods, informal version) Let (M,g) be a complete connected 3-manifold, let x be a point in M, and let U be an open neighbourhood of x. We normalise the scalar curvature at x to be 1.
+We say that U is an
+-
+-neckif it is close (in a smooth topology) to a round cylinder , with x well in the middle of of this cylinder;We say that U is a
+-
+C-componentif U is diffeomorphic to or (in particular, it must be all of M) with sectional curvatures bounded above and below by positive constants, and with diameter comparable to 1.We say that U is
+-
+-roundif it is close (in a smooth topology) to a round sphere or spherical space form (i.e. it is close to a constant curvature manifold).We say that U is a
+-
+-capif it consists of an -neck together with a cap at one end, where the cap is homeomorphic to either an open 3-ball or a punctured and obeys similar bounds as a C-component, and that x is contained inside the cap. (For technical reasons one also needs some derivative bounds on curvature, but we omit them here.)We say that U is a
+-
+canonical neighbourhoodof x if it is one of the above four types.When the scalar curvature is some other positive number than 1, we can generalise the above definition by rescaling the metric to have curvature 1.
+
+Using Theorem 1 (and defining all terms precisely), one can easily show the following important statement:
+
+Corollary 1(Canonical neighbourhood theorem for -solitons, informal version) Every point in a 3-dimensional -solution that does not contain an embedded copy of with trivial normal bundle is contained in a canonical neighbourhood.
+The next few lectures will be devoted to establishing precise versions of Theorem 1, Definition 2, and Corollary 1.
+
+— High curvature regions of Ricci flows —
+
+Corollary 1 is an assertion about -solutions only, but it implies an important property about more general Ricci flows:
+
+Theorem 2.(Canonical neighbourhood for Ricci flows, informal version) Let be a Ricci flow of compact 3-manifolds on a time interval , without any embedded copy of with trivial normal bundle. Then every point with sufficiently large scalar curvature is contained in a canonical neighbourhood.
+(Actually, as with many other components of this proof, we actually need a generalisation of this result for Ricci flow with surgery, but we will address this (non-trivial) complication later.)
+
+The importance of this theorem lies in the fact that all the singular regions that need surgery will have large scalar curvature, and Theorem 2 provides the crucial topological and geometric control in order to perform surgery on these regions. (This is a significant oversimplification, as one has to also study certain “horns” that appear at the singular time in order to find a particularly good place to perform surgery, but we will postpone discussion of this major additional issue later in this course.)
+
+Theorem 2 is deduced from Corollary 1 and a significant number of additional arguments. The strategy is to use a compactness-and-contradiction argument. As a very crude first approximation, the proof goes as follows:
+
+Suppose for contradiction that Theorem 2 failed. Then one could find a sequence of points with which were not contained in canonical neighbourhoods.
+-
+M, being compact, has finitely many components; by restricting attention to a subsequence of points if necessary, we can take M to be connected.
+-
+On any compact time interval , the scalar curvature is necessarily bounded, and thus . As a consequence, if we define the rescaled Ricci flows , where is the natural length scale associated to the scalar curvature at , then these flows will become increasingly ancient. Note that in the limit (which we will not define rigorously yet, but think of a pointed Gromov-Hausdorff limit for now), the increasingly large manifolds may cease to be compact, but will remain complete.
+-
+Because of the Hamilton-Ivey pinching phenomenon (Theorem 1 from Lecture 3), we expect the rescaled flows to have non-negative Ricci curvature in the limit (and hence non-negative Riemann curvature also, as we are in three dimensions).
+-
+If we can pick the points suitably (so that the scalar curvature is larger than or comparable to the scalar curvatures at other nearby points), then we should be able to ensure that the rescaled flows have bounded curvature in the limit.
+-
+Since -noncollapsing is invariant under rescaling, the non-collapsing theorem (Theorem 2 of Lecture 7) should ensure that the rescaled flows remain -noncollapsed in the limit.
+-
+Since the rescaled scalar curvature at the base point of is equal to 1 by construction, any limiting flow will be non-flat.
+-
+Various compactness theorems (of Gromov, Hamilton, and Perelman) exploiting the non-collapsed, bounded curvature, and parabolic nature of the rescaled Ricci flows now allows one to extract a limiting flow . This limit is initially in a fairly weak sense, but one can use parabolic theory to upgrade the convergence to quite a strong (and smooth) convergence. In particular, the limit of the Ricci flows will remain a Ricci flow.
+-
+Applying 2-8, we see that the limiting flow is a -solution.
+-
+Applying Corollary 1, we conclude that every point in the limiting flow lies inside a canonical neighbourhood. Using the strong nature of the convergence (and the scale-invariant nature of canonical neighbourhoods), we deduce that the points also lie in canonical neighbourhoods for sufficiently large n, a contradiction.
+-
+There are some non-trivial technical difficulties in executing the above scheme, especially in Step 5 and Step 8. Step 8 will require some compactness theorems for -solutions which we will deduce in later lectures. For Step 5, the problem is that the points that we are trying to place inside canonical neighbourhoods have large curvature, but they may be adjacent to other points of significantly higher curvature, so that the limiting flow ends up having unbounded curvature. To get around this, Perelman established Theorem 2 by a downwards induction argument on the curvature, first establishing the result for extremely high curvature, then for slightly less extreme curvature, and so forth. The point is that with such an induction hypothesis, any potentially bad adjacent points of really high curvature will be safely tucked away in a canonical neighbourhood of high curvature, which in turn is connected to another canonical neighbourhood of high curvature, and so forth; some basic topological and geometric analysis then eventually lets us conclude that this bad point must in fact be quite far from the base point (much further away than the natural length scale , in particular), so that it does not show up in the limiting flow . We will discuss these issues in more detail in later lectures.
+
+— Benchmarks in controlling -solutions —
+
+As mentioned earlier, the next few lectures will be focused on controlling -solutions. It turns out that the various properties in Definition 1 interact very well with each other, and give remarkably precise control on these solutions. In this section we state (without proofs) some of the results we will establish concerning such solutions.
+
+
+Proposition 1.(Consequences of Hamilton’s Harnack inequality) Let be a -solution. Then is a non-decreasing function of time. Furthermore, for any , we have the pointwise inequalities(1)
+
+and
+
+(2)
+
+on , where of course is the backwards time variable.
+
+These inequalities follow from an important Harnack inequality of Hamilton (also related to earlier work of Li and Yau) that we will discuss in the next lecture. These results rely primarily on the ancient and non-negatively curved nature of -solutions, as well as the Ricci flow equation of course.
+
+Now one can handle the two-dimensional case:
+
+Proposition 2.(Classification of 2-dimensional -solutions) The only two-dimensional -solutions are the round shrinking 2-spheres.
+This proposition relies on first studying a certain asymptotic limit of the -solution, known as the asymptotic soliton, to be defined later. One shows that this asymptotic limit is a round shrinking 2-sphere, which implies that the original -solution is asymptotically a round shrinking 2-sphere. One can then invoke Hamilton’s rounding theorem to finish the claim.
+
+Turning now to three dimensions, the first important result that the curvature R decays slower at infinity than what scaling naively predicts.
+
+Proposition 3.(Asymptotic curvature) Let be a 3-dimensional solution which is not compact. Then for any time and any base point , we have .
+The proof of Proposition 3 is based on another compactness-and-contradiction argument which also heavily exploits some splitting theorems in Riemannian geometry, as well as the soul theorem.
+
+The increasing curvature at infinity can be used to show that the volume does not grow as fast at infinity as scaling predicts:
+
+
+Proposition 4.(Asymptotic volume collapse) Let be a 3-dimensional solution which is not compact. Then for any time and any base point , we have .
+Note that Proposition 4 does not contradict the non-collapsed nature of the flow, since one does not expect bounded normalised curvature at extremely large scales. Proposition 4 morally follows from Bishop-Gromov comparison geometry theory, but its proof in fact uses yet another compactness-and-contradiction argument combined with splitting theory.
+
+An important variant of Proposition 4 and Proposition 3 (and yet another compactness-and-contradiction argument) states that on any ball at time zero on which the volume is large (e.g. larger than for some ), one has bounded normalised curvature, thus on this ball. This fact helps us deduce
+
+Theorem 3.(Perelman compactness theorem, informal version) The space of all pointed -solutions (allowing to range over the positive real numbers) is compact (in a suitable topology) after normalising the scalar curvature at the base point to be 1.
+One corollary of this compactness is that there is in fact a universal such that every -solution is a -solution. (Indeed, the proof of this universality is one of the key steps in the proof of the above theorem.) This theorem is proven by establishing some uniform curvature bounds on -solutions which come from the previous volume analysis.
+
+The proof of Theorem 1 (and thus Corollary 1) follows from this compactness once one can classify the asymptotic solitons mentioned earlier. This task in turn requires many of the techniques already mentioned, together with some variational analysis of the gradient curves of the potential function f that controls the geometry of the soliton.
+""",
+    content=r"""In previous lectures, we have established (modulo some technical details) two significant components of the proof of the Poincaré conjecture: finite time extinction of Ricci flow with surgery (Theorem 4 of Lecture 2), and a $\kappa$ -noncollapsing of Ricci flows with surgery (which, except for the surgery part, is Theorem 2 of Lecture 7). Now we come to the heart of the entire argument: the topological and geometric control of the high curvature regions of a Ricci flow, which is absolutely essential in order for one to define surgery on these regions in order to move the flow past singularities. This control is intimately tied to the study of a special type of Ricci flow, the $\kappa$ -solutions to the Ricci flow equation; we will be able to use compactness arguments (as well as the $\kappa$ -noncollapsing results already obtained) to deduce control of high curvature regions of arbitrary Ricci flows from similar control of $\kappa$ -solutions. A secondary compactness argument lets us obtain that control of $\kappa$ -solutions from control of an even more special type of solution, the gradient shrinking solitons that we already encountered in Lecture 8.
+
+[Even once one has this control of high curvature regions, the proof of the Poincaré conjecture is still not finished; there is significant work required to properly define the surgery procedure, and then one has to show that the surgeries do not accumulate in time, and also do not disrupt the various monotonicity formulae that we are using to deduce finite time extinction, $\kappa$ -noncollapsing, etc. But the control of high curvature regions is arguably the largest single task one has to establish in the entire proof.]
+
+The next few lectures will be devoted to the analysis of $\kappa$ -solutions, culminating in Perelman’s topological and geometric classification (or near-classification) of such solutions (which in particular leads to the canonical neighbourhood theorem for these solutions, which we will briefly discuss below). In this lecture we shall formally define the notion of a $\kappa$ -solution, and indicate informally why control of such solutions should lead to control of high curvature regions of Ricci flows. We’ll also outline the various types of results that we will prove about $\kappa$ -solutions.
+
+Our treatment here is based primarily on the book of Morgan and Tian.
+
+— Definition of a $\kappa$ -solution —
+
+We fix a small number $\kappa > 0$ (basically the parameter that comes out of the non-collapsing theorem). Here is the formal definition of a $\kappa$ -solution:
+
+Definition 1. ( $\kappa$ -solutions) A $\kappa$ -solution is a Ricci flow $t \mapsto (M,g(t))$ which is
+
+1. Ancient , in the sense that t ranges on the interval $(-\infty,0]$ ;
+2. Complete and connected (i.e. (M,g(t)) is complete and connected for every t);
+3. Non-negative Riemann curvature , i.e. $\hbox{Riem}: \bigwedge^2 TM \to \bigwedge^2 TM$ is positive semidefinite at all points in spacetime;
+4. Bounded curvature , thus $\sup_{(t,x) \in (-\infty,0] \times M} |\hbox{Riem}|_g < +\infty$ ;
+5. $\kappa$ -noncollapsed (see Definition 1 of Lecture 7 ) at every point $(t_0,x_0)$ in spacetime and at every scale $r_0 > 0$ ;
+6. Non-flat , i.e. the curvature is non-zero at at least one point in spacetime.
+
+This laundry list of properties arises because they are the properties that we are able to directly establish on limits of rescaled Ricci flows; see below.
+
+Remark 1. If a d-dimensional Riemann manifold is both flat (thus $\hbox{Riem}=0$ ) and non-collapsed at every scale, then (by Cheeger’s lemma, Theorem 1 from Lecture 7) its injectivity radius is infinite, and by normal coordinates the manifold is isometric to Euclidean space ${\Bbb R}^d$ . Thus the non-flat condition is only excluding the trivial Ricci flow $M = {\Bbb R}^d$ with the standard (and static) metric. The non-flat condition tells us that the (scalar, say) curvature is positive in at least one point of spacetime, but we will shortly be able to use the strong maximum principle to conclude in fact that the curvature is positive everywhere. $\diamond$
+
+Remark 2. In three dimensions, the condition of non-negative RIemann curvature is equivalent to that of non-negative sectional curvature; see the discussion in Lecture 0. In any dimension, the conditions of non-negative bounded Riemann curvature imply that R and $\hbox{Ric}$ are non-negative, and that $|\hbox{Riem}|_g, |\hbox{Ric}|_g = O(R)$ and $R = O_d(1)$ . Thus as far as magnitude is concerned, the Riemann and Ricci curvatures of $\kappa$ -solutions are controlled by the scalar curvature. $\diamond$
+
+Now we discuss examples (and non-examples) of $\kappa$ -solutions.
+
+Example 1. Every gradient shrinking soliton or gradient steady soliton (M,g) (see Lecture 8) gives an ancient flow. This flow will be a $\kappa$ -solution for sufficiently small $\kappa$ if the Einstein manifold (M,g) is complete, connected, non-collapsed at every scale, and is not Euclidean space. For instance, the round sphere $S^d$ with the standard metric is a gradient shrinking solution and will generate a $\kappa$ -solution for any $d \geq 2$ and sufficiently small $\kappa > 0$ , which we will refer to as the shrinking round sphere $\kappa$ -solution. $\diamond$
+
+Exercse 1. Show that the Cartesian product of two $\kappa$ -solutions is again a $\kappa$ -solution (with a smaller value of $\kappa$ ), as is the Cartesian product of a $\kappa$ -solution. Thus for instance the product $S^2 \times {\Bbb R}$ of the shrinking round 2-sphere and the Euclidean line is a $\kappa$ -solution, which we refer to as the shrinking round 3-cylinder $S^2 \times {\Bbb R}$ . $\diamond$
+
+Example 2. In one dimension, there are no $\kappa$ -solutions, as every manifold is flat; in particular, the 1-sphere (i.e. a circle) is not a $\kappa$ -solution (it is flat and also collapsed at large scales). In two dimensions, the shrinking round 2-sphere $S^2$ is $\kappa$ -solution, as discussed above. We can quotient this by the obvious ${\Bbb Z}/2$ action to also get a shrinking round projective plane $\Bbb{RP}^2$ as a $\kappa$ -solution. But we shall show in later lectures that if we restrict attention to oriented manifolds, then the shrinking round 2-sphere is the only 2-dimensional $\kappa$ -solutions; this result is due to Hamilton, see e.g. Chapter 5 of Chow-Knopf. For instance, the 2-cylinder $S^1 \times {\Bbb R}$ is not a $\kappa$ -solution (it is both flat and collapsed at large scales). The cigar soliton (Example 3 from Lecture 8) also fails to be a $\kappa$ -solution due to it being collapsed at large scales. $\diamond$
+
+Example 3. In three dimensions, we begin to get significantly more variety amongst $\kappa$ -solutions. We have the round shrinking 3-sphere $S^3$ , but also all the quotients $S^3/\Gamma$ of such round spheres by free finite group actions (including the projective space ${\Bbb RP}^3$ , but with many other examples. We refer to these examples as round shrinking 3-spherical space forms. We have also seen the shrinking round cylinder $S^2 \times {\Bbb R}$ ; there are also finite quotients of this example such as shrinking round projective cylinder $\Bbb{RP}^2 \times {\Bbb R}$ , or the quotient of the cylinder by the orientation-preserving free involution $(\omega,z) \mapsto (-\omega,-z)$ . We refer to these examples as the unoriented and oriented quotients of the shrinking round 3-cylinder respectively. The oriented quotient can be viewed as a half-cylinder $S^2 \times [1,+\infty)$ capped off with a punctured $\Bbb{RP}^3$ (and the whole manifold is in fact homeomorphic to a punctured $\Bbb{RP}^3$ ). $\diamond$
+
+Example 4. One can also imagine perturbations of the shrinking solutions mentioned above. For instance, one could imagine non-round versions of the shrinking $S^2$ or shrinking ${\Bbb RP}^3$ example, in which the manifold has sectional curvature which is still positive but not constant. We shall informally refer to such solutions as C-components (we will define this term formally later, and explain the role of the parameter C). Similarly one could imagine variants of the oriented quotient of the shrinking round cylinder, which are approximately round half-cylinders $S^2 \times [1,+\infty)$ capped off with what is topologically either a punctured $\Bbb{RP}^3$ or punctured $S^3$ (i.e. with something homeomorphic to a ball); a 3-dimensional variant of a cigar soliton would fall into this category (such solitons have been constructed by Bryant and by Cao). We informally refer to such solutions as $C$ -capped strong $\varepsilon$ -tubes (we will define this term precisely later). One can also consider doubly $C$ -capped strong $\varepsilon$ -tubes, in which an approximately round finite cylinder $S^2 \times [-T,T]$ is capped off at both ends by either a punctured $\Bbb{RP}^3$ or punctured $S^3$ ; such manifolds then become homeomorphic to either $S^3$ or ${\Bbb RP}^3$ . (Note we need to cap off any ends that show up in order to keep the manifold M complete.) $\diamond$
+
+An important theorem of Perelman shows that these examples of $\kappa$ -solutions are in fact the only ones:
+
+Theorem 1. (Perelman classification theorem, imprecise version) Every 3-dimensional $\kappa$ -solution takes on one of the following forms at time zero (after isometry and rescaling, if necessary):
+
+1. A shrinking round 3-sphere $S^3$ (or shrinking round spherical space form $S^3/\Gamma$ );
+2. A shrinking round 3-cylinder $S^2 \times {\Bbb R}$ , the quotient $\Bbb{RP}^2 \times {\Bbb R}$ , or one of its quotients (either oriented or unoriented);
+3. A C-component;
+4. A C-capped strong $\varepsilon$ -tube;
+5. A doubly C-capped strong $\varepsilon$ -tube.
+
+We will make this theorem more precise in later lectures (or if you are impatient, you can read Chapter 9 of Morgan-Tian).
+
+Remark 3. At very large scales, Theorem 1 implies that an ancient solution at time zero either looks 0-dimensional (because the manifold was compact, as in the case of a sphere, spherical space form, C-component, or doubly C-capped strong $\varepsilon$ -tube) or 1-dimensional, resembling a line (in the case of the cylinder) or half-line (for C-capped strong $\varepsilon$ -tube). Oversimplifying somewhat, this 0- or 1-dimensionality of the three-dimensional $\kappa$ -solutions is the main reason why surgery is even possible; if Ricci flow singularities could look 2-dimensional (such as $S^1 \times {\Bbb R}^2$ , or as the product of the cigar soliton and a line) or 3-dimensional (as in ${\Bbb R}^3$ ) then it is not clear at all how to define a surgery procedure to excise the singularity. The point is that all the potential candidates for singularity that look 2-dimensional or 3-dimensional at large scales (after rescaling) are either flat or collapsed (or do not have bounded nonnegative curvature), and so are not $\kappa$ -solutions. The unoriented quotiented cylinder $\Bbb{RP}^2 \times {\Bbb R}$ also causes difficulties with surgery (despite being only one-dimensional at large scales), because it is hard to cap off such a cylinder in a manner which is well-behaved with respect to Ricci flow; however if we assume that the original manifold M contains no embedded copy of $\Bbb{RP}^2 \times {\Bbb R}$ (which is for instance the case if the manifold is oriented, and in particular if it is simply connected) then this case does not occur. $\diamond$
+
+Remark 4. In four and higher dimensions, things look much worse; consider for instance the product of a shrinking round $S^2$ with the trivial plane ${\Bbb R}^2$ . This is a $\kappa$ -solution but has a two-dimensional large-scale structure, and so there is no obvious way to remove singularities of this shape by surgery. It may be that in order to have analogues of Perelman’s theory in higher dimensions one has to make much stronger topological or geometric assumptions on the manifold. Note however that four-dimensional Ricci flows with surgery were already considered by Hamilton (with a rather different definition of surgery, however).
+
+The classification theorem lets one understand the geometry of neighbourhoods of any given point in a $\kappa$ -solution. Let us make the following imprecise definitions (which, again, will be made precise in later lectures):
+
+Definition 2. (Canonical neighbourhoods, informal version) Let (M,g) be a complete connected 3-manifold, let x be a point in M, and let U be an open neighbourhood of x. We normalise the scalar curvature at x to be 1.
+
+1. We say that U is an $\varepsilon$ -neck if it is close (in a smooth topology) to a round cylinder $S^2 \times (-R,R)$ , with x well in the middle of of this cylinder;
+2. We say that U is a C-component if U is diffeomorphic to $S^3$ or $\Bbb{RP}^3$ (in particular, it must be all of M) with sectional curvatures bounded above and below by positive constants, and with diameter comparable to 1.
+3. We say that U is $\varepsilon$ -round if it is close (in a smooth topology) to a round sphere $S^3$ or spherical space form $S^3/\Gamma$ (i.e. it is close to a constant curvature manifold).
+4. We say that U is a $(C,\varepsilon)$ -cap if it consists of an $\varepsilon$ -neck together with a cap at one end, where the cap is homeomorphic to either an open 3-ball or a punctured ${\Bbb RP}^3$ and obeys similar bounds as a C-component, and that x is contained inside the cap. (For technical reasons one also needs some derivative bounds on curvature, but we omit them here.)
+5. We say that U is a canonical neighbourhood of x if it is one of the above four types.
+
+When the scalar curvature is some other positive number than 1, we can generalise the above definition by rescaling the metric to have curvature 1.
+
+Using Theorem 1 (and defining all terms precisely), one can easily show the following important statement:
+
+Corollary 1 (Canonical neighbourhood theorem for $\kappa$ -solitons, informal version) Every point in a 3-dimensional $\kappa$ -solution that does not contain an embedded copy of $\Bbb{RP}^2$ with trivial normal bundle is contained in a canonical neighbourhood.
+
+The next few lectures will be devoted to establishing precise versions of Theorem 1, Definition 2, and Corollary 1.
+
+— High curvature regions of Ricci flows —
+
+Corollary 1 is an assertion about $\kappa$ -solutions only, but it implies an important property about more general Ricci flows:
+
+Theorem 2. (Canonical neighbourhood for Ricci flows, informal version) Let $t \mapsto (M,g)$ be a Ricci flow of compact 3-manifolds on a time interval ${}[0,T)$ , without any embedded copy of $\Bbb{RP}^2$ with trivial normal bundle. Then every point $(t,x) \in [0,T) \times M$ with sufficiently large scalar curvature is contained in a canonical neighbourhood.
+
+(Actually, as with many other components of this proof, we actually need a generalisation of this result for Ricci flow with surgery, but we will address this (non-trivial) complication later.)
+
+The importance of this theorem lies in the fact that all the singular regions that need surgery will have large scalar curvature, and Theorem 2 provides the crucial topological and geometric control in order to perform surgery on these regions. (This is a significant oversimplification, as one has to also study certain “horns” that appear at the singular time in order to find a particularly good place to perform surgery, but we will postpone discussion of this major additional issue later in this course.)
+
+Theorem 2 is deduced from Corollary 1 and a significant number of additional arguments. The strategy is to use a compactness-and-contradiction argument. As a very crude first approximation, the proof goes as follows:
+
+1. Suppose for contradiction that Theorem 2 failed. Then one could find a sequence $(t_n,x_n) \in [0,T) \times M$ of points with $R(t_n,x_n) \to +\infty$ which were not contained in canonical neighbourhoods.
+2. M, being compact, has finitely many components; by restricting attention to a subsequence of points if necessary, we can take M to be connected.
+3. On any compact time interval ${}[0,t] \times M$ , the scalar curvature is necessarily bounded, and thus $t_n \to T$ . As a consequence, if we define the rescaled Ricci flows $g^{(n)}(t) = \frac{1}{L_n^2} g( t_n + L_n^2 t )$ , where $L_n := R(t_n,x_n)^{-1/2}$ is the natural length scale associated to the scalar curvature at $(t_n,x_n)$ , then these flows will become increasingly ancient. Note that in the limit (which we will not define rigorously yet, but think of a pointed Gromov-Hausdorff limit for now), the increasingly large manifolds $(M,g^{(n)}(t))$ may cease to be compact, but will remain complete.
+4. Because of the Hamilton-Ivey pinching phenomenon (Theorem 1 from Lecture 3 ), we expect the rescaled flows $t \mapsto (M, g^{(n)}(t))$ to have non-negative Ricci curvature in the limit (and hence non-negative Riemann curvature also, as we are in three dimensions).
+5. If we can pick the points $(t_n,x_n)$ suitably (so that the scalar curvature $R(t_n,x_n)$ is larger than or comparable to the scalar curvatures at other nearby points), then we should be able to ensure that the rescaled flows $t \mapsto (M, g^{(n)}(t))$ have bounded curvature in the limit.
+6. Since $\kappa$ -noncollapsing is invariant under rescaling, the non-collapsing theorem (Theorem 2 of Lecture 7 ) should ensure that the rescaled flows remain $\kappa$ -noncollapsed in the limit.
+7. Since the rescaled scalar curvature at the base point $x_n$ of $(M,g^{(n)})$ is equal to 1 by construction, any limiting flow will be non-flat.
+8. Various compactness theorems (of Gromov, Hamilton, and Perelman) exploiting the non-collapsed, bounded curvature, and parabolic nature of the rescaled Ricci flows now allows one to extract a limiting flow $(M^{(\infty)}, g^{(\infty)})$ . This limit is initially in a fairly weak sense, but one can use parabolic theory to upgrade the convergence to quite a strong (and smooth) convergence. In particular, the limit of the Ricci flows will remain a Ricci flow.
+9. Applying 2-8, we see that the limiting flow $(M^{(\infty)}, g^{(\infty)})$ is a $\kappa$ -solution.
+10. Applying Corollary 1, we conclude that every point in the limiting flow lies inside a canonical neighbourhood. Using the strong nature of the convergence (and the scale-invariant nature of canonical neighbourhoods), we deduce that the points $(t_n,x_n)$ also lie in canonical neighbourhoods for sufficiently large n, a contradiction.
+
+There are some non-trivial technical difficulties in executing the above scheme, especially in Step 5 and Step 8. Step 8 will require some compactness theorems for $\kappa$ -solutions which we will deduce in later lectures. For Step 5, the problem is that the points $(t_n,x_n)$ that we are trying to place inside canonical neighbourhoods have large curvature, but they may be adjacent to other points of significantly higher curvature, so that the limiting flow $(M^{(\infty)}, g^{(\infty)})$ ends up having unbounded curvature. To get around this, Perelman established Theorem 2 by a downwards induction argument on the curvature, first establishing the result for extremely high curvature, then for slightly less extreme curvature, and so forth. The point is that with such an induction hypothesis, any potentially bad adjacent points of really high curvature will be safely tucked away in a canonical neighbourhood of high curvature, which in turn is connected to another canonical neighbourhood of high curvature, and so forth; some basic topological and geometric analysis then eventually lets us conclude that this bad point must in fact be quite far from the base point $(t_n,x_n)$ (much further away than the natural length scale $L_n$ , in particular), so that it does not show up in the limiting flow $(M^{(\infty)}, g^{(\infty)})$ . We will discuss these issues in more detail in later lectures.
+
+— Benchmarks in controlling $\kappa$ -solutions —
+
+As mentioned earlier, the next few lectures will be focused on controlling $\kappa$ -solutions. It turns out that the various properties in Definition 1 interact very well with each other, and give remarkably precise control on these solutions. In this section we state (without proofs) some of the results we will establish concerning such solutions.
+
+Proposition 1. (Consequences of Hamilton’s Harnack inequality) Let $t \mapsto (M,g(t))$ be a $\kappa$ -solution. Then $R(t,x)$ is a non-decreasing function of time. Furthermore, for any $(t_0,x_0) \in (-\infty,0] \times M$ , we have the pointwise inequalities
+
+$\displaystyle |\nabla l_{(t_0,x_0)}|^2 + R \leq \frac{3 l_{(t_0,x_0)}}{\tau}$ (1)
+
+and
+
+$\displaystyle -2 \frac{l_{(t_0,x_0)}}{\tau} \leq \frac{\partial l_{(t_0,x_0)}}{\partial \tau} \leq \frac{l_{(t_0,x_0)}}{\tau}$ (2)
+
+on $(-\infty,t_0) \times M$ , where of course $\tau := t_0 - t$ is the backwards time variable.
+
+These inequalities follow from an important Harnack inequality of Hamilton (also related to earlier work of Li and Yau) that we will discuss in the next lecture. These results rely primarily on the ancient and non-negatively curved nature of $\kappa$ -solutions, as well as the Ricci flow equation $\dot g = -2 \hbox{Ric}$ of course.
+
+Now one can handle the two-dimensional case:
+
+Proposition 2.(Classification of 2-dimensional $\kappa$ -solutions) The only two-dimensional $\kappa$ -solutions are the round shrinking 2-spheres.
+
+This proposition relies on first studying a certain asymptotic limit of the $\kappa$ -solution, known as the asymptotic soliton, to be defined later. One shows that this asymptotic limit is a round shrinking 2-sphere, which implies that the original $\kappa$ -solution is asymptotically a round shrinking 2-sphere. One can then invoke Hamilton’s rounding theorem to finish the claim.
+
+Turning now to three dimensions, the first important result that the curvature R decays slower at infinity than what scaling naively predicts.
+
+Proposition 3. (Asymptotic curvature) Let $t \mapsto (M,g(t))$ be a 3-dimensional $\kappa$ solution which is not compact. Then for any time $t \in (-\infty,0)$ and any base point $p \in M$ , we have $\limsup_{x \to \infty} R(t,x) d_{g(t)}(x,p)^2 = +\infty$ .
+
+The proof of Proposition 3 is based on another compactness-and-contradiction argument which also heavily exploits some splitting theorems in Riemannian geometry, as well as the soul theorem.
+
+The increasing curvature at infinity can be used to show that the volume does not grow as fast at infinity as scaling predicts:
+
+Proposition 4. (Asymptotic volume collapse) Let $t \mapsto (M,g(t))$ be a 3-dimensional $\kappa$ solution which is not compact. Then for any time $t \in (-\infty,0)$ and any base point $p \in M$ , we have $\limsup_{r \to +\infty} \hbox{Vol}_{g(t)}( B_{g(t)})(p,r) ) / r^3 = 0$ .
+
+Note that Proposition 4 does not contradict the non-collapsed nature of the flow, since one does not expect bounded normalised curvature at extremely large scales. Proposition 4 morally follows from Bishop-Gromov comparison geometry theory, but its proof in fact uses yet another compactness-and-contradiction argument combined with splitting theory.
+
+An important variant of Proposition 4 and Proposition 3 (and yet another compactness-and-contradiction argument) states that on any ball $B_{g(0)}(p,r)$ at time zero on which the volume is large (e.g. larger than $\nu r^3$ for some $\nu > 0$ ), one has bounded normalised curvature, thus $R = O_\nu( 1 / r^2 )$ on this ball. This fact helps us deduce
+
+Theorem 3. (Perelman compactness theorem, informal version) The space of all pointed $\kappa$ -solutions (allowing $\kappa > 0$ to range over the positive real numbers) is compact (in a suitable topology) after normalising the scalar curvature at the base point to be 1.
+
+One corollary of this compactness is that there is in fact a universal $\kappa_0 > 0$ such that every $\kappa$ -solution is a $\kappa_0$ -solution. (Indeed, the proof of this universality is one of the key steps in the proof of the above theorem.) This theorem is proven by establishing some uniform curvature bounds on $\kappa$ -solutions which come from the previous volume analysis.
+
+The proof of Theorem 1 (and thus Corollary 1) follows from this compactness once one can classify the asymptotic solitons mentioned earlier. This task in turn requires many of the techniques already mentioned, together with some variational analysis of the gradient curves of the potential function f that controls the geometry of the soliton.
+    """,
+    raw_data={
+        "language": "en",  # 指定语言为英文
+    }
+)
+
+
+def run_comparison(data: Data, description: str) -> None:
+    """Evaluate one sample with the module-level ``evaluator`` and print a banner, status, label and reason."""
+    print(f"\n{'=' * 60}")
+    print(f"测试场景: {description}")
+    print(f"{'=' * 60}\n")
+
+    # Run the evaluation on this single sample.
+    result = evaluator.eval(data)
+
+    # Print the outcome; the two commented-out prints below are kept unchanged from the original.
+    # print(f"评估结果类型: {result.type}")
+    # print(f"判断名称: {result.name}")
+    print(f"是否存在问题: {result.status}")
+    print(f"评估结果类型: {result.label}")
+    print(f"\n推理过程:\n{result.reason}")
+    print(f"\n{'=' * 60}\n")
+
+
+if __name__ == "__main__":
+    # Run the comparison on the English sample (example_data_en).
+    run_comparison(example_data_en, "对比两种HTML提取工具")
+
+    # Disabled variant: same English sample with a more specific scenario description.
+    # run_comparison(example_data_en, "英文网页 - 对比两种HTML提取工具")
+
+    print("\n说明 (V3 label 与 Data 字段一致):")
+    print("- score=1 → PROMPT_BETTER：Data.prompt 侧抽取质量更好")
+    print("- score=2 → CONTENT_BETTER：Data.content 侧更好")
+    print("- score=0 → EXTRACTION_EQUAL：两者相当")
diff --git a/examples/compare/html_extract_compare_v3_example_dataset.py b/examples/compare/html_extract_compare_v3_example_dataset.py
new file mode 100644
index 00000000..217ac564
--- /dev/null
+++ b/examples/compare/html_extract_compare_v3_example_dataset.py
@@ -0,0 +1,117 @@
+"""
+HTML 提取工具对比评估 - Dataset 批量执行示例
+
+这个示例展示了如何使用 Executor 批量评估 JSONL 数据集中的 HTML 提取工具对比任务。
+
+特点：
+1. 支持从 JSONL 文件批量读取数据
+2. 使用 LLMHtmlExtractCompareV3 进行评估
+3. 自动生成评估报告
+4. 支持保存好样本和坏样本
+
+数据格式要求：
+{
+    "data_id": "唯一标识",
+    "method1": "工具A提取的文本",
+    "method2": "工具B提取的文本",
+    "language": "zh" 或 "en"
+}
+
+使用方法：
+python examples/compare/html_extract_compare_v3_example_dataset.py
+"""
+
+import os
+from pathlib import Path
+
+from dingo.config.input_args import InputArgs
+from dingo.exec.base import Executor
+
+# Repository root: this file lives in examples/compare/, so walk up three parents.
+PROJECT_ROOT = Path(__file__).parent.parent.parent
+
+# API settings read from the environment; os.getenv yields None for any unset variable.
+OPENAI_MODEL = os.getenv("OPENAI_MODEL")
+OPENAI_URL = os.getenv("OPENAI_BASE_URL")
+OPENAI_KEY = os.getenv("OPENAI_API_KEY")
+common_config = {
+    "model": OPENAI_MODEL,
+    "key": OPENAI_KEY,
+    "api_url": OPENAI_URL,
+}
+
+
+def evaluate_html_extract_compare_dataset():
+    """
+    Batch-evaluate a JSONL dataset of HTML-extraction comparisons with LLMHtmlExtractCompareV3.
+
+    Each record looks like the line below; returns the summary object from ``executor.execute()``.
+    {"data_id": "001", "method1": "工具A文本", "method2": "工具B文本", "language": "zh"}
+    """
+    print("=== HTML Extract Compare Dataset Evaluation ===")
+    print(f"使用模型: {OPENAI_MODEL}")
+    print(f"API URL: {OPENAI_URL}")
+    print()
+
+    # Run configuration handed to InputArgs.
+    input_data = {
+        "task_name": "html_extract_compare_v3_evaluation",
+        "input_path": str(PROJECT_ROOT / "test/data/html_extract_compare_test.jsonl"),
+        "output_path": "output/html_extract_compare_evaluation/",
+        # "log_level": "INFO",
+
+        # Dataset settings.
+        "dataset": {
+            "source": "local",  # local data source
+            "format": "jsonl",  # JSONL format
+        },
+        # Executor settings.
+        "executor": {
+            "max_workers": 4,  # number of concurrent workers
+            "batch_size": 1,  # batch size
+            "result_save": {
+                "bad": True,  # keep status=True samples: tool B better, or both equal (score 2 / 0)
+                "good": True  # keep status=False samples: tool A better (score 1)
+            }
+        },
+        "evaluator": [
+            {
+                "fields": {"id": "data_id", "prompt": "method1", "content": "method2", "language": "language"},
+                "evals": [
+                    {"name": "LLMHtmlExtractCompareV3", "config": common_config},
+                ]
+            }
+        ]
+    }
+
+    # Build InputArgs and run the local executor.
+    input_args = InputArgs(**input_data)
+    executor = Executor.exec_map["local"](input_args)
+
+    print("开始执行评估...")
+    result = executor.execute()
+
+    # Summary. NOTE(review): labels assume "bad" (status=True) covers score 2 and score 0, as the V3 tests assert.
+    print("\n" + "=" * 60)
+    print("评估完成！")
+    print("=" * 60)
+    print(f"任务名称: {result.task_name}")
+    # print(f"评估组: {result.eval_group}")
+    print(f"总样本数: {result.total}")
+    print(f"工具B更好或相同的样本数: {result.num_bad} ")
+    print(f"工具A更好的样本数: {result.num_good} ")
+    print(f"\n输出路径: {result.output_path}")
+
+    # # 打印详细统计
+    # if hasattr(result, 'type_count') and result.type_count:
+    #     print("\n详细统计:")
+    #     for eval_type, count in result.type_count.items():
+    #         print(f"  - {eval_type}: {count}")
+    #
+    # print("=" * 60)
+
+    return result
+
+
+if __name__ == "__main__":
+    evaluate_html_extract_compare_dataset()
diff --git a/test/scripts/model/llm/test_llm_html_extract_compare_v3.py b/test/scripts/model/llm/test_llm_html_extract_compare_v3.py
new file mode 100644
index 00000000..b2546ff1
--- /dev/null
+++ b/test/scripts/model/llm/test_llm_html_extract_compare_v3.py
@@ -0,0 +1,120 @@
+"""
+LLMHtmlExtractCompareV3 核心测试
+
+覆盖：
+1. build_messages（中英全文、language 解析）
+2. process_response（score→label、status、markdown 围栏、思考块剥离）
+3. 非法 JSON → ConvertJsonError
+
+pytest test/scripts/model/llm/test_llm_html_extract_compare_v3.py -v
+"""
+
+import json
+
+import pytest
+
+from dingo.io import Data
+from dingo.model.llm.compare.llm_html_extract_compare_v3 import LLMHtmlExtractCompareV3
+from dingo.utils.exception import ConvertJsonError
+
+
+class TestBuildMessages:  # build_messages: prompt assembly and language selection
+    def test_chinese_includes_full_text_and_dimensions(self):  # zh sample: both texts appear in the prompt
+        data = Data(
+            data_id="t1",
+            prompt="工具A完整正文",
+            content="工具B完整正文",
+            raw_data={"language": "zh"},
+        )
+        messages = LLMHtmlExtractCompareV3.build_messages(data)
+        assert len(messages) == 1
+        assert messages[0]["role"] == "user"
+        body = messages[0]["content"]
+        assert "工具A完整正文" in body
+        assert "工具B完整正文" in body
+        assert "Error_Content_Coverage" in body or "质量维度" in body
+
+    def test_english_includes_full_text(self):  # en sample: both texts appear in the prompt
+        data = Data(
+            data_id="t2",
+            prompt="Full text A from extraction 1",
+            content="Full text B from extraction 2",
+            raw_data={"language": "en"},
+        )
+        messages = LLMHtmlExtractCompareV3.build_messages(data)
+        body = messages[0]["content"]
+        assert "Full text A from extraction 1" in body
+        assert "Full text B from extraction 2" in body
+        assert "Error_Formula" in body or "Quality Dimensions" in body
+
+    def test_default_language_english_when_unset(self):  # no language supplied -> English prompt expected
+        data = Data(
+            data_id="t3",
+            prompt="alpha",
+            content="beta",
+        )
+        messages = LLMHtmlExtractCompareV3.build_messages(data)
+        assert "Quality Dimensions" in messages[0]["content"]
+
+    def test_language_from_top_level_field(self):  # language given as a Data field instead of raw_data
+        data = Data(
+            data_id="t4",
+            prompt="中文A",
+            content="中文B",
+            language="zh",
+        )
+        messages = LLMHtmlExtractCompareV3.build_messages(data)
+        assert "文本A" in messages[0]["content"]
+
+
+class TestProcessResponse:  # process_response: score -> label/status mapping and response clean-up
+    def test_score_1_prompt_better(self):  # score 1 -> PROMPT_BETTER label, status False
+        raw = json.dumps(
+            {"score": 1, "name": "Error_Content_Coverage", "reason": "A 覆盖更全"},
+            ensure_ascii=False,
+        )
+        result = LLMHtmlExtractCompareV3.process_response(raw)
+        assert result.metric == "LLMHtmlExtractCompareV3"
+        assert result.label == ["PROMPT_BETTER.Error_Content_Coverage"]
+        assert result.status is False
+        parsed = json.loads(result.reason[0])
+        assert parsed["score"] == 1
+
+    def test_score_2_content_better(self):  # score 2 -> CONTENT_BETTER label, status True
+        raw = json.dumps(
+            {"score": 2, "name": "Error_Formula", "reason": "B 公式更完整"},
+            ensure_ascii=False,
+        )
+        result = LLMHtmlExtractCompareV3.process_response(raw)
+        assert result.label == ["CONTENT_BETTER.Error_Formula"]
+        assert result.status is True
+
+    def test_score_0_extraction_equal(self):  # score 0 -> EXTRACTION_EQUAL label, status True
+        raw = json.dumps(
+            {"score": 0, "name": "None", "reason": "质量相当"},
+            ensure_ascii=False,
+        )
+        result = LLMHtmlExtractCompareV3.process_response(raw)
+        assert result.label == ["EXTRACTION_EQUAL.None"]
+        assert result.status is True
+
+    def test_json_fenced_with_markdown(self):  # payload wrapped in a markdown json code fence still parses
+        inner = '{"score": 1, "name": "None", "reason": "ok"}'
+        wrapped = f"```json\n{inner}\n```"
+        result = LLMHtmlExtractCompareV3.process_response(wrapped)
+        assert "PROMPT_BETTER" in result.label[0]
+
+    def test_redacted_thinking_appended_to_reason(self):  # <think> text must show up in the parsed reason
+        # Consistent with implementations such as llm_html_extract_compare: short <think>...</think> tags.
+        body = (
+            "<think>internal</think>\n"
+            '{"score": 2, "name": "Error_Table", "reason": "Brief."}'
+        )
+        result = LLMHtmlExtractCompareV3.process_response(body)
+        assert "CONTENT_BETTER.Error_Table" == result.label[0]
+        parsed = json.loads(result.reason[0])
+        assert "internal" in parsed["reason"]
+
+    def test_invalid_json_raises(self):  # non-JSON text must raise ConvertJsonError
+        with pytest.raises(ConvertJsonError):
+            LLMHtmlExtractCompareV3.process_response("not json")