diff --git a/.claude/skills/dingo-verify/scripts/fact_check.py b/.claude/skills/dingo-verify/scripts/fact_check.py index 23748206..71dd158d 100644 --- a/.claude/skills/dingo-verify/scripts/fact_check.py +++ b/.claude/skills/dingo-verify/scripts/fact_check.py @@ -169,13 +169,11 @@ def build_config( "key": api_key, "model": model, "api_url": api_url, - "parameters": { - "temperature": 0, - "agent_config": { - "max_concurrent_claims": max_concurrent, - "max_iterations": 50, - "tools": tools_config, - } + "temperature": 0, + "agent_config": { + "max_concurrent_claims": max_concurrent, + "max_iterations": 50, + "tools": tools_config, } } }] diff --git a/clawhub/scripts/fact_check.py b/clawhub/scripts/fact_check.py index b2865496..a4930f14 100644 --- a/clawhub/scripts/fact_check.py +++ b/clawhub/scripts/fact_check.py @@ -164,13 +164,11 @@ def build_config( "key": api_key, "model": model, "api_url": api_url, - "parameters": { - "temperature": 0, - "agent_config": { - "max_concurrent_claims": max_concurrent, - "max_iterations": 50, - "tools": tools_config, - } + "temperature": 0, + "agent_config": { + "max_concurrent_claims": max_concurrent, + "max_iterations": 50, + "tools": tools_config, } } }] diff --git a/dingo/config/input_args.py b/dingo/config/input_args.py index df1007a4..de589a69 100644 --- a/dingo/config/input_args.py +++ b/dingo/config/input_args.py @@ -102,10 +102,11 @@ class EmbeddingConfigArgs(BaseModel): class EvaluatorLLMArgs(BaseModel): + model_config = {"extra": "allow"} + model: Optional[str] = None key: Optional[str] = None api_url: Optional[str] = None - parameters: Optional[dict] = None embedding_config: Optional[EmbeddingConfigArgs] = None diff --git a/dingo/model/llm/agent/agent_article_fact_checker.py b/dingo/model/llm/agent/agent_article_fact_checker.py index 0a829073..b72b9787 100644 --- a/dingo/model/llm/agent/agent_article_fact_checker.py +++ b/dingo/model/llm/agent/agent_article_fact_checker.py @@ -343,21 +343,21 @@ class 
ArticleFactChecker(BaseAgent): "config": { "key": "your-openai-api-key", "model": "gpt-4o-mini", - "parameters": { - "agent_config": { - "max_iterations": 10, - "tools": { - "claims_extractor": { - "api_key": "your-openai-api-key", - "max_claims": 50, - "claim_types": ["factual", "institutional", "statistical", "attribution"] - }, - "tavily_search": { - "api_key": "your-tavily-api-key", - "max_results": 5 - }, - "arxiv_search": {"max_results": 5} - } + "agent_config": { + "max_iterations": 10, + "overall_timeout": 900, + "max_concurrent_claims": 5, + "tools": { + "claims_extractor": { + "api_key": "your-openai-api-key", + "max_claims": 50, + "claim_types": ["factual", "institutional", "statistical", "attribution"] + }, + "tavily_search": { + "api_key": "your-tavily-api-key", + "max_results": 5 + }, + "arxiv_search": {"max_results": 5} } } } @@ -372,6 +372,9 @@ class ArticleFactChecker(BaseAgent): ] max_iterations = 10 # Allow more iterations for comprehensive checking max_concurrent_claims = 5 # Default parallel claim verification slots + overall_timeout = 900 # 15-minute wall-clock timeout for entire evaluation + _MIN_OVERALL_TIMEOUT = 30 # Floor: 30 seconds + _MAX_OVERALL_TIMEOUT = 7200 # Ceiling: 2 hours _required_fields = [RequiredField.CONTENT] # Article text @@ -394,8 +397,8 @@ def _get_output_dir(cls) -> Optional[str]: Returns: Output directory path (created if needed), or None if saving is disabled. 
""" - params = cls.dynamic_config.parameters or {} - agent_cfg = params.get('agent_config') or {} + extra_params = cls.dynamic_config.model_extra + agent_cfg = extra_params.get('agent_config') or {} explicit_path = agent_cfg.get('output_path') if explicit_path: @@ -816,24 +819,42 @@ def eval(cls, input_data: Data) -> EvalDetail: output_dir = cls._get_output_dir() if cls.dynamic_config: - if cls.dynamic_config.parameters is None: - cls.dynamic_config.parameters = {} - cls.dynamic_config.parameters.setdefault("temperature", 0) + if 'temperature' not in cls.dynamic_config.model_extra: + cls.dynamic_config.temperature = 0 if output_dir and input_data.content: cls._save_article_content(output_dir, input_data.content) + timeout = cls._get_overall_timeout() + + async def _run_with_timeout() -> EvalDetail: + return await asyncio.wait_for( + cls._async_eval(input_data, start_time, output_dir), + timeout=timeout, + ) + try: - return asyncio.run(cls._async_eval(input_data, start_time, output_dir)) + return asyncio.run(_run_with_timeout()) + except asyncio.TimeoutError: + elapsed = time.time() - start_time + log.warning(f"ArticleFactChecker: overall timeout exceeded ({elapsed:.1f}s / {timeout:.0f}s limit)") + return cls._create_overall_timeout_result(elapsed, timeout) except RuntimeError as e: # Fallback when called inside an already-running event loop (e.g. 
Jupyter, tests) if "cannot run" in str(e).lower() or "already running" in str(e).lower(): import concurrent.futures with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool: - future = pool.submit( - lambda: asyncio.run(cls._async_eval(input_data, start_time, output_dir)) - ) - return future.result() + future = pool.submit(lambda: asyncio.run(_run_with_timeout())) + try: + # Extra margin so asyncio.wait_for fires before this outer timeout + return future.result(timeout=timeout + 30) + except (asyncio.TimeoutError, concurrent.futures.TimeoutError): + elapsed = time.time() - start_time + log.warning( + f"ArticleFactChecker: overall timeout exceeded " + f"({elapsed:.1f}s / {timeout:.0f}s limit, fallback path)" + ) + return cls._create_overall_timeout_result(elapsed, timeout) raise # --- Two-Phase Async Architecture Methods --- @@ -922,8 +943,8 @@ async def _async_extract_claims(cls, input_data: Data) -> List[Dict]: """ from dingo.model.llm.agent.tools.claims_extractor import ClaimsExtractor, ClaimsExtractorConfig - params = cls.dynamic_config.parameters or {} - agent_cfg = params.get('agent_config') or {} + extra_params = cls.dynamic_config.model_extra + agent_cfg = extra_params.get('agent_config') or {} extractor_cfg = agent_cfg.get('tools', {}).get('claims_extractor', {}) config_kwargs: Dict[str, Any] = { @@ -1019,10 +1040,30 @@ async def _async_verify_single_claim( @classmethod def _get_max_concurrent_claims(cls) -> int: """Read max_concurrent_claims from agent_config or use class default.""" - params = cls.dynamic_config.parameters or {} - agent_cfg = params.get('agent_config') or {} + extra_params = cls.dynamic_config.model_extra + agent_cfg = extra_params.get('agent_config') or {} return agent_cfg.get('max_concurrent_claims', cls.max_concurrent_claims) + @classmethod + def _get_overall_timeout(cls) -> float: + """Read overall_timeout from agent_config or use class default (900s). + + Returns: + Positive timeout in seconds, clamped to [30, 7200]. 
+ """ + extra_params = cls.dynamic_config.model_extra + agent_cfg = extra_params.get('agent_config') or {} + raw = agent_cfg.get('overall_timeout', cls.overall_timeout) + try: + timeout = float(raw) + except (TypeError, ValueError): + log.warning(f"Invalid overall_timeout={raw!r}, using default {cls.overall_timeout}s") + return float(cls.overall_timeout) + clamped = max(cls._MIN_OVERALL_TIMEOUT, min(timeout, cls._MAX_OVERALL_TIMEOUT)) + if clamped != timeout: + log.warning(f"overall_timeout={timeout} out of range, clamped to {clamped}s") + return float(clamped) + @classmethod def _parse_claim_json_robust(cls, output: Optional[str]) -> Dict[str, Any]: """ @@ -1795,6 +1836,38 @@ def _create_error_result(cls, error_message: str) -> EvalDetail: ] return result + @classmethod + def _create_overall_timeout_result(cls, elapsed: float, timeout: float) -> EvalDetail: + """ + Create error result when overall wall-clock timeout is exceeded. + + Args: + elapsed: Actual elapsed time in seconds + timeout: Configured timeout limit in seconds + + Returns: + EvalDetail with timeout error status + """ + minutes, seconds = divmod(int(timeout), 60) + limit_str = f"{minutes}m{seconds}s" if minutes else f"{int(timeout)}s" + result = EvalDetail(metric=cls.__name__) + result.status = True + result.label = [f"{QualityLabel.QUALITY_BAD_PREFIX}AGENT_OVERALL_TIMEOUT"] + result.reason = [ + "Article Fact-Checking Failed: Overall Timeout Exceeded", + "=" * 70, + f"Execution exceeded the {int(timeout)}s ({limit_str}) wall-clock limit.", + f"Elapsed time: {elapsed:.1f}s", + "", + "Recommendations:", + f" 1. Increase overall_timeout (current: {int(timeout)}s) in agent_config", + " 2. Reduce max_claims in claims_extractor config (e.g., 50 -> 20)", + " 3. Use a faster model (e.g., gpt-4o-mini instead of gpt-4o)", + " 4. Reduce max_concurrent_claims to lower API rate-limit pressure", + " 5. 
Split long articles into shorter sections", + ] + return result + @classmethod def plan_execution(cls, input_data: Data) -> List[Dict[str, Any]]: """ diff --git a/dingo/model/llm/agent/agent_fact_check.py b/dingo/model/llm/agent/agent_fact_check.py index 190e105d..5246d7a1 100644 --- a/dingo/model/llm/agent/agent_fact_check.py +++ b/dingo/model/llm/agent/agent_fact_check.py @@ -70,15 +70,13 @@ class AgentFactCheck(BaseAgent): "key": "your-openai-api-key", "api_url": "https://api.openai.com/v1", "model": "gpt-4.1-mini-2025-04-14", - "parameters": { - "agent_config": { - "max_iterations": 5, - "tools": { - "tavily_search": { - "api_key": "your-tavily-api-key", - "max_results": 5, - "search_depth": "advanced" - } + "agent_config": { + "max_iterations": 5, + "tools": { + "tavily_search": { + "api_key": "your-tavily-api-key", + "max_results": 5, + "search_depth": "advanced" } } } diff --git a/dingo/model/llm/agent/agent_hallucination.py b/dingo/model/llm/agent/agent_hallucination.py index 0e39c48c..fc22ba56 100644 --- a/dingo/model/llm/agent/agent_hallucination.py +++ b/dingo/model/llm/agent/agent_hallucination.py @@ -82,15 +82,13 @@ class AgentHallucination(BaseAgent): "key": "your-openai-api-key", "api_url": "https://api.openai.com/v1", "model": "gpt-4.1-mini-2025-04-14", - "parameters": { - "agent_config": { - "max_iterations": 3, - "tools": { - "tavily_search": { - "api_key": "your-tavily-api-key", - "max_results": 5, - "search_depth": "advanced" - } + "agent_config": { + "max_iterations": 3, + "tools": { + "tavily_search": { + "api_key": "your-tavily-api-key", + "max_results": 5, + "search_depth": "advanced" } } } diff --git a/dingo/model/llm/agent/agent_wrapper.py b/dingo/model/llm/agent/agent_wrapper.py index 4240c1ef..8c20247d 100644 --- a/dingo/model/llm/agent/agent_wrapper.py +++ b/dingo/model/llm/agent/agent_wrapper.py @@ -327,22 +327,22 @@ def get_openai_llm_from_dingo_config(dynamic_config): ) # Extract parameters - params = dynamic_config.parameters or {} 
+ extra_params = dynamic_config.model_extra # Create ChatOpenAI instance llm = ChatOpenAI( api_key=dynamic_config.key, base_url=dynamic_config.api_url, model=dynamic_config.model or "gpt-4.1-mini", - temperature=params.get("temperature", 0.3), - max_tokens=params.get("max_tokens", 4096), - top_p=params.get("top_p", 1.0), - timeout=params.get("timeout", 30) + temperature=extra_params.get("temperature", 0.3), + max_tokens=extra_params.get("max_tokens", 4096), + top_p=extra_params.get("top_p", 1.0), + timeout=extra_params.get("timeout", 30) ) log.debug( f"Created ChatOpenAI: model={dynamic_config.model}, " - f"temp={params.get('temperature', 0.3)}" + f"temp={extra_params.get('temperature', 0.3)}" ) return llm diff --git a/dingo/model/llm/agent/base_agent.py b/dingo/model/llm/agent/base_agent.py index 3832cefe..d3db23d2 100644 --- a/dingo/model/llm/agent/base_agent.py +++ b/dingo/model/llm/agent/base_agent.py @@ -146,7 +146,7 @@ def get_tool_config(cls, tool_name: str) -> Dict[str, Any]: Extract tool configuration from agent's dynamic_config. 
Configuration is expected in: - dynamic_config.parameters.agent_config.tools.{tool_name} + dynamic_config.agent_config.tools.{tool_name} Args: tool_name: Name of the tool @@ -154,8 +154,8 @@ def get_tool_config(cls, tool_name: str) -> Dict[str, Any]: Returns: Dict of configuration values for the tool """ - params = cls.dynamic_config.parameters or {} - agent_config = params.get('agent_config', {}) + extra_params = cls.dynamic_config.model_extra + agent_config = extra_params.get('agent_config', {}) tools_config = agent_config.get('tools', {}) return tools_config.get(tool_name, {}) @@ -184,8 +184,8 @@ def get_max_iterations(cls) -> int: Returns: Maximum number of iterations allowed """ - params = cls.dynamic_config.parameters or {} - agent_config = params.get('agent_config', {}) + extra_params = cls.dynamic_config.model_extra + agent_config = extra_params.get('agent_config', {}) return agent_config.get('max_iterations', cls.max_iterations) @classmethod diff --git a/dingo/model/llm/base_openai.py b/dingo/model/llm/base_openai.py index b6fbcd52..c3911699 100644 --- a/dingo/model/llm/base_openai.py +++ b/dingo/model/llm/base_openai.py @@ -82,22 +82,18 @@ def send_messages(cls, messages: List): else: model_name = cls.client.models.list().data[0].id - params = cls.dynamic_config.parameters - cls.validate_config(params) + extra_params = cls.dynamic_config.model_extra + cls.validate_config(extra_params) completions = cls.client.chat.completions.create( model=model_name, messages=messages, - temperature=params.get("temperature", 0.3) if params else 0.3, - top_p=params.get("top_p", 1) if params else 1, - max_tokens=params.get("max_tokens", 4000) if params else 4000, - presence_penalty=params.get("presence_penalty", 0) if params else 0, - frequency_penalty=params.get("frequency_penalty", 0) if params else 0, + **extra_params, ) if completions.choices[0].finish_reason == "length": raise ExceedMaxTokens( - f"Exceed max tokens: {params.get('max_tokens', 4000) if params else 
4000}" + f"Exceed max tokens: {extra_params.get('max_tokens', 4000)}" ) return str(completions.choices[0].message.content) diff --git a/dingo/model/llm/compare/llm_html_extract_compare_v2.py b/dingo/model/llm/compare/llm_html_extract_compare_v2.py index 65290d1f..a54c3fdb 100644 --- a/dingo/model/llm/compare/llm_html_extract_compare_v2.py +++ b/dingo/model/llm/compare/llm_html_extract_compare_v2.py @@ -25,9 +25,17 @@ class LLMHtmlExtractCompareV2(BaseOpenAI): 输入数据要求: - input_data.prompt: 工具A提取的文本 - input_data.content: 工具B提取的文本 - - input_data.raw_data.get("language", "en"): 语言类型 ("zh" 或 "en") + - language: 可选,来自 input_data.language 或 raw_data["language"],缺省为 "en"("zh" / "en") """ + _metric_info = { + 'category': 'Pretrain Text Quality Assessment Metrics', + 'metric_name': 'LLMHtmlExtractCompareV2', + 'description': 'Compares two HTML main-content extraction tools by computing text diffs and using LLM to judge which preserves more core information', + 'paper_title': '', + 'paper_url': '', + } + _required_fields = [RequiredField.CONTENT, RequiredField.PROMPT] prompt = { "content_en": r"""Please compare the following two texts, each extracted from the same webpage using different HTML parsing methods. Your task is to determine whether there is a difference in the core informational content between them. 
@@ -174,7 +182,8 @@ def build_messages(cls, input_data: Data) -> List: text_tool_b = input_data.content # 获取配置参数 - language = input_data.raw_data.get("language", "en") + raw_data = getattr(input_data, 'raw_data', {}) or {} + language = raw_data.get("language", getattr(input_data, 'language', "en")) # 计算文本差异 diff_result = cls.extract_text_diff(text_tool_a, text_tool_b) diff --git a/dingo/model/llm/instruction_quality/llm_instruction_clarity.py b/dingo/model/llm/instruction_quality/llm_instruction_clarity.py index 9c73a35d..526611d8 100644 --- a/dingo/model/llm/instruction_quality/llm_instruction_clarity.py +++ b/dingo/model/llm/instruction_quality/llm_instruction_clarity.py @@ -283,8 +283,8 @@ def process_response(cls, response: str) -> EvalDetail: # 判断是否通过(默认阈值 6.0) threshold = 6.0 - if hasattr(cls, 'dynamic_config') and cls.dynamic_config.parameters: - threshold = cls.dynamic_config.parameters.get('threshold', 6.0) + if hasattr(cls, 'dynamic_config'): + threshold = cls.dynamic_config.model_extra.get('threshold', 6.0) if score >= threshold: result.status = False diff --git a/dingo/model/llm/instruction_quality/llm_task_difficulty.py b/dingo/model/llm/instruction_quality/llm_task_difficulty.py index e3fe1db0..9c676396 100644 --- a/dingo/model/llm/instruction_quality/llm_task_difficulty.py +++ b/dingo/model/llm/instruction_quality/llm_task_difficulty.py @@ -321,14 +321,14 @@ def process_response(cls, response: str) -> EvalDetail: # 难度评估没有"通过/不通过"的概念,只是描述性的 # 但为了兼容框架,我们设置一个合理的默认行为 - # 可以通过 parameters 配置 min_difficulty 和 max_difficulty + # 可以通过 config 中的 min_difficulty 和 max_difficulty 配置难度范围 result.status = False # 默认不标记为问题 result.label = [f"TASK_DIFFICULTY.{difficulty_level.upper()}"] # 如果配置了难度范围要求,进行检查 - if hasattr(cls, 'dynamic_config') and cls.dynamic_config.parameters: - min_difficulty = cls.dynamic_config.parameters.get('min_difficulty', 0) - max_difficulty = cls.dynamic_config.parameters.get('max_difficulty', 10) + if hasattr(cls, 'dynamic_config'): + 
min_difficulty = cls.dynamic_config.model_extra.get('min_difficulty', 0) + max_difficulty = cls.dynamic_config.model_extra.get('max_difficulty', 10) if difficulty_score < min_difficulty: result.status = True diff --git a/dingo/model/llm/rag/llm_rag_answer_relevancy.py b/dingo/model/llm/rag/llm_rag_answer_relevancy.py index ec0e0cda..199187cd 100644 --- a/dingo/model/llm/rag/llm_rag_answer_relevancy.py +++ b/dingo/model/llm/rag/llm_rag_answer_relevancy.py @@ -242,14 +242,8 @@ def eval(cls, input_data: Data) -> EvalDetail: try: # 增加温度参数以提高问题生成的随机性 - if hasattr(cls, 'dynamic_config') and cls.dynamic_config.parameters: - if 'temperature' not in cls.dynamic_config.parameters: - cls.dynamic_config.parameters['temperature'] = 0.7 - else: - # 如果没有parameters,创建一个包含temperature的parameters - current_params = cls.dynamic_config.parameters or {} - current_params['temperature'] = 0.7 - cls.dynamic_config.parameters = current_params + if hasattr(cls, 'dynamic_config') and 'temperature' not in cls.dynamic_config.model_extra: + cls.dynamic_config.temperature = 0.7 # 生成多个相关问题 generated_questions = cls.generate_multiple_questions(input_data, cls.strictness) @@ -263,10 +257,9 @@ def eval(cls, input_data: Data) -> EvalDetail: # 根据分数判断是否通过,默认阈值为5 threshold = 5 - if hasattr(cls, 'dynamic_config') and cls.dynamic_config.parameters: - threshold = cls.dynamic_config.parameters.get('threshold', 5) - # 检查是否有自定义的strictness参数 - cls.strictness = cls.dynamic_config.parameters.get('strictness', 3) + if hasattr(cls, 'dynamic_config'): + threshold = cls.dynamic_config.model_extra.get('threshold', 5) + cls.strictness = cls.dynamic_config.model_extra.get('strictness', 3) # 构建详细的reason文本 all_reasons = [] diff --git a/dingo/model/llm/rag/llm_rag_context_precision.py b/dingo/model/llm/rag/llm_rag_context_precision.py index 50f9b661..9c305c4b 100644 --- a/dingo/model/llm/rag/llm_rag_context_precision.py +++ b/dingo/model/llm/rag/llm_rag_context_precision.py @@ -256,8 +256,8 @@ def process_response(cls, 
responses: List[str]) -> EvalDetail: # 根据分数判断是否通过,默认阈值为5 threshold = 5 - if hasattr(cls, 'dynamic_config') and cls.dynamic_config.parameters: - threshold = cls.dynamic_config.parameters.get('threshold', 5) + if hasattr(cls, 'dynamic_config'): + threshold = cls.dynamic_config.model_extra.get('threshold', 5) if score >= threshold: result.status = False diff --git a/dingo/model/llm/rag/llm_rag_context_recall.py b/dingo/model/llm/rag/llm_rag_context_recall.py index 4ba059cc..8d6d06cc 100644 --- a/dingo/model/llm/rag/llm_rag_context_recall.py +++ b/dingo/model/llm/rag/llm_rag_context_recall.py @@ -215,8 +215,8 @@ def process_response(cls, response: str) -> EvalDetail: # 根据分数判断是否通过,默认阈值为5 threshold = 5 - if hasattr(cls, 'dynamic_config') and cls.dynamic_config.parameters: - threshold = cls.dynamic_config.parameters.get('threshold', 5) + if hasattr(cls, 'dynamic_config'): + threshold = cls.dynamic_config.model_extra.get('threshold', 5) if score >= threshold: result.status = False diff --git a/dingo/model/llm/rag/llm_rag_context_relevancy.py b/dingo/model/llm/rag/llm_rag_context_relevancy.py index ca16e289..94204e8a 100644 --- a/dingo/model/llm/rag/llm_rag_context_relevancy.py +++ b/dingo/model/llm/rag/llm_rag_context_relevancy.py @@ -206,8 +206,8 @@ def process_response(cls, response: str) -> EvalDetail: # 根据分数判断是否通过,默认阈值为5 threshold = 5 - if hasattr(cls, 'dynamic_config') and cls.dynamic_config.parameters: - threshold = cls.dynamic_config.parameters.get('threshold', 5) + if hasattr(cls, 'dynamic_config'): + threshold = cls.dynamic_config.model_extra.get('threshold', 5) if score >= threshold: result.status = False diff --git a/dingo/model/llm/rag/llm_rag_faithfulness.py b/dingo/model/llm/rag/llm_rag_faithfulness.py index 2111e77d..fe763ef0 100644 --- a/dingo/model/llm/rag/llm_rag_faithfulness.py +++ b/dingo/model/llm/rag/llm_rag_faithfulness.py @@ -290,8 +290,8 @@ def process_response(cls, response: str) -> EvalDetail: # 根据分数判断是否通过,默认阈值为5 threshold = 5 - if hasattr(cls, 
'dynamic_config') and cls.dynamic_config.parameters: - threshold = cls.dynamic_config.parameters.get('threshold', 5) + if hasattr(cls, 'dynamic_config'): + threshold = cls.dynamic_config.model_extra.get('threshold', 5) if score >= threshold: result.status = False diff --git a/dingo/model/llm/text_quality/base_text_quality.py b/dingo/model/llm/text_quality/base_text_quality.py index 5f3133ea..713e5e06 100644 --- a/dingo/model/llm/text_quality/base_text_quality.py +++ b/dingo/model/llm/text_quality/base_text_quality.py @@ -47,16 +47,11 @@ def process_response(cls, response: str) -> EvalDetail: response_json = json.loads(response) response_model = ResponseScoreTypeNameReason(**response_json) - # Create EvalDetail with all required fields - # status = False for Good quality (no issues found) - # status = True for Bad quality (issues found) - is_good = response_model.type == "Good" - result = EvalDetail( metric=cls.__name__, - status=not is_good, # True if Bad (issues found), False if Good + status=False if response_model.score == 1 else True, score=response_model.score, - label=["QUALITY_GOOD"] if is_good else [f"{response_model.type}.{response_model.name}"], + label=["QUALITY_GOOD"] if response_model.score == 1 else [f"{response_model.type}.{response_model.name}"], reason=[response_model.reason] ) diff --git a/dingo/model/llm/text_quality/llm_text_equation.py b/dingo/model/llm/text_quality/llm_text_equation.py new file mode 100644 index 00000000..e71220d1 --- /dev/null +++ b/dingo/model/llm/text_quality/llm_text_equation.py @@ -0,0 +1,68 @@ +from dingo.io.input import RequiredField +from dingo.model import Model +from dingo.model.llm.text_quality.base_text_quality import BaseTextQuality + + +@Model.llm_register("LLMTextEquation") +class LLMTextEquation(BaseTextQuality): + # Metadata for documentation generation + _metric_info = { + "category": "Pretrain Text Quality Assessment Metrics", + "metric_name": "LLMTextQualityV5", + "description": "Impact-driven text quality 
evaluation for LLM pretraining, focusing on structural completeness, readability, diversity, and safety with quantitative thresholds", + "paper_title": "WanJuanSiLu: A High-Quality Open-Source Webtext Dataset for Low-Resource Languages", + "paper_url": "https://arxiv.org/abs/2501.14506", + "paper_authors": "Yu et al., 2025", + "examples": "examples/llm_and_rule/llm_local.py", + "evaluation_results": "docs/eval/prompt/redpajama_data_evaluated_by_prompt.md" + } + _required_fields = [RequiredField.CONTENT] + prompt = r""" +你是一个专业的数学、化学等学科的公式质检员。我会给你一个从文档中提取的 equation 类型元素(JSON 格式),请对其 text 字段进行质量检测。 + +## 检测维度 + +1. **语法问题** + - LaTeX 命令拼写错误(如 \frace 代替 \frac) + - 括号未正确配对闭合({}、[]、()) + - 环境标签不匹配(如 \begin{} 与 \end{} 不对应) + +2. **识别问题** + - 疑似 OCR 识别错误(如字母与符号混淆:x 与 ×、- 与 −、l 与 1、O 与 0 等) + - 公式内容明显残缺或截断 + - 出现乱码或无意义字符 + +3. **语义问题** + - 公式结构不完整,无法表达完整的数学含义 + - 运算符或符号使用明显不合数学规范 + +## 一级错误类型(type) + +- `syntax`:语法问题 +- `recognition`:识别问题 +- `semantic`:语义问题 + +## 二级错误类型(name) + +- `command_error`:LaTeX 命令拼写错误 +- `bracket_mismatch`:括号未正确配对 +- `env_mismatch`:环境标签不匹配 +- `ocr_error`:OCR 字符识别错误 +- `truncated_content`:公式残缺或截断 +- `garbled_text`:乱码或无意义字符 +- `incomplete_expression`:公式结构不完整 +- `invalid_notation`:数学符号使用不规范 +- `none`:无问题 + +## Output Format + +Return JSON only: {"score": 0/1, "type": "", "name": "", "reason": ""} + +score 类型必须为int; +score 为 1 表示通过,type 填 "Good",name 填 "None",reason 说明公式正常的依据; +score 为 0 表示不通过,type 和 name 填对应的错误类型,reason 说明判断依据并指出具体的问题位置或内容。 + +## Input content to evaluate: + +""" + # process_response method is now inherited from BaseTextQuality diff --git a/dingo/model/llm/text_quality/llm_text_quality_v5.py b/dingo/model/llm/text_quality/llm_text_quality_v5.py index d0b02992..918ecff9 100644 --- a/dingo/model/llm/text_quality/llm_text_quality_v5.py +++ b/dingo/model/llm/text_quality/llm_text_quality_v5.py @@ -30,7 +30,27 @@ class LLMTextQualityV5(BaseTextQuality): **Impact**: Broken structures prevent models from learning correct formatting patterns. 
**Check for**: -- **Error_Formula**: Mathematical expressions with **unmatched delimiters** or **unclosed environments** +- **Error_Formula**: Mathematical content with **broken syntax** OR **systematically stripped symbols/formulas** + + Two failure modes: + + **(A) Broken LaTeX syntax** — delimiters or environments are present but malformed: + - Delimiters unmatched: $ without closing $ (LaTeX context, not dollar signs) + - Environments unclosed: \\begin{{align}} without \\end{{align}} + - Syntax broken: \\frac{{a}}{{b missing closing }} + - HTML tags unclosed: text without + - Impact: Prevents >50% of mainstream parsers from rendering + + **(B) Stripped mathematical content** — symbols/formulas systematically removed during extraction: + - Orphan hyphens from stripped Greek letters: "κ-solutions" → "-solutions", "ε-net" → "-net" + - Empty positions after connective words: "thus ;" or "the interval ;" where a formula was removed + - Sentences referencing variables/expressions that are absent: "a small number" (number missing), "we have ." (equation missing) + - Systematic loss: multiple occurrences throughout the text, not just one or two typos + - Impact: Mathematical text becomes incoherent; models learn broken academic writing patterns + + Example (BAD — stripped symbols): + "Let be a -solution to the Ricci flow which is -noncollapsed. 
Ancient, in the sense that t ranges on the interval ; Bounded curvature, thus ;" + (Greek letters κ stripped from "κ-solution" and "κ-noncollapsed"; interval expression and inequality after "thus" removed entirely) ⚠️ **Normal patterns (DO NOT flag)**: - Mixing inline ($...$) and display ($$...$$) formulas @@ -38,31 +58,39 @@ class LLMTextQualityV5(BaseTextQuality): - Line breaks with \\\\ in alignment environments - HTML tags: x, 2 for subscripts/superscripts - Mixing LaTeX and HTML in web-extracted content - - ✅ **Only flag when**: - - Delimiters unmatched: $ without closing $ (LaTeX context, not dollar signs) - - Environments unclosed: \\begin{{align}} without \\end{{align}} - - Syntax broken: \\frac{{a}}{{b missing closing }} - - HTML tags unclosed: text without + - Plain-text math without any LaTeX (e.g., "a^2 + b^2 = c^2" without $ delimiters) — this is fine as long as the expressions are actually present ⚠️ **Important**: Distinguish LaTeX $ from dollar signs ($100) - Dollar sign: "$100", "$5.99" (followed by numbers) → NOT LaTeX - LaTeX delimiter: "$x$", "$\\alpha$" (contains math symbols) → IS LaTeX - - Example: "The price is $100 and equation $x=y$ costs $50" has 4 dollar symbols but only 2 are LaTeX delimiters (and they match) - - Example (BAD): "$x^2 + y^2 is broken here $$a = b$$$" + - Example (BAD — broken delimiters): "$x^2 + y^2 is broken here $$a = b$$$" (First LaTeX $ never closes, extra $ at end) - Example (GOOD): "The item costs $100 and satisfies $x^2 + y^2 = z^2$ where price is $50" (Dollar signs for money + proper LaTeX pair) - - Impact: Only flag errors that prevent >50% of mainstream parsers (pdflatex, MathJax, KaTeX, Pandoc, Jupyter) from rendering - **Error_Table**: Table structures that are malformed or unreadable - Example (BAD): Misaligned columns, missing headers, or garbled HTML tags - Impact: Models cannot learn proper table representation - **Error_Code**: Code blocks with formatting corruption - - Example (BAD): Line numbers mixed 
with code, broken syntax highlighting markers - - Impact: Teaches incorrect code structure + **Common corruption patterns**: + - Missing code fence (` ``` `): code appears as plain text without language block + - Lost indentation: Python/YAML code with all indentation stripped (flat lines) + - Broken identifiers: spaces injected into tokens, e.g. `sys .argv`, `pts .append`, `i[ 0]` + - Line numbers mixed with code, broken syntax highlighting markers + - Keywords wrapped in inline backticks instead of a fenced block, e.g. `` `import` sys `` + + Example (BAD — indentation and identifiers destroyed): + ``` + `import` sys + pts = [] + for i in range( 1,len(sys .argv), 2): + pts .append([int(sys .argv[i]), int(sys .argv[i +1])]) + ``` + Correct version would have a code fence, proper indentation, and no spaces inside `sys.argv`. + + - Impact: Teaches incorrect code syntax, broken tokenization patterns, and wrong indentation conventions **Key Question**: "Can the model learn proper formatting from this structure?" @@ -160,10 +188,14 @@ class LLMTextQualityV5(BaseTextQuality): Input: "The eigenstate $\\psi_n$ where n is quantum number and energy E2 = m2c4" Output: {{"score": 1, "type": "Good", "name": "None", "reason": "Normal mix of LaTeX and HTML tags from web content"}} -**Example 2 (Bad - Completeness)**: +**Example 2 (Bad - Completeness, broken delimiters)**: Input: "The formula $x^2 + y^2 is broken here $$a = b$$$" Output: {"score": 0, "type": "Completeness", "name": "Error_Formula", "reason": "Unmatched delimiters: first $ never closes, extra $ at end"} +**Example 2.5 (Bad - Completeness, stripped math)**: +Input: "Definition 1.(-solutions) A -solution is a Ricci flow which is -noncollapsed at every scale. 
Ancient, in the sense that t ranges on the interval ; Bounded curvature, thus ;" +Output: {{"score": 0, "type": "Completeness", "name": "Error_Formula", "reason": "Mathematical symbols systematically stripped: Greek letters removed ('-solutions' instead of 'κ-solutions'), formulas missing after 'the interval' and 'thus'"}} + **Example 3 (Bad - Effectiveness)**: Input: "Theappleisredandtasty�withsomegarbledtext□□" Output: {"score": 0, "type": "Effectiveness", "name": "Error_Garbled_Characters", "reason": "Contains encoding corruption (�, □) and missing spaces (>1% of text)"} diff --git a/dingo/model/llm/text_quality/llm_text_table.py b/dingo/model/llm/text_quality/llm_text_table.py new file mode 100644 index 00000000..17897347 --- /dev/null +++ b/dingo/model/llm/text_quality/llm_text_table.py @@ -0,0 +1,70 @@ +from dingo.io.input import RequiredField +from dingo.model import Model +from dingo.model.llm.text_quality.base_text_quality import BaseTextQuality + + +@Model.llm_register("LLMTextTable") +class LLMTextTable(BaseTextQuality): + # Metadata for documentation generation + _metric_info = { + "category": "Pretrain Text Quality Assessment Metrics", + "metric_name": "LLMTextQualityV5", + "description": "Impact-driven text quality evaluation for LLM pretraining, focusing on structural completeness, readability, diversity, and safety with quantitative thresholds", + "paper_title": "WanJuanSiLu: A High-Quality Open-Source Webtext Dataset for Low-Resource Languages", + "paper_url": "https://arxiv.org/abs/2501.14506", + "paper_authors": "Yu et al., 2025", + "examples": "examples/llm_and_rule/llm_local.py", + "evaluation_results": "docs/eval/prompt/redpajama_data_evaluated_by_prompt.md" + } + _required_fields = [RequiredField.CONTENT] + prompt = r""" +你是一个专业的表格数据质检员。我会给你一段从文档中提取的 HTML 表格(table_body 字段),请判断该表格是否存在质量问题。 + +## 检测维度 + +请从以下维度进行检查: + +1. **结构问题** + - HTML 标签不完整或嵌套错误(
| 未正确闭合) + - 行列结构异常(某行 | 数量与其他行差异过大)
+ - 表格内容全部为空
+
+2. **识别问题**
+ - 存在明显乱码或无意义字符
+ - 疑似 OCR 识别错误(如字母/数字混淆:0与O、1与l、S与5等)
+ - 文字截断或内容残缺
+
+3. **语义问题**
+ - 单元格内容语义不连贯,无法理解表格表达的含义
+ - 行列关系混乱,内容错位
+
+## 一级错误类型(type)
+
+- `structure`:结构问题
+- `recognition`:识别问题
+- `semantic`:语义问题
+
+## 二级错误类型(name)
+
+- `tag_error`:标签不完整或嵌套错误
+- `row_col_mismatch`:行列数量不一致
+- `empty_table`:表格内容为空
+- `garbled_text`:乱码或无意义字符
+- `ocr_error`:OCR 字符识别错误
+- `truncated_content`:文字截断或内容残缺
+- `incoherent_semantics`:语义不连贯
+- `misaligned_content`:内容错位
+- `none`:无问题
+
+## Output Format
+
+Return JSON only: {"score": 0/1, "type": "", "name": "", "reason": ""}
+
+score 类型必须为int;
+score 为 1 表示通过,type 填 "Good",name 填 "None",reason 说明表格正常的依据;
+score 为 0 表示不通过,type 和 name 填对应的错误类型,reason 说明判断依据并指出具体位置或内容。
+
+## Input content to evaluate:
+
+"""
+ # process_response method is now inherited from BaseTextQuality
diff --git a/dingo/model/llm/vlm_layout_quality.py b/dingo/model/llm/vlm_layout_quality.py
index 40627a88..e3a5456d 100644
--- a/dingo/model/llm/vlm_layout_quality.py
+++ b/dingo/model/llm/vlm_layout_quality.py
@@ -201,8 +201,8 @@ def send_messages(cls, messages: List):
else:
model_name = cls.client.models.list().data[0].id
- params = cls.dynamic_config.parameters
- cls.validate_config(params)
+ extra_params = cls.dynamic_config.model_extra
+ cls.validate_config(extra_params)
completions = cls.client.chat.completions.create(
model=model_name,
diff --git a/docs/agent_architecture.md b/docs/agent_architecture.md
index c55d34c6..e563357b 100644
--- a/docs/agent_architecture.md
+++ b/docs/agent_architecture.md
@@ -458,7 +458,7 @@ Aggregation:
├─ name: "AgentFactCheck"
├─ config.key: API key
├─ config.model: "gpt-4"
- └─ config.parameters.agent_config:
+ └─ config.agent_config:
├─ max_iterations: 10
└─ tools:
└─ tavily_search:
@@ -539,7 +539,7 @@ Check if tool in available_tools
ToolRegistry.get(tool_name) → tool_class
↓
configure_tool(tool_name, tool_class)
- ├─ Extract config from dynamic_config.parameters.agent_config.tools.{tool_name}
+ ├─ Extract config from dynamic_config.agent_config.tools.{tool_name}
└─ tool_class.update_config(config_dict)
↓
tool_class.execute(**kwargs)
@@ -560,7 +560,7 @@ Return to agent for processing
3. **Three Patterns**: LangChain-based (declarative), Custom Workflow (imperative), Agent-First + Context (hybrid)
4. **Tool System**: Centralized registry with configuration injection
5. **Execution**: Runs in ThreadPoolExecutor alongside other LLMs
-6. **Configuration**: Nested under `parameters.agent_config` in evaluator config
+6. **Configuration**: `agent_config` is a top-level key in evaluator config (flat structure)
7. **Artifact Saving**: ArticleFactChecker auto-saves intermediate artifacts to a timestamped directory by default; override via `agent_config.output_path`, or disable with `agent_config.save_artifacts=false`
### Implementation Checklist
diff --git a/docs/agent_development_guide.md b/docs/agent_development_guide.md
index da071b7c..6d6f80df 100644
--- a/docs/agent_development_guide.md
+++ b/docs/agent_development_guide.md
@@ -441,8 +441,8 @@ def _get_output_dir(cls) -> Optional[str]:
Get output directory for artifact files (three-priority chain).
Returns output dir path (created if needed), or None if saving disabled.
"""
- params = cls.dynamic_config.parameters or {}
- agent_cfg = params.get('agent_config') or {}
+ extra_params = cls.dynamic_config.model_extra
+ agent_cfg = extra_params.get('agent_config') or {}
explicit_path = agent_cfg.get('output_path')
if explicit_path:
@@ -673,17 +673,15 @@ class MyAgent(BaseAgent):
"key": "openai-api-key",
"api_url": "https://api.openai.com/v1",
"model": "gpt-4",
- "parameters": {
- "agent_config": {
- "max_iterations": 3,
- "tools": {
- "my_tool": {
- "api_key": "tool-api-key",
- "max_results": 5
- }
- }
- }
- }
+ "agent_config": {
+ "max_iterations": 3,
+ "tools": {
+ "my_tool": {
+ "api_key": "tool-api-key",
+ "max_results": 5
+ }
+ }
+ }
}
}
"""
@@ -889,19 +887,17 @@ def eval(cls, input_data: Data) -> EvalDetail:
"key": "openai-api-key",
"api_url": "https://api.openai.com/v1",
"model": "gpt-4-turbo",
- "parameters": {
- "temperature": 0.1,
- "agent_config": {
- "max_iterations": 3,
- "tools": {
- "my_tool": {
- "api_key": "my-tool-api-key",
- "max_results": 10,
- "timeout": 30
- },
- "another_tool": {
- "config_key": "value"
- }
+ "temperature": 0.1,
+ "agent_config": {
+ "max_iterations": 3,
+ "tools": {
+ "my_tool": {
+ "api_key": "my-tool-api-key",
+ "max_results": 10,
+ "timeout": 30
+ },
+ "another_tool": {
+ "config_key": "value"
}
}
}
@@ -919,10 +915,10 @@ def eval(cls, input_data: Data) -> EvalDetail:
def some_method(cls):
# Access LLM configuration
model = cls.dynamic_config.model # "gpt-4-turbo"
- temperature = cls.dynamic_config.parameters.get('temperature', 0)
+ temperature = cls.dynamic_config.model_extra.get('temperature', 0)
# Access agent-specific configuration
- agent_config = cls.dynamic_config.parameters.get('agent_config', {})
+ agent_config = cls.dynamic_config.model_extra.get('agent_config', {})
max_iterations = agent_config.get('max_iterations', 5)
# Get tool configuration
@@ -966,10 +962,8 @@ class MyAgent(BaseAgent):
{
"name": "MyAgent",
"config": {
- "parameters": {
- "agent_config": {
- "max_iterations": 10
- }
+ "agent_config": {
+ "max_iterations": 10
}
}
}
@@ -1259,17 +1253,15 @@ Always include SOURCES with specific URLs when you perform web searches."""
"key": "your-openai-api-key",
"api_url": "https://api.openai.com/v1",
"model": "gpt-4-turbo",
- "parameters": {
- "temperature": 0.1,
- "max_tokens": 16384,
- "agent_config": {
- "max_iterations": 5,
- "tools": {
- "tavily_search": {
- "api_key": "your-tavily-api-key",
- "max_results": 5,
- "search_depth": "advanced"
- }
+ "temperature": 0.1,
+ "max_tokens": 16384,
+ "agent_config": {
+ "max_iterations": 5,
+ "tools": {
+ "tavily_search": {
+ "api_key": "your-tavily-api-key",
+ "max_results": 5,
+ "search_depth": "advanced"
}
}
}
@@ -1597,11 +1589,9 @@ config = {
"key": "openai-key",
"api_url": "https://api.openai.com/v1",
"model": "gpt-4",
- "parameters": {
- "agent_config": {
- "tools": {
- "tavily_search": {"api_key": "tavily-key"}
- }
+ "agent_config": {
+ "tools": {
+ "tavily_search": {"api_key": "tavily-key"}
}
}
}
@@ -1632,7 +1622,7 @@ summary = executor.execute()
**Configuration not working:**
- Check JSON structure matches expected format
-- Verify `parameters.agent_config.tools.{tool_name}` structure
+- Verify `agent_config.tools.{tool_name}` structure
- Use Pydantic validation to catch config errors early
**Tests failing:**
diff --git a/docs/article_fact_checking_guide.md b/docs/article_fact_checking_guide.md
index 518b0ff3..43d04947 100644
--- a/docs/article_fact_checking_guide.md
+++ b/docs/article_fact_checking_guide.md
@@ -81,24 +81,22 @@ config = {
"config": {
"key": os.getenv("OPENAI_API_KEY"),
"model": "deepseek-chat", # or "gpt-4o-mini" for OpenAI
- "parameters": {
- "agent_config": {
- "max_iterations": 15,
- "output_path": "outputs/article_factcheck/", # Optional: save intermediate artifacts
- "tools": {
- "claims_extractor": {
- "api_key": os.getenv("OPENAI_API_KEY"),
- "max_claims": 50,
- "claim_types": [
- "factual", "statistical", "attribution", "institutional",
- "temporal", "comparative", "monetary", "technical"
- ]
- },
- "tavily_search": {
- "api_key": os.getenv("TAVILY_API_KEY")
- },
- "arxiv_search": {"max_results": 5}
- }
+ "agent_config": {
+ "max_iterations": 15,
+ "output_path": "outputs/article_factcheck/", # Optional: save intermediate artifacts
+ "tools": {
+ "claims_extractor": {
+ "api_key": os.getenv("OPENAI_API_KEY"),
+ "max_claims": 50,
+ "claim_types": [
+ "factual", "statistical", "attribution", "institutional",
+ "temporal", "comparative", "monetary", "technical"
+ ]
+ },
+ "tavily_search": {
+ "api_key": os.getenv("TAVILY_API_KEY")
+ },
+ "arxiv_search": {"max_results": 5}
}
}
}
@@ -143,19 +141,17 @@ cat > article_check_config.json << EOF
"config": {
"key": "${OPENAI_API_KEY}",
"model": "deepseek-chat",
- "parameters": {
- "agent_config": {
- "max_iterations": 15,
- "tools": {
- "claims_extractor": {
- "api_key": "${OPENAI_API_KEY}",
- "max_claims": 50
- },
- "tavily_search": {
- "api_key": "${TAVILY_API_KEY}"
- },
- "arxiv_search": {}
- }
+ "agent_config": {
+ "max_iterations": 15,
+ "tools": {
+ "claims_extractor": {
+ "api_key": "${OPENAI_API_KEY}",
+ "max_claims": 50
+ },
+ "tavily_search": {
+ "api_key": "${TAVILY_API_KEY}"
+ },
+ "arxiv_search": {}
}
}
}
diff --git a/docs/config.md b/docs/config.md
index 6d1f1d4c..a5020ee4 100644
--- a/docs/config.md
+++ b/docs/config.md
@@ -104,26 +104,22 @@ HuggingFace 特定配置:
#### EvaluatorLLMArgs 配置 (evaluator.llm_config.[llm_name])
-LLM 配置:
+LLM 配置(支持额外字段,所有额外字段会直接透传给 LLM API):
| Parameter | Type | Default | Required | Description |
|-----------|------|---------|----------|-------------|
| model | str | null | No | 使用的模型名称 |
| key | str | null | No | API 密钥 |
| api_url | str | null | No | API URL |
-| parameters | object | null | No | LLM 调参配置 |
-
-##### LLM Parameters 配置
-
-LLM 调参配置:
-
-| Parameter | Type | Default | Description |
-|-----------|------|---------|-------------|
-| temperature | number | 1 | 采样温度,0-2之间 |
-| top_p | number | 1 | 核心采样概率 |
-| max_tokens | number | 4000 | 最大生成token数 |
-| presence_penalty | number | 0 | 存在惩罚,-2.0到2.0之间 |
-| frequency_penalty | number | 0 | 频率惩罚,-2.0到2.0之间 |
+| embedding_config | object | null | No | Embedding 模型独立配置(RAG 评估器使用) |
+| temperature | number | 1 | No | 采样温度,0-2之间 |
+| top_p | number | 1 | No | 核心采样概率 |
+| max_tokens | number | 4000 | No | 最大生成token数 |
+| presence_penalty | number | 0 | No | 存在惩罚,-2.0到2.0之间 |
+| frequency_penalty | number | 0 | No | 频率惩罚,-2.0到2.0之间 |
+| agent_config | object | null | No | Agent 专属配置(max_iterations、tools 等) |
+| threshold | number | - | No | 评估通过阈值(各评估器自定义) |
+| *其他字段* | any | - | No | 所有额外字段直接透传给 LLM API |
## 配置文件示例
@@ -181,13 +177,11 @@ LLM 调参配置:
"model": "gpt-3.5-turbo",
"key": "your-api-key",
"api_url": "https://api.openai.com/v1/chat/completions",
- "parameters": {
- "temperature": 1,
- "top_p": 1,
- "max_tokens": 4000,
- "presence_penalty": 0,
- "frequency_penalty": 0
- }
+ "temperature": 1,
+ "top_p": 1,
+ "max_tokens": 4000,
+ "presence_penalty": 0,
+ "frequency_penalty": 0
}
}
}
diff --git a/docs/factcheck_guide.md b/docs/factcheck_guide.md
index 4112707f..7abed067 100644
--- a/docs/factcheck_guide.md
+++ b/docs/factcheck_guide.md
@@ -101,9 +101,7 @@ input_data = {
"model": "deepseek-chat",
"key": "your-api-key",
"api_url": "https://api.deepseek.com/v1",
- "parameters": {
- "temperature": 0.1
- }
+ "temperature": 0.1
}
}
}
diff --git a/docs/factuality_assessment_guide.md b/docs/factuality_assessment_guide.md
index 13680cc2..6f670fcc 100644
--- a/docs/factuality_assessment_guide.md
+++ b/docs/factuality_assessment_guide.md
@@ -59,7 +59,7 @@ LLMFactCheck.dynamic_config = EvaluatorLLMArgs(
key=os.getenv("OPENAI_API_KEY"),
api_url=os.getenv("OPENAI_BASE_URL", "https://api.openai.com/v1"),
model=os.getenv("OPENAI_MODEL", "gpt-4o-mini"),
- parameters={"threshold": 5.0}
+ threshold=5.0
)
# Prepare data
@@ -108,7 +108,7 @@ input_data = {
"model": "gpt-4o-mini",
"key": "YOUR_API_KEY",
"api_url": "https://api.openai.com/v1",
- "parameters": {"threshold": 5.0}
+ "threshold": 5.0
}
}
]
@@ -142,7 +142,7 @@ LLMFactCheck.dynamic_config = EvaluatorLLMArgs(
key="YOUR_API_KEY",
api_url="https://api.openai.com/v1",
model="gpt-4o-mini",
- parameters={"threshold": 5.0} # Range: 0.0-10.0
+ threshold=5.0 # Range: 0.0-10.0
)
```
diff --git a/docs/hallucination_detection_guide.md b/docs/hallucination_detection_guide.md
index d6fceea9..4da27e0d 100644
--- a/docs/hallucination_detection_guide.md
+++ b/docs/hallucination_detection_guide.md
@@ -131,7 +131,7 @@ LLMHallucination.dynamic_config = EvaluatorLLMArgs(
key=os.getenv("OPENAI_API_KEY"),
api_url=os.getenv("OPENAI_BASE_URL", "https://api.openai.com/v1"),
model=os.getenv("OPENAI_MODEL", "gpt-4o-mini"),
- parameters={"threshold": 0.5}
+ threshold=0.5
)
# Prepare data
@@ -218,7 +218,7 @@ LLMHallucination.dynamic_config = EvaluatorLLMArgs(
key="YOUR_API_KEY",
api_url="https://api.openai.com/v1",
model="gpt-4o-mini",
- parameters={"threshold": 0.5} # Range: 0.0-1.0
+ threshold=0.5 # Range: 0.0-1.0
)
```
diff --git a/docs/html_extract_compare_v2.md b/docs/html_extract_compare_v2.md
index c0d92242..b194b882 100644
--- a/docs/html_extract_compare_v2.md
+++ b/docs/html_extract_compare_v2.md
@@ -62,9 +62,7 @@ data = Data(
data_id="unique_id_001", # 必需:数据的唯一标识符
prompt="工具A提取的文本内容", # 必需
content="工具B提取的文本内容", # 必需
- raw_data={
- "language": "zh", # 可选,默认 "en"
- }
+ language="zh", # 可选,默认 "en";也可放在 raw_data["language"]
)
```
@@ -116,9 +114,7 @@ data = Data(
data_id="test_001",
prompt="工具A提取的内容...",
content="工具B提取的文本内容",
- raw_data={
- "language": "zh"
- }
+ language="zh", # 可选
)
# 执行评估
@@ -131,70 +127,76 @@ print(f"推理: {result.reason[0]}")
### 批量评估(使用 Executor)
-推荐使用 Executor 进行大规模批量评估,支持并发处理和结果保存。
+推荐使用 Executor 进行大规模批量评估,支持并发处理和结果保存。配置需与 `InputArgs` 一致:`evaluator` 为列表,每项包含 `fields`(列名映射到 `Data`)与 `evals`(评估器及 `config`)。
+
+`LLMHtmlExtractCompareV2` 约定:`prompt` = 工具 A 文本,`content` = 工具 B 文本;`language` 可选,缺省为 `"en"`。
```python
+import os
from pathlib import Path
from dingo.config.input_args import InputArgs
from dingo.exec.base import Executor
-# 配置参数
+common_config = {
+ "model": os.getenv("OPENAI_MODEL", "deepseek-chat"),
+ "key": os.getenv("OPENAI_API_KEY"),
+ "api_url": os.getenv("OPENAI_BASE_URL", "https://api.deepseek.com/v1"),
+}
+
input_data = {
- "task_name": "html_extract_compare_evaluation",
+ "task_name": "html_extract_compare_v2_evaluation",
"input_path": str(Path("test/data/html_extract_compare_test.jsonl")),
"output_path": "output/html_extract_compare_evaluation/",
-
- # 数据集配置
"dataset": {
"source": "local",
"format": "jsonl",
- "field": {
- "id": "data_id",
- "content": "content"
- # magic_md 和 language 会自动放入 raw_data
- }
},
-
- # 执行器配置
"executor": {
- "eval_group": "html_extract_compare", # 评估组
- "max_workers": 4, # 并发数
+ "max_workers": 4,
+ "batch_size": 1,
"result_save": {
- "bad": True, # 保存问题样本
- "good": True # 保存正常样本
- }
+ "bad": True, # 保存工具 B 更优的样本(status=True,对应判断 C)
+ "good": True, # 保存工具 A 更好或相当的样本
+ },
},
-
- # LLM 配置
- "evaluator": {
- "llm_config": {
- "LLMHtmlExtractCompareV2": {
- "model": "deepseek-chat",
- "key": "your_api_key",
- "api_url": "https://api.deepseek.com/v1"
- }
+ "evaluator": [
+ {
+ # 将 JSONL 列映射到 Data:prompt=工具A,content=工具B
+ "fields": {
+ "id": "data_id",
+ "prompt": "method1",
+ "content": "method2",
+ "language": "language",
+ },
+ "evals": [
+ {"name": "LLMHtmlExtractCompareV2", "config": common_config},
+ ],
}
- }
+ ],
}
-# 执行评估
input_args = InputArgs(**input_data)
executor = Executor.exec_map["local"](input_args)
result = executor.execute()
-# 查看结果
print(f"总样本数: {result.total}")
print(f"工具B更好: {result.num_bad}")
-print(f"工具A更好或相同: {result.total - result.num_bad}")
+print(f"工具A更好或相同: {result.num_good}")
```
+若你的数据列名为 `content` / `magic_md`,只需将 `fields` 改为 `"prompt": "content", "content": "magic_md"` 等即可。
+
#### JSONL 数据格式
+与仓库内 `test/data/html_extract_compare_test.jsonl` 对齐:每行一条 JSON,至少包含唯一标识、两种提取结果与可选语言。
+
```jsonl
-{"data_id": "001", "content": "工具A文本", "magic_md": "工具B文本", "language": "zh"}
-{"data_id": "002", "content": "Tool A text", "magic_md": "Tool B text", "language": "en"}
+{"data_id": "001", "method1": "工具A提取的Markdown文本...", "method2": "工具B提取的Markdown文本...", "language": "zh"}
+{"data_id": "002", "method1": "Tool A extracted text...", "method2": "Tool B extracted text...", "language": "en"}
```
+`method1` / `method2` 仅为示例列名;实际列名通过 `evaluator[].fields` 中的 `prompt` / `content` 映射指定。
+
## 与 V1 版本的对比
| 特性 | V1 | V2 |
diff --git a/docs/instruction_quality_guide.md b/docs/instruction_quality_guide.md
index eb1b08d1..2caae4dc 100644
--- a/docs/instruction_quality_guide.md
+++ b/docs/instruction_quality_guide.md
@@ -197,7 +197,7 @@ input_data = {
"model": "deepseek-chat",
"key": "your-api-key",
"api_url": "https://api.deepseek.com",
- "parameters": {"threshold": 6.0}
+ "threshold": 6.0
}
}
]
@@ -223,10 +223,8 @@ print(f"清晰指令: {summary.num_good}/{summary.total}")
"model": "deepseek-chat",
"key": "your-api-key",
"api_url": "https://api.deepseek.com",
- "parameters": {
- "min_difficulty": 3.0, # 可选:过滤太简单的
- "max_difficulty": 8.0, # 可选:过滤太难的
- }
+ "min_difficulty": 3.0, # 可选:过滤太简单的
+ "max_difficulty": 8.0, # 可选:过滤太难的
}
}
]
@@ -303,13 +301,13 @@ python examples/custom/evaluate_instruction_quality.py distribution
**问题1: 过多简单指令**
```python
# 设置最低难度阈值
-"parameters": {"min_difficulty": 3.0}
+"min_difficulty": 3.0
```
**问题2: 指令模糊不清**
```python
# 提高清晰度要求
-"parameters": {"threshold": 7.0}
+"threshold": 7.0
```
**问题3: 难度分布不均**
diff --git a/docs/metrics.md b/docs/metrics.md
index bfde1ea7..44e75482 100644
--- a/docs/metrics.md
+++ b/docs/metrics.md
@@ -20,11 +20,14 @@ This document provides comprehensive information about all quality metrics used
|------|--------|-------------|--------------|-------------------|----------|
| `LLMCodeCompare` | LLMCodeCompare | Compares the effectiveness of two tools in extracting code blocks from HTML to Markdown format by evaluating recognit... | Internal Implementation | N/A | N/A |
| `LLMDatamanAssessment` | LLMDatamanAssessment | Evaluates pre-training data quality using the DataMan methodology (14 standards, 15 domains). Assigns a score (0/1), ... | [DataMan: Data Manager for Pre-training Large Language Models](https://arxiv.org/abs/2502.19363) (Peng et al., 2025) | N/A | N/A |
+| `LLMHtmlExtractCompareV2` | LLMHtmlExtractCompareV2 | Compares two HTML main-content extraction tools by computing text diffs and using LLM to judge which preserves more c... | Internal Implementation | N/A | N/A |
| `LLMMathCompare` | LLMMathCompare | Compares the effectiveness of two tools in extracting mathematical formulas from HTML to Markdown format by evaluatin... | Internal Implementation | N/A | N/A |
| `LLMSecurityPolitics` | LLMSecurityPolitics | Evaluates whether the text contains politics-related content | Internal Implementation | N/A | N/A |
| `LLMTableCompare` | LLMTableCompare | Compares the effectiveness of two tools in extracting tables from HTML to Markdown format by evaluating recognition r... | Internal Implementation | N/A | N/A |
+| `LLMTextEquation` | LLMTextEquation | Impact-driven text quality evaluation for LLM pretraining, focusing on structural completeness, readability, diversit... | [WanJuanSiLu: A High-Quality Open-Source Webtext Dataset for Low-Resource Languages](https://arxiv.org/abs/2501.14506) (Yu et al., 2025) | [📊 See Results](eval/prompt/redpajama_data_evaluated_by_prompt.md) | [📝 View Example](../examples/llm_and_rule/llm_local.py) |
| `LLMTextQualityV4` | LLMTextQualityV4 | Enhanced text quality evaluation covering completeness (formulas, tables, code), effectiveness (garbled text, spacing... | [WanJuanSiLu: A High-Quality Open-Source Webtext Dataset for Low-Resource Languages](https://arxiv.org/abs/2501.14506) (Yu et al., 2025) | [📊 See Results](eval/prompt/redpajama_data_evaluated_by_prompt.md) | N/A |
| `LLMTextQualityV5` | LLMTextQualityV5 | Impact-driven text quality evaluation for LLM pretraining, focusing on structural completeness, readability, diversit... | [WanJuanSiLu: A High-Quality Open-Source Webtext Dataset for Low-Resource Languages](https://arxiv.org/abs/2501.14506) (Yu et al., 2025) | [📊 See Results](eval/prompt/redpajama_data_evaluated_by_prompt.md) | [📝 View Example](../examples/llm_and_rule/llm_local.py) |
+| `LLMTextTable` | LLMTextTable | Impact-driven text quality evaluation for LLM pretraining, focusing on structural completeness, readability, diversit... | [WanJuanSiLu: A High-Quality Open-Source Webtext Dataset for Low-Resource Languages](https://arxiv.org/abs/2501.14506) (Yu et al., 2025) | [📊 See Results](eval/prompt/redpajama_data_evaluated_by_prompt.md) | [📝 View Example](../examples/llm_and_rule/llm_local.py) |
### SFT Data Assessment Metrics
@@ -57,7 +60,7 @@ This document provides comprehensive information about all quality metrics used
| Type | Metric | Description | Paper Source | Evaluation Results | Examples |
|------|--------|-------------|--------------|-------------------|----------|
| `QUALITY_BAD_COMPLETENESS` | RuleLineEndWithEllipsis, RuleLineEndWithTerminal, RuleSentenceNumber, RuleWordNumber | Checks whether the ratio of lines ending with ellipsis is below threshold; Checks whether the ratio of lines ending w... | [RedPajama: an Open Dataset for Training Large Language Models](https://github.com/togethercomputer/RedPajama-Data) (Together Computer, 2023) | [📊 See Results](eval/rule/slimpajama_data_evaluated_by_rule.md) | N/A |
-| `QUALITY_BAD_EFFECTIVENESS` | RuleAbnormalChar, RuleAbnormalHtml, RuleAlphaWords, RuleAudioDataFormat, RuleCharNumber, RuleColonEnd, RuleContentNull, RuleContentShort, RuleContentShortMultiLan, RuleEnterAndSpace, RuleEnterMore, RuleEnterRatioMore, RuleHtmlEntity, RuleHtmlTag, RuleInvisibleChar, RuleImageDataFormat, RuleLatexSpecialChar, RuleLineJavascriptCount, RuleLoremIpsum, RuleMeanWordLength, RuleNlpDataFormat, RuleSftDataFormat, RuleSpaceMore, RuleSpecialCharacter, RuleStopWord, RuleSymbolWordRatio, RuleVedioDataFormat, RuleOnlyUrl, RuleDoi, RuleIsbn | Detects garbled text and anti-crawling characters by combining special character and invisible character detection; D... | [RedPajama: an Open Dataset for Training Large Language Models](https://github.com/togethercomputer/RedPajama-Data) (Together Computer, 2023) | [📊 See Results](eval/rule/slimpajama_data_evaluated_by_rule.md) | N/A |
+| `QUALITY_BAD_EFFECTIVENESS` | RuleDoi, RuleIsbn, RuleAbnormalChar, RuleAbnormalHtml, RuleAlphaWords, RuleAudioDataFormat, RuleCharNumber, RuleColonEnd, RuleContentNull, RuleContentShort, RuleContentShortMultiLan, RuleEnterAndSpace, RuleEnterMore, RuleEnterRatioMore, RuleHtmlEntity, RuleHtmlTag, RuleInvisibleChar, RuleImageDataFormat, RuleLatexSpecialChar, RuleLineJavascriptCount, RuleLoremIpsum, RuleMeanWordLength, RuleNlpDataFormat, RuleSftDataFormat, RuleSpaceMore, RuleSpecialCharacter, RuleStopWord, RuleSymbolWordRatio, RuleVedioDataFormat, RuleOnlyUrl | Check whether the string is in the correct format of the doi; Check whether the string is in the correct format of th... | Internal Implementation | N/A | N/A |
| `QUALITY_BAD_FLUENCY` | RuleAbnormalNumber, RuleCharSplit, RuleNoPunc, RuleWordSplit, RuleWordStuck | Checks PDF content for abnormal book page or index numbers that disrupt text flow; Checks PDF content for abnormal ch... | [RedPajama: an Open Dataset for Training Large Language Models](https://github.com/togethercomputer/RedPajama-Data) (Together Computer, 2023) | [📊 See Results](eval/rule/slimpajama_data_evaluated_by_rule.md) | N/A |
| `QUALITY_BAD_RELEVANCE` | RuleHeadWordAr, RuleHeadWordCs, RuleHeadWordHu, RuleHeadWordKo, RuleHeadWordRu, RuleHeadWordSr, RuleHeadWordTh, RuleHeadWordVi, RulePatternSearch, RuleWatermark | Checks whether Arabic content contains irrelevant tail source information; Checks whether Czech content contains irre... | [RedPajama: an Open Dataset for Training Large Language Models](https://github.com/togethercomputer/RedPajama-Data) (Together Computer, 2023) | [📊 See Results](eval/rule/slimpajama_data_evaluated_by_rule.md) | N/A |
| `QUALITY_BAD_SECURITY` | RuleIDCard, RuleUnsafeWords, RulePIIDetection | Checks whether content contains ID card information; Checks whether content contains unsafe words; Detects Personal I... | [RedPajama: an Open Dataset for Training Large Language Models](https://github.com/togethercomputer/RedPajama-Data) (Together Computer, 2023) | [📊 See Results](eval/rule/slimpajama_data_evaluated_by_rule.md) | N/A |
diff --git a/docs/rag_evaluation_metrics.md b/docs/rag_evaluation_metrics.md
index 1c11c5dc..c2fce750 100644
--- a/docs/rag_evaluation_metrics.md
+++ b/docs/rag_evaluation_metrics.md
@@ -86,10 +86,8 @@ llm_config_embedding = {
"api_url": "https://api.openai.com/v1",
"key": "YOUR_API_KEY"
},
- "parameters": {
- "strictness": 3,
- "threshold": 5
- }
+ "strictness": 3,
+ "threshold": 5
}
input_data = {
@@ -170,7 +168,8 @@ summary = executor.execute()
"api_url": "https://api.deepseek.com",
"key": "YOUR_API_KEY"
},
- "parameters": {"strictness": 3, "threshold": 5}
+ "strictness": 3,
+ "threshold": 5
}
```
@@ -186,7 +185,8 @@ summary = executor.execute()
"api_url": "http://localhost:8000/v1", # Local vLLM/Xinference
"key": "dummy-key"
},
- "parameters": {"strictness": 3, "threshold": 5}
+ "strictness": 3,
+ "threshold": 5
}
```
diff --git a/docs/rag_evaluation_metrics_zh.md b/docs/rag_evaluation_metrics_zh.md
index 099addb4..963b02dd 100644
--- a/docs/rag_evaluation_metrics_zh.md
+++ b/docs/rag_evaluation_metrics_zh.md
@@ -123,10 +123,8 @@ input_data = {
"api_url": OPENAI_URL,
"key": OPENAI_KEY
},
- "parameters": {
- "strictness": 3,
- "threshold": 5
- }
+ "strictness": 3,
+ "threshold": 5
}
},
{
@@ -466,7 +464,7 @@ LLMRAGFaithfulness.dynamic_config = EvaluatorLLMArgs(
key="YOUR_API_KEY",
api_url="https://api.openai.com/v1",
model="gpt-4o-mini",
- parameters={"threshold": 7} # 自定义阈值
+ threshold=7 # 自定义阈值
)
# Answer Relevancy 特殊配置(需要 embedding)⭐
@@ -480,10 +478,8 @@ LLMRAGAnswerRelevancy.dynamic_config = EvaluatorLLMArgs(
api_url="https://api.openai.com/v1",
key="YOUR_API_KEY"
),
- parameters={
- "strictness": 3, # 生成问题数量
- "threshold": 5 # 通过阈值
- }
+ strictness=3, # 生成问题数量
+ threshold=5 # 通过阈值
)
```
@@ -499,7 +495,7 @@ LLMRAGAnswerRelevancy.dynamic_config = EvaluatorLLMArgs(
"model": "gpt-4o-mini",
"key": "YOUR_API_KEY",
"api_url": "https://api.openai.com/v1",
- "parameters": {"threshold": 7}
+ "threshold": 7
}
},
{
@@ -513,10 +509,8 @@ LLMRAGAnswerRelevancy.dynamic_config = EvaluatorLLMArgs(
"api_url": "https://api.openai.com/v1",
"key": "YOUR_API_KEY"
},
- "parameters": {
- "strictness": 3,
- "threshold": 5
- }
+ "strictness": 3,
+ "threshold": 5
}
}
]
@@ -528,8 +522,8 @@ LLMRAGAnswerRelevancy.dynamic_config = EvaluatorLLMArgs(
| 参数 | 适用指标 | 默认值 | 说明 |
|------|---------|--------|------|
-| `threshold` | 所有指标 | 5.0 | 通过阈值(0-10),在 `parameters` 中配置 |
-| `strictness` | Answer Relevancy | 3 | 生成问题数量(1-5),在 `parameters` 中配置 |
+| `threshold` | 所有指标 | 5.0 | 通过阈值(0-10),直接在 `config` 中配置 |
+| `strictness` | Answer Relevancy | 3 | 生成问题数量(1-5),直接在 `config` 中配置 |
| `embedding_config` | Answer Relevancy | - | **必需配置**,包含 `model`(模型名)、`api_url`(服务地址)、`key`(API密钥) |
## 📊 指标详细说明
diff --git a/docs/technical/technical_all.md b/docs/technical/technical_all.md
index 45833111..212fd15c 100644
--- a/docs/technical/technical_all.md
+++ b/docs/technical/technical_all.md
@@ -220,9 +220,14 @@ dingo 在使用提示词进行评估任务的时候,必须同时使用场景
+ model
+ key
+ api_url
-+ parameters
++ temperature(直接平铺在配置中)
++ top_p
++ max_tokens
++ presence_penalty
++ frequency_penalty
++ agent_config(Agent 评估器专用,包含 max_iterations、tools 等)
-需要注意的是参数 [parameters](config.md#parameters) ,这个参数会对模型的推理产生影响,可以设置的值包括:
+LLM 调参配置直接平铺在 `config` 对象中(不再嵌套在 `parameters` 字段下),会对模型推理产生影响,可以设置的值包括:
+ temperature
+ top_p
+ max_tokens
diff --git a/examples/agent/agent_article_fact_checking_example.py b/examples/agent/agent_article_fact_checking_example.py
index 45b0ad60..3071f45f 100644
--- a/examples/agent/agent_article_fact_checking_example.py
+++ b/examples/agent/agent_article_fact_checking_example.py
@@ -58,32 +58,30 @@ def main() -> int:
"key": openai_key,
"model": "intern-s1-pro",
"api_url": "https://chat.intern-ai.org.cn/api/v1/",
- "parameters": {
- "timeout": 600,
- "temperature": 0, # deterministic output
- "agent_config": {
- "max_concurrent_claims": 10,
- "max_iterations": 50,
- # Artifacts auto-saved to outputs/article_factcheck_ |