diff --git a/.claude/skills/dingo-verify/scripts/fact_check.py b/.claude/skills/dingo-verify/scripts/fact_check.py index 23748206..71dd158d 100644 --- a/.claude/skills/dingo-verify/scripts/fact_check.py +++ b/.claude/skills/dingo-verify/scripts/fact_check.py @@ -169,13 +169,11 @@ def build_config( "key": api_key, "model": model, "api_url": api_url, - "parameters": { - "temperature": 0, - "agent_config": { - "max_concurrent_claims": max_concurrent, - "max_iterations": 50, - "tools": tools_config, - } + "temperature": 0, + "agent_config": { + "max_concurrent_claims": max_concurrent, + "max_iterations": 50, + "tools": tools_config, } } }] diff --git a/clawhub/scripts/fact_check.py b/clawhub/scripts/fact_check.py index b2865496..a4930f14 100644 --- a/clawhub/scripts/fact_check.py +++ b/clawhub/scripts/fact_check.py @@ -164,13 +164,11 @@ def build_config( "key": api_key, "model": model, "api_url": api_url, - "parameters": { - "temperature": 0, - "agent_config": { - "max_concurrent_claims": max_concurrent, - "max_iterations": 50, - "tools": tools_config, - } + "temperature": 0, + "agent_config": { + "max_concurrent_claims": max_concurrent, + "max_iterations": 50, + "tools": tools_config, } } }] diff --git a/dingo/config/input_args.py b/dingo/config/input_args.py index df1007a4..de589a69 100644 --- a/dingo/config/input_args.py +++ b/dingo/config/input_args.py @@ -102,10 +102,11 @@ class EmbeddingConfigArgs(BaseModel): class EvaluatorLLMArgs(BaseModel): + model_config = {"extra": "allow"} + model: Optional[str] = None key: Optional[str] = None api_url: Optional[str] = None - parameters: Optional[dict] = None embedding_config: Optional[EmbeddingConfigArgs] = None diff --git a/dingo/model/llm/agent/agent_article_fact_checker.py b/dingo/model/llm/agent/agent_article_fact_checker.py index 1a0006cb..b72b9787 100644 --- a/dingo/model/llm/agent/agent_article_fact_checker.py +++ b/dingo/model/llm/agent/agent_article_fact_checker.py @@ -343,23 +343,21 @@ class ArticleFactChecker(BaseAgent): "config": { "key": "your-openai-api-key", "model": "gpt-4o-mini", - "parameters": { - "agent_config": { - "max_iterations": 10, - "overall_timeout": 900, - "max_concurrent_claims": 5, - "tools": { - "claims_extractor": { - "api_key": "your-openai-api-key", - "max_claims": 50, - "claim_types": ["factual", "institutional", "statistical", "attribution"] - }, - "tavily_search": { - "api_key": "your-tavily-api-key", - "max_results": 5 - }, - "arxiv_search": {"max_results": 5} - } + "agent_config": { + "max_iterations": 10, + "overall_timeout": 900, + "max_concurrent_claims": 5, + "tools": { + "claims_extractor": { + "api_key": "your-openai-api-key", + "max_claims": 50, + "claim_types": ["factual", "institutional", "statistical", "attribution"] + }, + "tavily_search": { + "api_key": "your-tavily-api-key", + "max_results": 5 + }, + "arxiv_search": {"max_results": 5} } } } @@ -399,8 +397,8 @@ def _get_output_dir(cls) -> Optional[str]: Returns: Output directory path (created if needed), or None if saving is disabled. """ - params = cls.dynamic_config.parameters or {} - agent_cfg = params.get('agent_config') or {} + extra_params = cls.dynamic_config.model_extra + agent_cfg = extra_params.get('agent_config') or {} explicit_path = agent_cfg.get('output_path') if explicit_path: @@ -821,9 +819,8 @@ def eval(cls, input_data: Data) -> EvalDetail: output_dir = cls._get_output_dir() if cls.dynamic_config: - if cls.dynamic_config.parameters is None: - cls.dynamic_config.parameters = {} - cls.dynamic_config.parameters.setdefault("temperature", 0) + if 'temperature' not in cls.dynamic_config.model_extra: + cls.dynamic_config.temperature = 0 if output_dir and input_data.content: cls._save_article_content(output_dir, input_data.content) @@ -946,8 +943,8 @@ async def _async_extract_claims(cls, input_data: Data) -> List[Dict]: """ from dingo.model.llm.agent.tools.claims_extractor import ClaimsExtractor, ClaimsExtractorConfig - params = cls.dynamic_config.parameters or {} - agent_cfg = params.get('agent_config') or {} + extra_params = cls.dynamic_config.model_extra + agent_cfg = extra_params.get('agent_config') or {} extractor_cfg = agent_cfg.get('tools', {}).get('claims_extractor', {}) config_kwargs: Dict[str, Any] = { @@ -1043,8 +1040,8 @@ async def _async_verify_single_claim( @classmethod def _get_max_concurrent_claims(cls) -> int: """Read max_concurrent_claims from agent_config or use class default.""" - params = cls.dynamic_config.parameters or {} - agent_cfg = params.get('agent_config') or {} + extra_params = cls.dynamic_config.model_extra + agent_cfg = extra_params.get('agent_config') or {} return agent_cfg.get('max_concurrent_claims', cls.max_concurrent_claims) @classmethod @@ -1054,8 +1051,8 @@ def _get_overall_timeout(cls) -> float: Returns: Positive timeout in seconds, clamped to [30, 7200]. """ - params = cls.dynamic_config.parameters or {} - agent_cfg = params.get('agent_config') or {} + extra_params = cls.dynamic_config.model_extra + agent_cfg = extra_params.get('agent_config') or {} raw = agent_cfg.get('overall_timeout', cls.overall_timeout) try: timeout = float(raw) diff --git a/dingo/model/llm/agent/agent_fact_check.py b/dingo/model/llm/agent/agent_fact_check.py index 190e105d..5246d7a1 100644 --- a/dingo/model/llm/agent/agent_fact_check.py +++ b/dingo/model/llm/agent/agent_fact_check.py @@ -70,15 +70,13 @@ class AgentFactCheck(BaseAgent): "key": "your-openai-api-key", "api_url": "https://api.openai.com/v1", "model": "gpt-4.1-mini-2025-04-14", - "parameters": { - "agent_config": { - "max_iterations": 5, - "tools": { - "tavily_search": { - "api_key": "your-tavily-api-key", - "max_results": 5, - "search_depth": "advanced" - } + "agent_config": { + "max_iterations": 5, + "tools": { + "tavily_search": { + "api_key": "your-tavily-api-key", + "max_results": 5, + "search_depth": "advanced" } } } diff --git a/dingo/model/llm/agent/agent_hallucination.py b/dingo/model/llm/agent/agent_hallucination.py index 0e39c48c..fc22ba56 100644 --- a/dingo/model/llm/agent/agent_hallucination.py +++ b/dingo/model/llm/agent/agent_hallucination.py @@ -82,15 +82,13 @@ class AgentHallucination(BaseAgent): "key": "your-openai-api-key", "api_url": "https://api.openai.com/v1", "model": "gpt-4.1-mini-2025-04-14", - "parameters": { - "agent_config": { - "max_iterations": 3, - "tools": { - "tavily_search": { - "api_key": "your-tavily-api-key", - "max_results": 5, - "search_depth": "advanced" - } + "agent_config": { + "max_iterations": 3, + "tools": { + "tavily_search": { + "api_key": "your-tavily-api-key", + "max_results": 5, + "search_depth": "advanced" } } } diff --git a/dingo/model/llm/agent/agent_wrapper.py b/dingo/model/llm/agent/agent_wrapper.py index 4240c1ef..8c20247d 100644 --- a/dingo/model/llm/agent/agent_wrapper.py +++ b/dingo/model/llm/agent/agent_wrapper.py @@ -327,22 +327,22 @@ def get_openai_llm_from_dingo_config(dynamic_config): ) # Extract parameters - params = dynamic_config.parameters or {} + extra_params = dynamic_config.model_extra # Create ChatOpenAI instance llm = ChatOpenAI( api_key=dynamic_config.key, base_url=dynamic_config.api_url, model=dynamic_config.model or "gpt-4.1-mini", - temperature=params.get("temperature", 0.3), - max_tokens=params.get("max_tokens", 4096), - top_p=params.get("top_p", 1.0), - timeout=params.get("timeout", 30) + temperature=extra_params.get("temperature", 0.3), + max_tokens=extra_params.get("max_tokens", 4096), + top_p=extra_params.get("top_p", 1.0), + timeout=extra_params.get("timeout", 30) ) log.debug( f"Created ChatOpenAI: model={dynamic_config.model}, " - f"temp={params.get('temperature', 0.3)}" + f"temp={extra_params.get('temperature', 0.3)}" ) return llm diff --git a/dingo/model/llm/agent/base_agent.py b/dingo/model/llm/agent/base_agent.py index 3832cefe..d3db23d2 100644 --- a/dingo/model/llm/agent/base_agent.py +++ b/dingo/model/llm/agent/base_agent.py @@ -146,7 +146,7 @@ def get_tool_config(cls, tool_name: str) -> Dict[str, Any]: Extract tool configuration from agent's dynamic_config. Configuration is expected in: - dynamic_config.parameters.agent_config.tools.{tool_name} + dynamic_config.agent_config.tools.{tool_name} Args: tool_name: Name of the tool @@ -154,8 +154,8 @@ def get_tool_config(cls, tool_name: str) -> Dict[str, Any]: Returns: Dict of configuration values for the tool """ - params = cls.dynamic_config.parameters or {} - agent_config = params.get('agent_config', {}) + extra_params = cls.dynamic_config.model_extra + agent_config = extra_params.get('agent_config', {}) tools_config = agent_config.get('tools', {}) return tools_config.get(tool_name, {}) @@ -184,8 +184,8 @@ def get_max_iterations(cls) -> int: Returns: Maximum number of iterations allowed """ - params = cls.dynamic_config.parameters or {} - agent_config = params.get('agent_config', {}) + extra_params = cls.dynamic_config.model_extra + agent_config = extra_params.get('agent_config', {}) return agent_config.get('max_iterations', cls.max_iterations) @classmethod diff --git a/dingo/model/llm/base_openai.py b/dingo/model/llm/base_openai.py index b6fbcd52..c3911699 100644 --- a/dingo/model/llm/base_openai.py +++ b/dingo/model/llm/base_openai.py @@ -82,22 +82,18 @@ def send_messages(cls, messages: List): else: model_name = cls.client.models.list().data[0].id - params = cls.dynamic_config.parameters - cls.validate_config(params) + extra_params = cls.dynamic_config.model_extra + cls.validate_config(extra_params) completions = cls.client.chat.completions.create( model=model_name, messages=messages, - temperature=params.get("temperature", 0.3) if params else 0.3, - top_p=params.get("top_p", 1) if params else 1, - max_tokens=params.get("max_tokens", 4000) if params else 4000, - presence_penalty=params.get("presence_penalty", 0) if params else 0, - frequency_penalty=params.get("frequency_penalty", 0) if params else 0, + **extra_params, ) if completions.choices[0].finish_reason == "length": raise ExceedMaxTokens( - f"Exceed max tokens: {params.get('max_tokens', 4000) if params else 4000}" + f"Exceed max tokens: {extra_params.get('max_tokens', 4000)}" ) return str(completions.choices[0].message.content) diff --git a/dingo/model/llm/instruction_quality/llm_instruction_clarity.py b/dingo/model/llm/instruction_quality/llm_instruction_clarity.py index 9c73a35d..526611d8 100644 --- a/dingo/model/llm/instruction_quality/llm_instruction_clarity.py +++ b/dingo/model/llm/instruction_quality/llm_instruction_clarity.py @@ -283,8 +283,8 @@ def process_response(cls, response: str) -> EvalDetail: # 判断是否通过(默认阈值 6.0) threshold = 6.0 - if hasattr(cls, 'dynamic_config') and cls.dynamic_config.parameters: - threshold = cls.dynamic_config.parameters.get('threshold', 6.0) + if hasattr(cls, 'dynamic_config'): + threshold = cls.dynamic_config.model_extra.get('threshold', 6.0) if score >= threshold: result.status = False diff --git a/dingo/model/llm/instruction_quality/llm_task_difficulty.py b/dingo/model/llm/instruction_quality/llm_task_difficulty.py index e3fe1db0..9c676396 100644 --- a/dingo/model/llm/instruction_quality/llm_task_difficulty.py +++ b/dingo/model/llm/instruction_quality/llm_task_difficulty.py @@ -321,14 +321,14 @@ def process_response(cls, response: str) -> EvalDetail: # 难度评估没有"通过/不通过"的概念,只是描述性的 # 但为了兼容框架,我们设置一个合理的默认行为 - # 可以通过 parameters 配置 min_difficulty 和 max_difficulty + # 可以通过 config 中的 min_difficulty 和 max_difficulty 配置难度范围 result.status = False # 默认不标记为问题 result.label = [f"TASK_DIFFICULTY.{difficulty_level.upper()}"] # 如果配置了难度范围要求,进行检查 - if hasattr(cls, 'dynamic_config') and cls.dynamic_config.parameters: - min_difficulty = cls.dynamic_config.parameters.get('min_difficulty', 0) - max_difficulty = cls.dynamic_config.parameters.get('max_difficulty', 10) + if hasattr(cls, 'dynamic_config'): + min_difficulty = cls.dynamic_config.model_extra.get('min_difficulty', 0) + max_difficulty = cls.dynamic_config.model_extra.get('max_difficulty', 10) if difficulty_score < min_difficulty: result.status = True diff --git a/dingo/model/llm/rag/llm_rag_answer_relevancy.py b/dingo/model/llm/rag/llm_rag_answer_relevancy.py index ec0e0cda..199187cd 100644 --- a/dingo/model/llm/rag/llm_rag_answer_relevancy.py +++ b/dingo/model/llm/rag/llm_rag_answer_relevancy.py @@ -242,14 +242,8 @@ def eval(cls, input_data: Data) -> EvalDetail: try: # 增加温度参数以提高问题生成的随机性 - if hasattr(cls, 'dynamic_config') and cls.dynamic_config.parameters: - if 'temperature' not in cls.dynamic_config.parameters: - cls.dynamic_config.parameters['temperature'] = 0.7 - else: - # 如果没有parameters,创建一个包含temperature的parameters - current_params = cls.dynamic_config.parameters or {} - current_params['temperature'] = 0.7 - cls.dynamic_config.parameters = current_params + if hasattr(cls, 'dynamic_config') and 'temperature' not in cls.dynamic_config.model_extra: + cls.dynamic_config.temperature = 0.7 # 生成多个相关问题 generated_questions = cls.generate_multiple_questions(input_data, cls.strictness) @@ -263,10 +257,9 @@ def eval(cls, input_data: Data) -> EvalDetail: # 根据分数判断是否通过,默认阈值为5 threshold = 5 - if hasattr(cls, 'dynamic_config') and cls.dynamic_config.parameters: - threshold = cls.dynamic_config.parameters.get('threshold', 5) - # 检查是否有自定义的strictness参数 - cls.strictness = cls.dynamic_config.parameters.get('strictness', 3) + if hasattr(cls, 'dynamic_config'): + threshold = cls.dynamic_config.model_extra.get('threshold', 5) + cls.strictness = cls.dynamic_config.model_extra.get('strictness', 3) # 构建详细的reason文本 all_reasons = [] diff --git a/dingo/model/llm/rag/llm_rag_context_precision.py b/dingo/model/llm/rag/llm_rag_context_precision.py index 50f9b661..9c305c4b 100644 --- a/dingo/model/llm/rag/llm_rag_context_precision.py +++ b/dingo/model/llm/rag/llm_rag_context_precision.py @@ -256,8 +256,8 @@ def process_response(cls, responses: List[str]) -> EvalDetail: # 根据分数判断是否通过,默认阈值为5 threshold = 5 - if hasattr(cls, 'dynamic_config') and cls.dynamic_config.parameters: - threshold = cls.dynamic_config.parameters.get('threshold', 5) + if hasattr(cls, 'dynamic_config'): + threshold = cls.dynamic_config.model_extra.get('threshold', 5) if score >= threshold: result.status = False diff --git a/dingo/model/llm/rag/llm_rag_context_recall.py b/dingo/model/llm/rag/llm_rag_context_recall.py index 4ba059cc..8d6d06cc 100644 --- a/dingo/model/llm/rag/llm_rag_context_recall.py +++ b/dingo/model/llm/rag/llm_rag_context_recall.py @@ -215,8 +215,8 @@ def process_response(cls, response: str) -> EvalDetail: # 根据分数判断是否通过,默认阈值为5 threshold = 5 - if hasattr(cls, 'dynamic_config') and cls.dynamic_config.parameters: - threshold = cls.dynamic_config.parameters.get('threshold', 5) + if hasattr(cls, 'dynamic_config'): + threshold = cls.dynamic_config.model_extra.get('threshold', 5) if score >= threshold: result.status = False diff --git a/dingo/model/llm/rag/llm_rag_context_relevancy.py b/dingo/model/llm/rag/llm_rag_context_relevancy.py index ca16e289..94204e8a 100644 --- a/dingo/model/llm/rag/llm_rag_context_relevancy.py +++ b/dingo/model/llm/rag/llm_rag_context_relevancy.py @@ -206,8 +206,8 @@ def process_response(cls, response: str) -> EvalDetail: # 根据分数判断是否通过,默认阈值为5 threshold = 5 - if hasattr(cls, 'dynamic_config') and cls.dynamic_config.parameters: - threshold = cls.dynamic_config.parameters.get('threshold', 5) + if hasattr(cls, 'dynamic_config'): + threshold = cls.dynamic_config.model_extra.get('threshold', 5) if score >= threshold: result.status = False diff --git a/dingo/model/llm/rag/llm_rag_faithfulness.py b/dingo/model/llm/rag/llm_rag_faithfulness.py index 2111e77d..fe763ef0 100644 --- a/dingo/model/llm/rag/llm_rag_faithfulness.py +++ b/dingo/model/llm/rag/llm_rag_faithfulness.py @@ -290,8 +290,8 @@ def process_response(cls, response: str) -> EvalDetail: # 根据分数判断是否通过,默认阈值为5 threshold = 5 - if hasattr(cls, 'dynamic_config') and cls.dynamic_config.parameters: - threshold = cls.dynamic_config.parameters.get('threshold', 5) + if hasattr(cls, 'dynamic_config'): + threshold = cls.dynamic_config.model_extra.get('threshold', 5) if score >= threshold: result.status = False diff --git a/dingo/model/llm/vlm_layout_quality.py b/dingo/model/llm/vlm_layout_quality.py index 40627a88..e3a5456d 100644 --- a/dingo/model/llm/vlm_layout_quality.py +++ b/dingo/model/llm/vlm_layout_quality.py @@ -201,8 +201,8 @@ def send_messages(cls, messages: List): else: model_name = cls.client.models.list().data[0].id - params = cls.dynamic_config.parameters - cls.validate_config(params) + extra_params = cls.dynamic_config.model_extra + cls.validate_config(extra_params) completions = cls.client.chat.completions.create( model=model_name, diff --git a/docs/agent_architecture.md b/docs/agent_architecture.md index c55d34c6..e563357b 100644 --- a/docs/agent_architecture.md +++ b/docs/agent_architecture.md @@ -458,7 +458,7 @@ Aggregation: ├─ name: "AgentFactCheck" ├─ config.key: API key ├─ config.model: "gpt-4" - └─ config.parameters.agent_config: + └─ config.agent_config: ├─ max_iterations: 10 └─ tools: └─ tavily_search: @@ -539,7 +539,7 @@ Check if tool in available_tools ToolRegistry.get(tool_name) → tool_class ↓ configure_tool(tool_name, tool_class) - ├─ Extract config from dynamic_config.parameters.agent_config.tools.{tool_name} + ├─ Extract config from dynamic_config.agent_config.tools.{tool_name} └─ tool_class.update_config(config_dict) ↓ tool_class.execute(**kwargs) @@ -560,7 +560,7 @@ Return to agent for processing 3. **Three Patterns**: LangChain-based (declarative), Custom Workflow (imperative), Agent-First + Context (hybrid) 4. **Tool System**: Centralized registry with configuration injection 5. **Execution**: Runs in ThreadPoolExecutor alongside other LLMs -6. **Configuration**: Nested under `parameters.agent_config` in evaluator config +6. **Configuration**: `agent_config` is a top-level key in evaluator config (flat structure) 7. **Artifact Saving**: ArticleFactChecker auto-saves intermediate artifacts to a timestamped directory by default; override via `agent_config.output_path`, or disable with `agent_config.save_artifacts=false` ### Implementation Checklist diff --git a/docs/agent_development_guide.md b/docs/agent_development_guide.md index da071b7c..6d6f80df 100644 --- a/docs/agent_development_guide.md +++ b/docs/agent_development_guide.md @@ -441,8 +441,8 @@ def _get_output_dir(cls) -> Optional[str]: Get output directory for artifact files (three-priority chain). Returns output dir path (created if needed), or None if saving disabled. """ - params = cls.dynamic_config.parameters or {} - agent_cfg = params.get('agent_config') or {} + extra_params = cls.dynamic_config.model_extra + agent_cfg = extra_params.get('agent_config') or {} explicit_path = agent_cfg.get('output_path') if explicit_path: @@ -673,17 +673,15 @@ class MyAgent(BaseAgent): "key": "openai-api-key", "api_url": "https://api.openai.com/v1", "model": "gpt-4", - "parameters": { - "agent_config": { - "max_iterations": 3, - "tools": { - "my_tool": { - "api_key": "tool-api-key", - "max_results": 5 - } - } - } - } + "agent_config": { + "max_iterations": 3, + "tools": { + "my_tool": { + "api_key": "tool-api-key", + "max_results": 5 + } + } + } } } """ @@ -889,19 +887,17 @@ def eval(cls, input_data: Data) -> EvalDetail: "key": "openai-api-key", "api_url": "https://api.openai.com/v1", "model": "gpt-4-turbo", - "parameters": { - "temperature": 0.1, - "agent_config": { - "max_iterations": 3, - "tools": { - "my_tool": { - "api_key": "my-tool-api-key", - "max_results": 10, - "timeout": 30 - }, - "another_tool": { - "config_key": "value" - } + "temperature": 0.1, + "agent_config": { + "max_iterations": 3, + "tools": { + "my_tool": { + "api_key": "my-tool-api-key", + "max_results": 10, + "timeout": 30 + }, + "another_tool": { + "config_key": "value" } } } @@ -919,10 +915,10 @@ def eval(cls, input_data: Data) -> EvalDetail: def some_method(cls): # Access LLM configuration model = cls.dynamic_config.model # "gpt-4-turbo" - temperature = cls.dynamic_config.parameters.get('temperature', 0) + temperature = cls.dynamic_config.model_extra.get('temperature', 0) # Access agent-specific configuration - agent_config = cls.dynamic_config.parameters.get('agent_config', {}) + agent_config = cls.dynamic_config.model_extra.get('agent_config', {}) max_iterations = agent_config.get('max_iterations', 5) # Get tool configuration @@ -966,10 +962,8 @@ class MyAgent(BaseAgent): { "name": "MyAgent", "config": { - "parameters": { - "agent_config": { - "max_iterations": 10 - } + "agent_config": { + "max_iterations": 10 } } } @@ -1259,17 +1253,15 @@ Always include SOURCES with specific URLs when you perform web searches.""" "key": "your-openai-api-key", "api_url": "https://api.openai.com/v1", "model": "gpt-4-turbo", - "parameters": { - "temperature": 0.1, - "max_tokens": 16384, - "agent_config": { - "max_iterations": 5, - "tools": { - "tavily_search": { - "api_key": "your-tavily-api-key", - "max_results": 5, - "search_depth": "advanced" - } + "temperature": 0.1, + "max_tokens": 16384, + "agent_config": { + "max_iterations": 5, + "tools": { + "tavily_search": { + "api_key": "your-tavily-api-key", + "max_results": 5, + "search_depth": "advanced" } } } @@ -1597,11 +1589,9 @@ config = { "key": "openai-key", "api_url": "https://api.openai.com/v1", "model": "gpt-4", - "parameters": { - "agent_config": { - "tools": { - "tavily_search": {"api_key": "tavily-key"} - } + "agent_config": { + "tools": { + "tavily_search": {"api_key": "tavily-key"} } } } @@ -1632,7 +1622,7 @@ summary = executor.execute() **Configuration not working:** - Check JSON structure matches expected format -- Verify `parameters.agent_config.tools.{tool_name}` structure +- Verify `agent_config.tools.{tool_name}` structure - Use Pydantic validation to catch config errors early **Tests failing:** diff --git a/docs/article_fact_checking_guide.md b/docs/article_fact_checking_guide.md index 518b0ff3..43d04947 100644 --- a/docs/article_fact_checking_guide.md +++ b/docs/article_fact_checking_guide.md @@ -81,24 +81,22 @@ config = { "config": { "key": os.getenv("OPENAI_API_KEY"), "model": "deepseek-chat", # or "gpt-4o-mini" for OpenAI - "parameters": { - "agent_config": { - "max_iterations": 15, - "output_path": "outputs/article_factcheck/", # Optional: save intermediate artifacts - "tools": { - "claims_extractor": { - "api_key": os.getenv("OPENAI_API_KEY"), - "max_claims": 50, - "claim_types": [ - "factual", "statistical", "attribution", "institutional", - "temporal", "comparative", "monetary", "technical" - ] - }, - "tavily_search": { - "api_key": os.getenv("TAVILY_API_KEY") - }, - "arxiv_search": {"max_results": 5} - } + "agent_config": { + "max_iterations": 15, + "output_path": "outputs/article_factcheck/", # Optional: save intermediate artifacts + "tools": { + "claims_extractor": { + "api_key": os.getenv("OPENAI_API_KEY"), + "max_claims": 50, + "claim_types": [ + "factual", "statistical", "attribution", "institutional", + "temporal", "comparative", "monetary", "technical" + ] + }, + "tavily_search": { + "api_key": os.getenv("TAVILY_API_KEY") + }, + "arxiv_search": {"max_results": 5} } } } @@ -143,19 +141,17 @@ cat > article_check_config.json << EOF "config": { "key": "${OPENAI_API_KEY}", "model": "deepseek-chat", - "parameters": { - "agent_config": { - "max_iterations": 15, - "tools": { - "claims_extractor": { - "api_key": "${OPENAI_API_KEY}", - "max_claims": 50 - }, - "tavily_search": { - "api_key": "${TAVILY_API_KEY}" - }, - "arxiv_search": {} - } + "agent_config": { + "max_iterations": 15, + "tools": { + "claims_extractor": { + "api_key": "${OPENAI_API_KEY}", + "max_claims": 50 + }, + "tavily_search": { + "api_key": "${TAVILY_API_KEY}" + }, + "arxiv_search": {} } } } diff --git a/docs/config.md b/docs/config.md index 6d1f1d4c..a5020ee4 100644 --- a/docs/config.md +++ b/docs/config.md @@ -104,26 +104,22 @@ HuggingFace 特定配置: #### EvaluatorLLMArgs 配置 (evaluator.llm_config.[llm_name]) -LLM 配置: +LLM 配置(支持额外字段,所有额外字段会直接透传给 LLM API): | Parameter | Type | Default | Required | Description | |-----------|------|---------|----------|-------------| | model | str | null | No | 使用的模型名称 | | key | str | null | No | API 密钥 | | api_url | str | null | No | API URL | -| parameters | object | null | No | LLM 调参配置 | - -##### LLM Parameters 配置 - -LLM 调参配置: - -| Parameter | Type | Default | Description | -|-----------|------|---------|-------------| -| temperature | number | 1 | 采样温度,0-2之间 | -| top_p | number | 1 | 核心采样概率 | -| max_tokens | number | 4000 | 最大生成token数 | -| presence_penalty | number | 0 | 存在惩罚,-2.0到2.0之间 | -| frequency_penalty | number | 0 | 频率惩罚,-2.0到2.0之间 | +| embedding_config | object | null | No | Embedding 模型独立配置(RAG 评估器使用) | +| temperature | number | 1 | No | 采样温度,0-2之间 | +| top_p | number | 1 | No | 核心采样概率 | +| max_tokens | number | 4000 | No | 最大生成token数 | +| presence_penalty | number | 0 | No | 存在惩罚,-2.0到2.0之间 | +| frequency_penalty | number | 0 | No | 频率惩罚,-2.0到2.0之间 | +| agent_config | object | null | No | Agent 专属配置(max_iterations、tools 等) | +| threshold | number | - | No | 评估通过阈值(各评估器自定义) | +| *其他字段* | any | - | No | 所有额外字段直接透传给 LLM API | ## 配置文件示例 @@ -181,13 +177,11 @@ LLM 调参配置: "model": "gpt-3.5-turbo", "key": "your-api-key", "api_url": "https://api.openai.com/v1/chat/completions", - "parameters": { - "temperature": 1, - "top_p": 1, - "max_tokens": 4000, - "presence_penalty": 0, - "frequency_penalty": 0 - } + "temperature": 1, + "top_p": 1, + "max_tokens": 4000, + "presence_penalty": 0, + "frequency_penalty": 0 } } } diff --git a/docs/factcheck_guide.md b/docs/factcheck_guide.md index 4112707f..7abed067 100644 --- a/docs/factcheck_guide.md +++ b/docs/factcheck_guide.md @@ -101,9 +101,7 @@ input_data = { "model": "deepseek-chat", "key": "your-api-key", "api_url": "https://api.deepseek.com/v1", - "parameters": { - "temperature": 0.1 - } + "temperature": 0.1 } } } diff --git a/docs/factuality_assessment_guide.md b/docs/factuality_assessment_guide.md index 13680cc2..6f670fcc 100644 --- a/docs/factuality_assessment_guide.md +++ b/docs/factuality_assessment_guide.md @@ -59,7 +59,7 @@ LLMFactCheck.dynamic_config = EvaluatorLLMArgs( key=os.getenv("OPENAI_API_KEY"), api_url=os.getenv("OPENAI_BASE_URL", "https://api.openai.com/v1"), model=os.getenv("OPENAI_MODEL", "gpt-4o-mini"), - parameters={"threshold": 5.0} + threshold=5.0 ) # Prepare data @@ -108,7 +108,7 @@ input_data = { "model": "gpt-4o-mini", "key": "YOUR_API_KEY", "api_url": "https://api.openai.com/v1", - "parameters": {"threshold": 5.0} + "threshold": 5.0 } } ] @@ -142,7 +142,7 @@ LLMFactCheck.dynamic_config = EvaluatorLLMArgs( key="YOUR_API_KEY", api_url="https://api.openai.com/v1", model="gpt-4o-mini", - parameters={"threshold": 5.0} # Range: 0.0-10.0 + threshold=5.0 # Range: 0.0-10.0 ) ``` diff --git a/docs/hallucination_detection_guide.md b/docs/hallucination_detection_guide.md index d6fceea9..4da27e0d 100644 --- a/docs/hallucination_detection_guide.md +++ b/docs/hallucination_detection_guide.md @@ -131,7 +131,7 @@ LLMHallucination.dynamic_config = EvaluatorLLMArgs( key=os.getenv("OPENAI_API_KEY"), api_url=os.getenv("OPENAI_BASE_URL", "https://api.openai.com/v1"), model=os.getenv("OPENAI_MODEL", "gpt-4o-mini"), - parameters={"threshold": 0.5} + threshold=0.5 ) # Prepare data @@ -218,7 +218,7 @@ LLMHallucination.dynamic_config = EvaluatorLLMArgs( key="YOUR_API_KEY", api_url="https://api.openai.com/v1", model="gpt-4o-mini", - parameters={"threshold": 0.5} # Range: 0.0-1.0 + threshold=0.5 # Range: 0.0-1.0 ) ``` diff --git a/docs/instruction_quality_guide.md b/docs/instruction_quality_guide.md index eb1b08d1..2caae4dc 100644 --- a/docs/instruction_quality_guide.md +++ b/docs/instruction_quality_guide.md @@ -197,7 +197,7 @@ input_data = { "model": "deepseek-chat", "key": "your-api-key", "api_url": "https://api.deepseek.com", - "parameters": {"threshold": 6.0} + "threshold": 6.0 } } ] @@ -223,10 +223,8 @@ print(f"清晰指令: {summary.num_good}/{summary.total}") "model": "deepseek-chat", "key": "your-api-key", "api_url": "https://api.deepseek.com", - "parameters": { - "min_difficulty": 3.0, # 可选:过滤太简单的 - "max_difficulty": 8.0, # 可选:过滤太难的 - } + "min_difficulty": 3.0, # 可选:过滤太简单的 + "max_difficulty": 8.0, # 可选:过滤太难的 } } ] @@ -303,13 +301,13 @@ python examples/custom/evaluate_instruction_quality.py distribution **问题1: 过多简单指令** ```python # 设置最低难度阈值 -"parameters": {"min_difficulty": 3.0} +"min_difficulty": 3.0 ``` **问题2: 指令模糊不清** ```python # 提高清晰度要求 -"parameters": {"threshold": 7.0} +"threshold": 7.0 ``` **问题3: 难度分布不均** diff --git a/docs/rag_evaluation_metrics.md b/docs/rag_evaluation_metrics.md index 1c11c5dc..c2fce750 100644 --- a/docs/rag_evaluation_metrics.md +++ b/docs/rag_evaluation_metrics.md @@ -86,10 +86,8 @@ llm_config_embedding = { "api_url": "https://api.openai.com/v1", "key": "YOUR_API_KEY" }, - "parameters": { - "strictness": 3, - "threshold": 5 - } + "strictness": 3, + "threshold": 5 } input_data = { @@ -170,7 +168,8 @@ summary = executor.execute() "api_url": "https://api.deepseek.com", "key": "YOUR_API_KEY" }, - "parameters": {"strictness": 3, "threshold": 5} + "strictness": 3, + "threshold": 5 } ``` @@ -186,7 +185,8 @@ summary = executor.execute() "api_url": "http://localhost:8000/v1", # Local vLLM/Xinference "key": "dummy-key" }, - "parameters": {"strictness": 3, "threshold": 5} + "strictness": 3, + "threshold": 5 } ``` diff --git a/docs/rag_evaluation_metrics_zh.md b/docs/rag_evaluation_metrics_zh.md index 099addb4..963b02dd 100644 --- a/docs/rag_evaluation_metrics_zh.md +++ b/docs/rag_evaluation_metrics_zh.md @@ -123,10 +123,8 @@ input_data = { "api_url": OPENAI_URL, "key": OPENAI_KEY }, - "parameters": { - "strictness": 3, - "threshold": 5 - } + "strictness": 3, + "threshold": 5 } }, { @@ -466,7 +464,7 @@ LLMRAGFaithfulness.dynamic_config = EvaluatorLLMArgs( key="YOUR_API_KEY", api_url="https://api.openai.com/v1", model="gpt-4o-mini", - parameters={"threshold": 7} # 自定义阈值 + threshold=7 # 自定义阈值 ) # Answer Relevancy 特殊配置(需要 embedding)⭐ @@ -480,10 +478,8 @@ LLMRAGAnswerRelevancy.dynamic_config = EvaluatorLLMArgs( api_url="https://api.openai.com/v1", key="YOUR_API_KEY" ), - parameters={ - "strictness": 3, # 生成问题数量 - "threshold": 5 # 通过阈值 - } + strictness=3, # 生成问题数量 + threshold=5 # 通过阈值 ) ``` @@ -499,7 +495,7 @@ LLMRAGAnswerRelevancy.dynamic_config = EvaluatorLLMArgs( "model": "gpt-4o-mini", "key": "YOUR_API_KEY", "api_url": "https://api.openai.com/v1", - "parameters": {"threshold": 7} + "threshold": 7 } }, { @@ -513,10 +509,8 @@ LLMRAGAnswerRelevancy.dynamic_config = EvaluatorLLMArgs( "api_url": "https://api.openai.com/v1", "key": "YOUR_API_KEY" }, - "parameters": { - "strictness": 3, - "threshold": 5 - } + "strictness": 3, + "threshold": 5 } } ] @@ -528,8 +522,8 @@ LLMRAGAnswerRelevancy.dynamic_config = EvaluatorLLMArgs( | 参数 | 适用指标 | 默认值 | 说明 | |------|---------|--------|------| -| `threshold` | 所有指标 | 5.0 | 通过阈值(0-10),在 `parameters` 中配置 | -| `strictness` | Answer Relevancy | 3 | 生成问题数量(1-5),在 `parameters` 中配置 | +| `threshold` | 所有指标 | 5.0 | 通过阈值(0-10),直接在 `config` 中配置 | +| `strictness` | Answer Relevancy | 3 | 生成问题数量(1-5),直接在 `config` 中配置 | | `embedding_config` | Answer Relevancy | - | **必需配置**,包含 `model`(模型名)、`api_url`(服务地址)、`key`(API密钥) | ## 📊 指标详细说明 diff --git a/docs/technical/technical_all.md b/docs/technical/technical_all.md index 45833111..212fd15c 100644 --- a/docs/technical/technical_all.md +++ b/docs/technical/technical_all.md @@ -220,9 +220,14 @@ dingo 在使用提示词进行评估任务的时候,必须同时使用场景 + model + key + api_url -+ parameters ++ temperature(直接平铺在配置中) ++ top_p ++ max_tokens ++ presence_penalty ++ frequency_penalty ++ agent_config(Agent 评估器专用,包含 max_iterations、tools 等) -需要注意的是参数 [parameters](config.md#parameters) ,这个参数会对模型的推理产生影响,可以设置的值包括: +LLM 调参配置直接平铺在 `config` 对象中(不再嵌套在 `parameters` 字段下),会对模型推理产生影响,可以设置的值包括: + temperature + top_p + max_tokens diff --git a/examples/agent/agent_article_fact_checking_example.py b/examples/agent/agent_article_fact_checking_example.py index 45b0ad60..3071f45f 100644 --- a/examples/agent/agent_article_fact_checking_example.py +++ b/examples/agent/agent_article_fact_checking_example.py @@ -58,32 +58,30 @@ def main() -> int: "key": openai_key, "model": "intern-s1-pro", "api_url": "https://chat.intern-ai.org.cn/api/v1/", - "parameters": { - "timeout": 600, - "temperature": 0, # deterministic output - "agent_config": { - "max_concurrent_claims": 10, - "max_iterations": 50, - # Artifacts auto-saved to outputs/article_factcheck_/ - # Override with: "output_path": "your/custom/path" - "tools": { - "claims_extractor": { - "api_key": openai_key, - "model": "intern-s1-pro", - "base_url": "https://chat.intern-ai.org.cn/api/v1/", - "max_claims": 50, - "claim_types": [ - "factual", "statistical", "attribution", "institutional", - "temporal", "comparative", "monetary", "technical" - ] - }, - "tavily_search": { - "api_key": tavily_key - } if tavily_key else {}, - "arxiv_search": { - "max_results": 5, - "fetch_affiliations": True, - } + "timeout": 600, + "temperature": 0, # deterministic output + "agent_config": { + "max_concurrent_claims": 10, + "max_iterations": 50, + # Artifacts auto-saved to outputs/article_factcheck_/ + # Override with: "output_path": "your/custom/path" + "tools": { + "claims_extractor": { + "api_key": openai_key, + "model": "intern-s1-pro", + "base_url": "https://chat.intern-ai.org.cn/api/v1/", + "max_claims": 50, + "claim_types": [ + "factual", "statistical", "attribution", "institutional", + "temporal", "comparative", "monetary", "technical" + ] + }, + "tavily_search": { + "api_key": tavily_key + } if tavily_key else {}, + "arxiv_search": { + "max_results": 5, + "fetch_affiliations": True, } } } diff --git a/examples/agent/agent_executor_example.py b/examples/agent/agent_executor_example.py index 02a57c94..e0970ebc 100644 --- a/examples/agent/agent_executor_example.py +++ b/examples/agent/agent_executor_example.py @@ -67,17 +67,15 @@ def main(): "key": os.getenv("OPENAI_API_KEY", "your-openai-api-key"), "api_url": os.getenv("OPENAI_API_URL", "https://api.openai.com/v1"), "model": "gpt-4.1-mini-2025-04-14", - "parameters": { - "temperature": 0.1, - "max_tokens": 16384, - "agent_config": { - "max_iterations": 5, - "tools": { - "tavily_search": { - "api_key": os.getenv("TAVILY_API_KEY", "your-tavily-api-key"), - "max_results": 5, - "search_depth": "advanced" - } + "temperature": 0.1, + "max_tokens": 16384, + "agent_config": { + "max_iterations": 5, + "tools": { + "tavily_search": { + "api_key": os.getenv("TAVILY_API_KEY", "your-tavily-api-key"), + "max_results": 5, + "search_depth": "advanced" } } } diff --git a/examples/agent/agent_hallucination_example.py b/examples/agent/agent_hallucination_example.py index 6f463152..5f2723ec 100644 --- a/examples/agent/agent_hallucination_example.py +++ b/examples/agent/agent_hallucination_example.py @@ -64,14 +64,12 @@ def example_with_context(): "key": os.getenv("OPENAI_API_KEY", "your-openai-api-key"), "api_url": os.getenv("OPENAI_API_URL", "https://api.openai.com/v1"), "model": "gpt-4.1-mini-2025-04-14", - "parameters": { - "temperature": 0.1, - "agent_config": { - "max_iterations": 3, - "tools": { - "tavily_search": { - "api_key": os.getenv("TAVILY_API_KEY", "your-tavily-api-key") - } + "temperature": 0.1, + "agent_config": { + "max_iterations": 3, + "tools": { + "tavily_search": { + "api_key": os.getenv("TAVILY_API_KEY", "your-tavily-api-key") } } } @@ -155,17 +153,15 @@ def example_without_context(): "key": os.getenv("OPENAI_API_KEY", "your-openai-api-key"), "api_url": os.getenv("OPENAI_API_URL", "https://api.openai.com/v1"), "model": "gpt-4.1-mini-2025-04-14", - "parameters": { - "temperature": 0.1, - "agent_config": { - "max_iterations": 3, - "tools": { - "tavily_search": { - "api_key": os.getenv("TAVILY_API_KEY", "your-tavily-api-key"), - "max_results": 5, - "search_depth": "advanced", - "include_answer": True - } + "temperature": 0.1, + "agent_config": { + "max_iterations": 3, + "tools": { + "tavily_search": { + "api_key": os.getenv("TAVILY_API_KEY", "your-tavily-api-key"), + "max_results": 5, + "search_depth": "advanced", + "include_answer": True } } } @@ -215,13 +211,11 @@ def example_sdk_usage(): key=os.getenv("OPENAI_API_KEY", "your-openai-api-key"), api_url=os.getenv("OPENAI_API_URL", "https://api.openai.com/v1"), model="gpt-4.1-mini-2025-04-14", - parameters={ - "temperature": 0.1, - "agent_config": { - "tools": { - "tavily_search": { - "api_key": os.getenv("TAVILY_API_KEY", "your-tavily-api-key") - } + temperature=0.1, + agent_config={ + "tools": { + "tavily_search": { + "api_key": os.getenv("TAVILY_API_KEY", "your-tavily-api-key") } } } diff --git a/examples/factcheck/dataset_factcheck_evaluation.py b/examples/factcheck/dataset_factcheck_evaluation.py index dd3d72d3..4f48eadf 100644 --- a/examples/factcheck/dataset_factcheck_evaluation.py +++ b/examples/factcheck/dataset_factcheck_evaluation.py @@ -77,10 +77,8 @@ def evaluate_single_data_example(): evaluator.dynamic_config.model = OPENAI_MODEL evaluator.dynamic_config.key = OPENAI_KEY evaluator.dynamic_config.api_url = OPENAI_URL - evaluator.dynamic_config.parameters = { - "temperature": 0.1, # 降低随机性以提高一致性 - "max_tokens": 2000 - } + evaluator.dynamic_config.temperature = 0.1 # 降低随机性以提高一致性 + evaluator.dynamic_config.max_tokens = 2000 # 创建测试数据 test_data = Data( diff --git a/examples/rag/dataset_rag_eval_baseline.py b/examples/rag/dataset_rag_eval_baseline.py index 84e09106..9828f7e7 100644 --- a/examples/rag/dataset_rag_eval_baseline.py +++ b/examples/rag/dataset_rag_eval_baseline.py @@ -128,10 +128,8 @@ def run_rag_evaluation(): "api_url": OPENAI_URL, # 如果同一服务提供 embedding "key": OPENAI_KEY }, - "parameters": { - "strictness": 3, - "threshold": 5 - } + "strictness": 3, + "threshold": 5 } # 构建配置 diff --git a/examples/rag/e2e_RAG_eval_with_mockRAG_fiqa.py b/examples/rag/e2e_RAG_eval_with_mockRAG_fiqa.py index f212b94b..d190c969 100644 --- a/examples/rag/e2e_RAG_eval_with_mockRAG_fiqa.py +++ b/examples/rag/e2e_RAG_eval_with_mockRAG_fiqa.py @@ -327,10 +327,8 @@ def run_dingo_evaluation(rag_output_path: str) -> SummaryModel: "api_url": OPENAI_BASE_URL, # 如果同一服务提供 embedding "key": OPENAI_API_KEY }, - "parameters": { - "strictness": 3, - "threshold": 5 - } + "strictness": 3, + "threshold": 5 } input_data = { diff --git a/examples/sft/evaluate_instruction_quality.py b/examples/sft/evaluate_instruction_quality.py index f9ea43e9..841b54e3 100644 --- a/examples/sft/evaluate_instruction_quality.py +++ b/examples/sft/evaluate_instruction_quality.py @@ -56,9 +56,7 @@ def evaluate_instruction_clarity(): "model": OPENAI_MODEL, "key": OPENAI_API_KEY, "api_url": OPENAI_BASE_URL, - "parameters": { - "threshold": 6.0 # 清晰度阈值 (0-10) - } + "threshold": 6.0 # 清晰度阈值 (0-10) } } ] @@ -127,11 +125,9 @@ def evaluate_task_difficulty(): "model": OPENAI_MODEL, "key": OPENAI_API_KEY, "api_url": OPENAI_BASE_URL, - "parameters": { - # 可选:设置期望的难度范围 - # "min_difficulty": 4.0, # 最低难度(太简单的会被标记) - # "max_difficulty": 8.0, # 最高难度(太难的会被标记) - } + # 可选:设置期望的难度范围 + # "min_difficulty": 4.0, # 最低难度(太简单的会被标记) + # "max_difficulty": 8.0, # 最高难度(太难的会被标记) } } ] @@ -198,7 +194,7 @@ def evaluate_both(): "model": OPENAI_MODEL, "key": OPENAI_API_KEY, "api_url": OPENAI_BASE_URL, - "parameters": {"threshold": 6.0} + "threshold": 6.0 } }, { @@ -207,10 +203,8 @@ def evaluate_both(): "model": OPENAI_MODEL, "key": OPENAI_API_KEY, "api_url": OPENAI_BASE_URL, - "parameters": { - "min_difficulty": 3.0, # 过滤太简单的任务 - "max_difficulty": 9.0, # 过滤过于困难的任务 - } + "min_difficulty": 3.0, # 过滤太简单的任务 + "max_difficulty": 9.0, # 过滤过于困难的任务 } } ] diff --git a/mcp_server.py b/mcp_server.py index b865606c..08f4812d 100644 --- a/mcp_server.py +++ b/mcp_server.py @@ -106,11 +106,9 @@ def get_llm_config_from_env(eval_group_name: str = "") -> Dict: "key": openai_key, "api_url": openai_base_url, "model": openai_model, - "parameters": { - "temperature": 0.3, - "top_p": 1, - "max_tokens": 4000, - } + "temperature": 0.3, + "top_p": 1, + "max_tokens": 4000, } } diff --git a/skills/dingo-verify/scripts/fact_check.py b/skills/dingo-verify/scripts/fact_check.py index b2865496..a4930f14 100644 --- a/skills/dingo-verify/scripts/fact_check.py +++ b/skills/dingo-verify/scripts/fact_check.py @@ -164,13 +164,11 @@ def build_config( "key": api_key, "model": model, "api_url": api_url, - "parameters": { - "temperature": 0, - "agent_config": { - "max_concurrent_claims": max_concurrent, - "max_iterations": 50, - "tools": tools_config, - } + "temperature": 0, + "agent_config": { + "max_concurrent_claims": max_concurrent, + "max_iterations": 50, + "tools": tools_config, } } }] diff --git a/test/scripts/model/llm/agent/test_article_fact_checker.py b/test/scripts/model/llm/agent/test_article_fact_checker.py index 5376dfcc..01e12edb 100644 --- a/test/scripts/model/llm/agent/test_article_fact_checker.py +++ b/test/scripts/model/llm/agent/test_article_fact_checker.py @@ -552,7 +552,7 @@ def test_get_output_dir_auto_generates_path_when_not_configured(self, tmp_path): from dingo.config.input_args import EvaluatorLLMArgs ArticleFactChecker.dynamic_config = EvaluatorLLMArgs( key="test", api_url="https://api.example.com", model="test", - parameters={"agent_config": {"base_output_path": str(tmp_path)}} + agent_config={"base_output_path": str(tmp_path)} ) result = ArticleFactChecker._get_output_dir() assert result is not None @@ -565,7 +565,7 @@ def test_get_output_dir_returns_none_when_save_artifacts_disabled(self): from dingo.config.input_args import EvaluatorLLMArgs ArticleFactChecker.dynamic_config = EvaluatorLLMArgs( key="test", api_url="https://api.example.com", model="test", - parameters={"agent_config": {"save_artifacts": False}} + agent_config={"save_artifacts": False} ) result = ArticleFactChecker._get_output_dir() assert result is None @@ -577,7 +577,7 @@ def test_get_output_dir_creates_directory(self, tmp_path): output_dir = str(tmp_path / "new_output_dir") ArticleFactChecker.dynamic_config = EvaluatorLLMArgs( key="test", api_url="https://api.example.com", model="test", - parameters={"agent_config": {"output_path": output_dir}} + agent_config={"output_path": output_dir} ) result = ArticleFactChecker._get_output_dir() diff --git a/test/scripts/model/llm/agent/test_async_article_fact_checker.py b/test/scripts/model/llm/agent/test_async_article_fact_checker.py index f9529f56..43b0bfa8 100644 --- a/test/scripts/model/llm/agent/test_async_article_fact_checker.py +++ b/test/scripts/model/llm/agent/test_async_article_fact_checker.py @@ -547,13 +547,13 @@ def setup_method(self): def test_returns_class_default_when_no_config(self): """Should return max_concurrent_claims class default when not configured.""" with patch.object(self.checker, 'dynamic_config') as mock_cfg: - mock_cfg.parameters = {} + mock_cfg.model_extra = {} result = self.checker._get_max_concurrent_claims() assert result == self.checker.max_concurrent_claims def test_returns_config_value_when_set(self): """Should return value from agent_config.max_concurrent_claims.""" with patch.object(self.checker, 'dynamic_config') as mock_cfg: - mock_cfg.parameters = {"agent_config": {"max_concurrent_claims": 10}} + mock_cfg.model_extra = {"agent_config": {"max_concurrent_claims": 10}} result = self.checker._get_max_concurrent_claims() assert result == 10 diff --git a/test/scripts/skills/test_fact_check_script.py b/test/scripts/skills/test_fact_check_script.py index 79358e50..aacce4cd 100644 --- a/test/scripts/skills/test_fact_check_script.py +++ b/test/scripts/skills/test_fact_check_script.py @@ -189,7 +189,7 @@ def test_tavily_omitted_when_no_key(self): max_claims=50, max_concurrent=5, ) - tools = config["evaluator"][0]["evals"][0]["config"]["parameters"]["agent_config"]["tools"] + tools = config["evaluator"][0]["evals"][0]["config"]["agent_config"]["tools"] assert "tavily_search" not in tools assert "claims_extractor" in tools assert "arxiv_search" in tools @@ -205,7 +205,7 @@ def test_tavily_included_when_key_present(self): max_claims=50, max_concurrent=5, ) - tools = config["evaluator"][0]["evals"][0]["config"]["parameters"]["agent_config"]["tools"] + tools = config["evaluator"][0]["evals"][0]["config"]["agent_config"]["tools"] assert "tavily_search" in tools assert tools["tavily_search"]["api_key"] == "tvly-xxx" @@ -220,8 +220,8 @@ def test_temperature_is_zero(self): max_claims=50, max_concurrent=5, ) - params = config["evaluator"][0]["evals"][0]["config"]["parameters"] - assert params["temperature"] == 0 + cfg = config["evaluator"][0]["evals"][0]["config"] + assert cfg["temperature"] == 0 class TestErrorOutput: