From acf44064eb1e7a987172c7a4434d20569a70ba74 Mon Sep 17 00:00:00 2001 From: shijinpjlab Date: Mon, 13 Apr 2026 14:31:02 +0800 Subject: [PATCH] =?UTF-8?q?feat:=20RuleDictConsistency=E5=BF=BD=E7=95=A5?= =?UTF-8?q?=E9=A1=BA=E5=BA=8F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- dingo/model/rule/rule_common.py | 32 +++++++++++++++++++++++++++++++- docs/config.md | 7 ------- docs/technical/technical_all.md | 17 ++++------------- 3 files changed, 35 insertions(+), 21 deletions(-) diff --git a/dingo/model/rule/rule_common.py b/dingo/model/rule/rule_common.py index d1af227..f962758 100644 --- a/dingo/model/rule/rule_common.py +++ b/dingo/model/rule/rule_common.py @@ -2691,12 +2691,36 @@ class RuleDictConsistency(BaseRule): } _required_fields = [RequiredField.METADATA, RequiredField.CONTEXT] + dynamic_config = EvaluatorRuleArgs(parameters={"ignore_order": True}) + + @classmethod + def _normalize_value(cls, value, ignore_order: bool): + """Normalize nested values for configurable order-aware comparison.""" + if isinstance(value, dict): + return { + key: cls._normalize_value(value[key], ignore_order) + for key in sorted(value.keys(), key=lambda x: str(x)) + } + + if isinstance(value, (list, tuple)): + normalized = [cls._normalize_value(item, ignore_order) for item in value] + if ignore_order: + return sorted(normalized, key=lambda x: repr(x)) + return normalized + + if isinstance(value, set): + normalized = [cls._normalize_value(item, ignore_order) for item in value] + return sorted(normalized, key=lambda x: repr(x)) + + return value @classmethod def eval(cls, input_data: Data) -> EvalDetail: res = EvalDetail(metric=cls.__name__) left_dict = getattr(input_data, "metadata", None) right_dict = getattr(input_data, "context", None) + parameters = cls.dynamic_config.parameters or {} + ignore_order = parameters.get("ignore_order", True) if not isinstance(left_dict, dict) or not isinstance(right_dict, dict): res.status = True @@ -2710,7 +2734,13 @@ def eval(cls, input_data: Data) -> EvalDetail: diff_keys = [] all_keys = set(left_dict.keys()) | set(right_dict.keys()) for key in sorted(all_keys, key=lambda x: str(x)): - if key not in left_dict or key not in right_dict or left_dict[key] != right_dict[key]: + if key not in left_dict or key not in right_dict: + diff_keys.append(str(key)) + continue + + left_value = cls._normalize_value(left_dict[key], ignore_order) + right_value = cls._normalize_value(right_dict[key], ignore_order) + if left_value != right_value: diff_keys.append(str(key)) if diff_keys: diff --git a/docs/config.md b/docs/config.md index a5020ee..070de09 100644 --- a/docs/config.md +++ b/docs/config.md @@ -112,13 +112,6 @@ LLM 配置(支持额外字段,所有额外字段会直接透传给 LLM API | key | str | null | No | API 密钥 | | api_url | str | null | No | API URL | | embedding_config | object | null | No | Embedding 模型独立配置(RAG 评估器使用) | -| temperature | number | 1 | No | 采样温度,0-2之间 | -| top_p | number | 1 | No | 核心采样概率 | -| max_tokens | number | 4000 | No | 最大生成token数 | -| presence_penalty | number | 0 | No | 存在惩罚,-2.0到2.0之间 | -| frequency_penalty | number | 0 | No | 频率惩罚,-2.0到2.0之间 | -| agent_config | object | null | No | Agent 专属配置(max_iterations、tools 等) | -| threshold | number | - | No | 评估通过阈值(各评估器自定义) | | *其他字段* | any | - | No | 所有额外字段直接透传给 LLM API | ## 配置文件示例 diff --git a/docs/technical/technical_all.md b/docs/technical/technical_all.md index 212fd15..6aafdc5 100644 --- a/docs/technical/technical_all.md +++ b/docs/technical/technical_all.md @@ -220,19 +220,10 @@ dingo 在使用提示词进行评估任务的时候,必须同时使用场景 + model + key + api_url -+ temperature(直接平铺在配置中) -+ top_p -+ max_tokens -+ presence_penalty -+ frequency_penalty -+ agent_config(Agent 评估器专用,包含 max_iterations、tools 等) - -LLM 调参配置直接平铺在 `config` 对象中(不再嵌套在 `parameters` 字段下),会对模型推理产生影响,可以设置的值包括: -+ temperature -+ top_p -+ max_tokens -+ presence_penalty -+ frequency_penalty ++ embedding_config(RAG 评估器专用) ++ 其他字段(直接平铺在配置中并透传到 LLM API) + +LLM 调参配置直接平铺在 `config` 对象中(不再嵌套在 `parameters` 字段下),除标准字段外,其它字段将直接透传到 LLM API。 更多参数细节可参考OpenAI API官方文档。