-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathrule_based_verifier.py
More file actions
66 lines (54 loc) · 1.99 KB
/
rule_based_verifier.py
File metadata and controls
66 lines (54 loc) · 1.99 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
import re
# Short English function words.
# NOTE(review): not referenced by the code visible in this file; presumably
# kept for keyword matching elsewhere -- confirm before removing.
STOPWORDS = {
    "a",
    "an",
    "and",
    "for",
    "of",
    "the",
    "to",
}
# Minimum stripped length for a step result / final answer to count as content.
_MIN_STEP_OUTPUT_CHARS = 20
_MIN_FINAL_ANSWER_CHARS = 30

# Lower-case substrings that mark a final answer as generic or evasive.
_GENERIC_FAILURE_PHRASES = (
    "i am not sure",
    "as an ai",
    "i cannot find",
    "no information available",
)


def _as_text(value):
    """Return *value* as a string; ``None`` becomes the empty string."""
    if value is None:
        return ""
    return value if isinstance(value, str) else str(value)


def _verdict(verdict, reason, retry_hint=None):
    """Build the result dict returned by ``rule_based_verifier``."""
    return {"verdict": verdict, "reason": reason, "retry_hint": retry_hint}


def rule_based_verifier(user_query, plan, execution_results, final_answer):
    """Check an executed plan and its final answer against fixed rules.

    The rules run in order and the first one that trips decides the result:

    1. Step count: fewer results than planned steps -> ``RETRY``;
       more results than planned steps -> ``FAIL``.
    2. Step output: any result shorter than 20 characters after stripping
       whitespace -> ``RETRY``.
    3. Final answer length: shorter than 30 characters after stripping
       whitespace -> ``FAIL``.
    4. Generic text: the final answer contains one of the known evasive
       phrases (case-insensitive) -> ``FAIL``.

    Args:
        user_query: The original user request. Currently not inspected by
            any rule; kept for interface compatibility.
        plan: Mapping with an optional ``"steps"`` list. ``None`` or a
            missing/``None`` ``"steps"`` entry is treated as zero steps.
        execution_results: One result per executed step. ``None`` is
            treated as no results. Non-string results are converted with
            ``str()`` before the length check; ``None`` items count as
            empty output.
        final_answer: The answer text to validate. ``None`` is treated as
            an empty answer.

    Returns:
        A dict with the keys ``"verdict"`` (``"PASS"``, ``"RETRY"`` or
        ``"FAIL"``), ``"reason"`` and ``"retry_hint"`` (a string for
        ``RETRY`` verdicts, otherwise ``None``).
    """
    steps = (plan.get("steps") if plan else None) or []
    results = list(execution_results) if execution_results is not None else []

    # Rule 1: step count validation.
    if len(results) < len(steps):
        return _verdict(
            "RETRY",
            "Not all planned steps were executed",
            "Execute all remaining steps in the plan",
        )
    if len(results) > len(steps):
        return _verdict("FAIL", "Executor produced extra unexpected steps")

    # Rule 2: tool compliance (relaxed) -- only require some content per
    # step, no specific format.
    for position, (step, result) in enumerate(zip(steps, results), start=1):
        if len(_as_text(result).strip()) < _MIN_STEP_OUTPUT_CHARS:
            # Fall back to the 1-based position so a step without an "id"
            # cannot crash the error-reporting path.
            if isinstance(step, dict):
                step_id = step.get("id", position)
            else:
                step_id = position
            return _verdict(
                "RETRY",
                f"Step {step_id} produced insufficient output",
                "Provide more detailed information",
            )

    # Rule 3: goal satisfaction (relaxed) -- substantive answers are allowed
    # even without exact goal keywords; only a too-short answer fails.
    answer_text = _as_text(final_answer)
    if len(answer_text.strip()) < _MIN_FINAL_ANSWER_CHARS:
        return _verdict("FAIL", "Final answer is too short")

    # Rule 4 (freshness for news) was removed as too strict.

    # Rule 5: generic failure text.
    lowered_answer = answer_text.lower()
    if any(phrase in lowered_answer for phrase in _GENERIC_FAILURE_PHRASES):
        return _verdict("FAIL", "Final answer is generic or evasive")

    return _verdict("PASS", "All verification rules passed")