We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent 4ce0057 · commit 8a01ddc · Copy full SHA for 8a01ddc
tests/chinook/test_pydantic_chinook.py
@@ -82,6 +82,7 @@ class Response(BaseModel):
82
return row
83
84
85
+@pytest.mark.skip(reason="takes too long to run")
86
@pytest.mark.asyncio
87
@evaluation_test(
88
input_rows=collect_dataset(),
@@ -136,7 +137,7 @@ class Response(BaseModel):
136
137
system_prompt=LLM_JUDGE_PROMPT,
138
output_type=Response,
139
model=model,
- result_retries=3,
140
+ result_retries=5,
141
)
142
result = await comparison_agent.run(
143
f"Expected answer: {row.ground_truth}\nResponse: {last_assistant_message.content}"
0 commit comments