File tree: 1 file changed, +1 -19 lines changed
Original file line number | Diff line number | Diff line change
@@ -82,15 +82,6 @@ class Response(BaseModel):
8282 return row
8383
8484
85- """
86- invocation ids:
87-
88- - lead-low-war-kind-business (worked)
89- - miss-wonder-early-friend-side (failed)
90- -
91- """
92-
93-
9485@pytest .mark .asyncio
9586@evaluation_test (
9687 input_rows = collect_dataset (),
@@ -103,18 +94,9 @@ class Response(BaseModel):
10394 }
10495 }
10596 },
106- {
107- "model" : {
108- "orchestrator_agent_model" : {
109- "model" : "accounts/fireworks/models/deepseek-v3p1" ,
110- "provider" : "fireworks" ,
111- }
112- }
113- },
11497 ],
11598 rollout_processor = PydanticAgentRolloutProcessor (),
11699 rollout_processor_kwargs = {"agent" : setup_agent },
117- num_runs = 3 ,
118100 mode = "pointwise" ,
119101)
120102async def test_complex_queries (row : EvaluationRow ) -> EvaluationRow :
@@ -154,7 +136,7 @@ class Response(BaseModel):
154136 system_prompt = LLM_JUDGE_PROMPT ,
155137 output_type = Response ,
156138 model = model ,
157- result_retries = 5 ,
139+ result_retries = 3 ,
158140 )
159141 result = await comparison_agent .run (
160142 f"Expected answer: { row .ground_truth } \n Response: { last_assistant_message .content } "
You can’t perform that action at this time.
0 commit comments