Skip to content

Commit a26cf02

Browse files
committed
broken still
1 parent c1b0516 commit a26cf02

File tree

1 file changed

+5
-1
lines changed

1 file changed

+5
-1
lines changed

eval_protocol/benchmarks/test_aime25.py

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -78,7 +78,6 @@ def aime2025_dataset_adapter(rows: List[Dict[str, Any]]) -> List[EvaluationRow]:
78 78
],
79 79
dataset_adapter=aime2025_dataset_adapter,
80 80
completion_params=[
81 -
{"model": "gpt-4.1"},
82 81
{
83 82
"max_tokens": 131000,
84 83
"extra_body": {"reasoning_effort": "low"},
@@ -89,6 +88,11 @@ def aime2025_dataset_adapter(rows: List[Dict[str, Any]]) -> List[EvaluationRow]:
89 88
"extra_body": {"reasoning_effort": "medium"},
90 89
"model": "fireworks_ai/accounts/fireworks/models/gpt-oss-120b",
91 90
},
91 +
{
92 +
"max_tokens": 131000,
93 +
"extra_body": {"reasoning_effort": "low"},
94 +
"model": "fireworks_ai/accounts/fireworks/models/gpt-oss-20b",
95 +
},
92 96
],
93 97
rollout_processor=SingleTurnRolloutProcessor(),
94 98
aggregation_method="mean",

0 commit comments

Comments
 (0)