We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent 466581c commit 620611fCopy full SHA for 620611f
eval_protocol/benchmarks/test_aime25.py
@@ -78,6 +78,7 @@ def aime2025_dataset_adapter(rows: List[Dict[str, Any]]) -> List[EvaluationRow]:
78
],
79
dataset_adapter=aime2025_dataset_adapter,
80
completion_params=[
81
+ {"model": "gpt-4.1"},
82
{
83
"max_tokens": 131000,
84
"extra_body": {"reasoning_effort": "low"},
0 commit comments