Skip to content

Commit 514ff96

Browse files
committed
Test
1 parent e8dde79 commit 514ff96

File tree

2 files changed

+6
-4
lines changed

2 files changed

+6
-4
lines changed
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
{"question": "On $\\triangle ABC$ points $A,D,E$, and $B$ lie in that order on side $\\overline{AB}$ with $AD=4, DE=16$, and $EB=8$. Points $A,F,G$, and $C$ lie in that order on side $\\overline{AC}$ with $AF=13, FG=52$, and $GC=26$. Let $M$ be the reflection of $D$ through $F$, and let $N$ be the reflection of $G$ through $E$. Quadrilateral $DEGF$ has area 288. Find the area of heptagon $AFNBCEM$.", "answer": "588"}

eval_protocol/benchmarks/test_aime25.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -73,8 +73,9 @@ def aime2025_dataset_adapter(rows: List[Dict[str, Any]]) -> List[EvaluationRow]:
7373

7474
@evaluation_test(
7575
input_dataset=[
76-
"https://huggingface.co/datasets/opencompass/AIME2025/raw/main/aime2025-I.jsonl",
77-
"https://huggingface.co/datasets/opencompass/AIME2025/raw/main/aime2025-II.jsonl",
76+
"eval_protocol/benchmarks/data/aime.jsonl",
77+
# "https://huggingface.co/datasets/opencompass/AIME2025/raw/main/aime2025-I.jsonl",
78+
# "https://huggingface.co/datasets/opencompass/AIME2025/raw/main/aime2025-II.jsonl",
7879
],
7980
dataset_adapter=aime2025_dataset_adapter,
8081
completion_params=[
@@ -87,8 +88,8 @@ def aime2025_dataset_adapter(rows: List[Dict[str, Any]]) -> List[EvaluationRow]:
8788
rollout_processor=SingleTurnRolloutProcessor(),
8889
aggregation_method="mean",
8990
passed_threshold=0.8,
90-
num_runs=8,
91-
max_dataset_rows=2,
91+
num_runs=1,
92+
max_dataset_rows=1,
9293
max_concurrent_rollouts=4,
9394
mode="pointwise",
9495
)

0 commit comments

Comments
 (0)