Skip to content

Commit ca2793f

Browse files
committed
fix tests
1 parent 52027f1 commit ca2793f

File tree

2 files changed

+0
-4
lines changed

2 files changed

+0
-4
lines changed

tests/pytest/test_pytest_math_format_length.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,12 +5,10 @@
55
from eval_protocol.rewards.length import count_tokens
66
from eval_protocol.rewards.math import math_reward
77
from examples.math_with_format_and_length.main import check_think_answer_format
8-
from tests.pytest.helper.gsm8k_to_evaluation_row import gsm8k_to_evaluation_row
98

109

1110
@evaluation_test(
1211
input_dataset=["development/gsm8k_sample.jsonl"],
13-
dataset_adapter=gsm8k_to_evaluation_row,
1412
completion_params=[{"temperature": 0.0, "model": "fireworks_ai/accounts/fireworks/models/gpt-oss-120b"}],
1513
max_dataset_rows=5,
1614
passed_threshold=0.0,

tests/pytest/test_pytest_word_count_example.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,10 @@
22

33
from eval_protocol.models import EvaluateResult, EvaluationRow, MetricResult
44
from eval_protocol.pytest import SingleTurnRolloutProcessor, evaluation_test
5-
from tests.pytest.helper.word_count_to_evaluation_row import word_count_to_evaluation_row
65

76

87
@evaluation_test(
98
input_dataset=["development/gsm8k_sample.jsonl"],
10-
dataset_adapter=word_count_to_evaluation_row,
119
completion_params=[{"temperature": 0.0, "model": "fireworks_ai/accounts/fireworks/models/gpt-oss-120b"}],
1210
max_dataset_rows=5,
1311
passed_threshold=0.3, # Reasonable threshold for word count evaluation

0 commit comments

Comments
 (0)