Skip to content

Commit 2d4a350

Browse files
committed
Force the summary to print, so passing -s is no longer needed
1 parent 1172dd7 commit 2d4a350

File tree

1 file changed

+5
-2
lines changed

1 file changed

+5
-2
lines changed

eval_protocol/pytest/evaluation_test_postprocess.py

Lines changed: 5 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,7 @@
44
import os
55
import pathlib
66
import statistics
7+
import sys
78
import time
89
from eval_protocol.dataset_logger.dataset_logger import DatasetLogger
910
from eval_protocol.models import CompletionParams, EvaluationRow, EvaluationThreshold
@@ -123,11 +124,13 @@ def postprocess(
123124
if should_print:
124125
if ci_low is not None and ci_high is not None and standard_error is not None:
125126
print(
126-
f"EP Summary | suite={suite_name} model={model_used} agg={summary_obj['agg_score']:.3f} se={summary_obj['standard_error']:.3f} ci95=[{ci_low:.3f},{ci_high:.3f}] runs={num_runs} rows={total_rows}"
127+
f"EP Summary | suite={suite_name} model={model_used} agg={summary_obj['agg_score']:.3f} se={summary_obj['standard_error']:.3f} ci95=[{ci_low:.3f},{ci_high:.3f}] runs={num_runs} rows={total_rows}",
128+
file=sys.__stderr__,
127129
)
128130
else:
129131
print(
130-
f"EP Summary | suite={suite_name} model={model_used} agg={summary_obj['agg_score']:.3f} runs={num_runs} rows={total_rows}"
132+
f"EP Summary | suite={suite_name} model={model_used} agg={summary_obj['agg_score']:.3f} runs={num_runs} rows={total_rows}",
133+
file=sys.__stderr__,
131134
)
132135
# As per project convention, avoid printing per-metric CI lines to reduce noise
133136
if summary_path:

0 commit comments

Comments
 (0)