Skip to content

Commit f73ebe5

Browse files
committed
update
1 parent 6e881ff commit f73ebe5

File tree

2 files changed

+7
-7
lines changed

2 files changed

+7
-7
lines changed

eval_protocol/pytest/handle_persist_flow.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -71,7 +71,7 @@ def handle_persist_flow(all_results: list[list[EvaluationRow]], test_func_name:
7171
row_data["evals"] = {"score": 0}
7272
row_data["eval_details"] = {
7373
"score": 0,
74-
"is_score_valid": True,
74+
"is_score_valid": False,
7575
"reason": "No evaluation result",
7676
"metrics": {},
7777
}

tests/test_evaluation_postprocess.py

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -29,7 +29,7 @@ def create_test_row(self, score: float, is_valid: bool = True) -> EvaluationRow:
2929
),
3030
)
3131

32-
@patch.dict("os.environ", {"EP_SUMMARY_JSON": ""}) # Disable uploads
32+
@patch.dict("os.environ", {"EP_NO_UPLOAD": "1"}) # Disable uploads
3333
def test_bootstrap_aggregation_with_valid_scores(self):
3434
"""Test bootstrap aggregation with all valid scores and verify exact scores list."""
3535
# Create test data: 2 runs with 2 rows each
@@ -62,7 +62,7 @@ def test_bootstrap_aggregation_with_valid_scores(self):
6262
# Should call logger.log for each row
6363
assert mock_logger.log.call_count == 4
6464

65-
@patch.dict("os.environ", {"EP_SUMMARY_JSON": ""}) # Disable uploads
65+
@patch.dict("os.environ", {"EP_NO_UPLOAD": "1"}) # Disable uploads
6666
def test_bootstrap_aggregation_filters_invalid_scores(self):
6767
"""Test that bootstrap aggregation excludes invalid scores and generates correct scores list."""
6868
# Create test data with some invalid scores
@@ -101,7 +101,7 @@ def test_bootstrap_aggregation_filters_invalid_scores(self):
101101
# Should still call logger.log for all rows (including invalid ones)
102102
assert mock_logger.log.call_count == 4
103103

104-
@patch.dict("os.environ", {"EP_SUMMARY_JSON": ""}) # Disable uploads
104+
@patch.dict("os.environ", {"EP_NO_UPLOAD": "1"}) # Disable uploads
105105
def test_mean_aggregation_with_valid_scores(self):
106106
"""Test mean aggregation with all valid scores."""
107107
all_results = [
@@ -126,7 +126,7 @@ def test_mean_aggregation_with_valid_scores(self):
126126
# Should call logger.log for each row
127127
assert mock_logger.log.call_count == 4
128128

129-
@patch.dict("os.environ", {"EP_SUMMARY_JSON": ""}) # Disable uploads
129+
@patch.dict("os.environ", {"EP_NO_UPLOAD": "1"}) # Disable uploads
130130
def test_mean_aggregation_filters_invalid_scores(self):
131131
"""Test that mean aggregation excludes invalid scores from run averages."""
132132
all_results = [
@@ -157,7 +157,7 @@ def test_mean_aggregation_filters_invalid_scores(self):
157157
# Should call logger.log for all rows
158158
assert mock_logger.log.call_count == 4
159159

160-
@patch.dict("os.environ", {"EP_SUMMARY_JSON": ""}) # Disable uploads
160+
@patch.dict("os.environ", {"EP_NO_UPLOAD": "1"}) # Disable uploads
161161
def test_empty_runs_are_skipped(self):
162162
"""Test that runs with no valid scores are skipped."""
163163
all_results = [
@@ -182,7 +182,7 @@ def test_empty_runs_are_skipped(self):
182182
# Should still call logger.log for all rows
183183
assert mock_logger.log.call_count == 2
184184

185-
@patch.dict("os.environ", {"EP_SUMMARY_JSON": ""}) # Disable uploads
185+
@patch.dict("os.environ", {"EP_NO_UPLOAD": "1"}) # Disable uploads
186186
def test_all_invalid_scores(self):
187187
"""Test behavior when all scores are invalid."""
188188
all_results = [

0 commit comments

Comments
 (0)