@@ -29,7 +29,7 @@ def create_test_row(self, score: float, is_valid: bool = True) -> EvaluationRow:
2929 ),
3030 )
3131
32- @patch .dict ("os.environ" , {"EP_SUMMARY_JSON " : "" }) # Disable uploads
32+ @patch .dict ("os.environ" , {"EP_NO_UPLOAD " : "1 " }) # Disable uploads
3333 def test_bootstrap_aggregation_with_valid_scores (self ):
3434 """Test bootstrap aggregation with all valid scores and verify exact scores list."""
3535 # Create test data: 2 runs with 2 rows each
@@ -62,7 +62,7 @@ def test_bootstrap_aggregation_with_valid_scores(self):
6262 # Should call logger.log for each row
6363 assert mock_logger .log .call_count == 4
6464
65- @patch .dict ("os.environ" , {"EP_SUMMARY_JSON " : "" }) # Disable uploads
65+ @patch .dict ("os.environ" , {"EP_NO_UPLOAD " : "1 " }) # Disable uploads
6666 def test_bootstrap_aggregation_filters_invalid_scores (self ):
6767 """Test that bootstrap aggregation excludes invalid scores and generates correct scores list."""
6868 # Create test data with some invalid scores
@@ -101,7 +101,7 @@ def test_bootstrap_aggregation_filters_invalid_scores(self):
101101 # Should still call logger.log for all rows (including invalid ones)
102102 assert mock_logger .log .call_count == 4
103103
104- @patch .dict ("os.environ" , {"EP_SUMMARY_JSON " : "" }) # Disable uploads
104+ @patch .dict ("os.environ" , {"EP_NO_UPLOAD " : "1 " }) # Disable uploads
105105 def test_mean_aggregation_with_valid_scores (self ):
106106 """Test mean aggregation with all valid scores."""
107107 all_results = [
@@ -126,7 +126,7 @@ def test_mean_aggregation_with_valid_scores(self):
126126 # Should call logger.log for each row
127127 assert mock_logger .log .call_count == 4
128128
129- @patch .dict ("os.environ" , {"EP_SUMMARY_JSON " : "" }) # Disable uploads
129+ @patch .dict ("os.environ" , {"EP_NO_UPLOAD " : "1 " }) # Disable uploads
130130 def test_mean_aggregation_filters_invalid_scores (self ):
131131 """Test that mean aggregation excludes invalid scores from run averages."""
132132 all_results = [
@@ -157,7 +157,7 @@ def test_mean_aggregation_filters_invalid_scores(self):
157157 # Should call logger.log for all rows
158158 assert mock_logger .log .call_count == 4
159159
160- @patch .dict ("os.environ" , {"EP_SUMMARY_JSON " : "" }) # Disable uploads
160+ @patch .dict ("os.environ" , {"EP_NO_UPLOAD " : "1 " }) # Disable uploads
161161 def test_empty_runs_are_skipped (self ):
162162 """Test that runs with no valid scores are skipped."""
163163 all_results = [
@@ -182,7 +182,7 @@ def test_empty_runs_are_skipped(self):
182182 # Should still call logger.log for all rows
183183 assert mock_logger .log .call_count == 2
184184
185- @patch .dict ("os.environ" , {"EP_SUMMARY_JSON " : "" }) # Disable uploads
185+ @patch .dict ("os.environ" , {"EP_NO_UPLOAD " : "1 " }) # Disable uploads
186186 def test_all_invalid_scores (self ):
187187 """Test behavior when all scores are invalid."""
188188 all_results = [
0 commit comments