Skip to content

Commit 2b5887f

Browse files
committed
fix test
1 parent 5690f98 commit 2b5887f

File tree

3 files changed

+8
-6
lines changed

3 files changed

+8
-6
lines changed

tests/pytest/test_execution_metadata.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,7 @@
22
from openai.types import CompletionUsage
33

44
from eval_protocol.models import EvaluationRow, ExecutionMetadata, InputMetadata, CostMetrics, Message
5-
from eval_protocol.pytest.utils import add_cost_metrics
5+
from eval_protocol.pytest.evaluation_test_utils import add_cost_metrics
66

77

88
class TestExecutionMetadata:

tests/pytest/test_utils.py

Lines changed: 6 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,7 @@
22
from unittest.mock import AsyncMock, MagicMock, patch
33
import pytest
44

5-
from eval_protocol.pytest.utils import rollout_processor_with_retry
5+
from eval_protocol.pytest.evaluation_test_utils import rollout_processor_with_retry
66
from eval_protocol.pytest.types import RolloutProcessorConfig
77
from eval_protocol.models import EvaluationRow, Status, InputMetadata, ExecutionMetadata
88
from eval_protocol.dataset_logger.dataset_logger import DatasetLogger
@@ -112,12 +112,14 @@ async def flaky_task():
112112
if call_count == 1:
113113
raise ConnectionError("Connection failed")
114114
else:
115+
from datetime import datetime
116+
115117
row = EvaluationRow(
116118
messages=[],
117-
input_metadata={},
119+
input_metadata=InputMetadata(completion_params={}),
118120
rollout_status=Status.rollout_finished(),
119-
execution_metadata={},
120-
created_at="2024-01-01T00:00:00Z",
121+
execution_metadata=ExecutionMetadata(),
122+
created_at=datetime.fromisoformat("2024-01-01T00:00:00"),
121123
)
122124
return row
123125

tests/test_evaluation_postprocess.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -212,7 +212,7 @@ class TestBootstrapEquivalence:
212212
def test_bootstrap_equivalence_pandas_vs_pure_python(self):
213213
import random
214214
import pandas as pd
215-
from eval_protocol.pytest.utils import calculate_bootstrap_scores as py_bootstrap
215+
from eval_protocol.pytest.evaluation_test_utils import calculate_bootstrap_scores as py_bootstrap
216216

217217
# Deterministic synthetic scores
218218
rng = random.Random(123)

0 commit comments

Comments
 (0)