# test_pytest_ensure_logging.py
import os
from unittest.mock import Mock, patch
def test_ensure_logging(monkeypatch):
    """
    Check that the default SQLite logger writes through its row store.

    The ``SqliteEvaluationRowStore`` name in the SQLite dataset logger adapter
    module is patched to return a mock store, a pass-through pointwise
    evaluation is run twice over the markdown dataset, and the calls recorded
    on the mock's ``upsert_row`` are then inspected.
    """
    store_target = "eval_protocol.dataset_logger.sqlite_dataset_logger_adapter.SqliteEvaluationRowStore"
    dataset_path = "tests/pytest/data/markdown_dataset.jsonl"

    # Stand-in store: records upserts, reports no existing rows.
    mock_store = Mock(
        upsert_row=Mock(),
        read_rows=Mock(return_value=[]),
        db_path="/tmp/test.db",
    )

    # While the patch is active, constructing SqliteEvaluationRowStore from the
    # adapter module yields the mock above instead of a real store.
    with patch(store_target, return_value=mock_store):
        # Imported under the patch, in the same order as before.
        from eval_protocol.models import EvaluationRow
        from eval_protocol.pytest.default_no_op_rollout_processor import NoOpRolloutProcessor
        from eval_protocol.pytest.evaluation_test import evaluation_test
        from tests.pytest.test_markdown_highlighting import markdown_dataset_to_evaluation_row

        # No logger argument is given on purpose, so the decorator uses its
        # default logger (the one whose store has been swapped out).
        @evaluation_test(
            input_dataset=[dataset_path],
            completion_params=[{"temperature": 0.0, "model": "dummy/local-model"}],
            dataset_adapter=markdown_dataset_to_evaluation_row,
            rollout_processor=NoOpRolloutProcessor(),
            mode="pointwise",
            combine_datasets=False,
            num_runs=2,
        )
        def eval_fn(row: EvaluationRow) -> EvaluationRow:
            return row

        eval_fn(
            dataset_path=[dataset_path],
            completion_params={"temperature": 0.0, "model": "dummy/local-model"},
        )

        upsert = mock_store.upsert_row

        # The store must have been written to ...
        assert upsert.called, "SqliteEvaluationRowStore.upsert_row should have been called"

        # ... at least once.
        call_count = upsert.call_count
        assert call_count > 0, f"Expected upsert_row to be called at least once, but it was called {call_count} times"

        # Each recorded call unpacks to (positional args, keyword args); the
        # payload is the first positional argument or the ``data`` keyword.
        for args, kwargs in upsert.call_args_list:
            if args:
                data = args[0]
            else:
                data = kwargs.get("data")
            assert data is not None, "upsert_row should be called with data parameter"
            assert isinstance(data, dict), "data should be a dictionary"
            assert "execution_metadata" in data, "data should contain execution_metadata"
            assert "rollout_id" in data["execution_metadata"], "data should contain rollout_id in execution_metadata"