|
1 | | -import sys |
2 | | -import types |
3 | | -from pathlib import Path |
4 | 1 | from typing import cast |
5 | 2 |
|
6 | 3 | import pytest |
7 | 4 |
|
8 | | -# The evaluation modules depend on optional third-party packages that aren't |
9 | | -# available in the execution environment for these unit tests. To exercise the |
10 | | -# real implementation of ``load_and_prepare_rows`` without installing those |
11 | | -# packages, we register lightweight stubs in ``sys.modules`` that provide the |
12 | | -# minimal interfaces required during import. |
13 | | - |
14 | | -PACKAGE_ROOT = Path(__file__).resolve().parents[2] / "eval_protocol" |
15 | | -sys.modules.pop("eval_protocol", None) |
16 | | -sys.modules.pop("eval_protocol.pytest", None) |
17 | | - |
18 | | -eval_protocol_pkg = types.ModuleType("eval_protocol") |
19 | | -eval_protocol_pkg.__path__ = [str(PACKAGE_ROOT)] # type: ignore[attr-defined] |
20 | | -sys.modules["eval_protocol"] = eval_protocol_pkg |
21 | | - |
22 | | -pytest_pkg = types.ModuleType("eval_protocol.pytest") |
23 | | -pytest_pkg.__path__ = [str(PACKAGE_ROOT / "pytest")] # type: ignore[attr-defined] |
24 | | -sys.modules["eval_protocol.pytest"] = pytest_pkg |
25 | | -setattr(eval_protocol_pkg, "pytest", pytest_pkg) |
26 | | - |
27 | | -if "loguru" not in sys.modules: |
28 | | - loguru_module = types.ModuleType("loguru") |
29 | | - |
30 | | - class _DummyLogger: |
31 | | - def __getattr__(self, _name): # pragma: no cover - dynamic fallback |
32 | | - def _noop(*_args, **_kwargs): |
33 | | - return None |
34 | | - |
35 | | - return _noop |
36 | | - |
37 | | - loguru_module.logger = _DummyLogger() # type: ignore[attr-defined] |
38 | | - sys.modules["loguru"] = loguru_module |
39 | | - |
40 | | -if "toml" not in sys.modules: |
41 | | - toml_module = types.ModuleType("toml") |
42 | | - |
43 | | - def _noop_load(*_args, **_kwargs): # pragma: no cover - helper for stubbing |
44 | | - return {} |
45 | | - |
46 | | - def _noop_dump(*_args, **_kwargs): # pragma: no cover - helper for stubbing |
47 | | - return None |
48 | | - |
49 | | - toml_module.load = _noop_load # type: ignore[attr-defined] |
50 | | - toml_module.dump = _noop_dump # type: ignore[attr-defined] |
51 | | - sys.modules["toml"] = toml_module |
52 | | - |
53 | | -if "addict" not in sys.modules: |
54 | | - addict_module = types.ModuleType("addict") |
55 | | - |
56 | | - class _AddictDict(dict): # pragma: no cover - simple stub |
57 | | - def __getattr__(self, item): |
58 | | - try: |
59 | | - return self[item] |
60 | | - except KeyError as exc: |
61 | | - raise AttributeError(item) from exc |
62 | | - |
63 | | - def __setattr__(self, key, value): |
64 | | - self[key] = value |
65 | | - |
66 | | - addict_module.Dict = _AddictDict # type: ignore[attr-defined] |
67 | | - sys.modules["addict"] = addict_module |
68 | | - |
69 | | -if "eval_protocol.mcp_env" not in sys.modules: |
70 | | - mcp_env_module = types.ModuleType("eval_protocol.mcp_env") |
71 | | - |
72 | | - class _DummyPolicy: # pragma: no cover - stub placeholder |
73 | | - pass |
74 | | - |
75 | | - def _noop(*_args, **_kwargs): |
76 | | - return None |
77 | | - |
78 | | - mcp_env_module.AnthropicPolicy = _DummyPolicy # type: ignore[attr-defined] |
79 | | - mcp_env_module.FireworksPolicy = _DummyPolicy # type: ignore[attr-defined] |
80 | | - mcp_env_module.LiteLLMPolicy = _DummyPolicy # type: ignore[attr-defined] |
81 | | - mcp_env_module.OpenAIPolicy = _DummyPolicy # type: ignore[attr-defined] |
82 | | - mcp_env_module.make = _noop # type: ignore[attr-defined] |
83 | | - mcp_env_module.rollout = _noop # type: ignore[attr-defined] |
84 | | - mcp_env_module.test_mcp = _noop # type: ignore[attr-defined] |
85 | | - sys.modules["eval_protocol.mcp_env"] = mcp_env_module |
86 | | - |
87 | | -if "eval_protocol.mcp" not in sys.modules: |
88 | | - sys.modules["eval_protocol.mcp"] = types.ModuleType("eval_protocol.mcp") |
89 | | - |
90 | | -if "eval_protocol.rewards" not in sys.modules: |
91 | | - sys.modules["eval_protocol.rewards"] = types.ModuleType("eval_protocol.rewards") |
92 | | - |
93 | | -if "eval_protocol.dataset_logger" not in sys.modules: |
94 | | - dataset_logger_module = types.ModuleType("eval_protocol.dataset_logger") |
95 | | - |
96 | | - class _StubDatasetLogger: # pragma: no cover - stub placeholder |
97 | | - def log(self, *_args, **_kwargs): |
98 | | - return None |
99 | | - |
100 | | - dataset_logger_module.default_logger = _StubDatasetLogger() # type: ignore[attr-defined] |
101 | | - sys.modules["eval_protocol.dataset_logger"] = dataset_logger_module |
102 | | - |
103 | | -if "eval_protocol.dataset_logger.dataset_logger" not in sys.modules: |
104 | | - dataset_logger_pkg = types.ModuleType("eval_protocol.dataset_logger.dataset_logger") |
105 | | - dataset_logger_pkg.DatasetLogger = _StubDatasetLogger # type: ignore[attr-defined] |
106 | | - sys.modules["eval_protocol.dataset_logger.dataset_logger"] = dataset_logger_pkg |
107 | | - |
108 | | -if "backoff" not in sys.modules: |
109 | | - backoff_module = types.ModuleType("backoff") |
110 | | - |
111 | | - def _noop_decorator(*_args, **_kwargs): |
112 | | - def _decorator(func): |
113 | | - return func |
114 | | - |
115 | | - return _decorator |
116 | | - |
117 | | - backoff_module.on_exception = _noop_decorator # type: ignore[attr-defined] |
118 | | - backoff_module.expo = lambda *args, **kwargs: None # type: ignore[attr-defined] |
119 | | - sys.modules["backoff"] = backoff_module |
120 | | - |
121 | | -if "litellm" not in sys.modules: |
122 | | - litellm_module = types.ModuleType("litellm") |
123 | | - cost_calculator_module = types.ModuleType("litellm.cost_calculator") |
124 | | - cost_calculator_module.cost_per_token = lambda *args, **kwargs: 0.0 # type: ignore[attr-defined] |
125 | | - sys.modules["litellm"] = litellm_module |
126 | | - sys.modules["litellm.cost_calculator"] = cost_calculator_module |
127 | | - |
128 | | -if "tqdm" not in sys.modules: |
129 | | - tqdm_module = types.ModuleType("tqdm") |
130 | | - |
131 | | - def _noop_tqdm(iterable=None, **_kwargs): |
132 | | - return iterable if iterable is not None else [] |
133 | | - |
134 | | - tqdm_module.tqdm = _noop_tqdm # type: ignore[attr-defined] |
135 | | - sys.modules["tqdm"] = tqdm_module |
136 | | - |
137 | | -if "openai" not in sys.modules: |
138 | | - openai_module = types.ModuleType("openai") |
139 | | - openai_types_module = types.ModuleType("openai.types") |
140 | | - openai_chat_module = types.ModuleType("openai.types.chat") |
141 | | - openai_chat_completion_module = types.ModuleType("openai.types.chat.chat_completion_message") |
142 | | - openai_chat_tool_module = types.ModuleType("openai.types.chat.chat_completion_message_tool_call") |
143 | | - |
144 | | - class _NotGiven: # pragma: no cover - stub placeholder |
145 | | - pass |
146 | | - |
147 | | - openai_module.NOT_GIVEN = _NotGiven() # type: ignore[attr-defined] |
148 | | - openai_module.NotGiven = _NotGiven # type: ignore[attr-defined] |
149 | | - |
150 | | - openai_types_module.CompletionUsage = object # type: ignore[attr-defined] |
151 | | - openai_chat_completion_module.FunctionCall = object # type: ignore[attr-defined] |
152 | | - openai_chat_tool_module.ChatCompletionMessageToolCall = object # type: ignore[attr-defined] |
153 | | - |
154 | | - openai_types_module.chat = openai_chat_module # type: ignore[attr-defined] |
155 | | - openai_chat_module.chat_completion_message = openai_chat_completion_module # type: ignore[attr-defined] |
156 | | - openai_chat_module.chat_completion_message_tool_call = openai_chat_tool_module # type: ignore[attr-defined] |
157 | | - openai_module.types = openai_types_module # type: ignore[attr-defined] |
158 | | - |
159 | | - sys.modules["openai"] = openai_module |
160 | | - sys.modules["openai.types"] = openai_types_module |
161 | | - sys.modules["openai.types.chat"] = openai_chat_module |
162 | | - sys.modules["openai.types.chat.chat_completion_message"] = openai_chat_completion_module |
163 | | - sys.modules["openai.types.chat.chat_completion_message_tool_call"] = openai_chat_tool_module |
| 5 | +pytest.importorskip("openai") |
164 | 6 |
|
165 | 7 | from eval_protocol.models import EvaluationRow, Message |
166 | 8 | from eval_protocol.pytest.dataset_preparation import load_and_prepare_rows |
|
0 commit comments