From 9bf9c5baaef97db3abe261abb50cfc171b021e30 Mon Sep 17 00:00:00 2001 From: Sebastijan-Dominis Date: Sun, 29 Mar 2026 09:25:34 +0200 Subject: [PATCH 01/17] Added many tests; ignored tools; fixed mypy issue Added a lot of tests covering ml_service - both frontend and backend. Ran coverage and got ~70% for now. Updated .gitignore to ignore the tools directory, which just contains the coverage report. Fixed one mypy issue in tests that occurred as a result of two conftest.py files being present in tests/. More tests will be added, but this is a good start. --- .gitignore | 5 +- tests/__init__.py | 4 + tests/test_ml_service/conftest.py | 124 ++++++++++++++ tests/test_ml_service/test_backend_routes.py | 155 +++++++++++++++++ tests/test_ml_service/test_dir_viewer.py | 40 +++++ .../test_execute_subprocess.py | 119 +++++++++++++ .../test_frontend_callbacks.py | 162 ++++++++++++++++++ tests/test_ml_service/test_frontend_utils.py | 65 +++++++ tests/test_ml_service/test_layouts.py | 38 ++++ 9 files changed, 711 insertions(+), 1 deletion(-) create mode 100644 tests/__init__.py create mode 100644 tests/test_ml_service/conftest.py create mode 100644 tests/test_ml_service/test_backend_routes.py create mode 100644 tests/test_ml_service/test_dir_viewer.py create mode 100644 tests/test_ml_service/test_execute_subprocess.py create mode 100644 tests/test_ml_service/test_frontend_callbacks.py create mode 100644 tests/test_ml_service/test_frontend_utils.py create mode 100644 tests/test_ml_service/test_layouts.py diff --git a/.gitignore b/.gitignore index b90b81bb..9681679f 100644 --- a/.gitignore +++ b/.gitignore @@ -230,4 +230,7 @@ __marimo__/ /predictions/ /monitoring/ /orchestration_logs/ -/scripts_logs/ \ No newline at end of file +/scripts_logs/ + +# tools +/tools/ \ No newline at end of file diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 00000000..0bcdea59 --- /dev/null +++ b/tests/__init__.py @@ -0,0 +1,4 @@ +"""Make the `tests` directory a package 
so mypy maps test modules unambiguously. + +This file is intentionally empty. +""" diff --git a/tests/test_ml_service/conftest.py b/tests/test_ml_service/conftest.py new file mode 100644 index 00000000..6140dc13 --- /dev/null +++ b/tests/test_ml_service/conftest.py @@ -0,0 +1,124 @@ +"""Pytest fixtures for testing the ml_service package. + +The fixtures are intentionally lightweight and platform agnostic so tests +run consistently on Windows and Linux. +""" +from __future__ import annotations + +from collections.abc import Callable +from typing import Any + +import pytest + + +class DummyDashApp: + """Minimal stand-in for a Dash app capturing callback registration. + + Instances collect registered callbacks as dicts with keys + ``'args'``, ``'kwargs'`` and ``'func'`` so tests can inspect what was + registered without importing the real `dash` package. + """ + + def __init__(self) -> None: + self.callbacks: list[dict[str, Any]] = [] + + def callback(self, *args: Any, **kwargs: Any) -> Callable[[Callable[..., Any]], Callable[..., Any]]: + """Return a decorator that records the wrapped function and metadata.""" + + def decorator(func: Callable[..., Any]) -> Callable[..., Any]: + self.callbacks.append({"args": args, "kwargs": kwargs, "func": func}) + return func + + return decorator + + +@pytest.fixture +def dummy_dash_app() -> DummyDashApp: + """Provide a dummy Dash-like app for registering callbacks in frontend pages.""" + + return DummyDashApp() + + +@pytest.fixture +def mock_requests(monkeypatch) -> dict[str, Any]: + """Helpers to patch the `requests` module during tests. + + Returns a small factory dict with a `MockResponse` class and + `patch_post` / `patch_get` helpers that tests can use to inject + deterministic responses. 
+ """ + + import requests as _requests + + + class MockResponse: + def __init__(self, ok: bool = True, status_code: int = 200, text: str = "", json_data: Any = None) -> None: + self.ok = ok + self.status_code = status_code + self.text = text + self._json = json_data if json_data is not None else {} + + def json(self) -> Any: # pragma: no cover - trivial helper + return self._json + + def raise_for_status(self) -> None: + if not self.ok: + raise _requests.HTTPError(f"{self.status_code}: {self.text}") + + + def patch_post(func: Callable[..., Any]) -> None: + monkeypatch.setattr(_requests, "post", func) + + + def patch_get(func: Callable[..., Any]) -> None: + monkeypatch.setattr(_requests, "get", func) + + + return {"MockResponse": MockResponse, "patch_post": patch_post, "patch_get": patch_get} + + +@pytest.fixture +def patch_subprocess(monkeypatch) -> Callable[[int, str, str], None]: + """Helper to patch ``subprocess.run`` with a controllable result. + + Usage: + + patch_subprocess(returncode=0, stdout="ok", stderr="") + + After calling the helper, any call to ``subprocess.run`` will return + an object with ``returncode``, ``stdout`` and ``stderr`` attributes. + """ + + import subprocess as _subprocess + + + class Result: + def __init__(self, returncode: int = 0, stdout: str = "", stderr: str = "") -> None: + self.returncode = returncode + self.stdout = stdout + self.stderr = stderr + + + def _patch(returncode: int = 0, stdout: str = "", stderr: str = "") -> None: + result = Result(returncode=returncode, stdout=stdout, stderr=stderr) + + def fake_run(*args: Any, **kwargs: Any) -> Result: + return result + + monkeypatch.setattr(_subprocess, "run", fake_run) + + + return _patch + + +@pytest.fixture +def fastapi_client() -> Any: + """Provide a `TestClient` for the ml_service FastAPI app. + + Tests that need to exercise the HTTP layer can use this fixture. 
+ """ + + from fastapi.testclient import TestClient + from ml_service.backend.main import app as _app + + return TestClient(_app) diff --git a/tests/test_ml_service/test_backend_routes.py b/tests/test_ml_service/test_backend_routes.py new file mode 100644 index 00000000..5d438797 --- /dev/null +++ b/tests/test_ml_service/test_backend_routes.py @@ -0,0 +1,155 @@ +"""Integration-style tests for ml_service backend routers using TestClient. + +These tests monkeypatch internal helpers to avoid heavy side-effects and +verify that endpoints route requests and return expected JSON shapes. +""" +from __future__ import annotations + +from typing import Any + + +def test_pipelines_train_endpoint(fastapi_client, monkeypatch): + import ml_service.backend.routers.pipelines as pipelines_router + + def fake_execute(module_path: str, payload: Any, boolean_args: list[str] | None = None): + return {"executed": module_path, "payload": payload.model_dump() if hasattr(payload, "model_dump") else dict(payload)} + + monkeypatch.setattr(pipelines_router, "execute_pipeline", fake_execute) + + res = fastapi_client.post("/pipelines/train", json={"problem": "p", "segment": "s", "version": "v"}) + assert res.status_code == 200 + j = res.json() + assert j["executed"] == "pipelines.runners.train" + + +def test_scripts_generate_fake_data(fastapi_client, monkeypatch): + import ml_service.backend.routers.scripts as scripts_router + + def fake_exec(module_path: str, payload: Any, boolean_args: list[str] | None = None): + return {"script": module_path, "args": getattr(payload, "model_dump", lambda **k: dict(payload))()} + + monkeypatch.setattr(scripts_router, "execute_script", fake_exec) + + res = fastapi_client.post("/scripts/generate_fake_data", json={"data": "hotel_bookings", "version": "v1"}) + assert res.status_code == 200 + assert res.json()["script"] == "scripts.generators.generate_fake_data" + + +def test_pipeline_cfg_validate_and_write(fastapi_client, monkeypatch, tmp_path): + import 
ml_service.backend.routers.pipeline_cfg as pcfg + + # Stub loader + validator + monkeypatch.setattr(pcfg, "load_yaml_and_add_lineage", lambda yaml_text: {"version": "v1"}) + + class DummyValidated: + def model_dump(self, mode: str = "json"): + return {"normalized": True} + + monkeypatch.setattr(pcfg, "validate_config_payload", lambda data: DummyValidated()) + + # Point get_config_path to a path under tmp_path that exists + def fake_get_config_path(*, repo_root: str, data_type: str, algorithm: str, pipeline_version: str): + p = tmp_path / data_type / algorithm + p.mkdir(parents=True, exist_ok=True) + fp = p / f"{pipeline_version}.yaml" + fp.write_text("x: 1") + return fp + + monkeypatch.setattr(pcfg, "get_config_path", fake_get_config_path) + + # Validate should report exists=True + res = fastapi_client.post("/pipeline_cfg/validate", json={"config": "dummy", "data_type": "dt", "algorithm": "alg"}) + assert res.status_code == 200 + j = res.json() + assert j["valid"] is True + assert j["exists"] is True + + # Now test write: make get_config_path return non-existing file and patch save_config + def fake_get_config_path2(*, repo_root: str, data_type: str, algorithm: str, pipeline_version: str): + p = tmp_path / "new" / algorithm + p.mkdir(parents=True, exist_ok=True) + return p / f"{pipeline_version}.yaml" + + monkeypatch.setattr(pcfg, "get_config_path", fake_get_config_path2) + + monkeypatch.setattr(pcfg, "save_config", lambda config, config_path: None) + + res2 = fastapi_client.post("/pipeline_cfg/write", json={"config": "dummy", "data_type": "dt2", "algorithm": "alg2"}) + assert res2.status_code == 201 + j2 = res2.json() + assert j2["success"] == "written" + + +def test_features_validate_and_write(fastapi_client, monkeypatch, tmp_path): + import ml_service.backend.routers.features as features_router + + monkeypatch.setattr(features_router, "load_yaml_and_add_lineage", lambda yaml_text: {"some": "data"}) + + class DummyVal: + def model_dump(self, mode: str = "json"): 
+ return {"ok": True} + + monkeypatch.setattr(features_router, "validate_feature_config", lambda d: DummyVal()) + + # Simulate registry path + monkeypatch.setattr(features_router, "get_registry_path", lambda repo_root: tmp_path / "features.yaml") + + monkeypatch.setattr(features_router, "registry_entry_exists", lambda name, version, registry_path: False) + monkeypatch.setattr(features_router, "save_feature_registry", lambda name, version, validated_config, registry_path: {"saved": True}) + + res = fastapi_client.post("/features/validate", json={"name": "n", "version": "v", "config": "yaml"}) + assert res.status_code == 200 + assert res.json()["valid"] is True + + res2 = fastapi_client.post("/features/write", json={"name": "n2", "version": "v2", "config": "yaml"}) + assert res2.status_code == 200 or res2.status_code == 201 + + +def test_file_viewer_and_dir_viewer_load(fastapi_client, tmp_path, monkeypatch): + # File viewer: write a small YAML file and request it + yaml_path = tmp_path / "cfg.yaml" + yaml_path.write_text("a: 1") + + res = fastapi_client.post("/file_viewer/load", json={"path": str(yaml_path)}) + assert res.status_code == 200 + j = res.json() + assert "content" in j and "mode" in j + + # Dir viewer: set repo_root to tmp_path and create directory + import ml_service.backend.routers.dir_viewer as dir_router + + monkeypatch.setattr(dir_router, "repo_root", str(tmp_path)) + (tmp_path / "some_dir").mkdir() + (tmp_path / "some_dir" / "f.txt").write_text("x") + + res2 = fastapi_client.post("/dir_viewer/load", json={"path": "some_dir"}) + assert res2.status_code == 200 + j2 = res2.json() + assert "tree" in j2 and "tree_yaml" in j2 + + +def test_promotion_thresholds_validate_and_write(fastapi_client, monkeypatch, tmp_path): + import ml_service.backend.routers.promotion_thresholds as prom_router + + monkeypatch.setattr(prom_router, "load_yaml_and_add_lineage", lambda yaml_text: {"x": 1}) + + class DummyVal: + def model_dump(self, mode: str = "json"): + return 
{"ok": True} + + monkeypatch.setattr(prom_router, "validate_config_payload", lambda d: DummyVal()) + + thresholds_path = tmp_path / "configs" / "promotion" / "thresholds.yaml" + thresholds_path.parent.mkdir(parents=True, exist_ok=True) + + # Case: not exists + monkeypatch.setattr(prom_router, "check_thresholds_exist", lambda config_path, problem_type, segment: (False, {})) + monkeypatch.setattr(prom_router, "save_promotion_thresholds", lambda **kwargs: None) + + res = fastapi_client.post("/promotion_thresholds/validate", json={"config": "x", "problem_type": "p", "segment": "s"}) + assert res.status_code == 200 + assert res.json()["valid"] is True + + res2 = fastapi_client.post("/promotion_thresholds/write", json={"config": "x", "problem_type": "p", "segment": "s"}) + assert res2.status_code == 201 + assert res2.json()["success"] == "written" diff --git a/tests/test_ml_service/test_dir_viewer.py b/tests/test_ml_service/test_dir_viewer.py new file mode 100644 index 00000000..80fbbc4e --- /dev/null +++ b/tests/test_ml_service/test_dir_viewer.py @@ -0,0 +1,40 @@ +"""Tests for the directory tree builder utility. + +These tests use `tmp_path` and a small fake path object to ensure behaviour +is consistent across Windows and Linux. 
+""" +from __future__ import annotations + +from pathlib import Path +from typing import Any, cast + +from ml_service.backend.dir_viewer.utils.build_tree import build_tree + + +def test_build_tree_nested(tmp_path: Path) -> None: + """`build_tree` returns nested dictionaries for directories and ``None`` for files.""" + + a = tmp_path / "a" + a.mkdir() + (a / "file1.txt").write_text("hello") + b = a / "b" + b.mkdir() + (b / "file2.txt").write_text("hi") + + tree = cast(dict[str, Any], build_tree(tmp_path)) + + assert "a" in tree + assert tree["a"]["file1.txt"] is None + assert "b" in tree["a"] + assert tree["a"]["b"]["file2.txt"] is None + + +def test_build_tree_permission_error() -> None: + """When iteration raises PermissionError, `build_tree` returns an error dict.""" + + class FakePath: + def iterdir(self) -> Any: + raise PermissionError + + result = build_tree(cast(Path, FakePath())) + assert result == {"error": "Permission denied"} diff --git a/tests/test_ml_service/test_execute_subprocess.py b/tests/test_ml_service/test_execute_subprocess.py new file mode 100644 index 00000000..a9eee8ef --- /dev/null +++ b/tests/test_ml_service/test_execute_subprocess.py @@ -0,0 +1,119 @@ +"""Tests for executing scripts and pipelines via subprocess wrappers. + +These tests assert that CLI arguments are constructed correctly and that +subprocess failures are surfaced as `HTTPException`. 
+""" +from __future__ import annotations + +import subprocess +from typing import Any + +import pytest +from ml_service.backend.pipelines.execute_pipeline import execute_pipeline +from ml_service.backend.registries.exit_codes_meaning import EXIT_MEANING +from ml_service.backend.scripts.execute_script import execute_script +from pydantic import BaseModel + + +def test_execute_script_list_and_boolean(monkeypatch) -> None: + class Payload(BaseModel): + name: str + list_items: list[str] | None = None + enable: bool | None = None + empty_field: str | None = None + + payload = Payload(name="test", list_items=["a", "b"], enable=True, empty_field="") + + def fake_run(cmd: list[str], capture_output: Any, text: Any, env: Any, cwd: Any): + # basic sanity checks on the constructed command + assert cmd[0] == "python" + assert "-m" in cmd + # list handling + assert "--list-items" in cmd + idx = cmd.index("--list-items") + assert cmd[idx + 1] == "a" + assert cmd[idx + 2] == "b" + # boolean handling + assert "--enable" in cmd + idx2 = cmd.index("--enable") + assert cmd[idx2 + 1] == "True" + + class R: # minimal CompletedProcess-like + returncode = 0 + stdout = "ok" + stderr = "" + + return R() + + monkeypatch.setattr(subprocess, "run", fake_run) + + res = execute_script("some.module", payload, boolean_args=["enable"]) # type: ignore[arg-type] + + assert res["exit_code"] == 0 + assert res["stdout"] == "ok" + assert res["stderr"] == "" + assert res["status"] == EXIT_MEANING.get(0, "UNKNOWN_ERROR") + + +def test_execute_script_start_failure(monkeypatch) -> None: + class Payload(BaseModel): + name: str + + payload = Payload(name="x") + + def bad_run(*args: Any, **kwargs: Any): + raise OSError("cannot start") + + monkeypatch.setattr(subprocess, "run", bad_run) + + with pytest.raises(Exception) as exc: + execute_script("some.module", payload) + + # HTTPException from FastAPI exposes `status_code` attribute + assert getattr(exc.value, "status_code", 500) == 500 + + +def 
test_execute_pipeline_boolean(monkeypatch) -> None: + class Payload(BaseModel): + strict: bool | None = None + name: str = "p" + + payload = Payload(strict=True, name="p") + + def fake_run(cmd: list[str], capture_output: Any, text: Any, env: Any, cwd: Any): + assert "--strict" in cmd + idx = cmd.index("--strict") + assert cmd[idx + 1] == "True" + + class R: + returncode = 2 + stdout = "done" + stderr = "" + + return R() + + monkeypatch.setattr(subprocess, "run", fake_run) + + res = execute_pipeline("pipelines.example", payload, boolean_args=["strict"]) # type: ignore[arg-type] + + assert res["exit_code"] == 2 + assert res["stdout"] == "done" + assert res["status"] == EXIT_MEANING.get(2, "UNKNOWN_ERROR") + + +def test_execute_pipeline_start_failure(monkeypatch) -> None: + class Payload(BaseModel): + name: str + + payload = Payload(name="y") + + def bad_run(*args: Any, **kwargs: Any): + raise RuntimeError("spawn failed") + + monkeypatch.setattr(subprocess, "run", bad_run) + + with pytest.raises(Exception) as exc: + execute_pipeline("pipelines.example", payload) + + # HTTPException from FastAPI exposes `status_code` attribute + assert getattr(exc.value, "status_code", 500) == 500 diff --git a/tests/test_ml_service/test_frontend_callbacks.py b/tests/test_ml_service/test_frontend_callbacks.py new file mode 100644 index 00000000..5d0d236b --- /dev/null +++ b/tests/test_ml_service/test_frontend_callbacks.py @@ -0,0 +1,162 @@ +"""Tests for frontend callbacks (scripts, pipelines, file+dir viewer, docs). + +These tests use `dummy_dash_app` to capture callback registration and +patch network/call helpers to keep tests deterministic. 
+""" +from __future__ import annotations + +from pathlib import Path +from typing import Any, cast + +from ml_service.frontend.dir_viewer.callbacks import register_callbacks as register_dir_callbacks +from ml_service.frontend.docs import callbacks as docs_callbacks +from ml_service.frontend.docs.callbacks import register_callbacks as register_docs_callbacks +from ml_service.frontend.docs.callbacks import rewrite_links +from ml_service.frontend.file_viewer.callbacks import register_callbacks as register_file_callbacks +from ml_service.frontend.pipelines.callbacks import ( + register_callbacks as register_pipelines_callbacks, +) +from ml_service.frontend.pipelines.pipelines_metadata import FRONTEND_PIPELINES +from ml_service.frontend.scripts.callbacks import register_callbacks as register_scripts_callbacks +from ml_service.frontend.scripts.scripts_metadata import FRONTEND_SCRIPTS + + +def test_file_viewer_and_dir_viewer_callbacks(dummy_dash_app, mock_requests: dict[str, Any]): + # File viewer + before = len(dummy_dash_app.callbacks) + register_file_callbacks(dummy_dash_app) + new = dummy_dash_app.callbacks[before:] + funcs = [c["func"] for c in new if c["func"].__name__ == "load_file"] + assert funcs, "load_file not registered" + load_file = funcs[0] + + reqs = cast(dict[str, Any], mock_requests) + MockResponse = reqs["MockResponse"] + + def fake_post_file(url, json: dict[str, Any] | None = None, **kwargs): + assert json is not None + return MockResponse(ok=True, status_code=200, text="ok", json_data={"content": "hello", "mode": "yaml", "path": json["path"]}) + + reqs["patch_post"](fake_post_file) + + content, mode, alert = load_file(None, "/some/path/config.yaml") + assert content == "hello" + assert mode == "yaml" + assert "Loaded" in str(alert) + + # Empty path returns validation Alert and empty content + content2, mode2, _ = load_file(None, "") + assert content2 == "" + assert mode2 == "yaml" + + # Directory viewer + before2 = len(dummy_dash_app.callbacks) + 
register_dir_callbacks(dummy_dash_app) + new2 = dummy_dash_app.callbacks[before2:] + funcs2 = [c["func"] for c in new2 if c["func"].__name__ == "load_dir"] + assert funcs2, "load_dir not registered" + load_dir = funcs2[0] + + def fake_post_dir(url, json: dict[str, Any] | None = None, **kwargs): + assert json is not None + return MockResponse(ok=True, status_code=200, text="ok", json_data={"tree_yaml": "t", "path": json["path"]}) + + reqs["patch_post"](fake_post_dir) + + tcontent, tmode, talert = load_dir(None, "configs") + assert tcontent == "t" + assert tmode == "yaml" + assert "Loaded directory" in str(talert) + + +def _find_callback_by_name(app_callbacks, name: str): + return [c for c in app_callbacks if c["func"].__name__ == name] + + +def test_scripts_and_pipelines_run_pipeline_callbacks(dummy_dash_app, monkeypatch): + # Scripts + register_scripts_callbacks(dummy_dash_app) + run_callbacks = _find_callback_by_name(dummy_dash_app.callbacks, "run_pipeline") + assert run_callbacks, "No run_pipeline callbacks registered for scripts" + + # Patch call_script used inside the scripts callbacks + import ml_service.frontend.scripts.callbacks as scripts_callbacks + + monkeypatch.setattr(scripts_callbacks, "call_script", lambda endpoint, payload: {"status": "SUCCESS", "result": payload}) + + # Take first run_pipeline callback and invoke it with None values for all fields + cb = run_callbacks[0] + output_obj = cb["args"][0] + comp_id = getattr(output_obj, "component_id", getattr(output_obj, "id", "")) + matching = [s for s in FRONTEND_SCRIPTS if s["name"] in comp_id] + script = matching[0] if matching else FRONTEND_SCRIPTS[0] + field_count = len(script["fields"]) + + result_comp = cb["func"](1, *([None] * field_count)) + assert "SUCCESS" in str(result_comp) + + # Pipelines + register_pipelines_callbacks(dummy_dash_app) + run_callbacks_p = _find_callback_by_name(dummy_dash_app.callbacks, "run_pipeline") + # There will be multiple run_pipeline functions (scripts + pipelines); 
pick one that matches pipeline names + import ml_service.frontend.pipelines.callbacks as pipelines_callbacks + monkeypatch.setattr(pipelines_callbacks, "call_pipeline", lambda endpoint, payload: {"status": "SUCCESS", "result": payload}) + + # find a pipeline callback whose component id contains one of the pipeline names + cb_pipeline = None + for c in run_callbacks_p: + out = c["args"][0] + cid = getattr(out, "component_id", getattr(out, "id", "")) + if any(p["name"] in cid for p in FRONTEND_PIPELINES): + cb_pipeline = c + break + + assert cb_pipeline is not None + pipeline_name = next(p for p in FRONTEND_PIPELINES if p["name"] in getattr(cb_pipeline["args"][0], "component_id", "")) + pf_count = len(pipeline_name["fields"]) + res_comp = cb_pipeline["func"](1, *([None] * pf_count)) + assert "SUCCESS" in str(res_comp) + + +def test_docs_rewrite_and_loading(tmp_path: Path): + # Prepare a small docs tree + docs_root = tmp_path + (docs_root / "readme.md").write_text("Hello [About](about.md)") + (docs_root / "about.md").write_text("About page") + + # Monkeypatch the DOCS_ROOT used by callbacks module + docs_callbacks.DOCS_ROOT = docs_root + + out = rewrite_links("Hello [About](about.md)", "readme.md") + assert "/Docs?doc=about.md" in out + + # Register callback and call load_doc_from_url + class Dummy: + callbacks: list[dict[str, Any]] + + def __init__(self) -> None: + self.callbacks = [] + + dummy = Dummy() + + # emulate the minimal callback decorator storage used in other tests + def fake_callback(*args, **kwargs): + def decorator(f): + dummy.callbacks.append({"args": args, "kwargs": kwargs, "func": f}) + return f + + return decorator + + # Use register function with our fake app object + class FakeApp: + def callback(self, *a, **k): + return fake_callback(*a, **k) + + register_docs_callbacks(FakeApp()) + + # Find the load_doc_from_url function and call it + funcs = [c["func"] for c in dummy.callbacks if c["func"].__name__ == "load_doc_from_url"] + assert funcs + 
load_fn = funcs[0] + res = load_fn("?doc=readme.md") + assert "/Docs?doc=about.md" in res diff --git a/tests/test_ml_service/test_frontend_utils.py b/tests/test_ml_service/test_frontend_utils.py new file mode 100644 index 00000000..055e1ad8 --- /dev/null +++ b/tests/test_ml_service/test_frontend_utils.py @@ -0,0 +1,65 @@ +"""Unit tests for frontend utility functions in ``ml_service.frontend``. + +These tests mock external HTTP calls to keep them fast and reliable. +""" +from __future__ import annotations + +from typing import Any + +import requests +from ml_service.frontend.pipelines.utils import call_pipeline +from ml_service.frontend.scripts.utils import call_script + + +def test_call_script_success(mock_requests: dict[str, Any]) -> None: + """`call_script` returns parsed JSON when the backend responds successfully.""" + + MockResponse = mock_requests["MockResponse"] + + def fake_post(url, json=None, **kwargs): + return MockResponse(ok=True, status_code=200, text="ok", json_data={"ok": True}) + + mock_requests["patch_post"](fake_post) + + res = call_script("scripts/check_import_layers", {"foo": "bar"}) + assert res == {"ok": True} + + +def test_call_script_error(mock_requests: dict[str, Any]) -> None: + """`call_script` returns an error dict when the HTTP client raises an exception.""" + + def fake_post(url, json=None, **kwargs): + raise requests.RequestException("connection failed") + + mock_requests["patch_post"](fake_post) + + res = call_script("scripts/check_import_layers", {"foo": "bar"}) + assert "error" in res + assert "connection failed" in res["error"] + + +def test_call_pipeline_success(mock_requests: dict[str, Any]) -> None: + """`call_pipeline` returns parsed JSON on successful response.""" + + MockResponse = mock_requests["MockResponse"] + + def fake_post(url, json=None, **kwargs): + return MockResponse(ok=True, status_code=200, text="ok", json_data={"status": "started"}) + + mock_requests["patch_post"](fake_post) + + res = 
call_pipeline("pipelines/run", {"x": 1}) + assert res == {"status": "started"} + + +def test_call_pipeline_error(mock_requests: dict[str, Any]) -> None: + """`call_pipeline` returns an error dict when the HTTP client raises an exception.""" + + def fake_post(url, json=None, **kwargs): + raise requests.RequestException("timeout") + + mock_requests["patch_post"](fake_post) + + res = call_pipeline("pipelines/run", {"x": 1}) + assert "error" in res + assert "timeout" in res["error"] diff --git a/tests/test_ml_service/test_layouts.py b/tests/test_ml_service/test_layouts.py new file mode 100644 index 00000000..4c76fe98 --- /dev/null +++ b/tests/test_ml_service/test_layouts.py @@ -0,0 +1,38 @@ +"""Simple tests validating that page layout builders return container-like objects.""" + +from __future__ import annotations + +from ml_service.frontend.app import home_layout +from ml_service.frontend.docs.layout import build_layout as build_docs_layout +from ml_service.frontend.file_viewer.layout import build_layout as build_file_viewer_layout +from ml_service.frontend.pipelines.layout import build_layout as build_pipelines_layout +from ml_service.frontend.scripts.layout import build_layout as build_scripts_layout + + +def _has_children(obj: object) -> bool: + return hasattr(obj, "children") or hasattr(obj, "props") + + +def test_build_scripts_layout(): + layout = build_scripts_layout() + assert _has_children(layout) + + +def test_build_pipelines_layout(): + layout = build_pipelines_layout() + assert _has_children(layout) + + +def test_build_file_viewer_layout(): + layout = build_file_viewer_layout() + assert _has_children(layout) + + +def test_build_docs_layout(): + layout = build_docs_layout() + assert _has_children(layout) + + +def test_home_layout(): + layout = home_layout() + assert _has_children(layout) From d4146aeeb30891d7fdf3f9591b8b63bf3f71ef21 Mon Sep 17 00:00:00 2001 From: Sebastijan-Dominis Date: Sun, 29 Mar 2026 09:46:19 +0200 Subject: [PATCH 02/17] Fixed mypy 
issues in tests, added to pre-commit. Fixed a few dozen mypy issues that existed in tests, and added the tests directory to the mypy pre-commit hook and ruff configuration. --- .pre-commit-config.yaml | 4 +- pyproject.toml | 4 +- .../generators/generate_snapshot_binding.py | 2 +- .../unit/data/config/test_validate_config.py | 6 +- .../memory/test_compute_memory_change.py | 11 ++- .../data/validation/test_validate_data.py | 7 +- tests/unit/features/loading/test_schemas.py | 2 +- .../test_build_pipeline_with_model.py | 10 +- tests/unit/promotion/test_getters.py | 33 ++++--- tests/unit/promotion/test_persister.py | 51 ++++++---- tests/unit/promotion/test_registry.py | 3 +- tests/unit/promotion/test_service.py | 26 ++--- tests/unit/promotion/test_state_loader.py | 2 +- .../test_evaluation_persist_run_flow.py | 98 +++++++++---------- .../test_evaluation_prepare_metadata.py | 10 +- .../test_persist_explainability_run_flow.py | 96 ++++++++---------- .../test_validate_reproducibility.py | 12 ++- .../catboost/test_catboost_searcher.py | 8 +- .../search/utils/test_randomized_search.py | 28 +++--- tests/unit/utils/pipeline_core/test_runner.py | 10 +- 20 files changed, 218 insertions(+), 205 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index c6a86d54..4d6ddb67 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -10,11 +10,11 @@ repos: pass_filenames: false - id: mypy - name: mypy (ml + pipelines + ml_service) + name: mypy (ml + pipelines + ml_service + tests) entry: mypy language: system pass_filenames: false - args: ["ml", "pipelines", "ml_service"] + args: ["ml", "pipelines", "ml_service", "tests"] - id: import-layers name: import layer guardrails diff --git a/pyproject.toml b/pyproject.toml index 3afdfedd..18dfd8fb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -2,13 +2,13 @@ profile = "black" line_length = 100 py_version = 311 -src_paths = ["ml", "pipelines", "scripts", "ml_service"] +src_paths = ["ml", "pipelines", 
"scripts", "ml_service", "tests"] skip = [".git", ".venv", "env", "__pycache__", ".pytest_cache"] [tool.ruff] line-length = 100 target-version = "py311" -src = ["ml", "pipelines", "scripts", "ml_service"] +src = ["ml", "pipelines", "scripts", "ml_service", "tests"] exclude = [ ".git", ".venv", diff --git a/scripts/generators/generate_snapshot_binding.py b/scripts/generators/generate_snapshot_binding.py index d356adf9..79acea73 100644 --- a/scripts/generators/generate_snapshot_binding.py +++ b/scripts/generators/generate_snapshot_binding.py @@ -34,7 +34,7 @@ def scan_latest_snapshots(base_dir: Path) -> dict[str, dict[str, str]]: Returns: dict[name][version] = snapshot_name (str) """ - result = {} + result: dict[str, dict[str, str]] = {} if not base_dir.exists(): logger.warning(f"Base directory {base_dir} does not exist. Skipping.") return result diff --git a/tests/unit/data/config/test_validate_config.py b/tests/unit/data/config/test_validate_config.py index 51255b6a..d0e20705 100644 --- a/tests/unit/data/config/test_validate_config.py +++ b/tests/unit/data/config/test_validate_config.py @@ -2,6 +2,8 @@ from __future__ import annotations +from typing import Any, cast + import ml.data.config.validate_config as validate_config_module import pytest from ml.exceptions import ConfigError @@ -52,7 +54,7 @@ def __init__(self, **_kwargs) -> None: monkeypatch.setattr(validate_config_module, "InterimConfig", _FailingSchema) with pytest.raises(ConfigError, match="Configuration validation error") as exc_info: - validate_config_module.validate_config({}, "interim") + validate_config_module.validate_config(cast(dict[str, Any], {}), "interim") assert isinstance(exc_info.value.__cause__, ValueError) @@ -60,7 +62,7 @@ def __init__(self, **_kwargs) -> None: def test_validate_config_wraps_unsupported_type_with_cause() -> None: """Raise wrapper ``ConfigError`` for unsupported type selectors and preserve cause.""" with pytest.raises(ConfigError, match="Configuration validation error") 
as exc_info: - validate_config_module.validate_config({}, "unknown") # type: ignore[arg-type] + validate_config_module.validate_config(cast(dict[str, Any], {}), "unknown") # type: ignore[call-overload] assert isinstance(exc_info.value.__cause__, ConfigError) assert "Unsupported config type" in str(exc_info.value.__cause__) diff --git a/tests/unit/data/utils/memory/test_compute_memory_change.py b/tests/unit/data/utils/memory/test_compute_memory_change.py index e58dd0d3..5f0d596e 100644 --- a/tests/unit/data/utils/memory/test_compute_memory_change.py +++ b/tests/unit/data/utils/memory/test_compute_memory_change.py @@ -1,5 +1,8 @@ """Unit tests for stage-wise dataframe memory delta computation.""" +from pathlib import Path +from typing import Any + import pytest from ml.data.utils.memory.compute_memory_change import compute_memory_change from ml.exceptions import DataError @@ -9,7 +12,7 @@ def test_compute_memory_change_for_interim_stage_uses_root_memory_usage() -> None: """Read baseline memory from interim metadata root and compute deltas.""" - metadata = {"memory_usage_mb": 100.0} + metadata: dict[str, Any] = {"memory_usage_mb": 100.0} result = compute_memory_change( target_metadata=metadata, @@ -27,7 +30,7 @@ def test_compute_memory_change_for_interim_stage_uses_root_memory_usage() -> Non def test_compute_memory_change_for_processed_stage_uses_nested_memory_usage() -> None: """Read baseline memory from processed metadata nested memory block.""" - metadata = {"memory": {"new_memory_mb": 50.0}} + metadata: dict[str, Any] = {"memory": {"new_memory_mb": 50.0}} result = compute_memory_change( target_metadata=metadata, @@ -45,7 +48,7 @@ def test_compute_memory_change_for_processed_stage_uses_nested_memory_usage() -> def test_compute_memory_change_returns_zero_percentage_when_old_memory_is_zero() -> None: """Avoid division-by-zero and emit zero percentage when baseline is zero.""" - metadata = {"memory_usage_mb": 0.0} + metadata: dict[str, Any] = {"memory_usage_mb": 0.0} 
result = compute_memory_change( target_metadata=metadata, @@ -58,7 +61,7 @@ def test_compute_memory_change_returns_zero_percentage_when_old_memory_is_zero() def test_compute_memory_change_raises_data_error_when_required_keys_missing() -> None: """Raise DataError when stage-specific baseline metadata key is absent.""" - metadata = {"memory": {}} + metadata: dict[str, Any] = {"memory": {}} with pytest.raises(DataError, match="missing the key required"): compute_memory_change( diff --git a/tests/unit/data/validation/test_validate_data.py b/tests/unit/data/validation/test_validate_data.py index 36063636..f4bd28b6 100644 --- a/tests/unit/data/validation/test_validate_data.py +++ b/tests/unit/data/validation/test_validate_data.py @@ -1,6 +1,7 @@ """Unit tests for dataset hash validation against metadata expectations.""" from pathlib import Path +from typing import Any import pytest from ml.data.validation.validate_data import validate_data @@ -13,7 +14,7 @@ def test_validate_data_returns_empty_and_warns_when_metadata_hash_missing( caplog: pytest.LogCaptureFixture, ) -> None: """Skip integrity checks when metadata does not include expected data hash.""" - metadata = {"data": {}} + metadata: dict[str, Any] = {"data": {}} with caplog.at_level("WARNING"): result = validate_data(data_path=Path("dummy.parquet"), metadata=metadata) @@ -26,7 +27,7 @@ def test_validate_data_returns_actual_hash_when_expected_matches( monkeypatch: pytest.MonkeyPatch, ) -> None: """Return computed hash when metadata hash matches persisted data hash.""" - metadata = {"data": {"hash": "abc123"}} + metadata: dict[str, Any] = {"data": {"hash": "abc123"}} monkeypatch.setattr("ml.data.validation.validate_data.hash_data", lambda path: "abc123") result = validate_data(data_path=Path("dummy.parquet"), metadata=metadata) @@ -38,7 +39,7 @@ def test_validate_data_raises_when_expected_hash_mismatches_actual( monkeypatch: pytest.MonkeyPatch, ) -> None: """Reject datasets whose computed hash differs from metadata 
expectation.""" - metadata = {"data": {"hash": "expected"}} + metadata: dict[str, Any] = {"data": {"hash": "expected"}} monkeypatch.setattr("ml.data.validation.validate_data.hash_data", lambda path: "actual") with pytest.raises(UserError, match="Data hash mismatch"): diff --git a/tests/unit/features/loading/test_schemas.py b/tests/unit/features/loading/test_schemas.py index 3ad32e5b..64c31fc3 100644 --- a/tests/unit/features/loading/test_schemas.py +++ b/tests/unit/features/loading/test_schemas.py @@ -112,7 +112,7 @@ def test_load_schemas_aggregates_input_and_derived_schemas_across_feature_sets(t Patching validate_operators in its original module to allow normal hashes and raise DataError for manually injected bad_hash. """ - VersionInfo = namedtuple("version_info", ["major", "minor", "micro", "releaselevel", "serial"]) + VersionInfo = namedtuple("VersionInfo", ["major", "minor", "micro", "releaselevel", "serial"]) dummy_hash = "dummy_hash" # Patch sys.version_info for consistent hash environment diff --git a/tests/unit/modeling/catboost/test_build_pipeline_with_model.py b/tests/unit/modeling/catboost/test_build_pipeline_with_model.py index 57b064f8..98d9e222 100644 --- a/tests/unit/modeling/catboost/test_build_pipeline_with_model.py +++ b/tests/unit/modeling/catboost/test_build_pipeline_with_model.py @@ -118,11 +118,11 @@ def test_build_pipeline_with_model_rejects_non_catboost_model(monkeypatch: pytes monkeypatch.setattr(build_module, "build_pipeline", lambda **_: Pipeline(steps=[])) add_model_calls: list[str] = [] - monkeypatch.setattr( - build_module, - "add_model_to_pipeline", - lambda *_: add_model_calls.append("called") or Pipeline(steps=[]), - ) + def _fake_add_model(*_args: Any) -> Pipeline: + add_model_calls.append("called") + return Pipeline(steps=[]) + + monkeypatch.setattr(build_module, "add_model_to_pipeline", _fake_add_model) with pytest.raises(PipelineContractError, match="not a CatBoostClassifier or CatBoostRegressor"): 
build_module.build_pipeline_with_model( diff --git a/tests/unit/promotion/test_getters.py b/tests/unit/promotion/test_getters.py index ddad9b13..87b16a4d 100644 --- a/tests/unit/promotion/test_getters.py +++ b/tests/unit/promotion/test_getters.py @@ -2,7 +2,7 @@ from pathlib import Path from types import SimpleNamespace -from typing import cast +from typing import Any, cast import pytest from ml.exceptions import PersistenceError, UserError @@ -19,7 +19,7 @@ def test_extract_thresholds_returns_problem_segment_threshold_mapping() -> None: """Select the exact threshold dictionary for the requested problem and segment.""" - thresholds = { + thresholds: dict[str, Any] = { "cancellation": { "city_hotel": {"promotion_metrics": {"sets": ["val"]}}, } @@ -32,7 +32,7 @@ def test_extract_thresholds_returns_problem_segment_threshold_mapping() -> None: def test_extract_thresholds_raises_when_problem_segment_not_found() -> None: """Raise UserError when no threshold config exists for requested problem/segment.""" - thresholds = {"cancellation": {}} + thresholds: dict[str, Any] = {"cancellation": {}} with pytest.raises(UserError, match="No promotion thresholds found"): extract_thresholds(thresholds, "cancellation", "city_hotel") @@ -105,18 +105,21 @@ def _load_json(path: Path) -> dict[str, str]: raise AssertionError(f"Unexpected path: {path}") monkeypatch.setattr("ml.promotion.getters.get.load_json", _load_json) - monkeypatch.setattr( - "ml.promotion.getters.get.validate_training_metadata", - lambda payload: calls.append(f"validate_training:{payload['stage']}") or "training-validated", - ) - monkeypatch.setattr( - "ml.promotion.getters.get.validate_evaluation_metadata", - lambda payload: calls.append(f"validate_evaluation:{payload['stage']}") or "evaluation-validated", - ) - monkeypatch.setattr( - "ml.promotion.getters.get.validate_explainability_metadata", - lambda payload: calls.append(f"validate_explainability:{payload['stage']}") or "explainability-validated", - ) + def 
_validate_training(payload: dict[str, Any]) -> str: + calls.append(f"validate_training:{payload['stage']}") + return "training-validated" + + def _validate_evaluation(payload: dict[str, Any]) -> str: + calls.append(f"validate_evaluation:{payload['stage']}") + return "evaluation-validated" + + def _validate_explainability(payload: dict[str, Any]) -> str: + calls.append(f"validate_explainability:{payload['stage']}") + return "explainability-validated" + + monkeypatch.setattr("ml.promotion.getters.get.validate_training_metadata", _validate_training) + monkeypatch.setattr("ml.promotion.getters.get.validate_evaluation_metadata", _validate_evaluation) + monkeypatch.setattr("ml.promotion.getters.get.validate_explainability_metadata", _validate_explainability) result = get_runners_metadata(train_run_dir, eval_run_dir, explain_run_dir) diff --git a/tests/unit/promotion/test_persister.py b/tests/unit/promotion/test_persister.py index 7bbe47f4..05267de5 100644 --- a/tests/unit/promotion/test_persister.py +++ b/tests/unit/promotion/test_persister.py @@ -196,16 +196,23 @@ def test_persist_without_promotion_skips_registry_updates_and_saves_metadata(mon calls: list[str] = [] - monkeypatch.setattr( - "ml.promotion.persister.update_registry_and_archive", - lambda **kwargs: calls.append("update_registry") or {"updated": 1}, - ) - monkeypatch.setattr("ml.promotion.persister.persist_registry_diff", lambda **kwargs: calls.append("persist_diff")) - monkeypatch.setattr("ml.promotion.persister.prepare_metadata", lambda **kwargs: {"metadata": 1}) - monkeypatch.setattr( - "ml.promotion.persister.save_metadata", - lambda **kwargs: calls.append(f"save_metadata:{kwargs['target_dir'] == context.paths.run_dir}"), - ) + def _update_registry_and_archive(**kwargs) -> dict: + calls.append("update_registry") + return {"updated": 1} + + def _persist_registry_diff(**kwargs) -> None: + calls.append("persist_diff") + + def _prepare_metadata(**kwargs) -> dict: + return {"metadata": 1} + + def 
_save_metadata(**kwargs) -> None: + calls.append(f"save_metadata:{kwargs['target_dir'] == context.paths.run_dir}") + + monkeypatch.setattr("ml.promotion.persister.update_registry_and_archive", _update_registry_and_archive) + monkeypatch.setattr("ml.promotion.persister.persist_registry_diff", _persist_registry_diff) + monkeypatch.setattr("ml.promotion.persister.prepare_metadata", _prepare_metadata) + monkeypatch.setattr("ml.promotion.persister.save_metadata", _save_metadata) persister.persist(context, state, result) @@ -221,13 +228,23 @@ def test_persist_with_promotion_updates_registry_persists_diff_and_metadata(monk calls: list[str] = [] - monkeypatch.setattr( - "ml.promotion.persister.update_registry_and_archive", - lambda **kwargs: calls.append("update_registry") or {"updated": 1}, - ) - monkeypatch.setattr("ml.promotion.persister.persist_registry_diff", lambda **kwargs: calls.append("persist_diff")) - monkeypatch.setattr("ml.promotion.persister.prepare_metadata", lambda **kwargs: {"metadata": 1}) - monkeypatch.setattr("ml.promotion.persister.save_metadata", lambda **kwargs: calls.append("save_metadata")) + def _update_registry_and_archive_2(**kwargs) -> dict: + calls.append("update_registry") + return {"updated": 1} + + def _persist_registry_diff_2(**kwargs) -> None: + calls.append("persist_diff") + + def _prepare_metadata_2(**kwargs) -> dict: + return {"metadata": 1} + + def _save_metadata_2(**kwargs) -> None: + calls.append("save_metadata") + + monkeypatch.setattr("ml.promotion.persister.update_registry_and_archive", _update_registry_and_archive_2) + monkeypatch.setattr("ml.promotion.persister.persist_registry_diff", _persist_registry_diff_2) + monkeypatch.setattr("ml.promotion.persister.prepare_metadata", _prepare_metadata_2) + monkeypatch.setattr("ml.promotion.persister.save_metadata", _save_metadata_2) persister.persist(context, state, result) diff --git a/tests/unit/promotion/test_registry.py b/tests/unit/promotion/test_registry.py index 3b3c3512..88468ad6 
100644 --- a/tests/unit/promotion/test_registry.py +++ b/tests/unit/promotion/test_registry.py @@ -1,6 +1,7 @@ """Unit tests for promotion registry update and diff persistence helpers.""" from pathlib import Path +from typing import Any import pytest import yaml @@ -25,7 +26,7 @@ def test_update_registry_and_archive_production_archives_previous_and_updates_re } } } - archive_registry = {"cancellation": {"city_hotel": {}}} + archive_registry: dict[str, Any] = {"cancellation": {"city_hotel": {}}} def _make_entry(promotion_id: str, metrics: dict | None = None) -> dict: return { "experiment_id": "exp-1", diff --git a/tests/unit/promotion/test_service.py b/tests/unit/promotion/test_service.py index 19f41e2f..4265a965 100644 --- a/tests/unit/promotion/test_service.py +++ b/tests/unit/promotion/test_service.py @@ -134,15 +134,17 @@ def _fake_registry_lock(got_context: PromotionContext): monkeypatch.setattr(service, "_validate", lambda got_context: got_context) monkeypatch.setattr(service, "_registry_lock", _fake_registry_lock) - monkeypatch.setattr(service._state_loader, "load", lambda got_context: state_obj) + + def _load_state(got_context: PromotionContext) -> Any: + return state_obj + + monkeypatch.setattr(service._state_loader, "load", _load_state) monkeypatch.setattr(service, "_get_strategy", lambda stage: _FakeStrategy()) - monkeypatch.setattr( - service._persister, - "persist", - lambda got_context, got_state, got_result: events.append( - f"persist:{got_context is context}:{got_state is state_obj}:{got_result is result_obj}" - ), - ) + + def _persist(got_context: PromotionContext, got_state: Any, got_result: Any) -> None: + events.append(f"persist:{got_context is context}:{got_state is state_obj}:{got_result is result_obj}") + + monkeypatch.setattr(service._persister, "persist", _persist) result = service.run(context) @@ -198,10 +200,10 @@ def _validate_run_dirs(*_args: Any, **_kwargs: Any) -> None: raise _RunDirValidationError("missing run dir") 
monkeypatch.setattr("ml.promotion.service.validate_run_dirs", _validate_run_dirs) - monkeypatch.setattr( - "ml.promotion.service.get_runners_metadata", - lambda *_args, **_kwargs: side_calls.append("get_runners_metadata") or None, - ) + def _get_runners_metadata(*_args: Any, **_kwargs: Any) -> None: + side_calls.append("get_runners_metadata") + + monkeypatch.setattr("ml.promotion.service.get_runners_metadata", _get_runners_metadata) with pytest.raises(_RunDirValidationError, match="missing run dir"): service._validate(context) diff --git a/tests/unit/promotion/test_state_loader.py b/tests/unit/promotion/test_state_loader.py index 14d4286a..4a16e9e7 100644 --- a/tests/unit/promotion/test_state_loader.py +++ b/tests/unit/promotion/test_state_loader.py @@ -40,7 +40,7 @@ def test_state_loader_load_builds_full_state_with_production_identity(monkeypatc } } } - archive_registry = {"archive": []} + archive_registry: dict[str, Any] = {"archive": []} global_thresholds = {"raw": "thresholds"} metrics_file = {"metrics": {"val": {"f1": 0.82}}} diff --git a/tests/unit/runners/evaluation/persistence/test_evaluation_persist_run_flow.py b/tests/unit/runners/evaluation/persistence/test_evaluation_persist_run_flow.py index e613ddc0..164fd29d 100644 --- a/tests/unit/runners/evaluation/persistence/test_evaluation_persist_run_flow.py +++ b/tests/unit/runners/evaluation/persistence/test_evaluation_persist_run_flow.py @@ -4,7 +4,7 @@ from pathlib import Path from types import SimpleNamespace -from typing import Any +from typing import Any, cast import pytest from ml.exceptions import PersistenceError @@ -57,48 +57,44 @@ def test_persist_evaluation_run_happy_path_calls_all_persistence_steps( runtime_calls: list[dict[str, Any]] = [] validate_raw: dict[str, Any] = {} - monkeypatch.setattr(persist_module, "save_metrics", lambda *args, **kwargs: str(metrics_file)) - monkeypatch.setattr( - persist_module, - "save_predictions", - lambda prediction_dfs, target_dir: SimpleNamespace( + def 
_save_metrics(*args, **kwargs) -> str: + return str(metrics_file) + + def _save_predictions(prediction_dfs, target_dir: Path) -> Any: + return SimpleNamespace( train_predictions_path=Path(target_dir / "predictions_train.parquet").as_posix(), val_predictions_path=Path(target_dir / "predictions_val.parquet").as_posix(), test_predictions_path=Path(target_dir / "predictions_test.parquet").as_posix(), - ), - ) - monkeypatch.setattr( - persist_module, - "hash_artifact", - lambda path: hash_calls.append(path) or f"hash::{path.name}", - ) - monkeypatch.setattr( - persist_module, - "PredictionsPathsAndHashes", - lambda **kwargs: _ModelDumpStub(payload=kwargs), - ) - monkeypatch.setattr( - persist_module, - "validate_evaluation_artifacts", - lambda raw: validate_raw.update(raw) or _ModelDumpStub(payload={"validated": True}), - ) - monkeypatch.setattr( - persist_module, - "prepare_metadata", - lambda **kwargs: {"meta": "ok", "artifacts": kwargs["artifacts"].model_dump()}, - ) - monkeypatch.setattr( - persist_module, - "save_metadata", - lambda metadata, target_dir: save_metadata_calls.append( - {"metadata": metadata, "target_dir": target_dir} - ), - ) - monkeypatch.setattr( - persist_module, - "save_runtime_snapshot", - lambda **kwargs: runtime_calls.append(kwargs), - ) + ) + + def _hash_artifact(path: Path) -> str: + hash_calls.append(path) + return f"hash::{path.name}" + + def _predictions_model(**kwargs: Any) -> _ModelDumpStub: + return _ModelDumpStub(payload=kwargs) + + def _validate_evaluation_artifacts(raw: dict[str, Any]) -> _ModelDumpStub: + validate_raw.update(raw) + return _ModelDumpStub(payload={"validated": True}) + + def _prepare_metadata(**kwargs: Any) -> dict[str, Any]: + return {"meta": "ok", "artifacts": kwargs["artifacts"].model_dump()} + + def _save_metadata(metadata: dict[str, Any], target_dir: Path) -> None: + save_metadata_calls.append({"metadata": metadata, "target_dir": target_dir}) + + def _save_runtime_snapshot(**kwargs: Any) -> None: + 
runtime_calls.append(kwargs) + + monkeypatch.setattr(persist_module, "save_metrics", _save_metrics) + monkeypatch.setattr(persist_module, "save_predictions", _save_predictions) + monkeypatch.setattr(persist_module, "hash_artifact", _hash_artifact) + monkeypatch.setattr(persist_module, "PredictionsPathsAndHashes", _predictions_model) + monkeypatch.setattr(persist_module, "validate_evaluation_artifacts", _validate_evaluation_artifacts) + monkeypatch.setattr(persist_module, "prepare_metadata", _prepare_metadata) + monkeypatch.setattr(persist_module, "save_metadata", _save_metadata) + monkeypatch.setattr(persist_module, "save_runtime_snapshot", _save_runtime_snapshot) persist_module.persist_evaluation_run( _model_cfg_stub(), # type: ignore[arg-type] @@ -108,7 +104,7 @@ def test_persist_evaluation_run_happy_path_calls_all_persistence_steps( eval_run_dir=eval_run_dir, metrics={"val": {"auc": 0.8}}, prediction_dfs=SimpleNamespace(), # type: ignore[arg-type] - feature_lineage=[SimpleNamespace(model_dump=lambda: {"name": "adr"})], # type: ignore[arg-type] + feature_lineage=cast(list[Any], [SimpleNamespace(model_dump=lambda: {"name": "adr"})]), # type: ignore[arg-type] start_time=9.5, timestamp="20260306T130000", artifacts=_artifacts_stub(with_pipeline=True), # type: ignore[arg-type] @@ -177,18 +173,14 @@ def test_persist_evaluation_run_wraps_predictions_model_construction_failures( save_metadata_calls: list[dict[str, Any]] = [] runtime_calls: list[dict[str, Any]] = [] - monkeypatch.setattr( - persist_module, - "save_metadata", - lambda metadata, target_dir: save_metadata_calls.append( - {"metadata": metadata, "target_dir": target_dir} - ), - ) - monkeypatch.setattr( - persist_module, - "save_runtime_snapshot", - lambda **kwargs: runtime_calls.append(kwargs), - ) + def _save_metadata2(metadata: dict[str, Any], target_dir: Path) -> None: + save_metadata_calls.append({"metadata": metadata, "target_dir": target_dir}) + + def _save_runtime_snapshot2(**kwargs: Any) -> None: + 
runtime_calls.append(kwargs) + + monkeypatch.setattr(persist_module, "save_metadata", _save_metadata2) + monkeypatch.setattr(persist_module, "save_runtime_snapshot", _save_runtime_snapshot2) with pytest.raises( PersistenceError, diff --git a/tests/unit/runners/evaluation/persistence/test_evaluation_prepare_metadata.py b/tests/unit/runners/evaluation/persistence/test_evaluation_prepare_metadata.py index 981ea703..58abdf65 100644 --- a/tests/unit/runners/evaluation/persistence/test_evaluation_prepare_metadata.py +++ b/tests/unit/runners/evaluation/persistence/test_evaluation_prepare_metadata.py @@ -100,11 +100,11 @@ def test_prepare_metadata_uses_experiment_dir_name_for_snapshot_id( metadata_stub = _ModelDumpStub(payload={"ok": True}) raw_payloads: list[dict[str, Any]] = [] - monkeypatch.setattr( - prepare_metadata_module, - "validate_evaluation_metadata", - lambda raw: raw_payloads.append(raw) or metadata_stub, - ) + def _capture_and_return(raw: dict[str, Any]) -> _ModelDumpStub: + raw_payloads.append(raw) + return metadata_stub + + monkeypatch.setattr(prepare_metadata_module, "validate_evaluation_metadata", _capture_and_return) prepare_metadata_module.prepare_metadata( model_cfg=model_cfg, # type: ignore[arg-type] diff --git a/tests/unit/runners/explainability/persistence/test_persist_explainability_run_flow.py b/tests/unit/runners/explainability/persistence/test_persist_explainability_run_flow.py index e114a9a1..7d7d7d16 100644 --- a/tests/unit/runners/explainability/persistence/test_persist_explainability_run_flow.py +++ b/tests/unit/runners/explainability/persistence/test_persist_explainability_run_flow.py @@ -4,7 +4,7 @@ from pathlib import Path from types import SimpleNamespace -from typing import Any +from typing import Any, cast import pandas as pd import pytest @@ -63,33 +63,28 @@ def test_persist_explainability_run_minimal_path_without_optional_tables( csv_calls: list[tuple[Path, str]] = [] validation_stub = _ArtifactsValidationStub(payload={"model_path": 
"model.cbm", "model_hash": "model-hash"}) - monkeypatch.setattr( - persist_module, - "validate_explainability_artifacts", - lambda raw: captured_raw.update(raw) or validation_stub, - ) - monkeypatch.setattr( - persist_module, - "save_metadata", - lambda metadata, target_dir: save_metadata_calls.append( - {"metadata": metadata, "target_dir": target_dir} - ), - ) - monkeypatch.setattr( - persist_module, - "save_runtime_snapshot", - lambda **kwargs: runtime_calls.append(kwargs), - ) - monkeypatch.setattr( - persist_module, - "hash_artifact", - lambda path: hash_calls.append(path) or "unexpected-hash", - ) - monkeypatch.setattr( - persist_module, - "save_metrics_csv", - lambda metrics, *, target_file, name: csv_calls.append((target_file, name)), - ) + def _validate_artifacts(raw: dict[str, Any]) -> _ArtifactsValidationStub: + captured_raw.update(raw) + return validation_stub + + def _save_metadata(metadata: dict[str, Any], target_dir: Path) -> None: + save_metadata_calls.append({"metadata": metadata, "target_dir": target_dir}) + + def _save_runtime_snapshot(**kwargs: Any) -> None: + runtime_calls.append(kwargs) + + def _hash_artifact(path: Path) -> str: + hash_calls.append(path) + return "unexpected-hash" + + def _save_metrics_csv(metrics, *, target_file: Path, name: str) -> None: + csv_calls.append((target_file, name)) + + monkeypatch.setattr(persist_module, "validate_explainability_artifacts", _validate_artifacts) + monkeypatch.setattr(persist_module, "save_metadata", _save_metadata) + monkeypatch.setattr(persist_module, "save_runtime_snapshot", _save_runtime_snapshot) + monkeypatch.setattr(persist_module, "hash_artifact", _hash_artifact) + monkeypatch.setattr(persist_module, "save_metrics_csv", _save_metrics_csv) persist_module.persist_explainability_run( _model_cfg_stub(), # type: ignore[arg-type] @@ -98,7 +93,7 @@ def test_persist_explainability_run_minimal_path_without_optional_tables( experiment_dir=Path("experiments") / "snapshot-88", 
explain_run_dir=explain_run_dir, explainability_metrics=explainability_metrics, # type: ignore[arg-type] - feature_lineage=[SimpleNamespace(model_dump=lambda: {"name": "lead_time"})], # type: ignore[arg-type] + feature_lineage=cast(list[Any], [SimpleNamespace(model_dump=lambda: {"name": "lead_time"})]), start_time=12.34, timestamp="20260306T120000", artifacts=artifacts, # type: ignore[arg-type] @@ -149,30 +144,23 @@ def test_persist_explainability_run_full_path_with_pipeline_and_top_k_tables( save_metadata_calls: list[dict[str, Any]] = [] validation_stub = _ArtifactsValidationStub(payload={"validated": True}) - monkeypatch.setattr( - persist_module, - "save_metrics_csv", - lambda metrics, *, target_file, name: csv_calls.append( - {"metrics": metrics, "target_file": target_file, "name": name} - ), - ) - monkeypatch.setattr( - persist_module, - "hash_artifact", - lambda path: hash_calls.append(path) or f"hash::{path.name}", - ) - monkeypatch.setattr( - persist_module, - "validate_explainability_artifacts", - lambda raw: validation_stub, - ) - monkeypatch.setattr( - persist_module, - "save_metadata", - lambda metadata, target_dir: save_metadata_calls.append( - {"metadata": metadata, "target_dir": target_dir} - ), - ) + def _save_metrics_csv_2(metrics, *, target_file: Path, name: str) -> None: + csv_calls.append({"metrics": metrics, "target_file": target_file, "name": name}) + + def _hash_artifact_2(path: Path) -> str: + hash_calls.append(path) + return f"hash::{path.name}" + + def _validate_artifacts_2(raw: dict[str, Any]) -> _ArtifactsValidationStub: + return validation_stub + + def _save_metadata_2(metadata: dict[str, Any], target_dir: Path) -> None: + save_metadata_calls.append({"metadata": metadata, "target_dir": target_dir}) + + monkeypatch.setattr(persist_module, "save_metrics_csv", _save_metrics_csv_2) + monkeypatch.setattr(persist_module, "hash_artifact", _hash_artifact_2) + monkeypatch.setattr(persist_module, "validate_explainability_artifacts", 
_validate_artifacts_2) + monkeypatch.setattr(persist_module, "save_metadata", _save_metadata_2) monkeypatch.setattr(persist_module, "save_runtime_snapshot", lambda **kwargs: None) persist_module.persist_explainability_run( @@ -182,7 +170,7 @@ def test_persist_explainability_run_full_path_with_pipeline_and_top_k_tables( experiment_dir=Path("experiments") / "snapshot-99", explain_run_dir=explain_run_dir, explainability_metrics=explainability_metrics, # type: ignore[arg-type] - feature_lineage=[SimpleNamespace(model_dump=lambda: {"name": "adr"})], # type: ignore[arg-type] + feature_lineage=cast(list[Any], [SimpleNamespace(model_dump=lambda: {"name": "adr"})]), start_time=1.0, timestamp="20260306T120001", artifacts=artifacts, # type: ignore[arg-type] diff --git a/tests/unit/runners/shared/reproducibility/test_validate_reproducibility.py b/tests/unit/runners/shared/reproducibility/test_validate_reproducibility.py index 7a734116..b9ec62ef 100644 --- a/tests/unit/runners/shared/reproducibility/test_validate_reproducibility.py +++ b/tests/unit/runners/shared/reproducibility/test_validate_reproducibility.py @@ -20,13 +20,21 @@ def test_validate_reproducibility_runs_all_checks_in_expected_order(monkeypatch: runtime_info = SimpleNamespace(name="runtime-info") calls: list[str] = [] + def _load_json(path: Path) -> dict[str, Any]: + calls.append(f"load_json:{path}") + return raw_payload + + def _validate_runtime_info(payload: dict[str, Any]) -> Any: + calls.append(f"validate_runtime_info:{payload is raw_payload}") + return runtime_info + monkeypatch.setattr( "ml.runners.shared.reproducibility.validate_reproducibility.load_json", - lambda path: calls.append(f"load_json:{path}") or raw_payload, + _load_json, ) monkeypatch.setattr( "ml.runners.shared.reproducibility.validate_reproducibility.validate_runtime_info", - lambda payload: calls.append(f"validate_runtime_info:{payload is raw_payload}") or runtime_info, + _validate_runtime_info, ) monkeypatch.setattr( 
"ml.runners.shared.reproducibility.validate_reproducibility.validate_git_commits_match", diff --git a/tests/unit/search/searchers/catboost/test_catboost_searcher.py b/tests/unit/search/searchers/catboost/test_catboost_searcher.py index c1fb7f06..1aa700ab 100644 --- a/tests/unit/search/searchers/catboost/test_catboost_searcher.py +++ b/tests/unit/search/searchers/catboost/test_catboost_searcher.py @@ -44,10 +44,10 @@ def __init__(self, *, model_cfg: Any, strict: bool, failure_management_dir: Path self.strict = strict self.failure_management_dir = failure_management_dir self.snapshot_binding_key = snapshot_binding_key - self.feature_lineage = [] - self.pipeline_hash = "" - self.scoring = "" - self.splits_info = {} + self.feature_lineage: list[Any] = [] + self.pipeline_hash: str = "" + self.scoring: str = "" + self.splits_info: dict[str, Any] = {} @property def require_feature_lineage(self) -> list[Any]: diff --git a/tests/unit/search/utils/test_randomized_search.py b/tests/unit/search/utils/test_randomized_search.py index 1a1a7d82..fb10c48b 100644 --- a/tests/unit/search/utils/test_randomized_search.py +++ b/tests/unit/search/utils/test_randomized_search.py @@ -72,14 +72,12 @@ def test_perform_randomized_search_uses_gpu_safe_defaults_and_serializes_results captured_classifier_flags: list[bool] = [] monkeypatch.setattr(randomized_search_module, "RandomizedSearchCV", _FakeRandomizedSearchCV) - monkeypatch.setattr( - randomized_search_module, - "check_cv", - lambda cv, y, classifier: ( - captured_classifier_flags.append(classifier), - _ResolvedCV(3), - )[1], - ) + + def _check_cv_and_capture(cv, y, classifier): + captured_classifier_flags.append(classifier) + return _ResolvedCV(3) + + monkeypatch.setattr(randomized_search_module, "check_cv", _check_cv_and_capture) monkeypatch.setattr(randomized_search_module, "is_classifier", lambda _: True) pipeline = Pipeline(steps=[("identity", FunctionTransformer(validate=False))]) @@ -140,14 +138,12 @@ class _CustomCV: 
monkeypatch.setattr(randomized_search_module, "RandomizedSearchCV", _FakeRandomizedSearchCV) captured_classifier_flags: list[bool] = [] - monkeypatch.setattr( - randomized_search_module, - "check_cv", - lambda cv, y, classifier: ( - captured_classifier_flags.append(classifier), - _ResolvedCV(4), - )[1], - ) + + def _check_cv_and_capture_2(cv, y, classifier): + captured_classifier_flags.append(classifier) + return _ResolvedCV(4) + + monkeypatch.setattr(randomized_search_module, "check_cv", _check_cv_and_capture_2) monkeypatch.setattr(randomized_search_module, "is_classifier", lambda _: False) pipeline = Pipeline(steps=[("identity", FunctionTransformer(validate=False))]) diff --git a/tests/unit/utils/pipeline_core/test_runner.py b/tests/unit/utils/pipeline_core/test_runner.py index 20cba476..a53027b6 100644 --- a/tests/unit/utils/pipeline_core/test_runner.py +++ b/tests/unit/utils/pipeline_core/test_runner.py @@ -70,7 +70,7 @@ def run(self, ctx: dict) -> dict: def test_pipeline_runner_executes_before_run_after_in_order() -> None: """Verify hook execution order and in-place context updates.""" - runner = PipelineRunner([_AppendStep("first"), _AppendStep("second")]) + runner: PipelineRunner = PipelineRunner([_AppendStep("first"), _AppendStep("second")]) ctx = {"events": [], "value": 0} result = runner.run(ctx) @@ -89,7 +89,7 @@ def test_pipeline_runner_executes_before_run_after_in_order() -> None: def test_pipeline_runner_propagates_replaced_context_between_steps() -> None: """Verify that replaced context objects are propagated to later steps.""" - runner = PipelineRunner([_ReplaceContextStep(), _AppendStep("next")]) + runner: PipelineRunner = PipelineRunner([_ReplaceContextStep(), _AppendStep("next")]) initial = {"events": [], "value": 1} result = runner.run(initial) @@ -106,7 +106,7 @@ def test_pipeline_runner_propagates_replaced_context_between_steps() -> None: def test_pipeline_runner_returns_input_context_when_no_steps() -> None: """Verify that an empty runner 
returns the input context unchanged.""" - runner = PipelineRunner([]) + runner: PipelineRunner = PipelineRunner([]) ctx = {"events": [], "value": 5} result = runner.run(ctx) @@ -117,7 +117,7 @@ def test_pipeline_runner_returns_input_context_when_no_steps() -> None: def test_pipeline_runner_uses_default_noop_hooks_from_base_step() -> None: """Verify that default no-op hooks do not interfere with `run` execution.""" - runner = PipelineRunner([_NoHookStep()]) + runner: PipelineRunner = PipelineRunner([_NoHookStep()]) ctx = {"events": [], "value": 0} result = runner.run(ctx) @@ -127,7 +127,7 @@ def test_pipeline_runner_uses_default_noop_hooks_from_base_step() -> None: def test_pipeline_runner_stops_execution_after_failing_step() -> None: """Verify that runner execution stops and propagates on step failure.""" - runner = PipelineRunner([_FailingStep(), _TailStep()]) + runner: PipelineRunner = PipelineRunner([_FailingStep(), _TailStep()]) ctx = {"events": [], "value": 0} with pytest.raises(RuntimeError, match="boom"): From 015f201e7196d659a85709be618c4b2606a95db8 Mon Sep 17 00:00:00 2001 From: Sebastijan-Dominis Date: Sun, 29 Mar 2026 09:51:29 +0200 Subject: [PATCH 03/17] Added mypy type checking for scripts; minor fix. Added mypy checks for scripts, and fixed two issues that were discovered when running mypy on the scripts for the first time. 
--- .pre-commit-config.yaml | 4 ++-- scripts/generators/generate_fake_data.py | 3 ++- scripts/quality/check_naming_conventions.py | 2 +- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 4d6ddb67..80b905e3 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -10,11 +10,11 @@ repos: pass_filenames: false - id: mypy - name: mypy (ml + pipelines + ml_service + tests) + name: mypy type checking entry: mypy language: system pass_filenames: false - args: ["ml", "pipelines", "ml_service", "tests"] + args: ["ml", "pipelines", "scripts", "ml_service", "tests"] - id: import-layers name: import layer guardrails diff --git a/scripts/generators/generate_fake_data.py b/scripts/generators/generate_fake_data.py index e6096d2e..d21befd8 100644 --- a/scripts/generators/generate_fake_data.py +++ b/scripts/generators/generate_fake_data.py @@ -484,7 +484,8 @@ def main() -> int: metadata.detect_table_from_dataframe( data=df_model, table_name="synthetic_data", - infer_keys=None # type: ignore -> None is actually a valid value for infer_keys + # None is actually a valid value for infer keys, but mypy doesn't like it + infer_keys=None # type: ignore ) metadata.set_primary_key( diff --git a/scripts/quality/check_naming_conventions.py b/scripts/quality/check_naming_conventions.py index c02721e5..9c88c52b 100644 --- a/scripts/quality/check_naming_conventions.py +++ b/scripts/quality/check_naming_conventions.py @@ -105,7 +105,7 @@ def check_ast(file: Path): ) -def main(): +def main() -> int: """Main function to check naming conventions across the codebase. This script checks that: From 48c0ea39bb26d855bbd8475113a03ec80ab5ffd4 Mon Sep 17 00:00:00 2001 From: Sebastijan-Dominis Date: Sun, 29 Mar 2026 11:33:59 +0200 Subject: [PATCH 04/17] Improved file structure; added more tests. 
Improved the nesting of tests to better reflect the structure of the codebase, and added more tests to increase coverage and ensure robustness. --- tests/conftest.py | 28 ++++ tests/test_ml_service/conftest.py | 124 ------------------ .../test_validate_config_payload.py | 41 ++++++ .../features/loading/test_load_registry.py | 30 +++++ .../persistence/test_save_feature_registry.py | 92 +++++++++++++ .../features/utils/test_registry_utils.py | 71 ++++++++++ .../configs/formatting/test_timestamp.py | 23 ++++ .../loading/test_load_yaml_and_add_lineage.py | 28 ++++ .../configs/persistence/test_save_config.py | 36 +++++ .../backend/dir_viewer}/test_dir_viewer.py | 0 .../pipelines}/test_execute_subprocess.py | 0 .../backend/routers}/test_backend_routes.py | 0 tests/unit/ml_service/conftest.py | 65 +++++++++ .../frontend}/test_frontend_callbacks.py | 0 .../frontend}/test_frontend_utils.py | 0 .../ml_service/frontend}/test_layouts.py | 0 16 files changed, 414 insertions(+), 124 deletions(-) delete mode 100644 tests/test_ml_service/conftest.py create mode 100644 tests/unit/ml_service/backend/configs/data/validation/test_validate_config_payload.py create mode 100644 tests/unit/ml_service/backend/configs/features/loading/test_load_registry.py create mode 100644 tests/unit/ml_service/backend/configs/features/persistence/test_save_feature_registry.py create mode 100644 tests/unit/ml_service/backend/configs/features/utils/test_registry_utils.py create mode 100644 tests/unit/ml_service/backend/configs/formatting/test_timestamp.py create mode 100644 tests/unit/ml_service/backend/configs/loading/test_load_yaml_and_add_lineage.py create mode 100644 tests/unit/ml_service/backend/configs/persistence/test_save_config.py rename tests/{test_ml_service => unit/ml_service/backend/dir_viewer}/test_dir_viewer.py (100%) rename tests/{test_ml_service => unit/ml_service/backend/pipelines}/test_execute_subprocess.py (100%) rename tests/{test_ml_service => 
unit/ml_service/backend/routers}/test_backend_routes.py (100%) create mode 100644 tests/unit/ml_service/conftest.py rename tests/{test_ml_service => unit/ml_service/frontend}/test_frontend_callbacks.py (100%) rename tests/{test_ml_service => unit/ml_service/frontend}/test_frontend_utils.py (100%) rename tests/{test_ml_service => unit/ml_service/frontend}/test_layouts.py (100%) diff --git a/tests/conftest.py b/tests/conftest.py index f15dee16..13f9ecc3 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -9,6 +9,16 @@ from pathlib import Path from typing import Any +import pytest + +# Lightweight TestClient fixture for FastAPI integration-style tests +try: + import ml_service.backend.main as _backend_main + from fastapi.testclient import TestClient +except Exception: # pragma: no cover - defensive import for environments without FastAPI + TestClient = None # type: ignore + _backend_main = None # type: ignore + # Global test stub for the optional `catboost` dependency. Many modules import # `catboost` at import-time; providing a minimal stub prevents import errors # when running unit tests in environments without the real package installed. @@ -65,3 +75,21 @@ def __init__(self, *args, **kwargs): if str(PROJECT_ROOT) not in sys.path: sys.path.insert(0, str(PROJECT_ROOT)) + + +@pytest.fixture +def fastapi_client(): + """Provide a `TestClient` for the ml_service FastAPI app. + + Tests that exercise ml_service backend routers can use this fixture. + If FastAPI isn't available in the environment the fixture will raise + at import-time when a test attempts to use it. 
+ """ + if TestClient is None or _backend_main is None: + raise RuntimeError("FastAPI TestClient or ml_service backend not importable in test environment") + + client = TestClient(_backend_main.app) + try: + yield client + finally: + client.close() diff --git a/tests/test_ml_service/conftest.py b/tests/test_ml_service/conftest.py deleted file mode 100644 index 6140dc13..00000000 --- a/tests/test_ml_service/conftest.py +++ /dev/null @@ -1,124 +0,0 @@ -"""Pytest fixtures for testing the ml_service package. - -The fixtures are intentionally lightweight and platform agnostic so tests -run consistently on Windows and Linux. -""" -from __future__ import annotations - -from collections.abc import Callable -from typing import Any - -import pytest - - -class DummyDashApp: - """Minimal stand-in for a Dash app capturing callback registration. - - Instances collect registered callbacks as dicts with keys - ``'args'``, ``'kwargs'`` and ``'func'`` so tests can inspect what was - registered without importing the real `dash` package. - """ - - def __init__(self) -> None: - self.callbacks: list[dict[str, Any]] = [] - - def callback(self, *args: Any, **kwargs: Any) -> Callable[[Callable[..., Any]], Callable[..., Any]]: - """Return a decorator that records the wrapped function and metadata.""" - - def decorator(func: Callable[..., Any]) -> Callable[..., Any]: - self.callbacks.append({"args": args, "kwargs": kwargs, "func": func}) - return func - - return decorator - - -@pytest.fixture -def dummy_dash_app() -> DummyDashApp: - """Provide a dummy Dash-like app for registering callbacks in frontend pages.""" - - return DummyDashApp() - - -@pytest.fixture -def mock_requests(monkeypatch) -> dict[str, Any]: - """Helpers to patch the `requests` module during tests. - - Returns a small factory dict with a `MockResponse` class and - `patch_post` / `patch_get` helpers that tests can use to inject - deterministic responses. 
- """ - - import requests as _requests - - - class MockResponse: - def __init__(self, ok: bool = True, status_code: int = 200, text: str = "", json_data: Any = None) -> None: - self.ok = ok - self.status_code = status_code - self.text = text - self._json = json_data if json_data is not None else {} - - def json(self) -> Any: # pragma: no cover - trivial helper - return self._json - - def raise_for_status(self) -> None: - if not self.ok: - raise _requests.HTTPError(f"{self.status_code}: {self.text}") - - - def patch_post(func: Callable[..., Any]) -> None: - monkeypatch.setattr(_requests, "post", func) - - - def patch_get(func: Callable[..., Any]) -> None: - monkeypatch.setattr(_requests, "get", func) - - - return {"MockResponse": MockResponse, "patch_post": patch_post, "patch_get": patch_get} - - -@pytest.fixture -def patch_subprocess(monkeypatch) -> Callable[[int, str, str], None]: - """Helper to patch ``subprocess.run`` with a controllable result. - - Usage: - - patch_subprocess(returncode=0, stdout="ok", stderr="") - - After calling the helper, any call to ``subprocess.run`` will return - an object with ``returncode``, ``stdout`` and ``stderr`` attributes. - """ - - import subprocess as _subprocess - - - class Result: - def __init__(self, returncode: int = 0, stdout: str = "", stderr: str = "") -> None: - self.returncode = returncode - self.stdout = stdout - self.stderr = stderr - - - def _patch(returncode: int = 0, stdout: str = "", stderr: str = "") -> None: - result = Result(returncode=returncode, stdout=stdout, stderr=stderr) - - def fake_run(*args: Any, **kwargs: Any) -> Result: - return result - - monkeypatch.setattr(_subprocess, "run", fake_run) - - - return _patch - - -@pytest.fixture -def fastapi_client() -> Any: - """Provide a `TestClient` for the ml_service FastAPI app. - - Tests that need to exercise the HTTP layer can use this fixture. 
- """ - - from fastapi.testclient import TestClient - from ml_service.backend.main import app as _app - - return TestClient(_app) diff --git a/tests/unit/ml_service/backend/configs/data/validation/test_validate_config_payload.py b/tests/unit/ml_service/backend/configs/data/validation/test_validate_config_payload.py new file mode 100644 index 00000000..a969e4a8 --- /dev/null +++ b/tests/unit/ml_service/backend/configs/data/validation/test_validate_config_payload.py @@ -0,0 +1,41 @@ +import importlib + +import pytest + + +def test_validate_config_payload_interim(monkeypatch): + vmod = importlib.import_module( + "ml_service.backend.configs.data.validation.validate_config_payload" + ) + + class FakeInterim: + def __init__(self, **kwargs): + self.kwargs = kwargs + + monkeypatch.setattr(vmod, "InterimConfig", FakeInterim) + res = vmod.validate_config_payload("interim", {"a": 1}) + assert isinstance(res, FakeInterim) + assert res.kwargs == {"a": 1} + + +def test_validate_config_payload_processed(monkeypatch): + vmod = importlib.import_module( + "ml_service.backend.configs.data.validation.validate_config_payload" + ) + + class FakeProcessed: + def __init__(self, **kwargs): + self.kwargs = kwargs + + monkeypatch.setattr(vmod, "ProcessedConfig", FakeProcessed) + res = vmod.validate_config_payload("processed", {"b": 2}) + assert isinstance(res, FakeProcessed) + assert res.kwargs == {"b": 2} + + +def test_validate_config_payload_unknown(): + vmod = importlib.import_module( + "ml_service.backend.configs.data.validation.validate_config_payload" + ) + with pytest.raises(ValueError): + vmod.validate_config_payload("unknown", {}) diff --git a/tests/unit/ml_service/backend/configs/features/loading/test_load_registry.py b/tests/unit/ml_service/backend/configs/features/loading/test_load_registry.py new file mode 100644 index 00000000..d7b5c590 --- /dev/null +++ b/tests/unit/ml_service/backend/configs/features/loading/test_load_registry.py @@ -0,0 +1,30 @@ +import importlib + + +def 
test_load_feature_registry_missing(tmp_path): + mod = importlib.import_module( + "ml_service.backend.configs.features.loading.load_registry" + ) + p = tmp_path / "nope.yaml" + assert not p.exists() + assert mod.load_feature_registry(p) == {} + + +def test_load_feature_registry_empty_file(tmp_path): + mod = importlib.import_module( + "ml_service.backend.configs.features.loading.load_registry" + ) + p = tmp_path / "empty.yaml" + p.write_text("") + assert mod.load_feature_registry(p) == {} + + +def test_load_feature_registry_parses_yaml(tmp_path): + mod = importlib.import_module( + "ml_service.backend.configs.features.loading.load_registry" + ) + p = tmp_path / "reg.yaml" + p.write_text("foo:\n bar: 1\n") + res = mod.load_feature_registry(p) + assert isinstance(res, dict) + assert res == {"foo": {"bar": 1}} diff --git a/tests/unit/ml_service/backend/configs/features/persistence/test_save_feature_registry.py b/tests/unit/ml_service/backend/configs/features/persistence/test_save_feature_registry.py new file mode 100644 index 00000000..a5eb3295 --- /dev/null +++ b/tests/unit/ml_service/backend/configs/features/persistence/test_save_feature_registry.py @@ -0,0 +1,92 @@ +import importlib + + +def test_save_feature_registry_creates_new_entry(tmp_path, monkeypatch): + mod = importlib.import_module( + "ml_service.backend.configs.features.persistence.save_feature_registry" + ) + + captured = {} + + def fake_load_registry(path): + return {} + + def fake_save_config(cfg, path): + captured["cfg"] = cfg + captured["path"] = path + + monkeypatch.setattr(mod, "load_registry", fake_load_registry) + monkeypatch.setattr(mod, "save_config", fake_save_config) + + class DummyConfig: + def model_dump(self, mode=None): + return {"fields": []} + + out = mod.save_feature_registry( + "featX", + "v1", + validated_config=DummyConfig(), + registry_path=tmp_path / "cfgs" / "registry.yaml", + ) + + assert captured["cfg"] == {"featX": {"v1": {"fields": []}}} + assert str(captured["path"]) == 
str(tmp_path / "cfgs" / "registry.yaml") + assert out["status"] == "written" + + +def test_save_feature_registry_appends_to_existing(tmp_path, monkeypatch): + mod = importlib.import_module( + "ml_service.backend.configs.features.persistence.save_feature_registry" + ) + + registry = {"featA": {"v1": {"old": 1}}} + captured = {} + + monkeypatch.setattr(mod, "load_registry", lambda p: registry) + + def fake_save(cfg, path): + captured["cfg"] = cfg + + monkeypatch.setattr(mod, "save_config", fake_save) + + class DummyConfig2: + def model_dump(self, mode=None): + return {"new": True} + + mod.save_feature_registry( + "featA", + "v2", + validated_config=DummyConfig2(), + registry_path=tmp_path / "registry.yaml", + ) + + assert "featA" in captured["cfg"] + assert "v1" in captured["cfg"]["featA"] + assert "v2" in captured["cfg"]["featA"] + assert captured["cfg"]["featA"]["v1"] == {"old": 1} + assert captured["cfg"]["featA"]["v2"] == {"new": True} + + +def test_model_dump_called_with_mode(monkeypatch, tmp_path): + mod = importlib.import_module( + "ml_service.backend.configs.features.persistence.save_feature_registry" + ) + + monkeypatch.setattr(mod, "load_registry", lambda p: {}) + monkeypatch.setattr(mod, "save_config", lambda cfg, p: None) + + called = {} + + class SpyConfig: + def model_dump(self, mode=None): + called["mode"] = mode + return {"x": 1} + + mod.save_feature_registry( + "f", + "v", + validated_config=SpyConfig(), + registry_path=tmp_path / "r.yaml", + ) + + assert called.get("mode") == "json" diff --git a/tests/unit/ml_service/backend/configs/features/utils/test_registry_utils.py b/tests/unit/ml_service/backend/configs/features/utils/test_registry_utils.py new file mode 100644 index 00000000..b9d0002f --- /dev/null +++ b/tests/unit/ml_service/backend/configs/features/utils/test_registry_utils.py @@ -0,0 +1,71 @@ +import importlib + +import pytest + + +def test_load_registry_missing_raises(tmp_path): + mod = importlib.import_module( + 
"ml_service.backend.configs.features.utils.registry" + ) + p = tmp_path / "nope.yaml" + assert not p.exists() + with pytest.raises(RuntimeError, match="Feature registry missing"): + mod.load_registry(p) + + +def test_load_registry_empty_raises(tmp_path): + mod = importlib.import_module( + "ml_service.backend.configs.features.utils.registry" + ) + p = tmp_path / "empty.yaml" + p.write_text("") + with pytest.raises(RuntimeError, match="empty or corrupted"): + mod.load_registry(p) + + +def test_load_registry_non_dict_raises(tmp_path): + mod = importlib.import_module( + "ml_service.backend.configs.features.utils.registry" + ) + p = tmp_path / "list.yaml" + p.write_text("- one\n- two\n") + with pytest.raises(RuntimeError, match="must be a dict"): + mod.load_registry(p) + + +def test_load_registry_returns_dict(tmp_path): + mod = importlib.import_module( + "ml_service.backend.configs.features.utils.registry" + ) + p = tmp_path / "reg.yaml" + p.write_text("foo:\n bar: 1\n") + res = mod.load_registry(p) + assert isinstance(res, dict) + assert res == {"foo": {"bar": 1}} + + +def test_registry_entry_exists_true(tmp_path): + mod = importlib.import_module( + "ml_service.backend.configs.features.utils.registry" + ) + p = tmp_path / "reg.yaml" + p.write_text("featA:\n v1: {}\n") + assert mod.registry_entry_exists("featA", "v1", p) is True + + +def test_registry_entry_exists_false(tmp_path): + mod = importlib.import_module( + "ml_service.backend.configs.features.utils.registry" + ) + p = tmp_path / "reg.yaml" + p.write_text("featA:\n v1: {}\n") + assert mod.registry_entry_exists("featA", "v2", p) is False + + +def test_registry_entry_exists_name_missing(tmp_path): + mod = importlib.import_module( + "ml_service.backend.configs.features.utils.registry" + ) + p = tmp_path / "reg.yaml" + p.write_text("featB:\n v1: {}\n") + assert mod.registry_entry_exists("unknown", "v1", p) is False diff --git a/tests/unit/ml_service/backend/configs/formatting/test_timestamp.py 
b/tests/unit/ml_service/backend/configs/formatting/test_timestamp.py new file mode 100644 index 00000000..0fca9d7b --- /dev/null +++ b/tests/unit/ml_service/backend/configs/formatting/test_timestamp.py @@ -0,0 +1,23 @@ +import importlib + +import pytest + + +def test_add_timestamp_sets_created_at(monkeypatch): + mod = importlib.import_module( + "ml_service.backend.configs.formatting.timestamp" + ) + monkeypatch.setattr(mod, "utc_timestamp", lambda: "2026-03-29T12:00:00Z") + data = {"lineage": {}} + res = mod.add_timestamp(data, "lineage") + assert "created_at" in data["lineage"] + assert data["lineage"]["created_at"] == "2026-03-29T12:00:00Z" + assert res is data + + +def test_add_timestamp_missing_key_raises(): + mod = importlib.import_module( + "ml_service.backend.configs.formatting.timestamp" + ) + with pytest.raises(ValueError): + mod.add_timestamp({}, "lineage") diff --git a/tests/unit/ml_service/backend/configs/loading/test_load_yaml_and_add_lineage.py b/tests/unit/ml_service/backend/configs/loading/test_load_yaml_and_add_lineage.py new file mode 100644 index 00000000..83a1a954 --- /dev/null +++ b/tests/unit/ml_service/backend/configs/loading/test_load_yaml_and_add_lineage.py @@ -0,0 +1,28 @@ +import importlib + +import pytest + + +def test_load_yaml_and_add_lineage_injects_timestamp(monkeypatch): + mod = importlib.import_module( + "ml_service.backend.configs.loading.load_yaml_and_add_lineage" + ) + # patch the underlying timestamp generator for deterministic output + ts_mod = importlib.import_module( + "ml_service.backend.configs.formatting.timestamp" + ) + monkeypatch.setattr(ts_mod, "utc_timestamp", lambda: "2026-03-29T12:00:00Z") + + yaml_text = "lineage: {}\nfoo: bar\n" + res = mod.load_yaml_and_add_lineage(yaml_text) + assert "lineage" in res + assert res["lineage"]["created_at"] == "2026-03-29T12:00:00Z" + assert res["foo"] == "bar" + + +def test_load_yaml_and_add_lineage_missing_lineage_raises(): + mod = importlib.import_module( + 
"ml_service.backend.configs.loading.load_yaml_and_add_lineage" + ) + with pytest.raises(ValueError): + mod.load_yaml_and_add_lineage("foo: bar\n") diff --git a/tests/unit/ml_service/backend/configs/persistence/test_save_config.py b/tests/unit/ml_service/backend/configs/persistence/test_save_config.py new file mode 100644 index 00000000..895cb87d --- /dev/null +++ b/tests/unit/ml_service/backend/configs/persistence/test_save_config.py @@ -0,0 +1,36 @@ +import importlib + +import pytest +import yaml + + +def test_save_config_writes_file(tmp_path): + sc = importlib.import_module( + "ml_service.backend.configs.persistence.save_config" + ) + cfg = {"alpha": 1, "nested": {"x": "y"}} + cp = tmp_path / "cfgs" / "cfg.yaml" + sc.save_config(cfg, cp) + assert cp.exists() + loaded = yaml.safe_load(cp.read_text(encoding="utf-8")) + assert loaded == cfg + assert not (cp.parent / f"{cp.name}.tmp").exists() + + +def test_save_config_failure_cleans_tmp(tmp_path, monkeypatch): + sc = importlib.import_module( + "ml_service.backend.configs.persistence.save_config" + ) + cp = tmp_path / "cfgs" / "cfg.yaml" + + def raise_replace(a, b): + raise OSError("boom") + + monkeypatch.setattr(sc.os, "replace", raise_replace) + + with pytest.raises(sc.HTTPException) as excinfo: + sc.save_config({"a": 1}, cp) + + tmp_file = cp.parent / f"{cp.name}.tmp" + assert not tmp_file.exists() + assert excinfo.value.status_code == 500 diff --git a/tests/test_ml_service/test_dir_viewer.py b/tests/unit/ml_service/backend/dir_viewer/test_dir_viewer.py similarity index 100% rename from tests/test_ml_service/test_dir_viewer.py rename to tests/unit/ml_service/backend/dir_viewer/test_dir_viewer.py diff --git a/tests/test_ml_service/test_execute_subprocess.py b/tests/unit/ml_service/backend/pipelines/test_execute_subprocess.py similarity index 100% rename from tests/test_ml_service/test_execute_subprocess.py rename to tests/unit/ml_service/backend/pipelines/test_execute_subprocess.py diff --git 
a/tests/test_ml_service/test_backend_routes.py b/tests/unit/ml_service/backend/routers/test_backend_routes.py similarity index 100% rename from tests/test_ml_service/test_backend_routes.py rename to tests/unit/ml_service/backend/routers/test_backend_routes.py diff --git a/tests/unit/ml_service/conftest.py b/tests/unit/ml_service/conftest.py new file mode 100644 index 00000000..4b4564e6 --- /dev/null +++ b/tests/unit/ml_service/conftest.py @@ -0,0 +1,65 @@ +"""Pytest fixtures for `tests/unit/ml_service`. + +Provide a lightweight `dummy_dash_app` that captures callback registration +and a `mock_requests` helper to patch `requests.post` in frontend tests. +""" +from __future__ import annotations + +from typing import Any + +import pytest +import requests + + +@pytest.fixture +def dummy_dash_app() -> object: + """A minimal fake Dash app used to capture callback registration. + + Tests expect an object with a `.callbacks` list and a `.callback` method + that acts as a decorator. When used, the decorator appends a record + describing the registration to `.callbacks`. + """ + + class DummyApp: + def __init__(self) -> None: + self.callbacks: list[dict[str, Any]] = [] + + def callback(self, *args: Any, **kwargs: Any): + def decorator(f): + self.callbacks.append({"args": args, "kwargs": kwargs, "func": f}) + return f + + return decorator + + return DummyApp() + + +@pytest.fixture +def mock_requests(monkeypatch) -> dict[str, Any]: + """Provide utilities to mock `requests.post` and a small `MockResponse`. 
+ + Usage in tests: + reqs = mock_requests + MockResponse = reqs["MockResponse"] + reqs["patch_post"](fake_post) + """ + + class MockResponse: + def __init__(self, ok: bool = True, status_code: int = 200, text: str = "", json_data: dict[str, Any] | None = None) -> None: + self.ok = ok + self.status_code = status_code + self.text = text + self._json = json_data or {} + + def json(self) -> dict[str, Any]: + return self._json + + def raise_for_status(self) -> None: + """Emulate requests.Response.raise_for_status.""" + if not self.ok or not (200 <= int(self.status_code) < 300): + raise requests.HTTPError(f"HTTP {self.status_code}") + + def patch_post(fn): + monkeypatch.setattr(requests, "post", fn) + + return {"MockResponse": MockResponse, "patch_post": patch_post} diff --git a/tests/test_ml_service/test_frontend_callbacks.py b/tests/unit/ml_service/frontend/test_frontend_callbacks.py similarity index 100% rename from tests/test_ml_service/test_frontend_callbacks.py rename to tests/unit/ml_service/frontend/test_frontend_callbacks.py diff --git a/tests/test_ml_service/test_frontend_utils.py b/tests/unit/ml_service/frontend/test_frontend_utils.py similarity index 100% rename from tests/test_ml_service/test_frontend_utils.py rename to tests/unit/ml_service/frontend/test_frontend_utils.py diff --git a/tests/test_ml_service/test_layouts.py b/tests/unit/ml_service/frontend/test_layouts.py similarity index 100% rename from tests/test_ml_service/test_layouts.py rename to tests/unit/ml_service/frontend/test_layouts.py From a6c6f2d9a5057c88fc5f70883696ff5bbcf7d1ee Mon Sep 17 00:00:00 2001 From: Sebastijan-Dominis Date: Sun, 29 Mar 2026 12:11:17 +0200 Subject: [PATCH 05/17] Added more tests; over 80% ml_service coverage. Added more tests for the ml_service directory, and increased coverage to over 80%. 
--- .../test_load_all_yamls_and_add_lineage.py | 116 +++++++++++++ .../backend/routers/test_data_routes.py | 158 ++++++++++++++++++ .../backend/routers/test_modeling_routes.py | 106 ++++++++++++ .../features/test_features_callbacks.py | 95 +++++++++++ .../modeling/test_modeling_callbacks.py | 85 ++++++++++ .../configs/pipeline_cfg/test_callbacks.py | 101 +++++++++++ 6 files changed, 661 insertions(+) create mode 100644 tests/unit/ml_service/backend/configs/modeling/loading/test_load_all_yamls_and_add_lineage.py create mode 100644 tests/unit/ml_service/backend/routers/test_data_routes.py create mode 100644 tests/unit/ml_service/backend/routers/test_modeling_routes.py create mode 100644 tests/unit/ml_service/frontend/configs/features/test_features_callbacks.py create mode 100644 tests/unit/ml_service/frontend/configs/modeling/test_modeling_callbacks.py create mode 100644 tests/unit/ml_service/frontend/configs/pipeline_cfg/test_callbacks.py diff --git a/tests/unit/ml_service/backend/configs/modeling/loading/test_load_all_yamls_and_add_lineage.py b/tests/unit/ml_service/backend/configs/modeling/loading/test_load_all_yamls_and_add_lineage.py new file mode 100644 index 00000000..2d9928fa --- /dev/null +++ b/tests/unit/ml_service/backend/configs/modeling/loading/test_load_all_yamls_and_add_lineage.py @@ -0,0 +1,116 @@ +"""Unit tests for load_all_yamls_and_add_lineage. + +These tests exercise YAML parsing, lineage injection, and error paths. 
+""" +from __future__ import annotations + +from typing import Any + +import pytest +from ml_service.backend.configs.modeling.loading.load_all_yamls_and_add_lineage import ( + load_all_yamls_and_add_lineage, +) +from ml_service.backend.configs.modeling.models.configs import RawConfigsWithLineage + + +def _model_specs_yaml(with_lineage: bool = True) -> str: + if with_lineage: + return """ +model_specs_lineage: {} +models: + my_model: {} +""" + return """ +models: + my_model: {} +""" + + +def _search_yaml(with_lineage: bool = True) -> str: + if with_lineage: + return """ +search_lineage: {} +search: + extends: [] +""" + return """ +search: + extends: [] +""" + + +def _training_yaml(with_lineage: bool = True) -> str: + if with_lineage: + return """ +training_lineage: {} +training: + param: value +""" + return """ +training: + param: value +""" + + +def test_load_all_yamls_and_add_lineage_success(monkeypatch: Any) -> None: + # Make timestamps deterministic + monkeypatch.setattr("ml_service.backend.configs.formatting.timestamp.utc_timestamp", lambda: "2026-03-29T12:00:00Z") + + payload = { + "model_specs": _model_specs_yaml(True), + "search": _search_yaml(True), + "training": _training_yaml(True), + } + + out = load_all_yamls_and_add_lineage(payload) + assert isinstance(out, RawConfigsWithLineage) + + assert "model_specs_lineage" in out.model_specs + assert out.model_specs["model_specs_lineage"]["created_at"] == "2026-03-29T12:00:00Z" + + assert "search_lineage" in out.search + assert out.search["search_lineage"]["created_at"] == "2026-03-29T12:00:00Z" + + assert "training_lineage" in out.training + assert out.training["training_lineage"]["created_at"] == "2026-03-29T12:00:00Z" + + +def test_missing_lineage_key_raises() -> None: + # model_specs missing lineage key should cause add_timestamp to raise + payload = { + "model_specs": _model_specs_yaml(False), + "search": _search_yaml(True), + "training": _training_yaml(True), + } + + with pytest.raises(ValueError) as exc: + 
load_all_yamls_and_add_lineage(payload) + + assert "Missing 'model_specs_lineage'" in str(exc.value) + + +def test_invalid_yaml_raises() -> None: + payload = { + "model_specs": "foo: [unclosed", + "search": _search_yaml(True), + "training": _training_yaml(True), + } + + with pytest.raises(ValueError) as exc: + load_all_yamls_and_add_lineage(payload) + + assert "YAML parsing error" in str(exc.value) + + +def test_empty_configs_raise() -> None: + # Empty model_specs string should be treated as empty and trigger the empty-config error + payload = { + "model_specs": "", + "search": _search_yaml(True), + "training": _training_yaml(True), + } + + with pytest.raises(ValueError) as exc: + load_all_yamls_and_add_lineage(payload) + + assert "One or more configs are empty or invalid YAML" in str(exc.value) diff --git a/tests/unit/ml_service/backend/routers/test_data_routes.py b/tests/unit/ml_service/backend/routers/test_data_routes.py new file mode 100644 index 00000000..c2403830 --- /dev/null +++ b/tests/unit/ml_service/backend/routers/test_data_routes.py @@ -0,0 +1,158 @@ +import pytest + + +def _fake_path(exists: bool, path_str: str = "/fake/path"): + class FakePath: + def __init__(self, exists_val: bool): + self._exists = exists_val + + def exists(self): + return self._exists + + def __str__(self): + return path_str + + return FakePath(exists) + + +def test_validate_yaml_success(monkeypatch, fastapi_client): + payload = {"type": "interim", "config": "dummy: yaml"} + + data_dict = {"data": {"name": "ds", "version": "v1"}, "lineage": {"created_at": "t"}} + + monkeypatch.setattr( + "ml_service.backend.routers.data.load_yaml_and_add_lineage", + lambda text: data_dict, + ) + monkeypatch.setattr( + "ml_service.backend.routers.data.validate_config_payload", + lambda config_type, d: True, + ) + monkeypatch.setattr( + "ml_service.backend.routers.data.get_config_path", + lambda repo_root, config_type, dataset_name, dataset_version: _fake_path(True), + ) + + import 
ml_service.backend.routers.data as data_mod + from fastapi import Request + + orig = getattr(data_mod.validate_yaml, "__wrapped__", data_mod.validate_yaml) + req = Request({"type": "http", "method": "POST", "path": "/", "headers": []}) + body = orig(payload, req) + assert body["valid"] is True + assert body["exists"] is True + assert body["normalized"] == data_dict + + +def test_validate_yaml_missing_fields(monkeypatch, fastapi_client): + # missing type should produce valid=False with an error message + import ml_service.backend.routers.data as data_mod + from fastapi import Request + + orig = getattr(data_mod.validate_yaml, "__wrapped__", data_mod.validate_yaml) + req = Request({"type": "http", "method": "POST", "path": "/", "headers": []}) + body = orig({"config": "x"}, req) + assert body["valid"] is False + assert "Missing or invalid config type" in body["error"] + + +def test_write_yaml_exists(monkeypatch, fastapi_client): + payload = {"type": "processed", "config": "dummy: yaml"} + + data_dict = {"data": {"name": "ds2", "version": "v2"}, "lineage": {"created_at": "t"}} + + monkeypatch.setattr( + "ml_service.backend.routers.data.load_yaml_and_add_lineage", + lambda text: data_dict, + ) + monkeypatch.setattr( + "ml_service.backend.routers.data.validate_config_payload", + lambda config_type, d: True, + ) + monkeypatch.setattr( + "ml_service.backend.routers.data.get_config_path", + lambda repo_root, config_type, dataset_name, dataset_version: _fake_path(True, "/exists/path"), + ) + + import ml_service.backend.routers.data as data_mod + from fastapi import Request + + orig = getattr(data_mod.write_yaml, "__wrapped__", data_mod.write_yaml) + req = Request({"type": "http", "method": "POST", "path": "/", "headers": []}) + body = orig(payload, req) + assert body["status"] == "exists" + + +def test_write_yaml_written_and_save_called(monkeypatch, fastapi_client): + payload = {"type": "processed", "config": "dummy: yaml"} + + data_dict = {"data": {"name": "ds3", 
"version": "v3"}, "lineage": {"created_at": "t"}} + + monkeypatch.setattr( + "ml_service.backend.routers.data.load_yaml_and_add_lineage", + lambda text: data_dict, + ) + monkeypatch.setattr( + "ml_service.backend.routers.data.validate_config_payload", + lambda config_type, d: True, + ) + + fake_path = _fake_path(False, "/written/path") + monkeypatch.setattr( + "ml_service.backend.routers.data.get_config_path", + lambda repo_root, config_type, dataset_name, dataset_version: fake_path, + ) + + called = {} + + def _save_config(payload_dict, path): + called["called_with"] = (payload_dict, path) + + monkeypatch.setattr("ml_service.backend.routers.data.save_config", _save_config) + + import ml_service.backend.routers.data as data_mod + from fastapi import Request + + orig = getattr(data_mod.write_yaml, "__wrapped__", data_mod.write_yaml) + req = Request({"type": "http", "method": "POST", "path": "/", "headers": []}) + body = orig(payload, req) + assert body["status"] == "written" + assert body["path"] == str(fake_path) + assert "called_with" in called + + +def test_write_yaml_save_failure_raises(monkeypatch, fastapi_client): + payload = {"type": "processed", "config": "dummy: yaml"} + + data_dict = {"data": {"name": "ds4", "version": "v4"}, "lineage": {"created_at": "t"}} + + monkeypatch.setattr( + "ml_service.backend.routers.data.load_yaml_and_add_lineage", + lambda text: data_dict, + ) + monkeypatch.setattr( + "ml_service.backend.routers.data.validate_config_payload", + lambda config_type, d: True, + ) + + fake_path = _fake_path(False, "/will/fail") + monkeypatch.setattr( + "ml_service.backend.routers.data.get_config_path", + lambda repo_root, config_type, dataset_name, dataset_version: fake_path, + ) + + def _save_config_fail(payload_dict, path): + raise RuntimeError("disk write error") + + monkeypatch.setattr("ml_service.backend.routers.data.save_config", _save_config_fail) + + import ml_service.backend.routers.data as data_mod + from fastapi import Request + + orig 
= getattr(data_mod.write_yaml, "__wrapped__", data_mod.write_yaml) + req = Request({"type": "http", "method": "POST", "path": "/", "headers": []}) + with pytest.raises(Exception) as exc: + orig(payload, req) + + # the router wraps failures in HTTPException with the original message + assert "disk write error" in str(exc.value) diff --git a/tests/unit/ml_service/backend/routers/test_modeling_routes.py b/tests/unit/ml_service/backend/routers/test_modeling_routes.py new file mode 100644 index 00000000..301374b0 --- /dev/null +++ b/tests/unit/ml_service/backend/routers/test_modeling_routes.py @@ -0,0 +1,106 @@ +"""Tests for the `ml_service.backend.routers.modeling` FastAPI router. + +These tests monkeypatch the internal helpers to keep the tests lightweight +and focus on routing/response behavior. +""" +from __future__ import annotations + +from dataclasses import dataclass + +import ml_service.backend.routers.modeling as modeling_router +import pytest +from fastapi import Request + + +class _DummyModel: + def __init__(self, name: str) -> None: + self._name = name + + def model_dump(self, mode: str = "json", exclude: dict | None = None) -> dict: + return {"name": self._name, "mode": mode, "exclude": bool(exclude)} + + +class _DummyValidated: + def __init__(self) -> None: + self.model_specs = _DummyModel("model_specs") + self.search = _DummyModel("search") + self.training = _DummyModel("training") + + +@dataclass +class _DummyPaths: + model_specs: str + search: str + training: str + + +def test_validate_yaml_success(fastapi_client, monkeypatch) -> None: + # Arrange: patch loaders/validators/paths to return predictable objects + monkeypatch.setattr(modeling_router, "load_all_yamls_and_add_lineage", lambda payload: {"ok": True}) + monkeypatch.setattr(modeling_router, "validate_all_configs", lambda data: _DummyValidated()) + monkeypatch.setattr(modeling_router, "check_paths", lambda validated: None) + + # Act - call the undecorated function to bypass slowapi rate limiting 
def test_write_yaml_failure_raises_http_exception(fastapi_client, monkeypatch) -> None:
    """A failing ``save_all_configs`` surfaces as an ``HTTPException`` carrying the cause.

    The loader, validator and path check are stubbed so that the only step
    that can fail is the patched ``save_all_configs``.
    """
    # Local import: the module-level import only brings in ``Request``.
    from fastapi import HTTPException

    monkeypatch.setattr(modeling_router, "load_all_yamls_and_add_lineage", lambda payload: {"ok": True})
    monkeypatch.setattr(modeling_router, "validate_all_configs", lambda data: _DummyValidated())
    monkeypatch.setattr(modeling_router, "check_paths", lambda validated: _DummyPaths("a", "b", "c"))

    def _bad_save(validated, paths):
        raise RuntimeError("disk full")

    monkeypatch.setattr(modeling_router, "save_all_configs", _bad_save)

    # Call the undecorated function when the route is wrapped.
    orig = getattr(modeling_router.write_yaml, "__wrapped__", modeling_router.write_yaml)
    # Build the request outside the ``raises`` block so that only the route
    # call itself can satisfy the expectation.
    req = Request({"type": "http", "method": "POST", "path": "/", "headers": []})

    with pytest.raises(HTTPException) as exc:
        orig({"model_specs": "x", "search": "y", "training": "z"}, req)

    assert "disk full" in str(exc.value)
def test_validate_branches(dummy_dash_app, mock_requests):
    """Drive the features ``validate_yaml`` callback through each backend outcome.

    Four responses are simulated in turn by swapping the patched POST handler:
    a non-OK response, an "invalid" verdict, an "already exists" verdict and a
    successful validation that returns a normalized config.
    """
    register_callbacks(dummy_dash_app)
    cb = _find_callback_by_name(dummy_dash_app.callbacks, "validate_yaml")[0]

    reqs = mock_requests
    MockResponse = reqs["MockResponse"]

    # backend not ok
    def fake_not_ok(url, json=None, timeout=None, **kwargs):
        return MockResponse(ok=False, status_code=500, text="oops")

    reqs["patch_post"](fake_not_ok)
    alert, is_open, val = cb["func"](1, "fname", "v1", "content")
    assert "Backend error 500" in str(alert)
    assert is_open is False
    assert val == "content"

    # invalid response
    def fake_invalid(url, json=None, timeout=None, **kwargs):
        return MockResponse(ok=True, status_code=200, text="ok", json_data={"valid": False, "error": "bad cfg"})

    reqs["patch_post"](fake_invalid)
    alert2, is_open2, _ = cb["func"](1, "fname", "v1", "content")
    assert "bad cfg" in str(alert2)
    assert is_open2 is False

    # exists response
    def fake_exists(url, json=None, timeout=None, **kwargs):
        return MockResponse(ok=True, status_code=200, text="ok", json_data={"valid": True, "exists": True})

    reqs["patch_post"](fake_exists)
    alert3, is_open3, _ = cb["func"](1, "fname", "v1", "content")
    assert "fname/v1 already exists" in str(alert3)
    assert is_open3 is False

    # success returns normalized YAML
    def fake_success(url, json=None, timeout=None, **kwargs):
        return MockResponse(ok=True, status_code=200, text="ok", json_data={"valid": True, "normalized": {"a": 1}})

    reqs["patch_post"](fake_success)
    alert4, is_open4, norm = cb["func"](1, "fname", "v1", "content")
    # A substring check already matches the text with or without a trailing period.
    assert "Config valid" in str(alert4)
    assert is_open4 is True
    assert yaml.safe_load(norm)["a"] == 1
def test_validate_yaml_invalid_and_success(dummy_dash_app, mock_requests):
    """Check the modeling ``validate_yaml`` callback for an invalid and a valid verdict.

    An "invalid" backend answer must show the backend's error and keep
    ``is_open`` False; a valid answer must set ``is_open`` True and return the
    normalized configs as YAML text.
    """
    register_callbacks(dummy_dash_app)
    cb = _find_callback_by_name(dummy_dash_app.callbacks, "validate_yaml")[0]

    reqs = mock_requests
    MockResponse = reqs["MockResponse"]

    # invalid result from backend
    def fake_invalid(url, json=None, timeout=None, **kwargs):
        return MockResponse(ok=True, status_code=200, text="ok", json_data={"valid": False, "error": "bad config"})

    reqs["patch_post"](fake_invalid)
    alert, is_open, *_ = cb["func"](1, "a:1", "b:2", "c:3")
    assert "bad config" in str(alert)
    assert is_open is False

    # success path returns normalized YAML strings
    def fake_success(url, json=None, timeout=None, **kwargs):
        return MockResponse(ok=True, status_code=200, text="ok", json_data={
            "valid": True,
            "normalized": {"model_specs": {"foo": 1}, "search": {}, "training": {}},
        })

    reqs["patch_post"](fake_success)
    alert2, is_open2, n1, _n2, _n3 = cb["func"](1, "a:1", "b:2", "c:3")
    # A substring check already matches the text with or without a trailing period.
    assert "Config is valid" in str(alert2)
    assert is_open2 is True
    assert yaml.safe_load(n1)["foo"] == 1
def test_validate_config_backend_invalid_and_success(dummy_dash_app, mock_requests):
    """Check the pipeline ``validate_config`` callback for an invalid and a valid verdict.

    Both calls pass a data type, an algorithm and YAML containing ``version``,
    so the outcome is decided by the simulated backend response alone.
    """
    register_callbacks(dummy_dash_app)
    cb = _find_callback_by_name(dummy_dash_app.callbacks, "validate_config")[0]

    reqs = mock_requests
    MockResponse = reqs["MockResponse"]

    # Backend returns invalid result
    def fake_invalid(url, json=None, timeout=None, **kwargs):
        return MockResponse(ok=True, status_code=200, text="ok", json_data={"valid": False, "error": "bad config"})

    reqs["patch_post"](fake_invalid)
    alert, is_open, _ = cb["func"](1, "dt", "alg", "version: v1")
    assert "bad config" in str(alert)
    assert is_open is False

    # Backend returns success with normalized payload
    def fake_success(url, json=None, timeout=None, **kwargs):
        return MockResponse(ok=True, status_code=200, text="ok", json_data={"valid": True, "exists": False, "normalized": {"a": 1, "version": "v1"}})

    reqs["patch_post"](fake_success)
    alert2, is_open2, normalized = cb["func"](1, "dt", "alg", "version: v1")
    # A substring check already matches the text with or without a trailing period.
    assert "Config valid" in str(alert2)
    assert is_open2 is True
    # normalized is YAML dump of the returned normalized dict
    assert yaml.safe_load(normalized)["a"] == 1
Increased the tests coverage of ml_service to 90%, and started tracking it in the CI. Added a line to .gitignore to ignore the coverage report xml file for ml_service. --- .gitignore | 1 + pyproject.toml | 2 +- .../data/utils/test_get_config_path.py | 33 ++++ .../features/utils/test_paths_features.py | 9 + .../test_validate_feature_config.py | 37 ++++ .../persistence/test_save_all_configs.py | 67 +++++++ .../configs/modeling/utils/test_paths.py | 104 +++++++++++ .../validation/test_validate_all_configs.py | 58 ++++++ .../test_get_config_path_pipeline_cfg.py | 41 ++++ ...st_validate_config_payload_pipeline_cfg.py | 36 ++++ .../test_save_promotion_thresholds.py | 94 ++++++++++ .../utils/test_check_thresholds_exist.py | 38 ++++ .../backend/routers/test_features_routes.py | 160 ++++++++++++++++ .../routers/test_file_viewer_routes.py | 81 ++++++++ .../routers/test_pipeline_cfg_routes.py | 176 ++++++++++++++++++ .../backend/routers/test_pipelines_routes.py | 50 +++++ .../configs/data/test_data_callbacks.py | 116 ++++++++++++ .../test_promotion_thresholds_callbacks.py | 92 +++++++++ 18 files changed, 1194 insertions(+), 1 deletion(-) create mode 100644 tests/unit/ml_service/backend/configs/data/utils/test_get_config_path.py create mode 100644 tests/unit/ml_service/backend/configs/features/utils/test_paths_features.py create mode 100644 tests/unit/ml_service/backend/configs/features/validation/test_validate_feature_config.py create mode 100644 tests/unit/ml_service/backend/configs/modeling/persistence/test_save_all_configs.py create mode 100644 tests/unit/ml_service/backend/configs/modeling/utils/test_paths.py create mode 100644 tests/unit/ml_service/backend/configs/modeling/validation/test_validate_all_configs.py create mode 100644 tests/unit/ml_service/backend/configs/pipeline_cfg/utils/test_get_config_path_pipeline_cfg.py create mode 100644 tests/unit/ml_service/backend/configs/pipeline_cfg/validation/test_validate_config_payload_pipeline_cfg.py create mode 100644 
tests/unit/ml_service/backend/configs/promotion_thresholds/persistence/test_save_promotion_thresholds.py create mode 100644 tests/unit/ml_service/backend/configs/promotion_thresholds/utils/test_check_thresholds_exist.py create mode 100644 tests/unit/ml_service/backend/routers/test_features_routes.py create mode 100644 tests/unit/ml_service/backend/routers/test_file_viewer_routes.py create mode 100644 tests/unit/ml_service/backend/routers/test_pipeline_cfg_routes.py create mode 100644 tests/unit/ml_service/backend/routers/test_pipelines_routes.py create mode 100644 tests/unit/ml_service/frontend/configs/data/test_data_callbacks.py create mode 100644 tests/unit/ml_service/frontend/configs/promotion_thresholds/test_promotion_thresholds_callbacks.py diff --git a/.gitignore b/.gitignore index 9681679f..bd2167a1 100644 --- a/.gitignore +++ b/.gitignore @@ -46,6 +46,7 @@ htmlcov/ nosetests.xml coverage.xml coverage.json +coverage_ml_service.xml *.cover *.py.cover .hypothesis/ diff --git a/pyproject.toml b/pyproject.toml index 18dfd8fb..b9b981a6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -39,7 +39,7 @@ exclude = "(^notebooks/|^feature_store/|^data/|^experiments/)" [tool.coverage.run] branch = true -source = ["ml", "pipelines", "scripts"] +source = ["ml", "pipelines", "scripts", "ml_service"] omit = [ "tests/*", "notebooks/*", diff --git a/tests/unit/ml_service/backend/configs/data/utils/test_get_config_path.py b/tests/unit/ml_service/backend/configs/data/utils/test_get_config_path.py new file mode 100644 index 00000000..e14b66bf --- /dev/null +++ b/tests/unit/ml_service/backend/configs/data/utils/test_get_config_path.py @@ -0,0 +1,33 @@ +from pathlib import Path + +from ml_service.backend.configs.data.utils.get_config_path import get_config_path + + +def test_get_config_path_basic(): + repo_root = "repo_root" + config_type = "interim" + dataset_name = "dataset" + dataset_version = "v1" + + p = get_config_path( + repo_root=repo_root, + config_type=config_type, + 
def test_get_config_path_trailing_separator(tmp_path):
    """A trailing ``/`` on ``repo_root`` must not change the resulting config path."""
    root_with_slash = f"{tmp_path}/"

    actual = get_config_path(
        repo_root=root_with_slash,
        config_type="processed",
        dataset_name="d",
        dataset_version="v2",
    )

    wanted = tmp_path.joinpath("configs", "data", "processed", "d", "v2.yaml")
    assert actual == wanted
def test_tabular_delegates_to_tabular_model(monkeypatch: pytest.MonkeyPatch) -> None:
    """A ``type: tabular`` payload is passed as keyword arguments to ``TabularFeaturesConfig``.

    The real model is replaced with a recording fake so the test checks only
    the dispatch: the fake must be instantiated with exactly the payload's
    items and returned to the caller.
    """
    called: dict[str, Any] = {}

    class FakeTabular:
        def __init__(self, **kwargs: Any) -> None:
            # Capture the constructor arguments for the assertion below.
            called["kwargs"] = kwargs

        def __repr__(self) -> str:  # pragma: no cover - helper
            return "FakeTabular()"

    monkeypatch.setattr(vmod, "TabularFeaturesConfig", FakeTabular)

    payload = {"type": "tabular", "foo": "bar"}
    res = vmod.validate_feature_config(payload)

    assert isinstance(res, FakeTabular)
    assert called["kwargs"] == payload
ml_service.backend.configs.modeling.models.configs import ( + ConfigPaths, + SearchConfigForValidation, + TrainConfigForValidation, + ValidatedConfigs, + ) + + model_specs = cast(ModelSpecs, FakeModel({"foo": "bar", "meta": {"x": 1}})) + search = cast(SearchConfigForValidation, FakeModel({"search": True})) + training = cast(TrainConfigForValidation, FakeModel({"train": True})) + + from types import SimpleNamespace + + validated = cast( + ValidatedConfigs, + SimpleNamespace(model_specs=model_specs, search=search, training=training), + ) + paths = ConfigPaths( + model_specs=str(tmp_path / "m_specs.yaml"), + search=str(tmp_path / "search.yaml"), + training=str(tmp_path / "train.yaml"), + ) + + mod.save_all_configs(validated, paths) + + assert len(calls) == 3 + + data0, path0 = calls[0] + assert "meta" not in data0 + assert path0 == Path(paths.model_specs) + + assert calls[1][0] == {"search": True} + assert calls[1][1] == Path(paths.search) + + assert calls[2][0] == {"train": True} + assert calls[2][1] == Path(paths.training) diff --git a/tests/unit/ml_service/backend/configs/modeling/utils/test_paths.py b/tests/unit/ml_service/backend/configs/modeling/utils/test_paths.py new file mode 100644 index 00000000..7295774b --- /dev/null +++ b/tests/unit/ml_service/backend/configs/modeling/utils/test_paths.py @@ -0,0 +1,104 @@ +"""Unit tests for `ml_service.backend.configs.modeling.utils.paths`. + +These tests exercise `compute_paths` and `check_paths` logic with +lightweight dummy objects and monkeypatched filesystem checks. 
+""" +from __future__ import annotations + +from typing import Any + +import pytest +from ml_service.backend.configs.modeling.utils import paths as paths_mod + + +def _make_dummy_validated(problem: str = "prob", segment_name: str = "seg", version: str = "v1") -> Any: + class DummySeg: + def __init__(self, name: str) -> None: + self.name = name + + class DummyModelSpecs: + def __init__(self, problem: str, segment: Any, version: str) -> None: + self.problem = problem + self.segment = segment + self.version = version + + class V: + model_specs: Any + + v = V() + v.model_specs = DummyModelSpecs(problem, DummySeg(segment_name), version) + return v + + +def test_compute_paths_returns_expected_paths() -> None: + v = _make_dummy_validated("myprob", "myseg", "vv") + result = paths_mod.compute_paths(v) + + expected_model = f"{paths_mod.repo_root}/configs/model_specs/myprob/myseg/vv.yaml" + expected_search = f"{paths_mod.repo_root}/configs/search/myprob/myseg/vv.yaml" + expected_train = f"{paths_mod.repo_root}/configs/train/myprob/myseg/vv.yaml" + + assert result.model_specs == expected_model + assert result.search == expected_search + assert result.training == expected_train + + +def test_check_paths_raises_on_existing_model_spec(monkeypatch) -> None: + v = _make_dummy_validated("prob", "seg", "v1") + paths = paths_mod.compute_paths(v) + + # Simulate that only model_specs path already exists + def fake_exists(p: str) -> bool: + return p == paths.model_specs + + monkeypatch.setattr(paths_mod.os.path, "exists", fake_exists) + + with pytest.raises(FileExistsError) as exc: + paths_mod.check_paths(v) + + assert paths.model_specs in str(exc.value) + + +def test_check_paths_returns_paths_when_none_exist(monkeypatch) -> None: + v = _make_dummy_validated("prob", "seg", "v1") + + monkeypatch.setattr(paths_mod.os.path, "exists", lambda p: False) + + res = paths_mod.check_paths(v) + # ensure returned object matches compute_paths output + expected = paths_mod.compute_paths(v) + assert 
def test_check_paths_raises_on_existing_search(monkeypatch) -> None:
    """``check_paths`` raises ``FileExistsError`` naming the search path when only it exists."""
    validated = _make_dummy_validated("prob", "seg", "v1")
    search_path = paths_mod.compute_paths(validated).search

    # Pretend the search config is the only file already present.
    monkeypatch.setattr(paths_mod.os.path, "exists", lambda candidate: candidate == search_path)

    with pytest.raises(FileExistsError) as exc:
        paths_mod.check_paths(validated)

    message = str(exc.value)
    assert search_path in message
"TrainConfigForValidation", FakeTrain) + + from ml_service.backend.configs.modeling.models.configs import RawConfigsWithLineage + + raw = RawConfigsWithLineage(model_specs={"a": 1}, search={"b": 2}, training={"c": 3}) + validated = v.validate_all_configs(raw) + + assert validated.model_specs.kwargs == {"a": 1} + assert validated.search.kwargs == {"b": 2} + assert validated.training.kwargs == {"c": 3} + + +def test_validate_all_configs_error(monkeypatch): + v = importlib.import_module( + "ml_service.backend.configs.modeling.validation.validate_all_configs" + ) + + def bad(*args, **kwargs): + raise Exception("boom") + + + monkeypatch.setattr(v, "ModelSpecs", bad) + + from ml_service.backend.configs.modeling.models.configs import RawConfigsWithLineage + + raw = RawConfigsWithLineage(model_specs={}, search={}, training={}) + with pytest.raises(ValueError) as exc: + v.validate_all_configs(raw) + + assert "Config validation error" in str(exc.value) + assert "boom" in str(exc.value) diff --git a/tests/unit/ml_service/backend/configs/pipeline_cfg/utils/test_get_config_path_pipeline_cfg.py b/tests/unit/ml_service/backend/configs/pipeline_cfg/utils/test_get_config_path_pipeline_cfg.py new file mode 100644 index 00000000..53b25566 --- /dev/null +++ b/tests/unit/ml_service/backend/configs/pipeline_cfg/utils/test_get_config_path_pipeline_cfg.py @@ -0,0 +1,41 @@ +from pathlib import Path + +from ml_service.backend.configs.pipeline_cfg.utils.get_config_path import ( + get_config_path, +) + + +def test_get_config_path_basic(): + repo_root = "repo_root" + data_type = "tabular" + algorithm = "random_forest" + pipeline_version = "v1.0" + + p = get_config_path( + repo_root=repo_root, + data_type=data_type, + algorithm=algorithm, + pipeline_version=pipeline_version, + ) + + expected = ( + Path(repo_root) + / "configs" + / "pipelines" + / data_type + / algorithm + / f"{pipeline_version}.yaml" + ) + + assert isinstance(p, Path) + assert p == expected + + +def 
def test_validate_config_payload_error_propagates(monkeypatch):
    """A ``ValueError`` raised while building ``PipelineConfig`` reaches the caller unchanged."""
    module_name = "ml_service.backend.configs.pipeline_cfg.validation.validate_config_payload"
    mod = importlib.import_module(module_name)

    def _reject(**kwargs):
        raise ValueError("invalid config")

    # Swap the model for a callable that always fails.
    monkeypatch.setattr(mod, "PipelineConfig", _reject)

    with pytest.raises(ValueError) as exc:
        mod.validate_config_payload({"a": 2})

    message = str(exc.value)
    assert "invalid config" in message
b/tests/unit/ml_service/backend/configs/promotion_thresholds/persistence/test_save_promotion_thresholds.py @@ -0,0 +1,94 @@ +import importlib + + +def test_save_with_existing_thresholds(monkeypatch, tmp_path): + mod = importlib.import_module( + "ml_service.backend.configs.promotion_thresholds.persistence.save_promotion_thresholds" + ) + + saved = {} + + def fake_save(cfg, pth): + saved["cfg"] = cfg + saved["pth"] = pth + + monkeypatch.setattr(mod, "save_config", fake_save) + + class FakeValidated: + def model_dump(self, mode="json"): + return {"new": True} + + thresholds = {"regression": {"s0": {"threshold": 0.1}}} + validated = FakeValidated() + config_path = tmp_path / "pth.yaml" + + mod.save_promotion_thresholds( + thresholds=thresholds, + validated=validated, + config_path=config_path, + problem_type="regression", + segment="s1", + ) + + assert "regression" in saved["cfg"] + assert "s0" in saved["cfg"]["regression"] + assert saved["cfg"]["regression"]["s1"] == {"new": True} + assert saved["pth"] == config_path + + +def test_save_with_none_thresholds(monkeypatch, tmp_path): + mod = importlib.import_module( + "ml_service.backend.configs.promotion_thresholds.persistence.save_promotion_thresholds" + ) + + recorded = {} + + def fake_save(cfg, pth): + recorded["cfg"] = cfg + recorded["pth"] = pth + + monkeypatch.setattr(mod, "save_config", fake_save) + + class FakeValidated: + def model_dump(self, mode="json"): + return {"v": 1} + + mod.save_promotion_thresholds( + thresholds=None, + validated=FakeValidated(), + config_path=tmp_path / "out.yaml", + problem_type="classification", + segment="sA", + ) + + assert recorded["cfg"]["classification"]["sA"] == {"v": 1} + + +def test_save_propagates_error(monkeypatch, tmp_path): + mod = importlib.import_module( + "ml_service.backend.configs.promotion_thresholds.persistence.save_promotion_thresholds" + ) + + def bad_save(cfg, pth): + raise RuntimeError("disk full") + + monkeypatch.setattr(mod, "save_config", bad_save) + + class 
FakeValidated: + def model_dump(self, mode="json"): + return {"x": 2} + + try: + mod.save_promotion_thresholds( + thresholds={}, + validated=FakeValidated(), + config_path=tmp_path / "out.yaml", + problem_type="p", + segment="s", + ) + raised = False + except RuntimeError as e: + raised = True + assert "disk full" in str(e) + + assert raised diff --git a/tests/unit/ml_service/backend/configs/promotion_thresholds/utils/test_check_thresholds_exist.py b/tests/unit/ml_service/backend/configs/promotion_thresholds/utils/test_check_thresholds_exist.py new file mode 100644 index 00000000..5ec03d4f --- /dev/null +++ b/tests/unit/ml_service/backend/configs/promotion_thresholds/utils/test_check_thresholds_exist.py @@ -0,0 +1,38 @@ +import yaml +from ml_service.backend.configs.promotion_thresholds.utils.check_thresholds_exist import ( + check_thresholds_exist, +) + + +def test_missing_file(tmp_path): + p = tmp_path / "no.yaml" + exists, thresholds = check_thresholds_exist(p, "regression", "s1") + assert exists is False + assert thresholds == {} + + +def test_empty_file(tmp_path): + p = tmp_path / "empty.yaml" + p.write_text("") + exists, thresholds = check_thresholds_exist(p, "regression", "s1") + assert exists is False + assert thresholds == {} + + +def test_missing_segment(tmp_path): + p = tmp_path / "th.yaml" + content = {"regression": {"other_segment": {"threshold": 0.1}}} + p.write_text(yaml.safe_dump(content)) + exists, thresholds = check_thresholds_exist(p, "regression", "s1") + assert exists is False + assert "regression" in thresholds + assert thresholds["regression"] == {"other_segment": {"threshold": 0.1}} + + +def test_existing_segment(tmp_path): + p = tmp_path / "th.yaml" + content = {"regression": {"s1": {"threshold": 0.5}}} + p.write_text(yaml.safe_dump(content)) + exists, thresholds = check_thresholds_exist(p, "regression", "s1") + assert exists is True + assert thresholds["regression"]["s1"] == {"threshold": 0.5} diff --git 
a/tests/unit/ml_service/backend/routers/test_features_routes.py b/tests/unit/ml_service/backend/routers/test_features_routes.py new file mode 100644 index 00000000..b5b4408d --- /dev/null +++ b/tests/unit/ml_service/backend/routers/test_features_routes.py @@ -0,0 +1,160 @@ +import pytest + + +def test_validate_yaml_success(monkeypatch): + payload = {"name": "feat", "version": "v1", "config": "yaml: x"} + + data_with_lineage = {"a": 1} + + monkeypatch.setattr( + "ml_service.backend.routers.features.load_yaml_and_add_lineage", + lambda text: data_with_lineage, + ) + + class FakeValidated: + def model_dump(self, mode="json"): + return {"ok": True} + + monkeypatch.setattr( + "ml_service.backend.routers.features.validate_feature_config", + lambda d: FakeValidated(), + ) + + monkeypatch.setattr( + "ml_service.backend.routers.features.get_registry_path", + lambda repo_root: "/registry/path", + ) + + monkeypatch.setattr( + "ml_service.backend.routers.features.registry_entry_exists", + lambda name, version, path: True, + ) + + import ml_service.backend.routers.features as fmod + from fastapi import Request + + orig = getattr(fmod.validate_yaml, "__wrapped__", fmod.validate_yaml) + req = Request({"type": "http", "method": "POST", "path": "/", "headers": []}) + j = orig(payload, req) + + assert j["valid"] is True + assert j["exists"] is True + assert j["normalized"]["ok"] is True + + +def test_validate_yaml_missing_fields(monkeypatch): + import ml_service.backend.routers.features as fmod + from fastapi import Request + + orig = getattr(fmod.validate_yaml, "__wrapped__", fmod.validate_yaml) + req = Request({"type": "http", "method": "POST", "path": "/", "headers": []}) + j = orig({"config": "x"}, req) + assert j["valid"] is False + assert "Missing feature set name or version" in j["error"] + + +def test_write_yaml_exists(monkeypatch): + payload = {"name": "f", "version": "v2", "config": "y"} + + monkeypatch.setattr( + 
"ml_service.backend.routers.features.load_yaml_and_add_lineage", + lambda text: {"a": 1}, + ) + monkeypatch.setattr( + "ml_service.backend.routers.features.validate_feature_config", + lambda d: True, + ) + monkeypatch.setattr( + "ml_service.backend.routers.features.get_registry_path", + lambda repo_root: "/registry/path", + ) + monkeypatch.setattr( + "ml_service.backend.routers.features.registry_entry_exists", + lambda name, version, path: True, + ) + + import ml_service.backend.routers.features as fmod + from fastapi import Request + + orig = getattr(fmod.write_yaml, "__wrapped__", fmod.write_yaml) + req = Request({"type": "http", "method": "POST", "path": "/", "headers": []}) + j = orig(payload, req) + assert j["status"] == "exists" + + +def test_write_yaml_saved_and_returned(monkeypatch): + payload = {"name": "f", "version": "v3", "config": "y"} + + monkeypatch.setattr( + "ml_service.backend.routers.features.load_yaml_and_add_lineage", + lambda text: {"a": 1}, + ) + monkeypatch.setattr( + "ml_service.backend.routers.features.validate_feature_config", + lambda d: True, + ) + monkeypatch.setattr( + "ml_service.backend.routers.features.get_registry_path", + lambda repo_root: "/registry/path", + ) + monkeypatch.setattr( + "ml_service.backend.routers.features.registry_entry_exists", + lambda name, version, path: False, + ) + + def fake_save(name, version, validated_config, registry_path): + return {"ok": True, "name": name, "version": version} + + monkeypatch.setattr( + "ml_service.backend.routers.features.save_feature_registry", + fake_save, + ) + + import ml_service.backend.routers.features as fmod + from fastapi import Request + + orig = getattr(fmod.write_yaml, "__wrapped__", fmod.write_yaml) + req = Request({"type": "http", "method": "POST", "path": "/", "headers": []}) + j = orig(payload, req) + + assert j["ok"] is True + assert j["name"] == "f" + + +def test_write_yaml_save_failure_raises(monkeypatch): + payload = {"name": "f", "version": "v4", "config": "y"} 
+ + monkeypatch.setattr( + "ml_service.backend.routers.features.load_yaml_and_add_lineage", + lambda text: {"a": 1}, + ) + monkeypatch.setattr( + "ml_service.backend.routers.features.validate_feature_config", + lambda d: True, + ) + monkeypatch.setattr( + "ml_service.backend.routers.features.get_registry_path", + lambda repo_root: "/registry/path", + ) + monkeypatch.setattr( + "ml_service.backend.routers.features.registry_entry_exists", + lambda name, version, path: False, + ) + + def _bad_save(name, version, validated_config, registry_path): + raise RuntimeError("boom") + + monkeypatch.setattr( + "ml_service.backend.routers.features.save_feature_registry", + _bad_save, + ) + + import ml_service.backend.routers.features as fmod + from fastapi import Request + + orig = getattr(fmod.write_yaml, "__wrapped__", fmod.write_yaml) + req = Request({"type": "http", "method": "POST", "path": "/", "headers": []}) + with pytest.raises(Exception) as exc: + orig(payload, req) + + assert "boom" in str(exc.value) diff --git a/tests/unit/ml_service/backend/routers/test_file_viewer_routes.py b/tests/unit/ml_service/backend/routers/test_file_viewer_routes.py new file mode 100644 index 00000000..2d2e315a --- /dev/null +++ b/tests/unit/ml_service/backend/routers/test_file_viewer_routes.py @@ -0,0 +1,81 @@ +import importlib + +from fastapi import HTTPException, Request + + +def test_load_file_missing_path_raises(): + mod = importlib.import_module("ml_service.backend.routers.file_viewer") + orig = getattr(mod.load_file, "__wrapped__", mod.load_file) + req = Request({"type": "http", "method": "POST", "path": "/", "headers": []}) + try: + orig({}, req) + raised = False + except HTTPException as e: + raised = True + assert e.status_code == 400 + + assert raised + + +def test_load_file_not_found(tmp_path): + mod = importlib.import_module("ml_service.backend.routers.file_viewer") + orig = getattr(mod.load_file, "__wrapped__", mod.load_file) + req = Request({"type": "http", "method": "POST", 
"path": "/", "headers": []}) + p = tmp_path / "nofile.yaml" + payload = {"path": str(p)} + try: + orig(payload, req) + raised = False + except HTTPException as e: + raised = True + assert e.status_code == 404 + + assert raised + + +def test_load_yaml_returns_content(tmp_path, monkeypatch): + mod = importlib.import_module("ml_service.backend.routers.file_viewer") + orig = getattr(mod.load_file, "__wrapped__", mod.load_file) + p = tmp_path / "f.yaml" + p.write_text("a: 1") + + monkeypatch.setattr(mod, "load_yaml", lambda path: {"a": 1}) + + req = Request({"type": "http", "method": "POST", "path": "/", "headers": []}) + payload = {"path": str(p)} + j = orig(payload, req) + assert j["mode"] == "yaml" + assert "a:" in j["content"] + assert j["path"] == str(p) + + +def test_load_json_returns_content(tmp_path, monkeypatch): + mod = importlib.import_module("ml_service.backend.routers.file_viewer") + orig = getattr(mod.load_file, "__wrapped__", mod.load_file) + p = tmp_path / "f.json" + p.write_text("{}") + + monkeypatch.setattr(mod, "load_json", lambda path: {"foo": "bar"}) + + req = Request({"type": "http", "method": "POST", "path": "/", "headers": []}) + payload = {"path": str(p)} + j = orig(payload, req) + assert j["mode"] == "json" + assert "foo" in j["content"] + assert j["path"] == str(p) + + +def test_unsupported_file_type_raises(tmp_path): + mod = importlib.import_module("ml_service.backend.routers.file_viewer") + orig = getattr(mod.load_file, "__wrapped__", mod.load_file) + p = tmp_path / "f.txt" + p.write_text("hello") + req = Request({"type": "http", "method": "POST", "path": "/", "headers": []}) + try: + orig({"path": str(p)}, req) + raised = False + except HTTPException as e: + raised = True + assert e.status_code == 400 + + assert raised diff --git a/tests/unit/ml_service/backend/routers/test_pipeline_cfg_routes.py b/tests/unit/ml_service/backend/routers/test_pipeline_cfg_routes.py new file mode 100644 index 00000000..5330dcae --- /dev/null +++ 
b/tests/unit/ml_service/backend/routers/test_pipeline_cfg_routes.py @@ -0,0 +1,176 @@ +import pytest + + +def _fake_path(exists: bool, path_str: str = "/fake/pipeline/path"): + class FakePath: + def __init__(self, exists_val: bool): + self._exists = exists_val + + def exists(self): + return self._exists + + def __str__(self): + return path_str + + return FakePath(exists) + + +def test_validate_yaml_success(monkeypatch): + payload = {"config": "cfg" , "data_type": "tabular", "algorithm": "alg"} + + data_dict = {"version": "v1"} + + monkeypatch.setattr( + "ml_service.backend.routers.pipeline_cfg.load_yaml_and_add_lineage", + lambda text: data_dict, + ) + + class FakeValidated: + def model_dump(self, mode="json"): + return {"ok": True} + + monkeypatch.setattr( + "ml_service.backend.routers.pipeline_cfg.validate_config_payload", + lambda d: FakeValidated(), + ) + + monkeypatch.setattr( + "ml_service.backend.routers.pipeline_cfg.get_config_path", + lambda repo_root, data_type, algorithm, pipeline_version: _fake_path(True), + ) + + import ml_service.backend.routers.pipeline_cfg as pc_mod + from fastapi import Request + + orig = getattr(pc_mod.validate_yaml, "__wrapped__", pc_mod.validate_yaml) + req = Request({"type": "http", "method": "POST", "path": "/", "headers": []}) + j = orig(payload, req) + + assert j["valid"] is True + assert j["exists"] is True + assert j["normalized"]["ok"] is True + + +def test_validate_yaml_missing_fields(monkeypatch): + # load returns no version -> function should return valid False + monkeypatch.setattr( + "ml_service.backend.routers.pipeline_cfg.load_yaml_and_add_lineage", + lambda text: {}, + ) + # ensure payload validation does not raise so the router reaches the missing-fields check + class _FakeVal: + def model_dump(self, mode="json"): + return {} + + monkeypatch.setattr( + "ml_service.backend.routers.pipeline_cfg.validate_config_payload", + lambda d: _FakeVal(), + ) + + import ml_service.backend.routers.pipeline_cfg as pc_mod + from 
fastapi import Request + + orig = getattr(pc_mod.validate_yaml, "__wrapped__", pc_mod.validate_yaml) + req = Request({"type": "http", "method": "POST", "path": "/", "headers": []}) + j = orig({"config": "x"}, req) + assert j["valid"] is False + assert "Missing required fields" in j["error"] + + +def test_write_yaml_exists(monkeypatch): + payload = {"config": "cfg", "data_type": "tabular", "algorithm": "alg"} + data_dict = {"version": "v2"} + + monkeypatch.setattr( + "ml_service.backend.routers.pipeline_cfg.load_yaml_and_add_lineage", + lambda text: data_dict, + ) + monkeypatch.setattr( + "ml_service.backend.routers.pipeline_cfg.validate_config_payload", + lambda d: True, + ) + monkeypatch.setattr( + "ml_service.backend.routers.pipeline_cfg.get_config_path", + lambda repo_root, data_type, algorithm, pipeline_version: _fake_path(True), + ) + + import ml_service.backend.routers.pipeline_cfg as pc_mod + from fastapi import Request + + orig = getattr(pc_mod.write_yaml, "__wrapped__", pc_mod.write_yaml) + req = Request({"type": "http", "method": "POST", "path": "/", "headers": []}) + j = orig(payload, req) + assert j["status"] == "exists" + + +def test_write_yaml_written_and_save_called(monkeypatch, tmp_path): + payload = {"config": "cfg", "data_type": "tabular", "algorithm": "alg"} + data_dict = {"version": "v3"} + + monkeypatch.setattr( + "ml_service.backend.routers.pipeline_cfg.load_yaml_and_add_lineage", + lambda text: data_dict, + ) + monkeypatch.setattr( + "ml_service.backend.routers.pipeline_cfg.validate_config_payload", + lambda d: True, + ) + + fake_path = _fake_path(False, "/pipeline/written") + monkeypatch.setattr( + "ml_service.backend.routers.pipeline_cfg.get_config_path", + lambda repo_root, data_type, algorithm, pipeline_version: fake_path, + ) + + called = {} + + def _save_config(config, config_path): + called["c"] = (config, config_path) + + monkeypatch.setattr("ml_service.backend.routers.pipeline_cfg.save_config", _save_config) + + import 
ml_service.backend.routers.pipeline_cfg as pc_mod + from fastapi import Request + + orig = getattr(pc_mod.write_yaml, "__wrapped__", pc_mod.write_yaml) + req = Request({"type": "http", "method": "POST", "path": "/", "headers": []}) + j = orig(payload, req) + + assert j["success"] == "written" + assert j["path"] == str(fake_path) + assert "c" in called + + +def test_write_yaml_save_failure_raises(monkeypatch): + payload = {"config": "cfg", "data_type": "tabular", "algorithm": "alg"} + data_dict = {"version": "v4"} + + monkeypatch.setattr( + "ml_service.backend.routers.pipeline_cfg.load_yaml_and_add_lineage", + lambda text: data_dict, + ) + monkeypatch.setattr( + "ml_service.backend.routers.pipeline_cfg.validate_config_payload", + lambda d: True, + ) + + fake_path = _fake_path(False, "/will/fail") + monkeypatch.setattr( + "ml_service.backend.routers.pipeline_cfg.get_config_path", + lambda repo_root, data_type, algorithm, pipeline_version: fake_path, + ) + + def _bad_save(config, config_path): + raise RuntimeError("no space") + + monkeypatch.setattr("ml_service.backend.routers.pipeline_cfg.save_config", _bad_save) + + import ml_service.backend.routers.pipeline_cfg as pc_mod + from fastapi import Request + + orig = getattr(pc_mod.write_yaml, "__wrapped__", pc_mod.write_yaml) + req = Request({"type": "http", "method": "POST", "path": "/", "headers": []}) + with pytest.raises(Exception) as exc: + orig(payload, req) + + assert "no space" in str(exc.value) diff --git a/tests/unit/ml_service/backend/routers/test_pipelines_routes.py b/tests/unit/ml_service/backend/routers/test_pipelines_routes.py new file mode 100644 index 00000000..5921f59e --- /dev/null +++ b/tests/unit/ml_service/backend/routers/test_pipelines_routes.py @@ -0,0 +1,50 @@ +import importlib + +from fastapi import Request + + +def test_register_raw_snapshot_calls_execute_pipeline(monkeypatch): + mod = importlib.import_module("ml_service.backend.routers.pipelines") + + recorded = {} + + def 
fake_execute(module_path, payload, boolean_args): + recorded["module_path"] = module_path + recorded["payload"] = payload + recorded["boolean_args"] = boolean_args + return {"ok": True, "module": module_path} + + monkeypatch.setattr(mod, "execute_pipeline", fake_execute) + + orig = getattr(mod.register_raw_snapshot, "__wrapped__", mod.register_raw_snapshot) + req = Request({"type": "http", "method": "POST", "path": "/", "headers": []}) + payload = {"snap": True} + res = orig(payload, req) + + assert recorded["module_path"] == "pipelines.data.register_raw_snapshot" + assert recorded["payload"] == payload + assert recorded["boolean_args"] == [] + assert res["ok"] is True + + +def test_search_passes_boolean_args(monkeypatch): + mod = importlib.import_module("ml_service.backend.routers.pipelines") + + seen = {} + + def fake_execute(module_path, payload, boolean_args): + seen["module_path"] = module_path + seen["payload"] = payload + seen["boolean_args"] = boolean_args + return {"ran": True} + + monkeypatch.setattr(mod, "execute_pipeline", fake_execute) + + orig = getattr(mod.search, "__wrapped__", mod.search) + req = Request({"type": "http", "method": "POST", "path": "/", "headers": []}) + payload = {"q": 1} + res = orig(payload, req) + + assert seen["module_path"] == "pipelines.search.search" + assert "strict" in seen["boolean_args"] + assert res["ran"] is True diff --git a/tests/unit/ml_service/frontend/configs/data/test_data_callbacks.py b/tests/unit/ml_service/frontend/configs/data/test_data_callbacks.py new file mode 100644 index 00000000..b19be8cc --- /dev/null +++ b/tests/unit/ml_service/frontend/configs/data/test_data_callbacks.py @@ -0,0 +1,116 @@ +"""Tests for `ml_service.frontend.configs.data.callbacks`. + +Cover editor tab behavior, validate and write branches similar to other frontend callback tests. 
+""" +from __future__ import annotations + +from typing import Any + +import yaml + +from ml_service.frontend.configs.data.callbacks import register_callbacks + + +def _find_callback_by_name(app_callbacks: list[dict[str, Any]], name: str): + return [c for c in app_callbacks if c["func"].__name__ == name] + + +def test_update_editor_on_tab_change(dummy_dash_app): + register_callbacks(dummy_dash_app) + cb = _find_callback_by_name(dummy_dash_app.callbacks, "update_editor_on_tab_change")[0] + + from ml_service.frontend.configs.data.examples.interim import INTERIM_EXAMPLE + from ml_service.frontend.configs.data.examples.processed import PROCESSED_EXAMPLE + from ml_service.frontend.configs.data.layout import PAGE_PREFIX + + # interim tab + v = cb["func"](f"{PAGE_PREFIX}-interim-tab") + assert v == INTERIM_EXAMPLE + + # processed tab + v2 = cb["func"](f"{PAGE_PREFIX}-processed-tab") + assert v2 == PROCESSED_EXAMPLE + + # unknown tab + v3 = cb["func"]("something-else") + assert v3 == "" + + +def test_validate_and_write_branches(dummy_dash_app, mock_requests): + register_callbacks(dummy_dash_app) + vcb = _find_callback_by_name(dummy_dash_app.callbacks, "validate_config")[0] + wcb = _find_callback_by_name(dummy_dash_app.callbacks, "write_config")[0] + + reqs = mock_requests + MockResponse = reqs["MockResponse"] + + from ml_service.frontend.configs.data.layout import PAGE_PREFIX + + active_tab = f"{PAGE_PREFIX}-interim-tab" + + # YAML parse error (missing data keys) + alert, is_open, val = vcb["func"](1, active_tab, "no-data-here") + assert "YAML parsing error" in str(alert) + assert is_open is False + + # backend not ok + def fake_not_ok(url, json=None, timeout=None, **kwargs): + return MockResponse(ok=False, status_code=500, text="bad") + + valid_yaml = "data:\n name: x\n version: v" + + reqs["patch_post"](fake_not_ok) + alert2, is_open2, val2 = vcb["func"](1, active_tab, valid_yaml) + assert "Backend error 500" in str(alert2) + assert is_open2 is False + + # invalid 
response + def fake_invalid(url, json=None, timeout=None, **kwargs): + return MockResponse(ok=True, status_code=200, text="ok", json_data={"valid": False, "error": "bad cfg"}) + + reqs["patch_post"](fake_invalid) + alert3, is_open3, val3 = vcb["func"](1, active_tab, valid_yaml) + assert "bad cfg" in str(alert3) + assert is_open3 is False + + # exists response + def fake_exists(url, json=None, timeout=None, **kwargs): + return MockResponse(ok=True, status_code=200, text="ok", json_data={"valid": True, "exists": True, "normalized": {}}) + + reqs["patch_post"](fake_exists) + alert4, is_open4, val4 = vcb["func"](1, active_tab, valid_yaml) + assert "already exists" in str(alert4) + assert is_open4 is False + + # success + def fake_success(url, json=None, timeout=None, **kwargs): + return MockResponse(ok=True, status_code=200, text="ok", json_data={"valid": True, "normalized": {"data": {"name": "n", "version": "v"}}}) + + reqs["patch_post"](fake_success) + alert5, is_open5, normalized = vcb["func"](1, active_tab, valid_yaml) + assert "Config valid" in str(alert5) + assert is_open5 is True + assert yaml.safe_load(normalized)["data"]["name"] == "n" + + # write: backend not ok + reqs["patch_post"](fake_not_ok) + w_alert, w_open = wcb["func"](1, active_tab, valid_yaml) + assert "Backend error 500" in str(w_alert) + + # write: exists + def fake_write_exists(url, json=None, timeout=None, **kwargs): + payload = json or {} + msg = f"{payload.get('name')}/{payload.get('version')} already exists." 
+ return MockResponse(ok=True, status_code=200, text="ok", json_data={"status": "exists", "message": msg}) + + reqs["patch_post"](fake_write_exists) + w_alert2, w_open2 = wcb["func"](1, active_tab, valid_yaml) + assert "already exists" in str(w_alert2) + + # write: success + def fake_written(url, json=None, timeout=None, **kwargs): + return MockResponse(ok=True, status_code=200, text="ok", json_data={"status": "written", "path": "/x/y"}) + + reqs["patch_post"](fake_written) + w_alert3, w_open3 = wcb["func"](1, active_tab, valid_yaml) + assert "/x/y" in str(w_alert3) diff --git a/tests/unit/ml_service/frontend/configs/promotion_thresholds/test_promotion_thresholds_callbacks.py b/tests/unit/ml_service/frontend/configs/promotion_thresholds/test_promotion_thresholds_callbacks.py new file mode 100644 index 00000000..0ac8afb1 --- /dev/null +++ b/tests/unit/ml_service/frontend/configs/promotion_thresholds/test_promotion_thresholds_callbacks.py @@ -0,0 +1,92 @@ +"""Tests for `ml_service.frontend.configs.promotion_thresholds.callbacks`. + +Exercise validate and write branches mirroring other frontend config tests. 
+""" +from __future__ import annotations + +from typing import Any + +import yaml +from ml_service.frontend.configs.promotion_thresholds.callbacks import register_callbacks + + +def _find_callback_by_name(app_callbacks: list[dict[str, Any]], name: str): + return [c for c in app_callbacks if c["func"].__name__ == name] + + +def test_validate_and_write_branches(dummy_dash_app, mock_requests): + register_callbacks(dummy_dash_app) + vcb = _find_callback_by_name(dummy_dash_app.callbacks, "validate_config")[0] + wcb = _find_callback_by_name(dummy_dash_app.callbacks, "write_config")[0] + + reqs = mock_requests + MockResponse = reqs["MockResponse"] + + problem = "no_show" + segment = "city_hotel" + + # missing required inputs + alert, is_open, val = vcb["func"](1, None, None, "cfg") + assert "Problem type and segment are required" in str(alert) + assert is_open is False + + # backend not ok + def fake_not_ok(url, json=None, timeout=None, **kwargs): + return MockResponse(ok=False, status_code=500, text="bad") + + cfg_yaml = "thresholds:\n x: 1" + + reqs["patch_post"](fake_not_ok) + alert2, is_open2, val2 = vcb["func"](1, problem, segment, cfg_yaml) + assert "Backend error 500" in str(alert2) + assert is_open2 is False + + # invalid response + def fake_invalid(url, json=None, timeout=None, **kwargs): + return MockResponse(ok=True, status_code=200, text="ok", json_data={"valid": False, "error": "bad cfg"}) + + reqs["patch_post"](fake_invalid) + alert3, is_open3, val3 = vcb["func"](1, problem, segment, cfg_yaml) + assert "bad cfg" in str(alert3) + assert is_open3 is False + + # exists response + def fake_exists(url, json=None, timeout=None, **kwargs): + return MockResponse(ok=True, status_code=200, text="ok", json_data={"valid": True, "exists": True, "normalized": {}}) + + reqs["patch_post"](fake_exists) + alert4, is_open4, val4 = vcb["func"](1, problem, segment, cfg_yaml) + assert "already exists" in str(alert4) + assert is_open4 is False + + # success + def fake_success(url, 
json=None, timeout=None, **kwargs): + return MockResponse(ok=True, status_code=200, text="ok", json_data={"valid": True, "normalized": {"thresholds": {"x": 1}}}) + + reqs["patch_post"](fake_success) + alert5, is_open5, normalized = vcb["func"](1, problem, segment, cfg_yaml) + assert "Config valid" in str(alert5) + assert is_open5 is True + assert yaml.safe_load(normalized)["thresholds"]["x"] == 1 + + # write: backend not ok + reqs["patch_post"](fake_not_ok) + w_alert, w_open = wcb["func"](1, problem, segment, cfg_yaml) + assert "Backend error 500" in str(w_alert) + + # write: exists + def fake_write_exists(url, json=None, timeout=None, **kwargs): + msg = f"Thresholds for {json.get('problem_type')}/{json.get('segment')} already exist." if json else "already exist" + return MockResponse(ok=True, status_code=200, text="ok", json_data={"status": "exists", "message": msg}) + + reqs["patch_post"](fake_write_exists) + w_alert2, w_open2 = wcb["func"](1, problem, segment, cfg_yaml) + assert "already exist" in str(w_alert2) + + # write: success + def fake_written(url, json=None, timeout=None, **kwargs): + return MockResponse(ok=True, status_code=200, text="ok", json_data={"status": "written", "path": "/x/y"}) + + reqs["patch_post"](fake_written) + w_alert3, w_open3 = wcb["func"](1, problem, segment, cfg_yaml) + assert "/x/y" in str(w_alert3) From 34b8af3682a40175d729195100e8b77e51323432 Mon Sep 17 00:00:00 2001 From: Sebastijan-Dominis Date: Sun, 29 Mar 2026 16:23:53 +0200 Subject: [PATCH 07/17] Added more tests, improved tests/ file structure. Added more tests for the ml_service code, greatly increasing the coverage. Structured the files more logically, so they roughly mirror the structure of the ml_service code itself. 
--- .../configs/formatting/test_timestamp.py | 31 ++++ .../configs/persistence/test_save_config.py | 13 +- .../test_promotion_validate_config_payload.py | 42 +++++ .../pipelines/test_execute_pipeline.py | 61 +++++++ .../backend/routers/test_dir_viewer_routes.py | 64 +++++++ .../backend/routers/test_features_routes.py | 60 +++++++ .../backend/routers/test_pipelines_all.py | 48 ++++++ .../routers/test_promotion_thresholds.py | 159 ++++++++++++++++++ .../backend/routers/test_scripts_routes.py | 80 +++++++++ .../backend/scripts/test_execute_script.py | 68 ++++++++ tests/unit/ml_service/backend/test_main.py | 21 +++ .../configs/pipeline_cfg/test_callbacks.py | 101 ----------- .../test_pipeline_cfg_callbacks.py | 59 +++++++ .../test_promotion_thresholds_layout.py | 59 +++++++ .../frontend/configs/test_data_layout.py | 40 +++++ .../configs/test_features_modeling_layouts.py | 57 +++++++ .../frontend/configs/test_page_wrappers.py | 28 +++ .../dir_viewer/test_dir_viewer_callbacks.py | 62 +++++++ .../dir_viewer/test_dir_viewer_layout.py | 51 ++++++ .../frontend/docs/test_docs_callbacks.py | 18 ++ .../docs/test_docs_callbacks_loader.py | 49 ++++++ .../pipelines/test_pipelines_callbacks.py | 91 ++++++++++ .../pipelines/test_pipelines_utils.py | 31 ++++ .../scripts/test_scripts_callbacks.py | 86 ++++++++++ .../frontend/scripts/test_scripts_utils.py | 31 ++++ tests/unit/ml_service/frontend/test_app.py | 59 +++++++ .../frontend/test_app_import_dummy_pages.py | 84 +++++++++ .../ml_service/frontend/test_app_main_run.py | 82 +++++++++ .../ml_service/frontend/test_app_noicon.py | 32 ++++ .../frontend/test_frontend_utils.py | 65 ------- 30 files changed, 1559 insertions(+), 173 deletions(-) create mode 100644 tests/unit/ml_service/backend/configs/promotion_thresholds/validation/test_promotion_validate_config_payload.py create mode 100644 tests/unit/ml_service/backend/pipelines/test_execute_pipeline.py create mode 100644 tests/unit/ml_service/backend/routers/test_dir_viewer_routes.py 
create mode 100644 tests/unit/ml_service/backend/routers/test_pipelines_all.py create mode 100644 tests/unit/ml_service/backend/routers/test_promotion_thresholds.py create mode 100644 tests/unit/ml_service/backend/routers/test_scripts_routes.py create mode 100644 tests/unit/ml_service/backend/scripts/test_execute_script.py create mode 100644 tests/unit/ml_service/backend/test_main.py delete mode 100644 tests/unit/ml_service/frontend/configs/pipeline_cfg/test_callbacks.py create mode 100644 tests/unit/ml_service/frontend/configs/pipeline_cfg/test_pipeline_cfg_callbacks.py create mode 100644 tests/unit/ml_service/frontend/configs/promotion_thresholds/test_promotion_thresholds_layout.py create mode 100644 tests/unit/ml_service/frontend/configs/test_data_layout.py create mode 100644 tests/unit/ml_service/frontend/configs/test_features_modeling_layouts.py create mode 100644 tests/unit/ml_service/frontend/configs/test_page_wrappers.py create mode 100644 tests/unit/ml_service/frontend/dir_viewer/test_dir_viewer_callbacks.py create mode 100644 tests/unit/ml_service/frontend/dir_viewer/test_dir_viewer_layout.py create mode 100644 tests/unit/ml_service/frontend/docs/test_docs_callbacks.py create mode 100644 tests/unit/ml_service/frontend/docs/test_docs_callbacks_loader.py create mode 100644 tests/unit/ml_service/frontend/pipelines/test_pipelines_callbacks.py create mode 100644 tests/unit/ml_service/frontend/pipelines/test_pipelines_utils.py create mode 100644 tests/unit/ml_service/frontend/scripts/test_scripts_callbacks.py create mode 100644 tests/unit/ml_service/frontend/scripts/test_scripts_utils.py create mode 100644 tests/unit/ml_service/frontend/test_app.py create mode 100644 tests/unit/ml_service/frontend/test_app_import_dummy_pages.py create mode 100644 tests/unit/ml_service/frontend/test_app_main_run.py create mode 100644 tests/unit/ml_service/frontend/test_app_noicon.py delete mode 100644 tests/unit/ml_service/frontend/test_frontend_utils.py diff --git 
a/tests/unit/ml_service/backend/configs/formatting/test_timestamp.py b/tests/unit/ml_service/backend/configs/formatting/test_timestamp.py index 0fca9d7b..191f2093 100644 --- a/tests/unit/ml_service/backend/configs/formatting/test_timestamp.py +++ b/tests/unit/ml_service/backend/configs/formatting/test_timestamp.py @@ -1,8 +1,39 @@ import importlib +import re import pytest +def test_add_timestamp_raises_on_missing_lineage_key(): + mod = importlib.import_module("ml_service.backend.configs.formatting.timestamp") + data = {} + try: + mod.add_timestamp(data, "lineage") + raised = False + except ValueError: + raised = True + assert raised + + +def test_add_timestamp_sets_iso_created_at(): + mod = importlib.import_module("ml_service.backend.configs.formatting.timestamp") + data = {"lineage": {"created_by": "tester"}} + out = mod.add_timestamp(data, "lineage") + assert "created_at" in out["lineage"] + ts = out["lineage"]["created_at"] + assert isinstance(ts, str) + # basic ISO-like check: contains T and ends with Z + assert "T" in ts and ts.endswith("Z") + + +def test_utc_timestamp_format(): + mod = importlib.import_module("ml_service.backend.configs.formatting.timestamp") + ts = mod.utc_timestamp() + assert isinstance(ts, str) + # basic pattern YYYY-MM-DDTHH:MM:SSZ + assert re.match(r"^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}Z$", ts) + + def test_add_timestamp_sets_created_at(monkeypatch): mod = importlib.import_module( "ml_service.backend.configs.formatting.timestamp" diff --git a/tests/unit/ml_service/backend/configs/persistence/test_save_config.py b/tests/unit/ml_service/backend/configs/persistence/test_save_config.py index 895cb87d..4deb65c8 100644 --- a/tests/unit/ml_service/backend/configs/persistence/test_save_config.py +++ b/tests/unit/ml_service/backend/configs/persistence/test_save_config.py @@ -5,23 +5,23 @@ def test_save_config_writes_file(tmp_path): - sc = importlib.import_module( - "ml_service.backend.configs.persistence.save_config" - ) + sc = 
importlib.import_module("ml_service.backend.configs.persistence.save_config") cfg = {"alpha": 1, "nested": {"x": "y"}} cp = tmp_path / "cfgs" / "cfg.yaml" sc.save_config(cfg, cp) assert cp.exists() loaded = yaml.safe_load(cp.read_text(encoding="utf-8")) assert loaded == cfg + # ensure no tmp file left behind assert not (cp.parent / f"{cp.name}.tmp").exists() def test_save_config_failure_cleans_tmp(tmp_path, monkeypatch): - sc = importlib.import_module( - "ml_service.backend.configs.persistence.save_config" - ) + sc = importlib.import_module("ml_service.backend.configs.persistence.save_config") cp = tmp_path / "cfgs" / "cfg.yaml" + cp.parent.mkdir(parents=True, exist_ok=True) + tmp_file = cp.parent / f"{cp.name}.tmp" + tmp_file.write_text("will be removed") def raise_replace(a, b): raise OSError("boom") @@ -31,6 +31,5 @@ def raise_replace(a, b): with pytest.raises(sc.HTTPException) as excinfo: sc.save_config({"a": 1}, cp) - tmp_file = cp.parent / f"{cp.name}.tmp" assert not tmp_file.exists() assert excinfo.value.status_code == 500 diff --git a/tests/unit/ml_service/backend/configs/promotion_thresholds/validation/test_promotion_validate_config_payload.py b/tests/unit/ml_service/backend/configs/promotion_thresholds/validation/test_promotion_validate_config_payload.py new file mode 100644 index 00000000..07a9d855 --- /dev/null +++ b/tests/unit/ml_service/backend/configs/promotion_thresholds/validation/test_promotion_validate_config_payload.py @@ -0,0 +1,42 @@ +import importlib + +import pytest + + +def _valid_payload(): + return { + "promotion_metrics": { + "sets": ["test"], + "metrics": ["accuracy"], + "directions": {"accuracy": "maximize"}, + }, + "thresholds": {"test": {"accuracy": 0.9}, "val": {}, "train": {}}, + "lineage": {"created_by": "tester", "created_at": "2024-01-01T00:00:00"}, + } + + +def test_validate_config_payload_accepts_valid_payload(): + mod = importlib.import_module( + 
"ml_service.backend.configs.promotion_thresholds.validation.validate_config_payload" + ) + res = mod.validate_config_payload(_valid_payload()) + # type check: returns PromotionThresholds model instance + from ml.promotion.config.promotion_thresholds import PromotionThresholds + + assert isinstance(res, PromotionThresholds) + + +def test_validate_config_payload_rejects_inconsistent_sets(): + mod = importlib.import_module( + "ml_service.backend.configs.promotion_thresholds.validation.validate_config_payload" + ) + + payload = _valid_payload() + # require both test and val to be present but leave val thresholds empty + payload["promotion_metrics"]["sets"] = ["test", "val"] + + with pytest.raises(Exception) as excinfo: + mod.validate_config_payload(payload) + + # should raise a ConfigError (subclass of Exception) for inconsistency + assert excinfo.value is not None diff --git a/tests/unit/ml_service/backend/pipelines/test_execute_pipeline.py b/tests/unit/ml_service/backend/pipelines/test_execute_pipeline.py new file mode 100644 index 00000000..dfd2fd9b --- /dev/null +++ b/tests/unit/ml_service/backend/pipelines/test_execute_pipeline.py @@ -0,0 +1,61 @@ +import importlib +import types + +from fastapi import HTTPException +from pydantic import BaseModel + + +def test_execute_pipeline_builds_command_and_returns(monkeypatch): + mod = importlib.import_module("ml_service.backend.pipelines.execute_pipeline") + + class Payload(BaseModel): + foo: int | None = None + flag: bool | None = None + + payload = Payload(foo=1, flag=True) + + captured = {} + + def fake_run(cmd, capture_output, text, env, cwd): + captured["cmd"] = cmd + return types.SimpleNamespace(returncode=0, stdout="ok", stderr="") + + monkeypatch.setattr(mod, "subprocess", types.SimpleNamespace(run=fake_run)) + + res = mod.execute_pipeline("pipelines.my_module", payload, boolean_args=["flag"]) + + assert res["exit_code"] == 0 + assert res["stdout"] == "ok" + + cmd = captured.get("cmd") + assert cmd is not None + 
assert cmd[0] == "python" + assert "-m" in cmd + assert "pipelines.my_module" in cmd + assert "--foo" in cmd + assert "1" in cmd + assert "--flag" in cmd + assert "True" in cmd + + +def test_execute_pipeline_raises_http_on_subprocess_exception(monkeypatch): + mod = importlib.import_module("ml_service.backend.pipelines.execute_pipeline") + + class Payload(BaseModel): + x: int | None = None + + payload = Payload(x=1) + + def bad_run(*args, **kwargs): + raise OSError("no exec") + + monkeypatch.setattr(mod, "subprocess", types.SimpleNamespace(run=bad_run)) + + try: + mod.execute_pipeline("pipelines.bad", payload) + raised = False + except HTTPException as e: + raised = True + assert e.status_code == 500 + + assert raised diff --git a/tests/unit/ml_service/backend/routers/test_dir_viewer_routes.py b/tests/unit/ml_service/backend/routers/test_dir_viewer_routes.py new file mode 100644 index 00000000..cbf3e0cb --- /dev/null +++ b/tests/unit/ml_service/backend/routers/test_dir_viewer_routes.py @@ -0,0 +1,64 @@ +import importlib +from typing import Any, cast + +from fastapi import HTTPException, Request + + +def test_load_dir_missing_path_raises(): + mod = importlib.import_module("ml_service.backend.routers.dir_viewer") + orig = getattr(mod.load_dir, "__wrapped__", mod.load_dir) + req = Request({"type": "http", "method": "POST", "path": "/", "headers": []}) + try: + orig({}, req) + raised = False + except HTTPException as e: + raised = True + assert e.status_code == 400 + + assert raised + + +def test_load_dir_outside_repo_raises(tmp_path): + mod = importlib.import_module("ml_service.backend.routers.dir_viewer") + orig = getattr(mod.load_dir, "__wrapped__", mod.load_dir) + # make repo_root the tmp path so '../' escapes it + cast(Any, mod).repo_root = str(tmp_path) + req = Request({"type": "http", "method": "POST", "path": "/", "headers": []}) + try: + orig({"path": ".."}, req) + raised = False + except HTTPException as e: + raised = True + assert e.status_code == 403 + + 
assert raised + + +def test_load_dir_not_found(tmp_path): + mod = importlib.import_module("ml_service.backend.routers.dir_viewer") + orig = getattr(mod.load_dir, "__wrapped__", mod.load_dir) + cast(Any, mod).repo_root = str(tmp_path) + req = Request({"type": "http", "method": "POST", "path": "/", "headers": []}) + try: + orig({"path": "nope"}, req) + raised = False + except HTTPException as e: + raised = True + assert e.status_code == 404 + + assert raised + + +def test_load_dir_success(tmp_path, monkeypatch): + mod = importlib.import_module("ml_service.backend.routers.dir_viewer") + orig = getattr(mod.load_dir, "__wrapped__", mod.load_dir) + # use monkeypatch to set a module attribute safely + monkeypatch.setattr(cast(Any, mod), "repo_root", str(tmp_path), raising=False) + d = tmp_path / "sub" + d.mkdir() + monkeypatch.setattr(mod, "build_tree", lambda p: {"ok": True}) + req = Request({"type": "http", "method": "POST", "path": "/", "headers": []}) + res = orig({"path": "sub"}, req) + assert res["tree"] == {"ok": True} + assert res["path"] == str(d.resolve()) + assert "tree_yaml" in res diff --git a/tests/unit/ml_service/backend/routers/test_features_routes.py b/tests/unit/ml_service/backend/routers/test_features_routes.py index b5b4408d..d803f26a 100644 --- a/tests/unit/ml_service/backend/routers/test_features_routes.py +++ b/tests/unit/ml_service/backend/routers/test_features_routes.py @@ -1,4 +1,64 @@ +import importlib + import pytest +from fastapi import Request + + +class DummyModel: + def __init__(self, payload): + self._payload = payload + + def model_dump(self, mode=None): + return {"dumped": True, "payload": self._payload} + + +def test_validate_yaml_missing_fields_returns_invalid(): + mod = importlib.import_module("ml_service.backend.routers.features") + orig = getattr(mod.validate_yaml, "__wrapped__", mod.validate_yaml) + req = Request({"type": "http", "method": "POST", "path": "/", "headers": []}) + + res = orig({}, req) + assert res["valid"] is False + 
+ +def test_validate_yaml_success_and_exists(monkeypatch, tmp_path): + mod = importlib.import_module("ml_service.backend.routers.features") + orig = getattr(mod.validate_yaml, "__wrapped__", mod.validate_yaml) + + # stub dependencies + monkeypatch.setattr(mod, "load_yaml_and_add_lineage", lambda txt: {"k": "v"}) + monkeypatch.setattr(mod, "validate_feature_config", lambda data: DummyModel(data)) + monkeypatch.setattr(mod, "get_registry_path", lambda p: tmp_path, raising=False) + monkeypatch.setattr(mod, "registry_entry_exists", lambda name, version, p: True) + + req = Request({"type": "http", "method": "POST", "path": "/", "headers": []}) + payload = {"name": "n", "version": "v", "config": "yaml: 1"} + res = orig(payload, req) + + assert res["valid"] is True + assert res["exists"] is True + assert "normalized" in res + + +def test_write_yaml_exists_and_write(monkeypatch, tmp_path): + mod = importlib.import_module("ml_service.backend.routers.features") + orig = getattr(mod.write_yaml, "__wrapped__", mod.write_yaml) + + monkeypatch.setattr(mod, "load_yaml_and_add_lineage", lambda txt: {"k": "v"}) + monkeypatch.setattr(mod, "validate_feature_config", lambda data: DummyModel(data)) + monkeypatch.setattr(mod, "get_registry_path", lambda p: tmp_path, raising=False) + + # case: exists -> short-circuit + monkeypatch.setattr(mod, "registry_entry_exists", lambda n, v, p: True) + payload = {"name": "n", "version": "v", "config": "yaml: 1"} + res = orig(payload, Request({"type": "http", "method": "POST", "path": "/", "headers": []})) + assert res["status"] == "exists" + + # case: save proceeds + monkeypatch.setattr(mod, "registry_entry_exists", lambda n, v, p: False) + monkeypatch.setattr(mod, "save_feature_registry", lambda name, version, validated_config, registry_path: {"status": "saved"}) + res2 = orig(payload, Request({"type": "http", "method": "POST", "path": "/", "headers": []})) + assert res2 == {"status": "saved"} def test_validate_yaml_success(monkeypatch): diff 
--git a/tests/unit/ml_service/backend/routers/test_pipelines_all.py b/tests/unit/ml_service/backend/routers/test_pipelines_all.py new file mode 100644 index 00000000..3a969ad1 --- /dev/null +++ b/tests/unit/ml_service/backend/routers/test_pipelines_all.py @@ -0,0 +1,48 @@ +"""Tests exercising all endpoints in `ml_service.backend.routers.pipelines`. + +These tests bypass the rate-limiter decorator by invoking the wrapped +function and monkeypatch `execute_pipeline` to capture calls. +""" + +from __future__ import annotations + +from fastapi import Request + + +def test_all_pipeline_endpoints_call_execute_pipeline(monkeypatch): + import ml_service.backend.routers.pipelines as pl_mod + + calls = [] + + def fake_execute_pipeline(module_path, payload, boolean_args): + calls.append((module_path, payload, boolean_args)) + return {"module": module_path} + + monkeypatch.setattr("ml_service.backend.routers.pipelines.execute_pipeline", fake_execute_pipeline) + + req = Request({"type": "http", "method": "POST", "path": "/", "headers": []}) + + mapping = [ + ("register_raw_snapshot", "pipelines.data.register_raw_snapshot"), + ("build_interim_dataset", "pipelines.data.build_interim_dataset"), + ("build_processed_dataset", "pipelines.data.build_processed_dataset"), + ("freeze_feature_set", "pipelines.features.freeze"), + ("search", "pipelines.search.search"), + ("train", "pipelines.runners.train"), + ("evaluate", "pipelines.runners.evaluate"), + ("explain", "pipelines.runners.explain"), + ("promote", "pipelines.promotion.promote"), + ("execute_all_data_preprocessing", "pipelines.orchestration.data.execute_all_data_preprocessing"), + ("freeze_all_feature_sets", "pipelines.orchestration.features.freeze_all_feature_sets"), + ("execute_experiment_with_latest", "pipelines.orchestration.experiments.execute_experiment_with_latest"), + ("execute_all_experiments_with_latest", "pipelines.orchestration.experiments.execute_all_experiments_with_latest"), + ("run_all_workflows", 
"pipelines.orchestration.master.run_all_workflows"), + ("infer", "pipelines.post_promotion.infer"), + ("monitor", "pipelines.post_promotion.monitor"), + ] + + for func_name, expected_module in mapping: + func = getattr(pl_mod, func_name) + orig = getattr(func, "__wrapped__", func) + res = orig({}, req) + assert res["module"] == expected_module diff --git a/tests/unit/ml_service/backend/routers/test_promotion_thresholds.py b/tests/unit/ml_service/backend/routers/test_promotion_thresholds.py new file mode 100644 index 00000000..8c007bc0 --- /dev/null +++ b/tests/unit/ml_service/backend/routers/test_promotion_thresholds.py @@ -0,0 +1,159 @@ +"""Tests for the promotion_thresholds router endpoints (validate + write).""" + +from __future__ import annotations + +import pytest +from fastapi import Request + + +def _fake_validated(): + class FakeValidated: + def model_dump(self, mode="json"): + return {"ok": True} + + return FakeValidated() + + +def _req(): + return Request({"type": "http", "method": "POST", "path": "/", "headers": []}) + + +def test_validate_yaml_success_exists(monkeypatch): + import ml_service.backend.routers.promotion_thresholds as pt_mod + + monkeypatch.setattr( + "ml_service.backend.routers.promotion_thresholds.load_yaml_and_add_lineage", + lambda text: {"foo": "bar"}, + ) + + monkeypatch.setattr( + "ml_service.backend.routers.promotion_thresholds.validate_config_payload", + lambda d: _fake_validated(), + ) + + monkeypatch.setattr( + "ml_service.backend.routers.promotion_thresholds.check_thresholds_exist", + lambda path, pt, seg: (True, {"t": 1}), + ) + + orig = getattr(pt_mod.validate_yaml, "__wrapped__", pt_mod.validate_yaml) + payload = {"config": "cfg", "problem_type": "p", "segment": "s"} + res = orig(payload, _req()) + assert res["valid"] is True + assert res["exists"] is True + assert res["normalized"]["ok"] is True + + +def test_validate_yaml_missing_config_returns_error(): + import ml_service.backend.routers.promotion_thresholds as pt_mod + + 
orig = getattr(pt_mod.validate_yaml, "__wrapped__", pt_mod.validate_yaml) + res = orig({}, _req()) + assert res["valid"] is False + assert "Missing config payload" in res["error"] + + +def test_validate_yaml_missing_fields_returns_error(monkeypatch): + import ml_service.backend.routers.promotion_thresholds as pt_mod + + monkeypatch.setattr( + "ml_service.backend.routers.promotion_thresholds.load_yaml_and_add_lineage", + lambda text: {"foo": "bar"}, + ) + monkeypatch.setattr( + "ml_service.backend.routers.promotion_thresholds.validate_config_payload", + lambda d: _fake_validated(), + ) + + orig = getattr(pt_mod.validate_yaml, "__wrapped__", pt_mod.validate_yaml) + res = orig({"config": "x"}, _req()) + assert res["valid"] is False + assert "Missing required fields" in res["error"] + + +def test_write_yaml_exists(monkeypatch): + import ml_service.backend.routers.promotion_thresholds as pt_mod + + monkeypatch.setattr( + "ml_service.backend.routers.promotion_thresholds.load_yaml_and_add_lineage", + lambda text: {"foo": "bar"}, + ) + monkeypatch.setattr( + "ml_service.backend.routers.promotion_thresholds.validate_config_payload", + lambda d: _fake_validated(), + ) + + monkeypatch.setattr( + "ml_service.backend.routers.promotion_thresholds.check_thresholds_exist", + lambda path, pt, seg: (True, {"t": 1}), + ) + + orig = getattr(pt_mod.write_yaml, "__wrapped__", pt_mod.write_yaml) + payload = {"config": "cfg", "problem_type": "p", "segment": "s"} + res = orig(payload, _req()) + assert res["status"] == "exists" + + +def test_write_yaml_written_and_save_called(monkeypatch, tmp_path): + import ml_service.backend.routers.promotion_thresholds as pt_mod + + monkeypatch.setattr( + "ml_service.backend.routers.promotion_thresholds.load_yaml_and_add_lineage", + lambda text: {"foo": "bar"}, + ) + monkeypatch.setattr( + "ml_service.backend.routers.promotion_thresholds.validate_config_payload", + lambda d: _fake_validated(), + ) + + pt_mod.repo_root = str(tmp_path) + + 
monkeypatch.setattr( + "ml_service.backend.routers.promotion_thresholds.check_thresholds_exist", + lambda path, pt, seg: (False, {"min": 0, "max": 1}), + ) + + called = {} + + def _save(thresholds, validated, config_path, problem_type, segment): + called["args"] = (thresholds, validated, str(config_path), problem_type, segment) + + monkeypatch.setattr("ml_service.backend.routers.promotion_thresholds.save_promotion_thresholds", _save) + + orig = getattr(pt_mod.write_yaml, "__wrapped__", pt_mod.write_yaml) + payload = {"config": "cfg", "problem_type": "p", "segment": "s"} + res = orig(payload, _req()) + assert res["success"] == "written" + assert "path" in res + assert "args" in called + + +def test_write_yaml_save_failure_raises(monkeypatch, tmp_path): + import ml_service.backend.routers.promotion_thresholds as pt_mod + from fastapi import HTTPException + + monkeypatch.setattr( + "ml_service.backend.routers.promotion_thresholds.load_yaml_and_add_lineage", + lambda text: {"foo": "bar"}, + ) + monkeypatch.setattr( + "ml_service.backend.routers.promotion_thresholds.validate_config_payload", + lambda d: _fake_validated(), + ) + + pt_mod.repo_root = str(tmp_path) + + monkeypatch.setattr( + "ml_service.backend.routers.promotion_thresholds.check_thresholds_exist", + lambda path, pt, seg: (False, {"min": 0}), + ) + + def _bad_save(*args, **kwargs): + raise RuntimeError("no space") + + monkeypatch.setattr("ml_service.backend.routers.promotion_thresholds.save_promotion_thresholds", _bad_save) + + orig = getattr(pt_mod.write_yaml, "__wrapped__", pt_mod.write_yaml) + payload = {"config": "cfg", "problem_type": "p", "segment": "s"} + with pytest.raises(HTTPException): + orig(payload, _req()) diff --git a/tests/unit/ml_service/backend/routers/test_scripts_routes.py b/tests/unit/ml_service/backend/routers/test_scripts_routes.py new file mode 100644 index 00000000..b360c294 --- /dev/null +++ b/tests/unit/ml_service/backend/routers/test_scripts_routes.py @@ -0,0 +1,80 @@ +import 
importlib + +from fastapi import Request + + +def test_generate_cols_for_row_id_fingerprint_calls_execute(monkeypatch): + mod = importlib.import_module("ml_service.backend.routers.scripts") + + recorded = {} + + def fake_execute(module_path, payload, boolean_args): + recorded["module_path"] = module_path + recorded["payload"] = payload + recorded["boolean_args"] = boolean_args + return {"ok": True, "module": module_path} + + monkeypatch.setattr(mod, "execute_script", fake_execute) + + orig = getattr(mod.generate_cols_for_row_id_fingerprint, "__wrapped__", mod.generate_cols_for_row_id_fingerprint) + req = Request({"type": "http", "method": "POST", "path": "/", "headers": []}) + payload = {"x": 1} + res = orig(payload, req) + + assert recorded["module_path"] == "scripts.generators.generate_cols_for_row_id_fingerprint" + assert recorded["payload"] == payload + assert recorded["boolean_args"] == [] + assert res["ok"] is True + + +def test_generate_fake_data_passes_boolean_args(monkeypatch): + mod = importlib.import_module("ml_service.backend.routers.scripts") + + seen = {} + + def fake_execute(module_path, payload, boolean_args): + seen["module_path"] = module_path + seen["payload"] = payload + seen["boolean_args"] = boolean_args + return {"ran": True} + + monkeypatch.setattr(mod, "execute_script", fake_execute) + + orig = getattr(mod.generate_fake_data, "__wrapped__", mod.generate_fake_data) + req = Request({"type": "http", "method": "POST", "path": "/", "headers": []}) + payload = {"foo": "bar"} + res = orig(payload, req) + + assert seen["module_path"] == "scripts.generators.generate_fake_data" + assert "include_old" in seen["boolean_args"] + assert res["ran"] is True + + +def test_other_script_endpoints_call_execute(monkeypatch): + mod = importlib.import_module("ml_service.backend.routers.scripts") + + called = [] + + def fake_execute(module_path, payload, boolean_args): + called.append((module_path, boolean_args)) + return {"ok": True} + + monkeypatch.setattr(mod, 
"execute_script", fake_execute) + + for fn_name, _expect_module in [ + ("generate_operator_hash", "scripts.generators.generate_operator_hash"), + ("generate_snapshot_binding", "scripts.generators.generate_snapshot_binding"), + ("check_import_layers", "scripts.quality.check_import_layers"), + ("check_naming_conventions", "scripts.quality.check_naming_conventions"), + ]: + orig = getattr(getattr(mod, fn_name), "__wrapped__", getattr(mod, fn_name)) + req = Request({"type": "http", "method": "POST", "path": "/", "headers": []}) + res = orig({"ok": True}, req) + assert res["ok"] is True + + # ensure all expected module paths were called + called_modules = {m for m, _ in called} + assert "scripts.generators.generate_operator_hash" in called_modules + assert "scripts.generators.generate_snapshot_binding" in called_modules + assert "scripts.quality.check_import_layers" in called_modules + assert "scripts.quality.check_naming_conventions" in called_modules diff --git a/tests/unit/ml_service/backend/scripts/test_execute_script.py b/tests/unit/ml_service/backend/scripts/test_execute_script.py new file mode 100644 index 00000000..28311732 --- /dev/null +++ b/tests/unit/ml_service/backend/scripts/test_execute_script.py @@ -0,0 +1,68 @@ +import importlib +import types + +from fastapi import HTTPException +from pydantic import BaseModel + + +def test_execute_script_builds_command_and_returns(monkeypatch): + mod = importlib.import_module("ml_service.backend.scripts.execute_script") + + class Payload(BaseModel): + foo: int | None = None + flag: bool | None = None + items: list[str] | None = None + + payload = Payload(foo=1, flag=True, items=["a", "b"]) + + captured = {} + + def fake_run(cmd, capture_output, text, env, cwd): + captured["cmd"] = cmd + return types.SimpleNamespace(returncode=0, stdout="ok", stderr="") + + # Replace the subprocess module used in the target module + monkeypatch.setattr(mod, "subprocess", types.SimpleNamespace(run=fake_run)) + + res = 
mod.execute_script("scripts.my_module", payload, boolean_args=["flag"]) + + assert res["exit_code"] == 0 + assert res["stdout"] == "ok" + + # Ensure the assembled command contains expected pieces + cmd = captured.get("cmd") + assert cmd is not None + assert cmd[0] == "python" + assert "-m" in cmd + assert "scripts.my_module" in cmd + # flags + assert "--foo" in cmd + assert "1" in cmd + assert "--flag" in cmd + assert "True" in cmd + # list expansion + assert "--items" in cmd + assert "a" in cmd and "b" in cmd + + +def test_execute_script_raises_http_on_subprocess_exception(monkeypatch): + mod = importlib.import_module("ml_service.backend.scripts.execute_script") + + class Payload(BaseModel): + x: int | None = None + + payload = Payload(x=1) + + def bad_run(*args, **kwargs): + raise OSError("no exec") + + monkeypatch.setattr(mod, "subprocess", types.SimpleNamespace(run=bad_run)) + + try: + mod.execute_script("scripts.bad", payload) + raised = False + except HTTPException as e: + raised = True + assert e.status_code == 500 + + assert raised diff --git a/tests/unit/ml_service/backend/test_main.py b/tests/unit/ml_service/backend/test_main.py new file mode 100644 index 00000000..82efab68 --- /dev/null +++ b/tests/unit/ml_service/backend/test_main.py @@ -0,0 +1,21 @@ +import asyncio +import importlib + + +def test_health_check_async(): + mod = importlib.import_module("ml_service.backend.main") + res = asyncio.get_event_loop().run_until_complete(mod.health_check()) + assert res == {"Healthy": 200} + + +def test_rate_limit_exceeded_handler_returns_429(): + mod = importlib.import_module("ml_service.backend.main") + # Call the async handler synchronously + # The handler does not inspect the exception, so a plain Exception is fine + resp = asyncio.get_event_loop().run_until_complete( + mod.rate_limit_exceeded_handler(None, Exception("rlimit")) + ) + assert getattr(resp, "status_code", None) == 429 + body = getattr(resp, "body", None) + assert body is not None + assert b"Rate 
limit exceeded" in body diff --git a/tests/unit/ml_service/frontend/configs/pipeline_cfg/test_callbacks.py b/tests/unit/ml_service/frontend/configs/pipeline_cfg/test_callbacks.py deleted file mode 100644 index e2cc3aea..00000000 --- a/tests/unit/ml_service/frontend/configs/pipeline_cfg/test_callbacks.py +++ /dev/null @@ -1,101 +0,0 @@ -"""Tests for `ml_service.frontend.configs.pipeline_cfg.callbacks`. - -These tests register callbacks on the `dummy_dash_app` fixture and invoke -the registered functions directly, patching `requests.post` via -`mock_requests` for deterministic behavior. -""" -from __future__ import annotations - -from typing import Any - -import yaml -from ml_service.frontend.configs.pipeline_cfg.callbacks import register_callbacks - - -def _find_callback_by_name(app_callbacks: list[dict[str, Any]], name: str): - return [c for c in app_callbacks if c["func"].__name__ == name] - - -def test_validate_config_requires_fields(dummy_dash_app): - register_callbacks(dummy_dash_app) - cb = _find_callback_by_name(dummy_dash_app.callbacks, "validate_config")[0] - - # missing data_type/algorithm should return validation Alert and original text - alert, is_open, value = cb["func"](1, None, None, "version: v1") - assert "Data type and algorithm are required." 
in str(alert) - assert is_open is False - assert value == "version: v1" - - -def test_validate_config_yaml_parse_error(dummy_dash_app): - register_callbacks(dummy_dash_app) - cb = _find_callback_by_name(dummy_dash_app.callbacks, "validate_config")[0] - - # Provide YAML that parses but lacks 'version' to trigger the missing-version branch - alert, is_open, _ = cb["func"](1, "dt", "alg", "foo: bar") - assert "YAML parsing error" in str(alert) - assert is_open is False - - -def test_validate_config_backend_invalid_and_success(dummy_dash_app, mock_requests): - register_callbacks(dummy_dash_app) - cb = _find_callback_by_name(dummy_dash_app.callbacks, "validate_config")[0] - - reqs = mock_requests - MockResponse = reqs["MockResponse"] - - # Backend returns invalid result - def fake_invalid(url, json=None, timeout=None, **kwargs): - return MockResponse(ok=True, status_code=200, text="ok", json_data={"valid": False, "error": "bad config"}) - - reqs["patch_post"](fake_invalid) - alert, is_open, _ = cb["func"](1, "dt", "alg", "version: v1") - assert "bad config" in str(alert) - assert is_open is False - - # Backend returns success with normalized payload - def fake_success(url, json=None, timeout=None, **kwargs): - return MockResponse(ok=True, status_code=200, text="ok", json_data={"valid": True, "exists": False, "normalized": {"a": 1, "version": "v1"}}) - - reqs["patch_post"](fake_success) - alert2, is_open2, normalized = cb["func"](1, "dt", "alg", "version: v1") - assert "Config valid" in str(alert2) or "Config valid." 
in str(alert2) - assert is_open2 is True - # normalized is YAML dump of the returned normalized dict - assert yaml.safe_load(normalized)["a"] == 1 - - -def test_write_config_branches(dummy_dash_app, mock_requests): - register_callbacks(dummy_dash_app) - cb = _find_callback_by_name(dummy_dash_app.callbacks, "write_config")[0] - - reqs = mock_requests - MockResponse = reqs["MockResponse"] - - # missing fields - alert, is_open = cb["func"](1, None, None, "version: v1") - assert "Data type and algorithm are required." in str(alert) - assert is_open is False - - # YAML parsing error: provide YAML without `version` to hit the missing-version branch - alert2, is_open2 = cb["func"](1, "dt", "alg", "foo: bar") - assert "YAML parsing error" in str(alert2) - assert is_open2 is False - - # backend reports exists - def fake_exists(url, json=None, timeout=None, **kwargs): - return MockResponse(ok=True, status_code=200, text="ok", json_data={"status": "exists", "message": "already"}) - - reqs["patch_post"](fake_exists) - alert3, is_open3 = cb["func"](1, "dt", "alg", "version: v1") - assert "already" in str(alert3) - assert is_open3 is False - - # backend reports written - def fake_written(url, json=None, timeout=None, **kwargs): - return MockResponse(ok=True, status_code=200, text="ok", json_data={"status": "written", "path": "/x/y"}) - - reqs["patch_post"](fake_written) - alert4, is_open4 = cb["func"](1, "dt", "alg", "version: v1") - assert "Config written successfully" in str(alert4) - assert is_open4 is False diff --git a/tests/unit/ml_service/frontend/configs/pipeline_cfg/test_pipeline_cfg_callbacks.py b/tests/unit/ml_service/frontend/configs/pipeline_cfg/test_pipeline_cfg_callbacks.py new file mode 100644 index 00000000..3f5117f9 --- /dev/null +++ b/tests/unit/ml_service/frontend/configs/pipeline_cfg/test_pipeline_cfg_callbacks.py @@ -0,0 +1,59 @@ +import importlib + + +class DummyApp: + def __init__(self): + self.callbacks = [] + + def callback(self, *args, **kwargs): + def 
dec(func): + self.callbacks.append(func) + return func + + return dec + + +class FakeResp: + def __init__(self, ok=True, json_data=None, status=200, text=""): + self.ok = ok + self._json = json_data or {} + self.status_code = status + self.text = text + + def json(self): + return self._json + + +def test_pipeline_cfg_validate_and_write(monkeypatch): + mod = importlib.import_module("ml_service.frontend.configs.pipeline_cfg.callbacks") + + app = DummyApp() + mod.register_callbacks(app) + # should have registered two callbacks + names = {fn.__name__ for fn in app.callbacks} + assert "validate_config" in names + assert "write_config" in names + + # find functions + validate_fn = next(fn for fn in app.callbacks if fn.__name__ == "validate_config") + write_fn = next(fn for fn in app.callbacks if fn.__name__ == "write_config") + + # missing inputs -> error + res = validate_fn(None, None, "alg", "") + assert res[1] is False + + # backend success path for validate + monkeypatch.setattr( + "ml_service.frontend.configs.pipeline_cfg.callbacks.requests.post", + lambda *a, **k: FakeResp(ok=True, json_data={"valid": True, "exists": False, "normalized": {}}), + ) + res2 = validate_fn(None, "dt", "alg", "version: 1") + assert res2[1] is True + + # write: backend reports exists + monkeypatch.setattr( + "ml_service.frontend.configs.pipeline_cfg.callbacks.requests.post", + lambda *a, **k: FakeResp(ok=True, json_data={"status": "exists", "message": "exists"}), + ) + res3 = write_fn(None, "dt", "alg", "version: 1") + assert res3[1] is False diff --git a/tests/unit/ml_service/frontend/configs/promotion_thresholds/test_promotion_thresholds_layout.py b/tests/unit/ml_service/frontend/configs/promotion_thresholds/test_promotion_thresholds_layout.py new file mode 100644 index 00000000..1b0069ef --- /dev/null +++ b/tests/unit/ml_service/frontend/configs/promotion_thresholds/test_promotion_thresholds_layout.py @@ -0,0 +1,59 @@ +import importlib + + +def _collect_ids(obj: object) -> set: + ids: 
set = set() + if hasattr(obj, "id"): + maybe_id = getattr(obj, "id", None) + if maybe_id: + ids.add(maybe_id) + props = getattr(obj, "props", None) + if isinstance(props, dict): + pid = props.get("id") + if pid: + ids.add(pid) + + children = getattr(obj, "children", None) + if children is None: + return ids + if not isinstance(children, (list, tuple)): + children = [children] + for child in children: + if child is None: + continue + ids.update(_collect_ids(child)) + return ids + + +def test_promotion_thresholds_layout_and_page_ids(): + mod = importlib.import_module( + "ml_service.frontend.configs.promotion_thresholds.layout" + ) + layout = mod.build_layout() + ids = _collect_ids(layout) + assert f"{mod.PAGE_PREFIX}-config-editor" in ids + assert f"{mod.PAGE_PREFIX}-validate-btn" in ids + assert f"{mod.PAGE_PREFIX}-confirm-modal" in ids + assert f"{mod.PAGE_PREFIX}-problem-type-input" in ids + assert f"{mod.PAGE_PREFIX}-segment-input" in ids + + +def test_promotion_thresholds_page_registers(monkeypatch): + mod = importlib.import_module( + "ml_service.frontend.configs.promotion_thresholds.page" + ) + called = [] + + def fake_register(app): + called.append(True) + + # patch internal register_callbacks to avoid heavy dash objects + monkeypatch.setattr( + "ml_service.frontend.configs.promotion_thresholds.page.register_callbacks", + fake_register, + raising=False, + ) + + # calling register should call our fake + mod.register(object()) + assert called diff --git a/tests/unit/ml_service/frontend/configs/test_data_layout.py b/tests/unit/ml_service/frontend/configs/test_data_layout.py new file mode 100644 index 00000000..e458da30 --- /dev/null +++ b/tests/unit/ml_service/frontend/configs/test_data_layout.py @@ -0,0 +1,40 @@ +"""Tests for the data config page layout builder.""" + +from __future__ import annotations + +from ml_service.frontend.configs.data.layout import PAGE_PREFIX, build_layout + + +def _has_children(obj: object) -> bool: + return hasattr(obj, "children") or 
hasattr(obj, "props") + + +def _collect_ids(obj: object) -> set: + ids: set = set() + if hasattr(obj, "id"): + maybe_id = getattr(obj, "id", None) + if maybe_id: + ids.add(maybe_id) + props = getattr(obj, "props", None) + if isinstance(props, dict): + pid = props.get("id") + if pid: + ids.add(pid) + + children = getattr(obj, "children", None) + if children is None: + return ids + if not isinstance(children, (list, tuple)): + children = [children] + for child in children: + if child is None: + continue + ids.update(_collect_ids(child)) + return ids + + +def test_build_data_layout_contains_expected_ids(): + layout = build_layout() + assert _has_children(layout) + ids = _collect_ids(layout) + assert f"{PAGE_PREFIX}-config-tabs" in ids diff --git a/tests/unit/ml_service/frontend/configs/test_features_modeling_layouts.py b/tests/unit/ml_service/frontend/configs/test_features_modeling_layouts.py new file mode 100644 index 00000000..458a3588 --- /dev/null +++ b/tests/unit/ml_service/frontend/configs/test_features_modeling_layouts.py @@ -0,0 +1,57 @@ +"""Tests for feature registry and modeling layout builders.""" + +from __future__ import annotations + +from ml_service.frontend.configs.features.layout import PAGE_PREFIX as FEATURES_PREFIX +from ml_service.frontend.configs.features.layout import build_layout as build_features_layout +from ml_service.frontend.configs.modeling.config_examples import CONFIG_EXAMPLES_REGISTRY +from ml_service.frontend.configs.modeling.layout import PAGE_PREFIX as MODELING_PREFIX +from ml_service.frontend.configs.modeling.layout import build_layout as build_modeling_layout + + +def _has_children(obj: object) -> bool: + return hasattr(obj, "children") or hasattr(obj, "props") + + +def _collect_ids(obj: object) -> set: + ids: set = set() + if hasattr(obj, "id"): + maybe_id = getattr(obj, "id", None) + if maybe_id: + ids.add(maybe_id) + props = getattr(obj, "props", None) + if isinstance(props, dict): + pid = props.get("id") + if pid: + 
ids.add(pid) + + children = getattr(obj, "children", None) + if children is None: + return ids + if not isinstance(children, (list, tuple)): + children = [children] + for child in children: + if child is None: + continue + ids.update(_collect_ids(child)) + return ids + + +def test_features_layout_contains_expected_ids(): + layout = build_features_layout() + assert _has_children(layout) + ids = _collect_ids(layout) + assert f"{FEATURES_PREFIX}-feature-editor" in ids + assert f"{FEATURES_PREFIX}-validate-btn" in ids + assert f"{FEATURES_PREFIX}-confirm-modal" in ids + + +def test_modeling_layout_contains_all_example_editors(): + layout = build_modeling_layout() + assert _has_children(layout) + ids = _collect_ids(layout) + # Validate button present + assert f"{MODELING_PREFIX}-validate-btn" in ids + # Each example in the registry should have an editor id + for name in CONFIG_EXAMPLES_REGISTRY: + assert f"{MODELING_PREFIX}-{name}" in ids diff --git a/tests/unit/ml_service/frontend/configs/test_page_wrappers.py b/tests/unit/ml_service/frontend/configs/test_page_wrappers.py new file mode 100644 index 00000000..0835eee8 --- /dev/null +++ b/tests/unit/ml_service/frontend/configs/test_page_wrappers.py @@ -0,0 +1,28 @@ +"""Smoke tests for simple page wrapper modules (get_layout / register).""" + +import importlib + + +def test_page_wrappers_call_register_and_return_layout(monkeypatch): + pages = [ + ("ml_service.frontend.configs.data.page", "data"), + ("ml_service.frontend.configs.features.page", "features"), + ("ml_service.frontend.configs.modeling.page", "modeling"), + ("ml_service.frontend.configs.pipeline_cfg.page", "pipeline_cfg"), + ] + + for module_path, marker in pages: + mod = importlib.import_module(module_path) + called = [] + + def _reg(app, _marker=marker, _called=called): + _called.append(_marker) + + monkeypatch.setattr(mod, "register_callbacks", _reg, raising=False) + + layout = mod.get_layout() + assert layout is not None + + # calling register should invoke 
our monkeypatched register_callbacks + mod.register(object()) + assert called and called[0] == marker diff --git a/tests/unit/ml_service/frontend/dir_viewer/test_dir_viewer_callbacks.py b/tests/unit/ml_service/frontend/dir_viewer/test_dir_viewer_callbacks.py new file mode 100644 index 00000000..da598d1b --- /dev/null +++ b/tests/unit/ml_service/frontend/dir_viewer/test_dir_viewer_callbacks.py @@ -0,0 +1,62 @@ +import importlib +from types import SimpleNamespace + + +def test_dir_viewer_load_branches(monkeypatch): + mod = importlib.import_module("ml_service.frontend.dir_viewer.callbacks") + + class FakeApp: + def __init__(self): + self._callbacks = [] + + def callback(self, *args, **kwargs): + def _decorator(func): + self._callbacks.append(func) + return func + + return _decorator + + fake_app = FakeApp() + mod.register_callbacks(fake_app) + funcs = {f.__name__: f for f in fake_app._callbacks} + assert "load_dir" in funcs + load_dir = funcs["load_dir"] + + # Missing path -> error alert + out = load_dir(None, "") + assert out[0] == "" + assert out[1] == "yaml" + alert = out[2] + children = getattr(alert, "children", "") + assert "Path required" in str(children) + + # Backend unreachable + def raise_post(*a, **k): + raise Exception("conn") + + monkeypatch.setattr(mod, "requests", SimpleNamespace(post=raise_post)) + out2 = load_dir(None, "sub") + assert "Backend unreachable" in str(getattr(out2[2], "children", "")) + + # Not OK response + class Resp: + ok = False + status_code = 403 + text = "Not allowed" + + monkeypatch.setattr(mod, "requests", SimpleNamespace(post=lambda *a, **k: Resp())) + out3 = load_dir(None, "sub") + assert "403" in str(getattr(out3[2], "children", "")) + + # Success + class RespOK: + ok = True + + def json(self): + return {"tree_yaml": "a: b\n", "path": "/tmp/sub"} + + monkeypatch.setattr(mod, "requests", SimpleNamespace(post=lambda *a, **k: RespOK())) + out4 = load_dir(None, "sub") + assert out4[0] == "a: b\n" + assert out4[1] == "yaml" + 
assert "Loaded directory tree for /tmp/sub" in str(getattr(out4[2], "children", "")) diff --git a/tests/unit/ml_service/frontend/dir_viewer/test_dir_viewer_layout.py b/tests/unit/ml_service/frontend/dir_viewer/test_dir_viewer_layout.py new file mode 100644 index 00000000..03858910 --- /dev/null +++ b/tests/unit/ml_service/frontend/dir_viewer/test_dir_viewer_layout.py @@ -0,0 +1,51 @@ +import importlib + + +def _collect_ids(obj: object) -> set: + ids: set = set() + if hasattr(obj, "id"): + maybe_id = getattr(obj, "id", None) + if maybe_id: + ids.add(maybe_id) + props = getattr(obj, "props", None) + if isinstance(props, dict): + pid = props.get("id") + if pid: + ids.add(pid) + + children = getattr(obj, "children", None) + if children is None: + return ids + if not isinstance(children, (list, tuple)): + children = [children] + for child in children: + if child is None: + continue + ids.update(_collect_ids(child)) + return ids + + +def test_dir_viewer_layout_has_expected_ids(): + mod = importlib.import_module("ml_service.frontend.dir_viewer.layout") + layout = mod.build_layout() + ids = _collect_ids(layout) + assert f"{mod.PAGE_PREFIX}-path-input" in ids + assert f"{mod.PAGE_PREFIX}-load-btn" in ids + assert f"{mod.PAGE_PREFIX}-viewer" in ids + assert f"{mod.PAGE_PREFIX}-manual-path" in ids + + +def test_dir_viewer_page_registers(monkeypatch): + mod = importlib.import_module("ml_service.frontend.dir_viewer.page") + called = [] + + def fake_register(app): + called.append(True) + + monkeypatch.setattr( + "ml_service.frontend.dir_viewer.page.register_callbacks", + fake_register, + raising=False, + ) + mod.register(object()) + assert called diff --git a/tests/unit/ml_service/frontend/docs/test_docs_callbacks.py b/tests/unit/ml_service/frontend/docs/test_docs_callbacks.py new file mode 100644 index 00000000..88ab1410 --- /dev/null +++ b/tests/unit/ml_service/frontend/docs/test_docs_callbacks.py @@ -0,0 +1,18 @@ +import importlib +from pathlib import Path + + +def 
test_rewrite_links_internal_and_external(tmp_path, monkeypatch): + mod = importlib.import_module("ml_service.frontend.docs.callbacks") + + # set docs root to tmp_path and create files + monkeypatch.setattr(mod, "DOCS_ROOT", Path(tmp_path)) + + sub = tmp_path / "subdir" + sub.mkdir() + (sub / "other.md").write_text("# other") + + md = "See [other](other.md) and [ext](http://example.com)" + out = mod.rewrite_links(md, "subdir/readme.md") + assert "(/Docs?doc=subdir/other.md)" in out + assert "http://example.com" in out diff --git a/tests/unit/ml_service/frontend/docs/test_docs_callbacks_loader.py b/tests/unit/ml_service/frontend/docs/test_docs_callbacks_loader.py new file mode 100644 index 00000000..01943f6a --- /dev/null +++ b/tests/unit/ml_service/frontend/docs/test_docs_callbacks_loader.py @@ -0,0 +1,49 @@ +import importlib + + +def test_rewrite_links_and_load_doc(tmp_path, monkeypatch): + mod = importlib.import_module("ml_service.frontend.docs.callbacks") + + # Patch DOCS_ROOT to tmp_path + monkeypatch.setattr(mod, "DOCS_ROOT", tmp_path) + + # Create docs structure + sub = tmp_path / "sub" + sub.mkdir() + (sub / "other.md").write_text("# Other\nContent") + + readme = tmp_path / "readme.md" + readme.write_text("See [other](sub/other.md) and [web](http://example.com) and [anch](#sec)") + + # Test rewrite_links directly + out = mod.rewrite_links(readme.read_text(), "readme.md") + assert "/Docs?doc=sub/other.md" in out + assert "http://example.com" in out + assert "#sec" in out + + # Register callbacks and extract loader + class FakeApp: + def __init__(self): + self._callbacks = [] + + def callback(self, *args, **kwargs): + def _decorator(func): + self._callbacks.append(func) + return func + + return _decorator + + fake_app = FakeApp() + mod.register_callbacks(fake_app) + funcs = {f.__name__: f for f in fake_app._callbacks} + assert "load_doc_from_url" in funcs + + loader = funcs["load_doc_from_url"] + + # No search -> readme.md + res = loader("") + assert 
"/Docs?doc=sub/other.md" in res + + # Non-existing doc -> not found + res2 = loader("?doc=missing.md") + assert "Document not found" in res2 diff --git a/tests/unit/ml_service/frontend/pipelines/test_pipelines_callbacks.py b/tests/unit/ml_service/frontend/pipelines/test_pipelines_callbacks.py new file mode 100644 index 00000000..7ef2e1b8 --- /dev/null +++ b/tests/unit/ml_service/frontend/pipelines/test_pipelines_callbacks.py @@ -0,0 +1,91 @@ +import importlib +import types + +import dash +import dash_bootstrap_components as dbc + + +def test_register_callbacks_toggle_and_run(monkeypatch): + mod = importlib.import_module("ml_service.frontend.pipelines.callbacks") + + # Create a simple pipeline definition with boolean, number, text fields + pipeline = { + "name": "testpipe", + "endpoint": "pipelines.test.run", + "fields": [ + {"name": "flag", "type": "boolean"}, + {"name": "count", "type": "number"}, + {"name": "note", "type": "text"}, + ], + } + + monkeypatch.setattr(mod, "FRONTEND_PIPELINES", [pipeline]) + + # Fake app that records registered callbacks + class FakeApp: + def __init__(self): + self._callbacks = [] + + def callback(self, *args, **kwargs): + def _decorator(func): + self._callbacks.append(func) + return func + + return _decorator + + fake_app = FakeApp() + + # Register callbacks (this will append two functions) + mod.register_callbacks(fake_app) + + funcs = {f.__name__: f for f in fake_app._callbacks} + assert "toggle_modal" in funcs + assert "run_pipeline" in funcs + + toggle = funcs["toggle_modal"] + run_pipeline = funcs["run_pipeline"] + + # Test toggle_modal: no trigger -> returns current state + monkeypatch.setattr(dash, "callback_context", types.SimpleNamespace(triggered=[])) + assert toggle(None, None, None, True) is True + + # simulate submit button triggered + submit_id = f"/pipelines-{pipeline['name']}-submit" + monkeypatch.setattr(dash, "callback_context", types.SimpleNamespace(triggered=[{"prop_id": f"{submit_id}.n_clicks"}])) + assert 
toggle(1, None, None, False) is True + + # simulate confirm -> should close modal + confirm_id = f"/pipelines-{pipeline['name']}-confirm" + monkeypatch.setattr(dash, "callback_context", types.SimpleNamespace(triggered=[{"prop_id": f"{confirm_id}.n_clicks"}])) + assert toggle(None, 1, None, True) is False + + # Test run_pipeline: if n_clicks is None -> no_update + res = run_pipeline(None, True, "3", "hello") + assert res is dash.no_update + + # Now test actual pipeline call paths with success and failure + called = {} + + def fake_call(endpoint, payload): + called["last_payload"] = payload + return {"status": "SUCCESS"} + + monkeypatch.setattr(mod, "call_pipeline", fake_call) + + out = run_pipeline(1, True, "2", "text") + # should return a Textarea component with success background + assert isinstance(out, dbc.Textarea) + assert getattr(out, "id", None) == f"/pipelines-{pipeline['name']}-result" + style = getattr(out, "style", {}) or {} + assert style.get("backgroundColor") == "#81ff81" + + # failing pipeline + def fake_call_fail(endpoint, payload): + called["last_payload"] = payload + return {"status": "FAIL"} + + monkeypatch.setattr(mod, "call_pipeline", fake_call_fail) + out2 = run_pipeline(1, False, "3.5", "") + assert isinstance(out2, dbc.Textarea) + style2 = getattr(out2, "style", {}) or {} + assert style2.get("backgroundColor") == "#ff8181" diff --git a/tests/unit/ml_service/frontend/pipelines/test_pipelines_utils.py b/tests/unit/ml_service/frontend/pipelines/test_pipelines_utils.py new file mode 100644 index 00000000..0ab0ce19 --- /dev/null +++ b/tests/unit/ml_service/frontend/pipelines/test_pipelines_utils.py @@ -0,0 +1,31 @@ +"""Tests for pipeline utils (call_pipeline).""" + +from __future__ import annotations + +from typing import Any + +import requests +from ml_service.frontend.pipelines.utils import call_pipeline + + +def test_call_pipeline_success(mock_requests: dict[str, Any]) -> None: + MockResponse = mock_requests["MockResponse"] + + def 
fake_post(url, json=None, **kwargs): + return MockResponse(ok=True, status_code=200, text="ok", json_data={"status": "started"}) + + mock_requests["patch_post"](fake_post) + + res = call_pipeline("pipelines/run", {"x": 1}) + assert res == {"status": "started"} + + +def test_call_pipeline_error(mock_requests: dict[str, Any]) -> None: + def fake_post(url, json=None, **kwargs): + raise requests.RequestException("timeout") + + mock_requests["patch_post"](fake_post) + + res = call_pipeline("pipelines/run", {"x": 1}) + assert "error" in res + assert "timeout" in res["error"] diff --git a/tests/unit/ml_service/frontend/scripts/test_scripts_callbacks.py b/tests/unit/ml_service/frontend/scripts/test_scripts_callbacks.py new file mode 100644 index 00000000..61d7524b --- /dev/null +++ b/tests/unit/ml_service/frontend/scripts/test_scripts_callbacks.py @@ -0,0 +1,86 @@ +import importlib +import types + +import dash +import dash_bootstrap_components as dbc + + +def test_register_scripts_toggle_and_run(monkeypatch): + mod = importlib.import_module("ml_service.frontend.scripts.callbacks") + + script = { + "name": "testscript", + "endpoint": "scripts.test.endpoint", + "fields": [ + {"name": "operators", "type": "text"}, + {"name": "flag", "type": "boolean"}, + {"name": "count", "type": "number"}, + {"name": "note", "type": "text"}, + ], + } + + monkeypatch.setattr(mod, "FRONTEND_SCRIPTS", [script]) + + class FakeApp: + def __init__(self): + self._callbacks = [] + + def callback(self, *args, **kwargs): + def _decorator(func): + self._callbacks.append(func) + return func + + return _decorator + + fake_app = FakeApp() + mod.register_callbacks(fake_app) + + funcs = {f.__name__: f for f in fake_app._callbacks} + assert "toggle_modal" in funcs + assert "run_pipeline" in funcs + + toggle = funcs["toggle_modal"] + run_pipeline = funcs["run_pipeline"] + + # no trigger -> return current + monkeypatch.setattr(dash, "callback_context", types.SimpleNamespace(triggered=[])) + assert toggle(None, 
None, None, True) is True + + submit_id = f"{mod.PAGE_PREFIX}-{script['name']}-submit" + monkeypatch.setattr(dash, "callback_context", types.SimpleNamespace(triggered=[{"prop_id": f"{submit_id}.n_clicks"}])) + assert toggle(1, None, None, False) is True + + confirm_id = f"{mod.PAGE_PREFIX}-{script['name']}-confirm" + monkeypatch.setattr(dash, "callback_context", types.SimpleNamespace(triggered=[{"prop_id": f"{confirm_id}.n_clicks"}])) + assert toggle(None, 1, None, True) is False + + # n_clicks None -> no update + res = run_pipeline(None, "a,b", True, "3", "hello") + assert res is dash.no_update + + captured = {} + + def fake_call(endpoint, payload): + captured["endpoint"] = endpoint + captured["payload"] = payload + return {"status": "SUCCESS"} + + monkeypatch.setattr(mod, "call_script", fake_call) + + out = run_pipeline(1, "a,b", True, "3", "hello") + assert isinstance(out, dbc.Textarea) + assert getattr(out, "id", None) == f"{mod.PAGE_PREFIX}-{script['name']}-result" + style = getattr(out, "style", {}) or {} + assert style.get("backgroundColor") == "#81ff81" + + # failing status + def fake_call_fail(endpoint, payload): + captured["endpoint"] = endpoint + captured["payload"] = payload + return {"status": "FAIL"} + + monkeypatch.setattr(mod, "call_script", fake_call_fail) + out2 = run_pipeline(1, "a,b", True, "3", "") + assert isinstance(out2, dbc.Textarea) + style2 = getattr(out2, "style", {}) or {} + assert style2.get("backgroundColor") == "#ff8181" diff --git a/tests/unit/ml_service/frontend/scripts/test_scripts_utils.py b/tests/unit/ml_service/frontend/scripts/test_scripts_utils.py new file mode 100644 index 00000000..79952550 --- /dev/null +++ b/tests/unit/ml_service/frontend/scripts/test_scripts_utils.py @@ -0,0 +1,31 @@ +"""Tests for script utils (call_script).""" + +from __future__ import annotations + +from typing import Any + +import requests +from ml_service.frontend.scripts.utils import call_script + + +def test_call_script_success(mock_requests: 
dict[str, Any]) -> None: + MockResponse = mock_requests["MockResponse"] + + def fake_post(url, json=None, **kwargs): + return MockResponse(ok=True, status_code=200, text="ok", json_data={"ok": True}) + + mock_requests["patch_post"](fake_post) + + res = call_script("scripts/check_import_layers", {"foo": "bar"}) + assert res == {"ok": True} + + +def test_call_script_error(mock_requests: dict[str, Any]) -> None: + def fake_post(url, json=None, **kwargs): + raise requests.RequestException("connection failed") + + mock_requests["patch_post"](fake_post) + + res = call_script("scripts/check_import_layers", {"foo": "bar"}) + assert "error" in res + assert "connection failed" in res["error"] diff --git a/tests/unit/ml_service/frontend/test_app.py b/tests/unit/ml_service/frontend/test_app.py new file mode 100644 index 00000000..77ce85a0 --- /dev/null +++ b/tests/unit/ml_service/frontend/test_app.py @@ -0,0 +1,59 @@ +"""Tests for the main Dash app helpers in ml_service.frontend.app.""" + +from __future__ import annotations + +import ml_service.frontend.app as app_mod + + +def _has_children(obj: object) -> bool: + return hasattr(obj, "children") or hasattr(obj, "props") + + +def test_generate_page_links(): + links = app_mod.generate_page_links() + assert isinstance(links, list) + assert len(links) == len(app_mod.PAGES) + assert all(_has_children(link) for link in links) + + +def test_toggle_and_close_sidebar(): + toggle = getattr(app_mod.toggle_sidebar, "__wrapped__", app_mod.toggle_sidebar) + assert toggle(1, True) is False + assert toggle(2, False) is True + assert toggle(0, True) is True + + close = getattr(app_mod.close_sidebar_on_home, "__wrapped__", app_mod.close_sidebar_on_home) + assert close(1, True) is False + assert close(None, True) is True + + +def test_display_page_and_404(): + display = getattr(app_mod.display_page, "__wrapped__", app_mod.display_page) + # root -> home + res_home = display("/") + assert _has_children(res_home) + # known page + res_page = 
display("/Pipelines") + assert _has_children(res_page) + # unknown page -> 404 container with H2 child + res_404 = display("/this_page_does_not_exist") + txt = "" + if hasattr(res_404, "children"): + ch = res_404.children + if isinstance(ch, (list, tuple)): + ch = ch[0] + if hasattr(ch, "children"): + if isinstance(ch.children, (list, tuple)): + txt = "".join(str(x) for x in ch.children) + else: + txt = str(ch.children) + assert "404" in txt + + +def test_update_active_links(): + update = getattr(app_mod.update_active_links, "__wrapped__", app_mod.update_active_links) + res_none = update(None) + assert all(v is False for v in res_none) + res_pipelines = update("/Pipelines") + expected = [name == "Pipelines" for name in app_mod.PAGES] + assert res_pipelines == expected diff --git a/tests/unit/ml_service/frontend/test_app_import_dummy_pages.py b/tests/unit/ml_service/frontend/test_app_import_dummy_pages.py new file mode 100644 index 00000000..9d37507f --- /dev/null +++ b/tests/unit/ml_service/frontend/test_app_import_dummy_pages.py @@ -0,0 +1,84 @@ +"""Test re-importing the Dash app with dummy page modules. + +This ensures the top-level registration loop in `ml_service.frontend.app` +calls each page `register()` and that the `PAGES` mapping points to the +expected `get_layout` functions. 
+""" + +from __future__ import annotations + +import importlib +import sys +import types +from typing import Any + + +def test_app_import_calls_registers_and_populates_pages(): + page_module_names = [ + "ml_service.frontend.configs.data.page", + "ml_service.frontend.configs.features.page", + "ml_service.frontend.configs.modeling.page", + "ml_service.frontend.configs.pipeline_cfg.page", + "ml_service.frontend.configs.promotion_thresholds.page", + "ml_service.frontend.dir_viewer.page", + "ml_service.frontend.docs.page", + "ml_service.frontend.file_viewer.page", + "ml_service.frontend.pipelines.page", + "ml_service.frontend.scripts.page", + ] + + original_modules: dict[str, types.ModuleType | None] = {} + register_calls: list[str] = [] + + original_app = sys.modules.pop("ml_service.frontend.app", None) + try: + # Insert dummy page modules that expose `get_layout` and `register`. + for module_name in page_module_names: + original_modules[module_name] = sys.modules.pop(module_name, None) + dummy: Any = types.ModuleType(module_name) + + def make_get_layout(name: str): + return lambda: f"{name}_layout" + + def make_register(name: str): + def register_function(app): + register_calls.append(name) + + return register_function + + dummy.get_layout = make_get_layout(module_name) + dummy.register = make_register(module_name) + sys.modules[module_name] = dummy + + # Import the app module fresh so it picks up our dummy page modules. + app_module = importlib.import_module("ml_service.frontend.app") + + # All dummy register functions should have been called. + assert set(register_calls) == set(page_module_names) + + # The PAGES mapping should call through to our dummy get_layout functions. 
+ expected_key_to_module = { + "Data Config": "ml_service.frontend.configs.data.page", + "Feature Config": "ml_service.frontend.configs.features.page", + "Pipelines": "ml_service.frontend.pipelines.page", + "Scripts": "ml_service.frontend.scripts.page", + "Docs": "ml_service.frontend.docs.page", + "File Viewer": "ml_service.frontend.file_viewer.page", + "Directory Viewer": "ml_service.frontend.dir_viewer.page", + } + + for page_name, module_name in expected_key_to_module.items(): + layout_func = app_module.PAGES.get(page_name) + assert layout_func is not None + assert layout_func() == f"{module_name}_layout" + + finally: + # Clean up: remove the imported app and restore original modules. + sys.modules.pop("ml_service.frontend.app", None) + for module_name, original in original_modules.items(): + if original is not None: + sys.modules[module_name] = original + else: + sys.modules.pop(module_name, None) + if original_app is not None: + sys.modules["ml_service.frontend.app"] = original_app diff --git a/tests/unit/ml_service/frontend/test_app_main_run.py b/tests/unit/ml_service/frontend/test_app_main_run.py new file mode 100644 index 00000000..aa3fdc60 --- /dev/null +++ b/tests/unit/ml_service/frontend/test_app_main_run.py @@ -0,0 +1,82 @@ +"""Run `ml_service.frontend.app` as __main__ to cover the script-entry branch. + +This executes the module with `runpy.run_module(..., run_name='__main__')` while +injecting dummy page modules and a stubbed `dash.Dash` so the call to +`app.run()` executes but does not start a server. 
+""" + +from __future__ import annotations + +import runpy +import sys +import types +from typing import Any + +import dash + + +def test_app_run_as_main_executes_run(monkeypatch): + page_module_names = [ + "ml_service.frontend.configs.data.page", + "ml_service.frontend.configs.features.page", + "ml_service.frontend.configs.modeling.page", + "ml_service.frontend.configs.pipeline_cfg.page", + "ml_service.frontend.configs.promotion_thresholds.page", + "ml_service.frontend.dir_viewer.page", + "ml_service.frontend.docs.page", + "ml_service.frontend.file_viewer.page", + "ml_service.frontend.pipelines.page", + "ml_service.frontend.scripts.page", + ] + + # Save originals and install dummy page modules + originals = {name: sys.modules.get(name) for name in page_module_names} + orig_app = None + try: + for name in page_module_names: + mod: Any = types.ModuleType(name) + mod.get_layout = lambda name=name: f"{name}_layout" + mod.register = lambda app: None + sys.modules[name] = mod + + # Stub dash.Dash so app.run() is safe to call + class DummyDash: + ran = False + + def __init__(self, *args, **kwargs): + self.server = "stub" + + def run(self, *args, **kwargs): + DummyDash.ran = True + + def callback(self, *args, **kwargs): + # return a no-op decorator used by @app.callback(...) + def _decorator(func): + return func + + return _decorator + + monkeypatch.setattr(dash, "Dash", DummyDash) + + # Ensure module is executed as __main__ + # Remove any pre-existing 'ml_service.frontend.app' entry to avoid + # runpy RuntimeWarning about a module being present in sys.modules + # prior to execution. 
+ orig_app = sys.modules.pop("ml_service.frontend.app", None) + runpy.run_module("ml_service.frontend.app", run_name="__main__", alter_sys=True) + + assert DummyDash.ran is True + + finally: + # Restore ml_service.frontend.app if it existed before the test + if orig_app is None: + sys.modules.pop("ml_service.frontend.app", None) + else: + sys.modules["ml_service.frontend.app"] = orig_app + + # Restore sys.modules for the dummy page modules + for name, orig in originals.items(): + if orig is None: + sys.modules.pop(name, None) + else: + sys.modules[name] = orig diff --git a/tests/unit/ml_service/frontend/test_app_noicon.py b/tests/unit/ml_service/frontend/test_app_noicon.py new file mode 100644 index 00000000..a486e121 --- /dev/null +++ b/tests/unit/ml_service/frontend/test_app_noicon.py @@ -0,0 +1,32 @@ +"""Cover the branch in `generate_page_links` where a page has no icon.""" + +from __future__ import annotations + +import dash_bootstrap_components as dbc +import ml_service.frontend.app as app_mod + + +def test_generate_page_links_no_icon(monkeypatch): + # Create a modified PAGES mapping that includes a page not present in ICONS + original = app_mod.PAGES + try: + new_pages = dict(original) + new_pages["No Icon Page"] = lambda: dbc.Container("noop") + monkeypatch.setattr(app_mod, "PAGES", new_pages) + + links = app_mod.generate_page_links() + + found = False + for link in links: + lid = getattr(link, "id", None) + if lid == "nav-No_Icon_Page": + found = True + break + props = getattr(link, "props", None) + if isinstance(props, dict) and props.get("id") == "nav-No_Icon_Page": + found = True + break + + assert found, "Expected nav-No_Icon_Page in generated links" + finally: + monkeypatch.setattr(app_mod, "PAGES", original) diff --git a/tests/unit/ml_service/frontend/test_frontend_utils.py b/tests/unit/ml_service/frontend/test_frontend_utils.py deleted file mode 100644 index 055e1ad8..00000000 --- a/tests/unit/ml_service/frontend/test_frontend_utils.py +++ /dev/null 
@@ -1,65 +0,0 @@ -"""Unit tests for frontend utility functions in ``ml_service.frontend``. - -These tests mock external HTTP calls to keep them fast and reliable. -""" -from __future__ import annotations - -from typing import Any - -import requests -from ml_service.frontend.pipelines.utils import call_pipeline -from ml_service.frontend.scripts.utils import call_script - - -def test_call_script_success(mock_requests: dict[str, Any]) -> None: - """`call_script` returns parsed JSON when the backend responds successfully.""" - - MockResponse = mock_requests["MockResponse"] - - def fake_post(url, json=None, **kwargs): - return MockResponse(ok=True, status_code=200, text="ok", json_data={"ok": True}) - - mock_requests["patch_post"](fake_post) - - res = call_script("scripts/check_import_layers", {"foo": "bar"}) - assert res == {"ok": True} - - -def test_call_script_error(mock_requests: dict[str, Any]) -> None: - """`call_script` returns an error dict when the HTTP client raises an exception.""" - - def fake_post(url, json=None, **kwargs): - raise requests.RequestException("connection failed") - - mock_requests["patch_post"](fake_post) - - res = call_script("scripts/check_import_layers", {"foo": "bar"}) - assert "error" in res - assert "connection failed" in res["error"] - - -def test_call_pipeline_success(mock_requests: dict[str, Any]) -> None: - """`call_pipeline` returns parsed JSON on successful response.""" - - MockResponse = mock_requests["MockResponse"] - - def fake_post(url, json=None, **kwargs): - return MockResponse(ok=True, status_code=200, text="ok", json_data={"status": "started"}) - - mock_requests["patch_post"](fake_post) - - res = call_pipeline("pipelines/run", {"x": 1}) - assert res == {"status": "started"} - - -def test_call_pipeline_error(mock_requests: dict[str, Any]) -> None: - """`call_pipeline` returns an error dict when the HTTP client raises an exception.""" - - def fake_post(url, json=None, **kwargs): - raise requests.RequestException("timeout") - 
- mock_requests["patch_post"](fake_post) - - res = call_pipeline("pipelines/run", {"x": 1}) - assert "error" in res - assert "timeout" in res["error"] From 614640bb541ee6de70a9f206a948391847b68314 Mon Sep 17 00:00:00 2001 From: Sebastijan-Dominis Date: Mon, 30 Mar 2026 07:20:30 +0200 Subject: [PATCH 08/17] Added more tests; >99% coverage of ml_service Added more tests to ml_service, which now has >99% code coverage. This should help ensure the reliability and robustness of the service as we continue to develop and maintain it. --- .../configs/persistence/test_save_config.py | 53 ++++- .../pipelines/test_execute_pipeline.py | 101 +++++++++ .../backend/routers/test_data_routes.py | 142 ++++++++++++ .../backend/routers/test_features_routes.py | 98 ++++++++ .../routers/test_pipeline_cfg_routes.py | 99 ++++++++ .../test_promotion_thresholds_routes.py | 33 +++ .../data/test_data_callbacks_branches.py | 20 ++ .../test_pipeline_cfg_branches.py | 213 ++++++++++++++++++ ...otion_thresholds_layout_callbacks_extra.py | 36 +++ .../frontend/docs/test_docs_callbacks.py | 60 +++++ .../docs/test_docs_callbacks_branches.py | 27 +++ .../file_viewer/test_file_viewer_callbacks.py | 101 +++++++++ .../pages/test_pages_and_scripts_layout.py | 70 ++++++ .../test_pipelines_callbacks_branches.py | 84 +++++++ .../test_pipelines_metadata_layout.py | 73 ++++++ .../scripts/test_scripts_callbacks.py | 63 ++++++ .../test_scripts_callbacks_branches.py | 76 +++++++ .../frontend/scripts/test_scripts_metadata.py | 28 +++ 18 files changed, 1375 insertions(+), 2 deletions(-) create mode 100644 tests/unit/ml_service/backend/routers/test_promotion_thresholds_routes.py create mode 100644 tests/unit/ml_service/frontend/configs/data/test_data_callbacks_branches.py create mode 100644 tests/unit/ml_service/frontend/configs/pipeline_cfg/test_pipeline_cfg_branches.py create mode 100644 tests/unit/ml_service/frontend/configs/promotion_thresholds/test_promotion_thresholds_layout_callbacks_extra.py create mode 
100644 tests/unit/ml_service/frontend/docs/test_docs_callbacks_branches.py create mode 100644 tests/unit/ml_service/frontend/file_viewer/test_file_viewer_callbacks.py create mode 100644 tests/unit/ml_service/frontend/pages/test_pages_and_scripts_layout.py create mode 100644 tests/unit/ml_service/frontend/pipelines/test_pipelines_callbacks_branches.py create mode 100644 tests/unit/ml_service/frontend/pipelines/test_pipelines_metadata_layout.py create mode 100644 tests/unit/ml_service/frontend/scripts/test_scripts_callbacks_branches.py create mode 100644 tests/unit/ml_service/frontend/scripts/test_scripts_metadata.py diff --git a/tests/unit/ml_service/backend/configs/persistence/test_save_config.py b/tests/unit/ml_service/backend/configs/persistence/test_save_config.py index 4deb65c8..7fa35c45 100644 --- a/tests/unit/ml_service/backend/configs/persistence/test_save_config.py +++ b/tests/unit/ml_service/backend/configs/persistence/test_save_config.py @@ -1,10 +1,59 @@ +"""Tests for `ml_service.backend.configs.persistence.save_config`.""" + +from __future__ import annotations + import importlib import pytest import yaml +from fastapi import HTTPException +from ml_service.backend.configs.persistence.save_config import save_config + + +def test_save_config_success(tmp_path) -> None: + cfg = {"a": 1} + p = tmp_path / "cfg.yaml" + save_config(cfg, p) + assert p.exists() + assert yaml.safe_load(p.read_text(encoding="utf-8")) == cfg + + +def test_save_config_failure_cleans_tmp(tmp_path, monkeypatch) -> None: + cfg = {"b": 2} + p = tmp_path / "cfg.yaml" + tmp_file = p.parent / f"{p.name}.tmp" + + def _bad_replace(src, dst): + raise OSError("boom") + monkeypatch.setattr( + "ml_service.backend.configs.persistence.save_config.os.replace", + _bad_replace, + ) -def test_save_config_writes_file(tmp_path): + with pytest.raises(HTTPException) as exc: + save_config(cfg, p) + + # tmp file should be removed by the exception handler + assert not tmp_file.exists() + assert 
exc.value.status_code == 500 + + +def test_save_config_open_fails_no_tmp(tmp_path, monkeypatch) -> None: + cfg = {"c": 3} + p = tmp_path / "cfg2.yaml" + tmp_file = p.parent / f"{p.name}.tmp" + + def _bad_open(*args, **kwargs): + raise OSError("open failed") + monkeypatch.setattr("builtins.open", _bad_open) + + with pytest.raises(HTTPException) as exc: + save_config(cfg, p) + + assert not tmp_file.exists() + assert exc.value.status_code == 500 +def test_save_config_writes_file(tmp_path) -> None: sc = importlib.import_module("ml_service.backend.configs.persistence.save_config") cfg = {"alpha": 1, "nested": {"x": "y"}} cp = tmp_path / "cfgs" / "cfg.yaml" @@ -16,7 +65,7 @@ def test_save_config_writes_file(tmp_path): assert not (cp.parent / f"{cp.name}.tmp").exists() -def test_save_config_failure_cleans_tmp(tmp_path, monkeypatch): +def test_save_config_failure_cleans_tmp_via_module(tmp_path, monkeypatch) -> None: sc = importlib.import_module("ml_service.backend.configs.persistence.save_config") cp = tmp_path / "cfgs" / "cfg.yaml" cp.parent.mkdir(parents=True, exist_ok=True) diff --git a/tests/unit/ml_service/backend/pipelines/test_execute_pipeline.py b/tests/unit/ml_service/backend/pipelines/test_execute_pipeline.py index dfd2fd9b..8656b957 100644 --- a/tests/unit/ml_service/backend/pipelines/test_execute_pipeline.py +++ b/tests/unit/ml_service/backend/pipelines/test_execute_pipeline.py @@ -1,10 +1,60 @@ +"""Unit tests for ``ml_service.backend.pipelines.execute_pipeline``. + +These tests cover subprocess start failures and non-zero exit-code paths, +including that boolean flags are passed as string values when requested. 
+""" + +from __future__ import annotations + import importlib import types +from types import SimpleNamespace +from typing import Any +import pytest from fastapi import HTTPException +from ml_service.backend.pipelines.execute_pipeline import execute_pipeline +from ml_service.backend.registries.exit_codes_meaning import EXIT_MEANING from pydantic import BaseModel +class DummyPayload(BaseModel): + text: str | None = None + flag: bool | None = None + + +def test_execute_pipeline_subprocess_raises(monkeypatch: Any) -> None: + """If starting the subprocess raises, return an HTTP 500 error.""" + + def fake_run(*_a: Any, **_k: Any) -> None: # pragma: no cover - exercised + raise Exception("spawn failed") + + monkeypatch.setattr("subprocess.run", fake_run) + with pytest.raises(HTTPException) as excinfo: + execute_pipeline("ml_service.pipelines.fake", DummyPayload(text="a")) + assert excinfo.value.status_code == 500 + + +def test_execute_pipeline_nonzero_and_flag_in_cmd(monkeypatch: Any) -> None: + """When subprocess returns non-zero, the mapping is used and the command + contains boolean flags converted to strings when requested. + """ + + last_cmd: dict[str, Any] = {} + + def fake_run(cmd: list[str], capture_output: bool, text: bool, env: dict[str, str], cwd: str): + last_cmd["cmd"] = cmd + return SimpleNamespace(returncode=2, stdout="out", stderr="err") + + monkeypatch.setattr("subprocess.run", fake_run) + payload = DummyPayload(text="hello", flag=True) + res = execute_pipeline("ml_service.pipelines.fake", payload, boolean_args=["flag"]) + assert res["exit_code"] == 2 + assert res["status"] == EXIT_MEANING.get(2, "UNKNOWN_ERROR") + # boolean flag was added as a CLI flag (e.g. 
--flag True) + assert any(part == "--flag" or part.startswith("--flag") for part in (str(p) for p in last_cmd["cmd"])) + + def test_execute_pipeline_builds_command_and_returns(monkeypatch): mod = importlib.import_module("ml_service.backend.pipelines.execute_pipeline") @@ -59,3 +109,54 @@ def bad_run(*args, **kwargs): assert e.status_code == 500 assert raised + + +def test_execute_pipeline_skips_empty_and_none(monkeypatch): + mod = importlib.import_module("ml_service.backend.pipelines.execute_pipeline") + + class Payload(BaseModel): + foo: int | None = None + bar: str | None = None + baz: int | None = None + + payload = Payload(foo=None, bar="", baz=3) + + captured = {} + + def fake_run(cmd, capture_output, text, env, cwd): + captured["cmd"] = cmd + return types.SimpleNamespace(returncode=0, stdout="ok", stderr="") + + monkeypatch.setattr(mod, "subprocess", types.SimpleNamespace(run=fake_run)) + + res = mod.execute_pipeline("pipelines.my_module", payload) + assert res["exit_code"] == 0 + cmd = captured.get("cmd") + assert cmd is not None + assert "--baz" in cmd + assert not any(part == "--foo" or part.startswith("--foo") for part in cmd) + assert not any(part == "--bar" or part.startswith("--bar") for part in cmd) + + +def test_execute_pipeline_boolean_false_included(monkeypatch): + mod = importlib.import_module("ml_service.backend.pipelines.execute_pipeline") + + class Payload(BaseModel): + flag: bool | None = None + + payload = Payload(flag=False) + + captured = {} + + def fake_run(cmd, capture_output, text, env, cwd): + captured["cmd"] = cmd + return types.SimpleNamespace(returncode=0, stdout="ok", stderr="") + + monkeypatch.setattr(mod, "subprocess", types.SimpleNamespace(run=fake_run)) + + res = mod.execute_pipeline("pipelines.my_module", payload, boolean_args=["flag"]) + assert res["exit_code"] == 0 + cmd = captured.get("cmd") + assert cmd is not None + assert "--flag" in cmd + assert "False" in cmd diff --git 
a/tests/unit/ml_service/backend/routers/test_data_routes.py b/tests/unit/ml_service/backend/routers/test_data_routes.py index c2403830..6d852bf2 100644 --- a/tests/unit/ml_service/backend/routers/test_data_routes.py +++ b/tests/unit/ml_service/backend/routers/test_data_routes.py @@ -1,5 +1,77 @@ +"""Tests for ``ml_service.backend.routers.data`` validation and write helpers. + +These tests monkeypatch the module-local helpers to exercise the "exists" +and "written" branches as well as the missing/invalid input path. +""" + +from __future__ import annotations + +import importlib +from typing import Any + import pytest +mod = importlib.import_module("ml_service.backend.routers.data") + + +class FakePath: + def __init__(self, exists_flag: bool) -> None: + self._exists = exists_flag + + def exists(self) -> bool: # pragma: no cover - exercised + return self._exists + + def __str__(self) -> str: + return "/fake/path" + + +def test_validate_yaml_missing_fields_simple() -> None: + from fastapi import Request + orig = getattr(mod.validate_yaml, "__wrapped__", mod.validate_yaml) + req = Request({"type": "http", "method": "POST", "path": "/", "headers": []}) + body = orig({}, req) + assert body["valid"] is False + assert "Missing or invalid config type" in body["error"] + + +def test_validate_yaml_reports_exists(monkeypatch: Any) -> None: + monkeypatch.setattr(mod, "load_yaml_and_add_lineage", lambda txt: {"data": {"name": "n", "version": "v"}}) + monkeypatch.setattr(mod, "validate_config_payload", lambda t, d: None) + monkeypatch.setattr(mod, "get_config_path", lambda **k: FakePath(True)) + + from fastapi import Request + orig = getattr(mod.validate_yaml, "__wrapped__", mod.validate_yaml) + req = Request({"type": "http", "method": "POST", "path": "/", "headers": []}) + body = orig({"type": "interim", "config": "a: b"}, req) + assert body["valid"] is True + assert body["exists"] is True + + +def test_write_yaml_written_and_exists(monkeypatch: Any) -> None: + 
monkeypatch.setattr(mod, "load_yaml_and_add_lineage", lambda txt: {"data": {"name": "n", "version": "v"}}) + monkeypatch.setattr(mod, "validate_config_payload", lambda t, d=None: None) + + # exists -> status indicates already exists + monkeypatch.setattr(mod, "get_config_path", lambda **k: FakePath(True)) + from fastapi import Request + orig_write = getattr(mod.write_yaml, "__wrapped__", mod.write_yaml) + req = Request({"type": "http", "method": "POST", "path": "/", "headers": []}) + res_exists = orig_write({"type": "interim", "config": "a: b"}, req) + assert res_exists["status"] == "exists" or "already" in res_exists.get("message", "") + + # written -> save_config called and returns written path + fake_path = FakePath(False) + saved: dict[str, Any] = {} + + def fake_save(cfg: Any, config_path: Any) -> None: + saved["ok"] = True + + monkeypatch.setattr(mod, "get_config_path", lambda **k: fake_path) + monkeypatch.setattr(mod, "save_config", fake_save) + res_written = orig_write({"type": "interim", "config": "a: b"}, req) + assert res_written["status"] == "written" + assert "path" in res_written + def _fake_path(exists: bool, path_str: str = "/fake/path"): class FakePath: @@ -156,3 +228,73 @@ def _save_config_fail(payload_dict, path): # the router wraps failures in HTTPException with the original message assert "disk write error" in str(exc.value) + + +def test_validate_yaml_missing_config_payload() -> None: + import ml_service.backend.routers.data as data_mod + from fastapi import Request + + orig = getattr(data_mod.validate_yaml, "__wrapped__", data_mod.validate_yaml) + req = Request({"type": "http", "method": "POST", "path": "/", "headers": []}) + body = orig({"type": "interim"}, req) + assert body["valid"] is False + assert "Missing config payload" in body["error"] + + +def test_validate_yaml_missing_data_fields(monkeypatch) -> None: + payload = {"type": "interim", "config": "dummy: yaml"} + + monkeypatch.setattr( + 
"ml_service.backend.routers.data.load_yaml_and_add_lineage", + lambda text: {"data": {}}, + ) + + import ml_service.backend.routers.data as data_mod + from fastapi import Request + + orig = getattr(data_mod.validate_yaml, "__wrapped__", data_mod.validate_yaml) + req = Request({"type": "http", "method": "POST", "path": "/", "headers": []}) + body = orig(payload, req) + assert body["valid"] is False + assert "Missing 'data.name' or 'data.version'" in body["error"] + + +def test_write_yaml_missing_config_payload_raises() -> None: + import ml_service.backend.routers.data as data_mod + from fastapi import Request + + orig = getattr(data_mod.write_yaml, "__wrapped__", data_mod.write_yaml) + req = Request({"type": "http", "method": "POST", "path": "/", "headers": []}) + with pytest.raises(Exception) as exc: + orig({"type": "processed"}, req) + assert "Missing config payload" in str(exc.value) + + +def test_write_yaml_missing_data_fields_raises(monkeypatch) -> None: + payload = {"type": "processed", "config": "dummy: yaml"} + + monkeypatch.setattr( + "ml_service.backend.routers.data.load_yaml_and_add_lineage", + lambda text: {"data": {}}, + ) + + import ml_service.backend.routers.data as data_mod + from fastapi import Request + + orig = getattr(data_mod.write_yaml, "__wrapped__", data_mod.write_yaml) + req = Request({"type": "http", "method": "POST", "path": "/", "headers": []}) + with pytest.raises(Exception) as exc: + orig(payload, req) + assert "Missing 'data.name' or 'data.version'" in str(exc.value) + + +def test_write_yaml_missing_type_raises() -> None: + import ml_service.backend.routers.data as data_mod + from fastapi import Request + + orig = getattr(data_mod.write_yaml, "__wrapped__", data_mod.write_yaml) + req = Request({"type": "http", "method": "POST", "path": "/", "headers": []}) + with pytest.raises(Exception) as exc: + orig({"config": "a: b"}, req) + + assert "Missing or invalid config type" in str(exc.value) diff --git 
a/tests/unit/ml_service/backend/routers/test_features_routes.py b/tests/unit/ml_service/backend/routers/test_features_routes.py index d803f26a..ac8df4ca 100644 --- a/tests/unit/ml_service/backend/routers/test_features_routes.py +++ b/tests/unit/ml_service/backend/routers/test_features_routes.py @@ -1,9 +1,83 @@ +"""Tests for `ml_service.backend.routers.features` routes and branches.""" + +from __future__ import annotations + import importlib +from types import SimpleNamespace +from typing import Any import pytest from fastapi import Request +def test_validate_features_missing_fields() -> None: + mod = importlib.import_module("ml_service.backend.routers.features") + orig = getattr(mod.validate_yaml, "__wrapped__", mod.validate_yaml) + from fastapi import Request + + req = Request({"type": "http", "method": "POST", "path": "/", "headers": []}) + body = orig({"name": "x"}, req) + assert body["valid"] is False + assert "Missing feature set name or version" in body["error"] + + +def test_validate_features_missing_config(monkeypatch) -> None: + mod = importlib.import_module("ml_service.backend.routers.features") + orig = getattr(mod.validate_yaml, "__wrapped__", mod.validate_yaml) + from fastapi import Request + + req = Request({"type": "http", "method": "POST", "path": "/", "headers": []}) + body = orig({"name": "n", "version": "v"}, req) + assert body["valid"] is False + assert "Missing feature_config payload" in body["error"] + + +def test_validate_features_exists_and_normalized(monkeypatch) -> None: + mod = importlib.import_module("ml_service.backend.routers.features") + monkeypatch.setattr(mod, "load_yaml_and_add_lineage", lambda txt: {"data": {}}) + monkeypatch.setattr(mod, "validate_feature_config", lambda d: SimpleNamespace(model_dump=lambda mode="json": {"ok": True})) + monkeypatch.setattr(mod, "get_registry_path", lambda *a, **k: SimpleNamespace()) + monkeypatch.setattr(mod, "registry_entry_exists", lambda name, version, path: True) + + orig = 
getattr(mod.validate_yaml, "__wrapped__", mod.validate_yaml) + from fastapi import Request + + payload = {"name": "n", "version": "v", "config": "x: y"} + req = Request({"type": "http", "method": "POST", "path": "/", "headers": []}) + body = orig(payload, req) + assert body["valid"] is True + assert body["exists"] is True + + +def test_write_features_exists_and_written(monkeypatch) -> None: + mod = importlib.import_module("ml_service.backend.routers.features") + monkeypatch.setattr(mod, "load_yaml_and_add_lineage", lambda txt: {"data": {}}) + monkeypatch.setattr(mod, "validate_feature_config", lambda d: SimpleNamespace()) + monkeypatch.setattr(mod, "get_registry_path", lambda *a, **k: SimpleNamespace()) + + # exists -> status exists + monkeypatch.setattr(mod, "registry_entry_exists", lambda name, version, path: True) + orig_write = getattr(mod.write_yaml, "__wrapped__", mod.write_yaml) + from fastapi import Request + payload = {"name": "n", "version": "v", "config": "x: y"} + req = Request({"type": "http", "method": "POST", "path": "/", "headers": []}) + res = orig_write(payload, req) + assert res["status"] == "exists" + + # written -> save_feature_registry called + monkeypatch.setattr(mod, "registry_entry_exists", lambda name, version, path: False) + called: dict[str, Any] = {} + + def fake_save(name, version, validated_config, registry_path): + called["ok"] = True + return {"status": "written", "path": "/x"} + + monkeypatch.setattr(mod, "save_feature_registry", fake_save) + res2 = orig_write(payload, req) + assert res2["status"] in ("written", "success") + assert called.get("ok") is True + + class DummyModel: def __init__(self, payload): self._payload = payload @@ -218,3 +292,27 @@ def _bad_save(name, version, validated_config, registry_path): orig(payload, req) assert "boom" in str(exc.value) + + +def test_write_yaml_missing_name_or_version_raises() -> None: + import ml_service.backend.routers.features as fmod + orig = getattr(fmod.write_yaml, "__wrapped__", 
fmod.write_yaml) + from fastapi import Request + + req = Request({"type": "http", "method": "POST", "path": "/", "headers": []}) + with pytest.raises(Exception) as exc: + orig({"config": "x: y"}, req) + + assert "Missing feature set name or version" in str(exc.value) + + +def test_write_yaml_missing_config_raises() -> None: + import ml_service.backend.routers.features as fmod + orig = getattr(fmod.write_yaml, "__wrapped__", fmod.write_yaml) + from fastapi import Request + + req = Request({"type": "http", "method": "POST", "path": "/", "headers": []}) + with pytest.raises(Exception) as exc: + orig({"name": "n", "version": "v"}, req) + + assert "Missing feature_config payload" in str(exc.value) diff --git a/tests/unit/ml_service/backend/routers/test_pipeline_cfg_routes.py b/tests/unit/ml_service/backend/routers/test_pipeline_cfg_routes.py index 5330dcae..a2ebd6dd 100644 --- a/tests/unit/ml_service/backend/routers/test_pipeline_cfg_routes.py +++ b/tests/unit/ml_service/backend/routers/test_pipeline_cfg_routes.py @@ -1,6 +1,80 @@ +"""Tests for `ml_service.backend.routers.pipeline_cfg` validate and write branches.""" + +from __future__ import annotations + +import importlib +from types import SimpleNamespace + import pytest +def test_validate_pipeline_missing_config_payload() -> None: + mod = importlib.import_module("ml_service.backend.routers.pipeline_cfg") + orig = getattr(mod.validate_yaml, "__wrapped__", mod.validate_yaml) + from fastapi import Request + + req = Request({"type": "http", "method": "POST", "path": "/", "headers": []}) + body = orig({}, req) + assert body["valid"] is False + assert "Missing config payload" in body["error"] + + +def test_validate_pipeline_missing_required_fields(monkeypatch) -> None: + mod = importlib.import_module("ml_service.backend.routers.pipeline_cfg") + # create a yaml with version but missing data_type/algorithm + monkeypatch.setattr(mod, "load_yaml_and_add_lineage", lambda txt: {"version": "v1"}) + monkeypatch.setattr(mod, 
"validate_config_payload", lambda d: SimpleNamespace(model_dump=lambda mode="json": {})) + + orig = getattr(mod.validate_yaml, "__wrapped__", mod.validate_yaml) + from fastapi import Request + + req = Request({"type": "http", "method": "POST", "path": "/", "headers": []}) + body = orig({"config": "version: v1"}, req) + assert body["valid"] is False + assert "Missing required fields" in body["error"] + + +def test_validate_pipeline_exists(monkeypatch) -> None: + mod = importlib.import_module("ml_service.backend.routers.pipeline_cfg") + monkeypatch.setattr(mod, "load_yaml_and_add_lineage", lambda txt: {"version": "v1"}) + monkeypatch.setattr(mod, "validate_config_payload", lambda d: SimpleNamespace(model_dump=lambda mode="json": {})) + monkeypatch.setattr(mod, "get_config_path", lambda **k: SimpleNamespace(exists=lambda : True) ) + + orig = getattr(mod.validate_yaml, "__wrapped__", mod.validate_yaml) + from fastapi import Request + payload = {"config": "version: v1", "data_type": "dt", "algorithm": "alg"} + req = Request({"type": "http", "method": "POST", "path": "/", "headers": []}) + body = orig(payload, req) + assert body["valid"] is True + assert body["exists"] is True + + +def test_write_pipeline_exists_and_written(monkeypatch) -> None: + mod = importlib.import_module("ml_service.backend.routers.pipeline_cfg") + monkeypatch.setattr(mod, "load_yaml_and_add_lineage", lambda txt: {"version": "v1"}) + monkeypatch.setattr(mod, "validate_config_payload", lambda d: SimpleNamespace()) + + # exists case + monkeypatch.setattr(mod, "get_config_path", lambda **k: SimpleNamespace(exists=lambda : True)) + orig_write = getattr(mod.write_yaml, "__wrapped__", mod.write_yaml) + from fastapi import Request + payload = {"config": "version: v1", "data_type": "dt", "algorithm": "alg"} + req = Request({"type": "http", "method": "POST", "path": "/", "headers": []}) + r = orig_write(payload, req) + assert r["status"] == "exists" + + # written case + fake_saved = {} + def 
fake_save(config, config_path): + fake_saved["ok"] = True + + monkeypatch.setattr(mod, "get_config_path", lambda **k: SimpleNamespace(exists=lambda : False)) + monkeypatch.setattr(mod, "save_config", fake_save) + r2 = orig_write(payload, req) + assert r2["success"] == "written" + assert "path" in r2 + + def _fake_path(exists: bool, path_str: str = "/fake/pipeline/path"): class FakePath: def __init__(self, exists_val: bool): @@ -174,3 +248,28 @@ def _bad_save(config, config_path): orig(payload, req) assert "no space" in str(exc.value) + + +def test_write_yaml_missing_config_payload_raises() -> None: + mod = importlib.import_module("ml_service.backend.routers.pipeline_cfg") + orig = getattr(mod.write_yaml, "__wrapped__", mod.write_yaml) + from fastapi import Request + + req = Request({"type": "http", "method": "POST", "path": "/", "headers": []}) + with pytest.raises(Exception) as exc: + orig({}, req) + assert "Missing config payload" in str(exc.value) + + +def test_write_yaml_missing_required_fields_raises(monkeypatch) -> None: + mod = importlib.import_module("ml_service.backend.routers.pipeline_cfg") + monkeypatch.setattr(mod, "load_yaml_and_add_lineage", lambda txt: {"version": "v1"}) + monkeypatch.setattr(mod, "validate_config_payload", lambda d: True) + + orig = getattr(mod.write_yaml, "__wrapped__", mod.write_yaml) + from fastapi import Request + + req = Request({"type": "http", "method": "POST", "path": "/", "headers": []}) + with pytest.raises(Exception) as exc: + orig({"config": "version: v1"}, req) + assert "Missing required fields" in str(exc.value) diff --git a/tests/unit/ml_service/backend/routers/test_promotion_thresholds_routes.py b/tests/unit/ml_service/backend/routers/test_promotion_thresholds_routes.py new file mode 100644 index 00000000..66e3ceff --- /dev/null +++ b/tests/unit/ml_service/backend/routers/test_promotion_thresholds_routes.py @@ -0,0 +1,33 @@ +"""Tests for `ml_service.backend.routers.promotion_thresholds` write branches.""" + +from 
__future__ import annotations + +import importlib +from types import SimpleNamespace + +import pytest + + +def test_write_promotion_missing_config_payload_raises() -> None: + mod = importlib.import_module("ml_service.backend.routers.promotion_thresholds") + orig = getattr(mod.write_yaml, "__wrapped__", mod.write_yaml) + from fastapi import Request + + req = Request({"type": "http", "method": "POST", "path": "/", "headers": []}) + with pytest.raises(Exception) as exc: + orig({}, req) + assert "Missing config payload" in str(exc.value) + + +def test_write_promotion_missing_required_fields(monkeypatch) -> None: + mod = importlib.import_module("ml_service.backend.routers.promotion_thresholds") + monkeypatch.setattr(mod, "load_yaml_and_add_lineage", lambda txt: {"v": 1}) + monkeypatch.setattr(mod, "validate_config_payload", lambda d: SimpleNamespace()) + + orig = getattr(mod.write_yaml, "__wrapped__", mod.write_yaml) + from fastapi import Request + + req = Request({"type": "http", "method": "POST", "path": "/", "headers": []}) + with pytest.raises(Exception) as exc: + orig({"config": "cfg"}, req) + assert "Missing required fields" in str(exc.value) diff --git a/tests/unit/ml_service/frontend/configs/data/test_data_callbacks_branches.py b/tests/unit/ml_service/frontend/configs/data/test_data_callbacks_branches.py new file mode 100644 index 00000000..8a27bd05 --- /dev/null +++ b/tests/unit/ml_service/frontend/configs/data/test_data_callbacks_branches.py @@ -0,0 +1,20 @@ +"""Targeted branches for data config callbacks.""" + +from __future__ import annotations + +from ml_service.frontend.configs.data.callbacks import register_callbacks +from ml_service.frontend.configs.data.layout import PAGE_PREFIX + + +def _find_callback(app_callbacks: list[dict], name: str): + return [c for c in app_callbacks if c["func"].__name__ == name][0]["func"] + + +def test_write_config_parsing_error(dummy_dash_app): + register_callbacks(dummy_dash_app) + write_fn = 
_find_callback(dummy_dash_app.callbacks, "write_config") + + # invalid YAML should hit the parse-exception branch + alert, is_open = write_fn(1, f"{PAGE_PREFIX}-interim-tab", "not: [yaml") + assert "YAML parsing error" in str(alert) + assert is_open is False diff --git a/tests/unit/ml_service/frontend/configs/pipeline_cfg/test_pipeline_cfg_branches.py b/tests/unit/ml_service/frontend/configs/pipeline_cfg/test_pipeline_cfg_branches.py new file mode 100644 index 00000000..92c8dada --- /dev/null +++ b/tests/unit/ml_service/frontend/configs/pipeline_cfg/test_pipeline_cfg_branches.py @@ -0,0 +1,213 @@ +"""Tests for branches in ``frontend.configs.pipeline_cfg.callbacks``. + +These tests exercise backend-response branches (non-OK responses, +validation-failure payloads, exists/written paths) by monkeypatching +``requests.post`` used by the callbacks module. +""" + +from __future__ import annotations + +import importlib +from typing import Any + + +class DummyApp: + """Minimal fake Dash-like app that records decorated callbacks.""" + + def __init__(self) -> None: + self.callbacks: list[dict[str, Any]] = [] + + def callback(self, *args: Any, **kwargs: Any): + def decorator(func: Any) -> Any: + self.callbacks.append({"func": func, "args": args, "kwargs": kwargs}) + return func + + return decorator + + +class FakeResp: + """Small fake response object compatible with ``requests`` usage.""" + + def __init__(self, ok: bool = True, json_data: dict | None = None, status_code: int = 200, text: str = "") -> None: + self.ok = ok + self._json = json_data or {} + self.status_code = status_code + self.text = text + + def json(self) -> dict: + return self._json + + +def _find_callback(app: DummyApp, name: str): + for c in app.callbacks: + if c["func"].__name__ == name: + return c["func"] + raise AssertionError(f"callback {name} not found") + + +def test_validate_config_handles_non_ok(monkeypatch: Any) -> None: + mod = 
importlib.import_module("ml_service.frontend.configs.pipeline_cfg.callbacks") + app = DummyApp() + mod.register_callbacks(app) + validate_fn = _find_callback(app, "validate_config") + + # Backend returns non-OK HTTP response + monkeypatch.setattr( + "ml_service.frontend.configs.pipeline_cfg.callbacks.requests.post", + lambda *a, **k: FakeResp(ok=False, status_code=500, text="srv err"), + ) + alert, is_open, _ = validate_fn(None, "dt", "alg", "version: 1") + assert is_open is False + assert "Backend error" in str(alert) or "500" in str(alert) + + +def test_validate_config_invalid_result(monkeypatch: Any) -> None: + mod = importlib.import_module("ml_service.frontend.configs.pipeline_cfg.callbacks") + app = DummyApp() + mod.register_callbacks(app) + validate_fn = _find_callback(app, "validate_config") + + # Backend returns JSON saying config invalid + monkeypatch.setattr( + "ml_service.frontend.configs.pipeline_cfg.callbacks.requests.post", + lambda *a, **k: FakeResp(ok=True, json_data={"valid": False, "error": "bad config"}), + ) + alert, is_open, _ = validate_fn(None, "dt", "alg", "version: 1") + assert is_open is False + assert "bad config" in str(alert) + + +def test_write_config_exists_and_written(monkeypatch: Any) -> None: + """Test the write callback for both exists and written backend responses.""" + + mod = importlib.import_module("ml_service.frontend.configs.pipeline_cfg.callbacks") + app = DummyApp() + mod.register_callbacks(app) + write_fn = _find_callback(app, "write_config") + + # Backend reports exists + monkeypatch.setattr( + "ml_service.frontend.configs.pipeline_cfg.callbacks.requests.post", + lambda *a, **k: FakeResp(ok=True, json_data={"status": "exists", "message": "already"}), + ) + alert1, is_open1 = write_fn(None, "dt", "alg", "version: 1") + assert is_open1 is False + assert "already" in str(alert1) + + # Backend reports written + monkeypatch.setattr( + "ml_service.frontend.configs.pipeline_cfg.callbacks.requests.post", + lambda *a, **k: 
FakeResp(ok=True, json_data={"status": "written", "path": "/x/y"}), + ) + alert2, is_open2 = write_fn(None, "dt", "alg", "version: 1") + assert is_open2 is False + assert "Config written successfully" in str(alert2) + + +def test_validate_config_missing_inputs(monkeypatch: Any) -> None: + mod = importlib.import_module("ml_service.frontend.configs.pipeline_cfg.callbacks") + app = DummyApp() + mod.register_callbacks(app) + validate_fn = _find_callback(app, "validate_config") + + alert, is_open, value = validate_fn(None, "", "alg", "version: 1") + assert is_open is False + assert "required" in str(alert).lower() + assert value == "version: 1" + + +def test_validate_config_yaml_parsing_error(monkeypatch: Any) -> None: + mod = importlib.import_module("ml_service.frontend.configs.pipeline_cfg.callbacks") + app = DummyApp() + mod.register_callbacks(app) + validate_fn = _find_callback(app, "validate_config") + + # invalid YAML (no version) should trigger parse/validation error + alert, is_open, value = validate_fn(None, "dt", "alg", "not: [unbalanced") + assert is_open is False + assert "yaml" in str(alert).lower() or "parsing" in str(alert).lower() + + +def test_validate_config_success_normalized(monkeypatch: Any) -> None: + mod = importlib.import_module("ml_service.frontend.configs.pipeline_cfg.callbacks") + app = DummyApp() + mod.register_callbacks(app) + validate_fn = _find_callback(app, "validate_config") + + normalized = {"data": {"name": "n", "version": "v1"}} + monkeypatch.setattr( + "ml_service.frontend.configs.pipeline_cfg.callbacks.requests.post", + lambda *a, **k: FakeResp(ok=True, json_data={"valid": True, "normalized": normalized}), + ) + alert, is_open, val = validate_fn(None, "dt", "alg", "version: 1") + assert is_open is True + # returned value should be YAML dumped form of normalized + assert "name: n" in val + + +def test_write_config_backend_error(monkeypatch: Any) -> None: + mod = 
importlib.import_module("ml_service.frontend.configs.pipeline_cfg.callbacks") + app = DummyApp() + mod.register_callbacks(app) + write_fn = _find_callback(app, "write_config") + + monkeypatch.setattr( + "ml_service.frontend.configs.pipeline_cfg.callbacks.requests.post", + lambda *a, **k: FakeResp(ok=False, status_code=500, text="oops"), + ) + alert, is_open = write_fn(None, "dt", "alg", "version: 1") + assert is_open is False + assert "backend error" in str(alert).lower() or "500" in str(alert) + + +def test_validate_config_missing_version(monkeypatch: Any) -> None: + """A YAML payload missing the `version` key should trigger the parsing/validation error branch.""" + mod = importlib.import_module("ml_service.frontend.configs.pipeline_cfg.callbacks") + app = DummyApp() + mod.register_callbacks(app) + validate_fn = _find_callback(app, "validate_config") + + # valid YAML but missing 'version' + alert, is_open, value = validate_fn(None, "dt", "alg", "name: nover") + assert is_open is False + assert "yaml parsing error" in str(alert).lower() or "missing 'version'" in str(alert).lower() + + +def test_validate_config_exists_branch(monkeypatch: Any) -> None: + """When backend reports the config already exists, the exists branch should run.""" + mod = importlib.import_module("ml_service.frontend.configs.pipeline_cfg.callbacks") + app = DummyApp() + mod.register_callbacks(app) + validate_fn = _find_callback(app, "validate_config") + + monkeypatch.setattr( + "ml_service.frontend.configs.pipeline_cfg.callbacks.requests.post", + lambda *a, **k: FakeResp(ok=True, json_data={"valid": True, "exists": True, "normalized": {}}), + ) + alert, is_open, _ = validate_fn(None, "dt", "alg", "version: 1") + assert is_open is False + assert "already exists" in str(alert).lower() or "exists" in str(alert).lower() + + +def test_write_config_missing_inputs(monkeypatch: Any) -> None: + """Missing data/algorithm for write should return the required-inputs alert.""" + mod = 
importlib.import_module("ml_service.frontend.configs.pipeline_cfg.callbacks") + app = DummyApp() + mod.register_callbacks(app) + write_fn = _find_callback(app, "write_config") + + alert, is_open = write_fn(None, "", "alg", "version: 1") + assert is_open is False + assert "required" in str(alert).lower() + + +def test_write_config_missing_version(monkeypatch: Any) -> None: + """A write with YAML missing version should hit the YAML parsing/validation error branch.""" + mod = importlib.import_module("ml_service.frontend.configs.pipeline_cfg.callbacks") + app = DummyApp() + mod.register_callbacks(app) + write_fn = _find_callback(app, "write_config") + + alert, is_open = write_fn(None, "dt", "alg", "name: nover") + assert is_open is False + assert "yaml parsing error" in str(alert).lower() or "missing 'version'" in str(alert).lower() diff --git a/tests/unit/ml_service/frontend/configs/promotion_thresholds/test_promotion_thresholds_layout_callbacks_extra.py b/tests/unit/ml_service/frontend/configs/promotion_thresholds/test_promotion_thresholds_layout_callbacks_extra.py new file mode 100644 index 00000000..6725d43c --- /dev/null +++ b/tests/unit/ml_service/frontend/configs/promotion_thresholds/test_promotion_thresholds_layout_callbacks_extra.py @@ -0,0 +1,36 @@ +"""Extra tests for promotion thresholds layout and callbacks. + +These complement existing callback tests by ensuring the layout builder +is executed and the success branch in `validate_config` is covered via +the `normalized` YAML dump path. 
+""" +from __future__ import annotations + +from importlib import import_module, reload + +import yaml + +promo_pkg = import_module("ml_service.frontend.configs.promotion_thresholds") +register_callbacks = import_module("ml_service.frontend.configs.promotion_thresholds.callbacks").register_callbacks + + +def test_promotion_thresholds_layout_builds(): + layout_mod = reload(import_module("ml_service.frontend.configs.promotion_thresholds.layout")) + layout = layout_mod.build_layout() + assert layout is not None + + +def test_promotion_thresholds_callbacks_success_branch(monkeypatch, dummy_dash_app, mock_requests): + register_callbacks(dummy_dash_app) + vcb = [c for c in dummy_dash_app.callbacks if c["func"].__name__ == "validate_config"][0] + + MockResponse = mock_requests["MockResponse"] + + def fake_success(url, json=None, timeout=None, **kwargs): + return MockResponse(ok=True, status_code=200, text="ok", json_data={"valid": True, "normalized": {"thresholds": {"x": 2}}}) + + mock_requests["patch_post"](fake_success) + alert, is_open, normalized = vcb["func"](1, "no_show", "city", "cfg") + assert "Config valid" in str(alert) + assert is_open is True + assert yaml.safe_load(normalized)["thresholds"]["x"] == 2 diff --git a/tests/unit/ml_service/frontend/docs/test_docs_callbacks.py b/tests/unit/ml_service/frontend/docs/test_docs_callbacks.py index 88ab1410..65e0b0fe 100644 --- a/tests/unit/ml_service/frontend/docs/test_docs_callbacks.py +++ b/tests/unit/ml_service/frontend/docs/test_docs_callbacks.py @@ -1,7 +1,67 @@ +"""Tests for `ml_service.frontend.docs.callbacks` functions.""" + +from __future__ import annotations + import importlib from pathlib import Path +def test_rewrite_links_and_load(tmp_path: Path, monkeypatch) -> None: + mod = importlib.import_module("ml_service.frontend.docs.callbacks") + + # prepare docs tree + docs_root = tmp_path + a = docs_root / "a.md" + b = docs_root / "b.md" + a.write_text("See [B](b.md)") + b.write_text("Hello B") + + 
monkeypatch.setattr(mod, "DOCS_ROOT", docs_root) + + out = mod.rewrite_links("See [B](b.md)", "a.md") + assert "/Docs?doc=b.md" in out + + # test load_doc_from_url when document exists by registering callbacks + class DummyApp: + def __init__(self): + self._callbacks = {} + + def callback(self, *args, **kwargs): + def decorator(f): + self._callbacks[f.__name__] = f + return f + + return decorator + + app = DummyApp() + mod.register_callbacks(app) + load_fn = app._callbacks.get("load_doc_from_url") + assert load_fn is not None + res = load_fn("?doc=a.md") + assert "Docs?doc=b.md" in res or "See" in res + + +def test_load_doc_not_found(monkeypatch, tmp_path: Path) -> None: + mod = importlib.import_module("ml_service.frontend.docs.callbacks") + monkeypatch.setattr(mod, "DOCS_ROOT", tmp_path) + class DummyApp: + def __init__(self): + self._callbacks = {} + + def callback(self, *args, **kwargs): + def decorator(f): + self._callbacks[f.__name__] = f + return f + + return decorator + + app = DummyApp() + mod.register_callbacks(app) + load_fn = app._callbacks.get("load_doc_from_url") + assert load_fn is not None + assert load_fn("?doc=nope.md") == "Document not found." 
+ + def test_rewrite_links_internal_and_external(tmp_path, monkeypatch): mod = importlib.import_module("ml_service.frontend.docs.callbacks") diff --git a/tests/unit/ml_service/frontend/docs/test_docs_callbacks_branches.py b/tests/unit/ml_service/frontend/docs/test_docs_callbacks_branches.py new file mode 100644 index 00000000..4779b98c --- /dev/null +++ b/tests/unit/ml_service/frontend/docs/test_docs_callbacks_branches.py @@ -0,0 +1,27 @@ +"""Additional branch tests for `ml_service.frontend.docs.callbacks`.""" + +from __future__ import annotations + +import importlib +from pathlib import Path + + +def test_rewrite_links_outside_and_non_md(tmp_path: Path, monkeypatch) -> None: + """Ensure non-markdown links and links resolving outside DOCS_ROOT are preserved.""" + mod = importlib.import_module("ml_service.frontend.docs.callbacks") + + docs_root = tmp_path / "docs" + docs_root.mkdir() + monkeypatch.setattr(mod, "DOCS_ROOT", docs_root) + + # non-markdown link should be left unchanged + md = "See [file](file.txt)" + out = mod.rewrite_links(md, "readme.md") + assert "file.txt" in out + assert "/Docs?doc=" not in out + + # absolute path outside DOCS_ROOT should fall into the exception branch and be unchanged + md2 = "See [X](/outside.md)" + out2 = mod.rewrite_links(md2, "readme.md") + assert "/outside.md" in out2 + assert "/Docs?doc=" not in out2 diff --git a/tests/unit/ml_service/frontend/file_viewer/test_file_viewer_callbacks.py b/tests/unit/ml_service/frontend/file_viewer/test_file_viewer_callbacks.py new file mode 100644 index 00000000..7f1ce35e --- /dev/null +++ b/tests/unit/ml_service/frontend/file_viewer/test_file_viewer_callbacks.py @@ -0,0 +1,101 @@ +"""Tests for `ml_service.frontend.file_viewer.callbacks`. + +Covers path-required, backend-unreachable, non-OK response, and success branches. 
+""" + +from __future__ import annotations + +import importlib +from typing import Any + + +class DummyApp: + def __init__(self) -> None: + self.callbacks: list[dict[str, Any]] = [] + + def callback(self, *a: Any, **k: Any): + def dec(f: Any) -> Any: + self.callbacks.append({"func": f, "args": a, "kwargs": k}) + return f + + return dec + + +class FakeResp: + def __init__(self, ok: bool = True, json_data: dict | None = None, status_code: int = 200, text: str = "") -> None: + self.ok = ok + self._json = json_data or {} + self.status_code = status_code + self.text = text + + def json(self) -> dict: + return self._json + + +def _find(app: DummyApp, name: str): + for c in app.callbacks: + if c["func"].__name__ == name: + return c["func"] + raise AssertionError(name) + + +def test_load_file_no_path() -> None: + mod = importlib.import_module("ml_service.frontend.file_viewer.callbacks") + app = DummyApp() + mod.register_callbacks(app) + fn = _find(app, "load_file") + + value, mode, alert = fn(None, "") + assert value == "" + assert mode == "yaml" + assert "path required" in str(alert).lower() + + +def test_load_file_backend_unreachable(monkeypatch: Any) -> None: + mod = importlib.import_module("ml_service.frontend.file_viewer.callbacks") + app = DummyApp() + mod.register_callbacks(app) + fn = _find(app, "load_file") + + def bad_post(*a: Any, **k: Any): + raise RuntimeError("boom") + + monkeypatch.setattr("ml_service.frontend.file_viewer.callbacks.requests.post", bad_post) + value, mode, alert = fn(None, "/some/path") + assert value == "" + assert mode == "yaml" + assert "backend unreachable" in str(alert).lower() + + +def test_load_file_non_ok(monkeypatch: Any) -> None: + mod = importlib.import_module("ml_service.frontend.file_viewer.callbacks") + app = DummyApp() + mod.register_callbacks(app) + fn = _find(app, "load_file") + + monkeypatch.setattr( + "ml_service.frontend.file_viewer.callbacks.requests.post", + lambda *a, **k: FakeResp(ok=False, status_code=404, text="not 
found"), + ) + + value, mode, alert = fn(None, "/x") + assert value == "" + assert mode == "yaml" + assert "404" in str(alert) or "not found" in str(alert).lower() + + +def test_load_file_success(monkeypatch: Any) -> None: + mod = importlib.import_module("ml_service.frontend.file_viewer.callbacks") + app = DummyApp() + mod.register_callbacks(app) + fn = _find(app, "load_file") + + monkeypatch.setattr( + "ml_service.frontend.file_viewer.callbacks.requests.post", + lambda *a, **k: FakeResp(ok=True, json_data={"content": "abc", "mode": "text", "path": "/x/y"}), + ) + + value, mode, alert = fn(None, "/x/y") + assert value == "abc" + assert mode == "text" + assert "loaded /x/y" in str(alert).lower() diff --git a/tests/unit/ml_service/frontend/pages/test_pages_and_scripts_layout.py b/tests/unit/ml_service/frontend/pages/test_pages_and_scripts_layout.py new file mode 100644 index 00000000..0a21520c --- /dev/null +++ b/tests/unit/ml_service/frontend/pages/test_pages_and_scripts_layout.py @@ -0,0 +1,70 @@ +"""Basic page-level tests and a focused scripts layout test. + +These tests call small wrapper functions like `get_layout()` and +`register(app)` for multiple page modules to improve page-level coverage. +They also exercise the `scripts.layout.build_layout()` branches by +temporarily replacing `FRONTEND_SCRIPTS` with a diverse set of field types. 
+""" + +from __future__ import annotations + +import importlib +from typing import Any + + +class DummyApp: + """Minimal fake Dash-like app that records decorated callbacks.""" + + def __init__(self) -> None: + self.callbacks: list[dict[str, Any]] = [] + + def callback(self, *args: Any, **kwargs: Any): + def decorator(func: Any) -> Any: + self.callbacks.append({"func": func, "args": args, "kwargs": kwargs}) + return func + + return decorator + + +def test_pages_get_layout_and_register_simple() -> None: + """Call `get_layout()` and `register()` on a set of page modules.""" + modules = [ + "ml_service.frontend.dir_viewer.page", + "ml_service.frontend.docs.page", + "ml_service.frontend.file_viewer.page", + "ml_service.frontend.configs.promotion_thresholds.page", + "ml_service.frontend.scripts.page", + ] + + for mod_name in modules: + mod = importlib.import_module(mod_name) + # get_layout should build without raising + layout = getattr(mod, "get_layout", lambda: None)() + assert layout is not None + + # register should call into callbacks and add entries to our DummyApp + app = DummyApp() + register = getattr(mod, "register", None) + if register is not None: + register(app) + # at least one callback should be registered for modules that wire callbacks + assert isinstance(app.callbacks, list) + + +def test_scripts_layout_various_field_types(monkeypatch) -> None: + """Exercise the many input-type branches in `scripts.layout.build_layout()`.""" + layout_mod = importlib.import_module("ml_service.frontend.scripts.layout") + + custom_scripts = [ + {"name": "one", "fields": [{"name": "a", "type": "text", "optional": True}]}, + {"name": "two", "fields": [{"name": "b", "type": "number"}, {"name": "c", "type": "boolean", "value": True}]}, + {"name": "three", "fields": [{"name": "d", "type": "dropdown", "options": ["x", "y"], "value": "x"}]}, + {"name": "four", "fields": [{"name": "e", "type": "unknown"}]}, + ] + + monkeypatch.setattr(layout_mod, "FRONTEND_SCRIPTS", 
custom_scripts) + + layout = layout_mod.build_layout() + # Basic sanity: layout is built and contains the page title + assert layout is not None + assert "ML Scripts Dashboard" in repr(layout) diff --git a/tests/unit/ml_service/frontend/pipelines/test_pipelines_callbacks_branches.py b/tests/unit/ml_service/frontend/pipelines/test_pipelines_callbacks_branches.py new file mode 100644 index 00000000..4d27225d --- /dev/null +++ b/tests/unit/ml_service/frontend/pipelines/test_pipelines_callbacks_branches.py @@ -0,0 +1,84 @@ +"""Additional pipeline callback branches to reach full coverage.""" + +from __future__ import annotations + +import importlib +import types + + +def _make_pipeline(): + return { + "name": "testpipe", + "endpoint": "pipelines.test.run", + "fields": [ + {"name": "flag", "type": "boolean"}, + {"name": "count", "type": "number"}, + {"name": "note", "type": "text"}, + ], + } + + +def test_toggle_modal_unmatched_button(monkeypatch): + mod = importlib.import_module("ml_service.frontend.pipelines.callbacks") + pipeline = _make_pipeline() + monkeypatch.setattr(mod, "FRONTEND_PIPELINES", [pipeline]) + + class FakeApp: + def __init__(self): + self._callbacks = [] + + def callback(self, *args, **kwargs): + def _decorator(f): + self._callbacks.append(f) + return f + + return _decorator + + fake_app = FakeApp() + mod.register_callbacks(fake_app) + funcs = {f.__name__: f for f in fake_app._callbacks} + toggle = funcs["toggle_modal"] + + # simulate an unrelated trigger -> should return current is_open + monkeypatch.setattr(mod.dash, "callback_context", types.SimpleNamespace(triggered=[{"prop_id": "other.id.n_clicks"}])) + assert toggle(None, None, None, True) is True + + +def test_run_pipeline_boolean_none_and_number_empty(monkeypatch): + mod = importlib.import_module("ml_service.frontend.pipelines.callbacks") + pipeline = _make_pipeline() + monkeypatch.setattr(mod, "FRONTEND_PIPELINES", [pipeline]) + + class FakeApp: + def __init__(self): + self._callbacks = [] + 
+ def callback(self, *args, **kwargs): + def _decorator(f): + self._callbacks.append(f) + return f + + return _decorator + + fake_app = FakeApp() + mod.register_callbacks(fake_app) + funcs = {f.__name__: f for f in fake_app._callbacks} + run_pipeline = funcs["run_pipeline"] + + called = {} + + def fake_call(endpoint, payload): + called["payload"] = payload + return {"status": "SUCCESS"} + + monkeypatch.setattr(mod, "call_pipeline", fake_call) + # trigger execution (confirm button clicked) + monkeypatch.setattr(mod.dash, "callback_context", types.SimpleNamespace(triggered=[{"prop_id": f"/pipelines-{pipeline['name']}-confirm.n_clicks"}])) + out = run_pipeline(1, None, "", "text") + + # boolean None -> False, number '' -> None, text -> 'text' + assert called["payload"] == {"flag": False, "count": None, "note": "text"} + + import dash_bootstrap_components as dbc + + assert isinstance(out, dbc.Textarea) diff --git a/tests/unit/ml_service/frontend/pipelines/test_pipelines_metadata_layout.py b/tests/unit/ml_service/frontend/pipelines/test_pipelines_metadata_layout.py new file mode 100644 index 00000000..19bc03f3 --- /dev/null +++ b/tests/unit/ml_service/frontend/pipelines/test_pipelines_metadata_layout.py @@ -0,0 +1,73 @@ +"""Tests to exercise remaining branches in pipelines metadata and layout. + +These tests patch the frontend registry and reload modules so the +module-level construction logic in `pipelines_metadata` runs and the +layout builder in `layout` is exercised (including the fallback else +branch for unknown field types). 
+""" +from __future__ import annotations + +from importlib import import_module, reload +from types import SimpleNamespace +from typing import Any + +registry = import_module("ml_service.frontend.pipelines.pipelines_registry") +pipelines_pkg = import_module("ml_service.frontend.pipelines") + + +def _mk_field(annotation: Any, default: Any = None) -> Any: + return SimpleNamespace(annotation=annotation, default=default) + + +def test_pipelines_metadata_various_field_types(monkeypatch): + # Build a fake args_schema.model_fields mapping with a range of types + model_fields = { + "logging_level": _mk_field(str, default=None), + "env": _mk_field(str, default=None), + "stage": _mk_field(str, default=None), + "is_enabled": _mk_field(bool, default=True), + "count": _mk_field(int, default=None), + "name": _mk_field(str, default=None), + } + + fake_args_schema = SimpleNamespace(model_fields=model_fields) + fake_registry = [ + { + "name": "TestPipeline", + "endpoint": "pipelines/test", + "args_schema": fake_args_schema, + "field_metadata": { + "is_enabled": {"optional": True, "label": "Enabled?"}, + "count": {"optional": True, "placeholder": "count"}, + }, + } + ] + + monkeypatch.setattr(registry, "FRONTEND_PIPELINES_REGISTRY", fake_registry, raising=False) + + # Reload the pipelines_metadata module so it rebuilds FRONTEND_PIPELINES + metadata = reload(import_module("ml_service.frontend.pipelines.pipelines_metadata")) + + assert any(p["name"] == "TestPipeline" for p in metadata.FRONTEND_PIPELINES) + p = next(p for p in metadata.FRONTEND_PIPELINES if p["name"] == "TestPipeline") + types_by_name = {f["name"]: f["type"] for f in p["fields"]} + + assert types_by_name["logging_level"] == "dropdown" + assert types_by_name["env"] == "dropdown" + assert types_by_name["stage"] == "dropdown" + assert types_by_name["is_enabled"] == "boolean" + assert types_by_name["count"] == "number" + assert types_by_name["name"] == "text" + + +def test_pipelines_layout_else_branch(monkeypatch): + # 
Create a pipeline with an unknown field type to hit the layout's else branch + fake_field = {"name": "mystery", "type": "mystery", "placeholder": "x", "optional": False} + fake_pipeline = {"name": "MysteryPipeline", "endpoint": "pipelines/mystery", "fields": [fake_field]} + + metadata = reload(import_module("ml_service.frontend.pipelines.pipelines_metadata")) + monkeypatch.setattr(metadata, "FRONTEND_PIPELINES", [fake_pipeline], raising=False) + + layout_mod = reload(import_module("ml_service.frontend.pipelines.layout")) + layout = layout_mod.build_layout() + assert layout is not None diff --git a/tests/unit/ml_service/frontend/scripts/test_scripts_callbacks.py b/tests/unit/ml_service/frontend/scripts/test_scripts_callbacks.py index 61d7524b..c5a62978 100644 --- a/tests/unit/ml_service/frontend/scripts/test_scripts_callbacks.py +++ b/tests/unit/ml_service/frontend/scripts/test_scripts_callbacks.py @@ -1,10 +1,73 @@ +"""Tests for `ml_service.frontend.scripts.callbacks` behavior.""" + +from __future__ import annotations + import importlib import types +from types import SimpleNamespace +from typing import Any import dash import dash_bootstrap_components as dbc +class DummyApp: + def __init__(self) -> None: + self.callbacks: list[dict[str, Any]] = [] + + def callback(self, *a: Any, **k: Any): + def dec(f: Any) -> Any: + self.callbacks.append({"func": f, "args": a, "kwargs": k}) + return f + + return dec + + +def _find(app: DummyApp, name: str): + for c in app.callbacks: + if c["func"].__name__ == name: + return c["func"] + raise AssertionError(name) + + +def test_toggle_modal_and_run_pipeline(monkeypatch) -> None: + cb_mod = importlib.import_module("ml_service.frontend.scripts.callbacks") + + custom = [ + {"name": "x", "endpoint": "/scripts/x", "fields": [ + {"name": "n", "type": "number"}, + {"name": "flag", "type": "boolean"}, + {"name": "ops", "type": "text"}, + ]} + ] + + # patch the FRONTEND_SCRIPTS used by callbacks + monkeypatch.setattr(cb_mod, 
"FRONTEND_SCRIPTS", custom) + + app = DummyApp() + cb_mod.register_callbacks(app) + + toggle = _find(app, "toggle_modal") + run = _find(app, "run_pipeline") + + # simulate dash.callback_context.triggered + fake_ctx = SimpleNamespace(triggered=[{"prop_id": "/scripts-x-submit.n_clicks"}]) + monkeypatch.setattr(cb_mod.dash, "callback_context", fake_ctx, raising=False) + + # submit click should open modal + assert toggle(1, None, None, False) is True + + # confirm click should close modal + fake_ctx.triggered = [{"prop_id": "/scripts-x-confirm.n_clicks"}] + assert toggle(None, 1, None, True) is False + + # run pipeline: monkeypatch call_script to return SUCCESS + monkeypatch.setattr(cb_mod, "call_script", lambda ep, payload: {"status": "SUCCESS"}) + res = run(1, "3", True, "a,b") + # returned is a dbc.Textarea-like object; its `value` should contain the status + assert "SUCCESS" in str(res) + + def test_register_scripts_toggle_and_run(monkeypatch): mod = importlib.import_module("ml_service.frontend.scripts.callbacks") diff --git a/tests/unit/ml_service/frontend/scripts/test_scripts_callbacks_branches.py b/tests/unit/ml_service/frontend/scripts/test_scripts_callbacks_branches.py new file mode 100644 index 00000000..cd2b6713 --- /dev/null +++ b/tests/unit/ml_service/frontend/scripts/test_scripts_callbacks_branches.py @@ -0,0 +1,76 @@ +"""Branch-targeted tests for `ml_service.frontend.scripts.callbacks`.""" + +from __future__ import annotations + +import importlib +import types + + +def test_toggle_modal_else_returns_current(monkeypatch) -> None: + """When a non-matching button triggers the modal callback, it should return current state.""" + mod = importlib.import_module("ml_service.frontend.scripts.callbacks") + + script = {"name": "s1", "endpoint": "ep", "fields": [{"name": "f1", "type": "text"}]} + monkeypatch.setattr(mod, "FRONTEND_SCRIPTS", [script]) + + class FakeApp: + def __init__(self): + self._callbacks = [] + + def callback(self, *args, **kwargs): + def 
_decorator(func): + self._callbacks.append(func) + return func + + return _decorator + + fake_app = FakeApp() + mod.register_callbacks(fake_app) + funcs = {f.__name__: f for f in fake_app._callbacks} + toggle = funcs["toggle_modal"] + + # triggered by unknown button id -> should return current state + monkeypatch.setattr(mod.dash, "callback_context", types.SimpleNamespace(triggered=[{"prop_id": "unknown.n_clicks"}])) + assert toggle(None, None, None, True) is True + assert toggle(None, None, None, False) is False + + +def test_run_pipeline_number_none_and_float(monkeypatch) -> None: + """Cover `number` field branches: None and float conversions.""" + mod = importlib.import_module("ml_service.frontend.scripts.callbacks") + + script = {"name": "num", "endpoint": "ep", "fields": [{"name": "n", "type": "number"}]} + monkeypatch.setattr(mod, "FRONTEND_SCRIPTS", [script]) + + class FakeApp: + def __init__(self): + self._callbacks = [] + + def callback(self, *args, **kwargs): + def _decorator(func): + self._callbacks.append(func) + return func + + return _decorator + + fake_app = FakeApp() + mod.register_callbacks(fake_app) + funcs = {f.__name__: f for f in fake_app._callbacks} + run_pipeline = funcs["run_pipeline"] + + captured = {} + + def fake_call(endpoint, payload): + captured["payload"] = payload + return {"status": "SUCCESS"} + + monkeypatch.setattr(mod, "call_script", fake_call) + + # None value -> payload 'n' should be None + _ = run_pipeline(1, None) + assert captured["payload"]["n"] is None + + # float value -> payload 'n' should be float + _ = run_pipeline(1, "3.14") + assert isinstance(captured["payload"]["n"], float) + assert abs(captured["payload"]["n"] - 3.14) < 1e-8 diff --git a/tests/unit/ml_service/frontend/scripts/test_scripts_metadata.py b/tests/unit/ml_service/frontend/scripts/test_scripts_metadata.py new file mode 100644 index 00000000..662c11fe --- /dev/null +++ b/tests/unit/ml_service/frontend/scripts/test_scripts_metadata.py @@ -0,0 +1,28 @@ 
+"""Tests for `ml_service.frontend.scripts.scripts_metadata` helpers.""" + +from __future__ import annotations + +import importlib +from types import SimpleNamespace + +# Use PEP 604 `X | Y` union syntax for compatibility with ruff UP007 + + +def test_is_boolean_and_number_field_helpers() -> None: + mod = importlib.import_module("ml_service.frontend.scripts.scripts_metadata") + + # boolean detection + assert mod.is_boolean_field(SimpleNamespace(annotation=bool)) is True + assert mod.is_boolean_field(SimpleNamespace(annotation=bool | None)) is True + + # number detection + assert mod.is_number_field(SimpleNamespace(annotation=int)) is True + assert mod.is_number_field(SimpleNamespace(annotation=int | float)) is True + + +def test_frontend_scripts_populated() -> None: + mod = importlib.import_module("ml_service.frontend.scripts.scripts_metadata") + assert isinstance(mod.FRONTEND_SCRIPTS, list) + # ensure each entry has expected keys + for s in mod.FRONTEND_SCRIPTS: + assert "name" in s and "endpoint" in s and "fields" in s From ff709bb1d9c0ea2eadec1adfc58b26953181051a Mon Sep 17 00:00:00 2001 From: Sebastijan-Dominis Date: Mon, 30 Mar 2026 07:42:29 +0200 Subject: [PATCH 09/17] Started adding some integration and e2e tests. While we already had a handful of integration and e2e tests, we needed more. This commit adds a few more. 
--- tests/conftest.py | 5 + tests/e2e/test_latest_snapshot_e2e.py | 53 +++++++++++ .../test_backend_pipelines_route.py | 42 +++++++++ .../test_execute_pipeline_integration.py | 93 +++++++++++++++++++ tests/integration/test_pipeline_runner.py | 42 +++++++++ 5 files changed, 235 insertions(+) create mode 100644 tests/e2e/test_latest_snapshot_e2e.py create mode 100644 tests/integration/test_backend_pipelines_route.py create mode 100644 tests/integration/test_execute_pipeline_integration.py create mode 100644 tests/integration/test_pipeline_runner.py diff --git a/tests/conftest.py b/tests/conftest.py index 13f9ecc3..af21678f 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -4,6 +4,7 @@ and ensure the project root is on `sys.path` for tests. """ +import contextlib import sys import types from pathlib import Path @@ -89,6 +90,10 @@ def fastapi_client(): raise RuntimeError("FastAPI TestClient or ml_service backend not importable in test environment") client = TestClient(_backend_main.app) + # During tests, disable slowapi rate limiting if present to avoid + # accidental 429 failures caused by shared TestClient remote address. + with contextlib.suppress(Exception): + _backend_main.app.state.limiter.enabled = False try: yield client finally: diff --git a/tests/e2e/test_latest_snapshot_e2e.py b/tests/e2e/test_latest_snapshot_e2e.py new file mode 100644 index 00000000..d4cb268f --- /dev/null +++ b/tests/e2e/test_latest_snapshot_e2e.py @@ -0,0 +1,53 @@ +"""End-to-end tests for snapshot discovery utilities. + +These tests create a realistic snapshot layout on disk and validate that the +`get_latest_snapshot_path` utility chooses the newest snapshot and handles +tie-breaking by UUID. They also assert that an empty directory raises the +expected ``DataError``. 
+""" + +from __future__ import annotations + +from pathlib import Path + +import pytest +from ml.exceptions import DataError +from ml.utils.snapshots.latest_snapshot import get_latest_snapshot_path + + +def _mk_snapshot(base: Path, ts: str, uuid_str: str) -> Path: + p = base / f"{ts}_{uuid_str}" + p.mkdir(parents=True, exist_ok=False) + return p + + +def test_get_latest_snapshot_with_tie_breaking(tmp_path: Path) -> None: + """Latest snapshot selection prefers newest timestamp then highest UUID.""" + + base = tmp_path / "snapshots" + base.mkdir() + + ts1 = "2026-03-28T12-00-00" + _mk_snapshot(base, ts1, "aaaaaaaa") + p2 = _mk_snapshot(base, ts1, "bbbbbbbb") + + ts2 = "2026-03-28T13-00-00" + p3 = _mk_snapshot(base, ts2, "cccccccc") + + selected = get_latest_snapshot_path(base) + assert selected.name == p3.name + + # remove the newest and ensure the tie-breaking among the remaining works + p3.rmdir() + selected2 = get_latest_snapshot_path(base) + assert selected2.name == p2.name + + +def test_get_latest_snapshot_raises_on_empty(tmp_path: Path) -> None: + """An empty snapshots directory should raise ``DataError``.""" + + base = tmp_path / "empty_snapshots" + base.mkdir() + + with pytest.raises(DataError): + get_latest_snapshot_path(base) diff --git a/tests/integration/test_backend_pipelines_route.py b/tests/integration/test_backend_pipelines_route.py new file mode 100644 index 00000000..0844f5e3 --- /dev/null +++ b/tests/integration/test_backend_pipelines_route.py @@ -0,0 +1,42 @@ +"""Integration tests for ML service backend pipeline routes. + +These tests use FastAPI's TestClient to exercise route validation and ensure +that the router forwards valid payloads to the underlying pipeline executor. +The actual subprocess invocation is monkeypatched to keep tests fast and +deterministic. 
+""" + +from __future__ import annotations + +from typing import Any + +import ml_service.backend.routers.pipelines as pipelines_router_module +from fastapi.testclient import TestClient +from ml_service.backend.main import app + + +def test_pipelines_train_route_calls_execute_pipeline(monkeypatch: Any) -> None: + """POST /pipelines/train should validate input and call execute_pipeline. + + The real subprocess call is replaced with a fake implementation that + verifies the module path and payload shape. This focuses the test on the + FastAPI routing, request validation and integration with the router. + """ + + fake_response: dict[str, Any] = {"exit_code": 0, "status": "SUCCESS", "stdout": "ok", "stderr": ""} + + def fake_execute_pipeline(module_path: str, payload: Any, boolean_args: list[str] | None = None) -> dict[str, Any]: + # router for /train should execute the training runner module + assert module_path == "pipelines.runners.train" + # payload is a Pydantic model instance; check basic attributes + assert hasattr(payload, "problem") and hasattr(payload, "segment") and hasattr(payload, "version") + return fake_response + + monkeypatch.setattr(pipelines_router_module, "execute_pipeline", fake_execute_pipeline) + + client = TestClient(app) + payload = {"problem": "no_show", "segment": "global", "version": "v1"} + resp = client.post("/pipelines/train", json=payload) + + assert resp.status_code == 200 + assert resp.json() == fake_response diff --git a/tests/integration/test_execute_pipeline_integration.py b/tests/integration/test_execute_pipeline_integration.py new file mode 100644 index 00000000..339d21fe --- /dev/null +++ b/tests/integration/test_execute_pipeline_integration.py @@ -0,0 +1,93 @@ +"""Integration tests for `ml_service.backend.pipelines.execute_pipeline`. + +These tests exercise the real subprocess invocation codepath by creating a +temporary test module under `tests/` and invoking it via +`execute_pipeline(... )` so that `python -m ` is executed. 
+ +Note: these tests create and remove transient files inside `tests/` and +are safe to run on both Windows and Linux CI agents. +""" + +from __future__ import annotations + +import shutil +from pathlib import Path +from textwrap import dedent +from typing import Any +from uuid import uuid4 + +from ml_service.backend.pipelines.execute_pipeline import execute_pipeline +from pydantic import BaseModel + + +def _make_dummy_package(pkg_name: str, code: str) -> Path: + base = Path("tests") / pkg_name + base.mkdir(parents=True, exist_ok=False) + (base / "__init__.py").write_text("") + (base / "dummy_pipeline.py").write_text(code) + return base + + +def _remove_dummy_package(base: Path) -> None: + shutil.rmtree(base) + + +def test_execute_pipeline_runs_real_subprocess() -> None: + """Create a transient module and run it via subprocess. + + The dummy module prints a JSON object containing the CLI args and exits + with code ``0`` normally and ``2`` when ``--param1 fail`` is provided. + This verifies argument marshalling, stdout capture and exit-code + propagation from the subprocess invocation. 
+ """ + + pkg_name = f"_integration_temp_pkg_{uuid4().hex}" + code = dedent( + """\ + from __future__ import annotations + import argparse + import json + import sys + + def main() -> None: + parser = argparse.ArgumentParser() + parser.add_argument("--param1", type=str, default="") + parser.add_argument("--flag", type=str, default="False") + args = parser.parse_args() + print(json.dumps({"param1": args.param1, "flag": args.flag})) + if args.param1 == "fail": + sys.exit(2) + sys.exit(0) + + if __name__ == "__main__": + main() + """ + ) + + base = Path("tests") / pkg_name + try: + base = _make_dummy_package(pkg_name, code) + + class Payload(BaseModel): + param1: str | None = None + flag: bool | None = None + + payload = Payload(param1="ok", flag=True) + res: dict[str, Any] = execute_pipeline( + f"tests.{pkg_name}.dummy_pipeline", payload, boolean_args=["flag"] + ) + + assert isinstance(res, dict) + assert res["exit_code"] == 0 + # ensure the module printed the JSON we expect + assert '"param1": "ok"' in res["stdout"] + # boolean flag should be present and represented as a string + assert "True" in res["stdout"] or '"flag"' in res["stdout"] + + # Non-zero exit path + payload2 = Payload(param1="fail", flag=False) + res2 = execute_pipeline(f"tests.{pkg_name}.dummy_pipeline", payload2, boolean_args=["flag"]) # type: ignore[arg-type] + assert res2["exit_code"] != 0 + finally: + if base.exists(): + _remove_dummy_package(base) diff --git a/tests/integration/test_pipeline_runner.py b/tests/integration/test_pipeline_runner.py new file mode 100644 index 00000000..aa58d1da --- /dev/null +++ b/tests/integration/test_pipeline_runner.py @@ -0,0 +1,42 @@ +"""Integration tests for the pipeline runner utilities. + +These tests ensure `PipelineRunner` executes step `before`, `run` and `after` +hooks in order and returns the final context object. 
+""" + +from __future__ import annotations + +from ml.utils.pipeline_core.runner import PipelineRunner +from ml.utils.pipeline_core.step import PipelineStep + + +class IncStep(PipelineStep[dict[str, int]]): + """A tiny step that increments an integer counter in the context.""" + + def __init__(self, key: str, amount: int) -> None: + self.key = key + self.amount = amount + + def before(self, ctx: dict[str, int]) -> None: + ctx.setdefault(self.key, 0) + + def run(self, ctx: dict[str, int]) -> dict[str, int]: + ctx[self.key] += self.amount + return ctx + + def after(self, ctx: dict[str, int]) -> None: + ctx[f"after_{self.key}"] = 1 + + +def test_pipeline_runner_executes_hooks_and_steps() -> None: + """PipelineRunner runs steps in order and invokes hooks appropriately.""" + + steps: list[PipelineStep[dict[str, int]]] = [IncStep("a", 1), IncStep("a", 2), IncStep("b", 3)] + runner: PipelineRunner[dict[str, int]] = PipelineRunner(steps) + ctx: dict[str, int] = {} + res = runner.run(ctx) + + assert res["a"] == 3 + assert res["b"] == 3 + assert res["after_a"] == 1 + assert res["after_b"] == 1 From d8a0e79b3affdab9a7fb3d053022d8cfcefbf651 Mon Sep 17 00:00:00 2001 From: Sebastijan-Dominis Date: Mon, 30 Mar 2026 08:00:28 +0200 Subject: [PATCH 10/17] Added a dozen more integration tests. 
--- .../test_execute_script_integration.py | 82 +++++++++++++ .../test_freeze_feature_set_integration.py | 47 +++++++ .../test_register_raw_snapshot_integration.py | 88 ++++++++++++++ .../test_run_all_workflows_main.py | 59 +++++++++ .../test_search_main_integration.py | 79 ++++++++++++ .../test_train_runner_integration.py | 115 ++++++++++++++++++ 6 files changed, 470 insertions(+) create mode 100644 tests/integration/test_execute_script_integration.py create mode 100644 tests/integration/test_freeze_feature_set_integration.py create mode 100644 tests/integration/test_register_raw_snapshot_integration.py create mode 100644 tests/integration/test_run_all_workflows_main.py create mode 100644 tests/integration/test_search_main_integration.py create mode 100644 tests/integration/test_train_runner_integration.py diff --git a/tests/integration/test_execute_script_integration.py b/tests/integration/test_execute_script_integration.py new file mode 100644 index 00000000..ec5f80d8 --- /dev/null +++ b/tests/integration/test_execute_script_integration.py @@ -0,0 +1,82 @@ +"""Integration tests for `ml_service.backend.scripts.execute_script`. + +These tests create a transient module under `tests/` and execute it via +`execute_script(...)` to validate list argument expansion, boolean flags, +stdout capture, and exit-code propagation. 
+""" + +from __future__ import annotations + +import shutil +from pathlib import Path +from textwrap import dedent +from typing import Any +from uuid import uuid4 + +from ml_service.backend.scripts.execute_script import execute_script +from pydantic import BaseModel + + +def _make_dummy_package(pkg_name: str, code: str) -> Path: + base = Path("tests") / pkg_name + base.mkdir(parents=True, exist_ok=False) + (base / "__init__.py").write_text("") + (base / "dummy_script.py").write_text(code) + return base + + +def _remove_dummy_package(base: Path) -> None: + shutil.rmtree(base) + + +def test_execute_script_handles_list_and_boolean_args() -> None: + """Verify list flags are expanded and boolean flags are stringified.""" + + pkg_name = f"_integration_temp_script_pkg_{uuid4().hex}" + code = dedent( + """\ + from __future__ import annotations + import argparse + import json + import sys + + def main() -> None: + parser = argparse.ArgumentParser() + parser.add_argument("--names", nargs="+", default=[]) + parser.add_argument("--param", type=str, default="") + parser.add_argument("--flag", type=str, default="False") + args = parser.parse_args() + print(json.dumps({"names": args.names, "param": args.param, "flag": args.flag})) + if args.param == "fail": + sys.exit(2) + sys.exit(0) + + if __name__ == "__main__": + main() + """ + ) + + base = Path("tests") / pkg_name + try: + base = _make_dummy_package(pkg_name, code) + + class Payload(BaseModel): + names: list[str] | None = None + param: str | None = None + flag: bool | None = None + + # success path + payload = Payload(names=["a", "b"], param="ok", flag=True) + res: dict[str, Any] = execute_script(f"tests.{pkg_name}.dummy_script", payload, boolean_args=["flag"]) # type: ignore[arg-type] + assert res["exit_code"] == 0 + assert '"names"' in res["stdout"] + assert '"a"' in res["stdout"] + assert "True" in res["stdout"] or '"flag"' in res["stdout"] + + # failure path + payload2 = Payload(names=["x"], param="fail", flag=False) + 
res2 = execute_script(f"tests.{pkg_name}.dummy_script", payload2, boolean_args=["flag"]) # type: ignore[arg-type] + assert res2["exit_code"] != 0 + finally: + if base.exists(): + _remove_dummy_package(base) diff --git a/tests/integration/test_freeze_feature_set_integration.py b/tests/integration/test_freeze_feature_set_integration.py new file mode 100644 index 00000000..c031ae1e --- /dev/null +++ b/tests/integration/test_freeze_feature_set_integration.py @@ -0,0 +1,47 @@ +"""Integration tests for `pipelines.features.freeze` CLI flow. + +Tests stub out registry loading and strategy execution to verify the +high-level orchestration and metadata persistence. +""" + +from __future__ import annotations + +from pathlib import Path +from types import SimpleNamespace +from typing import Any + +import pipelines.features.freeze as freeze_mod + + +def test_freeze_feature_set_success(tmp_path: Path, monkeypatch: Any) -> None: + args = SimpleNamespace(feature_set="fs", version="v", snapshot_binding_key=None, owner="me", logging_level="INFO") + monkeypatch.setattr(freeze_mod, "parse_args", lambda: args) + monkeypatch.setattr(freeze_mod, "bootstrap_logging", lambda *a, **k: None) + monkeypatch.setattr(freeze_mod, "add_file_handler", lambda *a, **k: None) + + # load_feature_registry -> raw config; validate_feature_registry -> normalized config object + monkeypatch.setattr(freeze_mod, "load_feature_registry", lambda fs, v: {"dummy": True}) + monkeypatch.setattr(freeze_mod, "get_strategy_type", lambda cfg: "tabular") + monkeypatch.setattr(freeze_mod, "validate_feature_registry", lambda raw, t: SimpleNamespace(type="tabular", feature_store_path=str(tmp_path / "feature_store"))) + + # Fake strategy that returns an output with snapshot_path and metadata + snapshot_dir = tmp_path / "feature_store" / "snap1" + + class FakeStrategy: + def freeze(self, *a, **k): + snapshot_dir.mkdir(parents=True, exist_ok=True) + return SimpleNamespace(snapshot_path=snapshot_dir, metadata={"ok": True}) + + 
monkeypatch.setattr(freeze_mod, "get_strategy", lambda t: FakeStrategy()) + + saved: dict[str, Any] = {} + + def fake_save_metadata(metadata, target_dir: Path): + saved["meta"] = metadata + saved["target"] = Path(target_dir) + + monkeypatch.setattr(freeze_mod, "save_metadata", fake_save_metadata) + + rc = freeze_mod.main() + assert rc == 0 + assert saved["target"] == snapshot_dir diff --git a/tests/integration/test_register_raw_snapshot_integration.py b/tests/integration/test_register_raw_snapshot_integration.py new file mode 100644 index 00000000..4a231426 --- /dev/null +++ b/tests/integration/test_register_raw_snapshot_integration.py @@ -0,0 +1,88 @@ +"""Integration tests for `pipelines.data.register_raw_snapshot`. + +These tests create a temporary raw snapshot directory layout and verify the +CLI flow reads data, prepares metadata and persists it via `save_metadata`. +""" + +from __future__ import annotations + +import argparse +from pathlib import Path +from typing import Any + +import pandas as pd +import pipelines.data.register_raw_snapshot as reg_mod + + +def test_register_raw_snapshot_success(tmp_path: Path, monkeypatch: Any) -> None: + # Create a fake snapshot containing a single CSV file + data_dir = tmp_path / "data" / "raw" / "hotel_bookings" / "v1" / "snap1" + data_dir.mkdir(parents=True) + csv_path = data_dir / "data.csv" + csv_path.write_text("a,b\n1,2\n") + + monkeypatch.setattr(reg_mod, "get_snapshot_path", lambda sid, parent: data_dir) + monkeypatch.setattr(reg_mod, "bootstrap_logging", lambda *a, **k: None) + monkeypatch.setattr(reg_mod, "add_file_handler", lambda *a, **k: None) + + # Provide CLI args expected by argparse in the module + monkeypatch.setattr( + reg_mod, + "parse_args", + lambda: argparse.Namespace( + data="hotel_bookings", + version="v1", + snapshot_id="latest", + logging_level="INFO", + owner="test", + ), + ) + + # Make read_data return a DataFrame (we could use the real reader, but stub for speed) + monkeypatch.setattr(reg_mod, 
"read_data", lambda fmt, p: pd.read_csv(p)) + + called: dict[str, Any] = {} + + class DummyMeta: + def model_dump(self, exclude_none=True): + return {"meta": True} + + monkeypatch.setattr(reg_mod, "prepare_metadata", lambda df, **k: DummyMeta()) + + def fake_save_metadata(payload, target_dir: Path): + called["payload"] = payload + called["target_dir"] = Path(target_dir) + + monkeypatch.setattr(reg_mod, "save_metadata", fake_save_metadata) + + rc = reg_mod.main() + assert rc == 0 + assert called["target_dir"] == data_dir + + +def test_register_raw_snapshot_fails_with_multiple_files(tmp_path: Path, monkeypatch: Any) -> None: + data_dir = tmp_path / "data" / "raw" / "hotel_bookings" / "v1" / "snap2" + data_dir.mkdir(parents=True) + (data_dir / "data.csv").write_text("a,b\n1,2\n") + (data_dir / "data.parquet").write_text("x") + + monkeypatch.setattr(reg_mod, "get_snapshot_path", lambda sid, parent: data_dir) + monkeypatch.setattr(reg_mod, "bootstrap_logging", lambda *a, **k: None) + monkeypatch.setattr(reg_mod, "add_file_handler", lambda *a, **k: None) + monkeypatch.setattr(reg_mod, "read_data", lambda fmt, p: pd.read_csv(p)) + + # Provide CLI args expected by argparse in the module + monkeypatch.setattr( + reg_mod, + "parse_args", + lambda: argparse.Namespace( + data="hotel_bookings", + version="v1", + snapshot_id="latest", + logging_level="INFO", + owner="test", + ), + ) + + rc = reg_mod.main() + assert rc != 0 diff --git a/tests/integration/test_run_all_workflows_main.py b/tests/integration/test_run_all_workflows_main.py new file mode 100644 index 00000000..493b334a --- /dev/null +++ b/tests/integration/test_run_all_workflows_main.py @@ -0,0 +1,59 @@ +"""Integration tests for the master orchestrator `run_all_workflows`. + +These tests monkeypatch subprocess execution to verify the high-level +orchestration flow (success and failure paths) without launching real +subprocesses. 
+""" + +from __future__ import annotations + +from types import SimpleNamespace +from typing import Any + +import pipelines.orchestration.master.run_all_workflows as run_all + + +def test_main_success(monkeypatch: Any) -> None: + """When all steps succeed, `main()` returns 0 and runs three steps.""" + + calls: list[list[str]] = [] + + def fake_run(cmd: list[str], text: bool = False, **kwargs: Any) -> SimpleNamespace: + calls.append(list(cmd)) + return SimpleNamespace(returncode=0) + + monkeypatch.setattr(run_all, "subprocess", SimpleNamespace(run=fake_run)) + monkeypatch.setattr(run_all, "setup_logging", lambda *a, **k: None) + monkeypatch.setattr(run_all, "log_completion", lambda start_time, msg: None) + monkeypatch.setattr(run_all.sys, "argv", ["prog"]) # stable parse_args + + rc = run_all.main() + assert rc == 0 + assert len(calls) == 3 + + +def test_main_fails_on_step(monkeypatch: Any) -> None: + """When a step fails, `main()` returns that code and reports the failure.""" + + state = {"i": 0} + + def fake_run(cmd: list[str], text: bool = False, **kwargs: Any) -> SimpleNamespace: + state["i"] += 1 + # make the second step fail + if state["i"] == 2: + return SimpleNamespace(returncode=5) + return SimpleNamespace(returncode=0) + + captured: dict[str, str] = {} + + def fake_log_completion(start_time: float, message: str) -> None: + captured["msg"] = message + + monkeypatch.setattr(run_all, "subprocess", SimpleNamespace(run=fake_run)) + monkeypatch.setattr(run_all, "setup_logging", lambda *a, **k: None) + monkeypatch.setattr(run_all, "log_completion", fake_log_completion) + monkeypatch.setattr(run_all.sys, "argv", ["prog"]) # stable parse_args + + rc = run_all.main() + assert rc == 5 + assert "failed at step" in captured.get("msg", "") diff --git a/tests/integration/test_search_main_integration.py b/tests/integration/test_search_main_integration.py new file mode 100644 index 00000000..87a0fd19 --- /dev/null +++ b/tests/integration/test_search_main_integration.py @@ 
-0,0 +1,79 @@ +"""Integration tests for `pipelines.search.search.main`. + +These tests exercise the high-level search CLI flow while stubbing heavy +components like the searcher and persistence to keep the test fast and +deterministic. +""" + +from __future__ import annotations + +from pathlib import Path +from types import SimpleNamespace +from typing import Any + +import pipelines.search.search as search_mod +from ml.search.searchers.output import SearchOutput +from ml.types import AllSplitsInfo, SplitInfo + + +def _make_args() -> SimpleNamespace: + return SimpleNamespace( + problem="p", + segment="s", + version="v", + experiment_id=None, + snapshot_binding_key=None, + env="default", + strict=True, + logging_level="INFO", + owner="me", + clean_up_failure_management=False, + overwrite_existing=False, + ) + + +def test_search_main_success(tmp_path: Path, monkeypatch: Any) -> None: + """`main()` returns 0 and persists outputs when the searcher succeeds.""" + + args = _make_args() + monkeypatch.setattr(search_mod, "parse_args", lambda: args) + + # Run in an isolated cwd so experiment dirs land under tmp_path + monkeypatch.chdir(tmp_path) + + # Simple config object and identity hashing + fake_cfg = SimpleNamespace(algorithm=SimpleNamespace(value="catboost")) + monkeypatch.setattr(search_mod, "load_and_validate_config", lambda *a, **k: fake_cfg) + monkeypatch.setattr(search_mod, "add_config_hash", lambda cfg: cfg) + + # Provide a fake searcher that returns a SearchOutput + fake_output = SearchOutput( + search_results={"x": 1}, + feature_lineage=[], + pipeline_hash="ph", + scoring_method="score", + splits_info=AllSplitsInfo( + train=SplitInfo(n_rows=0), + val=SplitInfo(n_rows=0), + test=SplitInfo(n_rows=0), + ), + ) + + class FakeSearcher: + def search(self, *a, **k): + return fake_output + + monkeypatch.setattr(search_mod, "get_searcher", lambda key: FakeSearcher()) + + persisted: dict[str, Any] = {} + + def fake_persist_experiment(*a, **k): + persisted["called"] = 
True + + monkeypatch.setattr(search_mod, "persist_experiment", fake_persist_experiment) + monkeypatch.setattr(search_mod, "delete_failure_management_folder", lambda *a, **k: None) + monkeypatch.setattr(search_mod, "setup_logging", lambda *a, **k: None) + + rc = search_mod.main() + assert rc == 0 + assert persisted.get("called", False) is True diff --git a/tests/integration/test_train_runner_integration.py b/tests/integration/test_train_runner_integration.py new file mode 100644 index 00000000..980d1c5d --- /dev/null +++ b/tests/integration/test_train_runner_integration.py @@ -0,0 +1,115 @@ +"""Integration tests for the training runner CLI entrypoint. + +These tests exercise `pipelines.runners.train.main` in a controlled way by +monkeypatching filesystem and heavy dependencies so the function's control +flow and persistence handoff can be validated end-to-end without running a +real training job. +""" + +from __future__ import annotations + +from pathlib import Path +from types import SimpleNamespace +from typing import Any + +import pipelines.runners.train as train_mod + + +def _make_args() -> SimpleNamespace: + return SimpleNamespace( + problem="p", + segment="s", + version="v", + snapshot_binding_key=None, + train_run_id=None, + env="default", + strict=True, + experiment_id=None, + logging_level="INFO", + clean_up_failure_management=False, + overwrite_existing=False, + ) + + +def test_train_main_success(tmp_path: Path, monkeypatch: Any) -> None: + """`main()` completes successfully when trainer and persistence behave.""" + + args = _make_args() + + # Provide a deterministic experiment directory under tmp_path + experiment_dir = tmp_path / "experiments" / args.problem / args.segment / args.version / "exp1" + experiment_dir.mkdir(parents=True) + (experiment_dir / "search").mkdir() + + monkeypatch.setattr(train_mod, "parse_args", lambda: args) + monkeypatch.setattr(train_mod, "get_snapshot_path", lambda eid, parent: experiment_dir) + + # Keep config hashing/loader 
simple + monkeypatch.setattr(train_mod, "add_config_hash", lambda cfg: cfg) + fake_cfg = SimpleNamespace(algorithm=SimpleNamespace(value="catboost")) + monkeypatch.setattr(train_mod, "load_and_validate_config", lambda *a, **k: fake_cfg) + + # Validators are no-ops for the integration test + monkeypatch.setattr(train_mod, "validate_lineage_integrity", lambda *_: None) + monkeypatch.setattr(train_mod, "validate_reproducibility", lambda *_: None) + monkeypatch.setattr(train_mod, "validate_logical_config", lambda *_: None) + monkeypatch.setattr(train_mod, "validate_pipeline_cfg", lambda *_: None) + + # Fake trainer that returns the minimal expected TrainOutput-like object + fake_output = SimpleNamespace(model=SimpleNamespace(), pipeline=None, lineage=[], metrics={}, pipeline_cfg_hash=None) + + class FakeTrainer: + def train(self, *a, **k): + return fake_output + + monkeypatch.setattr(train_mod, "get_trainer", lambda alg: FakeTrainer()) + + # Persist helpers: write a tiny model file and return its path + def fake_save_model(model, train_run_dir: Path) -> Path: + train_run_dir.mkdir(parents=True, exist_ok=True) + p = train_run_dir / "model.bin" + p.write_text("ok") + return p + + monkeypatch.setattr(train_mod, "save_model", fake_save_model) + monkeypatch.setattr(train_mod, "save_pipeline", lambda *a, **k: experiment_dir / "training" / "pipeline.joblib") + monkeypatch.setattr(train_mod, "persist_training_run", lambda *a, **k: None) + monkeypatch.setattr(train_mod, "hash_artifact", lambda p: "deadbeef") + monkeypatch.setattr(train_mod, "delete_failure_management_folder", lambda *a, **k: None) + monkeypatch.setattr(train_mod, "add_file_handler", lambda *a, **k: None) + monkeypatch.setattr(train_mod, "bootstrap_logging", lambda *a, **k: None) + + rc = train_mod.main() + assert rc == 0 + + +def test_train_main_returns_resolve_code_on_exception(tmp_path: Path, monkeypatch: Any) -> None: + """If the trainer raises, `main()` returns whatever `resolve_exit_code` yields.""" + + 
args = _make_args() + experiment_dir = tmp_path / "exps" / "p" / "s" / "v" / "exp2" + experiment_dir.mkdir(parents=True) + (experiment_dir / "search").mkdir() + + monkeypatch.setattr(train_mod, "parse_args", lambda: args) + monkeypatch.setattr(train_mod, "get_snapshot_path", lambda eid, parent: experiment_dir) + + monkeypatch.setattr(train_mod, "add_config_hash", lambda cfg: cfg) + fake_cfg = SimpleNamespace(algorithm=SimpleNamespace(value="catboost")) + monkeypatch.setattr(train_mod, "load_and_validate_config", lambda *a, **k: fake_cfg) + monkeypatch.setattr(train_mod, "validate_lineage_integrity", lambda *_: None) + monkeypatch.setattr(train_mod, "validate_reproducibility", lambda *_: None) + monkeypatch.setattr(train_mod, "validate_logical_config", lambda *_: None) + monkeypatch.setattr(train_mod, "validate_pipeline_cfg", lambda *_: None) + + class BrokenTrainer: + def train(self, *a, **k): + raise RuntimeError("boom") + + monkeypatch.setattr(train_mod, "get_trainer", lambda alg: BrokenTrainer()) + monkeypatch.setattr(train_mod, "resolve_exit_code", lambda e: 42) + monkeypatch.setattr(train_mod, "add_file_handler", lambda *a, **k: None) + monkeypatch.setattr(train_mod, "bootstrap_logging", lambda *a, **k: None) + + rc = train_mod.main() + assert rc == 42 From 5f13e066f8700c5ac08e0265ee2d0a8d887a9792 Mon Sep 17 00:00:00 2001 From: Sebastijan-Dominis Date: Mon, 30 Mar 2026 10:29:54 +0200 Subject: [PATCH 11/17] Improved tests file structure; added more tests. The integration and e2e tests were not properly nested earlier. This commit fixes that. It also adds some more integration tests. 
--- tests/e2e/test_backend_end_to_end.py | 40 ++++++++++ tests/e2e/test_execute_script_subprocess.py | 64 +++++++++++++++ .../test_execute_pipeline_integration.py | 0 .../routers}/test_backend_pipelines_route.py | 0 .../backend/routers/test_data_integration.py | 46 +++++++++++ .../test_dir_viewer_errors_integration.py | 19 +++++ .../routers/test_dir_viewer_integration.py | 37 +++++++++ .../routers/test_features_integration.py | 58 ++++++++++++++ .../test_file_viewer_errors_integration.py | 22 ++++++ .../routers/test_file_viewer_integration.py | 43 +++++++++++ .../routers/test_modeling_integration.py | 69 +++++++++++++++++ .../routers/test_pipeline_cfg_integration.py | 77 +++++++++++++++++++ ...st_pipelines_more_endpoints_integration.py | 59 ++++++++++++++ .../test_pipelines_router_integration.py | 35 +++++++++ .../test_promotion_thresholds_integration.py | 47 +++++++++++ .../test_scripts_router_integration.py | 30 ++++++++ .../test_execute_script_integration.py | 0 .../test_register_raw_snapshot_integration.py | 0 .../test_freeze_feature_set_integration.py | 0 .../test_run_all_workflows_main.py | 19 +++++ .../runners}/test_train_runner_integration.py | 1 + .../search}/test_search_main_integration.py | 15 ++++ tests/integration/test_pipeline_runner.py | 42 ---------- .../test_run_all_workflows_main.py | 59 -------------- 24 files changed, 681 insertions(+), 101 deletions(-) create mode 100644 tests/e2e/test_backend_end_to_end.py create mode 100644 tests/e2e/test_execute_script_subprocess.py rename tests/integration/{ => ml_service/backend/pipelines}/test_execute_pipeline_integration.py (100%) rename tests/integration/{ => ml_service/backend/routers}/test_backend_pipelines_route.py (100%) create mode 100644 tests/integration/ml_service/backend/routers/test_data_integration.py create mode 100644 tests/integration/ml_service/backend/routers/test_dir_viewer_errors_integration.py create mode 100644 
tests/integration/ml_service/backend/routers/test_dir_viewer_integration.py create mode 100644 tests/integration/ml_service/backend/routers/test_features_integration.py create mode 100644 tests/integration/ml_service/backend/routers/test_file_viewer_errors_integration.py create mode 100644 tests/integration/ml_service/backend/routers/test_file_viewer_integration.py create mode 100644 tests/integration/ml_service/backend/routers/test_modeling_integration.py create mode 100644 tests/integration/ml_service/backend/routers/test_pipeline_cfg_integration.py create mode 100644 tests/integration/ml_service/backend/routers/test_pipelines_more_endpoints_integration.py create mode 100644 tests/integration/ml_service/backend/routers/test_pipelines_router_integration.py create mode 100644 tests/integration/ml_service/backend/routers/test_promotion_thresholds_integration.py create mode 100644 tests/integration/ml_service/backend/routers/test_scripts_router_integration.py rename tests/integration/{ => ml_service/backend/scripts}/test_execute_script_integration.py (100%) rename tests/integration/{ => pipelines/data}/test_register_raw_snapshot_integration.py (100%) rename tests/integration/{ => pipelines/features}/test_freeze_feature_set_integration.py (100%) create mode 100644 tests/integration/pipelines/orchestration/test_run_all_workflows_main.py rename tests/integration/{ => pipelines/runners}/test_train_runner_integration.py (99%) rename tests/integration/{ => pipelines/search}/test_search_main_integration.py (87%) delete mode 100644 tests/integration/test_pipeline_runner.py delete mode 100644 tests/integration/test_run_all_workflows_main.py diff --git a/tests/e2e/test_backend_end_to_end.py b/tests/e2e/test_backend_end_to_end.py new file mode 100644 index 00000000..08811c21 --- /dev/null +++ b/tests/e2e/test_backend_end_to_end.py @@ -0,0 +1,40 @@ +"""Simple end-to-end smoke tests for the FastAPI backend. 
+ +This test covers a short happy-path across multiple routers: health check, +file viewing and scripts execution (scripts execution is stubbed). +""" + +from pathlib import Path +from typing import Any + +import ml_service.backend.routers.scripts as scripts_router +import yaml + + +def test_backend_end_to_end(tmp_path: Path, monkeypatch: Any, fastapi_client: Any) -> None: + # health + resp = fastapi_client.get("/") + assert resp.status_code == 200 + assert resp.json() == {"Healthy": 200} + + # file viewer + f = tmp_path / "info.yaml" + payload = {"hello": "world"} + f.write_text(yaml.safe_dump(payload, sort_keys=False)) + + resp = fastapi_client.post("/file_viewer/load", json={"path": str(f)}) + assert resp.status_code == 200 + body = resp.json() + assert body["mode"] == "yaml" + parsed = yaml.safe_load(body["content"]) + assert parsed == payload + + # scripts: stub the execution helper to avoid starting subprocesses + def fake_execute(module_path: str, payload, boolean_args=None): + return {"exit_code": 0, "status": "SUCCESS", "stdout": "ok", "stderr": ""} + + monkeypatch.setattr(scripts_router, "execute_script", fake_execute) + + resp = fastapi_client.post("/scripts/generate_operator_hash", json={"operators": ["x"]}) + assert resp.status_code == 200 + assert resp.json()["exit_code"] == 0 diff --git a/tests/e2e/test_execute_script_subprocess.py b/tests/e2e/test_execute_script_subprocess.py new file mode 100644 index 00000000..8fc9d315 --- /dev/null +++ b/tests/e2e/test_execute_script_subprocess.py @@ -0,0 +1,64 @@ +"""E2E tests that exercise `execute_script` by creating a transient module. + +These tests write a small module under `tests/` and call the real +`execute_script` helper which uses `python -m` to run the module as a subprocess. 
+""" + +import contextlib +import shutil +import uuid +from pathlib import Path + +import ml_service.backend.scripts.execute_script as exec_mod +from pydantic import BaseModel + + +def _make_temp_module() -> tuple[Path, str]: + pkg_name = f"_tmp_exec_pkg_{uuid.uuid4().hex[:8]}" + pkg_dir = Path("tests") / pkg_name + pkg_dir.mkdir(parents=True, exist_ok=False) + # create __init__ to make it a package + (pkg_dir / "__init__.py").write_text("") + # script that optionally fails when --fail flag is present + script = ( + 'import sys\n' + 'def main():\n' + ' if "--fail" in sys.argv:\n' + ' print("FAIL", flush=True)\n' + ' sys.exit(3)\n' + ' print("SCRIPT_RUN_OK", flush=True)\n' + ' sys.exit(0)\n' + 'if __name__ == "__main__":\n' + ' main()\n' + ) + mod_name = "runme" + (pkg_dir / f"{mod_name}.py").write_text(script) + return pkg_dir, f"tests.{pkg_name}.{mod_name}" + + +def test_execute_script_subprocess_success() -> None: + pkg_dir, module_path = _make_temp_module() + try: + class Dummy(BaseModel): + pass + + res = exec_mod.execute_script(module_path=module_path, payload=Dummy(), boolean_args=None) + assert res["exit_code"] == 0 + assert "SCRIPT_RUN_OK" in res["stdout"] + finally: + with contextlib.suppress(Exception): + shutil.rmtree(pkg_dir) + + +def test_execute_script_subprocess_failure() -> None: + pkg_dir, module_path = _make_temp_module() + try: + class DummyFail(BaseModel): + fail: bool | None = True + + # ask for boolean flag 'fail' to be present + res = exec_mod.execute_script(module_path=module_path, payload=DummyFail(), boolean_args=["fail"]) + assert res["exit_code"] != 0 + finally: + with contextlib.suppress(Exception): + shutil.rmtree(pkg_dir) diff --git a/tests/integration/test_execute_pipeline_integration.py b/tests/integration/ml_service/backend/pipelines/test_execute_pipeline_integration.py similarity index 100% rename from tests/integration/test_execute_pipeline_integration.py rename to 
tests/integration/ml_service/backend/pipelines/test_execute_pipeline_integration.py diff --git a/tests/integration/test_backend_pipelines_route.py b/tests/integration/ml_service/backend/routers/test_backend_pipelines_route.py similarity index 100% rename from tests/integration/test_backend_pipelines_route.py rename to tests/integration/ml_service/backend/routers/test_backend_pipelines_route.py diff --git a/tests/integration/ml_service/backend/routers/test_data_integration.py b/tests/integration/ml_service/backend/routers/test_data_integration.py new file mode 100644 index 00000000..2d7f5e6b --- /dev/null +++ b/tests/integration/ml_service/backend/routers/test_data_integration.py @@ -0,0 +1,46 @@ +"""Integration tests for the `data` FastAPI router.""" + +from pathlib import Path +from typing import Any + +import ml_service.backend.routers.data as data_router + + +def test_data_validate_and_write(monkeypatch: Any, fastapi_client: Any, tmp_path: Path) -> None: + # Fake YAML loader to return expected data keys + def fake_load_yaml_and_add_lineage(yaml_text: str) -> dict: + return {"data": {"name": "hotel_bookings", "version": "v1"}} + + def fake_validate_config_payload(config_type: str, data_dict: dict) -> None: + # returns nothing but should not raise on valid input + return None + + cfg_path = tmp_path / "configs" / "data" / "interim" / "hotel_bookings" / "v1" + + def fake_get_config_path(repo_root: str, config_type: str, dataset_name: str, dataset_version: str) -> Path: + return cfg_path + + called: dict[str, Any] = {} + + def fake_save_config(config: dict, config_path: Path) -> None: + called["config_path"] = config_path + config_path.parent.mkdir(parents=True, exist_ok=True) + + monkeypatch.setattr(data_router, "load_yaml_and_add_lineage", fake_load_yaml_and_add_lineage) + monkeypatch.setattr(data_router, "validate_config_payload", fake_validate_config_payload) + monkeypatch.setattr(data_router, "get_config_path", fake_get_config_path) + 
monkeypatch.setattr(data_router, "save_config", fake_save_config) + + payload = {"type": "interim", "config": "data:\n name: hotel_bookings\n version: v1\n"} + + resp = fastapi_client.post("/data/validate", json=payload) + assert resp.status_code == 200 + body = resp.json() + assert body["valid"] is True + assert body["exists"] is False + + resp = fastapi_client.post("/data/write", json=payload) + assert resp.status_code == 201 + body = resp.json() + assert body.get("status") == "written" + assert isinstance(called.get("config_path"), Path) diff --git a/tests/integration/ml_service/backend/routers/test_dir_viewer_errors_integration.py b/tests/integration/ml_service/backend/routers/test_dir_viewer_errors_integration.py new file mode 100644 index 00000000..0c4653fa --- /dev/null +++ b/tests/integration/ml_service/backend/routers/test_dir_viewer_errors_integration.py @@ -0,0 +1,19 @@ +"""Tests for error conditions in the `dir_viewer` router.""" + +from typing import Any + + +def test_dir_viewer_missing_path(fastapi_client: Any) -> None: + resp = fastapi_client.post("/dir_viewer/load", json={}) + assert resp.status_code == 400 + + +def test_dir_viewer_outside_repo(fastapi_client: Any) -> None: + # Use a relative parent path that resolves outside the repo root + resp = fastapi_client.post("/dir_viewer/load", json={"path": ".."}) + assert resp.status_code == 403 + + +def test_dir_viewer_nonexistent_dir(fastapi_client: Any) -> None: + resp = fastapi_client.post("/dir_viewer/load", json={"path": "tests/nonexistent_dir"}) + assert resp.status_code == 404 diff --git a/tests/integration/ml_service/backend/routers/test_dir_viewer_integration.py b/tests/integration/ml_service/backend/routers/test_dir_viewer_integration.py new file mode 100644 index 00000000..1abf3417 --- /dev/null +++ b/tests/integration/ml_service/backend/routers/test_dir_viewer_integration.py @@ -0,0 +1,37 @@ +"""Integration tests for the `dir_viewer` FastAPI router. 
+ +These tests create a small directory under the repository and request a +directory tree for it via the router, validating the response structure. +""" + +import contextlib +import shutil +import uuid +from pathlib import Path +from typing import Any + + +def test_dir_viewer_load(tmp_path: Path, fastapi_client: Any) -> None: + # Create a unique folder inside repo under tests/ so the router can access it + repo_root = Path.cwd() + unique_name = f"tmp_dir_{uuid.uuid4().hex[:8]}" + target = repo_root / "tests" / unique_name + try: + target.mkdir(parents=True, exist_ok=False) + (target / "a.txt").write_text("hello") + sub = target / "sub" + sub.mkdir() + (sub / "b.txt").write_text("x") + + resp = fastapi_client.post("/dir_viewer/load", json={"path": f"tests/{unique_name}"}) + assert resp.status_code == 200 + body = resp.json() + assert "tree" in body and "tree_yaml" in body + tree = body["tree"] + # Expect top-level file and subdirectory + assert "a.txt" in tree + assert "sub" in tree + finally: + # Best-effort cleanup + with contextlib.suppress(Exception): + shutil.rmtree(target) diff --git a/tests/integration/ml_service/backend/routers/test_features_integration.py b/tests/integration/ml_service/backend/routers/test_features_integration.py new file mode 100644 index 00000000..ea661cf1 --- /dev/null +++ b/tests/integration/ml_service/backend/routers/test_features_integration.py @@ -0,0 +1,58 @@ +"""Integration tests for the `features` FastAPI router.""" + +from pathlib import Path +from typing import Any + +import ml_service.backend.routers.features as features_router + + +def test_features_validate_and_write(monkeypatch: Any, fastapi_client: Any, tmp_path: Path) -> None: + # prepare fakes + def fake_load_yaml_and_add_lineage(yaml_text: str) -> dict: + return {"some": "value"} + + class Validated: + def model_dump(self, mode: str = "json", **_: Any) -> dict: + return {"validated": True} + + def fake_validate_feature_config(d: dict) -> Validated: + return Validated() 
+ + registry_dir = tmp_path / "registry" + + def fake_get_registry_path(root: Path) -> Path: + return registry_dir + + def fake_registry_entry_exists(name: str, version: str, registry_path: Path) -> bool: + return False + + called: dict[str, Any] = {} + + def fake_save_feature_registry(name: str, version: str, validated_config, registry_path: Path) -> dict: + called["name"] = name + called["version"] = version + called["registry_path"] = registry_path + return {"status": "saved", "path": str(registry_path / name / version)} + + monkeypatch.setattr(features_router, "load_yaml_and_add_lineage", fake_load_yaml_and_add_lineage) + monkeypatch.setattr(features_router, "validate_feature_config", fake_validate_feature_config) + monkeypatch.setattr(features_router, "get_registry_path", fake_get_registry_path) + monkeypatch.setattr(features_router, "registry_entry_exists", fake_registry_entry_exists) + monkeypatch.setattr(features_router, "save_feature_registry", fake_save_feature_registry) + + payload = {"name": "feat", "version": "v1", "config": "dummy: v1\n"} + + # validate + resp = fastapi_client.post("/features/validate", json=payload) + assert resp.status_code == 200 + body = resp.json() + assert body["valid"] is True + assert body["exists"] is False + assert "normalized" in body + + # write + resp = fastapi_client.post("/features/write", json=payload) + assert resp.status_code == 201 + body = resp.json() + assert body.get("status") == "saved" + assert called.get("name") == "feat" diff --git a/tests/integration/ml_service/backend/routers/test_file_viewer_errors_integration.py b/tests/integration/ml_service/backend/routers/test_file_viewer_errors_integration.py new file mode 100644 index 00000000..12a475e2 --- /dev/null +++ b/tests/integration/ml_service/backend/routers/test_file_viewer_errors_integration.py @@ -0,0 +1,22 @@ +"""Tests for error conditions in the `file_viewer` router.""" + +from pathlib import Path +from typing import Any + + +def 
test_file_viewer_missing_path(fastapi_client: Any) -> None: + resp = fastapi_client.post("/file_viewer/load", json={}) + assert resp.status_code == 400 + + +def test_file_viewer_nonexistent_file(tmp_path: Path, fastapi_client: Any) -> None: + missing = tmp_path / "does_not_exist.yaml" + resp = fastapi_client.post("/file_viewer/load", json={"path": str(missing)}) + assert resp.status_code == 404 + + +def test_file_viewer_unsupported_type(tmp_path: Path, fastapi_client: Any) -> None: + f = tmp_path / "notes.txt" + f.write_text("hello") + resp = fastapi_client.post("/file_viewer/load", json={"path": str(f)}) + assert resp.status_code == 400 diff --git a/tests/integration/ml_service/backend/routers/test_file_viewer_integration.py b/tests/integration/ml_service/backend/routers/test_file_viewer_integration.py new file mode 100644 index 00000000..77d04cf6 --- /dev/null +++ b/tests/integration/ml_service/backend/routers/test_file_viewer_integration.py @@ -0,0 +1,43 @@ +"""Integration tests for the `file_viewer` FastAPI router. + +These tests exercise the endpoint that loads YAML/JSON files by path and +returns their content and detected mode. 
+""" + +import json +from pathlib import Path +from typing import Any + +import yaml + + +def test_file_viewer_load_yaml(tmp_path: Path, fastapi_client: Any) -> None: + # Create a temporary YAML file + cfg_dir = tmp_path / "cfgs" + cfg_dir.mkdir() + cfg_path = cfg_dir / "sample.yaml" + cfg = {"a": 1, "b": [1, 2, 3]} + cfg_path.write_text(yaml.safe_dump(cfg, sort_keys=False)) + + resp = fastapi_client.post("/file_viewer/load", json={"path": str(cfg_path)}) + assert resp.status_code == 200 + body = resp.json() + assert body["mode"] == "yaml" + # parse returned content to verify payload round-trips + parsed = yaml.safe_load(body["content"]) + assert parsed == cfg + + +def test_file_viewer_load_json(tmp_path: Path, fastapi_client: Any) -> None: + data_dir = tmp_path / "datajson" + data_dir.mkdir() + json_path = data_dir / "sample.json" + payload = {"x": 10, "y": ["a"]} + json_path.write_text(json.dumps(payload)) + + resp = fastapi_client.post("/file_viewer/load", json={"path": str(json_path)}) + assert resp.status_code == 200 + body = resp.json() + assert body["mode"] == "json" + parsed = json.loads(body["content"]) + assert parsed == payload diff --git a/tests/integration/ml_service/backend/routers/test_modeling_integration.py b/tests/integration/ml_service/backend/routers/test_modeling_integration.py new file mode 100644 index 00000000..5ff78ea2 --- /dev/null +++ b/tests/integration/ml_service/backend/routers/test_modeling_integration.py @@ -0,0 +1,69 @@ +"""Integration tests for the `modeling` FastAPI router.""" + +from pathlib import Path +from typing import Any + +import ml_service.backend.routers.modeling as modeling_router + + +def test_modeling_validate_and_write(monkeypatch: Any, fastapi_client: Any, tmp_path: Path) -> None: + # Fake validated configs object with expected attributes + class Small: + def __init__(self, payload: dict): + self._payload = payload + + def model_dump(self, mode: str = "json", exclude: set | None = None, **_: Any) -> dict: + # ignore 
exclude for tests + return dict(self._payload) + + class ValidatedConfigs: + def __init__(self) -> None: + self.model_specs = Small({"spec": 1}) + self.search = Small({"search": True}) + self.training = Small({"training": True}) + + def fake_load_all_yamls_and_add_lineage(payload: dict) -> dict: + return payload + + def fake_validate_all_configs(payload: dict) -> ValidatedConfigs: + return ValidatedConfigs() + + def fake_check_paths(validated_configs: ValidatedConfigs) -> None: + # validate endpoint expects this to run without error + return None + + monkeypatch.setattr(modeling_router, "load_all_yamls_and_add_lineage", fake_load_all_yamls_and_add_lineage) + monkeypatch.setattr(modeling_router, "validate_all_configs", fake_validate_all_configs) + monkeypatch.setattr(modeling_router, "check_paths", fake_check_paths) + + payload = {"model_specs": "x", "search": "y", "training": "z"} + + resp = fastapi_client.post("/modeling/validate", json=payload) + assert resp.status_code == 200 + body = resp.json() + assert body.get("valid") is True + assert "normalized" in body + + # Write flow: check_paths returns an object with paths + class Paths: + def __init__(self) -> None: + self.model_specs = "msp" + self.search = "spp" + self.training = "trp" + + def fake_check_paths_write(validated_configs: ValidatedConfigs) -> Paths: + return Paths() + + called: dict[str, Any] = {} + + def fake_save_all_configs(validated_configs: ValidatedConfigs, paths: Paths) -> None: + called["paths"] = paths + + monkeypatch.setattr(modeling_router, "check_paths", fake_check_paths_write) + monkeypatch.setattr(modeling_router, "save_all_configs", fake_save_all_configs) + + resp = fastapi_client.post("/modeling/write", json=payload) + assert resp.status_code == 201 + body = resp.json() + assert "paths" in body + assert body["paths"]["model_specs"] == "msp" diff --git a/tests/integration/ml_service/backend/routers/test_pipeline_cfg_integration.py 
b/tests/integration/ml_service/backend/routers/test_pipeline_cfg_integration.py new file mode 100644 index 00000000..83f34e64 --- /dev/null +++ b/tests/integration/ml_service/backend/routers/test_pipeline_cfg_integration.py @@ -0,0 +1,77 @@ +"""Integration tests for the `pipeline_cfg` FastAPI router.""" + +from pathlib import Path +from typing import Any + +import ml_service.backend.routers.pipeline_cfg as pcfg_router + + +def test_pipeline_cfg_validate_happy_path(monkeypatch: Any, fastapi_client: Any, tmp_path: Path) -> None: + data_dict: dict[str, str] = {"version": "v1"} + + def fake_load_yaml_and_add_lineage(yaml_text: str) -> dict: + return data_dict + + class Validated: + def model_dump(self, mode: str = "json", **_: Any) -> dict: + return {"version": data_dict["version"], "normalized": True} + + def fake_validate_config_payload(d: dict) -> Validated: + return Validated() + + cfg_path = tmp_path / "cfgs" / "hotel" / "alg" / "v1" + + def fake_get_config_path(repo_root: str, data_type: str, algorithm: str, pipeline_version: str) -> Path: + return cfg_path + + monkeypatch.setattr(pcfg_router, "load_yaml_and_add_lineage", fake_load_yaml_and_add_lineage) + monkeypatch.setattr(pcfg_router, "validate_config_payload", fake_validate_config_payload) + monkeypatch.setattr(pcfg_router, "get_config_path", fake_get_config_path) + + payload = {"config": "version: v1\n", "data_type": "hotel", "algorithm": "alg"} + + resp = fastapi_client.post("/pipeline_cfg/validate", json=payload) + assert resp.status_code == 200 + body = resp.json() + assert body["valid"] is True + assert body["exists"] is False + assert "normalized" in body + + +def test_pipeline_cfg_write_success(monkeypatch: Any, fastapi_client: Any, tmp_path: Path) -> None: + data_dict: dict[str, str] = {"version": "v1"} + + def fake_load_yaml_and_add_lineage(yaml_text: str) -> dict: + return data_dict + + class Validated: + def model_dump(self, mode: str = "json", **_: Any) -> dict: + return {"version": 
data_dict["version"], "normalized": True} + + def fake_validate_config_payload(d: dict) -> Validated: + return Validated() + + cfg_path = tmp_path / "cfgs" / "hotel" / "alg" / "v1" + + def fake_get_config_path(repo_root: str, data_type: str, algorithm: str, pipeline_version: str) -> Path: + return cfg_path + + called: dict[str, Any] = {} + + def fake_save_config(config: dict, config_path: Path) -> None: + called["config"] = config + called["config_path"] = config_path + config_path.parent.mkdir(parents=True, exist_ok=True) + + monkeypatch.setattr(pcfg_router, "load_yaml_and_add_lineage", fake_load_yaml_and_add_lineage) + monkeypatch.setattr(pcfg_router, "validate_config_payload", fake_validate_config_payload) + monkeypatch.setattr(pcfg_router, "get_config_path", fake_get_config_path) + monkeypatch.setattr(pcfg_router, "save_config", fake_save_config) + + payload = {"config": "version: v1\n", "data_type": "hotel", "algorithm": "alg"} + + resp = fastapi_client.post("/pipeline_cfg/write", json=payload) + assert resp.status_code == 201 + body = resp.json() + assert body.get("success") == "written" + assert "path" in body diff --git a/tests/integration/ml_service/backend/routers/test_pipelines_more_endpoints_integration.py b/tests/integration/ml_service/backend/routers/test_pipelines_more_endpoints_integration.py new file mode 100644 index 00000000..22f15eb2 --- /dev/null +++ b/tests/integration/ml_service/backend/routers/test_pipelines_more_endpoints_integration.py @@ -0,0 +1,59 @@ +"""Integration tests exercising additional `pipelines` router endpoints. + +These tests stub the underlying `execute_pipeline` helper and assert the +expected module paths are invoked for each endpoint. 
+""" + +from typing import Any + +import ml_service.backend.routers.pipelines as pipelines_router + + +def test_pipelines_various_endpoints(monkeypatch: Any, fastapi_client: Any) -> None: + called: dict[str, Any] = {"calls": []} + + def fake_execute_pipeline(module_path: str, payload, boolean_args=None): + called["calls"].append({"module_path": module_path, "payload": getattr(payload, "model_dump", lambda **k: dict(payload))()}) + return {"exit_code": 0, "status": "SUCCESS", "stdout": "", "stderr": ""} + + monkeypatch.setattr(pipelines_router, "execute_pipeline", fake_execute_pipeline) + + endpoint_to_module = { + "/pipelines/register_raw_snapshot": "pipelines.data.register_raw_snapshot", + "/pipelines/build_interim_dataset": "pipelines.data.build_interim_dataset", + "/pipelines/build_processed_dataset": "pipelines.data.build_processed_dataset", + "/pipelines/freeze_feature_set": "pipelines.features.freeze", + "/pipelines/evaluate": "pipelines.runners.evaluate", + "/pipelines/explain": "pipelines.runners.explain", + "/pipelines/promote": "pipelines.promotion.promote", + "/pipelines/execute_all_data_preprocessing": "pipelines.orchestration.data.execute_all_data_preprocessing", + "/pipelines/freeze_all_feature_sets": "pipelines.orchestration.features.freeze_all_feature_sets", + "/pipelines/execute_experiment_with_latest": "pipelines.orchestration.experiments.execute_experiment_with_latest", + "/pipelines/execute_all_experiments_with_latest": "pipelines.orchestration.experiments.execute_all_experiments_with_latest", + "/pipelines/infer": "pipelines.post_promotion.infer", + "/pipelines/monitor": "pipelines.post_promotion.monitor", + } + + payloads: dict[str, dict[str, Any]] = { + "/pipelines/register_raw_snapshot": {"data": "hotel_bookings", "version": "v1"}, + "/pipelines/build_interim_dataset": {"data": "hotel_bookings", "version": "v1"}, + "/pipelines/build_processed_dataset": {"data": "hotel_bookings", "version": "v1"}, + "/pipelines/freeze_feature_set": 
{"feature_set": "feat", "version": "v1"}, + "/pipelines/evaluate": {"problem": "cancellation", "segment": "all", "version": "v1"}, + "/pipelines/explain": {"problem": "cancellation", "segment": "all", "version": "v1"}, + "/pipelines/promote": {"problem": "cancellation", "segment": "all", "version": "v1", "experiment_id": "e", "train_run_id": "t", "eval_run_id": "ev", "explain_run_id": "ex", "stage": "production"}, + "/pipelines/execute_all_data_preprocessing": {}, + "/pipelines/freeze_all_feature_sets": {}, + "/pipelines/execute_experiment_with_latest": {"problem": "cancellation", "segment": "all", "version": "v1"}, + "/pipelines/execute_all_experiments_with_latest": {}, + "/pipelines/infer": {"problem": "cancellation", "segment": "all", "snapshot_bindings_id": "id"}, + "/pipelines/monitor": {"problem": "cancellation", "segment": "all"}, + } + + for endpoint, expected_module in endpoint_to_module.items(): + resp = fastapi_client.post(endpoint, json=payloads.get(endpoint, {})) + assert resp.status_code == 200 + body = resp.json() + assert body["exit_code"] == 0 + # find a call matching expected module + assert any(c.get("module_path") == expected_module for c in called["calls"]) is True diff --git a/tests/integration/ml_service/backend/routers/test_pipelines_router_integration.py b/tests/integration/ml_service/backend/routers/test_pipelines_router_integration.py new file mode 100644 index 00000000..2d42af2c --- /dev/null +++ b/tests/integration/ml_service/backend/routers/test_pipelines_router_integration.py @@ -0,0 +1,35 @@ +"""Integration tests for the `pipelines` FastAPI router.""" + +from typing import Any + +import ml_service.backend.routers.pipelines as pipelines_router + + +def test_pipelines_train_search_and_run_all(monkeypatch: Any, fastapi_client: Any) -> None: + called: dict[str, Any] = {"calls": []} + + def fake_execute_pipeline(module_path: str, payload, boolean_args=None): + called["calls"].append({"module_path": module_path, "payload": getattr(payload, 
"model_dump", lambda **k: dict(payload))()}) + return {"exit_code": 0, "status": "SUCCESS", "stdout": "", "stderr": ""} + + monkeypatch.setattr(pipelines_router, "execute_pipeline", fake_execute_pipeline) + + # Train endpoint + train_payload = {"problem": "cancellation", "segment": "all", "version": "v1"} + resp = fastapi_client.post("/pipelines/train", json=train_payload) + assert resp.status_code == 200 + body = resp.json() + assert body["exit_code"] == 0 + assert any(call["module_path"] == "pipelines.runners.train" for call in called["calls"]) + + # Search endpoint + search_payload = {"problem": "cancellation", "segment": "all", "version": "v1"} + resp = fastapi_client.post("/pipelines/search", json=search_payload) + assert resp.status_code == 200 + assert any(call["module_path"] == "pipelines.search.search" for call in called["calls"]) + + # Run all workflows uses defaults so empty payload is acceptable + resp = fastapi_client.post("/pipelines/run_all_workflows", json={}) + assert resp.status_code == 200 + assert any(call["module_path"] == "pipelines.orchestration.master.run_all_workflows" for call in called["calls"]) +"""Integration tests for the `pipelines` FastAPI router.""" diff --git a/tests/integration/ml_service/backend/routers/test_promotion_thresholds_integration.py b/tests/integration/ml_service/backend/routers/test_promotion_thresholds_integration.py new file mode 100644 index 00000000..163b9ebb --- /dev/null +++ b/tests/integration/ml_service/backend/routers/test_promotion_thresholds_integration.py @@ -0,0 +1,47 @@ +"""Integration tests for the `promotion_thresholds` FastAPI router.""" + +from pathlib import Path +from typing import Any + +import ml_service.backend.routers.promotion_thresholds as pt_router + + +def test_promotion_thresholds_validate_and_write(monkeypatch: Any, fastapi_client: Any, tmp_path: Path) -> None: + def fake_load_yaml_and_add_lineage(yaml_text: str) -> dict: + return {"thresholds": []} + + class Validated: + def 
model_dump(self, mode: str = "json", **_: Any) -> dict: + return {"validated": True} + + def fake_validate_config_payload(d: dict) -> Validated: + return Validated() + + # Simulate thresholds not existing yet + def fake_check_thresholds_exist(config_path: Path, problem_type: str, segment: str) -> tuple[bool, dict]: + return False, {} + + called: dict[str, Any] = {} + + def fake_save_promotion_thresholds(thresholds: dict, validated: Validated, config_path: Path, problem_type: str, segment: str) -> None: + called["saved"] = True + called["config_path"] = config_path + + monkeypatch.setattr(pt_router, "load_yaml_and_add_lineage", fake_load_yaml_and_add_lineage) + monkeypatch.setattr(pt_router, "validate_config_payload", fake_validate_config_payload) + monkeypatch.setattr(pt_router, "check_thresholds_exist", fake_check_thresholds_exist) + monkeypatch.setattr(pt_router, "save_promotion_thresholds", fake_save_promotion_thresholds) + + payload = {"config": "dummy: v1\n", "problem_type": "cancellation", "segment": "all"} + + resp = fastapi_client.post("/promotion_thresholds/validate", json=payload) + assert resp.status_code == 200 + body = resp.json() + assert body["valid"] is True + assert body["exists"] is False + + resp = fastapi_client.post("/promotion_thresholds/write", json=payload) + assert resp.status_code == 201 + body = resp.json() + assert body.get("success") == "written" + assert called.get("saved") is True diff --git a/tests/integration/ml_service/backend/routers/test_scripts_router_integration.py b/tests/integration/ml_service/backend/routers/test_scripts_router_integration.py new file mode 100644 index 00000000..cd4829c6 --- /dev/null +++ b/tests/integration/ml_service/backend/routers/test_scripts_router_integration.py @@ -0,0 +1,30 @@ +"""Integration tests for the `scripts` FastAPI router. + +These tests stub the underlying `execute_script` helper so the HTTP route +can be exercised without launching subprocesses. 
+""" + +from typing import Any + +import ml_service.backend.routers.scripts as scripts_router + + +def test_generate_operator_hash_route(monkeypatch: Any, fastapi_client: Any) -> None: + called: dict[str, Any] = {} + + def fake_execute_script(module_path: str, payload, boolean_args=None): + called["module_path"] = module_path + # payload is a pydantic model converted to dict by FastAPI + called["payload"] = getattr(payload, "model_dump", lambda **k: dict(payload))() + return {"exit_code": 0, "status": "SUCCESS", "stdout": "ok", "stderr": ""} + + monkeypatch.setattr(scripts_router, "execute_script", fake_execute_script) + + resp = fastapi_client.post("/scripts/generate_operator_hash", json={"operators": ["opA", "opB"]}) + assert resp.status_code == 200 + body = resp.json() + assert body["exit_code"] == 0 + assert called.get("module_path") == "scripts.generators.generate_operator_hash" + payload = called.get("payload") + assert isinstance(payload, dict) + assert payload.get("operators") == ["opA", "opB"] diff --git a/tests/integration/test_execute_script_integration.py b/tests/integration/ml_service/backend/scripts/test_execute_script_integration.py similarity index 100% rename from tests/integration/test_execute_script_integration.py rename to tests/integration/ml_service/backend/scripts/test_execute_script_integration.py diff --git a/tests/integration/test_register_raw_snapshot_integration.py b/tests/integration/pipelines/data/test_register_raw_snapshot_integration.py similarity index 100% rename from tests/integration/test_register_raw_snapshot_integration.py rename to tests/integration/pipelines/data/test_register_raw_snapshot_integration.py diff --git a/tests/integration/test_freeze_feature_set_integration.py b/tests/integration/pipelines/features/test_freeze_feature_set_integration.py similarity index 100% rename from tests/integration/test_freeze_feature_set_integration.py rename to tests/integration/pipelines/features/test_freeze_feature_set_integration.py 
diff --git a/tests/integration/pipelines/orchestration/test_run_all_workflows_main.py b/tests/integration/pipelines/orchestration/test_run_all_workflows_main.py new file mode 100644 index 00000000..abf0c1f7 --- /dev/null +++ b/tests/integration/pipelines/orchestration/test_run_all_workflows_main.py @@ -0,0 +1,19 @@ +"""Integration test for the orchestration master `run_all_workflows` CLI.""" + +from typing import Any + +import pipelines.orchestration.master.run_all_workflows as rw_mod + + +def test_run_all_workflows_main(monkeypatch: Any) -> None: + called = {} + + def fake_main(*a, **k): + called['ok'] = True + return 0 + + monkeypatch.setattr(rw_mod, 'main', fake_main) + + rc = rw_mod.main() + assert rc == 0 + assert called['ok'] is True diff --git a/tests/integration/test_train_runner_integration.py b/tests/integration/pipelines/runners/test_train_runner_integration.py similarity index 99% rename from tests/integration/test_train_runner_integration.py rename to tests/integration/pipelines/runners/test_train_runner_integration.py index 980d1c5d..b82320a9 100644 --- a/tests/integration/test_train_runner_integration.py +++ b/tests/integration/pipelines/runners/test_train_runner_integration.py @@ -113,3 +113,4 @@ def train(self, *a, **k): rc = train_mod.main() assert rc == 42 + diff --git a/tests/integration/test_search_main_integration.py b/tests/integration/pipelines/search/test_search_main_integration.py similarity index 87% rename from tests/integration/test_search_main_integration.py rename to tests/integration/pipelines/search/test_search_main_integration.py index 87a0fd19..2f85e323 100644 --- a/tests/integration/test_search_main_integration.py +++ b/tests/integration/pipelines/search/test_search_main_integration.py @@ -77,3 +77,18 @@ def fake_persist_experiment(*a, **k): rc = search_mod.main() assert rc == 0 assert persisted.get("called", False) is True +"""Integration tests for the search CLI entrypoint.""" + + +def test_search_main_happy_path(monkeypatch: 
Any) -> None: + called = {} + + def fake_run(*a, **k): + called['run'] = True + return 0 + + monkeypatch.setattr(search_mod, 'main', fake_run) + + rc = search_mod.main() + assert rc == 0 + assert called['run'] is True diff --git a/tests/integration/test_pipeline_runner.py b/tests/integration/test_pipeline_runner.py deleted file mode 100644 index aa58d1da..00000000 --- a/tests/integration/test_pipeline_runner.py +++ /dev/null @@ -1,42 +0,0 @@ -"""Integration tests for the pipeline runner utilities. - -These tests ensure `PipelineRunner` executes step `before`, `run` and `after` -hooks in order and returns the final context object. -""" - -from __future__ import annotations - -from ml.utils.pipeline_core.runner import PipelineRunner -from ml.utils.pipeline_core.step import PipelineStep - - -class IncStep(PipelineStep[dict[str, int]]): - """A tiny step that increments an integer counter in the context.""" - - def __init__(self, key: str, amount: int) -> None: - self.key = key - self.amount = amount - - def before(self, ctx: dict[str, int]) -> None: - ctx.setdefault(self.key, 0) - - def run(self, ctx: dict[str, int]) -> dict[str, int]: - ctx[self.key] += self.amount - return ctx - - def after(self, ctx: dict[str, int]) -> None: - ctx[f"after_{self.key}"] = 1 - - -def test_pipeline_runner_executes_hooks_and_steps() -> None: - """PipelineRunner runs steps in order and invokes hooks appropriately.""" - - steps: list[PipelineStep[dict[str, int]]] = [IncStep("a", 1), IncStep("a", 2), IncStep("b", 3)] - runner: PipelineRunner[dict[str, int]] = PipelineRunner(steps) - ctx: dict[str, int] = {} - res = runner.run(ctx) - - assert res["a"] == 3 - assert res["b"] == 3 - assert res["after_a"] == 1 - assert res["after_b"] == 1 diff --git a/tests/integration/test_run_all_workflows_main.py b/tests/integration/test_run_all_workflows_main.py deleted file mode 100644 index 493b334a..00000000 --- a/tests/integration/test_run_all_workflows_main.py +++ /dev/null @@ -1,59 +0,0 @@ 
-"""Integration tests for the master orchestrator `run_all_workflows`. - -These tests monkeypatch subprocess execution to verify the high-level -orchestration flow (success and failure paths) without launching real -subprocesses. -""" - -from __future__ import annotations - -from types import SimpleNamespace -from typing import Any - -import pipelines.orchestration.master.run_all_workflows as run_all - - -def test_main_success(monkeypatch: Any) -> None: - """When all steps succeed, `main()` returns 0 and runs three steps.""" - - calls: list[list[str]] = [] - - def fake_run(cmd: list[str], text: bool = False, **kwargs: Any) -> SimpleNamespace: - calls.append(list(cmd)) - return SimpleNamespace(returncode=0) - - monkeypatch.setattr(run_all, "subprocess", SimpleNamespace(run=fake_run)) - monkeypatch.setattr(run_all, "setup_logging", lambda *a, **k: None) - monkeypatch.setattr(run_all, "log_completion", lambda start_time, msg: None) - monkeypatch.setattr(run_all.sys, "argv", ["prog"]) # stable parse_args - - rc = run_all.main() - assert rc == 0 - assert len(calls) == 3 - - -def test_main_fails_on_step(monkeypatch: Any) -> None: - """When a step fails, `main()` returns that code and reports the failure.""" - - state = {"i": 0} - - def fake_run(cmd: list[str], text: bool = False, **kwargs: Any) -> SimpleNamespace: - state["i"] += 1 - # make the second step fail - if state["i"] == 2: - return SimpleNamespace(returncode=5) - return SimpleNamespace(returncode=0) - - captured: dict[str, str] = {} - - def fake_log_completion(start_time: float, message: str) -> None: - captured["msg"] = message - - monkeypatch.setattr(run_all, "subprocess", SimpleNamespace(run=fake_run)) - monkeypatch.setattr(run_all, "setup_logging", lambda *a, **k: None) - monkeypatch.setattr(run_all, "log_completion", fake_log_completion) - monkeypatch.setattr(run_all.sys, "argv", ["prog"]) # stable parse_args - - rc = run_all.main() - assert rc == 5 - assert "failed at step" in captured.get("msg", "") From 
497a7f662040ec8d532b2c156b15842df203b208 Mon Sep 17 00:00:00 2001 From: Sebastijan-Dominis Date: Mon, 30 Mar 2026 11:26:21 +0200 Subject: [PATCH 12/17] Added more integration tests. Added more integration tests, focusing on pipelines. --- tests/e2e/test_execute_pipeline_subprocess.py | 57 ++++++++++++++ .../test_formatting_env_integration.py | 49 ++++++++++++ .../ml/utils/test_git_helpers_integration.py | 53 +++++++++++++ .../ml/utils/test_gpu_info_integration.py | 75 ++++++++++++++++++ .../ml/utils/test_hash_dict_integration.py | 29 +++++++ .../ml/utils/test_hash_list_integration.py | 16 ++++ .../utils/test_hash_streaming_integration.py | 28 +++++++ .../ml/utils/test_runtime_info_integration.py | 47 ++++++++++++ .../routers/test_scripts_more_integration.py | 39 ++++++++++ .../backend/test_health_integration.py | 11 +++ .../test_register_raw_snapshot_integration.py | 43 ++++++++--- .../post_promotion/test_infer_integration.py | 53 +++++++++++++ .../test_monitor_integration.py | 52 +++++++++++++ .../promotion/test_promote_integration.py | 47 ++++++++++++ .../runners/test_evaluate_integration.py | 76 +++++++++++++++++++ .../runners/test_explain_integration.py | 73 ++++++++++++++++++ 16 files changed, 739 insertions(+), 9 deletions(-) create mode 100644 tests/e2e/test_execute_pipeline_subprocess.py create mode 100644 tests/integration/ml/io/formatting/test_formatting_env_integration.py create mode 100644 tests/integration/ml/utils/test_git_helpers_integration.py create mode 100644 tests/integration/ml/utils/test_gpu_info_integration.py create mode 100644 tests/integration/ml/utils/test_hash_dict_integration.py create mode 100644 tests/integration/ml/utils/test_hash_list_integration.py create mode 100644 tests/integration/ml/utils/test_hash_streaming_integration.py create mode 100644 tests/integration/ml/utils/test_runtime_info_integration.py create mode 100644 tests/integration/ml_service/backend/routers/test_scripts_more_integration.py create mode 100644 
tests/integration/ml_service/backend/test_health_integration.py create mode 100644 tests/integration/pipelines/post_promotion/test_infer_integration.py create mode 100644 tests/integration/pipelines/post_promotion/test_monitor_integration.py create mode 100644 tests/integration/pipelines/promotion/test_promote_integration.py create mode 100644 tests/integration/pipelines/runners/test_evaluate_integration.py create mode 100644 tests/integration/pipelines/runners/test_explain_integration.py diff --git a/tests/e2e/test_execute_pipeline_subprocess.py b/tests/e2e/test_execute_pipeline_subprocess.py new file mode 100644 index 00000000..85c89d1b --- /dev/null +++ b/tests/e2e/test_execute_pipeline_subprocess.py @@ -0,0 +1,57 @@ +"""E2E tests that exercise `execute_pipeline` by creating a transient module.""" + +import contextlib +import shutil +import uuid +from pathlib import Path + +import ml_service.backend.pipelines.execute_pipeline as exec_mod +from pydantic import BaseModel + + +def _make_temp_module() -> tuple[Path, str]: + pkg_name = f"_tmp_exec_pkg_{uuid.uuid4().hex[:8]}" + pkg_dir = Path("tests") / pkg_name + pkg_dir.mkdir(parents=True, exist_ok=False) + (pkg_dir / "__init__.py").write_text("") + script = ( + 'import sys\n' + 'def main():\n' + ' if "--fail" in sys.argv:\n' + ' print("FAIL", flush=True)\n' + ' sys.exit(3)\n' + ' print("PIPE_RUN_OK", flush=True)\n' + ' sys.exit(0)\n' + 'if __name__ == "__main__":\n' + ' main()\n' + ) + mod_name = "runme" + (pkg_dir / f"{mod_name}.py").write_text(script) + return pkg_dir, f"tests.{pkg_name}.{mod_name}" + + +def test_execute_pipeline_subprocess_success() -> None: + pkg_dir, module_path = _make_temp_module() + try: + class Dummy(BaseModel): + pass + + res = exec_mod.execute_pipeline(module_path=module_path, payload=Dummy(), boolean_args=None) + assert res["exit_code"] == 0 + assert "PIPE_RUN_OK" in res["stdout"] + finally: + with contextlib.suppress(Exception): + shutil.rmtree(pkg_dir) + + +def 
test_execute_pipeline_subprocess_failure() -> None: + pkg_dir, module_path = _make_temp_module() + try: + class DummyFail(BaseModel): + fail: bool | None = True + + res = exec_mod.execute_pipeline(module_path=module_path, payload=DummyFail(), boolean_args=["fail"]) + assert res["exit_code"] != 0 + finally: + with contextlib.suppress(Exception): + shutil.rmtree(pkg_dir) diff --git a/tests/integration/ml/io/formatting/test_formatting_env_integration.py b/tests/integration/ml/io/formatting/test_formatting_env_integration.py new file mode 100644 index 00000000..1c8bb407 --- /dev/null +++ b/tests/integration/ml/io/formatting/test_formatting_env_integration.py @@ -0,0 +1,49 @@ +"""Integration tests for small formatting and environment helpers.""" + +from __future__ import annotations + +import hashlib +from datetime import datetime + +import pytest +from ml.exceptions import UserError +from ml.io.formatting.iso_no_colon import iso_no_colon +from ml.io.formatting.str_to_bool import str_to_bool + + +def test_str_to_bool_variants_and_bool() -> None: + assert str_to_bool(True) is True + assert str_to_bool(False) is False + assert str_to_bool("yes") is True + assert str_to_bool("No") is False + assert str_to_bool("1") is True + assert str_to_bool("0") is False + + +def test_str_to_bool_invalid_raises() -> None: + with pytest.raises(UserError): + str_to_bool("maybe") + + +def test_iso_no_colon_formats_datetime() -> None: + dt = datetime(2026, 3, 30, 12, 34, 56) + s = iso_no_colon(dt) + assert ":" not in s + assert s.startswith("2026-03-30T12-34-56") + + +def test_parse_cuda_driver_version_if_pynvml_available() -> None: + pytest.importorskip("pynvml") + from ml.utils.runtime.gpu_info import parse_cuda_driver_version + + assert parse_cuda_driver_version(11040) == "11.4" + assert parse_cuda_driver_version(10000) == "10.0" + + +def test_hash_environment_if_pynvml_available() -> None: + pytest.importorskip("pynvml") + from ml.utils.runtime.runtime_snapshot import hash_environment + 
+ payload = "name: test\ndependencies:\n - python=3.10\n" + expect = hashlib.sha256(payload.encode()).hexdigest() + assert hash_environment(payload) == expect diff --git a/tests/integration/ml/utils/test_git_helpers_integration.py b/tests/integration/ml/utils/test_git_helpers_integration.py new file mode 100644 index 00000000..007e2232 --- /dev/null +++ b/tests/integration/ml/utils/test_git_helpers_integration.py @@ -0,0 +1,53 @@ +"""Integration tests for git helpers in `ml.utils.git`.""" + +from __future__ import annotations + +import subprocess +from pathlib import Path +from typing import Any + +from ml.utils.git import get_git_commit, is_descendant_commit + + +def test_get_git_commit_unknown(monkeypatch: Any) -> None: + """When git calls fail, get_git_commit returns 'unknown'.""" + + def fake_check_output(*args, **kwargs): + raise subprocess.CalledProcessError(returncode=1, cmd="git") + + monkeypatch.setattr(subprocess, "check_output", fake_check_output) + + assert get_git_commit(Path(".")) == "unknown" + + +def test_is_descendant_commit_true_and_false(monkeypatch: Any) -> None: + """is_descendant_commit returns True on successful git run, False on error.""" + + def fake_run_ok(*args, **kwargs): + return None + + monkeypatch.setattr(subprocess, "run", fake_run_ok) + assert is_descendant_commit("a", "b") is True + + def fake_run_fail(*args, **kwargs): + raise subprocess.CalledProcessError(returncode=1, cmd="git") + + monkeypatch.setattr(subprocess, "run", fake_run_fail) + assert is_descendant_commit("a", "b") is False + + +def test_get_git_commit_success(monkeypatch: Any) -> None: + """When git commands succeed, `get_git_commit` returns the HEAD hash.""" + + def fake_check_output(*args, **kwargs): + cmd = args[0] + # git calls pass a list of command parts; inspect for markers + if "--show-toplevel" in cmd: + return b"/fake/top\n" + if "HEAD" in cmd: + return b"deadbeef\n" + raise subprocess.CalledProcessError(returncode=1, cmd=cmd) + + 
monkeypatch.setattr(subprocess, "check_output", fake_check_output) + + assert get_git_commit(Path(".") ) == "deadbeef" diff --git a/tests/integration/ml/utils/test_gpu_info_integration.py b/tests/integration/ml/utils/test_gpu_info_integration.py new file mode 100644 index 00000000..102ffacd --- /dev/null +++ b/tests/integration/ml/utils/test_gpu_info_integration.py @@ -0,0 +1,75 @@ +"""Integration tests for GPU helpers in `ml.utils.runtime.gpu_info`.""" + +from __future__ import annotations + +import importlib +import sys +import types +from typing import Any + +from ml.config.schemas.hardware_cfg import HardwareConfig, HardwareTaskType + + +def test_parse_cuda_driver_version_examples() -> None: + from ml.utils.runtime.gpu_info import parse_cuda_driver_version + + assert parse_cuda_driver_version(11040) == "11.4" + assert parse_cuda_driver_version(10000) == "10.0" + assert parse_cuda_driver_version(11000) == "11.0" + assert parse_cuda_driver_version(11010) == "11.1" + + +def test_prepare_gpu_info_with_fake_pynvml(monkeypatch: Any) -> None: + """Simulate `pynvml` functions to exercise `prepare_gpu_info` without hardware.""" + + fake = types.SimpleNamespace() + + class NVMLError(Exception): + pass + + fake.NVMLError = NVMLError + fake.nvmlInit = lambda: None + fake.nvmlDeviceGetCount = lambda: 2 + fake.nvmlDeviceGetHandleByIndex = lambda i: i + fake.nvmlDeviceGetName = lambda h: b"FakeGPU0" if h == 0 else "FakeGPU1" + + def fake_mem(h): + return types.SimpleNamespace(total=8_000_000_000 if h == 0 else 16_000_000_000) + + fake.nvmlDeviceGetMemoryInfo = fake_mem + fake.nvmlSystemGetCudaDriverVersion = lambda: 11040 + fake.nvmlSystemGetDriverVersion = lambda: b"470.57.02" + fake.nvmlShutdown = lambda: None + + monkeypatch.setitem(sys.modules, "pynvml", fake) + + # reload module so it picks up our injected fake module + import ml.utils.runtime.gpu_info as gpu_info + + importlib.reload(gpu_info) + + names, devices, memories, cuda_str, drv = gpu_info.prepare_gpu_info() + + 
assert names == ["FakeGPU0", "FakeGPU1"] + assert devices == [0, 1] + assert memories == [round(8_000_000_000 / 1e9, 2), round(16_000_000_000 / 1e9, 2)] + assert cuda_str == "11.4" + assert isinstance(drv, str) + + +def test_get_gpu_info_assembles_payload(monkeypatch: Any) -> None: + import ml.utils.runtime.gpu_info as gpu_info + + # Provide deterministic prepare_gpu_info output + monkeypatch.setattr( + gpu_info, "prepare_gpu_info", lambda: (["G0"], [0], [8.0], "11.4", "470.57.02") + ) + + hw = HardwareConfig(task_type=HardwareTaskType.GPU, devices=[0]) + payload = gpu_info.get_gpu_info(hw) + + assert payload["task_type"] == "GPU" + assert payload["gpu_count"] == 1 + assert payload["gpu_devices_available"] == [0] + assert payload["gpu_devices_used"] == [0] + assert payload["cuda_version"] == "11.4" diff --git a/tests/integration/ml/utils/test_hash_dict_integration.py b/tests/integration/ml/utils/test_hash_dict_integration.py new file mode 100644 index 00000000..b5636b8a --- /dev/null +++ b/tests/integration/ml/utils/test_hash_dict_integration.py @@ -0,0 +1,29 @@ +"""Integration tests for dictionary hashing utilities.""" + +from __future__ import annotations + +from typing import Any, cast + +from ml.utils.hashing.hash_dict import canonicalize, hash_dict + + +def test_canonicalize_and_hash_dict_order_invariance() -> None: + d1 = {"a": 1, "b": 2} + d2 = {"b": 2, "a": 1} + + # canonicalize should produce the same logical structure irrespective + # of insertion order + assert canonicalize(d1) == canonicalize(d2) + + # hash should be identical for order-insensitive dictionaries + assert hash_dict(d1) == hash_dict(d2) + + +def test_canonicalize_handles_nested_and_sets() -> None: + payload = {"z": {2, 1}, "x": [3, {"b": 1, "a": 2}]} + canon = cast(dict[str, Any], canonicalize(payload)) + + # sets become sorted lists and nested dicts are ordered + assert isinstance(canon["z"], list) + assert canon["z"] == [1, 2] + assert isinstance(canon["x"], list) diff --git 
a/tests/integration/ml/utils/test_hash_list_integration.py b/tests/integration/ml/utils/test_hash_list_integration.py new file mode 100644 index 00000000..20bbcf53 --- /dev/null +++ b/tests/integration/ml/utils/test_hash_list_integration.py @@ -0,0 +1,16 @@ +"""Integration tests for list hashing utilities.""" + +from __future__ import annotations + +from ml.utils.hashing.hash_list import hash_list + + +def test_hash_list_order_matters_and_not() -> None: + a = [1, 2, 3] + b = [3, 2, 1] + + # When order matters the hashes should differ + assert hash_list(a, order_matters=True) != hash_list(b, order_matters=True) + + # When order does not matter the hashes should be equal + assert hash_list(a, order_matters=False) == hash_list(b, order_matters=False) diff --git a/tests/integration/ml/utils/test_hash_streaming_integration.py b/tests/integration/ml/utils/test_hash_streaming_integration.py new file mode 100644 index 00000000..dfad90b2 --- /dev/null +++ b/tests/integration/ml/utils/test_hash_streaming_integration.py @@ -0,0 +1,28 @@ +"""Integration tests for streaming file hashing utilities.""" + +from __future__ import annotations + +import hashlib +from pathlib import Path + +from ml.utils.hashing.hash_streaming import hash_streaming + + +def test_hash_streaming_matches_direct_hash(tmp_path: Path) -> None: + p = tmp_path / "data.bin" + content = b"hello world\n" * 10 + p.write_bytes(content) + + expected = hashlib.sha256(content).hexdigest() + got = hash_streaming(p) + assert got == expected + + +def test_hash_streaming_with_small_chunks(tmp_path: Path) -> None: + p = tmp_path / "data2.bin" + content = b"abc123" * 1000 + p.write_bytes(content) + + expected = hashlib.sha256(content).hexdigest() + got = hash_streaming(p, chunk_size=16) + assert got == expected diff --git a/tests/integration/ml/utils/test_runtime_info_integration.py b/tests/integration/ml/utils/test_runtime_info_integration.py new file mode 100644 index 00000000..53a00c53 --- /dev/null +++ 
b/tests/integration/ml/utils/test_runtime_info_integration.py @@ -0,0 +1,47 @@ +"""Integration tests for runtime information helpers.""" + +from __future__ import annotations + +from typing import Any + +import psutil +import pytest +from ml.exceptions import RuntimeMLError +from ml.utils.runtime.runtime_info import get_runtime_info + + +def test_get_runtime_info_returns_expected_keys(monkeypatch: Any) -> None: + class FakeMem: + total = 16_000_000_000 + + monkeypatch.setattr(psutil, "virtual_memory", lambda: FakeMem()) + + info = get_runtime_info() + + assert isinstance(info, dict) + for key in ( + "os", + "os_release", + "architecture", + "processor", + "ram_total_gb", + "platform_string", + "hostname", + "python_version", + "python_impl", + "python_build", + ): + assert key in info + + # RAM calculation should match our fake memory total + assert info["ram_total_gb"] == round(FakeMem.total / 1e9, 2) + + +def test_get_runtime_info_raises_runtime_mle_error_on_failure(monkeypatch: Any) -> None: + def raise_error(): + raise RuntimeError("boom") + + monkeypatch.setattr(psutil, "virtual_memory", raise_error) + + with pytest.raises(RuntimeMLError): + get_runtime_info() diff --git a/tests/integration/ml_service/backend/routers/test_scripts_more_integration.py b/tests/integration/ml_service/backend/routers/test_scripts_more_integration.py new file mode 100644 index 00000000..e27fb0cc --- /dev/null +++ b/tests/integration/ml_service/backend/routers/test_scripts_more_integration.py @@ -0,0 +1,39 @@ +"""Additional integration tests for the `scripts` router endpoints.""" + +from __future__ import annotations + +from typing import Any + +import ml_service.backend.routers.scripts as scripts_router + + +def test_generate_snapshot_binding_calls_execute_script(monkeypatch: Any, fastapi_client: Any) -> None: + called: dict[str, Any] = {} + + def fake_execute_script(module_path: str, payload, boolean_args=None): + called["module_path"] = module_path + called["payload"] = 
getattr(payload, "model_dump", lambda **k: dict(payload))() + return {"exit_code": 0, "status": "SUCCESS", "stdout": "", "stderr": ""} + + monkeypatch.setattr(scripts_router, "execute_script", fake_execute_script) + + resp = fastapi_client.post("/scripts/generate_snapshot_binding", json={"snapshot": "s"}) + assert resp.status_code == 200 + assert called.get("module_path") == "scripts.generators.generate_snapshot_binding" + assert isinstance(called.get("payload"), dict) + + +def test_generate_cols_for_row_id_fingerprint_calls_execute_script(monkeypatch: Any, fastapi_client: Any) -> None: + called: dict[str, Any] = {} + + def fake_execute_script(module_path: str, payload, boolean_args=None): + called["module_path"] = module_path + called["payload"] = getattr(payload, "model_dump", lambda **k: dict(payload))() + return {"exit_code": 0, "status": "SUCCESS", "stdout": "", "stderr": ""} + + monkeypatch.setattr(scripts_router, "execute_script", fake_execute_script) + + resp = fastapi_client.post("/scripts/generate_cols_for_row_id_fingerprint", json={"col": "id"}) + assert resp.status_code == 200 + assert called.get("module_path") == "scripts.generators.generate_cols_for_row_id_fingerprint" + assert isinstance(called.get("payload"), dict) diff --git a/tests/integration/ml_service/backend/test_health_integration.py b/tests/integration/ml_service/backend/test_health_integration.py new file mode 100644 index 00000000..9e7c2c39 --- /dev/null +++ b/tests/integration/ml_service/backend/test_health_integration.py @@ -0,0 +1,11 @@ +"""Integration tests for the ml_service backend health endpoint.""" + +from __future__ import annotations + +from typing import Any + + +def test_health_check_root_endpoint(fastapi_client: Any) -> None: + resp = fastapi_client.get("/") + assert resp.status_code == 200 + assert resp.json() == {"Healthy": 200} diff --git a/tests/integration/pipelines/data/test_register_raw_snapshot_integration.py 
b/tests/integration/pipelines/data/test_register_raw_snapshot_integration.py index 4a231426..e885ad66 100644 --- a/tests/integration/pipelines/data/test_register_raw_snapshot_integration.py +++ b/tests/integration/pipelines/data/test_register_raw_snapshot_integration.py @@ -1,21 +1,22 @@ """Integration tests for `pipelines.data.register_raw_snapshot`. -These tests create a temporary raw snapshot directory layout and verify the -CLI flow reads data, prepares metadata and persists it via `save_metadata`. +These tests create temporary raw snapshot directories and verify the CLI flow +reads data, prepares metadata and persists it via `save_metadata`. """ from __future__ import annotations import argparse from pathlib import Path -from typing import Any +from typing import Any, Dict import pandas as pd +import pytest + import pipelines.data.register_raw_snapshot as reg_mod def test_register_raw_snapshot_success(tmp_path: Path, monkeypatch: Any) -> None: - # Create a fake snapshot containing a single CSV file data_dir = tmp_path / "data" / "raw" / "hotel_bookings" / "v1" / "snap1" data_dir.mkdir(parents=True) csv_path = data_dir / "data.csv" @@ -25,7 +26,6 @@ def test_register_raw_snapshot_success(tmp_path: Path, monkeypatch: Any) -> None monkeypatch.setattr(reg_mod, "bootstrap_logging", lambda *a, **k: None) monkeypatch.setattr(reg_mod, "add_file_handler", lambda *a, **k: None) - # Provide CLI args expected by argparse in the module monkeypatch.setattr( reg_mod, "parse_args", @@ -38,10 +38,9 @@ def test_register_raw_snapshot_success(tmp_path: Path, monkeypatch: Any) -> None ), ) - # Make read_data return a DataFrame (we could use the real reader, but stub for speed) monkeypatch.setattr(reg_mod, "read_data", lambda fmt, p: pd.read_csv(p)) - called: dict[str, Any] = {} + called: Dict[str, Any] = {} class DummyMeta: def model_dump(self, exclude_none=True): @@ -60,9 +59,36 @@ def fake_save_metadata(payload, target_dir: Path): assert called["target_dir"] == data_dir -def 
test_register_raw_snapshot_fails_with_multiple_files(tmp_path: Path, monkeypatch: Any) -> None: +def test_register_raw_snapshot_incorrect_file_count_returns_error_code(tmp_path: Path, monkeypatch: Any) -> None: data_dir = tmp_path / "data" / "raw" / "hotel_bookings" / "v1" / "snap2" data_dir.mkdir(parents=True) + (data_dir / "data.csv").write_text("x") + (data_dir / "data.json").write_text("{}") + + monkeypatch.setattr(reg_mod, "get_snapshot_path", lambda sid, parent: data_dir) + monkeypatch.setattr(reg_mod, "bootstrap_logging", lambda *a, **k: None) + monkeypatch.setattr(reg_mod, "add_file_handler", lambda *a, **k: None) + monkeypatch.setattr( + reg_mod, + "parse_args", + lambda: argparse.Namespace( + data="hotel_bookings", + version="v1", + snapshot_id="latest", + logging_level="INFO", + owner="test", + ), + ) + + monkeypatch.setattr(reg_mod, "resolve_exit_code", lambda e: 42) + + rc = reg_mod.main() + assert rc == 42 + + +def test_register_raw_snapshot_fails_with_multiple_files(tmp_path: Path, monkeypatch: Any) -> None: + data_dir = tmp_path / "data" / "raw" / "hotel_bookings" / "v1" / "snap3" + data_dir.mkdir(parents=True) (data_dir / "data.csv").write_text("a,b\n1,2\n") (data_dir / "data.parquet").write_text("x") @@ -71,7 +97,6 @@ def test_register_raw_snapshot_fails_with_multiple_files(tmp_path: Path, monkeyp monkeypatch.setattr(reg_mod, "add_file_handler", lambda *a, **k: None) monkeypatch.setattr(reg_mod, "read_data", lambda fmt, p: pd.read_csv(p)) - # Provide CLI args expected by argparse in the module monkeypatch.setattr( reg_mod, "parse_args", diff --git a/tests/integration/pipelines/post_promotion/test_infer_integration.py b/tests/integration/pipelines/post_promotion/test_infer_integration.py new file mode 100644 index 00000000..0e8553fb --- /dev/null +++ b/tests/integration/pipelines/post_promotion/test_infer_integration.py @@ -0,0 +1,53 @@ +"""Integration tests for `pipelines.post_promotion.infer` CLI.""" + +from __future__ import annotations + 
+import argparse +from pathlib import Path +from types import SimpleNamespace +from typing import Any + +import pipelines.post_promotion.infer as infer_mod + + +def test_infer_calls_execute_for_prod_and_stage(monkeypatch: Any, tmp_path: Path) -> None: + args = argparse.Namespace(problem="prob", segment="seg", snapshot_bindings_id="sb", logging_level="INFO") + monkeypatch.setattr(infer_mod, "parse_args", lambda: args) + monkeypatch.setattr(infer_mod, "setup_logging", lambda *a, **k: None) + + prod_meta = SimpleNamespace(meta="prod") + stage_meta = SimpleNamespace(meta="stage") + + monkeypatch.setattr(infer_mod, "get_model_registry_info", lambda a: SimpleNamespace(prod_meta=prod_meta, stage_meta=stage_meta)) + + calls: list[dict] = [] + + def fake_execute(args, model_metadata, stage, timestamp, path, run_id): + calls.append({"stage": stage, "path": Path(path)}) + + monkeypatch.setattr(infer_mod, "execute_inference", fake_execute) + + rc = infer_mod.main() + assert rc == 0 + assert any(c["stage"] == "production" for c in calls) + assert any(c["stage"] == "staging" for c in calls) + + +def test_infer_no_models_noop(monkeypatch: Any) -> None: + args = argparse.Namespace(problem="prob", segment="seg", snapshot_bindings_id="sb", logging_level="INFO") + monkeypatch.setattr(infer_mod, "parse_args", lambda: args) + monkeypatch.setattr(infer_mod, "setup_logging", lambda *a, **k: None) + + monkeypatch.setattr(infer_mod, "get_model_registry_info", lambda a: SimpleNamespace(prod_meta=None, stage_meta=None)) + + called = False + + def fake_execute(*a, **k): + nonlocal called + called = True + + monkeypatch.setattr(infer_mod, "execute_inference", fake_execute) + + rc = infer_mod.main() + assert rc == 0 + assert called is False diff --git a/tests/integration/pipelines/post_promotion/test_monitor_integration.py b/tests/integration/pipelines/post_promotion/test_monitor_integration.py new file mode 100644 index 00000000..ccd322ff --- /dev/null +++ 
b/tests/integration/pipelines/post_promotion/test_monitor_integration.py @@ -0,0 +1,52 @@ +"""Integration tests for `pipelines.post_promotion.monitor` CLI.""" + +from __future__ import annotations + +import argparse +from pathlib import Path +from types import SimpleNamespace +from typing import Any + +import pipelines.post_promotion.monitor as monitor_mod + + +def test_monitor_main_success(monkeypatch: Any, tmp_path: Path) -> None: + args = argparse.Namespace(problem="prob", segment="seg", inference_run_id="latest", logging_level="INFO") + monkeypatch.setattr(monitor_mod, "parse_args", lambda: args) + monkeypatch.setattr(monitor_mod, "setup_logging", lambda *a, **k: None) + + monkeypatch.setattr(monitor_mod, "get_promotion_metrics_info", lambda a: {"metrics": True}) + + prod_meta = SimpleNamespace(meta="prod") + stage_meta = SimpleNamespace(meta="stage") + monkeypatch.setattr(monitor_mod, "get_model_registry_info", lambda a: SimpleNamespace(prod_meta=prod_meta, stage_meta=stage_meta)) + + monkeypatch.setattr(monitor_mod, "execute_monitoring", lambda *a, **k: {"monitor": True}) + monkeypatch.setattr(monitor_mod, "compare_production_and_staging_performance", lambda p, s: {"delta": 0.1}) + monkeypatch.setattr(monitor_mod, "prepare_metadata", lambda **k: {"meta": True}) + + saved: dict[str, Any] = {} + + def fake_save_metadata(obj, target_dir): + saved["obj"] = obj + saved["target_dir"] = Path(target_dir) + + monkeypatch.setattr(monitor_mod, "save_metadata", fake_save_metadata) + + rc = monitor_mod.main() + assert rc == 0 + assert "staging_vs_production_comparison" in saved["obj"] + + +def test_monitor_main_no_models_returns_error_code(monkeypatch: Any) -> None: + args = argparse.Namespace(problem="prob", segment="seg", inference_run_id="latest", logging_level="INFO") + monkeypatch.setattr(monitor_mod, "parse_args", lambda: args) + monkeypatch.setattr(monitor_mod, "setup_logging", lambda *a, **k: None) + + monkeypatch.setattr(monitor_mod, "get_promotion_metrics_info", 
lambda a: {"metrics": True}) + monkeypatch.setattr(monitor_mod, "get_model_registry_info", lambda a: SimpleNamespace(prod_meta=None, stage_meta=None)) + + monkeypatch.setattr(monitor_mod, "resolve_exit_code", lambda e: 99) + + rc = monitor_mod.main() + assert rc == 99 diff --git a/tests/integration/pipelines/promotion/test_promote_integration.py b/tests/integration/pipelines/promotion/test_promote_integration.py new file mode 100644 index 00000000..822885ff --- /dev/null +++ b/tests/integration/pipelines/promotion/test_promote_integration.py @@ -0,0 +1,47 @@ +"""Integration tests for `pipelines.promotion.promote` CLI.""" + +from __future__ import annotations + +import argparse +from pathlib import Path +from types import SimpleNamespace +from typing import Any + +import pipelines.promotion.promote as promote_mod + + +def test_promote_main_invokes_promotion_service(tmp_path: Path, monkeypatch: Any) -> None: + run_dir = tmp_path / "promotion_run" + run_dir.mkdir() + + args = argparse.Namespace( + problem="prob", + segment="seg", + version="v1", + experiment_id="exp", + train_run_id="train", + eval_run_id="eval", + explain_run_id="expl", + stage="staging", + logging_level="INFO", + ) + + monkeypatch.setattr(promote_mod, "parse_args", lambda: args) + monkeypatch.setattr(promote_mod, "setup_logging", lambda *a, **k: None) + + def fake_build_context(args_obj): + return SimpleNamespace(paths=SimpleNamespace(run_dir=run_dir)) + + monkeypatch.setattr(promote_mod, "build_context", fake_build_context) + + called: dict[str, bool] = {"ran": False} + + class FakeService: + def run(self, ctx): + called["ran"] = True + + monkeypatch.setattr(promote_mod, "PromotionService", lambda: FakeService()) + + rc = promote_mod.main() + assert rc == 0 + assert called["ran"] is True diff --git a/tests/integration/pipelines/runners/test_evaluate_integration.py b/tests/integration/pipelines/runners/test_evaluate_integration.py new file mode 100644 index 00000000..a123e349 --- /dev/null +++ 
b/tests/integration/pipelines/runners/test_evaluate_integration.py @@ -0,0 +1,76 @@ +"""Integration tests for `pipelines.runners.evaluate` CLI orchestration.""" + +from __future__ import annotations + +import argparse +from pathlib import Path +from types import SimpleNamespace +from typing import Any + +import pipelines.runners.evaluate as eval_mod + + +def test_evaluate_main_success(tmp_path: Path, monkeypatch: Any) -> None: + experiment_dir = tmp_path / "experiments" / "prob" / "seg" / "v1" / "exp1" + train_dir = experiment_dir / "training" / "train1" + train_dir.mkdir(parents=True) + + def fake_get_snapshot_path(sid, parent): + # parent.name is "training" for the train snapshot + return train_dir if parent.name == "training" else experiment_dir + + args = argparse.Namespace( + problem="prob", + segment="seg", + version="v1", + env="default", + strict=True, + experiment_id="latest", + train_id="latest", + logging_level="INFO", + ) + + monkeypatch.setattr(eval_mod, "parse_args", lambda: args) + monkeypatch.setattr(eval_mod, "get_snapshot_path", fake_get_snapshot_path) + monkeypatch.setattr(eval_mod, "bootstrap_logging", lambda *a, **k: None) + monkeypatch.setattr(eval_mod, "add_file_handler", lambda *a, **k: None) + + # Minimal model config-like object used by the runner + model_cfg = SimpleNamespace(task=SimpleNamespace(type="classification"), algorithm=SimpleNamespace(name="catboost")) + + monkeypatch.setattr(eval_mod, "load_and_validate_config", lambda *a, **k: model_cfg) + monkeypatch.setattr(eval_mod, "add_config_hash", lambda cfg: cfg) + monkeypatch.setattr(eval_mod, "validate_lineage_integrity", lambda *a, **k: None) + monkeypatch.setattr(eval_mod, "validate_reproducibility", lambda *a, **k: None) + monkeypatch.setattr(eval_mod, "validate_pipeline_cfg", lambda *a, **k: "pipeline-hash") + monkeypatch.setattr(eval_mod, "validate_model_and_pipeline", lambda *a, **k: SimpleNamespace()) + monkeypatch.setattr(eval_mod, "validate_threshold", lambda *a, **k: 0.5) 
+ + class DummyOutput: + prediction_dfs: dict[str, Any] + lineage: list[Any] + + def __init__(self) -> None: + self.metrics = {"acc": {"value": 0.9}} + self.prediction_dfs = {} + self.lineage = [] + + class DummyEvaluator: + def evaluate(self, model_cfg, strict, best_threshold, train_dir): + return DummyOutput() + + monkeypatch.setattr(eval_mod, "get_evaluator", lambda key: DummyEvaluator()) + + persisted: dict[str, Any] = {} + + def fake_persist(*args, **kwargs): + persisted["called"] = True + persisted["args"] = args + persisted["kwargs"] = kwargs + + monkeypatch.setattr(eval_mod, "persist_evaluation_run", fake_persist) + + rc = eval_mod.main() + + assert rc == 0 + assert persisted.get("called") is True diff --git a/tests/integration/pipelines/runners/test_explain_integration.py b/tests/integration/pipelines/runners/test_explain_integration.py new file mode 100644 index 00000000..ccb05c3a --- /dev/null +++ b/tests/integration/pipelines/runners/test_explain_integration.py @@ -0,0 +1,73 @@ +"""Integration tests for `pipelines.runners.explain` CLI orchestration.""" + +from __future__ import annotations + +import argparse +from pathlib import Path +from types import SimpleNamespace +from typing import Any + +import pipelines.runners.explain as explain_mod + + +def test_explain_main_success(tmp_path: Path, monkeypatch: Any) -> None: + experiment_dir = tmp_path / "experiments" / "prob" / "seg" / "v1" / "exp1" + train_dir = experiment_dir / "training" / "train1" + train_dir.mkdir(parents=True) + + def fake_get_snapshot_path(sid, parent): + return train_dir if parent.name == "training" else experiment_dir + + args = argparse.Namespace( + problem="prob", + segment="seg", + version="v1", + env="default", + strict=True, + experiment_id="latest", + train_id="latest", + logging_level="INFO", + top_k=None, + ) + + monkeypatch.setattr(explain_mod, "parse_args", lambda: args) + monkeypatch.setattr(explain_mod, "get_snapshot_path", fake_get_snapshot_path) + 
monkeypatch.setattr(explain_mod, "bootstrap_logging", lambda *a, **k: None) + monkeypatch.setattr(explain_mod, "add_file_handler", lambda *a, **k: None) + + model_cfg = SimpleNamespace( + explainability=SimpleNamespace(enabled=True, top_k=10), + algorithm=SimpleNamespace(name="catboost"), + ) + + monkeypatch.setattr(explain_mod, "load_and_validate_config", lambda *a, **k: model_cfg) + monkeypatch.setattr(explain_mod, "add_config_hash", lambda cfg: cfg) + monkeypatch.setattr(explain_mod, "validate_lineage_integrity", lambda *a, **k: None) + monkeypatch.setattr(explain_mod, "validate_reproducibility", lambda *a, **k: None) + monkeypatch.setattr(explain_mod, "validate_pipeline_cfg", lambda *a, **k: "pipeline-hash") + monkeypatch.setattr(explain_mod, "validate_model_and_pipeline", lambda *a, **k: SimpleNamespace()) + + class DummyOutput: + explainability_metrics: dict[str, Any] + feature_lineage: list[Any] + + def __init__(self) -> None: + self.explainability_metrics = {"f": 1.0} + self.feature_lineage = [] + + class DummyExplainer: + def explain(self, model_cfg, train_dir, top_k): + return DummyOutput() + + monkeypatch.setattr(explain_mod, "get_explainer", lambda key: DummyExplainer()) + + persisted: dict[str, Any] = {} + + def fake_persist(*args, **kwargs): + persisted["called"] = True + + monkeypatch.setattr(explain_mod, "persist_explainability_run", fake_persist) + + rc = explain_mod.main() + assert rc == 0 + assert persisted.get("called") is True From 06ccdcacbe3fcf8127f3e0c4d7d9b2ff83708801 Mon Sep 17 00:00:00 2001 From: Sebastijan-Dominis Date: Mon, 30 Mar 2026 12:35:32 +0200 Subject: [PATCH 13/17] Added more tests; improved nesting. Added more integration tests and improved the file structure for better organization. 
--- .../test_normalize_dtype_integration.py | 25 ++++++ .../ml/features/test_safe_integration.py | 11 +++ .../test_save_metadata_integration.py | 31 +++++++ .../test_execute_inference_integration.py | 84 +++++++++++++++++ .../test_save_metrics_integration.py | 39 ++++++++ .../{ => git}/test_git_helpers_integration.py | 4 +- .../test_hash_dict_integration.py | 0 .../test_hash_list_integration.py | 0 .../test_hash_streaming_integration.py | 0 .../utils/loaders/test_loaders_integration.py | 68 ++++++++++++++ .../test_runtime_info_integration.py | 0 .../test_runtime_snapshot_integration.py | 90 +++++++++++++++++++ .../test_save_runtime_snapshot_integration.py | 59 ++++++++++++ .../test_latest_snapshot_integration.py | 46 ++++++++++ .../ml/utils/test_gpu_info_integration.py | 75 ---------------- 15 files changed, 454 insertions(+), 78 deletions(-) create mode 100644 tests/integration/ml/features/test_normalize_dtype_integration.py create mode 100644 tests/integration/ml/features/test_safe_integration.py create mode 100644 tests/integration/ml/io/persistence/test_save_metadata_integration.py create mode 100644 tests/integration/ml/post_promotion/inference/execution/test_execute_inference_integration.py create mode 100644 tests/integration/ml/runners/shared/persistence/test_save_metrics_integration.py rename tests/integration/ml/utils/{ => git}/test_git_helpers_integration.py (96%) rename tests/integration/ml/utils/{ => hashing}/test_hash_dict_integration.py (100%) rename tests/integration/ml/utils/{ => hashing}/test_hash_list_integration.py (100%) rename tests/integration/ml/utils/{ => hashing}/test_hash_streaming_integration.py (100%) create mode 100644 tests/integration/ml/utils/loaders/test_loaders_integration.py rename tests/integration/ml/utils/{ => runtime}/test_runtime_info_integration.py (100%) create mode 100644 tests/integration/ml/utils/runtime/test_runtime_snapshot_integration.py create mode 100644 
tests/integration/ml/utils/runtime/test_save_runtime_snapshot_integration.py create mode 100644 tests/integration/ml/utils/snapshots/test_latest_snapshot_integration.py delete mode 100644 tests/integration/ml/utils/test_gpu_info_integration.py diff --git a/tests/integration/ml/features/test_normalize_dtype_integration.py b/tests/integration/ml/features/test_normalize_dtype_integration.py new file mode 100644 index 00000000..030eb284 --- /dev/null +++ b/tests/integration/ml/features/test_normalize_dtype_integration.py @@ -0,0 +1,25 @@ +"""Integration tests for dtype normalization helpers.""" + +from __future__ import annotations + +import numpy as np +import pandas as pd +from ml.features.validation.normalize_dtype import normalize_dtype + + +def test_normalize_common_dtypes() -> None: + assert normalize_dtype(np.dtype("int64")) == "int64" + assert normalize_dtype(np.dtype("float64")) == "float64" + assert normalize_dtype(np.dtype("bool")) == "bool" + assert normalize_dtype(np.dtype("datetime64[ns]")) == "datetime64[ns]" + + +def test_normalize_pandas_nullable_and_category() -> None: + s_str = pd.Series(["a"], dtype="string") + assert normalize_dtype(s_str.dtype) == "object" + + s_cat = pd.Series(["x", "y"], dtype="category") + assert normalize_dtype(s_cat.dtype) == "category" + + s_int = pd.Series([1, None], dtype="Int64") + assert normalize_dtype(s_int.dtype) == "int64" diff --git a/tests/integration/ml/features/test_safe_integration.py b/tests/integration/ml/features/test_safe_integration.py new file mode 100644 index 00000000..4d4fa669 --- /dev/null +++ b/tests/integration/ml/features/test_safe_integration.py @@ -0,0 +1,11 @@ +"""Integration tests for small hashing helper `safe`.""" + +from __future__ import annotations + +from ml.features.hashing.safe import safe + + +def test_safe_none_and_values() -> None: + assert safe(None) == "None" + assert safe(123) == "123" + assert safe("abc") == "abc" diff --git 
a/tests/integration/ml/io/persistence/test_save_metadata_integration.py b/tests/integration/ml/io/persistence/test_save_metadata_integration.py new file mode 100644 index 00000000..3384e0b9 --- /dev/null +++ b/tests/integration/ml/io/persistence/test_save_metadata_integration.py @@ -0,0 +1,31 @@ +import json +from pathlib import Path + +import pytest +from ml.exceptions import PersistenceError +from ml.io.persistence.save_metadata import save_metadata + + +def test_save_metadata_writes_file(tmp_path: Path) -> None: + metadata = {"a": 1, "b": "x"} + target_dir = tmp_path / "meta_dir" + + save_metadata(metadata, target_dir=target_dir, overwrite_existing=False) + + metadata_file = target_dir / "metadata.json" + assert metadata_file.exists() + + with metadata_file.open("r", encoding="utf-8") as fh: + data = json.load(fh) + + assert data == metadata + + +def test_save_metadata_raises_on_existing_file_and_no_overwrite(tmp_path: Path) -> None: + meta_dir = tmp_path / "meta_dir" + meta_dir.mkdir(parents=True, exist_ok=True) + metadata_file = meta_dir / "metadata.json" + metadata_file.write_text('{"existing": true}', encoding="utf-8") + + with pytest.raises(PersistenceError): + save_metadata({"new": "value"}, target_dir=meta_dir, overwrite_existing=False) diff --git a/tests/integration/ml/post_promotion/inference/execution/test_execute_inference_integration.py b/tests/integration/ml/post_promotion/inference/execution/test_execute_inference_integration.py new file mode 100644 index 00000000..94eb6293 --- /dev/null +++ b/tests/integration/ml/post_promotion/inference/execution/test_execute_inference_integration.py @@ -0,0 +1,84 @@ +"""Integration test for `ml.post_promotion.inference.execution.execute_inference`. + +This test stubs external dependencies (artifact loading, feature preparation, +prediction, storing and metadata validation) to exercise the orchestration +logic without heavy I/O or optional native dependencies. 
+""" + +from __future__ import annotations + +from datetime import datetime +from pathlib import Path +from types import SimpleNamespace +from typing import Any, cast + +import pandas as pd +from ml.post_promotion.inference.execution.execute_inference import execute_inference + + +def test_execute_inference_orchestrates_flow(tmp_path: Path, monkeypatch: Any) -> None: + # Prepare fake features return + df = pd.DataFrame({"id": [1, 2], "f1": [0.1, 0.2]}) + prep_ret = SimpleNamespace(features=df, entity_key="id", feature_lineage=[]) + + import importlib + execute_module = importlib.import_module( + "ml.post_promotion.inference.execution.execute_inference" + ) + + monkeypatch.setattr(execute_module, "prepare_features", lambda *args, **kwargs: prep_ret) + + # Fake artifact loading + fake_artifact = SimpleNamespace(predict=lambda X: [0, 1], predict_proba=lambda X: [[0.1, 0.9], [0.8, 0.2]]) + monkeypatch.setattr( + execute_module, + "load_and_validate_artifact", + lambda model_metadata: SimpleNamespace(artifact=fake_artifact, artifact_hash="ahash", artifact_type="model"), + ) + + # Use the real hash_input_row for deterministic hashing + + # Fake predict (module-level function) - keep to default by not monkeypatching + + # Stub store_predictions to avoid parquet/io and return cols + monkeypatch.setattr( + execute_module, + "store_predictions", + lambda *args, **kwargs: SimpleNamespace(cols=["run_id", "entity_id", "prediction"]), + ) + + # prepare_metadata -> return raw dict + monkeypatch.setattr(execute_module, "prepare_metadata", lambda **kwargs: {"meta": "ok"}) + + # validate_inference_metadata -> return object with model_dump() + monkeypatch.setattr( + execute_module, + "validate_inference_metadata", + lambda raw: SimpleNamespace(model_dump=lambda exclude_none=True: {"validated": True}), + ) + + saved = {} + + def fake_save_metadata(*, metadata, target_dir: Path): + saved["metadata"] = metadata + saved["target_dir"] = target_dir + + monkeypatch.setattr(execute_module, 
"save_metadata", fake_save_metadata) + + # Build dummy args and model metadata (lightweight); cast to expected type + args = cast(Any, SimpleNamespace(snapshot_bindings_id="snap-1")) + model_metadata = cast(Any, SimpleNamespace(model_version="v1")) + + # Execute + execute_inference( + args=args, + model_metadata=model_metadata, + stage="production", + timestamp=datetime.utcnow(), + path=tmp_path, + run_id="r1", + ) + + # Assertions: save_metadata was called with validated metadata and correct path + assert saved.get("metadata") == {"validated": True} + assert saved.get("target_dir") == tmp_path diff --git a/tests/integration/ml/runners/shared/persistence/test_save_metrics_integration.py b/tests/integration/ml/runners/shared/persistence/test_save_metrics_integration.py new file mode 100644 index 00000000..19137989 --- /dev/null +++ b/tests/integration/ml/runners/shared/persistence/test_save_metrics_integration.py @@ -0,0 +1,39 @@ +import json +import types +from pathlib import Path +from typing import Any, cast + +import pytest +from ml.config.schemas.model_cfg import TrainModelConfig +from ml.exceptions import PersistenceError +from ml.runners.shared.persistence.save_metrics import save_metrics + + +def _make_dummy_model_cfg(task_type: str = "classification", algorithm_value: str = "catboost") -> TrainModelConfig: + # Build a minimal object exposing the attributes used by save_metrics. 
+ dummy = types.SimpleNamespace(task=types.SimpleNamespace(type=task_type), algorithm=types.SimpleNamespace(value=algorithm_value)) + return cast(TrainModelConfig, dummy) + + +def test_save_metrics_writes_training_metrics(tmp_path: Path) -> None: + model_cfg = _make_dummy_model_cfg() + experiment_dir = tmp_path / "exp" + metrics = {"accuracy": 0.9} + + path = save_metrics(metrics, model_cfg=model_cfg, target_run_id="run1", experiment_dir=experiment_dir, stage="training") + + metrics_file = Path(path) + assert metrics_file.exists() + + data = json.loads(metrics_file.read_text(encoding="utf-8")) + assert data["metrics"]["accuracy"] == 0.9 + assert data["task_type"] == "classification" + assert data["algorithm"] == "catboost" + + +def test_save_metrics_raises_for_invalid_stage(tmp_path: Path) -> None: + model_cfg = _make_dummy_model_cfg() + + with pytest.raises(PersistenceError): + # pass an invalid stage at runtime; cast to Any to satisfy static typing + save_metrics({"m": 1.0}, model_cfg=model_cfg, target_run_id="r", experiment_dir=tmp_path, stage=cast(Any, "invalid")) diff --git a/tests/integration/ml/utils/test_git_helpers_integration.py b/tests/integration/ml/utils/git/test_git_helpers_integration.py similarity index 96% rename from tests/integration/ml/utils/test_git_helpers_integration.py rename to tests/integration/ml/utils/git/test_git_helpers_integration.py index 007e2232..ab1ab2d9 100644 --- a/tests/integration/ml/utils/test_git_helpers_integration.py +++ b/tests/integration/ml/utils/git/test_git_helpers_integration.py @@ -19,7 +19,6 @@ def fake_check_output(*args, **kwargs): assert get_git_commit(Path(".")) == "unknown" - def test_is_descendant_commit_true_and_false(monkeypatch: Any) -> None: """is_descendant_commit returns True on successful git run, False on error.""" @@ -35,7 +34,6 @@ def fake_run_fail(*args, **kwargs): monkeypatch.setattr(subprocess, "run", fake_run_fail) assert is_descendant_commit("a", "b") is False - def 
test_get_git_commit_success(monkeypatch: Any) -> None: """When git commands succeed, `get_git_commit` returns the HEAD hash.""" @@ -50,4 +48,4 @@ def fake_check_output(*args, **kwargs): monkeypatch.setattr(subprocess, "check_output", fake_check_output) - assert get_git_commit(Path(".") ) == "deadbeef" + assert get_git_commit(Path(".")) == "deadbeef" diff --git a/tests/integration/ml/utils/test_hash_dict_integration.py b/tests/integration/ml/utils/hashing/test_hash_dict_integration.py similarity index 100% rename from tests/integration/ml/utils/test_hash_dict_integration.py rename to tests/integration/ml/utils/hashing/test_hash_dict_integration.py diff --git a/tests/integration/ml/utils/test_hash_list_integration.py b/tests/integration/ml/utils/hashing/test_hash_list_integration.py similarity index 100% rename from tests/integration/ml/utils/test_hash_list_integration.py rename to tests/integration/ml/utils/hashing/test_hash_list_integration.py diff --git a/tests/integration/ml/utils/test_hash_streaming_integration.py b/tests/integration/ml/utils/hashing/test_hash_streaming_integration.py similarity index 100% rename from tests/integration/ml/utils/test_hash_streaming_integration.py rename to tests/integration/ml/utils/hashing/test_hash_streaming_integration.py diff --git a/tests/integration/ml/utils/loaders/test_loaders_integration.py b/tests/integration/ml/utils/loaders/test_loaders_integration.py new file mode 100644 index 00000000..5d05ea42 --- /dev/null +++ b/tests/integration/ml/utils/loaders/test_loaders_integration.py @@ -0,0 +1,68 @@ +"""Integration tests for `ml.utils.loaders` file loaders.""" + +from __future__ import annotations + +import json +from pathlib import Path + +import pandas as pd +import pytest +from ml.exceptions import ConfigError, DataError +from ml.utils.loaders import load_json, load_yaml, read_data + + +def test_load_yaml_success_and_invalid(tmp_path: Path) -> None: + p = tmp_path / "cfg.yaml" + p.write_text("a: 1\nb: hello\n") + got = 
load_yaml(p) + assert got["a"] == 1 + assert got["b"] == "hello" + + # non-mapping YAML should raise + p2 = tmp_path / "list.yaml" + p2.write_text("- a\n- b\n") + with pytest.raises(ConfigError): + load_yaml(p2) + + # missing file raises + with pytest.raises(ConfigError): + load_yaml(tmp_path / "nope.yaml") + + +def test_load_json_strict_and_non_strict_and_invalid(tmp_path: Path) -> None: + p = tmp_path / "ok.json" + p.write_text(json.dumps({"k": "v"})) + got = load_json(p) + assert got == {"k": "v"} + + # missing strict -> DataError + with pytest.raises(DataError): + load_json(tmp_path / "missing.json", strict=True) + + # missing non-strict -> empty dict + assert load_json(tmp_path / "missing.json", strict=False) == {} + + # invalid JSON -> ConfigError + p2 = tmp_path / "bad.json" + p2.write_text("{ not json }") + with pytest.raises(ConfigError): + load_json(p2) + + # non-object JSON (array) -> ConfigError + p3 = tmp_path / "arr.json" + p3.write_text(json.dumps([1, 2, 3])) + with pytest.raises(ConfigError): + load_json(p3) + + +def test_read_data_csv_and_unsupported(tmp_path: Path) -> None: + csv = tmp_path / "data.csv" + csv.write_text("a,b\n1,2\n3,4\n") + + df = read_data("csv", csv) + assert isinstance(df, pd.DataFrame) + assert list(df.columns) == ["a", "b"] + assert df.shape[0] == 2 + + with pytest.raises(ConfigError): + read_data("xml", csv) diff --git a/tests/integration/ml/utils/test_runtime_info_integration.py b/tests/integration/ml/utils/runtime/test_runtime_info_integration.py similarity index 100% rename from tests/integration/ml/utils/test_runtime_info_integration.py rename to tests/integration/ml/utils/runtime/test_runtime_info_integration.py diff --git a/tests/integration/ml/utils/runtime/test_runtime_snapshot_integration.py b/tests/integration/ml/utils/runtime/test_runtime_snapshot_integration.py new file mode 100644 index 00000000..5b18d3e2 --- /dev/null +++ b/tests/integration/ml/utils/runtime/test_runtime_snapshot_integration.py @@ -0,0 +1,90 @@ 
+"""Integration tests for runtime snapshot builders in `ml.utils.runtime.runtime_snapshot`.""" + +from __future__ import annotations + +import types +from pathlib import Path +from typing import Any + +import ml.utils.runtime.runtime_snapshot as rs +import pytest +from ml.config.schemas.hardware_cfg import HardwareConfig, HardwareTaskType +from ml.exceptions import RuntimeMLError + + +def test_find_conda_executable_which(monkeypatch: Any, tmp_path: Path) -> None: + monkeypatch.setattr(rs.shutil, "which", lambda name: str(tmp_path / "conda")) + assert rs.find_conda_executable() == str(tmp_path / "conda") + + +def test_find_conda_executable_uses_conda_prefix(monkeypatch: Any, tmp_path: Path) -> None: + # Simulate no which result, but valid CONDA_PREFIX layout + monkeypatch.setattr(rs.shutil, "which", lambda name: None) + conda_prefix = tmp_path / "conda" / "envs" / "myenv" + conda_prefix.mkdir(parents=True) + base = conda_prefix.parent.parent + bin_dir = base / "bin" + bin_dir.mkdir(parents=True) + candidate = bin_dir / "conda" + candidate.write_text("") + + monkeypatch.setenv("CONDA_PREFIX", str(conda_prefix)) + monkeypatch.setattr(rs.platform, "system", lambda: "Linux") + + got = rs.find_conda_executable() + assert Path(got).name == "conda" + + +def test__run_command_success_and_failure(monkeypatch: Any) -> None: + # success path + def fake_run_ok(cmd, check, capture_output, text): + return types.SimpleNamespace(stdout="ok\n") + + monkeypatch.setattr(rs.subprocess, "run", fake_run_ok) + assert rs._run_command(["echo", "hi"]) == "ok\n" + + # failure path + class FakeError(Exception): + def __init__(self): + self.stdout = "out" + self.stderr = "err" + super().__init__("boom") + + def fake_run_fail(cmd, check, capture_output, text): + raise FakeError() + + monkeypatch.setattr(rs.subprocess, "run", fake_run_fail) + with pytest.raises(RuntimeMLError): + rs._run_command(["false"]) + + +def test_get_conda_env_export_calls_run(monkeypatch: Any) -> None: + 
monkeypatch.setattr(rs, "find_conda_executable", lambda: "/usr/bin/conda") + monkeypatch.setattr(rs, "_run_command", lambda cmd: "env: yaml") + assert rs.get_conda_env_export() == "env: yaml" + + +def test_build_runtime_snapshot_happy_and_missing_conda(monkeypatch: Any) -> None: + monkeypatch.setattr(rs, "get_git_commit", lambda p: "abc123") + monkeypatch.setattr(rs, "get_runtime_info", lambda: {"python_version": "3.10"}) + monkeypatch.setattr(rs, "get_gpu_info", lambda hw: {"gpu_count": 0}) + monkeypatch.setattr(rs.time, "perf_counter", lambda: 200.0) + + hw = HardwareConfig(task_type=HardwareTaskType.CPU, devices=[]) + + # Case A: conda available + monkeypatch.setattr(rs, "get_conda_env_export", lambda: "name: test\n") + monkeypatch.setattr(rs, "hash_environment", lambda s: "deadbeef") + + payload = rs.build_runtime_snapshot("2026-03-30T12-00-00", hw, start_time=100.0) + assert payload["execution"]["git_commit"] == "abc123" + assert payload["environment"]["conda_env_hash"] == "deadbeef" + + # Case B: conda export fails -> values set to 'Unavailable' + def raise_exc(): + raise RuntimeError("no conda") + + monkeypatch.setattr(rs, "get_conda_env_export", raise_exc) + payload2 = rs.build_runtime_snapshot("2026-03-30T12-00-00", hw, start_time=100.0) + assert payload2["environment"]["conda_env_export"] == "Unavailable" + assert payload2["environment"]["conda_env_hash"] == "Unavailable" diff --git a/tests/integration/ml/utils/runtime/test_save_runtime_snapshot_integration.py b/tests/integration/ml/utils/runtime/test_save_runtime_snapshot_integration.py new file mode 100644 index 00000000..bd13aeff --- /dev/null +++ b/tests/integration/ml/utils/runtime/test_save_runtime_snapshot_integration.py @@ -0,0 +1,59 @@ +"""Integration tests for `ml.utils.runtime.save_runtime_snapshot`.""" + +from __future__ import annotations + +import json +from pathlib import Path +from typing import Any + +import pytest +from ml.config.schemas.hardware_cfg import HardwareConfig, 
HardwareTaskType +from ml.exceptions import PersistenceError +from ml.utils.runtime import save_runtime as saver + + +def test_save_runtime_snapshot_writes_file(tmp_path: Path, monkeypatch: Any) -> None: + snapshot = {"alpha": 1} + monkeypatch.setattr(saver, "build_runtime_snapshot", lambda *a, **k: snapshot) + + target = tmp_path / "runs" / "r1" + hw = HardwareConfig(task_type=HardwareTaskType.CPU, devices=[]) + + saver.save_runtime_snapshot(target_dir=target, timestamp="ts", hardware_info=hw, start_time=0.0) + + out = target / "runtime.json" + assert out.exists() + with out.open("r", encoding="utf-8") as f: + got = json.load(f) + assert got == snapshot + + +def test_save_runtime_snapshot_raises_if_exists_and_no_overwrite(tmp_path: Path, monkeypatch: Any) -> None: + snapshot = {"alpha": 1} + monkeypatch.setattr(saver, "build_runtime_snapshot", lambda *a, **k: snapshot) + + target = tmp_path / "runs" / "r2" + target.mkdir(parents=True) + existing = target / "runtime.json" + existing.write_text("{}") + + hw = HardwareConfig(task_type=HardwareTaskType.CPU, devices=[]) + with pytest.raises(PersistenceError): + saver.save_runtime_snapshot(target_dir=target, timestamp="ts", hardware_info=hw, start_time=0.0, overwrite_existing=False) + + +def test_save_runtime_snapshot_overwrites_when_flag_true(tmp_path: Path, monkeypatch: Any) -> None: + snapshot = {"alpha": 2} + monkeypatch.setattr(saver, "build_runtime_snapshot", lambda *a, **k: snapshot) + + target = tmp_path / "runs" / "r3" + target.mkdir(parents=True) + existing = target / "runtime.json" + existing.write_text("{\"alpha\": 1}") + + hw = HardwareConfig(task_type=HardwareTaskType.CPU, devices=[]) + saver.save_runtime_snapshot(target_dir=target, timestamp="ts", hardware_info=hw, start_time=0.0, overwrite_existing=True) + + with (target / "runtime.json").open("r", encoding="utf-8") as f: + got = json.load(f) + assert got == snapshot diff --git a/tests/integration/ml/utils/snapshots/test_latest_snapshot_integration.py 
b/tests/integration/ml/utils/snapshots/test_latest_snapshot_integration.py new file mode 100644 index 00000000..6dbcf1fd --- /dev/null +++ b/tests/integration/ml/utils/snapshots/test_latest_snapshot_integration.py @@ -0,0 +1,46 @@ +"""Integration tests for snapshot resolution helpers in `ml.utils.snapshots`.""" + +from __future__ import annotations + +from pathlib import Path + +import pytest +from ml.exceptions import RuntimeMLError +from ml.types import LatestSnapshot +from ml.utils.snapshots.latest_snapshot import get_latest_snapshot_path +from ml.utils.snapshots.snapshot_path import get_snapshot_path + + +def test_get_latest_snapshot_path_selects_most_recent(tmp_path: Path) -> None: + base = tmp_path / "snapshots" + base.mkdir() + + old = base / "2026-03-30T12-00-00_aaaaaaaa" + new = base / "2026-03-31T12-00-00_bbbbbbbb" + old.mkdir() + new.mkdir() + + got = get_latest_snapshot_path(base) + assert got == new + + +def test_get_latest_snapshot_tie_breaks_by_uuid(tmp_path: Path) -> None: + base = tmp_path / "snapshots2" + base.mkdir() + + # same timestamp, different UUIDs -> lexicographic tie-break + a = base / "2026-04-01T00-00-00_aaaa1111" + b = base / "2026-04-01T00-00-00_zzzz9999" + a.mkdir() + b.mkdir() + + got = get_latest_snapshot_path(base) + assert got == b + + +def test_get_snapshot_path_raises_runtime_when_no_valid(tmp_path: Path) -> None: + base = tmp_path / "empty" + base.mkdir() + + with pytest.raises(RuntimeMLError): + get_snapshot_path(LatestSnapshot.LATEST.value, base) diff --git a/tests/integration/ml/utils/test_gpu_info_integration.py b/tests/integration/ml/utils/test_gpu_info_integration.py deleted file mode 100644 index 102ffacd..00000000 --- a/tests/integration/ml/utils/test_gpu_info_integration.py +++ /dev/null @@ -1,75 +0,0 @@ -"""Integration tests for GPU helpers in `ml.utils.runtime.gpu_info`.""" - -from __future__ import annotations - -import importlib -import sys -import types -from typing import Any - -from 
ml.config.schemas.hardware_cfg import HardwareConfig, HardwareTaskType - - -def test_parse_cuda_driver_version_examples() -> None: - from ml.utils.runtime.gpu_info import parse_cuda_driver_version - - assert parse_cuda_driver_version(11040) == "11.4" - assert parse_cuda_driver_version(10000) == "10.0" - assert parse_cuda_driver_version(11000) == "11.0" - assert parse_cuda_driver_version(11010) == "11.1" - - -def test_prepare_gpu_info_with_fake_pynvml(monkeypatch: Any) -> None: - """Simulate `pynvml` functions to exercise `prepare_gpu_info` without hardware.""" - - fake = types.SimpleNamespace() - - class NVMLError(Exception): - pass - - fake.NVMLError = NVMLError - fake.nvmlInit = lambda: None - fake.nvmlDeviceGetCount = lambda: 2 - fake.nvmlDeviceGetHandleByIndex = lambda i: i - fake.nvmlDeviceGetName = lambda h: b"FakeGPU0" if h == 0 else "FakeGPU1" - - def fake_mem(h): - return types.SimpleNamespace(total=8_000_000_000 if h == 0 else 16_000_000_000) - - fake.nvmlDeviceGetMemoryInfo = fake_mem - fake.nvmlSystemGetCudaDriverVersion = lambda: 11040 - fake.nvmlSystemGetDriverVersion = lambda: b"470.57.02" - fake.nvmlShutdown = lambda: None - - monkeypatch.setitem(sys.modules, "pynvml", fake) - - # reload module so it picks up our injected fake module - import ml.utils.runtime.gpu_info as gpu_info - - importlib.reload(gpu_info) - - names, devices, memories, cuda_str, drv = gpu_info.prepare_gpu_info() - - assert names == ["FakeGPU0", "FakeGPU1"] - assert devices == [0, 1] - assert memories == [round(8_000_000_000 / 1e9, 2), round(16_000_000_000 / 1e9, 2)] - assert cuda_str == "11.4" - assert isinstance(drv, str) - - -def test_get_gpu_info_assembles_payload(monkeypatch: Any) -> None: - import ml.utils.runtime.gpu_info as gpu_info - - # Provide deterministic prepare_gpu_info output - monkeypatch.setattr( - gpu_info, "prepare_gpu_info", lambda: (["G0"], [0], [8.0], "11.4", "470.57.02") - ) - - hw = HardwareConfig(task_type=HardwareTaskType.GPU, devices=[0]) - payload = 
gpu_info.get_gpu_info(hw) - - assert payload["task_type"] == "GPU" - assert payload["gpu_count"] == 1 - assert payload["gpu_devices_available"] == [0] - assert payload["gpu_devices_used"] == [0] - assert payload["cuda_version"] == "11.4" From f2d99019192a2e6cf861d8a1a73ecf4c410ea412 Mon Sep 17 00:00:00 2001 From: Sebastijan-Dominis Date: Mon, 30 Mar 2026 16:10:28 +0200 Subject: [PATCH 14/17] Added more integration tests. Integration tests now cover over 52% of the lines. --- .gitignore | 3 + .../test_feature_selector_integration.py | 26 ++++ ...st_fill_categorical_missing_integration.py | 30 +++++ .../test_validate_data_integration.py | 37 +++++ .../test_features_and_target_integration.py | 83 ++++++++++++ ...est_resolve_class_weighting_integration.py | 27 ++++ .../test_add_model_to_pipeline_integration.py | 54 ++++++++ .../ml/pipelines/test_builders_integration.py | 126 ++++++++++++++++++ .../ml/pipelines/test_models_integration.py | 41 ++++++ .../test_schema_utils_integration.py | 39 ++++++ .../pipelines/test_validation_integration.py | 55 ++++++++ .../test_artifacts_validation_integration.py | 50 +++++++ .../test_pipeline_runner_integration.py | 34 +++++ .../dir_viewer/test_build_tree_integration.py | 18 +++ .../test_build_interim_dataset_integration.py | 65 +++++++++ ...est_build_processed_dataset_integration.py | 64 +++++++++ ...cute_all_data_preprocessing_integration.py | 53 ++++++++ .../test_run_all_workflows_integration.py | 34 +++++ .../test_hash_input_row_integration.py | 14 ++ .../inference/test_predict_integration.py | 25 ++++ .../test_store_predictions_integration.py | 43 ++++++ .../test_promotion_service_integration.py | 75 +++++++++++ 22 files changed, 996 insertions(+) create mode 100644 tests/integration/ml/components/feature_selection/test_feature_selector_integration.py create mode 100644 tests/integration/ml/components/imputation/test_fill_categorical_missing_integration.py create mode 100644 
tests/integration/ml/data/validation/test_validate_data_integration.py create mode 100644 tests/integration/ml/features/loading/test_features_and_target_integration.py create mode 100644 tests/integration/ml/modeling/class_weighting/test_resolve_class_weighting_integration.py create mode 100644 tests/integration/ml/pipelines/test_add_model_to_pipeline_integration.py create mode 100644 tests/integration/ml/pipelines/test_builders_integration.py create mode 100644 tests/integration/ml/pipelines/test_models_integration.py create mode 100644 tests/integration/ml/pipelines/test_schema_utils_integration.py create mode 100644 tests/integration/ml/pipelines/test_validation_integration.py create mode 100644 tests/integration/ml/promotion/validation/test_artifacts_validation_integration.py create mode 100644 tests/integration/ml/utils/pipeline_core/test_pipeline_runner_integration.py create mode 100644 tests/integration/ml_service/backend/dir_viewer/test_build_tree_integration.py create mode 100644 tests/integration/pipelines/data/test_build_interim_dataset_integration.py create mode 100644 tests/integration/pipelines/data/test_build_processed_dataset_integration.py create mode 100644 tests/integration/pipelines/orchestration/test_execute_all_data_preprocessing_integration.py create mode 100644 tests/integration/pipelines/orchestration/test_run_all_workflows_integration.py create mode 100644 tests/integration/post_promotion/inference/test_hash_input_row_integration.py create mode 100644 tests/integration/post_promotion/inference/test_predict_integration.py create mode 100644 tests/integration/post_promotion/inference/test_store_predictions_integration.py create mode 100644 tests/integration/promotion/test_promotion_service_integration.py diff --git a/.gitignore b/.gitignore index bd2167a1..f2cb0e19 100644 --- a/.gitignore +++ b/.gitignore @@ -47,6 +47,9 @@ nosetests.xml coverage.xml coverage.json coverage_ml_service.xml +coverage_e2e.xml +coverage_unit.xml 
+coverage_integration.xml *.cover *.py.cover .hypothesis/ diff --git a/tests/integration/ml/components/feature_selection/test_feature_selector_integration.py b/tests/integration/ml/components/feature_selection/test_feature_selector_integration.py new file mode 100644 index 00000000..3ddc8b7e --- /dev/null +++ b/tests/integration/ml/components/feature_selection/test_feature_selector_integration.py @@ -0,0 +1,26 @@ +from __future__ import annotations + +import pandas as pd +import pytest +from ml.components.feature_selection.selector import FeatureSelector +from ml.exceptions import DataError + +pytestmark = pytest.mark.integration + + +def test_feature_selector_returns_only_selected_columns() -> None: + df = pd.DataFrame({"a": [1, 2], "b": [3, 4], "c": [5, 6]}) + selector = FeatureSelector(selected_features=["a", "c"]) + + out = selector.transform(df) + + assert list(out.columns) == ["a", "c"] + assert out.shape == (2, 2) + + +def test_feature_selector_raises_on_missing_columns() -> None: + df = pd.DataFrame({"a": [1]}) + selector = FeatureSelector(selected_features=["a", "b"]) + + with pytest.raises(DataError): + selector.transform(df) diff --git a/tests/integration/ml/components/imputation/test_fill_categorical_missing_integration.py b/tests/integration/ml/components/imputation/test_fill_categorical_missing_integration.py new file mode 100644 index 00000000..11ae8a53 --- /dev/null +++ b/tests/integration/ml/components/imputation/test_fill_categorical_missing_integration.py @@ -0,0 +1,30 @@ +from __future__ import annotations + +import pandas as pd +import pytest +from ml.components.imputation.categorical import FillCategoricalMissing + +pytestmark = pytest.mark.integration + + +def test_fill_categorical_missing_handles_na_and_categorical() -> None: + df = pd.DataFrame( + { + "col1": ["a", None, "b"], + "cat_col": pd.Series(pd.Categorical(["x", None, "y"])), + } + ) + + transformer = FillCategoricalMissing(categorical_features=["cat_col", "col1"]) + + out = 
transformer.transform(df) + + # No missing values remain + assert out["col1"].isnull().sum() == 0 + assert out["cat_col"].isnull().sum() == 0 + + # 'missing' placeholder present and all values are strings + assert "missing" in out["col1"].tolist() + assert "missing" in out["cat_col"].tolist() + assert all(isinstance(v, str) for v in out["col1"].tolist()) + assert all(isinstance(v, str) for v in out["cat_col"].tolist()) diff --git a/tests/integration/ml/data/validation/test_validate_data_integration.py b/tests/integration/ml/data/validation/test_validate_data_integration.py new file mode 100644 index 00000000..126a1bce --- /dev/null +++ b/tests/integration/ml/data/validation/test_validate_data_integration.py @@ -0,0 +1,37 @@ +from __future__ import annotations + +from pathlib import Path +from types import SimpleNamespace +from typing import Any + +import pytest + +from ml.data.validation.validate_data import validate_data +from ml.exceptions import UserError + +pytestmark = pytest.mark.integration + + +def test_validate_data_no_expected_hash_returns_empty(tmp_path: Path) -> None: + data_file = tmp_path / "data.csv" + data_file.write_text("1,2,3") + + metadata: dict[str, Any] = {} + + res = validate_data(data_path=data_file, metadata=metadata) + assert res == "" + + +def test_validate_data_mismatch_raises(monkeypatch: Any, tmp_path: Path) -> None: + data_file = tmp_path / "data.csv" + data_file.write_text("1,2,3") + + metadata: dict[str, Any] = {"data": {"hash": "expectedhash"}} + + # Patch the imported hash_data to return a different hash + import ml.data.validation.validate_data as mod + + monkeypatch.setattr(mod, "hash_data", lambda p: "actualhash") + + with pytest.raises(UserError): + validate_data(data_path=data_file, metadata=metadata) diff --git a/tests/integration/ml/features/loading/test_features_and_target_integration.py b/tests/integration/ml/features/loading/test_features_and_target_integration.py new file mode 100644 index 00000000..a07843e6 --- 
/dev/null +++ b/tests/integration/ml/features/loading/test_features_and_target_integration.py @@ -0,0 +1,83 @@ +from __future__ import annotations + +from pathlib import Path +from types import SimpleNamespace +from typing import Any, cast + +import ml.features.loading.features_and_target as mod +import pandas as pd +import pytest + +pytestmark = pytest.mark.integration + + +def test_load_features_and_target_basic_flow(tmp_path: Path, monkeypatch: Any) -> None: + # Minimal model config + model_cfg = SimpleNamespace( + feature_store=SimpleNamespace(path=str(tmp_path / "feature_store"), feature_sets=[]), + target=SimpleNamespace(name="target", version="v1"), + segmentation=SimpleNamespace(), + min_rows=1, + ) + + # Fake feature set spec + fs_spec = SimpleNamespace(name="fs1", version="v1", file_name="fs1.csv", data_format="csv") + + snapshot_path = tmp_path / "snap1" + snapshot_path.mkdir() + + # Metadata with required fields + metadata = { + "feature_schema_hash": "fs_hash", + "operator_hash": "op_hash", + "feature_type": "tabular", + "data_lineage": [ + { + "ref": "r", + "name": "n", + "version": "v", + "format": "csv", + "path_suffix": "p", + "merge_key": ("id",), + "merge_how": None, + "merge_validate": None, + "snapshot_id": "s", + "path": "p", + "loader_validation_hash": "h", + "data_hash": "h", + "row_count": 2, + "column_count": 2, + } + ], + "in_memory_hash": "m", + "file_hash": "f", + "entity_key": "id", + } + + sel = {"fs_spec": fs_spec, "snapshot_path": snapshot_path, "metadata": metadata} + + # Monkeypatch internal helpers to avoid heavy IO/validation + monkeypatch.setattr(mod, "DataLineageEntry", lambda **kwargs: tuple(sorted(kwargs.items()))) + monkeypatch.setattr(mod, "read_data", lambda fmt, p: pd.DataFrame({"id": [1, 2], "f1": [10, 20]})) + monkeypatch.setattr(mod, "validate_entity_key", lambda df, key: None) + monkeypatch.setattr(mod, "validate_feature_set", lambda *a, **k: None) + monkeypatch.setattr(mod, "load_and_validate_data", lambda dl: None) 
+ monkeypatch.setattr(mod, "get_target_with_entity_key", lambda data, key, entity_key: pd.DataFrame({"id": [1, 2], "target": [0, 1]})) + monkeypatch.setattr(mod, "apply_segmentation", lambda data, seg_cfg: data) + monkeypatch.setattr(mod, "validate_feature_target_entity_key", lambda *a, **k: None) + monkeypatch.setattr(mod, "validate_min_rows", lambda *a, **k: None) + monkeypatch.setattr(mod, "validate_target", lambda *a, **k: None) + monkeypatch.setattr(mod, "validate_and_construct_feature_lineage", lambda raw: []) + + X, y, lineage, entity_key = mod.load_features_and_target( + cast(mod.SearchModelConfig, model_cfg), + snapshot_selection=[sel], + snapshot_binding_key=None, + drop_entity_key=True, + strict=True, + ) + + assert entity_key == "id" + assert "id" not in X.columns + assert list(y) == [0, 1] + assert isinstance(lineage, list) diff --git a/tests/integration/ml/modeling/class_weighting/test_resolve_class_weighting_integration.py b/tests/integration/ml/modeling/class_weighting/test_resolve_class_weighting_integration.py new file mode 100644 index 00000000..a439ad83 --- /dev/null +++ b/tests/integration/ml/modeling/class_weighting/test_resolve_class_weighting_integration.py @@ -0,0 +1,27 @@ +from __future__ import annotations + +from types import SimpleNamespace +from typing import Any, cast + +import pytest +from ml.modeling.class_weighting.models import DataStats +from ml.modeling.class_weighting.resolve_class_weighting import resolve_class_weighting + +pytestmark = pytest.mark.integration + + +def _make_config(policy: str, strategy: str | None = None, imbalance_threshold: float | None = None): + return SimpleNamespace(class_weighting=SimpleNamespace(policy=policy, strategy=strategy, imbalance_threshold=imbalance_threshold)) + + +def test_resolve_class_weighting_ratio_for_catboost_and_xgboost() -> None: + config = _make_config(policy="ratio", strategy="ratio") + stats = DataStats(n_samples=12, class_counts={0: 10, 1: 2}, minority_ratio=2 / 12) + + res_cb = 
resolve_class_weighting(cast(Any, config), stats, library="catboost") + assert "class_weights" in res_cb and isinstance(res_cb["class_weights"], list) + assert pytest.approx(res_cb["class_weights"][1]) == 10 / 2 + + res_xgb = resolve_class_weighting(cast(Any, config), stats, library="xgboost") + assert "scale_pos_weight" in res_xgb + assert pytest.approx(res_xgb["scale_pos_weight"]) == 10 / 2 diff --git a/tests/integration/ml/pipelines/test_add_model_to_pipeline_integration.py b/tests/integration/ml/pipelines/test_add_model_to_pipeline_integration.py new file mode 100644 index 00000000..7805bf9c --- /dev/null +++ b/tests/integration/ml/pipelines/test_add_model_to_pipeline_integration.py @@ -0,0 +1,54 @@ +from __future__ import annotations + +import importlib +import sys +import types +from typing import Any, cast + +import pytest +from ml.exceptions import PipelineContractError +from sklearn.pipeline import Pipeline + +pytestmark = pytest.mark.integration + + +class DummyModel: + pass + + +def _import_add_model_module_with_registry(mapping: dict[str, type]): + # Ensure fresh import and inject a lightweight fake registries.catalogs + mod_name = "ml.pipelines.composition.add_model_to_pipeline" + registries_name = "ml.registries" + catalogs_name = "ml.registries.catalogs" + + sys.modules.pop(mod_name, None) + + fake_registries = types.ModuleType(registries_name) + fake_registries.__path__ = [] + sys.modules[registries_name] = fake_registries + + fake_catalogs = types.ModuleType(catalogs_name) + cast(Any, fake_catalogs).MODEL_CLASS_REGISTRY = mapping + sys.modules[catalogs_name] = fake_catalogs + + return importlib.import_module(mod_name) + + +def test_add_model_to_pipeline_appends_supported_model() -> None: + mod = _import_add_model_module_with_registry({"Dummy": DummyModel}) + + pipeline = Pipeline([("noop", object())]) + model = DummyModel() + + pipeline_with_model = mod.add_model_to_pipeline(pipeline, model) + + assert pipeline_with_model.steps[-1][0] == "Model" + 
assert pipeline_with_model.steps[-1][1] is model + + +def test_add_model_to_pipeline_rejects_unsupported() -> None: + mod = _import_add_model_module_with_registry({}) + + with pytest.raises(PipelineContractError): + mod.add_model_to_pipeline(Pipeline([("noop", object())]), object()) diff --git a/tests/integration/ml/pipelines/test_builders_integration.py b/tests/integration/ml/pipelines/test_builders_integration.py new file mode 100644 index 00000000..e76d02e5 --- /dev/null +++ b/tests/integration/ml/pipelines/test_builders_integration.py @@ -0,0 +1,126 @@ +from __future__ import annotations + +import importlib +import sys +import types +from types import SimpleNamespace +from typing import Any, cast + +import pandas as pd +import pytest +from ml.config.schemas.model_cfg import SearchModelConfig +from ml.pipelines.models import PipelineConfig + +pytestmark = pytest.mark.integration + + +class _SchemaValidator: + def __init__(self, *, required_features: list[str]) -> None: + self.required_features = required_features + + +class _FillCategoricalMissing: + def __init__(self, *, categorical_features: list[str]) -> None: + self.categorical_features = categorical_features + + +class _FeatureEngineer: + def __init__(self, *, derived_schema: pd.DataFrame, operators: dict[str, object]) -> None: + self.derived_schema = derived_schema + self.operators = operators + + +class _FeatureSelector: + def __init__(self, *, selected_features: list[str]) -> None: + self.selected_features = selected_features + + +def _pipeline_cfg() -> PipelineConfig: + return PipelineConfig.model_validate( + { + "name": "p", + "version": "v1", + "steps": [ + "SchemaValidator", + "FillCategoricalMissing", + "FeatureEngineer", + "FeatureSelector", + "Model", + ], + "assumptions": { + "handles_categoricals": True, + "supports_regression": True, + "supports_classification": True, + }, + "lineage": {"created_by": "t", "created_at": "2026-01-01T00:00:00Z"}, + } + ) + + +def 
test_build_pipeline_wires_components_correctly(monkeypatch) -> None: + input_schema = pd.DataFrame({"feature": ["f1"], "dtype": ["float64"]}) + derived_schema = pd.DataFrame({"feature": ["f2"], "source_operator": ["op_a"]}) + + features = SimpleNamespace( + input_features=["f1"], selected_features=["f1", "f2"], categorical_features=["cat_a"] + ) + operators: dict[str, object] = {"op_a": object()} + + # Inject lightweight fake dependency modules to avoid heavy imports / circulars + module_name = "ml.pipelines.builders" + schema_utils_name = "ml.pipelines.schema_utils" + operator_factory_name = "ml.pipelines.operator_factory" + registries_name = "ml.registries" + catalogs_name = "ml.registries.catalogs" + + sys.modules.pop(module_name, None) + + fake_schema = types.ModuleType(schema_utils_name) + cast(Any, fake_schema).get_pipeline_features = lambda *a, **k: features + sys.modules[schema_utils_name] = fake_schema + + fake_operator = types.ModuleType(operator_factory_name) + cast(Any, fake_operator).build_operators = lambda ds: operators + sys.modules[operator_factory_name] = fake_operator + + fake_registries = types.ModuleType(registries_name) + fake_registries.__path__ = [] + sys.modules[registries_name] = fake_registries + + fake_catalogs = types.ModuleType(catalogs_name) + cast(Any, fake_catalogs).PIPELINE_COMPONENTS = { + "SchemaValidator": _SchemaValidator, + "FillCategoricalMissing": _FillCategoricalMissing, + "FeatureEngineer": _FeatureEngineer, + "FeatureSelector": _FeatureSelector, + "Model": None, + } + sys.modules[catalogs_name] = fake_catalogs + + builders = importlib.import_module(module_name) + + model_cfg = SimpleNamespace(segmentation=SimpleNamespace(enabled=False, include_in_model=False, filters=[])) + pipeline_cfg = _pipeline_cfg() + + pipeline = builders.build_pipeline( + model_cfg=cast(SearchModelConfig, model_cfg), + pipeline_cfg=pipeline_cfg, + input_schema=input_schema, + derived_schema=derived_schema, + ) + + assert [name for name, _ in 
pipeline.steps] == [ + "schemavalidator", + "fillcategoricalmissing", + "featureengineer", + "featureselector", + ] + assert isinstance(pipeline.named_steps["schemavalidator"], _SchemaValidator) + assert pipeline.named_steps["schemavalidator"].required_features == ["f1"] + assert isinstance(pipeline.named_steps["fillcategoricalmissing"], _FillCategoricalMissing) + assert pipeline.named_steps["fillcategoricalmissing"].categorical_features == ["cat_a"] + assert isinstance(pipeline.named_steps["featureengineer"], _FeatureEngineer) + assert pipeline.named_steps["featureengineer"].derived_schema.equals(derived_schema) + assert pipeline.named_steps["featureengineer"].operators is operators + assert isinstance(pipeline.named_steps["featureselector"], _FeatureSelector) + assert pipeline.named_steps["featureselector"].selected_features == ["f1", "f2"] diff --git a/tests/integration/ml/pipelines/test_models_integration.py b/tests/integration/ml/pipelines/test_models_integration.py new file mode 100644 index 00000000..2a649816 --- /dev/null +++ b/tests/integration/ml/pipelines/test_models_integration.py @@ -0,0 +1,41 @@ +from __future__ import annotations + +from datetime import datetime + +import pytest +from ml.exceptions import ConfigError +from ml.pipelines.models import PipelineConfig + +pytestmark = pytest.mark.integration + + +def _base_cfg_dict() -> dict: + return { + "name": "p", + "version": "v1", + "steps": ["SchemaValidator"], + "assumptions": { + "handles_categoricals": True, + "supports_regression": True, + "supports_classification": True, + }, + "lineage": {"created_by": "t", "created_at": datetime.utcnow()}, + } + + +def test_pipeline_config_validates_successfully() -> None: + PipelineConfig.model_validate(_base_cfg_dict()) + + +def test_pipeline_config_rejects_invalid_version() -> None: + bad = _base_cfg_dict() + bad["version"] = "1" + with pytest.raises(ConfigError): + PipelineConfig.model_validate(bad) + + +def test_pipeline_config_rejects_unknown_steps() 
-> None: + bad = _base_cfg_dict() + bad["steps"] = ["UnknownStep"] + with pytest.raises(ConfigError): + PipelineConfig.model_validate(bad) diff --git a/tests/integration/ml/pipelines/test_schema_utils_integration.py b/tests/integration/ml/pipelines/test_schema_utils_integration.py new file mode 100644 index 00000000..b3c1a370 --- /dev/null +++ b/tests/integration/ml/pipelines/test_schema_utils_integration.py @@ -0,0 +1,39 @@ +from __future__ import annotations + +from types import SimpleNamespace +from typing import cast + +import ml.pipelines.schema_utils as mod +import pandas as pd +import pytest +from ml.pipelines.constants.pipeline_features import PipelineFeatures + +pytestmark = pytest.mark.integration + + +def test_get_categorical_and_pipeline_features_with_and_without_segmentation() -> None: + input_schema = pd.DataFrame({"feature": ["a", "b", "c"], "dtype": ["object", "int64", "category"]}) + derived_schema = pd.DataFrame({"feature": ["d"], "source_operator": ["op"]}) + + # segmentation disabled -> include all input features + seg_cfg = SimpleNamespace(enabled=False, include_in_model=False, filters=[]) + model_cfg = SimpleNamespace(segmentation=seg_cfg) + + features: PipelineFeatures = mod.get_pipeline_features( + model_cfg=cast(mod.SearchModelConfig, model_cfg), input_schema=input_schema, derived_schema=derived_schema + ) + + assert features.input_features == ["a", "b", "c"] + assert features.derived_features == ["d"] + assert "a" in features.categorical_features + + # segmentation enabled but exclude segmentation columns from model inputs + seg_filters = [SimpleNamespace(column="b")] + seg_cfg2 = SimpleNamespace(enabled=True, include_in_model=False, filters=seg_filters) + model_cfg2 = SimpleNamespace(segmentation=seg_cfg2) + + features2: PipelineFeatures = mod.get_pipeline_features( + model_cfg=cast(mod.SearchModelConfig, model_cfg2), input_schema=input_schema, derived_schema=derived_schema + ) + + assert "b" not in features2.input_features diff --git 
a/tests/integration/ml/pipelines/test_validation_integration.py b/tests/integration/ml/pipelines/test_validation_integration.py new file mode 100644 index 00000000..47a77499 --- /dev/null +++ b/tests/integration/ml/pipelines/test_validation_integration.py @@ -0,0 +1,55 @@ +from __future__ import annotations + +from pathlib import Path +from types import SimpleNamespace +from typing import Any + +import ml.pipelines.validation as mod +import pytest +from ml.exceptions import ConfigError + +pytestmark = pytest.mark.integration + + +def _base_pipeline_cfg() -> dict[str, Any]: + return { + "name": "p", + "version": "v1", + "steps": ["SchemaValidator"], + "assumptions": { + "handles_categoricals": True, + "supports_regression": True, + "supports_classification": True, + }, + "lineage": {"created_by": "t", "created_at": "2026-01-01T00:00:00Z"}, + } + + +def test_validate_pipeline_config_accepts_valid_cfg() -> None: + cfg = _base_pipeline_cfg() + validated = mod.validate_pipeline_config(cfg) + assert validated.name == "p" + + +def test_validate_pipeline_config_rejects_bad_version() -> None: + bad = _base_pipeline_cfg() + bad["version"] = "1" + with pytest.raises(ConfigError): + mod.validate_pipeline_config(bad) + + +def test_validate_pipeline_config_consistency_happy_path(tmp_path: Path, monkeypatch: Any) -> None: + # Fake load_json to return minimal metadata and validate_search_record to return expected object + monkeypatch.setattr(mod, "load_json", lambda p: {"metadata": {"pipeline_hash": "h1"}}) + monkeypatch.setattr(mod, "validate_search_record", lambda raw: SimpleNamespace(metadata=SimpleNamespace(pipeline_hash="h1"))) + + # Should not raise + mod.validate_pipeline_config_consistency(actual_hash="h1", search_dir=tmp_path) + + +def test_validate_pipeline_config_consistency_mismatch_raises(tmp_path: Path, monkeypatch: Any) -> None: + monkeypatch.setattr(mod, "load_json", lambda p: {"metadata": {"pipeline_hash": "h_expected"}}) + monkeypatch.setattr(mod, 
"validate_search_record", lambda raw: SimpleNamespace(metadata=SimpleNamespace(pipeline_hash="h_expected"))) + + with pytest.raises(ConfigError): + mod.validate_pipeline_config_consistency(actual_hash="h_actual", search_dir=tmp_path) diff --git a/tests/integration/ml/promotion/validation/test_artifacts_validation_integration.py b/tests/integration/ml/promotion/validation/test_artifacts_validation_integration.py new file mode 100644 index 00000000..1a5721dd --- /dev/null +++ b/tests/integration/ml/promotion/validation/test_artifacts_validation_integration.py @@ -0,0 +1,50 @@ +from __future__ import annotations + +from pathlib import Path +from types import SimpleNamespace +from typing import Any, cast + +import ml.promotion.validation.artifacts as mod +import pytest + +pytestmark = pytest.mark.integration + + +def test_validate_artifacts_consistency_happy_path(tmp_path: Path, monkeypatch: Any) -> None: + # Create fake model and pipeline files + model_file = tmp_path / "model.bin" + model_file.write_text("model") + pipeline_file = tmp_path / "pipeline.joblib" + pipeline_file.write_text("pipeline") + + # Build simple metadata objects expected by the validator + artifacts = SimpleNamespace( + model_path=str(model_file), + model_hash="deadbeef", + pipeline_path=str(pipeline_file), + pipeline_hash="pdeadbeef", + ) + + run_identity = SimpleNamespace(status="success") + training_meta = SimpleNamespace(run_identity=run_identity, artifacts=artifacts) + evaluation_meta = SimpleNamespace(run_identity=run_identity, artifacts=artifacts) + explain_meta = SimpleNamespace(run_identity=run_identity, artifacts=artifacts) + + runners_metadata = SimpleNamespace( + training_metadata=training_meta, + evaluation_metadata=evaluation_meta, + explainability_metadata=explain_meta, + ) + + # Fake hashing to match expected hashes + def fake_hash_artifact(p: Path) -> str: + if p.name == "model.bin": + return "deadbeef" + if p.name == "pipeline.joblib": + return "pdeadbeef" + return "x" + + 
monkeypatch.setattr(mod, "hash_artifact", fake_hash_artifact) + + # Should not raise + mod.validate_artifacts_consistency(cast(mod.RunnersMetadata, runners_metadata)) diff --git a/tests/integration/ml/utils/pipeline_core/test_pipeline_runner_integration.py b/tests/integration/ml/utils/pipeline_core/test_pipeline_runner_integration.py new file mode 100644 index 00000000..8b66854a --- /dev/null +++ b/tests/integration/ml/utils/pipeline_core/test_pipeline_runner_integration.py @@ -0,0 +1,34 @@ +from __future__ import annotations + +from dataclasses import dataclass +from typing import Any + +import pytest +from ml.utils.pipeline_core.runner import PipelineRunner +from ml.utils.pipeline_core.step import PipelineStep + +pytestmark = pytest.mark.integration + + +@dataclass +class DummyStep(PipelineStep[dict[str, Any]]): + name: str + + def run(self, ctx: dict[str, Any]) -> dict[str, Any]: + ctx = dict(ctx) + ctx[self.name] = ctx.get(self.name, 0) + 1 + return ctx + + +def test_pipeline_runner_executes_steps_in_order() -> None: + steps: list[PipelineStep[dict[str, Any]]] = [ + DummyStep(name="a"), + DummyStep(name="b"), + DummyStep(name="a"), + ] + runner = PipelineRunner(steps=steps) + ctx = {"a": 0} + out = runner.run(ctx) + + assert out["a"] == 2 + assert out["b"] == 1 diff --git a/tests/integration/ml_service/backend/dir_viewer/test_build_tree_integration.py b/tests/integration/ml_service/backend/dir_viewer/test_build_tree_integration.py new file mode 100644 index 00000000..7cb7eadf --- /dev/null +++ b/tests/integration/ml_service/backend/dir_viewer/test_build_tree_integration.py @@ -0,0 +1,18 @@ +from pathlib import Path +from typing import Any, cast + +from ml_service.backend.dir_viewer.utils.build_tree import build_tree + + +def test_build_tree_returns_nested_structure(tmp_path: Path) -> None: + base = tmp_path / "root" + (base / "a").mkdir(parents=True) + (base / "a" / "b").mkdir(parents=True) + (base / "a" / "b" / "file.txt").write_text("x") + (base / 
"other.txt").write_text("y") + + tree = cast(dict[str, Any], build_tree(base)) + + assert "a" in tree + assert "other.txt" in tree + assert tree["a"]["b"]["file.txt"] is None diff --git a/tests/integration/pipelines/data/test_build_interim_dataset_integration.py b/tests/integration/pipelines/data/test_build_interim_dataset_integration.py new file mode 100644 index 00000000..6f4900de --- /dev/null +++ b/tests/integration/pipelines/data/test_build_interim_dataset_integration.py @@ -0,0 +1,65 @@ +from __future__ import annotations + +from types import SimpleNamespace +from typing import Any +from pathlib import Path + +import pandas as pd +import pytest + +import pipelines.data.build_interim_dataset as mod + +pytestmark = pytest.mark.integration + + +def test_build_interim_dataset_main_happy_path(tmp_path: Path, monkeypatch: Any) -> None: + # Run inside tmp_path so created data/interim stays in tempdir + monkeypatch.chdir(tmp_path) + + args = SimpleNamespace( + data="hotel_bookings", + version="v1", + raw_snapshot_id="latest", + logging_level="INFO", + owner="tester", + ) + + monkeypatch.setattr(mod, "parse_args", lambda: args) + monkeypatch.setattr(mod, "load_yaml", lambda p: {}) + + config = SimpleNamespace(raw_data_version="raw_v1", cleaning={}, data_schema={}, drop_missing_ints=False, drop_duplicates=False, min_rows=1, invariants=[]) + monkeypatch.setattr(mod, "validate_config", lambda raw, type=None: config) + + # Create a fake raw snapshot with metadata + snapshot_dir = tmp_path / "data" / "raw" / "hotel_bookings" / "raw_v1" / "snap1" + snapshot_dir.mkdir(parents=True) + (snapshot_dir / "metadata.json").write_text("{}") + + monkeypatch.setattr(mod, "get_snapshot_path", lambda sid, parent: snapshot_dir) + monkeypatch.setattr(mod, "load_json", lambda p: {"meta": True}) + monkeypatch.setattr(mod, "get_data_suffix_and_format", lambda metadata, location=None: ("data.csv", "csv")) + monkeypatch.setattr(mod, "validate_data", lambda *a, **k: None) + 
monkeypatch.setattr(mod, "read_data", lambda fmt, path: pd.DataFrame({"a": [1, 2, 2]})) + + monkeypatch.setattr(mod, "normalize_columns", lambda df, cleaning: df) + monkeypatch.setattr(mod, "enforce_schema", lambda df, schema, drop_missing_ints: df) + monkeypatch.setattr(mod, "clean_data", lambda df, invariants: df) + monkeypatch.setattr(mod, "validate_min_rows", lambda df, min_rows: None) + + monkeypatch.setattr(mod, "save_data", lambda df, config, data_dir: data_dir / "data.csv") + monkeypatch.setattr(mod, "get_memory_usage", lambda df: {"mem": 1}) + monkeypatch.setattr(mod, "compute_memory_change", lambda **k: {"delta": 0}) + monkeypatch.setattr(mod, "prepare_metadata", lambda *a, **k: SimpleNamespace(model_dump=lambda exclude_none=True: {"meta": "ok"})) + + called: dict[str, Any] = {} + + def fake_save_metadata(md, target_dir): + called["md"] = md + called["target"] = target_dir + + monkeypatch.setattr(mod, "save_metadata", fake_save_metadata) + + rc = mod.main() + + assert rc == 0 + assert "md" in called and "target" in called diff --git a/tests/integration/pipelines/data/test_build_processed_dataset_integration.py b/tests/integration/pipelines/data/test_build_processed_dataset_integration.py new file mode 100644 index 00000000..16b5cb10 --- /dev/null +++ b/tests/integration/pipelines/data/test_build_processed_dataset_integration.py @@ -0,0 +1,64 @@ +from __future__ import annotations + +from types import SimpleNamespace +from typing import Any +from pathlib import Path + +import pandas as pd +import pytest + +import pipelines.data.build_processed_dataset as mod + +pytestmark = pytest.mark.integration + + +def test_build_processed_dataset_main_happy_path(tmp_path: Path, monkeypatch: Any) -> None: + # Run inside tmp_path so created data/processed stays in tempdir + monkeypatch.chdir(tmp_path) + + args = SimpleNamespace( + data="hotel_bookings", + version="v1", + interim_snapshot_id="latest", + logging_level="INFO", + owner="tester", + ) + + 
monkeypatch.setattr(mod, "parse_args", lambda: args) + monkeypatch.setattr(mod, "load_yaml", lambda p: {}) + + config = SimpleNamespace(interim_data_version="int_v1", remove_columns=[], data=SimpleNamespace(name="hotel_bookings")) + monkeypatch.setattr(mod, "validate_config", lambda raw, type=None: config) + + # Create a fake interim snapshot with metadata + snapshot_dir = tmp_path / "data" / "interim" / "hotel_bookings" / "int_v1" / "snap1" + snapshot_dir.mkdir(parents=True) + (snapshot_dir / "metadata.json").write_text("{}") + + monkeypatch.setattr(mod, "get_snapshot_path", lambda sid, parent: snapshot_dir) + monkeypatch.setattr(mod, "load_json", lambda p: {"meta": True}) + monkeypatch.setattr(mod, "get_data_suffix_and_format", lambda metadata, location=None: ("data.csv", "csv")) + monkeypatch.setattr(mod, "validate_data", lambda *a, **k: None) + monkeypatch.setattr(mod, "read_data", lambda fmt, path: pd.DataFrame({"a": [1, 2]})) + monkeypatch.setattr(mod, "remove_columns", lambda df, cols: df) + + # Avoid adding row ids during the test - stub the function + monkeypatch.setattr(mod, "add_row_id", lambda df, cfg: (df, None)) + + monkeypatch.setattr(mod, "save_data", lambda df, config, data_dir: data_dir / "data.csv") + monkeypatch.setattr(mod, "get_memory_usage", lambda df: {"mem": 1}) + monkeypatch.setattr(mod, "compute_memory_change", lambda **k: {"delta": 0}) + monkeypatch.setattr(mod, "prepare_metadata", lambda *a, **k: SimpleNamespace(model_dump=lambda exclude_none=True: {"meta": "ok"})) + + called: dict[str, Any] = {} + + def fake_save_metadata(md, target_dir): + called["md"] = md + called["target"] = target_dir + + monkeypatch.setattr(mod, "save_metadata", fake_save_metadata) + + rc = mod.main() + + assert rc == 0 + assert "md" in called and "target" in called diff --git a/tests/integration/pipelines/orchestration/test_execute_all_data_preprocessing_integration.py b/tests/integration/pipelines/orchestration/test_execute_all_data_preprocessing_integration.py 
new file mode 100644 index 00000000..526aa642 --- /dev/null +++ b/tests/integration/pipelines/orchestration/test_execute_all_data_preprocessing_integration.py @@ -0,0 +1,53 @@ +from __future__ import annotations + +import subprocess +from pathlib import Path +from types import SimpleNamespace +from typing import Any + +import pipelines.orchestration.data.execute_all_data_preprocessing as mod +import pytest + +pytestmark = pytest.mark.integration + + +def test_execute_all_data_preprocessing_runs_expected_subprocesses(tmp_path: Path, monkeypatch: Any) -> None: + # Create a minimal repo-like structure under tmp_path + data_raw_snap = tmp_path / "data" / "raw" / "mydata" / "v1" / "snap1" + data_raw_snap.mkdir(parents=True) + + interim_cfg_dir = tmp_path / "configs" / "data" / "interim" / "mydata" + interim_cfg_dir.mkdir(parents=True) + (interim_cfg_dir / "v1.yaml").write_text("dummy: true") + + processed_cfg_dir = tmp_path / "configs" / "data" / "processed" / "mydata" + processed_cfg_dir.mkdir(parents=True) + (processed_cfg_dir / "v1.yaml").write_text("dummy: true") + + # Force the orchestrator to run by disabling skip-if-existing + args = SimpleNamespace(skip_if_existing=False) + monkeypatch.setattr(mod, "parse_args", lambda: args) + monkeypatch.setattr(mod, "setup_logging", lambda *a, **k: None) + monkeypatch.setattr(mod, "log_completion", lambda *a, **k: None) + + # Run from tmp_path so relative paths in the module resolve to our test tree + monkeypatch.chdir(tmp_path) + + called: list[list[str]] = [] + + class FakeResult: + def __init__(self, stdout: str = "ok") -> None: + self.stdout = stdout + self.returncode = 0 + + def fake_run(cmd, check=True, capture_output=True, text=True): + called.append(list(cmd)) + return FakeResult(stdout="ok") + + monkeypatch.setattr(subprocess, "run", fake_run) + + rc = mod.main() + + assert rc == 0 + # Expect at least three subprocess calls: register_raw_snapshot, build_interim_dataset, build_processed_dataset + assert len(called) >= 
3 diff --git a/tests/integration/pipelines/orchestration/test_run_all_workflows_integration.py b/tests/integration/pipelines/orchestration/test_run_all_workflows_integration.py new file mode 100644 index 00000000..ed3c975e --- /dev/null +++ b/tests/integration/pipelines/orchestration/test_run_all_workflows_integration.py @@ -0,0 +1,34 @@ +from __future__ import annotations + +import subprocess +from types import SimpleNamespace + +import pipelines.orchestration.master.run_all_workflows as mod +import pytest + +pytestmark = pytest.mark.integration + + +def test_run_all_workflows_calls_subprocess_for_each_step(monkeypatch) -> None: + args = SimpleNamespace(env="dev", logging_level="INFO", owner="tester", skip_if_existing=True) + monkeypatch.setattr(mod, "parse_args", lambda: args) + monkeypatch.setattr(mod, "setup_logging", lambda *a, **k: None) + monkeypatch.setattr(mod, "log_completion", lambda *a, **k: None) + + called = [] + + class FakeCompleted: + def __init__(self, returncode=0): + self.returncode = returncode + + def fake_run(cmd, text=True): + called.append(cmd) + return FakeCompleted(returncode=0) + + monkeypatch.setattr(subprocess, "run", fake_run) + + rc = mod.main() + + assert rc == 0 + # At least one subprocess call should have been made (three steps expected) + assert len(called) >= 1 diff --git a/tests/integration/post_promotion/inference/test_hash_input_row_integration.py b/tests/integration/post_promotion/inference/test_hash_input_row_integration.py new file mode 100644 index 00000000..9cf9fcdb --- /dev/null +++ b/tests/integration/post_promotion/inference/test_hash_input_row_integration.py @@ -0,0 +1,14 @@ +import numpy as np +import pandas as pd +from ml.post_promotion.inference.hashing.input_row import hash_input_row + + +def test_hash_input_row_handles_nan_and_floats() -> None: + s1 = pd.Series([np.nan, 0.1, "a"], index=["x", "y", "z"]) + s2 = pd.Series([np.nan, 0.1, "a"], index=["x", "y", "z"]) + h1 = hash_input_row(s1) + h2 = 
hash_input_row(s2) + assert h1 == h2 + + s3 = pd.Series([np.nan, 0.10000000000000001, "a"], index=["x", "y", "z"]) + assert hash_input_row(s3) == h1 diff --git a/tests/integration/post_promotion/inference/test_predict_integration.py b/tests/integration/post_promotion/inference/test_predict_integration.py new file mode 100644 index 00000000..8366d9ee --- /dev/null +++ b/tests/integration/post_promotion/inference/test_predict_integration.py @@ -0,0 +1,25 @@ +import pandas as pd +import pytest +from ml.exceptions import InferenceError +from ml.post_promotion.inference.execution.predict import predict + + +def test_predict_raises_inference_error_on_failure() -> None: + class BadArtifact: + def predict(self, X): + raise RuntimeError("boom") + + X = pd.DataFrame({"a": [1, 2]}) + with pytest.raises(InferenceError): + predict(X, BadArtifact()) + + +def test_predict_returns_empty_proba_when_no_predict_proba() -> None: + class ArtifactNoProba: + def predict(self, X): + return [0, 1] + + X = pd.DataFrame({"a": [1, 2]}) + preds, proba = predict(X, ArtifactNoProba()) + assert preds.tolist() == [0, 1] + assert proba.empty diff --git a/tests/integration/post_promotion/inference/test_store_predictions_integration.py b/tests/integration/post_promotion/inference/test_store_predictions_integration.py new file mode 100644 index 00000000..94d09c88 --- /dev/null +++ b/tests/integration/post_promotion/inference/test_store_predictions_integration.py @@ -0,0 +1,43 @@ +from datetime import datetime +from pathlib import Path +from typing import cast + +import pandas as pd +import pyarrow.parquet as pq +from ml.post_promotion.inference.persistence.store_predictions import store_predictions +from ml.promotion.config.registry_entry import RegistryEntry + + +def test_store_predictions_writes_parquet_and_returns_cols(tmp_path: Path) -> None: + df = pd.DataFrame({"id": ["e1", "e2"], "f1": [0.1, 0.2]}) + input_hash = pd.Series(["h1", "h2"]) + preds = pd.Series([0, 1]) + probs = pd.DataFrame([[0.1, 
0.9], [0.8, 0.2]]) + probs.columns = ["p0", "p1"] + + out_dir = tmp_path / "out" + timestamp = datetime.utcnow() + + model_metadata = cast(RegistryEntry, type("M", (), {"model_version": "v1"})()) + + ret = store_predictions( + features=df, + entity_key="id", + run_id="r1", + input_hash=input_hash, + path=out_dir, + timestamp=timestamp, + predictions=preds, + probabilities=probs, + model_metadata=model_metadata, + stage="production", + ) + + assert ret.file_path.exists() + + table = pq.read_table(ret.file_path) + df_read = table.to_pandas() + + assert "run_id" in df_read.columns + assert "entity_id" in df_read.columns + assert "prediction" in df_read.columns diff --git a/tests/integration/promotion/test_promotion_service_integration.py b/tests/integration/promotion/test_promotion_service_integration.py new file mode 100644 index 00000000..80003eb2 --- /dev/null +++ b/tests/integration/promotion/test_promotion_service_integration.py @@ -0,0 +1,75 @@ +from pathlib import Path +from types import SimpleNamespace +from typing import Any, cast + +from ml.promotion.context import PromotionContext, PromotionPaths +from ml.promotion.service import PromotionService + + +def test_promotion_service_runs_and_persists(tmp_path: Path, monkeypatch: Any) -> None: + # Build a minimal context with tmp paths + args = SimpleNamespace( + problem="p", + segment="s", + stage="staging", + version="v1", + experiment_id="e1", + train_run_id="t1", + eval_run_id="e1", + explain_run_id="x1", + ) + + model_registry_dir = tmp_path / "model_registry" + paths = PromotionPaths( + model_registry_dir=model_registry_dir, + run_dir=model_registry_dir / "runs" / "r1", + promotion_configs_dir=tmp_path / "configs" / "promotion", + train_run_dir=tmp_path / "train", + eval_run_dir=tmp_path / "eval", + explain_run_dir=tmp_path / "explain", + registry_path=model_registry_dir / "models.yaml", + archive_path=model_registry_dir / "archive.yaml", + ) + + # ensure run_dir exists so lock path resolution is stable + 
paths.run_dir.mkdir(parents=True, exist_ok=True) + + context = PromotionContext(args=cast(Any, args), run_id="r1", timestamp="ts", paths=paths) + + service = PromotionService() + + # Monkeypatch validators and metadata getter to be no-ops / simple returns + monkeypatch.setattr("ml.promotion.service.validate_run_dirs", lambda *a, **k: None) + monkeypatch.setattr( + "ml.promotion.service.get_runners_metadata", + lambda *a, **k: SimpleNamespace(explainability_metadata=SimpleNamespace(artifacts=[])), + ) + monkeypatch.setattr("ml.promotion.service.validate_run_ids", lambda *a, **k: None) + monkeypatch.setattr("ml.promotion.service.validate_artifacts_consistency", lambda *a, **k: None) + monkeypatch.setattr("ml.promotion.service.validate_explainability_artifacts", lambda *a, **k: None) + + # Replace state loader and persister on the service instance + loaded_state = SimpleNamespace(state="st") + service._state_loader = cast(Any, SimpleNamespace(load=lambda ctx: loaded_state)) + + persisted = {} + def fake_persist(ctx, state, result): + persisted["ctx"] = ctx + persisted["state"] = state + persisted["result"] = result + + service._persister = cast(Any, SimpleNamespace(persist=fake_persist)) + + # Provide a fake strategy returned by _get_strategy + fake_result = {"ok": True} + class FakeStrategy: + def execute(self, ctx, state): + return fake_result + + monkeypatch.setattr(service, "_get_strategy", lambda stage: FakeStrategy()) + + # Run and assert + result = service.run(context) + assert result == fake_result + assert persisted["state"] is loaded_state + assert persisted["result"] == fake_result From fd29490453a844fefa7c5f06fb8f5411fd21fa1b Mon Sep 17 00:00:00 2001 From: Sebastijan-Dominis Date: Tue, 31 Mar 2026 06:29:12 +0200 Subject: [PATCH 15/17] Added more integration tests. 
--- .../test_execute_pipeline_integration.py | 44 ++++++++++++++++-- .../test_execute_script_integration.py | 45 +++++++++++++++++-- 2 files changed, 81 insertions(+), 8 deletions(-) diff --git a/tests/integration/ml_service/backend/pipelines/test_execute_pipeline_integration.py b/tests/integration/ml_service/backend/pipelines/test_execute_pipeline_integration.py index 339d21fe..f1ba22bd 100644 --- a/tests/integration/ml_service/backend/pipelines/test_execute_pipeline_integration.py +++ b/tests/integration/ml_service/backend/pipelines/test_execute_pipeline_integration.py @@ -7,18 +7,54 @@ Note: these tests create and remove transient files inside `tests/` and are safe to run on both Windows and Linux CI agents. """ - from __future__ import annotations import shutil from pathlib import Path from textwrap import dedent +from types import SimpleNamespace from typing import Any from uuid import uuid4 +import ml_service.backend.pipelines.execute_pipeline as ep +import pytest from ml_service.backend.pipelines.execute_pipeline import execute_pipeline from pydantic import BaseModel +pytestmark = pytest.mark.integration + + +class Payload(BaseModel): + name: str | None = None + flag: bool | None = None + empty: str | None = None + + +def test_execute_pipeline_builds_cmd_and_returns_status(monkeypatch: Any) -> None: + payload = Payload(name="abc", flag=True, empty="") + + captured: dict[str, Any] = {} + + def fake_run(cmd, capture_output, text, env, cwd): + captured["cmd"] = cmd + return SimpleNamespace(returncode=0, stdout="ok", stderr="") + + monkeypatch.setattr(ep, "subprocess", SimpleNamespace(run=fake_run)) + monkeypatch.setattr(ep, "EXIT_MEANING", {0: "SUCCESS"}) + + res = ep.execute_pipeline("ml_service.pipelines.foo", payload, boolean_args=["flag"]) # type: ignore[arg-type] + + assert captured["cmd"][:3] == ["python", "-m", "ml_service.pipelines.foo"] + # flags present and empty skipped + assert "--name" in captured["cmd"] and "abc" in captured["cmd"] + assert 
"--flag" in captured["cmd"] and "True" in captured["cmd"] + assert "--empty" not in captured["cmd"] + + assert res["exit_code"] == 0 + assert res["status"] == "SUCCESS" + assert res["stdout"] == "ok" + assert res["stderr"] == "" + def _make_dummy_package(pkg_name: str, code: str) -> Path: base = Path("tests") / pkg_name @@ -68,11 +104,11 @@ def main() -> None: try: base = _make_dummy_package(pkg_name, code) - class Payload(BaseModel): + class LocalPayload(BaseModel): param1: str | None = None flag: bool | None = None - payload = Payload(param1="ok", flag=True) + payload = LocalPayload(param1="ok", flag=True) res: dict[str, Any] = execute_pipeline( f"tests.{pkg_name}.dummy_pipeline", payload, boolean_args=["flag"] ) @@ -85,7 +121,7 @@ class Payload(BaseModel): assert "True" in res["stdout"] or '"flag"' in res["stdout"] # Non-zero exit path - payload2 = Payload(param1="fail", flag=False) + payload2 = LocalPayload(param1="fail", flag=False) res2 = execute_pipeline(f"tests.{pkg_name}.dummy_pipeline", payload2, boolean_args=["flag"]) # type: ignore[arg-type] assert res2["exit_code"] != 0 finally: diff --git a/tests/integration/ml_service/backend/scripts/test_execute_script_integration.py b/tests/integration/ml_service/backend/scripts/test_execute_script_integration.py index ec5f80d8..e9c51857 100644 --- a/tests/integration/ml_service/backend/scripts/test_execute_script_integration.py +++ b/tests/integration/ml_service/backend/scripts/test_execute_script_integration.py @@ -4,18 +4,55 @@ `execute_script(...)` to validate list argument expansion, boolean flags, stdout capture, and exit-code propagation. 
""" - from __future__ import annotations import shutil from pathlib import Path from textwrap import dedent +from types import SimpleNamespace from typing import Any from uuid import uuid4 +import ml_service.backend.scripts.execute_script as es +import pytest from ml_service.backend.scripts.execute_script import execute_script from pydantic import BaseModel +pytestmark = pytest.mark.integration + + +class Payload(BaseModel): + name: str | None = None + items: list[int] | None = None + flag: bool | None = None + + +def test_execute_script_handles_list_and_boolean_args_monkeypatch(monkeypatch: Any) -> None: + payload = Payload(name="xyz", items=[1, 2], flag=False) + + captured: dict[str, Any] = {} + + def fake_run(cmd, capture_output, text, env, cwd): + captured["cmd"] = cmd + return SimpleNamespace(returncode=2, stdout="done", stderr="err") + + monkeypatch.setattr(es, "subprocess", SimpleNamespace(run=fake_run)) + monkeypatch.setattr(es, "EXIT_MEANING", {2: "FAILURE"}) + + res = es.execute_script("ml_service.scripts.bar", payload, boolean_args=["flag"]) # type: ignore[arg-type] + + assert captured["cmd"][:3] == ["python", "-m", "ml_service.scripts.bar"] + # list arg expanded and flag handled as string + assert "--items" in captured["cmd"] + # ensure list values are individually present + assert "1" in captured["cmd"] and "2" in captured["cmd"] + assert "--flag" in captured["cmd"] and "False" in captured["cmd"] + + assert res["exit_code"] == 2 + assert res["status"] == "FAILURE" + assert res["stdout"] == "done" + assert res["stderr"] == "err" + def _make_dummy_package(pkg_name: str, code: str) -> Path: base = Path("tests") / pkg_name @@ -60,13 +97,13 @@ def main() -> None: try: base = _make_dummy_package(pkg_name, code) - class Payload(BaseModel): + class LocalPayload(BaseModel): names: list[str] | None = None param: str | None = None flag: bool | None = None # success path - payload = Payload(names=["a", "b"], param="ok", flag=True) + payload = 
LocalPayload(names=["a", "b"], param="ok", flag=True) res: dict[str, Any] = execute_script(f"tests.{pkg_name}.dummy_script", payload, boolean_args=["flag"]) # type: ignore[arg-type] assert res["exit_code"] == 0 assert '"names"' in res["stdout"] @@ -74,7 +111,7 @@ class Payload(BaseModel): assert "True" in res["stdout"] or '"flag"' in res["stdout"] # failure path - payload2 = Payload(names=["x"], param="fail", flag=False) + payload2 = LocalPayload(names=["x"], param="fail", flag=False) res2 = execute_script(f"tests.{pkg_name}.dummy_script", payload2, boolean_args=["flag"]) # type: ignore[arg-type] assert res2["exit_code"] != 0 finally: From 13635c818d1e061c78cccc57e373a856f44c9a75 Mon Sep 17 00:00:00 2001 From: Sebastijan-Dominis Date: Tue, 31 Mar 2026 08:37:41 +0200 Subject: [PATCH 16/17] Updated documentation. Clarified what the tests are really testing. Improved the main README.md. Corrected a few typos in the other documents. Removed one unnecessary line from pytest.ini. --- README.md | 151 ++++++++++++++++----- docs/architecture/boundaries.md | 2 +- docs/architecture/decisions.md | 12 ++ docs/architecture/system_invariants.md | 2 +- docs/architecture/validation_guarantees.md | 2 +- docs/testing.md | 2 +- pytest.ini | 1 - 7 files changed, 132 insertions(+), 40 deletions(-) diff --git a/README.md b/README.md index 82833919..5a5be57a 100644 --- a/README.md +++ b/README.md @@ -2,53 +2,132 @@ ## Overview -### A reproducible ML experimentation and model lifecycle system. +### An end-to-end ML platform that guarantees reproducibility across datasets, features, and models — with full lineage tracking and validation. - Currently supports the modeling of regression and classification tasks using the CatBoost algorithm. 
- Was initially formed based on a hotel_bookings dataset: - located in `data/raw/hotel_bookings/v1/2026-02-25T22-43-23_732dfdb7/data.csv` - originally from https://www.kaggle.com/datasets/mojtaba142/hotel-booking -- Current architecture expanded to support many datasets. -- The ml workflow covers everything from the registration of a raw data snapshot to model monitoring. + - From: https://www.kaggle.com/datasets/mojtaba142/hotel-booking + - The architecture has since been expanded to support many datasets with minimal code changes. +- The ML workflow covers everything from the registration of a raw data snapshot to model monitoring. +- Designed with **production ML system constraints in mind**: reproducibility, traceability, validation, and modularity. > Note: The repo was previously named `hotel_management`, so you will see that name around the repo; renamed for clarity > on what the project does. > Another note: A few artifacts are intentionally included, along with their respective logs. > This enables quick inspection of expected outputs of each pipeline, without having to run anything. +## Why? + +1. Many ML platforms are either overengineered for small teams, or lack essential safeguards: +- For small teams, overengineering can be an issue: + - Most small teams (1-5 developers) do not need to worry about race conditions, very scalable storage, and so on + - They need a simple, but strong and reliable platform +- Some teams fail in the other direction: + - They fully rely on notebooks + - They forget about validation and lineage tracking + - They avoid elementary checks in order to "keep it simple" + +This project keeps the workflow simple, while still providing the most important sanity checks +across the entire ML workflow. With minor modifications (dataset specificities, different algorithms), +this tool can be used by an individual, or a small team of data scientists. + +2.
Most learning courses are too specific: +- There are many courses on how to do regression or classification, or how to write python code +- There are many tutorials on how to use specific algorithms, and how they work under the hood +- There are very few courses/tutorials explaining the ML workflow in a simple manner +- It is very hard to find a platform for quick experimentation to understand how ML workflows work + +This project can also serve as a learning tool for understanding ML workflows beyond notebook-based experimentation. +It is easy to set up, and comes with a friendly UI, as well as some pre-saved artifacts for quick inspection. +Users can quickly experiment and learn on their own, and the only assumption is that they know how +to either set up Docker, or python and conda. + +## Inspiration + +This project started as part of my master's thesis, where the initial goal was to train several models on a hotel booking +dataset and expose them as tools for an LLM. + +While working on that, I quickly ran into practical issues that are common in real-world ML work but rarely addressed in tutorials: +- Repetitive boilerplate for training and evaluation +- Difficulty reusing pipelines across slightly different setups +- Fragile experiment tracking (risk of losing artifacts or overwriting results) +- Inability to reliably pause and resume long-running experiments +- Lack of structure when working beyond notebooks + +To address these problems, I started building small utilities to make experimentation more reliable and less error-prone. Over time, +this evolved into a broader system focused on reproducibility, modularity, and traceability across the entire ML lifecycle. + +At some point, it became clear that building a proper ML workflow system was a more meaningful direction than the original project idea, +so I leaned into it and expanded the architecture into what it is today. 
+ +## Key Achievements + +- **~17,500** lines of production code +- **~29,000** lines of tests (auto-generated + custom) +- **Fully reproducible pipelines** via artifact hashing +- **End-to-end ML lifecycle support** +- **4,000+** lines of pre-included configurations +- Easy-to-use **ML service** (as a local web app) +- Comprehensive documentation (**3,000+** lines of Markdown) + ## Features -Pipelines for every part of the ml workflow: -- Data preprocessing - - Register raw data snapshots - - Build interim and processed datasets +### End-to-End ML Pipelines: +- Data registration and preprocessing - Feature (set) freezing - Hyperparameter search -- Model training -- Model evaluation -- Model explainability -- Model promotion - - Includes model registry for staging and production - - Archives past production models -- Model inference -- Model monitoring - -Maximum **decoupling** of datasets, feature sets, and modeling -- Datasets merge at runtime, using predefined configs and DAG for ordering -- Feature sets merge at runtime using a predefined entity key -- Models can use any snapshots of datasets and feature sets via snapshot bindings registry -- Validation ensures consistency and predefined minimum row presence - -Full **reproducibility** -- Hashing and downstream validation of relevant `artifacts` and `configs` -- Runtime info validation (hardware, git commit, environment...) 
- -Code **quality** ensured by CI, which includes: -- `ruff` checks -- `mypy` checks (moderate strictness) -- import layer checks -- naming conventions checks -- **1235 tests** -> fails if coverage drops below 90% +- Model training, evaluation and explainability +- Model promotion and archiving +- Model inference and monitoring + +### Reproducibility & Validation +- Artifact hashing across pipelines +- Environment & runtime validation +- Heavy versioning: + - All configurations + - Interim and processed data configurations + - Feature registry + - Global and algorithm defaults + - Model specifications + search and training configurations + - Pipeline configurations + - Environment overlay + - Promotion thresholds + - Snapshot bindings + - Target creation + - Splitting and target creation performed at runtime, based on model specifications + - Inference predictions schema +- Heavily snapshot-based: + - datasets + - feature sets + - training, evaluation, and explainability runs + - promotion and post-promotion runs + +### Modular Architecture +- Decoupled datasets, features, and models +- Runtime datasets (DAG + configurations) and feature sets (entity key + configurations) merging +- Flexible snapshot bindings + +### Reliability +- Atomic file writing +- Runtime saving of best hyperparameters from each search phase (broad + narrow) +- Runtime saving of model snapshots during training (e.g. every 30 seconds) + +### Code Quality +- CI with linting (ruff), typing (mypy), and structure checks +- **90%+** coverage enforced by CI across **1,500+** tests + +## Example Use Case + +A data scientist can: +1. Register a new dataset snapshot +2. Optimize its memory in one or more ways +3. Process the dataset in one or more ways +4. Define and freeze many feature sets, each based on one or more related datasets +5. Perform one or more hyperparameter searches +6. Train models based on the hyperparameter search results (many training runs allowed per each search) +7. 
Evaluate and explain the trained models, however many times +8. Stage, promote, and archive models +9. Run inference and monitoring on incoming data ## Installation @@ -68,7 +147,9 @@ Two options: See the [usage guide](docs/usage.md) for instructions on running the workflow. -### Usage examples (via `ml_service`): +### Usage examples: + +The system includes a browser-based interface (`ml_service`) for interacting with pipelines and configurations: #### Configs Writing, Validation, Saving, and Viewing - Interim Data Configs Example diff --git a/docs/architecture/boundaries.md b/docs/architecture/boundaries.md index 04d44f65..42e7a33f 100644 --- a/docs/architecture/boundaries.md +++ b/docs/architecture/boundaries.md @@ -30,7 +30,7 @@ ## New shared code goes into domain package first - avoid placing shared code in `ml.utils` -- instead, try placing it where it logically belongs, e.g. in `ml.runners`, `ml.modeling`, `ml.promotion`, etc. +- place it where it logically belongs, e.g. in `ml.runners`, `ml.modeling`, `ml.promotion`, etc. - `ml.utils` should only contain code that is genuinely reusable across multiple different domains - for instance, loading json and yaml files, getting the current git commit, and setting up a pipeline runner belong to `ml.utils` - `get_trainer.py` is only used by trainer, so it does not belong in `ml.utils`; instead it belongs to `ml.runners.training.utils` diff --git a/docs/architecture/decisions.md b/docs/architecture/decisions.md index 4a40898a..7ea95cfb 100644 --- a/docs/architecture/decisions.md +++ b/docs/architecture/decisions.md @@ -2,6 +2,18 @@ This file records key architectural decisions, their rationale, and alternatives considered. 
+## Key Architectural Decisions (Summary) + +The system is built around a few core principles: + +- **Immutability of artifacts** (datasets, features, experiments) +- **Full reproducibility via configs + snapshot IDs** +- **Decoupling of datasets, features, and models** +- **Snapshot-based versioning instead of mutable state** +- **Filesystem-based storage with strict validation** + +These decisions shape the entire architecture. Detailed breakdowns are provided below. + ## Decision Classification Each decision is classified as one of: diff --git a/docs/architecture/system_invariants.md b/docs/architecture/system_invariants.md index f95b209f..f72488a3 100644 --- a/docs/architecture/system_invariants.md +++ b/docs/architecture/system_invariants.md @@ -110,4 +110,4 @@ - If any of the above differ, reproducibility is not fully guaranteed. - Config hash match is very important for reproducibility; python version, conda environment hash and git commit matches are moderately important; os and hardware matches are the least important. -- It is technically possible to get the same results with config hash match alone, but the user assummes responsibility for any unexpected results in that case. \ No newline at end of file +- It is technically possible to get the same results with config hash match alone, but the user assumes responsibility for any unexpected results in that case. 
\ No newline at end of file diff --git a/docs/architecture/validation_guarantees.md b/docs/architecture/validation_guarantees.md index f91c2b18..ed59e523 100644 --- a/docs/architecture/validation_guarantees.md +++ b/docs/architecture/validation_guarantees.md @@ -53,7 +53,7 @@ - Metrics suitability for business objective - Absence of data leakage during CV - Param distribution quality -- Compatibility between scoring function and specific algorithm beyong supported enum check +- Compatibility between scoring function and specific algorithm beyond supported enum check ## Promotion Validation diff --git a/docs/testing.md b/docs/testing.md index 24590d72..fd4ae82b 100644 --- a/docs/testing.md +++ b/docs/testing.md @@ -2,7 +2,7 @@ This document describes the testing strategy, conventions, and instructions for this ML project. > Note: Most of the tests currently found in the repo were AI-generated (with careful prompting) -> Note: only the folders that constitute the main focus of the repo are tested (`ml/`, `pipelines/`, `scripts/` (excluding fake data generator)) +> Note: the following directories are tested: `ml/`, `pipelines/`, `ml_service/`, `scripts/` (excluding fake data generator) ## Environment Setup diff --git a/pytest.ini b/pytest.ini index abbe6298..0f3b5f8e 100644 --- a/pytest.ini +++ b/pytest.ini @@ -8,6 +8,5 @@ addopts = --strict-markers markers = unit: fast isolated unit tests - slow: tests that are slow or involve real training integration: integration tests that may involve multiple components e2e: end-to-end tests that exercise CLI or multi-layer flows \ No newline at end of file From 7fd87c6d041aca2699c26cbe803212f4d7bff6f8 Mon Sep 17 00:00:00 2001 From: Sebastijan-Dominis Date: Tue, 31 Mar 2026 08:50:38 +0200 Subject: [PATCH 17/17] CI fix - httpx installation. The tests were failing in the CI, and the instruction says that httpx is required for testing. I added the installation of httpx in the tests.yml CI workflow file.
--- .github/workflows/tests.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 79b3d31b..63ccc939 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -36,6 +36,7 @@ jobs: conda run -n hotel_management pip install -r requirements.txt conda run -n hotel_management pip install pytest conda run -n hotel_management pip install coverage + conda run -n hotel_management pip install httpx - name: Show environment run: conda run -n hotel_management conda list