From 35aed66dd510c2171e730194b97d7c318c5aadd1 Mon Sep 17 00:00:00 2001 From: taniya-das Date: Wed, 18 Jun 2025 11:29:17 +0200 Subject: [PATCH 1/2] convert static_cache_dir and workdir to fixture and test to pytest --- pyproject.toml | 1 + tests/conftest.py | 29 +++++++ tests/test_datasets/test_dataset.py | 114 ++++++++++++++-------------- 3 files changed, 89 insertions(+), 55 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index fa9a70dc1..24701d08a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -80,6 +80,7 @@ test=[ "mypy", "ruff", "requests-mock", + "pytest-mock", ] examples=[ "matplotlib", diff --git a/tests/conftest.py b/tests/conftest.py index b523117c1..9167edc57 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -37,6 +37,7 @@ import openml from openml.testing import TestBase +import inspect # creating logger for unit test file deletion status logger = logging.getLogger("unit_tests") @@ -288,3 +289,31 @@ def with_test_cache(test_files_directory, request): openml.config.set_root_cache_directory(_root_cache_directory) if tmp_cache.exists(): shutil.rmtree(tmp_cache) + + +def find_test_files_dir(start_path: Path, max_levels: int = 1) -> Path: + """ + Starting from start_path, climb up to max_levels parents looking for 'files' directory. + Returns the Path to the 'files' directory if found. + Raises FileNotFoundError if not found within max_levels parents. + """ + current = start_path.resolve() + for _ in range(max_levels): + candidate = current / "files" + if candidate.is_dir(): + return candidate + current = current.parent + raise FileNotFoundError(f"Cannot find 'files' directory within {max_levels} levels up from {start_path}") + +@pytest.fixture +def static_cache_dir(): + + start_path = Path(__file__).parent + return find_test_files_dir(start_path) + +@pytest.fixture +def workdir(tmp_path): + original_cwd = os.getcwd() + os.chdir(tmp_path) + yield tmp_path + os.chdir(original_cwd) diff --git a/tests/test_datasets/test_dataset.py b/tests/test_datasets/test_dataset.py index 2f323b38a..e839b09f2 100644 --- a/tests/test_datasets/test_dataset.py +++ b/tests/test_datasets/test_dataset.py @@ -15,6 +15,8 @@ from openml.exceptions import PyOpenMLError from openml.testing import TestBase +import pytest + @pytest.mark.production() class OpenMLDatasetTest(TestBase): @@ -398,61 +400,63 @@ def test_get_sparse_categorical_data_id_395(self): assert len(feature.nominal_values) == 25 -class OpenMLDatasetFunctionTest(TestBase): - @unittest.mock.patch("openml.datasets.dataset.pickle") - @unittest.mock.patch("openml.datasets.dataset._get_features_pickle_file") - def test__read_features(self, filename_mock, pickle_mock): - """Test we read the features from the xml if no cache pickle is available. - - This test also does some simple checks to verify that the features are read correctly - """ - filename_mock.return_value = os.path.join(self.workdir, "features.xml.pkl") - pickle_mock.load.side_effect = FileNotFoundError - features = openml.datasets.dataset._read_features( - os.path.join( - self.static_cache_dir, - "org", - "openml", - "test", - "datasets", - "2", - "features.xml", - ), - ) - assert isinstance(features, dict) - assert len(features) == 39 - assert isinstance(features[0], OpenMLDataFeature) - assert features[0].name == "family" - assert len(features[0].nominal_values) == 9 - # pickle.load is never called because the features pickle file didn't exist - assert pickle_mock.load.call_count == 0 - assert pickle_mock.dump.call_count == 1 - - @unittest.mock.patch("openml.datasets.dataset.pickle") - @unittest.mock.patch("openml.datasets.dataset._get_qualities_pickle_file") - def test__read_qualities(self, filename_mock, pickle_mock): - """Test we read the qualities from the xml if no cache pickle is available. - - This test also does some minor checks to ensure that the qualities are read correctly. - """ - filename_mock.return_value = os.path.join(self.workdir, "qualities.xml.pkl") - pickle_mock.load.side_effect = FileNotFoundError - qualities = openml.datasets.dataset._read_qualities( - os.path.join( - self.static_cache_dir, - "org", - "openml", - "test", - "datasets", - "2", - "qualities.xml", - ), - ) - assert isinstance(qualities, dict) - assert len(qualities) == 106 - # pickle.load is never called because the qualities pickle file didn't exist - assert pickle_mock.load.call_count == 0 - assert pickle_mock.dump.call_count == 1 +def test__read_features(mocker, workdir, static_cache_dir): + """Test we read the features from the xml if no cache pickle is available. + This test also does some simple checks to verify that the features are read correctly + """ + filename_mock = mocker.patch("openml.datasets.dataset._get_features_pickle_file") + pickle_mock = mocker.patch("openml.datasets.dataset.pickle") + + filename_mock.return_value = os.path.join(workdir, "features.xml.pkl") + pickle_mock.load.side_effect = FileNotFoundError + + features = openml.datasets.dataset._read_features( + os.path.join( + static_cache_dir, + "org", + "openml", + "test", + "datasets", + "2", + "features.xml", + ), + ) + assert isinstance(features, dict) + assert len(features) == 39 + assert isinstance(features[0], OpenMLDataFeature) + assert features[0].name == "family" + assert len(features[0].nominal_values) == 9 + # pickle.load is never called because the features pickle file didn't exist + assert pickle_mock.load.call_count == 0 + assert pickle_mock.dump.call_count == 1 + + +def test__read_qualities(static_cache_dir, workdir, mocker): + """Test we read the qualities from the xml if no cache pickle is available. + This test also does some minor checks to ensure that the qualities are read correctly. + """ + + filename_mock = mocker.patch("openml.datasets.dataset._get_qualities_pickle_file") + pickle_mock = mocker.patch("openml.datasets.dataset.pickle") + + filename_mock.return_value=os.path.join(workdir, "qualities.xml.pkl") + pickle_mock.load.side_effect = FileNotFoundError + + qualities = openml.datasets.dataset._read_qualities( + os.path.join( + static_cache_dir, + "org", + "openml", + "test", + "datasets", + "2", + "qualities.xml", + ), + ) + assert isinstance(qualities, dict) + assert len(qualities) == 106 + assert pickle_mock.load.call_count == 0 + assert pickle_mock.dump.call_count == 1 From 0f1791823d22e289279bfcbb501f349bd0ee4ce8 Mon Sep 17 00:00:00 2001 From: taniya-das Date: Thu, 19 Jun 2025 12:24:47 +0200 Subject: [PATCH 2/2] corrections --- tests/conftest.py | 20 +++----------------- 1 file changed, 3 insertions(+), 17 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index 9167edc57..e4d75a6ef 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -291,29 +291,15 @@ def with_test_cache(test_files_directory, request): shutil.rmtree(tmp_cache) -def find_test_files_dir(start_path: Path, max_levels: int = 1) -> Path: - """ - Starting from start_path, climb up to max_levels parents looking for 'files' directory. - Returns the Path to the 'files' directory if found. - Raises FileNotFoundError if not found within max_levels parents. - """ - current = start_path.resolve() - for _ in range(max_levels): - candidate = current / "files" - if candidate.is_dir(): - return candidate - current = current.parent - raise FileNotFoundError(f"Cannot find 'files' directory within {max_levels} levels up from {start_path}") @pytest.fixture def static_cache_dir(): - - start_path = Path(__file__).parent - return find_test_files_dir(start_path) + + return Path(__file__).parent / "files" @pytest.fixture def workdir(tmp_path): - original_cwd = os.getcwd() + original_cwd = Path.cwd() os.chdir(tmp_path) yield tmp_path os.chdir(original_cwd)