From c5598e0561d2906577e509bcba4cce6deec79dbd Mon Sep 17 00:00:00 2001 From: taniya-das Date: Wed, 18 Jun 2025 10:56:54 +0200 Subject: [PATCH 1/2] create static_cache_dir and workdir --- tests/conftest.py | 29 +++++++ tests/test_datasets/test_dataset.py | 120 +++++++++++++++------------- 2 files changed, 95 insertions(+), 54 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index b523117c1..553938157 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -37,6 +37,7 @@ import openml from openml.testing import TestBase +import inspect # creating logger for unit test file deletion status logger = logging.getLogger("unit_tests") @@ -288,3 +289,31 @@ def with_test_cache(test_files_directory, request): openml.config.set_root_cache_directory(_root_cache_directory) if tmp_cache.exists(): shutil.rmtree(tmp_cache) + + +def find_test_files_dir(start_path: Path, max_levels: int = 1) -> Path: + """ + Starting from start_path, climb up to max_levels parents looking for 'files' directory. + Returns the Path to the 'files' directory if found. + Raises FileNotFoundError if not found within max_levels parents. + """ + current = start_path.resolve() + for _ in range(max_levels): + candidate = current / "files" + if candidate.is_dir(): + return candidate + current = current.parent + raise FileNotFoundError(f"Cannot find 'files' directory within {max_levels} levels up from {start_path}") + +@pytest.fixture +def static_cache_dir(): + + start_path = Path(__file__).parent + return find_test_files_dir(start_path) + +@pytest.fixture +def workdir(tmp_path): + original_cwd = os.getcwd() + os.chdir(tmp_path) + yield tmp_path + os.chdir(original_cwd) \ No newline at end of file diff --git a/tests/test_datasets/test_dataset.py b/tests/test_datasets/test_dataset.py index d132c4233..8fc9a20ad 100644 --- a/tests/test_datasets/test_dataset.py +++ b/tests/test_datasets/test_dataset.py @@ -15,6 +15,7 @@ from openml.exceptions import PyOpenMLError from openml.testing import TestBase +import pytest @pytest.mark.production() class OpenMLDatasetTest(TestBase): @@ -399,60 +400,7 @@ def test_get_sparse_categorical_data_id_395(self): class OpenMLDatasetFunctionTest(TestBase): - @unittest.mock.patch("openml.datasets.dataset.pickle") - @unittest.mock.patch("openml.datasets.dataset._get_features_pickle_file") - def test__read_features(self, filename_mock, pickle_mock): - """Test we read the features from the xml if no cache pickle is available. - - This test also does some simple checks to verify that the features are read correctly - """ - filename_mock.return_value = os.path.join(self.workdir, "features.xml.pkl") - pickle_mock.load.side_effect = FileNotFoundError - features = openml.datasets.dataset._read_features( - os.path.join( - self.static_cache_dir, - "org", - "openml", - "test", - "datasets", - "2", - "features.xml", - ), - ) - assert isinstance(features, dict) - assert len(features) == 39 - assert isinstance(features[0], OpenMLDataFeature) - assert features[0].name == "family" - assert len(features[0].nominal_values) == 9 - # pickle.load is never called because the features pickle file didn't exist - assert pickle_mock.load.call_count == 0 - assert pickle_mock.dump.call_count == 1 - - @unittest.mock.patch("openml.datasets.dataset.pickle") - @unittest.mock.patch("openml.datasets.dataset._get_qualities_pickle_file") - def test__read_qualities(self, filename_mock, pickle_mock): - """Test we read the qualities from the xml if no cache pickle is available. - - This test also does some minor checks to ensure that the qualities are read correctly. - """ - filename_mock.return_value = os.path.join(self.workdir, "qualities.xml.pkl") - pickle_mock.load.side_effect = FileNotFoundError - qualities = openml.datasets.dataset._read_qualities( - os.path.join( - self.static_cache_dir, - "org", - "openml", - "test", - "datasets", - "2", - "qualities.xml", - ), - ) - assert isinstance(qualities, dict) - assert len(qualities) == 106 - # pickle.load is never called because the qualities pickle file didn't exist - assert pickle_mock.load.call_count == 0 - assert pickle_mock.dump.call_count == 1 + def test__check_qualities(self): qualities = [{"oml:name": "a", "oml:value": "0.5"}] @@ -466,3 +414,67 @@ def test__check_qualities(self): qualities = [{"oml:name": "a", "oml:value": None}] qualities = openml.datasets.dataset._check_qualities(qualities) assert qualities["a"] != qualities["a"] + + + +def test__read_features(mocker, workdir, static_cache_dir): + """Test we read the features from the xml if no cache pickle is available. + + This test also does some simple checks to verify that the features are read correctly + """ + filename_mock = mocker.patch("openml.datasets.dataset._get_features_pickle_file") + pickle_mock = mocker.patch("openml.datasets.dataset.pickle") + + filename_mock.return_value = os.path.join(workdir, "features.xml.pkl") + pickle_mock.load.side_effect = FileNotFoundError + + features = openml.datasets.dataset._read_features( + os.path.join( + static_cache_dir, + "org", + "openml", + "test", + "datasets", + "2", + "features.xml", + ), + ) + assert isinstance(features, dict) + assert len(features) == 39 + assert isinstance(features[0], OpenMLDataFeature) + assert features[0].name == "family" + assert len(features[0].nominal_values) == 9 + # pickle.load is never called because the features pickle file didn't exist + assert pickle_mock.load.call_count == 0 + assert pickle_mock.dump.call_count == 1 + + +def test__read_qualities(static_cache_dir, workdir, mocker): + """Test we read the qualities from the xml if no cache pickle is available. + + This test also does some minor checks to ensure that the qualities are read correctly. + """ + + filename_mock = mocker.patch("openml.datasets.dataset._get_qualities_pickle_file") + pickle_mock = mocker.patch("openml.datasets.dataset.pickle") + + filename_mock.return_value=os.path.join(workdir, "qualities.xml.pkl") + pickle_mock.load.side_effect = FileNotFoundError + + qualities = openml.datasets.dataset._read_qualities( + os.path.join( + static_cache_dir, + "org", + "openml", + "test", + "datasets", + "2", + "qualities.xml", + ), + ) + assert isinstance(qualities, dict) + assert len(qualities) == 106 + assert pickle_mock.load.call_count == 0 + assert pickle_mock.dump.call_count == 1 + + From 45632e825ffaa702f886b312950de16f422516f2 Mon Sep 17 00:00:00 2001 From: taniya-das Date: Wed, 18 Jun 2025 10:58:58 +0200 Subject: [PATCH 2/2] resolve merge conflict --- tests/test_datasets/test_dataset.py | 72 ++++++++++++++++++++++++----- 1 file changed, 60 insertions(+), 12 deletions(-) diff --git a/tests/test_datasets/test_dataset.py b/tests/test_datasets/test_dataset.py index cfd67f21e..2515499b1 100644 --- a/tests/test_datasets/test_dataset.py +++ b/tests/test_datasets/test_dataset.py @@ -464,16 +464,64 @@ def test__read_qualities(static_cache_dir, workdir, mocker): assert pickle_mock.dump.call_count == 1 -def test__check_qualities(): - qualities = [{"oml:name": "a", "oml:value": "0.5"}] - qualities = openml.datasets.dataset._check_qualities(qualities) - assert qualities["a"] == 0.5 - - qualities = [{"oml:name": "a", "oml:value": "null"}] - qualities = openml.datasets.dataset._check_qualities(qualities) - assert qualities["a"] != qualities["a"] - - qualities = [{"oml:name": "a", "oml:value": None}] - qualities = openml.datasets.dataset._check_qualities(qualities) - assert qualities["a"] != qualities["a"] +def test__read_features(mocker, workdir, static_cache_dir): + """Test we read the features from the xml if no cache pickle is available. + + This test also does some simple checks to verify that the features are read correctly + """ + filename_mock = mocker.patch("openml.datasets.dataset._get_features_pickle_file") + pickle_mock = mocker.patch("openml.datasets.dataset.pickle") + + filename_mock.return_value = os.path.join(workdir, "features.xml.pkl") + pickle_mock.load.side_effect = FileNotFoundError + + features = openml.datasets.dataset._read_features( + os.path.join( + static_cache_dir, + "org", + "openml", + "test", + "datasets", + "2", + "features.xml", + ), + ) + assert isinstance(features, dict) + assert len(features) == 39 + assert isinstance(features[0], OpenMLDataFeature) + assert features[0].name == "family" + assert len(features[0].nominal_values) == 9 + # pickle.load is never called because the features pickle file didn't exist + assert pickle_mock.load.call_count == 0 + assert pickle_mock.dump.call_count == 1 + + +def test__read_qualities(static_cache_dir, workdir, mocker): + """Test we read the qualities from the xml if no cache pickle is available. + + This test also does some minor checks to ensure that the qualities are read correctly. + """ + + filename_mock = mocker.patch("openml.datasets.dataset._get_qualities_pickle_file") + pickle_mock = mocker.patch("openml.datasets.dataset.pickle") + + filename_mock.return_value=os.path.join(workdir, "qualities.xml.pkl") + pickle_mock.load.side_effect = FileNotFoundError + + qualities = openml.datasets.dataset._read_qualities( + os.path.join( + static_cache_dir, + "org", + "openml", + "test", + "datasets", + "2", + "qualities.xml", + ), + ) + assert isinstance(qualities, dict) + assert len(qualities) == 106 + assert pickle_mock.load.call_count == 0 + assert pickle_mock.dump.call_count == 1 +