Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 29 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
import openml
from openml.testing import TestBase

import inspect

# Logger used to report the deletion status of files created by the unit tests.
logger = logging.getLogger("unit_tests")
Expand Down Expand Up @@ -288,3 +289,31 @@ def with_test_cache(test_files_directory, request):
openml.config.set_root_cache_directory(_root_cache_directory)
if tmp_cache.exists():
shutil.rmtree(tmp_cache)


def find_test_files_dir(start_path: Path, max_levels: int = 1) -> Path:
    """
    Locate the nearest 'files' directory at or above start_path.

    The search inspects start_path itself first and then climbs at most
    max_levels parent directories, so max_levels=0 checks only start_path
    and the default max_levels=1 also checks its direct parent.

    Returns the Path to the 'files' directory if found.
    Raises FileNotFoundError if not found within max_levels parents.
    """
    current = start_path.resolve()
    # max_levels counts *parents*: the start directory plus max_levels ancestors
    # are examined, i.e. max_levels + 1 candidates in total.
    for _ in range(max_levels + 1):
        candidate = current / "files"
        if candidate.is_dir():
            return candidate
        current = current.parent
    raise FileNotFoundError(
        f"Cannot find 'files' directory within {max_levels} levels up from {start_path}"
    )

@pytest.fixture
def static_cache_dir():
    """Provide the 'files' directory located by searching from this file's own folder."""
    conftest_dir = Path(__file__).parent
    files_dir = find_test_files_dir(conftest_dir)
    return files_dir

@pytest.fixture
def workdir(tmp_path):
    """Run the requesting test with ``tmp_path`` as the current working directory.

    Yields ``tmp_path``; once the test is done the working directory that was
    active before the fixture ran is restored.
    """
    previous_dir = os.getcwd()
    os.chdir(tmp_path)

    yield tmp_path

    # Teardown: go back to where we were before the test started.
    os.chdir(previous_dir)
185 changes: 121 additions & 64 deletions tests/test_datasets/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from openml.exceptions import PyOpenMLError
from openml.testing import TestBase

import pytest

@pytest.mark.production()
class OpenMLDatasetTest(TestBase):
Expand Down Expand Up @@ -399,72 +400,128 @@ def test_get_sparse_categorical_data_id_395(self):


class OpenMLDatasetFunctionTest(TestBase):
    """Tests for the XML readers in ``openml.datasets.dataset`` with the pickle cache mocked."""

    @unittest.mock.patch("openml.datasets.dataset.pickle")
    @unittest.mock.patch("openml.datasets.dataset._get_features_pickle_file")
    def test__read_features(self, filename_mock, pickle_mock):
        """Test we read the features from the xml if no cache pickle is available.

        This test also does some simple checks to verify that the features are read correctly
        """
        # Decorators apply bottom-up: filename_mock patches the pickle-path helper,
        # pickle_mock patches the pickle module used by the dataset code.
        pickle_mock.load.side_effect = FileNotFoundError
        filename_mock.return_value = os.path.join(self.workdir, "features.xml.pkl")

        path_parts = ("org", "openml", "test", "datasets", "2", "features.xml")
        features_xml = os.path.join(self.static_cache_dir, *path_parts)
        features = openml.datasets.dataset._read_features(features_xml)

        assert isinstance(features, dict)
        assert len(features) == 39
        first_feature = features[0]
        assert isinstance(first_feature, OpenMLDataFeature)
        assert first_feature.name == "family"
        assert len(first_feature.nominal_values) == 9
        # pickle.load is never called because the features pickle file didn't exist
        assert pickle_mock.load.call_count == 0
        assert pickle_mock.dump.call_count == 1

    @unittest.mock.patch("openml.datasets.dataset.pickle")
    @unittest.mock.patch("openml.datasets.dataset._get_qualities_pickle_file")
    def test__read_qualities(self, filename_mock, pickle_mock):
        """Test we read the qualities from the xml if no cache pickle is available.

        This test also does some minor checks to ensure that the qualities are read correctly.
        """
        pickle_mock.load.side_effect = FileNotFoundError
        filename_mock.return_value = os.path.join(self.workdir, "qualities.xml.pkl")

        path_parts = ("org", "openml", "test", "datasets", "2", "qualities.xml")
        qualities_xml = os.path.join(self.static_cache_dir, *path_parts)
        qualities = openml.datasets.dataset._read_qualities(qualities_xml)

        assert isinstance(qualities, dict)
        assert len(qualities) == 106
        # pickle.load is never called because the qualities pickle file didn't exist
        assert pickle_mock.load.call_count == 0
        assert pickle_mock.dump.call_count == 1



def test__check_qualities():
    """Check that a numeric string quality value is returned as the float 0.5."""
    raw_qualities = [{"oml:name": "a", "oml:value": "0.5"}]
    checked = openml.datasets.dataset._check_qualities(raw_qualities)
    assert checked["a"] == 0.5
def test__read_features(mocker, workdir, static_cache_dir):
    """Test we read the features from the xml if no cache pickle is available.

    This test also does some simple checks to verify that the features are read correctly.

    ``workdir`` and ``static_cache_dir`` are fixtures supplying the directory for the
    (mocked) pickle file and the directory holding the static test files; ``mocker``
    is presumably the pytest-mock fixture.
    """
    filename_mock = mocker.patch("openml.datasets.dataset._get_features_pickle_file")
    pickle_mock = mocker.patch("openml.datasets.dataset.pickle")

    filename_mock.return_value = os.path.join(workdir, "features.xml.pkl")
    pickle_mock.load.side_effect = FileNotFoundError

    features = openml.datasets.dataset._read_features(
        os.path.join(
            static_cache_dir,
            "org",
            "openml",
            "test",
            "datasets",
            "2",
            "features.xml",
        ),
    )
    assert isinstance(features, dict)
    assert len(features) == 39
    assert isinstance(features[0], OpenMLDataFeature)
    assert features[0].name == "family"
    assert len(features[0].nominal_values) == 9
    # pickle.load is never called because the features pickle file didn't exist
    assert pickle_mock.load.call_count == 0
    assert pickle_mock.dump.call_count == 1


def test__read_qualities(static_cache_dir, workdir, mocker):
    """Test we read the qualities from the xml if no cache pickle is available.

    This test also does some minor checks to ensure that the qualities are read correctly.

    ``workdir`` and ``static_cache_dir`` are fixtures supplying the directory for the
    (mocked) pickle file and the directory holding the static test files; ``mocker``
    is presumably the pytest-mock fixture.
    """
    filename_mock = mocker.patch("openml.datasets.dataset._get_qualities_pickle_file")
    pickle_mock = mocker.patch("openml.datasets.dataset.pickle")

    filename_mock.return_value = os.path.join(workdir, "qualities.xml.pkl")
    pickle_mock.load.side_effect = FileNotFoundError

    qualities = openml.datasets.dataset._read_qualities(
        os.path.join(
            static_cache_dir,
            "org",
            "openml",
            "test",
            "datasets",
            "2",
            "qualities.xml",
        ),
    )
    assert isinstance(qualities, dict)
    assert len(qualities) == 106
    # pickle.load is never called because the qualities pickle file didn't exist
    assert pickle_mock.load.call_count == 0
    assert pickle_mock.dump.call_count == 1

# The string "null" is presumably converted to NaN by _check_qualities (verify against
# its implementation); NaN is the only float not equal to itself, so `x != x` acts as
# the NaN check.
qualities = [{"oml:name": "a", "oml:value": "null"}]
qualities = openml.datasets.dataset._check_qualities(qualities)
assert qualities["a"] != qualities["a"]

# Same self-inequality check when the value is None rather than the string "null".
qualities = [{"oml:name": "a", "oml:value": None}]
qualities = openml.datasets.dataset._check_qualities(qualities)
assert qualities["a"] != qualities["a"]
Loading