Skip to content

Commit e72caf8

Browse files
authored
Merge branch 'main' into cache_size_utility
2 parents 5d07fb8 + bd8ae77 commit e72caf8

File tree

14 files changed: 84 additions (+84) and 77 deletions (-77)

.github/workflows/test.yml

Lines changed: 56 additions & 32 deletions
Original file line number | Diff line number | Diff line change
@@ -23,74 +23,97 @@ jobs:
2323
test:
2424
name: (${{ matrix.os }}, Py${{ matrix.python-version }}, sk${{ matrix.scikit-learn }}, sk-only:${{ matrix.sklearn-only }})
2525
runs-on: ${{ matrix.os }}
26+
2627
strategy:
28+
fail-fast: false
2729
matrix:
28-
python-version: ["3.11"]
29-
scikit-learn: ["1.3.*", "1.4.*", "1.5.*"]
30+
python-version: ["3.10", "3.11", "3.12", "3.13"]
31+
scikit-learn: ["1.3.*", "1.4.*", "1.5.*", "1.6.*", "1.7.*"]
3032
os: [ubuntu-latest]
3133
sklearn-only: ["true"]
32-
fail-fast: false
34+
35+
exclude:
36+
# incompatible version combinations
37+
- python-version: "3.13"
38+
scikit-learn: "1.3.*"
39+
- python-version: "3.13"
40+
scikit-learn: "1.4.*"
41+
42+
include:
43+
# Full test run on Windows
44+
- os: windows-latest
45+
python-version: "3.12"
46+
scikit-learn: "1.5.*"
47+
sklearn-only: "false"
48+
49+
# Coverage run
50+
- os: ubuntu-latest
51+
python-version: "3.12"
52+
scikit-learn: "1.5.*"
53+
sklearn-only: "false"
54+
code-cov: true
3355

3456
steps:
3557
- uses: actions/checkout@v6
3658
with:
3759
fetch-depth: 2
60+
3861
- name: Setup Python ${{ matrix.python-version }}
39-
if: matrix.os != 'windows-latest' # windows-latest only uses preinstalled Python (3.9.13)
4062
uses: actions/setup-python@v5
4163
with:
4264
python-version: ${{ matrix.python-version }}
43-
- name: Install test dependencies
65+
66+
- name: Install test dependencies and scikit-learn
4467
run: |
4568
python -m pip install --upgrade pip
46-
pip install -e .[test]
47-
- name: Install scikit-learn ${{ matrix.scikit-learn }}
48-
run: |
49-
pip install scikit-learn==${{ matrix.scikit-learn }}
50-
- name: Install numpy for Python 3.8
51-
# Python 3.8 & scikit-learn<0.24 requires numpy<=1.23.5
52-
if: ${{ matrix.python-version == '3.8' && matrix.scikit-learn == '0.23.1' }}
53-
run: |
54-
pip install numpy==1.23.5
55-
- name: "Install NumPy 1.x and SciPy <1.11 for scikit-learn < 1.4"
56-
if: ${{ contains(fromJSON('["1.0.*", "1.1.*", "1.2.*", "1.3.*"]'), matrix.scikit-learn) }}
57-
run: |
58-
# scipy has a change to the 'mode' behavior which breaks scikit-learn < 1.4
59-
# numpy 2.0 has several breaking changes
60-
pip install "numpy<2.0" "scipy<1.11"
61-
- name: Install scipy ${{ matrix.scipy }}
62-
if: ${{ matrix.scipy }}
63-
run: |
64-
pip install scipy==${{ matrix.scipy }}
69+
pip install -e .[test] scikit-learn==${{ matrix.scikit-learn }}
70+
6571
- name: Store repository status
6672
id: status-before
6773
if: matrix.os != 'windows-latest'
6874
run: |
6975
git_status=$(git status --porcelain -b)
7076
echo "BEFORE=$git_status" >> $GITHUB_ENV
7177
echo "Repository status before tests: $git_status"
78+
7279
- name: Show installed dependencies
7380
run: python -m pip list
81+
7482
- name: Run tests on Ubuntu Test
7583
if: matrix.os == 'ubuntu-latest'
7684
run: |
77-
if [ ${{ matrix.code-cov }} ]; then codecov='--cov=openml --long --cov-report=xml'; fi
78-
# Most of the time, running only the scikit-learn tests is sufficient
79-
if [ ${{ matrix.sklearn-only }} = 'true' ]; then marks='sklearn and not production'; else marks='not production'; fi
80-
echo pytest -n 4 --durations=20 --dist load -sv $codecov -o log_cli=true -m "$marks"
85+
if [ "${{ matrix.code-cov }}" = "true" ]; then
86+
codecov="--cov=openml --long --cov-report=xml"
87+
fi
88+
89+
if [ "${{ matrix.sklearn-only }}" = "true" ]; then
90+
marks="sklearn and not production"
91+
else
92+
marks="not production"
93+
fi
94+
8195
pytest -n 4 --durations=20 --dist load -sv $codecov -o log_cli=true -m "$marks"
96+
8297
- name: Run tests on Ubuntu Production
8398
if: matrix.os == 'ubuntu-latest'
8499
run: |
85-
if [ ${{ matrix.code-cov }} ]; then codecov='--cov=openml --long --cov-report=xml'; fi
86-
# Most of the time, running only the scikit-learn tests is sufficient
87-
if [ ${{ matrix.sklearn-only }} = 'true' ]; then marks='sklearn and production'; else marks='production'; fi
88-
echo pytest -n 4 --durations=20 --dist load -sv $codecov -o log_cli=true -m "$marks"
100+
if [ "${{ matrix.code-cov }}" = "true" ]; then
101+
codecov="--cov=openml --long --cov-report=xml"
102+
fi
103+
104+
if [ "${{ matrix.sklearn-only }}" = "true" ]; then
105+
marks="sklearn and production"
106+
else
107+
marks="production"
108+
fi
109+
89110
pytest -n 4 --durations=20 --dist load -sv $codecov -o log_cli=true -m "$marks"
111+
90112
- name: Run tests on Windows
91113
if: matrix.os == 'windows-latest'
92114
run: | # we need a separate step because of the bash-specific if-statement in the previous one.
93115
pytest -n 4 --durations=20 --dist load -sv --reruns 5 --reruns-delay 1
116+
94117
- name: Check for files left behind by test
95118
if: matrix.os != 'windows-latest' && always()
96119
run: |
@@ -102,6 +125,7 @@ jobs:
102125
echo "Not all generated files have been deleted!"
103126
exit 1
104127
fi
128+
105129
- name: Upload coverage
106130
if: matrix.code-cov && always()
107131
uses: codecov/codecov-action@v4

.gitignore

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -88,6 +88,8 @@ target/
8888
.idea
8989
*.swp
9090
.vscode
91+
.cursorignore
92+
.cursorindexingignore
9193

9294
# MYPY
9395
.mypy_cache
@@ -96,4 +98,7 @@ dmypy.sock
9698

9799
# Tests
98100
.pytest_cache
99-
.venv
101+
.venv
102+
103+
# Ruff
104+
.ruff-cache/

mkdocs.yml

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -127,7 +127,6 @@ plugins:
127127
docstring_options:
128128
ignore_init_summary: true
129129
trim_doctest_flags: true
130-
returns_multiple_items: false
131130
show_docstring_attributes: true
132131
show_docstring_description: true
133132
show_root_heading: true
@@ -138,7 +137,7 @@ plugins:
138137
merge_init_into_class: true
139138
show_symbol_type_heading: true
140139
show_symbol_type_toc: true
141-
docstring_style: google
140+
docstring_style: numpy
142141
inherited_members: true
143142
show_if_no_docstring: false
144143
show_bases: true

openml/tasks/functions.py

Lines changed: 5 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -415,8 +415,9 @@ def get_task(
415415
if not isinstance(task_id, int):
416416
raise TypeError(f"Task id should be integer, is {type(task_id)}")
417417

418-
tid_cache_dir = openml.utils._create_cache_directory_for_id(TASKS_CACHE_DIR_NAME, task_id)
419-
418+
cache_key_dir = openml.utils._create_cache_directory_for_id(TASKS_CACHE_DIR_NAME, task_id)
419+
tid_cache_dir = cache_key_dir / str(task_id)
420+
tid_cache_dir_existed = tid_cache_dir.exists()
420421
try:
421422
task = _get_task_description(task_id)
422423
dataset = get_dataset(task.dataset_id, **get_dataset_kwargs)
@@ -430,7 +431,8 @@ def get_task(
430431
if download_splits and isinstance(task, OpenMLSupervisedTask):
431432
task.download_split()
432433
except Exception as e:
433-
openml.utils._remove_cache_dir_for_id(TASKS_CACHE_DIR_NAME, tid_cache_dir)
434+
if not tid_cache_dir_existed:
435+
openml.utils._remove_cache_dir_for_id(TASKS_CACHE_DIR_NAME, tid_cache_dir)
434436
raise e
435437

436438
return task

pyproject.toml

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -50,12 +50,11 @@ classifiers = [
5050
"Operating System :: Unix",
5151
"Operating System :: MacOS",
5252
"Programming Language :: Python :: 3",
53-
"Programming Language :: Python :: 3.8",
54-
"Programming Language :: Python :: 3.9",
5553
"Programming Language :: Python :: 3.10",
5654
"Programming Language :: Python :: 3.11",
5755
"Programming Language :: Python :: 3.12",
5856
"Programming Language :: Python :: 3.13",
57+
"Programming Language :: Python :: 3.14",
5958
]
6059
license = { file = "LICENSE" }
6160

tests/test_datasets/test_dataset.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -294,6 +294,7 @@ def test_tagging():
294294
datasets = openml.datasets.list_datasets(tag=tag)
295295
assert datasets.empty
296296

297+
@pytest.mark.xfail(reason="failures_issue_1544")
297298
def test_get_feature_with_ontology_data_id_11():
298299
# test on car dataset, which has built-in ontology references
299300
dataset = openml.datasets.get_dataset(11)
@@ -470,4 +471,4 @@ def test__check_qualities():
470471

471472
qualities = [{"oml:name": "a", "oml:value": None}]
472473
qualities = openml.datasets.dataset._check_qualities(qualities)
473-
assert qualities["a"] != qualities["a"]
474+
assert qualities["a"] != qualities["a"]

tests/test_runs/test_run.py

Lines changed: 0 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -118,7 +118,6 @@ def _check_array(array, type_):
118118
assert run_prime_trace_content is None
119119

120120
@pytest.mark.sklearn()
121-
@pytest.mark.xfail(reason="failures_issue_1544")
122121
def test_to_from_filesystem_vanilla(self):
123122
model = Pipeline(
124123
[
@@ -154,7 +153,6 @@ def test_to_from_filesystem_vanilla(self):
154153

155154
@pytest.mark.sklearn()
156155
@pytest.mark.flaky()
157-
@pytest.mark.xfail(reason="failures_issue_1544")
158156
def test_to_from_filesystem_search(self):
159157
model = Pipeline(
160158
[
@@ -189,7 +187,6 @@ def test_to_from_filesystem_search(self):
189187
)
190188

191189
@pytest.mark.sklearn()
192-
@pytest.mark.xfail(reason="failures_issue_1544")
193190
def test_to_from_filesystem_no_model(self):
194191
model = Pipeline(
195192
[("imputer", SimpleImputer(strategy="mean")), ("classifier", DummyClassifier())],
@@ -295,7 +292,6 @@ def assert_run_prediction_data(task, run, model):
295292
assert_method(y_test, saved_y_test)
296293

297294
@pytest.mark.sklearn()
298-
@pytest.mark.xfail(reason="failures_issue_1544")
299295
def test_publish_with_local_loaded_flow(self):
300296
"""
301297
Publish a run tied to a local flow after it has first been saved to
@@ -339,7 +335,6 @@ def test_publish_with_local_loaded_flow(self):
339335
openml.runs.get_run(loaded_run.run_id)
340336

341337
@pytest.mark.sklearn()
342-
@pytest.mark.xfail(reason="failures_issue_1544")
343338
def test_offline_and_online_run_identical(self):
344339
extension = SklearnExtension()
345340

0 commit comments

Comments
 (0)