Skip to content

Commit acc1c79

Browse files
authored
Merge branch 'main' into dataclass_impl_openmlparameter
2 parents 09690a9 + 3454bbb commit acc1c79

File tree

19 files changed

+120
-60
lines changed

19 files changed

+120
-60
lines changed

.github/workflows/dist.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ jobs:
2727
- name: Setup Python
2828
uses: actions/setup-python@v5
2929
with:
30-
python-version: 3.8
30+
python-version: "3.10"
3131
- name: Build dist
3232
run: |
3333
pip install build

.github/workflows/docs.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ jobs:
2828
- name: Setup Python
2929
uses: actions/setup-python@v5
3030
with:
31-
python-version: 3.8
31+
python-version: "3.10"
3232
- name: Install dependencies
3333
run: |
3434
pip install -e .[docs,examples]

.github/workflows/test.yml

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
---
12
name: Tests
23

34
on:
@@ -21,13 +22,13 @@ concurrency:
2122

2223
jobs:
2324
test:
24-
name: (${{ matrix.os }}, Py${{ matrix.python-version }}, sk${{ matrix.scikit-learn }}, sk-only:${{ matrix.sklearn-only }})
25+
name: (${{ matrix.os }},Py${{ matrix.python-version }},sk${{ matrix.scikit-learn }},sk-only:${{ matrix.sklearn-only }})
2526
runs-on: ${{ matrix.os }}
2627

2728
strategy:
2829
fail-fast: false
2930
matrix:
30-
python-version: ["3.10", "3.11", "3.12", "3.13"]
31+
python-version: ["3.10", "3.11", "3.12", "3.13", "3.14"]
3132
scikit-learn: ["1.3.*", "1.4.*", "1.5.*", "1.6.*", "1.7.*"]
3233
os: [ubuntu-latest]
3334
sklearn-only: ["true"]
@@ -38,8 +39,18 @@ jobs:
3839
scikit-learn: "1.3.*"
3940
- python-version: "3.13"
4041
scikit-learn: "1.4.*"
42+
- python-version: "3.14"
43+
scikit-learn: "1.3.*"
44+
- python-version: "3.14"
45+
scikit-learn: "1.4.*"
4146

4247
include:
48+
# Full test run on ubuntu, 3.14
49+
- os: ubuntu-latest
50+
python-version: "3.14"
51+
scikit-learn: "1.7.*"
52+
sklearn-only: "false"
53+
4354
# Full test run on Windows
4455
- os: windows-latest
4556
python-version: "3.12"

.gitignore

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,17 @@ dmypy.sock
9898

9999
# Tests
100100
.pytest_cache
101+
102+
# Virtual environments
103+
oenv/
104+
venv/
105+
.env/
101106
.venv
107+
.venv/
108+
109+
# Python cache
110+
__pycache__/
111+
*.pyc
102112

103113
# Ruff
104-
.ruff-cache/
114+
.ruff-cache/

README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
## The Python API for a World of Data and More :dizzy:
1616

1717
[![Latest Release](https://img.shields.io/github/v/release/openml/openml-python)](https://github.com/openml/openml-python/releases)
18-
[![Python Versions](https://img.shields.io/badge/python-3.8%20%7C%203.9%20%7C%203.10%20%7C%203.11%20%7C%203.12%20%7C%203.13-blue)](https://pypi.org/project/openml/)
18+
[![Python Versions](https://img.shields.io/badge/python-3.10%20%7C%203.11%20%7C%203.12%20%7C%203.13%20%7C%203.14-blue)](https://pypi.org/project/openml/)
1919
[![Downloads](https://static.pepy.tech/badge/openml)](https://pepy.tech/project/openml)
2020
[![License](https://img.shields.io/badge/License-BSD%203--Clause-blue.svg)](https://opensource.org/licenses/BSD-3-Clause)
2121
<!-- Add green badges for CI and precommit -->
@@ -60,7 +60,7 @@ for task_id in suite.tasks:
6060

6161
## :magic_wand: Installation
6262

63-
OpenML-Python is supported on Python 3.8 - 3.13 and is available on Linux, MacOS, and Windows.
63+
OpenML-Python is supported on Python 3.10 - 3.14 and is available on Linux, macOS, and Windows.
6464

6565
You can install OpenML-Python with:
6666

openml/evaluations/evaluation.py

Lines changed: 21 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,16 @@
11
# License: BSD 3-Clause
22
from __future__ import annotations
33

4+
from dataclasses import asdict, dataclass
5+
46
import openml.config
57
import openml.datasets
68
import openml.flows
79
import openml.runs
810
import openml.tasks
911

1012

11-
# TODO(eddiebergman): A lot of this class is automatically
12-
# handled by a dataclass
13+
@dataclass
1314
class OpenMLEvaluation:
1415
"""
1516
Contains all meta-information about a run / evaluation combination,
@@ -48,55 +49,23 @@ class OpenMLEvaluation:
4849
(e.g., in case of precision, auroc, recall)
4950
"""
5051

51-
def __init__( # noqa: PLR0913
52-
self,
53-
run_id: int,
54-
task_id: int,
55-
setup_id: int,
56-
flow_id: int,
57-
flow_name: str,
58-
data_id: int,
59-
data_name: str,
60-
function: str,
61-
upload_time: str,
62-
uploader: int,
63-
uploader_name: str,
64-
value: float | None,
65-
values: list[float] | None,
66-
array_data: str | None = None,
67-
):
68-
self.run_id = run_id
69-
self.task_id = task_id
70-
self.setup_id = setup_id
71-
self.flow_id = flow_id
72-
self.flow_name = flow_name
73-
self.data_id = data_id
74-
self.data_name = data_name
75-
self.function = function
76-
self.upload_time = upload_time
77-
self.uploader = uploader
78-
self.uploader_name = uploader_name
79-
self.value = value
80-
self.values = values
81-
self.array_data = array_data
52+
run_id: int
53+
task_id: int
54+
setup_id: int
55+
flow_id: int
56+
flow_name: str
57+
data_id: int
58+
data_name: str
59+
function: str
60+
upload_time: str
61+
uploader: int
62+
uploader_name: str
63+
value: float | None
64+
values: list[float] | None
65+
array_data: str | None = None
8266

8367
def _to_dict(self) -> dict:
84-
return {
85-
"run_id": self.run_id,
86-
"task_id": self.task_id,
87-
"setup_id": self.setup_id,
88-
"flow_id": self.flow_id,
89-
"flow_name": self.flow_name,
90-
"data_id": self.data_id,
91-
"data_name": self.data_name,
92-
"function": self.function,
93-
"upload_time": self.upload_time,
94-
"uploader": self.uploader,
95-
"uploader_name": self.uploader_name,
96-
"value": self.value,
97-
"values": self.values,
98-
"array_data": self.array_data,
99-
}
68+
return asdict(self)
10069

10170
def __repr__(self) -> str:
10271
header = "OpenML Evaluation"
@@ -119,11 +88,12 @@ def __repr__(self) -> str:
11988
}
12089

12190
order = [
122-
"Uploader Date",
91+
"Upload Date",
12392
"Run ID",
12493
"OpenML Run URL",
12594
"Task ID",
126-
"OpenML Task URL" "Flow ID",
95+
"OpenML Task URL",
96+
"Flow ID",
12797
"OpenML Flow URL",
12898
"Setup ID",
12999
"Data ID",

openml/extensions/functions.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
# License: BSD 3-Clause
22
from __future__ import annotations
33

4+
import importlib.util
45
from typing import TYPE_CHECKING, Any
56

67
# Need to implement the following by its full path because otherwise it won't be possible to
@@ -16,8 +17,9 @@
1617
SKLEARN_HINT = (
1718
"But it looks related to scikit-learn. "
1819
"Please install the OpenML scikit-learn extension (openml-sklearn) and try again. "
20+
"You can use `pip install openml-sklearn` for installation. "
1921
"For more information, see "
20-
"https://github.com/openml/openml-sklearn?tab=readme-ov-file#installation"
22+
"https://docs.openml.org/python/extensions/"
2123
)
2224

2325

@@ -58,6 +60,10 @@ def get_extension_by_flow(
5860
-------
5961
Extension or None
6062
"""
63+
# import openml_sklearn to register SklearnExtension
64+
if importlib.util.find_spec("openml_sklearn"):
65+
import openml_sklearn # noqa: F401
66+
6167
candidates = []
6268
for extension_class in openml.extensions.extensions:
6369
if extension_class.can_handle_flow(flow):
@@ -103,6 +109,10 @@ def get_extension_by_model(
103109
-------
104110
Extension or None
105111
"""
112+
# import openml_sklearn to register SklearnExtension
113+
if importlib.util.find_spec("openml_sklearn"):
114+
import openml_sklearn # noqa: F401
115+
106116
candidates = []
107117
for extension_class in openml.extensions.extensions:
108118
if extension_class.can_handle_model(model):

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ dependencies = [
2121
"pyarrow",
2222
"tqdm", # For MinIO download progress bars
2323
]
24-
requires-python = ">=3.8"
24+
requires-python = ">=3.10,<3.15"
2525
maintainers = [
2626
{ name = "Pieter Gijsbers", email="p.gijsbers@tue.nl"},
2727
{ name = "Lennart Purucker"},

tests/test_datasets/test_dataset_functions.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -280,6 +280,7 @@ def test_dataset_by_name_cannot_access_private_data(self):
280280
self.use_production_server()
281281
self.assertRaises(OpenMLPrivateDatasetError, openml.datasets.get_dataset, "NAME_GOES_HERE")
282282

283+
@pytest.mark.xfail(reason="failures_issue_1544", strict=False)
283284
def test_get_dataset_lazy_all_functions(self):
284285
"""Test that all expected functionality is available without downloading the dataset."""
285286
dataset = openml.datasets.get_dataset(1)
@@ -664,6 +665,7 @@ def test_attributes_arff_from_df_unknown_dtype(self):
664665
with pytest.raises(ValueError, match=err_msg):
665666
attributes_arff_from_df(df)
666667

668+
@pytest.mark.xfail(reason="failures_issue_1544", strict=False)
667669
def test_create_dataset_numpy(self):
668670
data = np.array([[1, 2, 3], [1.2, 2.5, 3.8], [2, 5, 8], [0, 1, 0]]).T
669671

@@ -751,6 +753,7 @@ def test_create_dataset_list(self):
751753
), "Uploaded ARFF does not match original one"
752754
assert _get_online_dataset_format(dataset.id) == "arff", "Wrong format for dataset"
753755

756+
@pytest.mark.xfail(reason="failures_issue_1544", strict=False)
754757
def test_create_dataset_sparse(self):
755758
# test the scipy.sparse.coo_matrix
756759
sparse_data = scipy.sparse.coo_matrix(
@@ -868,6 +871,7 @@ def test_get_online_dataset_arff(self):
868871
return_type=arff.DENSE if d_format == "arff" else arff.COO,
869872
), "ARFF files are not equal"
870873

874+
@pytest.mark.xfail(reason="failures_issue_1544", strict=False)
871875
def test_topic_api_error(self):
872876
# Check server exception when non-admin accesses APIs
873877
self.assertRaisesRegex(
@@ -895,6 +899,7 @@ def test_get_online_dataset_format(self):
895899
dataset_id
896900
), "The format of the ARFF files is different"
897901

902+
@pytest.mark.xfail(reason="failures_issue_1544", strict=False)
898903
def test_create_dataset_pandas(self):
899904
data = [
900905
["a", "sunny", 85.0, 85.0, "FALSE", "no"],
@@ -1119,6 +1124,7 @@ def test_ignore_attributes_dataset(self):
11191124
paper_url=paper_url,
11201125
)
11211126

1127+
@pytest.mark.xfail(reason="failures_issue_1544", strict=False)
11221128
def test_publish_fetch_ignore_attribute(self):
11231129
"""Test to upload and retrieve dataset and check ignore_attributes"""
11241130
data = [
@@ -1237,6 +1243,7 @@ def test_create_dataset_row_id_attribute_error(self):
12371243
paper_url=paper_url,
12381244
)
12391245

1246+
@pytest.mark.xfail(reason="failures_issue_1544", strict=False)
12401247
def test_create_dataset_row_id_attribute_inference(self):
12411248
# meta-information
12421249
name = f"{self._get_sentinel()}-pandas_testing_dataset"
@@ -1400,6 +1407,7 @@ def test_data_edit_non_critical_field(self):
14001407
edited_dataset = openml.datasets.get_dataset(did)
14011408
assert edited_dataset.description == desc
14021409

1410+
@pytest.mark.xfail(reason="failures_issue_1544", strict=False)
14031411
def test_data_edit_critical_field(self):
14041412
# Case 2
14051413
# only owners (or admin) can edit all critical fields of datasets
@@ -1448,6 +1456,7 @@ def test_data_edit_requires_valid_dataset(self):
14481456
description="xor operation dataset",
14491457
)
14501458

1459+
@pytest.mark.xfail(reason="failures_issue_1544", strict=False)
14511460
def test_data_edit_cannot_edit_critical_field_if_dataset_has_task(self):
14521461
# Need to own a dataset to be able to edit meta-data
14531462
# Will be creating a forked version of an existing dataset to allow the unit test user

tests/test_flows/test_flow.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -178,6 +178,7 @@ def test_to_xml_from_xml(self):
178178
assert new_flow is not flow
179179

180180
@pytest.mark.sklearn()
181+
@pytest.mark.xfail(reason="failures_issue_1544", strict=False)
181182
def test_publish_flow(self):
182183
flow = openml.OpenMLFlow(
183184
name="sklearn.dummy.DummyClassifier",
@@ -219,6 +220,7 @@ def test_publish_existing_flow(self, flow_exists_mock):
219220
)
220221

221222
@pytest.mark.sklearn()
223+
@pytest.mark.xfail(reason="failures_issue_1544", strict=False)
222224
def test_publish_flow_with_similar_components(self):
223225
clf = sklearn.ensemble.VotingClassifier(
224226
[("lr", sklearn.linear_model.LogisticRegression(solver="lbfgs"))],
@@ -269,6 +271,7 @@ def test_publish_flow_with_similar_components(self):
269271
TestBase.logger.info(f"collected from {__file__.split('/')[-1]}: {flow3.flow_id}")
270272

271273
@pytest.mark.sklearn()
274+
@pytest.mark.xfail(reason="failures_issue_1544", strict=False)
272275
def test_semi_legal_flow(self):
273276
# TODO: Test if parameters are set correctly!
274277
# should not throw error as it contains two differentiable forms of
@@ -377,6 +380,7 @@ def get_sentinel():
377380
assert not flow_id
378381

379382
@pytest.mark.sklearn()
383+
@pytest.mark.xfail(reason="failures_issue_1544", strict=False)
380384
def test_existing_flow_exists(self):
381385
# create a flow
382386
nb = sklearn.naive_bayes.GaussianNB()
@@ -417,6 +421,7 @@ def test_existing_flow_exists(self):
417421
assert downloaded_flow_id == flow.flow_id
418422

419423
@pytest.mark.sklearn()
424+
@pytest.mark.xfail(reason="failures_issue_1544", strict=False)
420425
def test_sklearn_to_upload_to_flow(self):
421426
iris = sklearn.datasets.load_iris()
422427
X = iris.data

0 commit comments

Comments
 (0)