openml
diff --git a/.github/workflows/dist.yaml b/.github/workflows/dist.yaml
Lines changed: 1 addition & 1 deletion
diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml
Lines changed: 1 addition & 1 deletion
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
Lines changed: 13 additions & 2 deletions
diff --git a/.gitignore b/.gitignore
Lines changed: 11 additions & 1 deletion
diff --git a/README.md b/README.md
Lines changed: 2 additions & 2 deletions
diff --git a/openml/evaluations/evaluation.py b/openml/evaluations/evaluation.py
Lines changed: 21 additions & 51 deletions
diff --git a/openml/extensions/functions.py b/openml/extensions/functions.py
Lines changed: 11 additions & 1 deletion
diff --git a/pyproject.toml b/pyproject.toml
Lines changed: 1 addition & 1 deletion
diff --git a/tests/test_datasets/test_dataset_functions.py b/tests/test_datasets/test_dataset_functions.py
Lines changed: 9 additions & 0 deletions
diff --git a/tests/test_flows/test_flow.py b/tests/test_flows/test_flow.py
Lines changed: 5 additions & 0 deletions
@@ -27,7 +27,7 @@ jobs:
     - name: Setup Python
       uses: actions/setup-python@v5
       with:
-        python-version: 3.8
+        python-version: "3.10"
     - name: Build dist
       run: |
         pip install build
 
@@ -28,7 +28,7 @@ jobs:
       - name: Setup Python
         uses: actions/setup-python@v5
         with:
-          python-version: 3.8
+          python-version: "3.10"
       - name: Install dependencies
         run: |
           pip install -e .[docs,examples]
 
@@ -1,3 +1,4 @@
+---
 name: Tests
 
 on:
@@ -21,13 +22,13 @@ concurrency:
 
 jobs:
   test:
-    name: (${{ matrix.os }}, Py${{ matrix.python-version }}, sk${{ matrix.scikit-learn }}, sk-only:${{ matrix.sklearn-only }})
+    name: (${{ matrix.os }},Py${{ matrix.python-version }},sk${{ matrix.scikit-learn }},sk-only:${{ matrix.sklearn-only }})
     runs-on: ${{ matrix.os }}
 
     strategy:
       fail-fast: false
       matrix:
-        python-version: ["3.10", "3.11", "3.12", "3.13"]
+        python-version: ["3.10", "3.11", "3.12", "3.13", "3.14"]
         scikit-learn: ["1.3.*", "1.4.*", "1.5.*", "1.6.*", "1.7.*"]
         os: [ubuntu-latest]
         sklearn-only: ["true"]
@@ -38,8 +39,18 @@ jobs:
             scikit-learn: "1.3.*"
           - python-version: "3.13"
             scikit-learn: "1.4.*"
+          - python-version: "3.14"
+            scikit-learn: "1.3.*"
+          - python-version: "3.14"
+            scikit-learn: "1.4.*"
 
         include:
+          # Full test run on ubuntu, 3.14
+          - os: ubuntu-latest
+            python-version: "3.14"
+            scikit-learn: "1.7.*"
+            sklearn-only: "false"
+
           # Full test run on Windows
           - os: windows-latest
             python-version: "3.12"
 
@@ -98,7 +98,17 @@ dmypy.sock
 
 # Tests
 .pytest_cache
+
+# Virtual environments
+oenv/
+venv/
+.env/
 .venv
+.venv/
+
+# Python cache
+__pycache__/
+*.pyc
 
 # Ruff
-.ruff-cache/
+.ruff-cache/
@@ -15,7 +15,7 @@
 ## The Python API for a World of Data and More :dizzy:
 
 [![Latest Release](https://img.shields.io/github/v/release/openml/openml-python)](https://github.com/openml/openml-python/releases)
-[![Python Versions](https://img.shields.io/badge/python-3.8%20%7C%203.9%20%7C%203.10%20%7C%203.11%20%7C%203.12%20%7C%203.13-blue)](https://pypi.org/project/openml/)
+[![Python Versions](https://img.shields.io/badge/python-3.10%20%7C%203.11%20%7C%203.12%20%7C%203.13%20%7C%203.14-blue)](https://pypi.org/project/openml/)
 [![Downloads](https://static.pepy.tech/badge/openml)](https://pepy.tech/project/openml)
 [![License](https://img.shields.io/badge/License-BSD%203--Clause-blue.svg)](https://opensource.org/licenses/BSD-3-Clause)
 <!-- Add green badges for CI and precommit -->
@@ -60,7 +60,7 @@ for task_id in suite.tasks:
 
 ## :magic_wand: Installation
 
-OpenML-Python is supported on Python 3.8 - 3.13 and is available on Linux, MacOS, and Windows.
+OpenML-Python is supported on Python 3.10 - 3.14 and is available on Linux, MacOS, and Windows.
 
 You can install OpenML-Python with:
 
 
@@ -1,15 +1,16 @@
 # License: BSD 3-Clause
 from __future__ import annotations
 
+from dataclasses import asdict, dataclass
+
 import openml.config
 import openml.datasets
 import openml.flows
 import openml.runs
 import openml.tasks
 
 
-# TODO(eddiebergman): A lot of this class is automatically
-# handled by a dataclass
+@dataclass
 class OpenMLEvaluation:
     """
     Contains all meta-information about a run / evaluation combination,
@@ -48,55 +49,23 @@ class OpenMLEvaluation:
         (e.g., in case of precision, auroc, recall)
     """
 
-    def __init__(  # noqa: PLR0913
-        self,
-        run_id: int,
-        task_id: int,
-        setup_id: int,
-        flow_id: int,
-        flow_name: str,
-        data_id: int,
-        data_name: str,
-        function: str,
-        upload_time: str,
-        uploader: int,
-        uploader_name: str,
-        value: float | None,
-        values: list[float] | None,
-        array_data: str | None = None,
-    ):
-        self.run_id = run_id
-        self.task_id = task_id
-        self.setup_id = setup_id
-        self.flow_id = flow_id
-        self.flow_name = flow_name
-        self.data_id = data_id
-        self.data_name = data_name
-        self.function = function
-        self.upload_time = upload_time
-        self.uploader = uploader
-        self.uploader_name = uploader_name
-        self.value = value
-        self.values = values
-        self.array_data = array_data
+    run_id: int
+    task_id: int
+    setup_id: int
+    flow_id: int
+    flow_name: str
+    data_id: int
+    data_name: str
+    function: str
+    upload_time: str
+    uploader: int
+    uploader_name: str
+    value: float | None
+    values: list[float] | None
+    array_data: str | None = None
 
     def _to_dict(self) -> dict:
-        return {
-            "run_id": self.run_id,
-            "task_id": self.task_id,
-            "setup_id": self.setup_id,
-            "flow_id": self.flow_id,
-            "flow_name": self.flow_name,
-            "data_id": self.data_id,
-            "data_name": self.data_name,
-            "function": self.function,
-            "upload_time": self.upload_time,
-            "uploader": self.uploader,
-            "uploader_name": self.uploader_name,
-            "value": self.value,
-            "values": self.values,
-            "array_data": self.array_data,
-        }
+        return asdict(self)
 
     def __repr__(self) -> str:
         header = "OpenML Evaluation"
@@ -119,11 +88,12 @@ def __repr__(self) -> str:
         }
 
         order = [
-            "Uploader Date",
+            "Upload Date",
             "Run ID",
             "OpenML Run URL",
             "Task ID",
-            "OpenML Task URL" "Flow ID",
+            "OpenML Task URL",
+            "Flow ID",
             "OpenML Flow URL",
             "Setup ID",
             "Data ID",
 
@@ -1,6 +1,7 @@
 # License: BSD 3-Clause
 from __future__ import annotations
 
+import importlib.util
 from typing import TYPE_CHECKING, Any
 
 # Need to implement the following by its full path because otherwise it won't be possible to
@@ -16,8 +17,9 @@
 SKLEARN_HINT = (
     "But it looks related to scikit-learn. "
     "Please install the OpenML scikit-learn extension (openml-sklearn) and try again. "
+    "You can use `pip install openml-sklearn` for installation. "
     "For more information, see "
-    "https://github.com/openml/openml-sklearn?tab=readme-ov-file#installation"
+    "https://docs.openml.org/python/extensions/"
 )
 
 
@@ -58,6 +60,10 @@ def get_extension_by_flow(
     -------
     Extension or None
     """
+    # import openml_sklearn to register SklearnExtension
+    if importlib.util.find_spec("openml_sklearn"):
+        import openml_sklearn  # noqa: F401
+
     candidates = []
     for extension_class in openml.extensions.extensions:
         if extension_class.can_handle_flow(flow):
@@ -103,6 +109,10 @@ def get_extension_by_model(
     -------
     Extension or None
     """
+    # import openml_sklearn to register SklearnExtension
+    if importlib.util.find_spec("openml_sklearn"):
+        import openml_sklearn  # noqa: F401
+
     candidates = []
     for extension_class in openml.extensions.extensions:
         if extension_class.can_handle_model(model):
 
@@ -21,7 +21,7 @@ dependencies = [
   "pyarrow",
   "tqdm",  # For MinIO download progress bars
 ]
-requires-python = ">=3.8"
+requires-python = ">=3.10,<3.15"    
 maintainers = [
   { name = "Pieter Gijsbers", email="p.gijsbers@tue.nl"},
   { name = "Lennart Purucker"},
 
@@ -280,6 +280,7 @@ def test_dataset_by_name_cannot_access_private_data(self):
         self.use_production_server()
         self.assertRaises(OpenMLPrivateDatasetError, openml.datasets.get_dataset, "NAME_GOES_HERE")
 
+    @pytest.mark.xfail(reason="failures_issue_1544", strict=False)
     def test_get_dataset_lazy_all_functions(self):
         """Test that all expected functionality is available without downloading the dataset."""
         dataset = openml.datasets.get_dataset(1)
@@ -664,6 +665,7 @@ def test_attributes_arff_from_df_unknown_dtype(self):
             with pytest.raises(ValueError, match=err_msg):
                 attributes_arff_from_df(df)
 
+    @pytest.mark.xfail(reason="failures_issue_1544", strict=False)
     def test_create_dataset_numpy(self):
         data = np.array([[1, 2, 3], [1.2, 2.5, 3.8], [2, 5, 8], [0, 1, 0]]).T
 
@@ -751,6 +753,7 @@ def test_create_dataset_list(self):
         ), "Uploaded ARFF does not match original one"
         assert _get_online_dataset_format(dataset.id) == "arff", "Wrong format for dataset"
 
+    @pytest.mark.xfail(reason="failures_issue_1544", strict=False)
     def test_create_dataset_sparse(self):
         # test the scipy.sparse.coo_matrix
         sparse_data = scipy.sparse.coo_matrix(
@@ -868,6 +871,7 @@ def test_get_online_dataset_arff(self):
             return_type=arff.DENSE if d_format == "arff" else arff.COO,
         ), "ARFF files are not equal"
 
+    @pytest.mark.xfail(reason="failures_issue_1544", strict=False)
     def test_topic_api_error(self):
         # Check server exception when non-admin accessses apis
         self.assertRaisesRegex(
@@ -895,6 +899,7 @@ def test_get_online_dataset_format(self):
             dataset_id
         ), "The format of the ARFF files is different"
 
+    @pytest.mark.xfail(reason="failures_issue_1544", strict=False)
     def test_create_dataset_pandas(self):
         data = [
             ["a", "sunny", 85.0, 85.0, "FALSE", "no"],
@@ -1119,6 +1124,7 @@ def test_ignore_attributes_dataset(self):
                 paper_url=paper_url,
             )
 
+    @pytest.mark.xfail(reason="failures_issue_1544", strict=False)
     def test_publish_fetch_ignore_attribute(self):
         """Test to upload and retrieve dataset and check ignore_attributes"""
         data = [
@@ -1237,6 +1243,7 @@ def test_create_dataset_row_id_attribute_error(self):
                 paper_url=paper_url,
             )
 
+    @pytest.mark.xfail(reason="failures_issue_1544", strict=False)
     def test_create_dataset_row_id_attribute_inference(self):
         # meta-information
         name = f"{self._get_sentinel()}-pandas_testing_dataset"
@@ -1400,6 +1407,7 @@ def test_data_edit_non_critical_field(self):
         edited_dataset = openml.datasets.get_dataset(did)
         assert edited_dataset.description == desc
 
+    @pytest.mark.xfail(reason="failures_issue_1544", strict=False)
     def test_data_edit_critical_field(self):
         # Case 2
         # only owners (or admin) can edit all critical fields of datasets
@@ -1448,6 +1456,7 @@ def test_data_edit_requires_valid_dataset(self):
             description="xor operation dataset",
         )
 
+    @pytest.mark.xfail(reason="failures_issue_1544", strict=False)
     def test_data_edit_cannot_edit_critical_field_if_dataset_has_task(self):
         # Need to own a dataset to be able to edit meta-data
         # Will be creating a forked version of an existing dataset to allow the unit test user
 
@@ -178,6 +178,7 @@ def test_to_xml_from_xml(self):
         assert new_flow is not flow
 
     @pytest.mark.sklearn()
+    @pytest.mark.xfail(reason="failures_issue_1544", strict=False)
     def test_publish_flow(self):
         flow = openml.OpenMLFlow(
             name="sklearn.dummy.DummyClassifier",
@@ -219,6 +220,7 @@ def test_publish_existing_flow(self, flow_exists_mock):
         )
 
     @pytest.mark.sklearn()
+    @pytest.mark.xfail(reason="failures_issue_1544", strict=False)
     def test_publish_flow_with_similar_components(self):
         clf = sklearn.ensemble.VotingClassifier(
             [("lr", sklearn.linear_model.LogisticRegression(solver="lbfgs"))],
@@ -269,6 +271,7 @@ def test_publish_flow_with_similar_components(self):
         TestBase.logger.info(f"collected from {__file__.split('/')[-1]}: {flow3.flow_id}")
 
     @pytest.mark.sklearn()
+    @pytest.mark.xfail(reason="failures_issue_1544", strict=False)
     def test_semi_legal_flow(self):
         # TODO: Test if parameters are set correctly!
         # should not throw error as it contains two differentiable forms of
@@ -377,6 +380,7 @@ def get_sentinel():
         assert not flow_id
 
     @pytest.mark.sklearn()
+    @pytest.mark.xfail(reason="failures_issue_1544", strict=False)
     def test_existing_flow_exists(self):
         # create a flow
         nb = sklearn.naive_bayes.GaussianNB()
@@ -417,6 +421,7 @@ def test_existing_flow_exists(self):
             assert downloaded_flow_id == flow.flow_id
 
     @pytest.mark.sklearn()
+    @pytest.mark.xfail(reason="failures_issue_1544", strict=False)
     def test_sklearn_to_upload_to_flow(self):
         iris = sklearn.datasets.load_iris()
         X = iris.data
Original file line number	Diff line number	Diff line change
`@@ -21,7 +21,7 @@ dependencies = [`
`21`	`21`	`"pyarrow",`
`22`	`22`	`"tqdm", # For MinIO download progress bars`
`23`	`23`	`]`
`24`		`-requires-python = ">=3.8"`
	`24`	`+requires-python = ">=3.10,<3.15"`
`25`	`25`	`maintainers = [`
`26`	`26`	`{ name = "Pieter Gijsbers", email="p.gijsbers@tue.nl"},`
`27`	`27`	`{ name = "Lennart Purucker"},`