openml
diff --git a/‎.github/workflows/test.yml‎
Lines changed: 56 additions & 32 deletions b/‎.github/workflows/test.yml‎
Lines changed: 56 additions & 32 deletions
diff --git a/‎.gitignore‎
Lines changed: 6 additions & 1 deletion b/‎.gitignore‎
Lines changed: 6 additions & 1 deletion
diff --git a/‎mkdocs.yml‎
Lines changed: 1 addition & 2 deletions b/‎mkdocs.yml‎
Lines changed: 1 addition & 2 deletions
diff --git a/‎openml/tasks/functions.py‎
Lines changed: 5 additions & 3 deletions b/‎openml/tasks/functions.py‎
Lines changed: 5 additions & 3 deletions
diff --git a/‎pyproject.toml‎
Lines changed: 1 addition & 2 deletions b/‎pyproject.toml‎
Lines changed: 1 addition & 2 deletions
diff --git a/‎tests/test_datasets/test_dataset.py‎
Lines changed: 2 additions & 1 deletion b/‎tests/test_datasets/test_dataset.py‎
Lines changed: 2 additions & 1 deletion
diff --git a/‎tests/test_runs/test_run.py‎
Lines changed: 0 additions & 5 deletions b/‎tests/test_runs/test_run.py‎
Lines changed: 0 additions & 5 deletions
@@ -23,74 +23,97 @@ jobs:
   test:
     name: (${{ matrix.os }}, Py${{ matrix.python-version }}, sk${{ matrix.scikit-learn }}, sk-only:${{ matrix.sklearn-only }})
     runs-on: ${{ matrix.os }}
+
     strategy:
+      fail-fast: false
       matrix:
-        python-version: ["3.11"]
-        scikit-learn: ["1.3.*", "1.4.*", "1.5.*"]
+        python-version: ["3.10", "3.11", "3.12", "3.13"]
+        scikit-learn: ["1.3.*", "1.4.*", "1.5.*", "1.6.*", "1.7.*"]
         os: [ubuntu-latest]
         sklearn-only: ["true"]
-      fail-fast:  false
+
+        exclude:
+          # scikit-learn 1.3 and 1.4 predate Python 3.13 support, so skip those combinations
+          - python-version: "3.13"
+            scikit-learn: "1.3.*"
+          - python-version: "3.13"
+            scikit-learn: "1.4.*"
+
+        include:
+          # Full test run on Windows
+          - os: windows-latest
+            python-version: "3.12"
+            scikit-learn: "1.5.*"
+            sklearn-only: "false"
+
+          # Coverage run
+          - os: ubuntu-latest
+            python-version: "3.12"
+            scikit-learn: "1.5.*"
+            sklearn-only: "false"
+            code-cov: true
 
     steps:
     - uses: actions/checkout@v6
       with:
         fetch-depth: 2
+
     - name: Setup Python ${{ matrix.python-version }}
-      if: matrix.os != 'windows-latest'  # windows-latest only uses preinstalled Python (3.9.13)
       uses: actions/setup-python@v5
       with:
         python-version: ${{ matrix.python-version }}
-    - name: Install test dependencies
+
+    - name: Install test dependencies and scikit-learn
       run: |
         python -m pip install --upgrade pip
-        pip install -e .[test]
-    - name: Install scikit-learn ${{ matrix.scikit-learn }}
-      run: |
-        pip install scikit-learn==${{ matrix.scikit-learn }}
-    - name: Install numpy for Python 3.8
-      # Python 3.8 & scikit-learn<0.24 requires numpy<=1.23.5
-      if: ${{ matrix.python-version == '3.8' && matrix.scikit-learn == '0.23.1' }}
-      run: |
-        pip install numpy==1.23.5
-    - name: "Install NumPy 1.x and SciPy <1.11 for scikit-learn < 1.4"
-      if: ${{ contains(fromJSON('["1.0.*", "1.1.*", "1.2.*", "1.3.*"]'), matrix.scikit-learn) }}
-      run: |
-        # scipy has a change to the 'mode' behavior which breaks scikit-learn < 1.4
-        # numpy 2.0 has several breaking changes
-        pip install "numpy<2.0" "scipy<1.11"
-    - name: Install scipy ${{ matrix.scipy }}
-      if: ${{ matrix.scipy }}
-      run: |
-        pip install scipy==${{ matrix.scipy }}
+        pip install -e .[test] scikit-learn==${{ matrix.scikit-learn }}
+
     - name: Store repository status
       id: status-before
       if: matrix.os != 'windows-latest'
       run: |
         git_status=$(git status --porcelain -b)
         echo "BEFORE=$git_status" >> $GITHUB_ENV
         echo "Repository status before tests: $git_status"
+
     - name: Show installed dependencies
       run: python -m pip list
+
     - name: Run tests on Ubuntu Test
       if: matrix.os == 'ubuntu-latest'
       run: |
-        if [ ${{ matrix.code-cov }} ]; then codecov='--cov=openml --long  --cov-report=xml'; fi
-        # Most of the time, running only the scikit-learn tests is sufficient
-        if [ ${{ matrix.sklearn-only }} = 'true' ]; then marks='sklearn and not production'; else marks='not production'; fi
-        echo pytest -n 4 --durations=20 --dist load -sv $codecov -o log_cli=true -m "$marks"
+        if [ "${{ matrix.code-cov }}" = "true" ]; then
+          codecov="--cov=openml --long --cov-report=xml"
+        fi
+
+        if [ "${{ matrix.sklearn-only }}" = "true" ]; then
+          marks="sklearn and not production"
+        else
+          marks="not production"
+        fi
+
         pytest -n 4 --durations=20 --dist load -sv $codecov -o log_cli=true -m "$marks"
+
     - name: Run tests on Ubuntu Production
       if: matrix.os == 'ubuntu-latest'
       run: |
-        if [ ${{ matrix.code-cov }} ]; then codecov='--cov=openml --long  --cov-report=xml'; fi
-        # Most of the time, running only the scikit-learn tests is sufficient
-        if [ ${{ matrix.sklearn-only }} = 'true' ]; then marks='sklearn and production'; else marks='production'; fi
-        echo pytest -n 4 --durations=20 --dist load -sv $codecov -o log_cli=true -m "$marks"
+        if [ "${{ matrix.code-cov }}" = "true" ]; then
+          codecov="--cov=openml --long --cov-report=xml"
+        fi
+
+        if [ "${{ matrix.sklearn-only }}" = "true" ]; then
+          marks="sklearn and production"
+        else
+          marks="production"
+        fi
+
         pytest -n 4 --durations=20 --dist load -sv $codecov -o log_cli=true -m "$marks"
+
     - name: Run tests on Windows
       if: matrix.os == 'windows-latest'
       run: |  # we need a separate step because of the bash-specific if-statement in the previous one.
         pytest -n 4 --durations=20 --dist load -sv --reruns 5 --reruns-delay 1
+
     - name: Check for files left behind by test
       if: matrix.os != 'windows-latest' && always()
       run: |
@@ -102,6 +125,7 @@ jobs:
             echo "Not all generated files have been deleted!"
             exit 1
         fi
+
     - name: Upload coverage
       if: matrix.code-cov && always()
       uses: codecov/codecov-action@v4
 
@@ -88,6 +88,8 @@ target/
 .idea
 *.swp
 .vscode
+.cursorignore
+.cursorindexingignore
 
 # MYPY
 .mypy_cache
@@ -96,4 +98,7 @@ dmypy.sock
 
 # Tests
 .pytest_cache
-.venv
+.venv
+
+# Ruff
+.ruff_cache/
@@ -127,7 +127,6 @@ plugins:
             docstring_options:
               ignore_init_summary: true
               trim_doctest_flags: true
-              returns_multiple_items: false
             show_docstring_attributes: true
             show_docstring_description: true
             show_root_heading: true
@@ -138,7 +137,7 @@ plugins:
             merge_init_into_class: true
             show_symbol_type_heading: true
             show_symbol_type_toc: true
-            docstring_style: google
+            docstring_style: numpy
             inherited_members: true
             show_if_no_docstring: false
             show_bases: true
 
@@ -415,8 +415,9 @@ def get_task(
     if not isinstance(task_id, int):
         raise TypeError(f"Task id should be integer, is {type(task_id)}")
 
-    tid_cache_dir = openml.utils._create_cache_directory_for_id(TASKS_CACHE_DIR_NAME, task_id)
-
+    cache_key_dir = openml.utils._create_cache_directory_for_id(TASKS_CACHE_DIR_NAME, task_id)
+    tid_cache_dir = cache_key_dir / str(task_id)
+    tid_cache_dir_existed = tid_cache_dir.exists()
     try:
         task = _get_task_description(task_id)
         dataset = get_dataset(task.dataset_id, **get_dataset_kwargs)
@@ -430,7 +431,8 @@ def get_task(
         if download_splits and isinstance(task, OpenMLSupervisedTask):
             task.download_split()
     except Exception as e:
-        openml.utils._remove_cache_dir_for_id(TASKS_CACHE_DIR_NAME, tid_cache_dir)
+        if not tid_cache_dir_existed:
+            openml.utils._remove_cache_dir_for_id(TASKS_CACHE_DIR_NAME, tid_cache_dir)
         raise e
 
     return task
 
@@ -50,12 +50,11 @@ classifiers = [
   "Operating System :: Unix",
   "Operating System :: MacOS",
   "Programming Language :: Python :: 3",
-  "Programming Language :: Python :: 3.8",
-  "Programming Language :: Python :: 3.9",
   "Programming Language :: Python :: 3.10",
   "Programming Language :: Python :: 3.11",
   "Programming Language :: Python :: 3.12",
   "Programming Language :: Python :: 3.13",
+  "Programming Language :: Python :: 3.14",
 ]
 license = { file = "LICENSE" }
 
 
@@ -294,6 +294,7 @@ def test_tagging():
     datasets = openml.datasets.list_datasets(tag=tag)
     assert datasets.empty
 
+@pytest.mark.xfail(reason="failures_issue_1544")
 def test_get_feature_with_ontology_data_id_11():
     # test on car dataset, which has built-in ontology references
     dataset = openml.datasets.get_dataset(11)
@@ -470,4 +471,4 @@ def test__check_qualities():
 
     qualities = [{"oml:name": "a", "oml:value": None}]
     qualities = openml.datasets.dataset._check_qualities(qualities)
-    assert qualities["a"] != qualities["a"]
+    assert qualities["a"] != qualities["a"]
@@ -118,7 +118,6 @@ def _check_array(array, type_):
             assert run_prime_trace_content is None
 
     @pytest.mark.sklearn()
-    @pytest.mark.xfail(reason="failures_issue_1544")
     def test_to_from_filesystem_vanilla(self):
         model = Pipeline(
             [
@@ -154,7 +153,6 @@ def test_to_from_filesystem_vanilla(self):
 
     @pytest.mark.sklearn()
     @pytest.mark.flaky()
-    @pytest.mark.xfail(reason="failures_issue_1544")
     def test_to_from_filesystem_search(self):
         model = Pipeline(
             [
@@ -189,7 +187,6 @@ def test_to_from_filesystem_search(self):
         )
 
     @pytest.mark.sklearn()
-    @pytest.mark.xfail(reason="failures_issue_1544")
     def test_to_from_filesystem_no_model(self):
         model = Pipeline(
             [("imputer", SimpleImputer(strategy="mean")), ("classifier", DummyClassifier())],
@@ -295,7 +292,6 @@ def assert_run_prediction_data(task, run, model):
             assert_method(y_test, saved_y_test)
 
     @pytest.mark.sklearn()
-    @pytest.mark.xfail(reason="failures_issue_1544")
     def test_publish_with_local_loaded_flow(self):
         """
         Publish a run tied to a local flow after it has first been saved to
@@ -339,7 +335,6 @@ def test_publish_with_local_loaded_flow(self):
             openml.runs.get_run(loaded_run.run_id)
 
     @pytest.mark.sklearn()
-    @pytest.mark.xfail(reason="failures_issue_1544")
     def test_offline_and_online_run_identical(self):
         extension = SklearnExtension()