From 9034ac519b3620f90271531f39a6fbf09c0ed832 Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Wed, 18 Jun 2025 10:40:05 +0200
Subject: [PATCH 01/11] Automatically connect to production server based on
mark
---
tests/conftest.py | 9 ++++++---
tests/test_tasks/test_classification_task.py | 21 +++++++++++---------
2 files changed, 18 insertions(+), 12 deletions(-)
diff --git a/tests/conftest.py b/tests/conftest.py
index b523117c1..984cd353d 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -268,11 +268,14 @@ def as_robot() -> Iterator[None]:
openml.config.set_retry_policy(policy, n_retries)
-@pytest.fixture(autouse=True, scope="session")
-def with_test_server():
+@pytest.fixture(autouse=True)
+def with_server(request):
+ if "production" in request.keywords:
+ openml.config.server = "https://www.openml.org/api/v1/xml"
+ yield
+ return
openml.config.start_using_configuration_for_example()
yield
- openml.config.stop_using_configuration_for_example()
@pytest.fixture(autouse=True)
diff --git a/tests/test_tasks/test_classification_task.py b/tests/test_tasks/test_classification_task.py
index d3553262f..159223dbc 100644
--- a/tests/test_tasks/test_classification_task.py
+++ b/tests/test_tasks/test_classification_task.py
@@ -2,6 +2,7 @@
from __future__ import annotations
import pandas as pd
+import pytest
from openml.tasks import TaskType, get_task
@@ -17,14 +18,6 @@ def setUp(self, n_levels: int = 1):
self.task_type = TaskType.SUPERVISED_CLASSIFICATION
self.estimation_procedure = 5
- def test_get_X_and_Y(self):
- X, Y = super().test_get_X_and_Y()
- assert X.shape == (768, 8)
- assert isinstance(X, pd.DataFrame)
- assert Y.shape == (768,)
- assert isinstance(Y, pd.Series)
- assert pd.api.types.is_categorical_dtype(Y)
-
def test_download_task(self):
task = super().test_download_task()
assert task.task_id == self.task_id
@@ -34,4 +27,14 @@ def test_download_task(self):
def test_class_labels(self):
task = get_task(self.task_id)
- assert task.class_labels == ["tested_negative", "tested_positive"]
\ No newline at end of file
+ assert task.class_labels == ["tested_negative", "tested_positive"]
+
+
+def test_get_X_and_Y():
+ task = get_task(119)
+ X, Y = task.get_X_and_y()
+ assert X.shape == (768, 8)
+ assert isinstance(X, pd.DataFrame)
+ assert Y.shape == (768,)
+ assert isinstance(Y, pd.Series)
+ assert pd.api.types.is_categorical_dtype(Y)
From 09e6f8b53c569e0aecf56be44bb2f1a8660b3189 Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Wed, 18 Jun 2025 10:45:25 +0200
Subject: [PATCH 02/11] Don't reconfigure server on setup/teardown, handled by
fixture
---
openml/testing.py | 2 --
tests/test_tasks/test_classification_task.py | 1 -
2 files changed, 3 deletions(-)
diff --git a/openml/testing.py b/openml/testing.py
index a3a5806e8..f026c6137 100644
--- a/openml/testing.py
+++ b/openml/testing.py
@@ -101,7 +101,6 @@ def setUp(self, n_levels: int = 1, tmpdir_suffix: str = "") -> None:
self.cached = True
openml.config.apikey = TestBase.apikey
self.production_server = "https://www.openml.org/api/v1/xml"
- openml.config.server = TestBase.test_server
openml.config.avoid_duplicate_runs = False
openml.config.set_root_cache_directory(str(self.workdir))
@@ -120,7 +119,6 @@ def tearDown(self) -> None:
# one of the files may still be used by another process
raise e
- openml.config.server = self.production_server
openml.config.connection_n_retries = self.connection_n_retries
openml.config.retry_policy = self.retry_policy
diff --git a/tests/test_tasks/test_classification_task.py b/tests/test_tasks/test_classification_task.py
index 159223dbc..e6c200fef 100644
--- a/tests/test_tasks/test_classification_task.py
+++ b/tests/test_tasks/test_classification_task.py
@@ -2,7 +2,6 @@
from __future__ import annotations
import pandas as pd
-import pytest
from openml.tasks import TaskType, get_task
From 32b13201341afb211a4b6689ee0227589cdae4c0 Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Wed, 18 Jun 2025 11:10:00 +0200
Subject: [PATCH 03/11] Execute on prod and test for ubuntu
---
.github/workflows/test.yml | 16 ++++++++++++----
1 file changed, 12 insertions(+), 4 deletions(-)
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 55a4a354a..655866a2e 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -96,14 +96,22 @@ jobs:
echo "Repository status before tests: $git_status"
- name: Show installed dependencies
run: python -m pip list
- - name: Run tests on Ubuntu
+ - name: Run tests on Ubuntu Production
if: matrix.os == 'ubuntu-latest'
run: |
if [ ${{ matrix.code-cov }} ]; then codecov='--cov=openml --long --cov-report=xml'; fi
# Most of the time, running only the scikit-learn tests is sufficient
- if [ ${{ matrix.sklearn-only }} = 'true' ]; then sklearn='-m sklearn'; fi
- echo pytest -n 4 --durations=20 --dist load -sv $codecov $sklearn -o log_cli=true
- pytest -n 4 --durations=20 --dist load -sv $codecov $sklearn -o log_cli=true
+ if [ ${{ matrix.sklearn-only }} = 'true' ]; then marks='-m "production and sklearn"'; else marks='-m production'; fi
+ echo pytest -n 4 --durations=20 --dist load -sv $codecov $marks -o log_cli=true
+ pytest -n 4 --durations=20 --dist load -sv $codecov $marks -o log_cli=true
+ - name: Run tests on Ubuntu Test
+ if: matrix.os == 'ubuntu-latest'
+ run: |
+ if [ ${{ matrix.code-cov }} ]; then codecov='--cov=openml --long --cov-report=xml'; fi
+ # Most of the time, running only the scikit-learn tests is sufficient
+ if [ ${{ matrix.sklearn-only }} = 'true' ]; then marks='-m "not production and sklearn"'; else marks='-m "not production"'; fi
+ echo pytest -n 4 --durations=20 --dist load -sv $codecov $marks -o log_cli=true
+ pytest -n 4 --durations=20 --dist load -sv $codecov $marks -o log_cli=true
- name: Run tests on Windows
if: matrix.os == 'windows-latest'
run: | # we need a separate step because of the bash-specific if-statement in the previous one.
From 4287c24b0dedc35186b848139702c6590cb23387 Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Wed, 18 Jun 2025 11:36:18 +0200
Subject: [PATCH 04/11] Switch order
---
.github/workflows/test.yml | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 655866a2e..46c0ddbe8 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -96,20 +96,20 @@ jobs:
echo "Repository status before tests: $git_status"
- name: Show installed dependencies
run: python -m pip list
- - name: Run tests on Ubuntu Production
+ - name: Run tests on Ubuntu Test
if: matrix.os == 'ubuntu-latest'
run: |
if [ ${{ matrix.code-cov }} ]; then codecov='--cov=openml --long --cov-report=xml'; fi
# Most of the time, running only the scikit-learn tests is sufficient
- if [ ${{ matrix.sklearn-only }} = 'true' ]; then marks='-m "production and sklearn"'; else marks='-m production'; fi
+ if [ ${{ matrix.sklearn-only }} = 'true' ]; then marks='-m "not production and sklearn"'; else marks='-m "not production"'; fi
echo pytest -n 4 --durations=20 --dist load -sv $codecov $marks -o log_cli=true
pytest -n 4 --durations=20 --dist load -sv $codecov $marks -o log_cli=true
- - name: Run tests on Ubuntu Test
+ - name: Run tests on Ubuntu Production
if: matrix.os == 'ubuntu-latest'
run: |
if [ ${{ matrix.code-cov }} ]; then codecov='--cov=openml --long --cov-report=xml'; fi
# Most of the time, running only the scikit-learn tests is sufficient
- if [ ${{ matrix.sklearn-only }} = 'true' ]; then marks='-m "not production and sklearn"'; else marks='-m "not production"'; fi
+ if [ ${{ matrix.sklearn-only }} = 'true' ]; then marks='-m "production and sklearn"'; else marks='-m production'; fi
echo pytest -n 4 --durations=20 --dist load -sv $codecov $marks -o log_cli=true
pytest -n 4 --durations=20 --dist load -sv $codecov $marks -o log_cli=true
- name: Run tests on Windows
From ad89be41e3c3c1d31c5b9fae354d756e0f732730 Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Wed, 18 Jun 2025 11:52:27 +0200
Subject: [PATCH 05/11] move markers to end of invocation
---
.github/workflows/test.yml | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 46c0ddbe8..ebbd8ddd8 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -102,16 +102,16 @@ jobs:
if [ ${{ matrix.code-cov }} ]; then codecov='--cov=openml --long --cov-report=xml'; fi
# Most of the time, running only the scikit-learn tests is sufficient
if [ ${{ matrix.sklearn-only }} = 'true' ]; then marks='-m "not production and sklearn"'; else marks='-m "not production"'; fi
- echo pytest -n 4 --durations=20 --dist load -sv $codecov $marks -o log_cli=true
- pytest -n 4 --durations=20 --dist load -sv $codecov $marks -o log_cli=true
+ echo pytest -n 4 --durations=20 --dist load -sv $codecov -o log_cli=true $marks
+ pytest -n 4 --durations=20 --dist load -sv $codecov -o log_cli=true $marks
- name: Run tests on Ubuntu Production
if: matrix.os == 'ubuntu-latest'
run: |
if [ ${{ matrix.code-cov }} ]; then codecov='--cov=openml --long --cov-report=xml'; fi
# Most of the time, running only the scikit-learn tests is sufficient
if [ ${{ matrix.sklearn-only }} = 'true' ]; then marks='-m "production and sklearn"'; else marks='-m production'; fi
- echo pytest -n 4 --durations=20 --dist load -sv $codecov $marks -o log_cli=true
- pytest -n 4 --durations=20 --dist load -sv $codecov $marks -o log_cli=true
+ echo pytest -n 4 --durations=20 --dist load -sv $codecov -o log_cli=true $marks
+ pytest -n 4 --durations=20 --dist load -sv $codecov -o log_cli=true $marks
- name: Run tests on Windows
if: matrix.os == 'windows-latest'
run: | # we need a separate step because of the bash-specific if-statement in the previous one.
From b7624ef803cb2bef7faa1f24d4f5a64b9a91bccd Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Wed, 18 Jun 2025 11:56:20 +0200
Subject: [PATCH 06/11] revert change
---
.github/workflows/test.yml | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index ebbd8ddd8..5748110b0 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -101,7 +101,7 @@ jobs:
run: |
if [ ${{ matrix.code-cov }} ]; then codecov='--cov=openml --long --cov-report=xml'; fi
# Most of the time, running only the scikit-learn tests is sufficient
- if [ ${{ matrix.sklearn-only }} = 'true' ]; then marks='-m "not production and sklearn"'; else marks='-m "not production"'; fi
+ if [ ${{ matrix.sklearn-only }} = 'true' ]; then marks='-m sklearn'; else marks=''; fi
echo pytest -n 4 --durations=20 --dist load -sv $codecov -o log_cli=true $marks
pytest -n 4 --durations=20 --dist load -sv $codecov -o log_cli=true $marks
- name: Run tests on Ubuntu Production
@@ -109,7 +109,7 @@ jobs:
run: |
if [ ${{ matrix.code-cov }} ]; then codecov='--cov=openml --long --cov-report=xml'; fi
# Most of the time, running only the scikit-learn tests is sufficient
- if [ ${{ matrix.sklearn-only }} = 'true' ]; then marks='-m "production and sklearn"'; else marks='-m production'; fi
+ if [ ${{ matrix.sklearn-only }} = 'true' ]; then marks='-m "sklearn"'; else marks=''; fi
echo pytest -n 4 --durations=20 --dist load -sv $codecov -o log_cli=true $marks
pytest -n 4 --durations=20 --dist load -sv $codecov -o log_cli=true $marks
- name: Run tests on Windows
From 9f243bdacd0c83c904fe66c98d43b7ffc44f8095 Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Wed, 18 Jun 2025 12:00:57 +0200
Subject: [PATCH 07/11] Add quotes to invocation
---
.github/workflows/test.yml | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 5748110b0..40e2e703e 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -101,7 +101,7 @@ jobs:
run: |
if [ ${{ matrix.code-cov }} ]; then codecov='--cov=openml --long --cov-report=xml'; fi
# Most of the time, running only the scikit-learn tests is sufficient
- if [ ${{ matrix.sklearn-only }} = 'true' ]; then marks='-m sklearn'; else marks=''; fi
+ if [ ${{ matrix.sklearn-only }} = 'true' ]; then marks='-m "sklearn"'; else marks='-m "not production"'; fi
echo pytest -n 4 --durations=20 --dist load -sv $codecov -o log_cli=true $marks
pytest -n 4 --durations=20 --dist load -sv $codecov -o log_cli=true $marks
- name: Run tests on Ubuntu Production
@@ -109,7 +109,7 @@ jobs:
run: |
if [ ${{ matrix.code-cov }} ]; then codecov='--cov=openml --long --cov-report=xml'; fi
# Most of the time, running only the scikit-learn tests is sufficient
- if [ ${{ matrix.sklearn-only }} = 'true' ]; then marks='-m "sklearn"'; else marks=''; fi
+ if [ ${{ matrix.sklearn-only }} = 'true' ]; then marks='-m "sklearn"'; else marks='-m "production"'; fi
echo pytest -n 4 --durations=20 --dist load -sv $codecov -o log_cli=true $marks
pytest -n 4 --durations=20 --dist load -sv $codecov -o log_cli=true $marks
- name: Run tests on Windows
From 5279ab72a300cb8012b44d52687627686fb9b66a Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Wed, 18 Jun 2025 12:06:44 +0200
Subject: [PATCH 08/11] try different way to preserve the quotes
---
.github/workflows/test.yml | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 40e2e703e..4c7939c1f 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -101,17 +101,17 @@ jobs:
run: |
if [ ${{ matrix.code-cov }} ]; then codecov='--cov=openml --long --cov-report=xml'; fi
# Most of the time, running only the scikit-learn tests is sufficient
- if [ ${{ matrix.sklearn-only }} = 'true' ]; then marks='-m "sklearn"'; else marks='-m "not production"'; fi
+ if [ ${{ matrix.sklearn-only }} = 'true' ]; then marks='sklearn and not production'; else marks='not production'; fi
echo pytest -n 4 --durations=20 --dist load -sv $codecov -o log_cli=true $marks
- pytest -n 4 --durations=20 --dist load -sv $codecov -o log_cli=true $marks
+ pytest -n 4 --durations=20 --dist load -sv $codecov -o log_cli=true -m "$marks"
- name: Run tests on Ubuntu Production
if: matrix.os == 'ubuntu-latest'
run: |
if [ ${{ matrix.code-cov }} ]; then codecov='--cov=openml --long --cov-report=xml'; fi
# Most of the time, running only the scikit-learn tests is sufficient
- if [ ${{ matrix.sklearn-only }} = 'true' ]; then marks='-m "sklearn"'; else marks='-m "production"'; fi
- echo pytest -n 4 --durations=20 --dist load -sv $codecov -o log_cli=true $marks
- pytest -n 4 --durations=20 --dist load -sv $codecov -o log_cli=true $marks
+ if [ ${{ matrix.sklearn-only }} = 'true' ]; then marks='sklearn and production'; else marks='production'; fi
+ echo pytest -n 4 --durations=20 --dist load -sv $codecov -o log_cli=true -m "$marks"
+ pytest -n 4 --durations=20 --dist load -sv $codecov -o log_cli=true -m "$marks"
- name: Run tests on Windows
if: matrix.os == 'windows-latest'
run: | # we need a separate step because of the bash-specific if-statement in the previous one.
From 44c2bd575fd5d328997cc1da978630163be6ea09 Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Wed, 18 Jun 2025 14:48:29 +0200
Subject: [PATCH 09/11] Don't use start/stop, adjust for new defaults
---
.github/workflows/test.yml | 2 +-
tests/conftest.py | 3 ++-
tests/test_datasets/test_dataset_functions.py | 3 ++-
3 files changed, 5 insertions(+), 3 deletions(-)
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 4c7939c1f..31cdff602 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -102,7 +102,7 @@ jobs:
if [ ${{ matrix.code-cov }} ]; then codecov='--cov=openml --long --cov-report=xml'; fi
# Most of the time, running only the scikit-learn tests is sufficient
if [ ${{ matrix.sklearn-only }} = 'true' ]; then marks='sklearn and not production'; else marks='not production'; fi
- echo pytest -n 4 --durations=20 --dist load -sv $codecov -o log_cli=true $marks
+ echo pytest -n 4 --durations=20 --dist load -sv $codecov -o log_cli=true -m "$marks"
pytest -n 4 --durations=20 --dist load -sv $codecov -o log_cli=true -m "$marks"
- name: Run tests on Ubuntu Production
if: matrix.os == 'ubuntu-latest'
diff --git a/tests/conftest.py b/tests/conftest.py
index 984cd353d..968a5a58a 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -274,7 +274,8 @@ def with_server(request):
openml.config.server = "https://www.openml.org/api/v1/xml"
yield
return
- openml.config.start_using_configuration_for_example()
+ openml.config.server = "https://test.openml.org/api/v1/xml"
+ openml.config.apikey = "c0c42819af31e706efe1f4b88c23c6c1"
yield
diff --git a/tests/test_datasets/test_dataset_functions.py b/tests/test_datasets/test_dataset_functions.py
index d6b26d864..ad8bbc693 100644
--- a/tests/test_datasets/test_dataset_functions.py
+++ b/tests/test_datasets/test_dataset_functions.py
@@ -1951,7 +1951,8 @@ def test_get_dataset_parquet(requests_mock, test_files_directory):
content_file = (
test_files_directory / "mock_responses" / "datasets" / "data_description_61.xml"
)
- requests_mock.get("https://www.openml.org/api/v1/xml/data/61", text=content_file.read_text())
+ # While the mocked example is from production, unit tests by default connect to the test server.
+ requests_mock.get("https://test.openml.org/api/v1/xml/data/61", text=content_file.read_text())
dataset = openml.datasets.get_dataset(61, download_data=True)
assert dataset._parquet_url is not None
assert dataset.parquet_file is not None
From 91f3d522448bf6a19c24ac44d99e57c04a2e97fd Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Wed, 18 Jun 2025 15:35:50 +0200
Subject: [PATCH 10/11] Use correct estimation procedure id
---
tests/test_runs/test_run_functions.py | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/tests/test_runs/test_run_functions.py b/tests/test_runs/test_run_functions.py
index 7235075c0..5023943d7 100644
--- a/tests/test_runs/test_run_functions.py
+++ b/tests/test_runs/test_run_functions.py
@@ -68,7 +68,7 @@ class TestRun(TestBase):
"task_meta_data": {
"task_type": TaskType.SUPERVISED_CLASSIFICATION,
"dataset_id": 16, # credit-a
- "estimation_procedure_id": 1,
+ "estimation_procedure_id": 6,
"target_name": "class",
},
}
@@ -81,7 +81,7 @@ class TestRun(TestBase):
"task_meta_data": {
"task_type": TaskType.SUPERVISED_CLASSIFICATION,
"dataset_id": 20, # diabetes
- "estimation_procedure_id": 1,
+ "estimation_procedure_id": 5,
"target_name": "class",
},
}
From c92c672622e72fad3059d77a65dd4e232b46589b Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Wed, 18 Jun 2025 15:59:35 +0200
Subject: [PATCH 11/11] [no ci] add test marker
---
tests/test_tasks/test_classification_task.py | 2 ++
1 file changed, 2 insertions(+)
diff --git a/tests/test_tasks/test_classification_task.py b/tests/test_tasks/test_classification_task.py
index e6c200fef..d4f2ed9d7 100644
--- a/tests/test_tasks/test_classification_task.py
+++ b/tests/test_tasks/test_classification_task.py
@@ -2,6 +2,7 @@
from __future__ import annotations
import pandas as pd
+import pytest
from openml.tasks import TaskType, get_task
@@ -29,6 +30,7 @@ def test_class_labels(self):
assert task.class_labels == ["tested_negative", "tested_positive"]
+@pytest.mark.server()
def test_get_X_and_Y():
task = get_task(119)
X, Y = task.get_X_and_y()