From 8db2d821eba3c77ffa4e3a77f115da58a81d50f4 Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Thu, 19 Jun 2025 19:50:31 +0200
Subject: [PATCH 1/7] Don't actually execute the test body
---
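Notes: a bare `return` at the top keeps the test collected (the decorators, including the version-gated skipIf, still apply) while short-circuiting everything below it; since it sits above the docstring here, that docstring becomes dead code, which is fine for a throwaway commit. An explicit pytest.skip is an alternative that shows up in the test report; a minimal sketch with hypothetical stand-in helpers:

    import pytest


    def _cheap_setup():            # hypothetical stand-in for openml.tasks.get_task(7)
        return list(range(10))


    def _suspect_remainder(data):  # hypothetical stand-in for the parallel run
        return sum(data)


    def test_bisect_by_early_exit():
        data = _cheap_setup()      # already cleared: runs fine
        pytest.skip("bisecting: everything below is still under suspicion")
        assert _suspect_remainder(data) == 45  # not reached yet
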
tests/test_runs/test_run_functions.py | 1 +
1 file changed, 1 insertion(+)
diff --git a/tests/test_runs/test_run_functions.py b/tests/test_runs/test_run_functions.py
index 725421d4f..04be16e1a 100644
--- a/tests/test_runs/test_run_functions.py
+++ b/tests/test_runs/test_run_functions.py
@@ -1753,6 +1753,7 @@ def test_format_prediction_task_regression(self):
)
@mock.patch("openml_sklearn.SklearnExtension._prevent_optimize_n_jobs")
def test__run_task_get_arffcontent_2(self, parallel_mock):
+ return
"""Tests if a run executed in parallel is collated correctly."""
task = openml.tasks.get_task(7) # Supervised Classification on kr-vs-kp
x, y = task.get_X_and_y()
From 0a506adf26802c0a38f8b7a7ce1d61ddfe6b8cbe Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Thu, 19 Jun 2025 23:18:12 +0200
Subject: [PATCH 2/7] Only do setup
---
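Notes: the early exit now sits directly before the parallel_backend block, confirming the setup up to that point is harmless. joblib's parallel_backend context manager overrides the backend and worker count for every joblib.Parallel call issued inside the block, including calls buried in library code such as openml's fold runner. A minimal sketch, independent of OpenML:

    from joblib import Parallel, delayed, parallel_backend


    def square(i: int) -> int:
        return i * i


    if __name__ == "__main__":
        with parallel_backend("threading", n_jobs=2):
            # Parallel() with no arguments picks up the active backend and n_jobs.
            print(Parallel()(delayed(square)(i) for i in range(4)))  # [0, 1, 4, 9]
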
tests/test_runs/test_run_functions.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/tests/test_runs/test_run_functions.py b/tests/test_runs/test_run_functions.py
index 04be16e1a..db484ba88 100644
--- a/tests/test_runs/test_run_functions.py
+++ b/tests/test_runs/test_run_functions.py
@@ -1753,7 +1753,6 @@ def test_format_prediction_task_regression(self):
)
@mock.patch("openml_sklearn.SklearnExtension._prevent_optimize_n_jobs")
def test__run_task_get_arffcontent_2(self, parallel_mock):
- return
"""Tests if a run executed in parallel is collated correctly."""
task = openml.tasks.get_task(7) # Supervised Classification on kr-vs-kp
x, y = task.get_X_and_y()
@@ -1780,6 +1779,7 @@ def test__run_task_get_arffcontent_2(self, parallel_mock):
)
n_jobs = 2
backend = "loky" if Version(joblib.__version__) > Version("0.11") else "multiprocessing"
+ return
with parallel_backend(backend, n_jobs=n_jobs):
res = openml.runs.functions._run_task_get_arffcontent(
extension=self.extension,
From 5699299253b18d728ab198c520e5b05b4bad06af Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Fri, 20 Jun 2025 09:44:19 +0200
Subject: [PATCH 3/7] Get task but not data
---
tests/test_runs/test_run_functions.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/tests/test_runs/test_run_functions.py b/tests/test_runs/test_run_functions.py
index db484ba88..2c9548add 100644
--- a/tests/test_runs/test_run_functions.py
+++ b/tests/test_runs/test_run_functions.py
@@ -1755,6 +1755,7 @@ def test_format_prediction_task_regression(self):
def test__run_task_get_arffcontent_2(self, parallel_mock):
"""Tests if a run executed in parallel is collated correctly."""
task = openml.tasks.get_task(7) # Supervised Classification on kr-vs-kp
+ return
x, y = task.get_X_and_y()
num_instances = x.shape[0]
line_length = 6 + len(task.class_labels)
@@ -1779,7 +1780,6 @@ def test__run_task_get_arffcontent_2(self, parallel_mock):
)
n_jobs = 2
backend = "loky" if Version(joblib.__version__) > Version("0.11") else "multiprocessing"
- return
with parallel_backend(backend, n_jobs=n_jobs):
res = openml.runs.functions._run_task_get_arffcontent(
extension=self.extension,
From d52d65968e481184e60e79563e3d598200c23723 Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Fri, 20 Jun 2025 10:02:24 +0200
Subject: [PATCH 4/7] Also get data
---
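Notes: the exit advances past task.get_X_and_y(). The surrounding hunk also shows the version-gating pattern the test uses twice: the SGD loss name is chosen by sklearn version, and (further down) the joblib backend by joblib version. A stand-alone illustration; the helper names are mine, and Version is assumed to come from packaging.version, matching the comparisons in the test:

    from packaging.version import Version

    import joblib
    import sklearn


    def pick_sgd_loss() -> str:
        # sklearn renamed the "log" loss to "log_loss"; the old name is gone in 1.3.
        return "log" if Version(sklearn.__version__) < Version("1.3") else "log_loss"


    def pick_joblib_backend() -> str:
        # "loky" superseded "multiprocessing" as joblib's process backend after 0.11.
        return "loky" if Version(joblib.__version__) > Version("0.11") else "multiprocessing"
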
tests/test_runs/test_run_functions.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/tests/test_runs/test_run_functions.py b/tests/test_runs/test_run_functions.py
index 2c9548add..816c433c3 100644
--- a/tests/test_runs/test_run_functions.py
+++ b/tests/test_runs/test_run_functions.py
@@ -1755,8 +1755,8 @@ def test_format_prediction_task_regression(self):
def test__run_task_get_arffcontent_2(self, parallel_mock):
"""Tests if a run executed in parallel is collated correctly."""
task = openml.tasks.get_task(7) # Supervised Classification on kr-vs-kp
- return
x, y = task.get_X_and_y()
+ return
num_instances = x.shape[0]
line_length = 6 + len(task.class_labels)
loss = "log" if Version(sklearn.__version__) < Version("1.3") else "log_loss"
From 78111f577ffc80c6839a1a1086493e5872c25234 Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Fri, 20 Jun 2025 10:17:44 +0200
Subject: [PATCH 5/7] Execute full test
---
tests/test_runs/test_run_functions.py | 1 -
1 file changed, 1 deletion(-)
diff --git a/tests/test_runs/test_run_functions.py b/tests/test_runs/test_run_functions.py
index 816c433c3..725421d4f 100644
--- a/tests/test_runs/test_run_functions.py
+++ b/tests/test_runs/test_run_functions.py
@@ -1756,7 +1756,6 @@ def test__run_task_get_arffcontent_2(self, parallel_mock):
"""Tests if a run executed in parallel is collated correctly."""
task = openml.tasks.get_task(7) # Supervised Classification on kr-vs-kp
x, y = task.get_X_and_y()
- return
num_instances = x.shape[0]
line_length = 6 + len(task.class_labels)
loss = "log" if Version(sklearn.__version__) < Version("1.3") else "log_loss"
From ae6e55db20165a3e8f2698f18ea809871ede46fc Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Fri, 20 Jun 2025 17:31:00 +0200
Subject: [PATCH 6/7] Convert from unittest to pytest
---
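Notes: beyond moving the test out of the class, the conversion leans on the behaviour described in the test's long comment: a mock.patch applied in the parent process does not carry over into freshly spawned joblib workers, while the threading backend shares the parent's (patched) modules. A stand-alone sketch of that mechanism, using only joblib, mock, and the standard library:

    from unittest import mock

    from joblib import Parallel, delayed, parallel_backend


    def cwd() -> str:
        import os
        return os.getcwd()


    if __name__ == "__main__":
        with mock.patch("os.getcwd", return_value="PATCHED"):
            with parallel_backend("threading", n_jobs=2):
                # Threads share the parent's patched os module.
                print(Parallel()(delayed(cwd)() for _ in range(2)))  # ['PATCHED', 'PATCHED']
            with parallel_backend("loky", n_jobs=2):
                # Fresh worker processes re-import os; the parent's mock is invisible.
                print(Parallel()(delayed(cwd)() for _ in range(2)))  # real directories
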
tests/test_runs/test_run_functions.py | 150 +++++++++++++-------------
1 file changed, 77 insertions(+), 73 deletions(-)
diff --git a/tests/test_runs/test_run_functions.py b/tests/test_runs/test_run_functions.py
index 725421d4f..687be7bb6 100644
--- a/tests/test_runs/test_run_functions.py
+++ b/tests/test_runs/test_run_functions.py
@@ -1746,79 +1746,6 @@ def test_format_prediction_task_regression(self):
res = format_prediction(regression, *ignored_input)
self.assertListEqual(res, [0] * 5)
- @pytest.mark.sklearn()
- @unittest.skipIf(
- Version(sklearn.__version__) < Version("0.21"),
- reason="couldn't perform local tests successfully w/o bloating RAM",
- )
- @mock.patch("openml_sklearn.SklearnExtension._prevent_optimize_n_jobs")
- def test__run_task_get_arffcontent_2(self, parallel_mock):
- """Tests if a run executed in parallel is collated correctly."""
- task = openml.tasks.get_task(7) # Supervised Classification on kr-vs-kp
- x, y = task.get_X_and_y()
- num_instances = x.shape[0]
- line_length = 6 + len(task.class_labels)
- loss = "log" if Version(sklearn.__version__) < Version("1.3") else "log_loss"
- clf = sklearn.pipeline.Pipeline(
- [
- (
- "cat_handling",
- ColumnTransformer(
- transformers=[
- (
- "cat",
- OneHotEncoder(handle_unknown="ignore"),
- x.select_dtypes(include=["object", "category"]).columns,
- )
- ],
- remainder="passthrough",
- ),
- ),
- ("clf", SGDClassifier(loss=loss, random_state=1)),
- ]
- )
- n_jobs = 2
- backend = "loky" if Version(joblib.__version__) > Version("0.11") else "multiprocessing"
- with parallel_backend(backend, n_jobs=n_jobs):
- res = openml.runs.functions._run_task_get_arffcontent(
- extension=self.extension,
- model=clf,
- task=task,
- add_local_measures=True,
- n_jobs=n_jobs,
- )
- # This unit test will fail if joblib is unable to distribute successfully since the
- # function _run_model_on_fold is being mocked out. However, for a new spawned worker, it
- # is not and the mock call_count should remain 0 while the subsequent check of actual
- # results should also hold, only on successful distribution of tasks to workers.
- # The _prevent_optimize_n_jobs() is a function executed within the _run_model_on_fold()
- # block and mocking this function doesn't affect rest of the pipeline, but is adequately
- # indicative if _run_model_on_fold() is being called or not.
- assert parallel_mock.call_count == 0
- assert isinstance(res[0], list)
- assert len(res[0]) == num_instances
- assert len(res[0][0]) == line_length
- assert len(res[2]) == 7
- assert len(res[3]) == 7
- expected_scores = [
- 0.9625,
- 0.953125,
- 0.965625,
- 0.9125,
- 0.98125,
- 0.975,
- 0.9247648902821317,
- 0.9404388714733543,
- 0.9780564263322884,
- 0.9623824451410659,
- ]
- scores = [v for k, v in res[2]["predictive_accuracy"][0].items()]
- np.testing.assert_array_almost_equal(
- scores,
- expected_scores,
- decimal=2,
- err_msg="Observed performance scores deviate from expected ones.",
- )
@pytest.mark.sklearn()
@unittest.skipIf(
@@ -1993,3 +1920,80 @@ def test_delete_unknown_run(mock_delete, test_files_directory, test_api_key):
run_url = "https://test.openml.org/api/v1/xml/run/9999999"
assert run_url == mock_delete.call_args.args[0]
assert test_api_key == mock_delete.call_args.kwargs.get("params", {}).get("api_key")
+
+
+@pytest.mark.sklearn()
+@pytest.mark.skipif(
+    Version(sklearn.__version__) < Version("0.21"),
+    reason="couldn't perform local tests successfully w/o bloating RAM",
+)
+@mock.patch("openml_sklearn.SklearnExtension._prevent_optimize_n_jobs")
+def test__run_task_get_arffcontent_2(parallel_mock):
+ """Tests if a run executed in parallel is collated correctly."""
+ task = openml.tasks.get_task(7) # Supervised Classification on kr-vs-kp
+ x, y = task.get_X_and_y()
+ num_instances = x.shape[0]
+ line_length = 6 + len(task.class_labels)
+ loss = "log" if Version(sklearn.__version__) < Version("1.3") else "log_loss"
+ clf = sklearn.pipeline.Pipeline(
+ [
+ (
+ "cat_handling",
+ ColumnTransformer(
+ transformers=[
+ (
+ "cat",
+ OneHotEncoder(handle_unknown="ignore"),
+ x.select_dtypes(include=["object", "category"]).columns,
+ )
+ ],
+ remainder="passthrough",
+ ),
+ ),
+ ("clf", SGDClassifier(loss=loss, random_state=1)),
+ ]
+ )
+ n_jobs = 2
+ backend = "loky" if Version(joblib.__version__) > Version("0.11") else "multiprocessing"
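+    # This module-level pytest test cannot use TestBase's self.extension, so build one locally.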
+ from openml_sklearn import SklearnExtension
+ extension = SklearnExtension()
+ with parallel_backend(backend, n_jobs=n_jobs):
+ res = openml.runs.functions._run_task_get_arffcontent(
+ extension=extension,
+ model=clf,
+ task=task,
+ add_local_measures=True,
+ n_jobs=n_jobs,
+ )
+    # _prevent_optimize_n_jobs() is executed inside _run_model_on_fold(), and
+    # mocking it leaves the rest of the pipeline untouched, so its call_count
+    # indicates whether _run_model_on_fold() ran in this process. A mock.patch
+    # applied in the parent process does not carry over into newly spawned
+    # joblib workers, so on a successful distribution of the folds the
+    # call_count stays 0 while the result checks below still hold; if joblib
+    # failed to distribute the work, the folds would run here and the count
+    # would be non-zero.
+ assert parallel_mock.call_count == 0
+ assert isinstance(res[0], list)
+ assert len(res[0]) == num_instances
+ assert len(res[0][0]) == line_length
+ assert len(res[2]) == 7
+ assert len(res[3]) == 7
+ expected_scores = [
+ 0.9625,
+ 0.953125,
+ 0.965625,
+ 0.9125,
+ 0.98125,
+ 0.975,
+ 0.9247648902821317,
+ 0.9404388714733543,
+ 0.9780564263322884,
+ 0.9623824451410659,
+ ]
+    scores = list(res[2]["predictive_accuracy"][0].values())
+ np.testing.assert_array_almost_equal(
+ scores,
+ expected_scores,
+ decimal=2,
+ err_msg="Observed performance scores deviate from expected ones.",
+ )
From 0223c360659c4f2408be7b833f10036ffb59f7d7 Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Fri, 20 Jun 2025 17:49:32 +0200
Subject: [PATCH 7/7] Convert from unittest to pytest, parametrize outside of
test
---
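Notes: the for-loop over (n_jobs, backend, call_count) becomes one test per case. Two details the decorator stack relies on: pytest passes parametrized values by keyword while mock.patch appends its mock positionally, so the mock binds to the first parameter (parallel_mock); and every parametrized case gets a fresh mock, which is why the expected call counts are per-case rather than cumulative as in the old loop. A minimal sketch of the same stacking, with a hypothetical patch target:

    from unittest import mock

    import pytest


    @mock.patch("os.getcwd")
    @pytest.mark.parametrize(("calls", "expected"), [(1, 1), (3, 3)])
    def test_mock_binds_first(getcwd_mock, calls, expected):
        import os

        for _ in range(calls):
            os.getcwd()
        assert getcwd_mock.call_count == expected
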
tests/test_runs/test_run_functions.py | 163 ++++++++++++++------------
1 file changed, 87 insertions(+), 76 deletions(-)
diff --git a/tests/test_runs/test_run_functions.py b/tests/test_runs/test_run_functions.py
index 687be7bb6..0517c8c9c 100644
--- a/tests/test_runs/test_run_functions.py
+++ b/tests/test_runs/test_run_functions.py
@@ -1747,82 +1747,6 @@ def test_format_prediction_task_regression(self):
self.assertListEqual(res, [0] * 5)
- @pytest.mark.sklearn()
- @unittest.skipIf(
- Version(sklearn.__version__) < Version("0.21"),
- reason="couldn't perform local tests successfully w/o bloating RAM",
- )
- @mock.patch("openml_sklearn.SklearnExtension._prevent_optimize_n_jobs")
- def test_joblib_backends(self, parallel_mock):
- """Tests evaluation of a run using various joblib backends and n_jobs."""
- task = openml.tasks.get_task(7) # Supervised Classification on kr-vs-kp
- x, y = task.get_X_and_y()
- num_instances = x.shape[0]
- line_length = 6 + len(task.class_labels)
-
- backend_choice = (
- "loky" if Version(joblib.__version__) > Version("0.11") else "multiprocessing"
- )
- for n_jobs, backend, call_count in [
- (1, backend_choice, 10),
- (2, backend_choice, 10),
- (-1, backend_choice, 10),
- (1, "threading", 20),
- (-1, "threading", 30),
- (1, "sequential", 40),
- ]:
- clf = sklearn.model_selection.RandomizedSearchCV(
- estimator=sklearn.pipeline.Pipeline(
- [
- (
- "cat_handling",
- ColumnTransformer(
- transformers=[
- (
- "cat",
- OneHotEncoder(handle_unknown="ignore"),
- x.select_dtypes(include=["object", "category"]).columns,
- )
- ],
- remainder="passthrough",
- ),
- ),
- ("clf", sklearn.ensemble.RandomForestClassifier(n_estimators=5)),
- ]
- ),
- param_distributions={
- "clf__max_depth": [3, None],
- "clf__max_features": [1, 2, 3, 4],
- "clf__min_samples_split": [2, 3, 4, 5, 6, 7, 8, 9, 10],
- "clf__min_samples_leaf": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
- "clf__bootstrap": [True, False],
- "clf__criterion": ["gini", "entropy"],
- },
- random_state=1,
- cv=sklearn.model_selection.StratifiedKFold(
- n_splits=2,
- shuffle=True,
- random_state=1,
- ),
- n_iter=5,
- n_jobs=n_jobs,
- )
- with parallel_backend(backend, n_jobs=n_jobs):
- res = openml.runs.functions._run_task_get_arffcontent(
- extension=self.extension,
- model=clf,
- task=task,
- add_local_measures=True,
- n_jobs=n_jobs,
- )
- assert type(res[0]) == list
- assert len(res[0]) == num_instances
- assert len(res[0][0]) == line_length
- # usercpu_time_millis_* not recorded when n_jobs > 1
- # *_time_millis_* not recorded when n_jobs = -1
- assert len(res[2]["predictive_accuracy"][0]) == 10
- assert len(res[3]["predictive_accuracy"][0]) == 10
- assert parallel_mock.call_count == call_count
@unittest.skipIf(
Version(sklearn.__version__) < Version("0.20"),
@@ -1997,3 +1921,90 @@ def test__run_task_get_arffcontent_2(parallel_mock):
decimal=2,
err_msg="Observed performance scores deviate from expected ones.",
)
+
+
+@pytest.mark.sklearn()
+@pytest.mark.skipif(
+    Version(sklearn.__version__) < Version("0.21"),
+    reason="couldn't perform local tests successfully w/o bloating RAM",
+)
+@mock.patch("openml_sklearn.SklearnExtension._prevent_optimize_n_jobs")
+@pytest.mark.parametrize(
+ ("n_jobs", "backend", "call_count"),
+ [
+ # `None` picks the backend based on joblib version (loky or multiprocessing) and
+ # spawns multiple processes if n_jobs != 1, which means the mock is not applied.
+ (2, None, 0),
+ (-1, None, 0),
+ (1, None, 10), # with n_jobs=1 the mock *is* applied, since there is no new subprocess
+ (1, "sequential", 10),
+ (1, "threading", 10),
+        (-1, "threading", 10),  # the threading backend preserves mocks even when parallelizing
+ ]
+)
+def test_joblib_backends(parallel_mock, n_jobs, backend, call_count):
+ """Tests evaluation of a run using various joblib backends and n_jobs."""
+ if backend is None:
+ backend = (
+ "loky" if Version(joblib.__version__) > Version("0.11") else "multiprocessing"
+ )
+
+ task = openml.tasks.get_task(7) # Supervised Classification on kr-vs-kp
+ x, y = task.get_X_and_y()
+ num_instances = x.shape[0]
+ line_length = 6 + len(task.class_labels)
+
+ clf = sklearn.model_selection.RandomizedSearchCV(
+ estimator=sklearn.pipeline.Pipeline(
+ [
+ (
+ "cat_handling",
+ ColumnTransformer(
+ transformers=[
+ (
+ "cat",
+ OneHotEncoder(handle_unknown="ignore"),
+ x.select_dtypes(include=["object", "category"]).columns,
+ )
+ ],
+ remainder="passthrough",
+ ),
+ ),
+ ("clf", sklearn.ensemble.RandomForestClassifier(n_estimators=5)),
+ ]
+ ),
+ param_distributions={
+ "clf__max_depth": [3, None],
+ "clf__max_features": [1, 2, 3, 4],
+ "clf__min_samples_split": [2, 3, 4, 5, 6, 7, 8, 9, 10],
+ "clf__min_samples_leaf": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
+ "clf__bootstrap": [True, False],
+ "clf__criterion": ["gini", "entropy"],
+ },
+ random_state=1,
+ cv=sklearn.model_selection.StratifiedKFold(
+ n_splits=2,
+ shuffle=True,
+ random_state=1,
+ ),
+ n_iter=5,
+ n_jobs=n_jobs,
+ )
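+    # This module-level pytest test cannot use TestBase's self.extension, so build one locally.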
+ from openml_sklearn import SklearnExtension
+ extension = SklearnExtension()
+ with parallel_backend(backend, n_jobs=n_jobs):
+ res = openml.runs.functions._run_task_get_arffcontent(
+ extension=extension,
+ model=clf,
+ task=task,
+ add_local_measures=True,
+ n_jobs=n_jobs,
+ )
+    assert isinstance(res[0], list)
+ assert len(res[0]) == num_instances
+ assert len(res[0][0]) == line_length
+ # usercpu_time_millis_* not recorded when n_jobs > 1
+ # *_time_millis_* not recorded when n_jobs = -1
+ assert len(res[2]["predictive_accuracy"][0]) == 10
+ assert len(res[3]["predictive_accuracy"][0]) == 10
+ assert parallel_mock.call_count == call_count