From dd130fddbdfa956428db5c794f77816b47323af2 Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Wed, 31 Dec 2025 10:20:44 +0200
Subject: [PATCH 1/2] Remove xfail markers since the tests pass with updated
 test server state
---
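Reviewer note (below the --- fold, so not part of the commit message): a quick
refresher on the marker semantics, since this patch only deletes markers.
Under pytest, an xfail-marked test that passes is reported as XPASS rather
than PASSED (or fails outright when xfail_strict is enabled), so leaving the
markers in place would hide these now-passing tests. A minimal sketch, with a
hypothetical test name:

    import pytest

    @pytest.mark.xfail(reason="failures_issue_1544")
    def test_example():
        # With the marker: reported as XFAIL on failure, XPASS on success.
        # Without it: plain FAILED/PASSED reporting is restored.
        assert True

Dropping the markers therefore restores normal reporting now that the updated
test server state makes these tests pass again.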
tests/test_runs/test_run.py | 5 ----
tests/test_runs/test_run_functions.py | 28 -----------------------
tests/test_setups/test_setup_functions.py | 3 ---
3 files changed, 36 deletions(-)
diff --git a/tests/test_runs/test_run.py b/tests/test_runs/test_run.py
index 088856450..034b731aa 100644
--- a/tests/test_runs/test_run.py
+++ b/tests/test_runs/test_run.py
@@ -118,7 +118,6 @@ def _check_array(array, type_):
assert run_prime_trace_content is None
@pytest.mark.sklearn()
- @pytest.mark.xfail(reason="failures_issue_1544")
def test_to_from_filesystem_vanilla(self):
model = Pipeline(
[
@@ -154,7 +153,6 @@ def test_to_from_filesystem_vanilla(self):
@pytest.mark.sklearn()
@pytest.mark.flaky()
- @pytest.mark.xfail(reason="failures_issue_1544")
def test_to_from_filesystem_search(self):
model = Pipeline(
[
@@ -189,7 +187,6 @@ def test_to_from_filesystem_search(self):
)
@pytest.mark.sklearn()
- @pytest.mark.xfail(reason="failures_issue_1544")
def test_to_from_filesystem_no_model(self):
model = Pipeline(
[("imputer", SimpleImputer(strategy="mean")), ("classifier", DummyClassifier())],
@@ -295,7 +292,6 @@ def assert_run_prediction_data(task, run, model):
assert_method(y_test, saved_y_test)
@pytest.mark.sklearn()
- @pytest.mark.xfail(reason="failures_issue_1544")
def test_publish_with_local_loaded_flow(self):
"""
Publish a run tied to a local flow after it has first been saved to
@@ -339,7 +335,6 @@ def test_publish_with_local_loaded_flow(self):
openml.runs.get_run(loaded_run.run_id)
@pytest.mark.sklearn()
- @pytest.mark.xfail(reason="failures_issue_1544")
def test_offline_and_online_run_identical(self):
extension = SklearnExtension()
diff --git a/tests/test_runs/test_run_functions.py b/tests/test_runs/test_run_functions.py
index 3bb4b0a0c..94ffa5001 100644
--- a/tests/test_runs/test_run_functions.py
+++ b/tests/test_runs/test_run_functions.py
@@ -398,7 +398,6 @@ def _check_sample_evaluations(
assert evaluation < max_time_allowed
@pytest.mark.sklearn()
- @pytest.mark.xfail(reason="failures_issue_1544")
def test_run_regression_on_classif_task(self):
task_id = 259 # collins; crossvalidation; has numeric targets
@@ -415,7 +414,6 @@ def test_run_regression_on_classif_task(self):
)
@pytest.mark.sklearn()
- @pytest.mark.xfail(reason="failures_issue_1544")
def test_check_erronous_sklearn_flow_fails(self):
task_id = 115 # diabetes; crossvalidation
task = openml.tasks.get_task(task_id)
@@ -628,7 +626,6 @@ def _run_and_upload_regression(
)
@pytest.mark.sklearn()
- @pytest.mark.xfail(reason="failures_issue_1544")
def test_run_and_upload_logistic_regression(self):
lr = LogisticRegression(solver="lbfgs", max_iter=1000)
task_id = self.TEST_SERVER_TASK_SIMPLE["task_id"]
@@ -637,7 +634,6 @@ def test_run_and_upload_logistic_regression(self):
self._run_and_upload_classification(lr, task_id, n_missing_vals, n_test_obs, "62501")
@pytest.mark.sklearn()
- @pytest.mark.xfail(reason="failures_issue_1544")
def test_run_and_upload_linear_regression(self):
lr = LinearRegression()
task_id = self.TEST_SERVER_TASK_REGRESSION["task_id"]
@@ -668,7 +664,6 @@ def test_run_and_upload_linear_regression(self):
self._run_and_upload_regression(lr, task_id, n_missing_vals, n_test_obs, "62501")
@pytest.mark.sklearn()
- @pytest.mark.xfail(reason="failures_issue_1544")
def test_run_and_upload_pipeline_dummy_pipeline(self):
pipeline1 = Pipeline(
steps=[
@@ -682,7 +677,6 @@ def test_run_and_upload_pipeline_dummy_pipeline(self):
self._run_and_upload_classification(pipeline1, task_id, n_missing_vals, n_test_obs, "62501")
@pytest.mark.sklearn()
- @pytest.mark.xfail(reason="failures_issue_1544")
@unittest.skipIf(
Version(sklearn.__version__) < Version("0.20"),
reason="columntransformer introduction in 0.20.0",
@@ -799,7 +793,6 @@ def test_run_and_upload_knn_pipeline(self, warnings_mock):
assert call_count == 3
@pytest.mark.sklearn()
- @pytest.mark.xfail(reason="failures_issue_1544")
def test_run_and_upload_gridsearch(self):
estimator_name = (
"base_estimator" if Version(sklearn.__version__) < Version("1.4") else "estimator"
@@ -822,7 +815,6 @@ def test_run_and_upload_gridsearch(self):
assert len(run.trace.trace_iterations) == 9
@pytest.mark.sklearn()
- @pytest.mark.xfail(reason="failures_issue_1544")
def test_run_and_upload_randomsearch(self):
randomsearch = RandomizedSearchCV(
RandomForestClassifier(n_estimators=5),
@@ -855,7 +847,6 @@ def test_run_and_upload_randomsearch(self):
assert len(trace.trace_iterations) == 5
@pytest.mark.sklearn()
- @pytest.mark.xfail(reason="failures_issue_1544")
def test_run_and_upload_maskedarrays(self):
# This testcase is important for 2 reasons:
# 1) it verifies the correct handling of masked arrays (not all
@@ -883,7 +874,6 @@ def test_run_and_upload_maskedarrays(self):
##########################################################################
@pytest.mark.sklearn()
- @pytest.mark.xfail(reason="failures_issue_1544")
def test_learning_curve_task_1(self):
task_id = 801 # diabetes dataset
num_test_instances = 6144 # for learning curve
@@ -908,7 +898,6 @@ def test_learning_curve_task_1(self):
self._check_sample_evaluations(run.sample_evaluations, num_repeats, num_folds, num_samples)
@pytest.mark.sklearn()
- @pytest.mark.xfail(reason="failures_issue_1544")
def test_learning_curve_task_2(self):
task_id = 801 # diabetes dataset
num_test_instances = 6144 # for learning curve
@@ -945,7 +934,6 @@ def test_learning_curve_task_2(self):
self._check_sample_evaluations(run.sample_evaluations, num_repeats, num_folds, num_samples)
@pytest.mark.sklearn()
- @pytest.mark.xfail(reason="failures_issue_1544")
@unittest.skipIf(
Version(sklearn.__version__) < Version("0.21"),
reason="Pipelines don't support indexing (used for the assert check)",
@@ -1024,7 +1012,6 @@ def _test_local_evaluations(self, run):
assert alt_scores[idx] <= 1
@pytest.mark.sklearn()
- @pytest.mark.xfail(reason="failures_issue_1544")
def test_local_run_swapped_parameter_order_model(self):
clf = DecisionTreeClassifier()
australian_task = 595 # Australian; crossvalidation
@@ -1040,7 +1027,6 @@ def test_local_run_swapped_parameter_order_model(self):
self._test_local_evaluations(run)
@pytest.mark.sklearn()
- @pytest.mark.xfail(reason="failures_issue_1544")
@unittest.skipIf(
Version(sklearn.__version__) < Version("0.20"),
reason="SimpleImputer doesn't handle mixed type DataFrame as input",
@@ -1069,7 +1055,6 @@ def test_local_run_swapped_parameter_order_flow(self):
self._test_local_evaluations(run)
@pytest.mark.sklearn()
- @pytest.mark.xfail(reason="failures_issue_1544")
@unittest.skipIf(
Version(sklearn.__version__) < Version("0.20"),
reason="SimpleImputer doesn't handle mixed type DataFrame as input",
@@ -1107,7 +1092,6 @@ def test_online_run_metric_score(self):
self._test_local_evaluations(run)
@pytest.mark.sklearn()
- @pytest.mark.xfail(reason="failures_issue_1544")
@unittest.skipIf(
Version(sklearn.__version__) < Version("0.20"),
reason="SimpleImputer doesn't handle mixed type DataFrame as input",
@@ -1173,7 +1157,6 @@ def test_initialize_model_from_run(self):
Version(sklearn.__version__) < Version("0.20"),
reason="SimpleImputer doesn't handle mixed type DataFrame as input",
)
- @pytest.mark.xfail(reason="failures_issue_1544")
def test__run_exists(self):
# would be better to not sentinel these clfs,
# so we do not have to perform the actual runs
@@ -1229,7 +1212,6 @@ def test__run_exists(self):
assert run_ids, (run_ids, clf)
@pytest.mark.sklearn()
- @pytest.mark.xfail(reason="failures_issue_1544")
def test_run_with_illegal_flow_id(self):
# check the case where the user adds an illegal flow id to a
# non-existing flow
@@ -1249,7 +1231,6 @@ def test_run_with_illegal_flow_id(self):
)
@pytest.mark.sklearn()
- @pytest.mark.xfail(reason="failures_issue_1544")
def test_run_with_illegal_flow_id_after_load(self):
# Same as `test_run_with_illegal_flow_id`, but test this error is also
# caught if the run is stored to and loaded from disk first.
@@ -1281,7 +1262,6 @@ def test_run_with_illegal_flow_id_after_load(self):
TestBase.logger.info(f"collected from test_run_functions: {loaded_run.run_id}")
@pytest.mark.sklearn()
- @pytest.mark.xfail(reason="failures_issue_1544")
def test_run_with_illegal_flow_id_1(self):
# Check the case where the user adds an illegal flow id to an existing
# flow. Comes to a different value error than the previous test
@@ -1307,7 +1287,6 @@ def test_run_with_illegal_flow_id_1(self):
)
@pytest.mark.sklearn()
- @pytest.mark.xfail(reason="failures_issue_1544")
def test_run_with_illegal_flow_id_1_after_load(self):
# Same as `test_run_with_illegal_flow_id_1`, but test this error is
# also caught if the run is stored to and loaded from disk first.
@@ -1346,7 +1325,6 @@ def test_run_with_illegal_flow_id_1_after_load(self):
)
@pytest.mark.sklearn()
- @pytest.mark.xfail(reason="failures_issue_1544")
@unittest.skipIf(
Version(sklearn.__version__) < Version("0.20"),
reason="OneHotEncoder cannot handle mixed type DataFrame as input",
@@ -1574,7 +1552,6 @@ def test_get_runs_list_by_tag(self):
assert len(runs) >= 1
@pytest.mark.sklearn()
- @pytest.mark.xfail(reason="failures_issue_1544")
@unittest.skipIf(
Version(sklearn.__version__) < Version("0.20"),
reason="columntransformer introduction in 0.20.0",
@@ -1611,7 +1588,6 @@ def test_run_on_dataset_with_missing_labels_dataframe(self):
assert len(row) == 12
@pytest.mark.sklearn()
- @pytest.mark.xfail(reason="failures_issue_1544")
@unittest.skipIf(
Version(sklearn.__version__) < Version("0.20"),
reason="columntransformer introduction in 0.20.0",
@@ -1664,7 +1640,6 @@ def test_get_uncached_run(self):
openml.runs.functions._get_cached_run(10)
@pytest.mark.sklearn()
- @pytest.mark.xfail(reason="failures_issue_1544")
def test_run_flow_on_task_downloaded_flow(self):
model = sklearn.ensemble.RandomForestClassifier(n_estimators=33)
flow = self.extension.model_to_flow(model)
@@ -1765,7 +1740,6 @@ def test_format_prediction_task_regression(self):
reason="SimpleImputer doesn't handle mixed type DataFrame as input",
)
@pytest.mark.sklearn()
- @pytest.mark.xfail(reason="failures_issue_1544")
def test_delete_run(self):
rs = np.random.randint(1, 2**31 - 1)
clf = sklearn.pipeline.Pipeline(
@@ -1861,7 +1835,6 @@ def test_delete_unknown_run(mock_delete, test_files_directory, test_api_key):
@pytest.mark.sklearn()
-@pytest.mark.xfail(reason="failures_issue_1544")
@unittest.skipIf(
Version(sklearn.__version__) < Version("0.21"),
reason="couldn't perform local tests successfully w/o bloating RAM",
@@ -1957,7 +1930,6 @@ def test__run_task_get_arffcontent_2(parallel_mock):
(-1, "threading", 10), # the threading backend does preserve mocks even with parallelizing
]
)
-@pytest.mark.xfail(reason="failures_issue_1544")
def test_joblib_backends(parallel_mock, n_jobs, backend, call_count):
"""Tests evaluation of a run using various joblib backends and n_jobs."""
if backend is None:
diff --git a/tests/test_setups/test_setup_functions.py b/tests/test_setups/test_setup_functions.py
index 18d7f5cc6..42af5362b 100644
--- a/tests/test_setups/test_setup_functions.py
+++ b/tests/test_setups/test_setup_functions.py
@@ -82,7 +82,6 @@ def _existing_setup_exists(self, classif):
assert setup_id == run.setup_id
@pytest.mark.sklearn()
- @pytest.mark.xfail(reason="failures_issue_1544")
def test_existing_setup_exists_1(self):
def side_effect(self):
self.var_smoothing = 1e-9
@@ -98,13 +97,11 @@ def side_effect(self):
self._existing_setup_exists(nb)
@pytest.mark.sklearn()
- @pytest.mark.xfail(reason="failures_issue_1544")
def test_exisiting_setup_exists_2(self):
# Check a flow with one hyperparameter
self._existing_setup_exists(sklearn.naive_bayes.GaussianNB())
@pytest.mark.sklearn()
- @pytest.mark.xfail(reason="failures_issue_1544")
def test_existing_setup_exists_3(self):
# Check a flow with many hyperparameters
self._existing_setup_exists(
From bb5a2c68d6043bc7a99ed035510432e621cc9689 Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Wed, 31 Dec 2025 10:21:03 +0200
Subject: [PATCH 2/2] Mark test as xfail since it cannot pass due to test
 server state
---
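Reviewer note (below the --- fold, so not part of the commit message): unlike
the markers removed in patch 1/2, this one is added because the test depends
on server-side ontology data that the test server currently lacks. A minimal
sketch of the pattern, with a hypothetical test name:

    import pytest

    @pytest.mark.xfail(reason="failures_issue_1544")
    def test_needs_server_state():
        # Reported as XFAIL while the server state is broken; will show up
        # as XPASS (and can then be un-marked) once the state is restored.
        ...

Marking rather than skipping keeps the test running, so it signals as soon as
it starts passing again.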
tests/test_datasets/test_dataset.py | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/tests/test_datasets/test_dataset.py b/tests/test_datasets/test_dataset.py
index 86a4d3f57..66e9b8554 100644
--- a/tests/test_datasets/test_dataset.py
+++ b/tests/test_datasets/test_dataset.py
@@ -294,6 +294,7 @@ def test_tagging():
datasets = openml.datasets.list_datasets(tag=tag)
assert datasets.empty
+@pytest.mark.xfail(reason="failures_issue_1544")
def test_get_feature_with_ontology_data_id_11():
# test on car dataset, which has built-in ontology references
dataset = openml.datasets.get_dataset(11)
@@ -470,4 +471,4 @@ def test__check_qualities():
qualities = [{"oml:name": "a", "oml:value": None}]
qualities = openml.datasets.dataset._check_qualities(qualities)
- assert qualities["a"] != qualities["a"]
\ No newline at end of file
+ assert qualities["a"] != qualities["a"]