@@ -398,6 +398,7 @@ def _check_sample_evaluations(
398398 assert evaluation < max_time_allowed
399399
400400 @pytest .mark .sklearn ()
401+ @pytest.mark.xfail(reason="failures_issue_1544")
401402 def test_run_regression_on_classif_task (self ):
402403 task_id = 259 # collins; crossvalidation; has numeric targets
403404
@@ -414,6 +415,7 @@ def test_run_regression_on_classif_task(self):
414415 )
415416
416417 @pytest .mark .sklearn ()
418+ @pytest.mark.xfail(reason="failures_issue_1544")
417419 def test_check_erronous_sklearn_flow_fails (self ):
418420 task_id = 115 # diabetes; crossvalidation
419421 task = openml .tasks .get_task (task_id )
@@ -627,6 +629,7 @@ def _run_and_upload_regression(
627629
628630 @pytest .mark .skip (reason = "failures_issue_1544" )
629631 @pytest .mark .sklearn ()
632+ @pytest.mark.xfail(reason="failures_issue_1544")
630633 def test_run_and_upload_logistic_regression (self ):
631634 lr = LogisticRegression (solver = "lbfgs" , max_iter = 1000 )
632635 task_id = self .TEST_SERVER_TASK_SIMPLE ["task_id" ]
@@ -636,6 +639,7 @@ def test_run_and_upload_logistic_regression(self):
636639
637640 @pytest .mark .skip (reason = "failures_issue_1544" )
638641 @pytest .mark .sklearn ()
642+ @pytest.mark.xfail(reason="failures_issue_1544")
639643 def test_run_and_upload_linear_regression (self ):
640644 lr = LinearRegression ()
641645 task_id = self .TEST_SERVER_TASK_REGRESSION ["task_id" ]
@@ -667,6 +671,7 @@ def test_run_and_upload_linear_regression(self):
667671
668672 @pytest .mark .skip (reason = "failures_issue_1544" )
669673 @pytest .mark .sklearn ()
674+ @pytest.mark.xfail(reason="failures_issue_1544")
670675 def test_run_and_upload_pipeline_dummy_pipeline (self ):
671676 pipeline1 = Pipeline (
672677 steps = [
@@ -681,6 +686,7 @@ def test_run_and_upload_pipeline_dummy_pipeline(self):
681686
682687 @pytest .mark .skip (reason = "failures_issue_1544" )
683688 @pytest .mark .sklearn ()
689+ @pytest.mark.xfail(reason="failures_issue_1544")
684690 @unittest .skipIf (
685691 Version (sklearn .__version__ ) < Version ("0.20" ),
686692 reason = "columntransformer introduction in 0.20.0" ,
@@ -799,6 +805,7 @@ def test_run_and_upload_knn_pipeline(self, warnings_mock):
799805
800806 @pytest .mark .skip (reason = "failures_issue_1544" )
801807 @pytest .mark .sklearn ()
808+ @pytest.mark.xfail(reason="failures_issue_1544")
802809 def test_run_and_upload_gridsearch (self ):
803810 estimator_name = (
804811 "base_estimator" if Version (sklearn .__version__ ) < Version ("1.4" ) else "estimator"
@@ -821,7 +828,7 @@ def test_run_and_upload_gridsearch(self):
821828 assert len (run .trace .trace_iterations ) == 9
822829
823830 @pytest .mark .sklearn ()
824- @pytest.mark.skip(reason="failures_issue_1544")
831+ @pytest.mark.xfail(reason="failures_issue_1544")
825832 def test_run_and_upload_randomsearch (self ):
826833 randomsearch = RandomizedSearchCV (
827834 RandomForestClassifier (n_estimators = 5 ),
@@ -855,6 +862,7 @@ def test_run_and_upload_randomsearch(self):
855862
856863 @pytest .mark .skip (reason = "failures_issue_1544" )
857864 @pytest .mark .sklearn ()
865+ @pytest.mark.xfail(reason="failures_issue_1544")
858866 def test_run_and_upload_maskedarrays (self ):
859867 # This testcase is important for 2 reasons:
860868 # 1) it verifies the correct handling of masked arrays (not all
@@ -882,6 +890,7 @@ def test_run_and_upload_maskedarrays(self):
882890 ##########################################################################
883891
884892 @pytest .mark .sklearn ()
893+ @pytest.mark.xfail(reason="failures_issue_1544")
885894 def test_learning_curve_task_1 (self ):
886895 task_id = 801 # diabates dataset
887896 num_test_instances = 6144 # for learning curve
@@ -906,6 +915,7 @@ def test_learning_curve_task_1(self):
906915 self ._check_sample_evaluations (run .sample_evaluations , num_repeats , num_folds , num_samples )
907916
908917 @pytest .mark .sklearn ()
918+ @pytest.mark.xfail(reason="failures_issue_1544")
909919 def test_learning_curve_task_2 (self ):
910920 task_id = 801 # diabates dataset
911921 num_test_instances = 6144 # for learning curve
@@ -942,6 +952,7 @@ def test_learning_curve_task_2(self):
942952 self ._check_sample_evaluations (run .sample_evaluations , num_repeats , num_folds , num_samples )
943953
944954 @pytest .mark .sklearn ()
955+ @pytest.mark.xfail(reason="failures_issue_1544")
945956 @unittest .skipIf (
946957 Version (sklearn .__version__ ) < Version ("0.21" ),
947958 reason = "Pipelines don't support indexing (used for the assert check)" ,
@@ -1020,6 +1031,7 @@ def _test_local_evaluations(self, run):
10201031 assert alt_scores [idx ] <= 1
10211032
10221033 @pytest .mark .sklearn ()
1034+ @pytest.mark.xfail(reason="failures_issue_1544")
10231035 def test_local_run_swapped_parameter_order_model (self ):
10241036 clf = DecisionTreeClassifier ()
10251037 australian_task = 595 # Australian; crossvalidation
@@ -1035,6 +1047,7 @@ def test_local_run_swapped_parameter_order_model(self):
10351047 self ._test_local_evaluations (run )
10361048
10371049 @pytest .mark .sklearn ()
1050+ @pytest.mark.xfail(reason="failures_issue_1544")
10381051 @unittest .skipIf (
10391052 Version (sklearn .__version__ ) < Version ("0.20" ),
10401053 reason = "SimpleImputer doesn't handle mixed type DataFrame as input" ,
@@ -1063,6 +1076,7 @@ def test_local_run_swapped_parameter_order_flow(self):
10631076 self ._test_local_evaluations (run )
10641077
10651078 @pytest .mark .sklearn ()
1079+ @pytest.mark.xfail(reason="failures_issue_1544")
10661080 @unittest .skipIf (
10671081 Version (sklearn .__version__ ) < Version ("0.20" ),
10681082 reason = "SimpleImputer doesn't handle mixed type DataFrame as input" ,
@@ -1100,6 +1114,7 @@ def test_online_run_metric_score(self):
11001114 self ._test_local_evaluations (run )
11011115
11021116 @pytest .mark .sklearn ()
1117+ @pytest.mark.xfail(reason="failures_issue_1544")
11031118 @unittest .skipIf (
11041119 Version (sklearn .__version__ ) < Version ("0.20" ),
11051120 reason = "SimpleImputer doesn't handle mixed type DataFrame as input" ,
@@ -1165,6 +1180,7 @@ def test_initialize_model_from_run(self):
11651180 Version (sklearn .__version__ ) < Version ("0.20" ),
11661181 reason = "SimpleImputer doesn't handle mixed type DataFrame as input" ,
11671182 )
1183+ @pytest.mark.xfail(reason="failures_issue_1544")
11681184 def test__run_exists (self ):
11691185 # would be better to not sentinel these clfs,
11701186 # so we do not have to perform the actual runs
@@ -1220,6 +1236,7 @@ def test__run_exists(self):
12201236 assert run_ids , (run_ids , clf )
12211237
12221238 @pytest .mark .sklearn ()
1239+ @pytest.mark.xfail(reason="failures_issue_1544")
12231240 def test_run_with_illegal_flow_id (self ):
12241241 # check the case where the user adds an illegal flow id to a
12251242 # non-existing flo
@@ -1239,6 +1256,7 @@ def test_run_with_illegal_flow_id(self):
12391256 )
12401257
12411258 @pytest .mark .sklearn ()
1259+ @pytest.mark.xfail(reason="failures_issue_1544")
12421260 def test_run_with_illegal_flow_id_after_load (self ):
12431261 # Same as `test_run_with_illegal_flow_id`, but test this error is also
12441262 # caught if the run is stored to and loaded from disk first.
@@ -1270,6 +1288,7 @@ def test_run_with_illegal_flow_id_after_load(self):
12701288 TestBase .logger .info (f"collected from test_run_functions: { loaded_run .run_id } " )
12711289
12721290 @pytest .mark .sklearn ()
1291+ @pytest.mark.xfail(reason="failures_issue_1544")
12731292 def test_run_with_illegal_flow_id_1 (self ):
12741293 # Check the case where the user adds an illegal flow id to an existing
12751294 # flow. Comes to a different value error than the previous test
@@ -1295,6 +1314,7 @@ def test_run_with_illegal_flow_id_1(self):
12951314 )
12961315
12971316 @pytest .mark .sklearn ()
1317+ @pytest.mark.xfail(reason="failures_issue_1544")
12981318 def test_run_with_illegal_flow_id_1_after_load (self ):
12991319 # Same as `test_run_with_illegal_flow_id_1`, but test this error is
13001320 # also caught if the run is stored to and loaded from disk first.
@@ -1333,6 +1353,7 @@ def test_run_with_illegal_flow_id_1_after_load(self):
13331353 )
13341354
13351355 @pytest .mark .sklearn ()
1356+ @pytest.mark.xfail(reason="failures_issue_1544")
13361357 @unittest .skipIf (
13371358 Version (sklearn .__version__ ) < Version ("0.20" ),
13381359 reason = "OneHotEncoder cannot handle mixed type DataFrame as input" ,
@@ -1560,6 +1581,7 @@ def test_get_runs_list_by_tag(self):
15601581 assert len (runs ) >= 1
15611582
15621583 @pytest .mark .sklearn ()
1584+ @pytest.mark.xfail(reason="failures_issue_1544")
15631585 @unittest .skipIf (
15641586 Version (sklearn .__version__ ) < Version ("0.20" ),
15651587 reason = "columntransformer introduction in 0.20.0" ,
@@ -1596,6 +1618,7 @@ def test_run_on_dataset_with_missing_labels_dataframe(self):
15961618 assert len (row ) == 12
15971619
15981620 @pytest .mark .sklearn ()
1621+ @pytest.mark.xfail(reason="failures_issue_1544")
15991622 @unittest .skipIf (
16001623 Version (sklearn .__version__ ) < Version ("0.20" ),
16011624 reason = "columntransformer introduction in 0.20.0" ,
@@ -1648,6 +1671,7 @@ def test_get_uncached_run(self):
16481671 openml .runs .functions ._get_cached_run (10 )
16491672
16501673 @pytest .mark .sklearn ()
1674+ @pytest.mark.xfail(reason="failures_issue_1544")
16511675 def test_run_flow_on_task_downloaded_flow (self ):
16521676 model = sklearn .ensemble .RandomForestClassifier (n_estimators = 33 )
16531677 flow = self .extension .model_to_flow (model )
@@ -1748,6 +1772,7 @@ def test_format_prediction_task_regression(self):
17481772 reason = "SimpleImputer doesn't handle mixed type DataFrame as input" ,
17491773 )
17501774 @pytest .mark .sklearn ()
1775+ @pytest.mark.xfail(reason="failures_issue_1544")
17511776 def test_delete_run (self ):
17521777 rs = np .random .randint (1 , 2 ** 31 - 1 )
17531778 clf = sklearn .pipeline .Pipeline (
@@ -1843,6 +1868,7 @@ def test_delete_unknown_run(mock_delete, test_files_directory, test_api_key):
18431868
18441869
18451870@pytest .mark .sklearn ()
1871+ @pytest.mark.xfail(reason="failures_issue_1544")
18461872@unittest .skipIf (
18471873 Version (sklearn .__version__ ) < Version ("0.21" ),
18481874 reason = "couldn't perform local tests successfully w/o bloating RAM" ,
@@ -1938,6 +1964,7 @@ def test__run_task_get_arffcontent_2(parallel_mock):
19381964 (- 1 , "threading" , 10 ), # the threading backend does preserve mocks even with parallelizing
19391965 ]
19401966)
1967+ @pytest.mark.xfail(reason="failures_issue_1544")
19411968def test_joblib_backends (parallel_mock , n_jobs , backend , call_count ):
19421969 """Tests evaluation of a run using various joblib backends and n_jobs."""
19431970 if backend is None :
0 commit comments