From 24a80fed8803de640088a2b38912d804851d1555 Mon Sep 17 00:00:00 2001
From: Jos van der Velde
Date: Sat, 20 Sep 2025 20:17:54 +0200
Subject: [PATCH 1/2] Do not use test api_key for production calls inside the unittests

---
 openml/testing.py                             |  5 +++++
 tests/test_datasets/test_dataset.py           |  4 ++--
 tests/test_datasets/test_dataset_functions.py | 16 +++++++-------
 .../test_evaluation_functions.py              | 20 +++++++++---------
 tests/test_flows/test_flow.py                 |  6 +++---
 tests/test_flows/test_flow_functions.py       | 19 +++++++++--------
 tests/test_runs/test_run_functions.py         | 21 ++++++++++---------
 tests/test_setups/test_setup_functions.py     |  4 ++--
 tests/test_study/test_study_functions.py      | 12 +++++------
 tests/test_tasks/test_clustering_task.py      |  4 ++--
 10 files changed, 59 insertions(+), 52 deletions(-)

diff --git a/openml/testing.py b/openml/testing.py
index 547405df0..d6d2866c5 100644
--- a/openml/testing.py
+++ b/openml/testing.py
@@ -51,6 +51,7 @@ class TestBase(unittest.TestCase):
 
     # amueller's read/write key that he will throw away later
     apikey = "610344db6388d9ba34f6db45a3cf71de"
+    # creating logger for tracking files uploaded to test server
     logger = logging.getLogger("unit_tests_published_entities")
     logger.setLevel(logging.DEBUG)
 
@@ -108,6 +109,10 @@ def setUp(self, n_levels: int = 1, tmpdir_suffix: str = "") -> None:
         self.connection_n_retries = openml.config.connection_n_retries
         openml.config.set_retry_policy("robot", n_retries=20)
 
+    def use_production_server(self):
+        openml.config.server = self.production_server
+        openml.config.apikey = None
+
     def tearDown(self) -> None:
         """Tear down the test"""
         os.chdir(self.cwd)
diff --git a/tests/test_datasets/test_dataset.py b/tests/test_datasets/test_dataset.py
index c48086a72..86a4d3f57 100644
--- a/tests/test_datasets/test_dataset.py
+++ b/tests/test_datasets/test_dataset.py
@@ -24,7 +24,7 @@ class OpenMLDatasetTest(TestBase):
 
     def setUp(self):
         super().setUp()
-        openml.config.server = self.production_server
+        self.use_production_server()
 
         # Load dataset id 2 - dataset 2 is interesting because it contains
         # missing values, categorical features etc.
@@ -344,7 +344,7 @@ class OpenMLDatasetTestSparse(TestBase):
 
     def setUp(self):
         super().setUp()
-        openml.config.server = self.production_server
+        self.use_production_server()
 
         self.sparse_dataset = openml.datasets.get_dataset(4136, download_data=False)
 
diff --git a/tests/test_datasets/test_dataset_functions.py b/tests/test_datasets/test_dataset_functions.py
index 1c06cc4b5..4145b86ad 100644
--- a/tests/test_datasets/test_dataset_functions.py
+++ b/tests/test_datasets/test_dataset_functions.py
@@ -139,7 +139,7 @@ def test_list_datasets_empty(self):
     @pytest.mark.production()
     def test_check_datasets_active(self):
         # Have to test on live because there is no deactivated dataset on the test server.
-        openml.config.server = self.production_server
+        self.use_production_server()
         active = openml.datasets.check_datasets_active(
             [2, 17, 79],
             raise_error_if_not_exist=False,
@@ -176,7 +176,7 @@ def test_illegal_length_tag(self):
     @pytest.mark.production()
     def test__name_to_id_with_deactivated(self):
         """Check that an activated dataset is returned if an earlier deactivated one exists."""
-        openml.config.server = self.production_server
+        self.use_production_server()
         # /d/1 was deactivated
         assert openml.datasets.functions._name_to_id("anneal") == 2
         openml.config.server = self.test_server
@@ -184,19 +184,19 @@ def test__name_to_id_with_multiple_active(self):
         """With multiple active datasets, retrieve the least recent active."""
-        openml.config.server = self.production_server
+        self.use_production_server()
         assert openml.datasets.functions._name_to_id("iris") == 61
 
     @pytest.mark.production()
     def test__name_to_id_with_version(self):
         """With multiple active datasets, retrieve the least recent active."""
-        openml.config.server = self.production_server
+        self.use_production_server()
         assert openml.datasets.functions._name_to_id("iris", version=3) == 969
 
     @pytest.mark.production()
     def test__name_to_id_with_multiple_active_error(self):
         """With multiple active datasets, retrieve the least recent active."""
-        openml.config.server = self.production_server
+        self.use_production_server()
         self.assertRaisesRegex(
             ValueError,
             "Multiple active datasets exist with name 'iris'.",
@@ -272,12 +272,12 @@ def test_get_dataset_uint8_dtype(self):
     @pytest.mark.production()
     def test_get_dataset_cannot_access_private_data(self):
         # Issue324 Properly handle private datasets when trying to access them
-        openml.config.server = self.production_server
+        self.use_production_server()
         self.assertRaises(OpenMLPrivateDatasetError, openml.datasets.get_dataset, 45)
 
     @pytest.mark.skip("Need to find dataset name of private dataset")
     def test_dataset_by_name_cannot_access_private_data(self):
-        openml.config.server = self.production_server
+        self.use_production_server()
         self.assertRaises(OpenMLPrivateDatasetError, openml.datasets.get_dataset, "NAME_GOES_HERE")
 
     def test_get_dataset_lazy_all_functions(self):
@@ -1501,7 +1501,7 @@ def test_data_fork(self):
     @pytest.mark.production()
     def test_list_datasets_with_high_size_parameter(self):
         # Testing on prod since concurrent deletion of uploded datasets make the test fail
-        openml.config.server = self.production_server
+        self.use_production_server()
 
         datasets_a = openml.datasets.list_datasets()
         datasets_b = openml.datasets.list_datasets(size=np.inf)
diff --git a/tests/test_evaluations/test_evaluation_functions.py b/tests/test_evaluations/test_evaluation_functions.py
index 37b0ce7c8..ffd3d9f78 100644
--- a/tests/test_evaluations/test_evaluation_functions.py
+++ b/tests/test_evaluations/test_evaluation_functions.py
@@ -52,7 +52,7 @@ def _check_list_evaluation_setups(self, **kwargs):
 
     @pytest.mark.production()
     def test_evaluation_list_filter_task(self):
-        openml.config.server = self.production_server
+        self.use_production_server()
 
         task_id = 7312
 
@@ -72,7 +72,7 @@ def test_evaluation_list_filter_task(self):
 
     @pytest.mark.production()
     def test_evaluation_list_filter_uploader_ID_16(self):
-        openml.config.server = self.production_server
+        self.use_production_server()
         uploader_id = 16
 
         evaluations = openml.evaluations.list_evaluations(
@@ -87,7 +87,7 @@ def test_evaluation_list_filter_uploader_ID_16(self):
 
     @pytest.mark.production()
     def test_evaluation_list_filter_uploader_ID_10(self):
-        openml.config.server = self.production_server
+        self.use_production_server()
         setup_id = 10
 
         evaluations = openml.evaluations.list_evaluations(
@@ -106,7 +106,7 @@ def test_evaluation_list_filter_uploader_ID_10(self):
 
     @pytest.mark.production()
     def test_evaluation_list_filter_flow(self):
-        openml.config.server = self.production_server
+        self.use_production_server()
 
         flow_id = 100
 
@@ -126,7 +126,7 @@ def test_evaluation_list_filter_flow(self):
 
     @pytest.mark.production()
     def test_evaluation_list_filter_run(self):
-        openml.config.server = self.production_server
+        self.use_production_server()
 
         run_id = 12
 
@@ -146,7 +146,7 @@ def test_evaluation_list_filter_run(self):
 
     @pytest.mark.production()
     def test_evaluation_list_limit(self):
-        openml.config.server = self.production_server
+        self.use_production_server()
 
         evaluations = openml.evaluations.list_evaluations(
             "predictive_accuracy",
@@ -164,7 +164,7 @@ def test_list_evaluations_empty(self):
 
     @pytest.mark.production()
     def test_evaluation_list_per_fold(self):
-        openml.config.server = self.production_server
+        self.use_production_server()
         size = 1000
         task_ids = [6]
         uploader_ids = [1]
@@ -202,7 +202,7 @@ def test_evaluation_list_per_fold(self):
 
     @pytest.mark.production()
     def test_evaluation_list_sort(self):
-        openml.config.server = self.production_server
+        self.use_production_server()
         size = 10
         task_id = 6
         # Get all evaluations of the task
@@ -239,7 +239,7 @@ def test_list_evaluation_measures(self):
 
     @pytest.mark.production()
     def test_list_evaluations_setups_filter_flow(self):
-        openml.config.server = self.production_server
+        self.use_production_server()
         flow_id = [405]
         size = 100
         evals = self._check_list_evaluation_setups(flows=flow_id, size=size)
@@ -257,7 +257,7 @@ def test_list_evaluations_setups_filter_flow(self):
 
     @pytest.mark.production()
     def test_list_evaluations_setups_filter_task(self):
-        openml.config.server = self.production_server
+        self.use_production_server()
         task_id = [6]
         size = 121
         self._check_list_evaluation_setups(tasks=task_id, size=size)
diff --git a/tests/test_flows/test_flow.py b/tests/test_flows/test_flow.py
index e6407a51c..0b034c3b4 100644
--- a/tests/test_flows/test_flow.py
+++ b/tests/test_flows/test_flow.py
@@ -48,7 +48,7 @@ def tearDown(self):
     def test_get_flow(self):
         # We need to use the production server here because 4024 is not the
         # test server
-        openml.config.server = self.production_server
+        self.use_production_server()
         flow = openml.flows.get_flow(4024)
         assert isinstance(flow, openml.OpenMLFlow)
 
@@ -82,7 +82,7 @@ def test_get_structure(self):
         # also responsible for testing: flow.get_subflow
         # We need to use the production server here because 4024 is not the
         # test server
-        openml.config.server = self.production_server
+        self.use_production_server()
         flow = openml.flows.get_flow(4024)
         flow_structure_name = flow.get_structure("name")
 
@@ -558,7 +558,7 @@ def test_extract_tags(self):
 
     @pytest.mark.production()
     def test_download_non_scikit_learn_flows(self):
-        openml.config.server = self.production_server
+        self.use_production_server()
 
         flow = openml.flows.get_flow(6742)
         assert isinstance(flow, openml.OpenMLFlow)
diff --git a/tests/test_flows/test_flow_functions.py b/tests/test_flows/test_flow_functions.py
index 4a9b03fd7..ef4759e54 100644
--- a/tests/test_flows/test_flow_functions.py
+++ b/tests/test_flows/test_flow_functions.py
@@ -48,7 +48,7 @@ def _check_flow(self, flow):
 
     @pytest.mark.production()
     def test_list_flows(self):
-        openml.config.server = self.production_server
+        self.use_production_server()
         # We can only perform a smoke test here because we test on dynamic
         # data from the internet...
         flows = openml.flows.list_flows()
         # 3000 as the number of flows on openml.org
         assert len(flows) >= 1500
         for flow in flows.to_dict(orient="index").values():
             self._check_flow(flow)
@@ -59,7 +59,7 @@ def test_list_flows(self):
 
     @pytest.mark.production()
     def test_list_flows_output_format(self):
-        openml.config.server = self.production_server
+        self.use_production_server()
         # We can only perform a smoke test here because we test on dynamic
         # data from the internet...
         flows = openml.flows.list_flows()
@@ -68,13 +68,14 @@ def test_list_flows_output_format(self):
 
     @pytest.mark.production()
     def test_list_flows_empty(self):
+        self.use_production_server()
         openml.config.server = self.production_server
         flows = openml.flows.list_flows(tag="NoOneEverUsesThisTag123")
         assert flows.empty
 
     @pytest.mark.production()
     def test_list_flows_by_tag(self):
-        openml.config.server = self.production_server
+        self.use_production_server()
         flows = openml.flows.list_flows(tag="weka")
         assert len(flows) >= 5
         for flow in flows.to_dict(orient="index").values():
@@ -82,7 +83,7 @@ def test_list_flows_by_tag(self):
 
     @pytest.mark.production()
     def test_list_flows_paginate(self):
-        openml.config.server = self.production_server
+        self.use_production_server()
         size = 10
         maximum = 100
         for i in range(0, maximum, size):
@@ -302,7 +303,7 @@ def test_sklearn_to_flow_list_of_lists(self):
     def test_get_flow1(self):
         # Regression test for issue #305
        # Basically, this checks that a flow without an external version can be loaded
-        openml.config.server = self.production_server
+        self.use_production_server()
         flow = openml.flows.get_flow(1)
         assert flow.external_version is None
 
@@ -335,7 +336,7 @@ def test_get_flow_reinstantiate_model_no_extension(self):
     )
     @pytest.mark.production()
     def test_get_flow_with_reinstantiate_strict_with_wrong_version_raises_exception(self):
-        openml.config.server = self.production_server
+        self.use_production_server()
         flow = 8175
         expected = "Trying to deserialize a model with dependency sklearn==0.19.1 not satisfied."
         self.assertRaisesRegex(
@@ -356,7 +357,7 @@ def test_get_flow_with_reinstantiate_strict_with_wrong_version_raises_exception(
     )
     @pytest.mark.production()
     def test_get_flow_reinstantiate_flow_not_strict_post_1(self):
-        openml.config.server = self.production_server
+        self.use_production_server()
         flow = openml.flows.get_flow(flow_id=19190, reinstantiate=True, strict_version=False)
         assert flow.flow_id is None
         assert "sklearn==1.0.0" not in flow.dependencies
@@ -370,7 +371,7 @@ def test_get_flow_reinstantiate_flow_not_strict_post_1(self):
     )
     @pytest.mark.production()
     def test_get_flow_reinstantiate_flow_not_strict_023_and_024(self):
-        openml.config.server = self.production_server
+        self.use_production_server()
         flow = openml.flows.get_flow(flow_id=18587, reinstantiate=True, strict_version=False)
         assert flow.flow_id is None
         assert "sklearn==0.23.1" not in flow.dependencies
@@ -382,7 +383,7 @@ def test_get_flow_reinstantiate_flow_not_strict_023_and_024(self):
     )
     @pytest.mark.production()
     def test_get_flow_reinstantiate_flow_not_strict_pre_023(self):
-        openml.config.server = self.production_server
+        self.use_production_server()
         flow = openml.flows.get_flow(flow_id=8175, reinstantiate=True, strict_version=False)
         assert flow.flow_id is None
         assert "sklearn==0.19.1" not in flow.dependencies
diff --git a/tests/test_runs/test_run_functions.py b/tests/test_runs/test_run_functions.py
index 7dff05cfc..b02acdf51 100644
--- a/tests/test_runs/test_run_functions.py
+++ b/tests/test_runs/test_run_functions.py
@@ -1083,7 +1083,7 @@ def test_local_run_metric_score(self):
 
     @pytest.mark.production()
     def test_online_run_metric_score(self):
-        openml.config.server = self.production_server
+        self.use_production_server()
 
         # important to use binary classification task,
         # due to assertions
@@ -1388,7 +1388,7 @@ def test__create_trace_from_arff(self):
     @pytest.mark.production()
     def test_get_run(self):
         # this run is not available on test
-        openml.config.server = self.production_server
+        self.use_production_server()
         run = openml.runs.get_run(473351)
         assert run.dataset_id == 357
         assert run.evaluations["f_measure"] == 0.841225
@@ -1424,7 +1424,7 @@ def _check_run(self, run):
     @pytest.mark.production()
     def test_get_runs_list(self):
         # TODO: comes from live, no such lists on test
-        openml.config.server = self.production_server
+        self.use_production_server()
         runs = openml.runs.list_runs(id=[2], display_errors=True)
         assert len(runs) == 1
         for run in runs.to_dict(orient="index").values():
@@ -1437,7 +1437,7 @@ def test_list_runs_empty(self):
     @pytest.mark.production()
     def test_get_runs_list_by_task(self):
         # TODO: comes from live, no such lists on test
-        openml.config.server = self.production_server
+        self.use_production_server()
         task_ids = [20]
         runs = openml.runs.list_runs(task=task_ids)
         assert len(runs) >= 590
@@ -1456,7 +1456,7 @@ def test_get_runs_list_by_task(self):
     @pytest.mark.production()
     def test_get_runs_list_by_uploader(self):
         # TODO: comes from live, no such lists on test
-        openml.config.server = self.production_server
+        self.use_production_server()
 
         # 29 is Dominik Kirchhoff
         uploader_ids = [29]
@@ -1478,7 +1478,7 @@ def test_get_runs_list_by_uploader(self):
     @pytest.mark.production()
     def test_get_runs_list_by_flow(self):
         # TODO: comes from live, no such lists on test
-        openml.config.server = self.production_server
+        self.use_production_server()
         flow_ids = [1154]
         runs = openml.runs.list_runs(flow=flow_ids)
         assert len(runs) >= 1
@@ -1497,7 +1497,7 @@ def test_get_runs_list_by_flow(self):
     @pytest.mark.production()
     def test_get_runs_pagination(self):
         # TODO: comes from live, no such lists on test
-        openml.config.server = self.production_server
+        self.use_production_server()
         uploader_ids = [1]
         size = 10
         max = 100
@@ -1510,7 +1510,7 @@ def test_get_runs_pagination(self):
     @pytest.mark.production()
     def test_get_runs_list_by_filters(self):
         # TODO: comes from live, no such lists on test
-        openml.config.server = self.production_server
+        self.use_production_server()
         ids = [505212, 6100]
         tasks = [2974, 339]
         uploaders_1 = [1, 2]
@@ -1548,7 +1548,8 @@ def test_get_runs_list_by_filters(self):
     def test_get_runs_list_by_tag(self):
         # TODO: comes from live, no such lists on test
         # Unit test works on production server only
-        openml.config.server = self.production_server
+
+        self.use_production_server()
         runs = openml.runs.list_runs(tag="curves")
         assert len(runs) >= 1
 
@@ -1663,7 +1664,7 @@ def test_run_flow_on_task_downloaded_flow(self):
     @pytest.mark.production()
     def test_format_prediction_non_supervised(self):
         # non-supervised tasks don't exist on the test server
-        openml.config.server = self.production_server
+        self.use_production_server()
         clustering = openml.tasks.get_task(126033, download_data=False)
         ignored_input = [0] * 5
         with pytest.raises(
diff --git a/tests/test_setups/test_setup_functions.py b/tests/test_setups/test_setup_functions.py
index b805ca9d3..6fd11638f 100644
--- a/tests/test_setups/test_setup_functions.py
+++ b/tests/test_setups/test_setup_functions.py
@@ -134,7 +134,7 @@ def test_get_setup(self):
 
     @pytest.mark.production()
     def test_setup_list_filter_flow(self):
-        openml.config.server = self.production_server
+        self.use_production_server()
 
         flow_id = 5873
 
@@ -153,7 +153,7 @@ def test_list_setups_empty(self):
 
     @pytest.mark.production()
     def test_list_setups_output_format(self):
-        openml.config.server = self.production_server
+        self.use_production_server()
         flow_id = 6794
         setups = openml.setups.list_setups(flow=flow_id, size=10)
         assert isinstance(setups, dict)
diff --git a/tests/test_study/test_study_functions.py b/tests/test_study/test_study_functions.py
index 22f5b0d03..40026592f 100644
--- a/tests/test_study/test_study_functions.py
+++ b/tests/test_study/test_study_functions.py
@@ -14,7 +14,7 @@ class TestStudyFunctions(TestBase):
 
     @pytest.mark.production()
     def test_get_study_old(self):
-        openml.config.server = self.production_server
+        self.use_production_server()
 
         study = openml.study.get_study(34)
         assert len(study.data) == 105
@@ -25,7 +25,7 @@ def test_get_study_old(self):
 
     @pytest.mark.production()
     def test_get_study_new(self):
-        openml.config.server = self.production_server
+        self.use_production_server()
 
         study = openml.study.get_study(123)
         assert len(study.data) == 299
@@ -36,7 +36,7 @@ def test_get_study_new(self):
 
     @pytest.mark.production()
     def test_get_openml100(self):
-        openml.config.server = self.production_server
+        self.use_production_server()
 
         study = openml.study.get_study("OpenML100", "tasks")
         assert isinstance(study, openml.study.OpenMLBenchmarkSuite)
@@ -46,7 +46,7 @@ def test_get_openml100(self):
 
     @pytest.mark.production()
     def test_get_study_error(self):
-        openml.config.server = self.production_server
+        self.use_production_server()
 
         with pytest.raises(
             ValueError, match="Unexpected entity type 'task' reported by the server, expected 'run'"
@@ -55,7 +55,7 @@ def test_get_study_error(self):
 
     @pytest.mark.production()
     def test_get_suite(self):
-        openml.config.server = self.production_server
+        self.use_production_server()
 
         study = openml.study.get_suite(99)
         assert len(study.data) == 72
@@ -66,7 +66,7 @@ def test_get_suite(self):
 
     @pytest.mark.production()
     def test_get_suite_error(self):
-        openml.config.server = self.production_server
+        self.use_production_server()
 
         with pytest.raises(
             ValueError, match="Unexpected entity type 'run' reported by the server, expected 'task'"
diff --git a/tests/test_tasks/test_clustering_task.py b/tests/test_tasks/test_clustering_task.py
index bc0876228..dcc024388 100644
--- a/tests/test_tasks/test_clustering_task.py
+++ b/tests/test_tasks/test_clustering_task.py
@@ -23,14 +23,14 @@ def setUp(self, n_levels: int = 1):
     @pytest.mark.production()
     def test_get_dataset(self):
         # no clustering tasks on test server
-        openml.config.server = self.production_server
+        self.use_production_server()
         task = openml.tasks.get_task(self.task_id)
         task.get_dataset()
 
     @pytest.mark.production()
     def test_download_task(self):
         # no clustering tasks on test server
-        openml.config.server = self.production_server
+        self.use_production_server()
         task = super().test_download_task()
         assert task.task_id == self.task_id
         assert task.task_type_id == TaskType.CLUSTERING

From 69875d70d351081e11b17460df88272f2c3f544b Mon Sep 17 00:00:00 2001
From: Jos van der Velde
Date: Sat, 20 Sep 2025 20:21:20 +0200
Subject: [PATCH 2/2] Precommit checks

---
 openml/testing.py | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/openml/testing.py b/openml/testing.py
index d6d2866c5..2003bb1b9 100644
--- a/openml/testing.py
+++ b/openml/testing.py
@@ -51,7 +51,6 @@ class TestBase(unittest.TestCase):
 
     # amueller's read/write key that he will throw away later
     apikey = "610344db6388d9ba34f6db45a3cf71de"
-    # creating logger for tracking files uploaded to test server
     logger = logging.getLogger("unit_tests_published_entities")
     logger.setLevel(logging.DEBUG)
 
@@ -109,9 +108,14 @@ def setUp(self, n_levels: int = 1, tmpdir_suffix: str = "") -> None:
         self.connection_n_retries = openml.config.connection_n_retries
         openml.config.set_retry_policy("robot", n_retries=20)
 
-    def use_production_server(self):
+    def use_production_server(self) -> None:
+        """
+        Use the production server for the OpenML API calls.
+
+        Please use this sparingly - it is better to use the test server.
+        """
         openml.config.server = self.production_server
-        openml.config.apikey = None
+        openml.config.apikey = ""
 
     def tearDown(self) -> None:
         """Tear down the test"""
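
For reference, a minimal sketch of how a production-backed test reads once this series is applied. The test class and method names below are illustrative and not part of the patch; use_production_server(), the pytest.mark.production marker, and openml.flows.get_flow(4024) are taken from the diff above.

    import pytest

    import openml
    from openml.testing import TestBase


    class IllustrativeProductionTest(TestBase):  # illustrative class, not part of the patch
        @pytest.mark.production()
        def test_get_flow_from_production(self):
            # Point openml.config at the production server and clear the test-server
            # API key, so the read-only call below never sends test credentials
            # to production.
            self.use_production_server()
            flow = openml.flows.get_flow(4024)
            assert isinstance(flow, openml.OpenMLFlow)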