diff --git a/.gitignore b/.gitignore
index 90548b2c3..5687e41f1 100644
--- a/.gitignore
+++ b/.gitignore
@@ -94,3 +94,4 @@ dmypy.sock
 # Tests
 .pytest_cache
+.venv
\ No newline at end of file
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 3d6d40b60..7b8cdeaa7 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -168,6 +168,7 @@ to create a pull request from your fork.
 (If any of the above seems like magic to you, please look up the [Git documentation](https://git-scm.com/documentation) on the web, or ask a friend or another contributor for help.)
+
 ## Pre-commit Details
 [Pre-commit](https://pre-commit.com/) is used for various style checking and code formatting. Before each commit, it will automatically run:
diff --git a/examples/30_extended/fetch_runtimes_tutorial.py b/examples/30_extended/fetch_runtimes_tutorial.py
index 107adee79..8adf37d31 100644
--- a/examples/30_extended/fetch_runtimes_tutorial.py
+++ b/examples/30_extended/fetch_runtimes_tutorial.py
@@ -119,7 +119,7 @@ def print_compare_runtimes(measures):
     )
     for repeat, val1 in measures["predictive_accuracy"].items():
         for fold, val2 in val1.items():
-            print("Repeat #{}-Fold #{}: {:.4f}".format(repeat, fold, val2))
+            print(f"Repeat #{repeat}-Fold #{fold}: {val2:.4f}")
     print()
 ################################################################################
@@ -242,7 +242,7 @@ def print_compare_runtimes(measures):
 # the 2-fold (inner) CV search performed.
 # We earlier extracted the number of repeats and folds for this task:
-print("# repeats: {}\n# folds: {}".format(n_repeats, n_folds))
+print(f"# repeats: {n_repeats}\n# folds: {n_folds}")
 # To extract the training runtime of the first repeat, first fold:
 print(run4.fold_evaluations["wall_clock_time_millis_training"][0][0])
diff --git a/openml/base.py b/openml/base.py
index 37693a2ec..fbfb9dfc8 100644
--- a/openml/base.py
+++ b/openml/base.py
@@ -78,7 +78,7 @@ def _apply_repr_template(
             self.__class__.__name__[len("OpenML") :],
         )
         header_text = f"OpenML {name_with_spaces}"
-        header = "{}\n{}\n".format(header_text, "=" * len(header_text))
+        header = f"{header_text}\n{'=' * len(header_text)}\n"
         _body_fields: list[tuple[str, str | int | list[str]]] = [
             (k, "None" if v is None else v) for k, v in body_fields
diff --git a/openml/datasets/functions.py b/openml/datasets/functions.py
index 59f1da521..ac5466a44 100644
--- a/openml/datasets/functions.py
+++ b/openml/datasets/functions.py
@@ -191,7 +191,7 @@ def _list_datasets(
         if value is not None:
             api_call += f"/{operator}/{value}"
     if data_id is not None:
-        api_call += "/data_id/{}".format(",".join([str(int(i)) for i in data_id]))
+        api_call += f"/data_id/{','.join([str(int(i)) for i in data_id])}"
     return __list_datasets(api_call=api_call)
diff --git a/openml/evaluations/evaluation.py b/openml/evaluations/evaluation.py
index 70fab9f28..6d69d377e 100644
--- a/openml/evaluations/evaluation.py
+++ b/openml/evaluations/evaluation.py
@@ -100,7 +100,7 @@ def _to_dict(self) -> dict:
     def __repr__(self) -> str:
         header = "OpenML Evaluation"
-        header = "{}\n{}\n".format(header, "=" * len(header))
+        header = f"{header}\n{'=' * len(header)}\n"
         fields = {
             "Upload Date": self.upload_time,
diff --git a/openml/evaluations/functions.py b/openml/evaluations/functions.py
index f44fe3a93..7747294d7 100644
--- a/openml/evaluations/functions.py
+++ b/openml/evaluations/functions.py
@@ -204,15 +204,15 @@ def _list_evaluations(  # noqa: C901
         if value is not None:
             api_call += f"/{operator}/{value}"
     if tasks is not None:
-        api_call += "/task/{}".format(",".join([str(int(i)) for i in tasks]))
+        api_call += f"/task/{','.join([str(int(i)) for i in tasks])}"
     if setups is not None:
-        api_call += "/setup/{}".format(",".join([str(int(i)) for i in setups]))
+        api_call += f"/setup/{','.join([str(int(i)) for i in setups])}"
     if flows is not None:
-        api_call += "/flow/{}".format(",".join([str(int(i)) for i in flows]))
+        api_call += f"/flow/{','.join([str(int(i)) for i in flows])}"
     if runs is not None:
-        api_call += "/run/{}".format(",".join([str(int(i)) for i in runs]))
+        api_call += f"/run/{','.join([str(int(i)) for i in runs])}"
     if uploaders is not None:
-        api_call += "/uploader/{}".format(",".join([str(int(i)) for i in uploaders]))
+        api_call += f"/uploader/{','.join([str(int(i)) for i in uploaders])}"
     if study is not None:
         api_call += f"/study/{study}"
     if sort_order is not None:
diff --git a/openml/extensions/sklearn/extension.py b/openml/extensions/sklearn/extension.py
index fc8697e84..0c7588cdd 100644
--- a/openml/extensions/sklearn/extension.py
+++ b/openml/extensions/sklearn/extension.py
@@ -223,7 +223,7 @@ def remove_all_in_parentheses(string: str) -> str:
                 # then the pipeline steps are formatted e.g.:
                 # step1name=sklearn.submodule.ClassName,step2name...
                 components = [component.split(".")[-1] for component in pipeline.split(",")]
-                pipeline = "{}({})".format(pipeline_class, ",".join(components))
+                pipeline = f"{pipeline_class}({','.join(components)})"
                 if len(short_name.format(pipeline)) > extra_trim_length:
                     pipeline = f"{pipeline_class}(...,{components[-1]})"
             else:
@@ -482,9 +482,7 @@ def _deserialize_sklearn(  # noqa: PLR0915, C901, PLR0912
             )
         else:
             raise TypeError(o)
-        logger.info(
-            "-{} flow_to_sklearn END o={}, rval={}".format("-" * recursion_depth, o, rval)
-        )
+        logger.info(f"-{'-' * recursion_depth} flow_to_sklearn END o={o}, rval={rval}")
         return rval
     def model_to_flow(self, model: Any) -> OpenMLFlow:
@@ -574,7 +572,7 @@ def get_version_information(self) -> list[str]:
         import sklearn
         major, minor, micro, _, _ = sys.version_info
-        python_version = "Python_{}.".format(".".join([str(major), str(minor), str(micro)]))
+        python_version = f"Python_{'.'.join([str(major), str(minor), str(micro)])}."
         sklearn_version = f"Sklearn_{sklearn.__version__}."
         numpy_version = f"NumPy_{numpy.__version__}."  # type: ignore
         scipy_version = f"SciPy_{scipy.__version__}."
@@ -628,7 +626,7 @@ def _get_sklearn_description(self, model: Any, char_lim: int = 1024) -> str:
         """
         def match_format(s):
-            return "{}\n{}\n".format(s, len(s) * "-")
+            return f"{s}\n{len(s) * '-'}\n"
         s = inspect.getdoc(model)
         if s is None:
@@ -680,7 +678,7 @@ def _extract_sklearn_parameter_docstring(self, model) -> None | str:
         """
         def match_format(s):
-            return "{}\n{}\n".format(s, len(s) * "-")
+            return f"{s}\n{len(s) * '-'}\n"
         s = inspect.getdoc(model)
         if s is None:
@@ -689,7 +687,7 @@ def match_format(s):
             index1 = s.index(match_format("Parameters"))
         except ValueError as e:
             # when sklearn docstring has no 'Parameters' section
-            logger.warning("{} {}".format(match_format("Parameters"), e))
+            logger.warning(f"{match_format('Parameters')} {e}")
             return None
         headings = ["Attributes", "Notes", "See also", "Note", "References"]
@@ -1151,7 +1149,7 @@ def _deserialize_model(  # noqa: C901
         recursion_depth: int,
         strict_version: bool = True,  # noqa: FBT002, FBT001
     ) -> Any:
-        logger.info("-{} deserialize {}".format("-" * recursion_depth, flow.name))
+        logger.info(f"-{'-' * recursion_depth} deserialize {flow.name}")
         model_name = flow.class_name
         self._check_dependencies(flow.dependencies, strict_version=strict_version)
@@ -1168,9 +1166,7 @@ def _deserialize_model(  # noqa: C901
         for name in parameters:
             value = parameters.get(name)
-            logger.info(
-                "--{} flow_parameter={}, value={}".format("-" * recursion_depth, name, value)
-            )
+            logger.info(f"--{'-' * recursion_depth} flow_parameter={name}, value={value}")
             rval = self._deserialize_sklearn(
                 value,
                 components=components_,
@@ -1186,9 +1182,7 @@ def _deserialize_model(  # noqa: C901
             if name not in components_:
                 continue
             value = components[name]
-            logger.info(
-                "--{} flow_component={}, value={}".format("-" * recursion_depth, name, value)
-            )
+            logger.info(f"--{'-' * recursion_depth} flow_component={name}, value={value}")
             rval = self._deserialize_sklearn(
                 value,
                 recursion_depth=recursion_depth + 1,
diff --git a/openml/runs/functions.py b/openml/runs/functions.py
index e66af7b15..06fe49662 100644
--- a/openml/runs/functions.py
+++ b/openml/runs/functions.py
@@ -1154,15 +1154,15 @@ def _list_runs(  # noqa: PLR0913, C901
     if offset is not None:
         api_call += f"/offset/{offset}"
     if id is not None:
-        api_call += "/run/{}".format(",".join([str(int(i)) for i in id]))
+        api_call += f"/run/{','.join([str(int(i)) for i in id])}"
     if task is not None:
-        api_call += "/task/{}".format(",".join([str(int(i)) for i in task]))
+        api_call += f"/task/{','.join([str(int(i)) for i in task])}"
     if setup is not None:
-        api_call += "/setup/{}".format(",".join([str(int(i)) for i in setup]))
+        api_call += f"/setup/{','.join([str(int(i)) for i in setup])}"
     if flow is not None:
-        api_call += "/flow/{}".format(",".join([str(int(i)) for i in flow]))
+        api_call += f"/flow/{','.join([str(int(i)) for i in flow])}"
     if uploader is not None:
-        api_call += "/uploader/{}".format(",".join([str(int(i)) for i in uploader]))
+        api_call += f"/uploader/{','.join([str(int(i)) for i in uploader])}"
     if study is not None:
         api_call += "/study/%d" % study
     if display_errors:
diff --git a/openml/setups/functions.py b/openml/setups/functions.py
index cc71418df..374911901 100644
--- a/openml/setups/functions.py
+++ b/openml/setups/functions.py
@@ -207,7 +207,7 @@ def _list_setups(
     if offset is not None:
         api_call += f"/offset/{offset}"
     if setup is not None:
-        api_call += "/setup/{}".format(",".join([str(int(i)) for i in setup]))
+        api_call += f"/setup/{','.join([str(int(i)) for i in setup])}"
     if flow is not None:
         api_call += f"/flow/{flow}"
     if tag is not None:
diff --git a/openml/setups/setup.py b/openml/setups/setup.py
index c3d8149e7..0960ad4c1 100644
--- a/openml/setups/setup.py
+++ b/openml/setups/setup.py
@@ -45,7 +45,7 @@ def _to_dict(self) -> dict[str, Any]:
     def __repr__(self) -> str:
         header = "OpenML Setup"
-        header = "{}\n{}\n".format(header, "=" * len(header))
+        header = f"{header}\n{'=' * len(header)}\n"
         fields = {
             "Setup ID": self.setup_id,
@@ -125,7 +125,7 @@ def _to_dict(self) -> dict[str, Any]:
     def __repr__(self) -> str:
         header = "OpenML Parameter"
-        header = "{}\n{}\n".format(header, "=" * len(header))
+        header = f"{header}\n{'=' * len(header)}\n"
         fields = {
             "ID": self.id,
@@ -137,7 +137,7 @@ def __repr__(self) -> str:
         }
         # indented prints for parameter attributes
         # indention = 2 spaces + 1 | + 2 underscores
-        indent = "{}|{}".format(" " * 2, "_" * 2)
+        indent = f"{' ' * 2}|{'_' * 2}"
         parameter_data_type = f"{indent}Data Type"
         fields[parameter_data_type] = self.data_type
         parameter_default = f"{indent}Default"
diff --git a/openml/tasks/functions.py b/openml/tasks/functions.py
index 25156f2e5..4f8cc001f 100644
--- a/openml/tasks/functions.py
+++ b/openml/tasks/functions.py
@@ -521,7 +521,7 @@ def _create_task_from_xml(xml: str) -> OpenMLTask:
         TaskType.LEARNING_CURVE: OpenMLLearningCurveTask,
     }.get(task_type)
     if cls is None:
-        raise NotImplementedError("Task type {} not supported.".format(common_kwargs["task_type"]))
+        raise NotImplementedError(f"Task type {common_kwargs['task_type']} not supported.")
     return cls(**common_kwargs)  # type: ignore
diff --git a/tests/conftest.py b/tests/conftest.py
index b523117c1..94118fd8e 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -109,7 +109,7 @@ def delete_remote_files(tracker, flow_names) -> None:
     # deleting all collected entities published to test server
     # 'run's are deleted first to prevent dependency issue of entities on deletion
-    logger.info("Entity Types: {}".format(["run", "data", "flow", "task", "study"]))
+    logger.info(f"Entity Types: {['run', 'data', 'flow', 'task', 'study']}")
     for entity_type in ["run", "data", "flow", "task", "study"]:
         logger.info(f"Deleting {entity_type}s...")
         for _i, entity in enumerate(tracker[entity_type]):
diff --git a/tests/test_datasets/test_dataset_functions.py b/tests/test_datasets/test_dataset_functions.py
index fb29009a3..9218fc205 100644
--- a/tests/test_datasets/test_dataset_functions.py
+++ b/tests/test_datasets/test_dataset_functions.py
@@ -61,7 +61,7 @@ def _remove_pickle_files(self):
         self.lock_path = os.path.join(openml.config.get_cache_directory(), "locks")
         for did in ["-1", "2"]:
             with lockutils.external_lock(
-                name="datasets.functions.get_dataset:%s" % did,
+                name=f"datasets.functions.get_dataset:{did}",
                 lock_path=self.lock_path,
             ):
                 pickle_path = os.path.join(
@@ -534,7 +534,7 @@ def test_publish_dataset(self):
         dataset.publish()
         TestBase._mark_entity_for_removal("data", dataset.dataset_id)
         TestBase.logger.info(
-            "collected from {}: {}".format(__file__.split("/")[-1], dataset.dataset_id),
+            f"collected from {__file__.split('/')[-1]}: {dataset.dataset_id}",
         )
         assert isinstance(dataset.dataset_id, int)
@@ -556,7 +556,7 @@ def test__retrieve_class_labels(self):
     def test_upload_dataset_with_url(self):
         dataset = OpenMLDataset(
-            "%s-UploadTestWithURL" % self._get_sentinel(),
+            f"{self._get_sentinel()}-UploadTestWithURL",
             "test",
             data_format="arff",
             version=1,
         )
         dataset.publish()
         TestBase._mark_entity_for_removal("data", dataset.dataset_id)
         TestBase.logger.info(
-            "collected from {}: {}".format(__file__.split("/")[-1], dataset.dataset_id),
+            f"collected from {__file__.split('/')[-1]}: {dataset.dataset_id}",
         )
         assert isinstance(dataset.dataset_id, int)
@@ -582,7 +582,7 @@ def _assert_status_of_dataset(self, *, did: int, status: str):
     @pytest.mark.flaky()
     def test_data_status(self):
         dataset = OpenMLDataset(
-            "%s-UploadTestWithURL" % self._get_sentinel(),
+            f"{self._get_sentinel()}-UploadTestWithURL",
             "test",
             "ARFF",
             version=1,
         )
         dataset.publish()
         TestBase._mark_entity_for_removal("data", dataset.id)
-        TestBase.logger.info("collected from {}: {}".format(__file__.split("/")[-1], dataset.id))
+        TestBase.logger.info(f"collected from {__file__.split('/')[-1]}: {dataset.id}")
         did = dataset.id
         # admin key for test server (only adminds can activate datasets.
@@ -677,7 +677,7 @@ def test_create_dataset_numpy(self):
         attributes = [(f"col_{i}", "REAL") for i in range(data.shape[1])]
         dataset = create_dataset(
-            name="%s-NumPy_testing_dataset" % self._get_sentinel(),
+            name=f"{self._get_sentinel()}-NumPy_testing_dataset",
             description="Synthetic dataset created from a NumPy array",
             creator="OpenML tester",
             contributor=None,
@@ -697,7 +697,7 @@ def test_create_dataset_numpy(self):
         dataset.publish()
         TestBase._mark_entity_for_removal("data", dataset.id)
-        TestBase.logger.info("collected from {}: {}".format(__file__.split("/")[-1], dataset.id))
+        TestBase.logger.info(f"collected from {__file__.split('/')[-1]}: {dataset.id}")
         assert (
             _get_online_dataset_arff(dataset.id) == dataset._dataset
@@ -732,7 +732,7 @@ def test_create_dataset_list(self):
         ]
         dataset = create_dataset(
-            name="%s-ModifiedWeather" % self._get_sentinel(),
+            name=f"{self._get_sentinel()}-ModifiedWeather",
             description=("Testing dataset upload when the data is a list of lists"),
             creator="OpenML test",
             contributor=None,
@@ -752,7 +752,7 @@ def test_create_dataset_list(self):
         dataset.publish()
         TestBase._mark_entity_for_removal("data", dataset.id)
-        TestBase.logger.info("collected from {}: {}".format(__file__.split("/")[-1], dataset.id))
+        TestBase.logger.info(f"collected from {__file__.split('/')[-1]}: {dataset.id}")
         assert (
             _get_online_dataset_arff(dataset.id) == dataset._dataset
         ), "Uploaded ARFF does not match original one"
@@ -774,7 +774,7 @@ def test_create_dataset_sparse(self):
         ]
         xor_dataset = create_dataset(
-            name="%s-XOR" % self._get_sentinel(),
+            name=f"{self._get_sentinel()}-XOR",
             description="Dataset representing the XOR operation",
             creator=None,
             contributor=None,
@@ -793,7 +793,7 @@ def test_create_dataset_sparse(self):
         xor_dataset.publish()
         TestBase._mark_entity_for_removal("data", xor_dataset.id)
         TestBase.logger.info(
-            "collected from {}: {}".format(__file__.split("/")[-1], xor_dataset.id),
+            f"collected from {__file__.split('/')[-1]}: {xor_dataset.id}",
         )
         assert (
             _get_online_dataset_arff(xor_dataset.id) == xor_dataset._dataset
@@ -806,7 +806,7 @@ def test_create_dataset_sparse(self):
         sparse_data = [{0: 0.0}, {1: 1.0, 2: 1.0}, {0: 1.0, 2: 1.0}, {0: 1.0, 1: 1.0}]
         xor_dataset = create_dataset(
-            name="%s-XOR" % self._get_sentinel(),
+            name=f"{self._get_sentinel()}-XOR",
             description="Dataset representing the XOR operation",
             creator=None,
             contributor=None,
@@ -825,7 +825,7 @@ def test_create_dataset_sparse(self):
         xor_dataset.publish()
         TestBase._mark_entity_for_removal("data", xor_dataset.id)
         TestBase.logger.info(
-            "collected from {}: {}".format(__file__.split("/")[-1], xor_dataset.id),
+            f"collected from {__file__.split('/')[-1]}: {xor_dataset.id}",
         )
         assert (
             _get_online_dataset_arff(xor_dataset.id) == xor_dataset._dataset
@@ -924,7 +924,7 @@ def test_create_dataset_pandas(self):
         df["windy"] = df["windy"].astype("bool")
         df["play"] = df["play"].astype("category")
         # meta-information
-        name = "%s-pandas_testing_dataset" % self._get_sentinel()
+        name = f"{self._get_sentinel()}-pandas_testing_dataset"
         description = "Synthetic dataset created from a Pandas DataFrame"
         creator = "OpenML tester"
         collection_date = "01-01-2018"
@@ -953,7 +953,7 @@ def test_create_dataset_pandas(self):
         )
         dataset.publish()
         TestBase._mark_entity_for_removal("data", dataset.id)
-        TestBase.logger.info("collected from {}: {}".format(__file__.split("/")[-1], dataset.id))
+        TestBase.logger.info(f"collected from {__file__.split('/')[-1]}: {dataset.id}")
         assert (
             _get_online_dataset_arff(dataset.id) == dataset._dataset
         ), "Uploaded ARFF does not match original one"
@@ -989,7 +989,7 @@ def test_create_dataset_pandas(self):
         )
         dataset.publish()
         TestBase._mark_entity_for_removal("data", dataset.id)
-        TestBase.logger.info("collected from {}: {}".format(__file__.split("/")[-1], dataset.id))
+        TestBase.logger.info(f"collected from {__file__.split('/')[-1]}: {dataset.id}")
         assert (
             _get_online_dataset_arff(dataset.id) == dataset._dataset
         ), "Uploaded ARFF does not match original one"
@@ -1021,7 +1021,7 @@ def test_create_dataset_pandas(self):
         )
         dataset.publish()
         TestBase._mark_entity_for_removal("data", dataset.id)
-        TestBase.logger.info("collected from {}: {}".format(__file__.split("/")[-1], dataset.id))
+        TestBase.logger.info(f"collected from {__file__.split('/')[-1]}: {dataset.id}")
         downloaded_data = _get_online_dataset_arff(dataset.id)
         assert downloaded_data == dataset._dataset, "Uploaded ARFF does not match original one"
         assert "@ATTRIBUTE rnd_str {a, b, c, d, e, f, g}" in downloaded_data
@@ -1048,7 +1048,7 @@ def test_ignore_attributes_dataset(self):
         df["windy"] = df["windy"].astype("bool")
         df["play"] = df["play"].astype("category")
         # meta-information
-        name = "%s-pandas_testing_dataset" % self._get_sentinel()
+        name = f"{self._get_sentinel()}-pandas_testing_dataset"
         description = "Synthetic dataset created from a Pandas DataFrame"
         creator = "OpenML tester"
         collection_date = "01-01-2018"
@@ -1149,7 +1149,7 @@ def test_publish_fetch_ignore_attribute(self):
         df["windy"] = df["windy"].astype("bool")
         df["play"] = df["play"].astype("category")
         # meta-information
-        name = "%s-pandas_testing_dataset" % self._get_sentinel()
+        name = f"{self._get_sentinel()}-pandas_testing_dataset"
         description = "Synthetic dataset created from a Pandas DataFrame"
         creator = "OpenML tester"
         collection_date = "01-01-2018"
@@ -1184,7 +1184,7 @@ def test_publish_fetch_ignore_attribute(self):
         # publish dataset
         dataset.publish()
         TestBase._mark_entity_for_removal("data", dataset.id)
-        TestBase.logger.info("collected from {}: {}".format(__file__.split("/")[-1], dataset.id))
+        TestBase.logger.info(f"collected from {__file__.split('/')[-1]}: {dataset.id}")
         # test if publish was successful
         assert isinstance(dataset.id, int)
@@ -1208,7 +1208,7 @@ def _wait_for_dataset_being_processed(
     def test_create_dataset_row_id_attribute_error(self):
         # meta-information
-        name = "%s-pandas_testing_dataset" % self._get_sentinel()
+        name = f"{self._get_sentinel()}-pandas_testing_dataset"
         description = "Synthetic dataset created from a Pandas DataFrame"
         creator = "OpenML tester"
         collection_date = "01-01-2018"
@@ -1246,7 +1246,7 @@ def test_create_dataset_row_id_attribute_error(self):
     def test_create_dataset_row_id_attribute_inference(self):
         # meta-information
-        name = "%s-pandas_testing_dataset" % self._get_sentinel()
+        name = f"{self._get_sentinel()}-pandas_testing_dataset"
         description = "Synthetic dataset created from a Pandas DataFrame"
         creator = "OpenML tester"
         collection_date = "01-01-2018"
@@ -1290,7 +1290,7 @@ def test_create_dataset_row_id_attribute_inference(self):
         dataset.publish()
         TestBase._mark_entity_for_removal("data", dataset.id)
         TestBase.logger.info(
-            "collected from {}: {}".format(__file__.split("/")[-1], dataset.id),
+            f"collected from {__file__.split('/')[-1]}: {dataset.id}",
         )
         arff_dataset = arff.loads(_get_online_dataset_arff(dataset.id))
         arff_data = np.array(arff_dataset["data"], dtype=object)
@@ -1666,7 +1666,7 @@ def test_delete_dataset(self):
         df["windy"] = df["windy"].astype("bool")
         df["play"] = df["play"].astype("category")
         # meta-information
-        name = "%s-pandas_testing_dataset" % self._get_sentinel()
+        name = f"{self._get_sentinel()}-pandas_testing_dataset"
         description = "Synthetic dataset created from a Pandas DataFrame"
         creator = "OpenML tester"
         collection_date = "01-01-2018"
diff --git a/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py b/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py
index 706a67aa6..9913436e4 100644
--- a/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py
+++ b/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py
@@ -886,7 +886,7 @@ def test_serialize_complex_flow(self):
         module_name_encoder = (
             "_encoders" if Version(sklearn.__version__) >= Version("0.20") else "data"
         )
-        ohe_name = "sklearn.preprocessing.%s.OneHotEncoder" % module_name_encoder
+        ohe_name = f"sklearn.preprocessing.{module_name_encoder}.OneHotEncoder"
         scaler_name = "sklearn.preprocessing.{}.StandardScaler".format(
             "data" if Version(sklearn.__version__) < Version("0.22") else "_data",
         )
@@ -904,7 +904,7 @@ def test_serialize_complex_flow(self):
             boosting_name,
         )
         fixture_name = (
-            "sklearn.model_selection._search.RandomizedSearchCV(estimator=%s)" % pipeline_name
+            f"sklearn.model_selection._search.RandomizedSearchCV(estimator={pipeline_name})"
         )
         fixture_structure = {
             ohe_name: ["estimator", "ohe"],
@@ -1597,7 +1597,7 @@ def test_openml_param_name_to_sklearn(self):
         run = openml.runs.run_flow_on_task(flow, task)
         run = run.publish()
         TestBase._mark_entity_for_removal("run", run.run_id)
-        TestBase.logger.info("collected from {}: {}".format(__file__.split("/")[-1], run.run_id))
+        TestBase.logger.info(f"collected from {__file__.split('/')[-1]}: {run.run_id}")
         run = openml.runs.get_run(run.run_id)
         setup = openml.setups.get_setup(run.setup_id)
@@ -2181,7 +2181,7 @@ def test__extract_trace_data(self):
             assert len(trace_iteration.parameters) == len(param_grid)
             for param in param_grid:
                 # Prepend with the "parameter_" prefix
-                param_in_trace = "parameter_%s" % param
+                param_in_trace = f"parameter_{param}"
                 assert param_in_trace in trace_iteration.parameters
                 param_value = json.loads(trace_iteration.parameters[param_in_trace])
                 assert param_value in param_grid[param]
diff --git a/tests/test_flows/test_flow.py b/tests/test_flows/test_flow.py
index dcf074c8f..4a5241b62 100644
--- a/tests/test_flows/test_flow.py
+++ b/tests/test_flows/test_flow.py
@@ -198,7 +198,7 @@ def test_publish_flow(self):
         flow.publish()
         TestBase._mark_entity_for_removal("flow", flow.flow_id, flow.name)
-        TestBase.logger.info("collected from {}: {}".format(__file__.split("/")[-1], flow.flow_id))
+        TestBase.logger.info(f"collected from {__file__.split('/')[-1]}: {flow.flow_id}")
         assert isinstance(flow.flow_id, int)
     @pytest.mark.sklearn()
@@ -213,7 +213,7 @@ def test_publish_existing_flow(self, flow_exists_mock):
         TestBase._mark_entity_for_removal("flow", flow.flow_id, flow.name)
         TestBase.logger.info(
-            "collected from {}: {}".format(__file__.split("/")[-1], flow.flow_id),
+            f"collected from {__file__.split('/')[-1]}: {flow.flow_id}",
         )
     @pytest.mark.sklearn()
@@ -225,7 +225,7 @@ def test_publish_flow_with_similar_components(self):
         flow, _ = self._add_sentinel_to_flow_name(flow, None)
         flow.publish()
         TestBase._mark_entity_for_removal("flow", flow.flow_id, flow.name)
-        TestBase.logger.info("collected from {}: {}".format(__file__.split("/")[-1], flow.flow_id))
+        TestBase.logger.info(f"collected from {__file__.split('/')[-1]}: {flow.flow_id}")
         # For a flow where both components are published together, the upload
         # date should be equal
         assert flow.upload_date == flow.components["lr"].upload_date, (
@@ -240,7 +240,7 @@ def test_publish_flow_with_similar_components(self):
         flow1, sentinel = self._add_sentinel_to_flow_name(flow1, None)
         flow1.publish()
         TestBase._mark_entity_for_removal("flow", flow.flow_id, flow.name)
-        TestBase.logger.info("collected from {}: {}".format(__file__.split("/")[-1], flow1.flow_id))
+        TestBase.logger.info(f"collected from {__file__.split('/')[-1]}: {flow1.flow_id}")
         # In order to assign different upload times to the flows!
         time.sleep(1)
@@ -252,7 +252,7 @@ def test_publish_flow_with_similar_components(self):
         flow2, _ = self._add_sentinel_to_flow_name(flow2, sentinel)
         flow2.publish()
         TestBase._mark_entity_for_removal("flow", flow2.flow_id, flow2.name)
-        TestBase.logger.info("collected from {}: {}".format(__file__.split("/")[-1], flow2.flow_id))
+        TestBase.logger.info(f"collected from {__file__.split('/')[-1]}: {flow2.flow_id}")
         # If one component was published before the other, the components in
         # the flow should have different upload dates
         assert flow2.upload_date != flow2.components["dt"].upload_date
@@ -264,7 +264,7 @@ def test_publish_flow_with_similar_components(self):
         # correctly on the server should thus not check the child's parameters!
         flow3.publish()
         TestBase._mark_entity_for_removal("flow", flow3.flow_id, flow3.name)
-        TestBase.logger.info("collected from {}: {}".format(__file__.split("/")[-1], flow3.flow_id))
+        TestBase.logger.info(f"collected from {__file__.split('/')[-1]}: {flow3.flow_id}")
     @pytest.mark.sklearn()
     def test_semi_legal_flow(self):
@@ -288,7 +288,7 @@ def test_semi_legal_flow(self):
         flow.publish()
         TestBase._mark_entity_for_removal("flow", flow.flow_id, flow.name)
-        TestBase.logger.info("collected from {}: {}".format(__file__.split("/")[-1], flow.flow_id))
+        TestBase.logger.info(f"collected from {__file__.split('/')[-1]}: {flow.flow_id}")
     @pytest.mark.sklearn()
     @mock.patch("openml.flows.functions.get_flow")
@@ -341,7 +341,7 @@ def test_publish_error(self, api_call_mock, flow_exists_mock, get_flow_mock):
         TestBase._mark_entity_for_removal("flow", flow.flow_id, flow.name)
         TestBase.logger.info(
-            "collected from {}: {}".format(__file__.split("/")[-1], flow.flow_id),
+            f"collected from {__file__.split('/')[-1]}: {flow.flow_id}",
         )
         assert get_flow_mock.call_count == 2
@@ -366,7 +366,7 @@ def get_sentinel():
             md5 = hashlib.md5()
             md5.update(str(time.time()).encode("utf-8"))
             sentinel = md5.hexdigest()[:10]
-            return "TEST%s" % sentinel
+            return f"TEST{sentinel}"
         name = get_sentinel() + get_sentinel()
         version = get_sentinel()
@@ -401,7 +401,7 @@ def test_existing_flow_exists(self):
         flow = flow.publish()
         TestBase._mark_entity_for_removal("flow", flow.flow_id, flow.name)
         TestBase.logger.info(
-            "collected from {}: {}".format(__file__.split("/")[-1], flow.flow_id),
+            f"collected from {__file__.split('/')[-1]}: {flow.flow_id}",
         )
         # redownload the flow
         flow = openml.flows.get_flow(flow.flow_id)
@@ -466,7 +466,7 @@ def test_sklearn_to_upload_to_flow(self):
         flow.publish()
         TestBase._mark_entity_for_removal("flow", flow.flow_id, flow.name)
-        TestBase.logger.info("collected from {}: {}".format(__file__.split("/")[-1], flow.flow_id))
+        TestBase.logger.info(f"collected from {__file__.split('/')[-1]}: {flow.flow_id}")
         assert isinstance(flow.flow_id, int)
         # Check whether we can load the flow again
diff --git a/tests/test_flows/test_flow_functions.py b/tests/test_flows/test_flow_functions.py
index a25c2d740..40c78c822 100644
--- a/tests/test_flows/test_flow_functions.py
+++ b/tests/test_flows/test_flow_functions.py
@@ -292,7 +292,7 @@ def test_sklearn_to_flow_list_of_lists(self):
         self._add_sentinel_to_flow_name(flow)
         flow.publish()
         TestBase._mark_entity_for_removal("flow", flow.flow_id, flow.name)
-        TestBase.logger.info("collected from {}: {}".format(__file__.split("/")[-1], flow.flow_id))
+        TestBase.logger.info(f"collected from {__file__.split('/')[-1]}: {flow.flow_id}")
         # Test deserialization works
         server_flow = openml.flows.get_flow(flow.flow_id, reinstantiate=True)
         assert server_flow.parameters["categories"] == "[[0, 1], [0, 1]]"
@@ -313,7 +313,7 @@ def test_get_flow_reinstantiate_model(self):
         flow = extension.model_to_flow(model)
         flow.publish(raise_error_if_exists=False)
         TestBase._mark_entity_for_removal("flow", flow.flow_id, flow.name)
-        TestBase.logger.info("collected from {}: {}".format(__file__.split("/")[-1], flow.flow_id))
+        TestBase.logger.info(f"collected from {__file__.split('/')[-1]}: {flow.flow_id}")
         downloaded_flow = openml.flows.get_flow(flow.flow_id, reinstantiate=True)
         assert isinstance(downloaded_flow.model, sklearn.ensemble.RandomForestClassifier)
@@ -398,7 +398,7 @@ def test_get_flow_id(self):
         flow = openml.extensions.get_extension_by_model(clf).model_to_flow(clf).publish()
         TestBase._mark_entity_for_removal("flow", flow.flow_id, flow.name)
         TestBase.logger.info(
-            "collected from {}: {}".format(__file__.split("/")[-1], flow.flow_id),
+            f"collected from {__file__.split('/')[-1]}: {flow.flow_id}",
         )
         assert openml.flows.get_flow_id(model=clf, exact_version=True) == flow.flow_id
diff --git a/tests/test_runs/test_run.py b/tests/test_runs/test_run.py
index 58a0dddf5..e58c72e2d 100644
--- a/tests/test_runs/test_run.py
+++ b/tests/test_runs/test_run.py
@@ -149,7 +149,7 @@ def test_to_from_filesystem_vanilla(self):
         run_prime.publish()
         TestBase._mark_entity_for_removal("run", run_prime.run_id)
         TestBase.logger.info(
-            "collected from {}: {}".format(__file__.split("/")[-1], run_prime.run_id),
+            f"collected from {__file__.split('/')[-1]}: {run_prime.run_id}",
         )
     @pytest.mark.sklearn()
@@ -185,7 +185,7 @@ def test_to_from_filesystem_search(self):
         run_prime.publish()
         TestBase._mark_entity_for_removal("run", run_prime.run_id)
         TestBase.logger.info(
-            "collected from {}: {}".format(__file__.split("/")[-1], run_prime.run_id),
+            f"collected from {__file__.split('/')[-1]}: {run_prime.run_id}",
         )
     @pytest.mark.sklearn()
@@ -330,7 +330,7 @@ def test_publish_with_local_loaded_flow(self):
         # Clean up
         TestBase._mark_entity_for_removal("run", loaded_run.run_id)
         TestBase.logger.info(
-            "collected from {}: {}".format(__file__.split("/")[-1], loaded_run.run_id),
+            f"collected from {__file__.split('/')[-1]}: {loaded_run.run_id}",
         )
         # make sure the flow is published as part of publishing the run.
@@ -377,7 +377,7 @@ def test_offline_and_online_run_identical(self):
         # Clean up
         TestBase._mark_entity_for_removal("run", run.run_id)
         TestBase.logger.info(
-            "collected from {}: {}".format(__file__.split("/")[-1], loaded_run.run_id),
+            f"collected from {__file__.split('/')[-1]}: {loaded_run.run_id}",
         )
     def test_run_setup_string_included_in_xml(self):
diff --git a/tests/test_runs/test_run_functions.py b/tests/test_runs/test_run_functions.py
index 7235075c0..9b051a341 100644
--- a/tests/test_runs/test_run_functions.py
+++ b/tests/test_runs/test_run_functions.py
@@ -1669,7 +1669,7 @@ def test_run_flow_on_task_downloaded_flow(self):
         run.publish()
         TestBase._mark_entity_for_removal("run", run.run_id)
-        TestBase.logger.info("collected from {}: {}".format(__file__.split("/")[-1], run.run_id))
+        TestBase.logger.info(f"collected from {__file__.split('/')[-1]}: {run.run_id}")
     @pytest.mark.production()
     def test_format_prediction_non_supervised(self):
diff --git a/tests/test_setups/test_setup_functions.py b/tests/test_setups/test_setup_functions.py
index b17d876b9..88ac84805 100644
--- a/tests/test_setups/test_setup_functions.py
+++ b/tests/test_setups/test_setup_functions.py
@@ -24,7 +24,7 @@ def get_sentinel():
     md5 = hashlib.md5()
     md5.update(str(time.time()).encode("utf-8"))
     sentinel = md5.hexdigest()[:10]
-    return "TEST%s" % sentinel
+    return f"TEST{sentinel}"
 class TestSetupFunctions(TestBase):
@@ -44,7 +44,7 @@ def test_nonexisting_setup_exists(self):
         flow.name = f"TEST{sentinel}{flow.name}"
         flow.publish()
         TestBase._mark_entity_for_removal("flow", flow.flow_id, flow.name)
-        TestBase.logger.info("collected from {}: {}".format(__file__.split("/")[-1], flow.flow_id))
+        TestBase.logger.info(f"collected from {__file__.split('/')[-1]}: {flow.flow_id}")
         # although the flow exists (created as of previous statement),
         # we can be sure there are no setups (yet) as it was just created
@@ -57,7 +57,7 @@ def _existing_setup_exists(self, classif):
         flow.name = f"TEST{get_sentinel()}{flow.name}"
         flow.publish()
         TestBase._mark_entity_for_removal("flow", flow.flow_id, flow.name)
-        TestBase.logger.info("collected from {}: {}".format(__file__.split("/")[-1], flow.flow_id))
+        TestBase.logger.info(f"collected from {__file__.split('/')[-1]}: {flow.flow_id}")
         # although the flow exists, we can be sure there are no
         # setups (yet) as it hasn't been ran
@@ -73,7 +73,7 @@ def _existing_setup_exists(self, classif):
         run.flow_id = flow.flow_id
         run.publish()
         TestBase._mark_entity_for_removal("run", run.run_id)
-        TestBase.logger.info("collected from {}: {}".format(__file__.split("/")[-1], run.run_id))
+        TestBase.logger.info(f"collected from {__file__.split('/')[-1]}: {run.run_id}")
         # download the run, as it contains the right setup id
         run = openml.runs.get_run(run.run_id)
diff --git a/tests/test_study/test_study_examples.py b/tests/test_study/test_study_examples.py
index 9e5cb4e5e..e3b21fc8c 100644
--- a/tests/test_study/test_study_examples.py
+++ b/tests/test_study/test_study_examples.py
@@ -72,6 +72,6 @@ def test_Figure1a(self):
             run.publish()  # publish the experiment on OpenML (optional)
             TestBase._mark_entity_for_removal("run", run.run_id)
             TestBase.logger.info(
-                "collected from {}: {}".format(__file__.split("/")[-1], run.run_id),
+                f"collected from {__file__.split('/')[-1]}: {run.run_id}",
             )
             TestBase.logger.info("URL for run: %s/run/%d" % (openml.config.server, run.run_id))
diff --git a/tests/test_study/test_study_functions.py b/tests/test_study/test_study_functions.py
index 8652d5547..22f5b0d03 100644
--- a/tests/test_study/test_study_functions.py
+++ b/tests/test_study/test_study_functions.py
@@ -87,7 +87,7 @@ def test_publish_benchmark_suite(self):
         )
         study.publish()
         TestBase._mark_entity_for_removal("study", study.id)
-        TestBase.logger.info("collected from {}: {}".format(__file__.split("/")[-1], study.id))
+        TestBase.logger.info(f"collected from {__file__.split('/')[-1]}: {study.id}")
         assert study.id > 0
@@ -134,7 +134,7 @@ def _test_publish_empty_study_is_allowed(self, explicit: bool):
         study.publish()
         TestBase._mark_entity_for_removal("study", study.id)
-        TestBase.logger.info("collected from {}: {}".format(__file__.split("/")[-1], study.id))
+        TestBase.logger.info(f"collected from {__file__.split('/')[-1]}: {study.id}")
         assert study.id > 0
         study_downloaded = openml.study.get_study(study.id)
@@ -169,7 +169,7 @@ def test_publish_study(self):
         )
         study.publish()
         TestBase._mark_entity_for_removal("study", study.id)
-        TestBase.logger.info("collected from {}: {}".format(__file__.split("/")[-1], study.id))
+        TestBase.logger.info(f"collected from {__file__.split('/')[-1]}: {study.id}")
         assert study.id > 0
         study_downloaded = openml.study.get_study(study.id)
         assert study_downloaded.alias == fixt_alias
@@ -232,7 +232,7 @@ def test_study_attach_illegal(self):
         )
         study.publish()
         TestBase._mark_entity_for_removal("study", study.id)
-        TestBase.logger.info("collected from {}: {}".format(__file__.split("/")[-1], study.id))
+        TestBase.logger.info(f"collected from {__file__.split('/')[-1]}: {study.id}")
         study_original = openml.study.get_study(study.id)
         with pytest.raises(
diff --git a/tests/test_tasks/test_clustering_task.py b/tests/test_tasks/test_clustering_task.py
index bc59ad26c..bc0876228 100644
--- a/tests/test_tasks/test_clustering_task.py
+++ b/tests/test_tasks/test_clustering_task.py
@@ -50,7 +50,7 @@ def test_upload_task(self):
                 task = task.publish()
                 TestBase._mark_entity_for_removal("task", task.id)
                 TestBase.logger.info(
-                    "collected from {}: {}".format(__file__.split("/")[-1], task.id),
+                    f"collected from {__file__.split('/')[-1]}: {task.id}",
                 )
                 # success
                 break
diff --git a/tests/test_tasks/test_task.py b/tests/test_tasks/test_task.py
index 311ffd365..e4c9418f2 100644
--- a/tests/test_tasks/test_task.py
+++ b/tests/test_tasks/test_task.py
@@ -53,7 +53,7 @@ def test_upload_task(self):
                 task.publish()
                 TestBase._mark_entity_for_removal("task", task.id)
                 TestBase.logger.info(
-                    "collected from {}: {}".format(__file__.split("/")[-1], task.id),
+                    f"collected from {__file__.split('/')[-1]}: {task.id}",
                 )
                 # success
                 break