From e2e746c6ef51075e4575b1d0b97eca8ebe1f8a36 Mon Sep 17 00:00:00 2001
From: SubhadityaMukherjee <msubhaditya@gmail.com>
Date: Tue, 17 Jun 2025 09:44:52 +0200
Subject: [PATCH 1/9] f strings guidelines

---
 CONTRIBUTING.md                               |  1 +
 .../30_extended/fetch_evaluations_tutorial.py |  2 +-
 .../30_extended/fetch_runtimes_tutorial.py    |  4 +-
 .../40_paper/2015_neurips_feurer_example.py   |  2 +-
 .../40_paper/2018_neurips_perrone_example.py  |  2 +-
 openml/base.py                                |  2 +-
 openml/datasets/functions.py                  |  2 +-
 openml/evaluations/evaluation.py              |  2 +-
 openml/evaluations/functions.py               | 10 ++--
 openml/extensions/sklearn/extension.py        | 18 +++----
 openml/runs/functions.py                      | 10 ++--
 openml/setups/functions.py                    |  2 +-
 openml/setups/setup.py                        |  6 +--
 openml/tasks/functions.py                     |  2 +-
 tests/conftest.py                             |  2 +-
 tests/test_datasets/test_dataset_functions.py | 50 +++++++++----------
 .../test_sklearn_extension.py                 |  8 +--
 tests/test_flows/test_flow.py                 | 22 ++++----
 tests/test_flows/test_flow_functions.py       |  6 +--
 tests/test_runs/test_run.py                   |  8 +--
 tests/test_runs/test_run_functions.py         |  2 +-
 tests/test_setups/test_setup_functions.py     |  8 +--
 tests/test_study/test_study_examples.py       |  2 +-
 tests/test_study/test_study_functions.py      |  8 +--
 tests/test_tasks/test_clustering_task.py      |  2 +-
 tests/test_tasks/test_task.py                 |  2 +-
 26 files changed, 93 insertions(+), 92 deletions(-)

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index cc8633f84..da1beed04 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -138,6 +138,7 @@ following rules before you submit a pull request:
     - Use the [`str.format`](https://docs.python.org/3/library/stdtypes.html#str.format) over [`printf`](https://docs.python.org/3/library/stdtypes.html#printf-style-string-formatting) style formatting.
      E.g. use `"{} {}".format('hello', 'world')` not `"%s %s" % ('hello', 'world')`.
      (note: old code may still use `printf`-formatting, this is work in progress.)
+     Prefer [`f-strings`](https://docs.python.org/3/library/stdtypes.html#formatted-string-literals-f-strings) wherever possible. You can run [flynt](https://github.com/ikamensh/flynt) to convert your contributed code to f-strings automatically: `flynt {source_file_or_directory}`.
 
 -  If your pull request addresses an issue, please use the pull request title
    to describe the issue and mention the issue number in the pull request description. This will make sure a link back to the original issue is
diff --git a/examples/30_extended/fetch_evaluations_tutorial.py b/examples/30_extended/fetch_evaluations_tutorial.py
index 86302e2d1..9e9e0f0d9 100644
--- a/examples/30_extended/fetch_evaluations_tutorial.py
+++ b/examples/30_extended/fetch_evaluations_tutorial.py
@@ -94,7 +94,7 @@ def plot_cdf(values, metric="predictive_accuracy"):
     plt.minorticks_on()
     plt.grid(visible=True, which="minor", linestyle="--")
     plt.axvline(max_val, linestyle="--", color="gray")
-    plt.text(max_val, 0, "%.3f" % max_val, fontsize=9)
+    plt.text(max_val, 0, f"{max_val:.3f}", fontsize=9)
     plt.show()
 
 
diff --git a/examples/30_extended/fetch_runtimes_tutorial.py b/examples/30_extended/fetch_runtimes_tutorial.py
index 107adee79..8adf37d31 100644
--- a/examples/30_extended/fetch_runtimes_tutorial.py
+++ b/examples/30_extended/fetch_runtimes_tutorial.py
@@ -119,7 +119,7 @@ def print_compare_runtimes(measures):
 )
 for repeat, val1 in measures["predictive_accuracy"].items():
     for fold, val2 in val1.items():
-        print("Repeat #{}-Fold #{}: {:.4f}".format(repeat, fold, val2))
+        print(f"Repeat #{repeat}-Fold #{fold}: {val2:.4f}")
     print()
 
 ################################################################################
@@ -242,7 +242,7 @@ def print_compare_runtimes(measures):
 # the 2-fold (inner) CV search performed.
 
 # We earlier extracted the number of repeats and folds for this task:
-print("# repeats: {}\n# folds: {}".format(n_repeats, n_folds))
+print(f"# repeats: {n_repeats}\n# folds: {n_folds}")
 
 # To extract the training runtime of the first repeat, first fold:
 print(run4.fold_evaluations["wall_clock_time_millis_training"][0][0])
diff --git a/examples/40_paper/2015_neurips_feurer_example.py b/examples/40_paper/2015_neurips_feurer_example.py
index ae59c9ced..5b5f84a0d 100644
--- a/examples/40_paper/2015_neurips_feurer_example.py
+++ b/examples/40_paper/2015_neurips_feurer_example.py
@@ -68,7 +68,7 @@
 
 task_ids = []
 for did in dataset_ids:
-    tasks_ = list(tasks.query("did == {}".format(did)).tid)
+    tasks_ = list(tasks.query(f"did == {did}").tid)
     if len(tasks_) >= 1:  # if there are multiple task, take the one with lowest ID (oldest).
         task_id = min(tasks_)
     else:
diff --git a/examples/40_paper/2018_neurips_perrone_example.py b/examples/40_paper/2018_neurips_perrone_example.py
index 0d72846ac..ec5fe3c60 100644
--- a/examples/40_paper/2018_neurips_perrone_example.py
+++ b/examples/40_paper/2018_neurips_perrone_example.py
@@ -206,7 +206,7 @@ def list_categorical_attributes(flow_type="svm"):
 model.fit(X, y)
 y_pred = model.predict(X)
 
-print("Training RMSE : {:.5}".format(mean_squared_error(y, y_pred)))
+print(f"Training RMSE : {mean_squared_error(y, y_pred):.5}")
 
 
 #############################################################################
diff --git a/openml/base.py b/openml/base.py
index 37693a2ec..fbfb9dfc8 100644
--- a/openml/base.py
+++ b/openml/base.py
@@ -78,7 +78,7 @@ def _apply_repr_template(
             self.__class__.__name__[len("OpenML") :],
         )
         header_text = f"OpenML {name_with_spaces}"
-        header = "{}\n{}\n".format(header_text, "=" * len(header_text))
+        header = f"{header_text}\n{'=' * len(header_text)}\n"
 
         _body_fields: list[tuple[str, str | int | list[str]]] = [
             (k, "None" if v is None else v) for k, v in body_fields
diff --git a/openml/datasets/functions.py b/openml/datasets/functions.py
index 3f3c709f9..d96db6507 100644
--- a/openml/datasets/functions.py
+++ b/openml/datasets/functions.py
@@ -253,7 +253,7 @@ def _list_datasets(
         for operator, value in kwargs.items():
             api_call += f"/{operator}/{value}"
     if data_id is not None:
-        api_call += "/data_id/{}".format(",".join([str(int(i)) for i in data_id]))
+        api_call += f"/data_id/{','.join([str(int(i)) for i in data_id])}"
     return __list_datasets(api_call=api_call, output_format=output_format)
 
 
diff --git a/openml/evaluations/evaluation.py b/openml/evaluations/evaluation.py
index 3cf732f25..3b10e798a 100644
--- a/openml/evaluations/evaluation.py
+++ b/openml/evaluations/evaluation.py
@@ -80,7 +80,7 @@ def __init__(  # noqa: PLR0913
 
     def __repr__(self) -> str:
         header = "OpenML Evaluation"
-        header = "{}\n{}\n".format(header, "=" * len(header))
+        header = f"{header}\n{'=' * len(header)}\n"
 
         fields = {
             "Upload Date": self.upload_time,
diff --git a/openml/evaluations/functions.py b/openml/evaluations/functions.py
index a39096a58..7f3c0a365 100644
--- a/openml/evaluations/functions.py
+++ b/openml/evaluations/functions.py
@@ -207,15 +207,15 @@ def _list_evaluations(
         for operator, value in kwargs.items():
             api_call += f"/{operator}/{value}"
     if tasks is not None:
-        api_call += "/task/{}".format(",".join([str(int(i)) for i in tasks]))
+        api_call += f"/task/{','.join([str(int(i)) for i in tasks])}"
     if setups is not None:
-        api_call += "/setup/{}".format(",".join([str(int(i)) for i in setups]))
+        api_call += f"/setup/{','.join([str(int(i)) for i in setups])}"
     if flows is not None:
-        api_call += "/flow/{}".format(",".join([str(int(i)) for i in flows]))
+        api_call += f"/flow/{','.join([str(int(i)) for i in flows])}"
     if runs is not None:
-        api_call += "/run/{}".format(",".join([str(int(i)) for i in runs]))
+        api_call += f"/run/{','.join([str(int(i)) for i in runs])}"
     if uploaders is not None:
-        api_call += "/uploader/{}".format(",".join([str(int(i)) for i in uploaders]))
+        api_call += f"/uploader/{','.join([str(int(i)) for i in uploaders])}"
     if study is not None:
         api_call += "/study/%d" % study
     if sort_order is not None:
diff --git a/openml/extensions/sklearn/extension.py b/openml/extensions/sklearn/extension.py
index 2d40d03b8..b837c1a18 100644
--- a/openml/extensions/sklearn/extension.py
+++ b/openml/extensions/sklearn/extension.py
@@ -223,7 +223,7 @@ def remove_all_in_parentheses(string: str) -> str:
             # then the pipeline steps are formatted e.g.:
             # step1name=sklearn.submodule.ClassName,step2name...
             components = [component.split(".")[-1] for component in pipeline.split(",")]
-            pipeline = "{}({})".format(pipeline_class, ",".join(components))
+            pipeline = f"{pipeline_class}({','.join(components)})"
             if len(short_name.format(pipeline)) > extra_trim_length:
                 pipeline = f"{pipeline_class}(...,{components[-1]})"
         else:
@@ -483,7 +483,7 @@ def _deserialize_sklearn(  # noqa: PLR0915, C901, PLR0912
         else:
             raise TypeError(o)
         logger.info(
-            "-{} flow_to_sklearn END   o={}, rval={}".format("-" * recursion_depth, o, rval)
+            f"-{'-' * recursion_depth} flow_to_sklearn END   o={o}, rval={rval}"
         )
         return rval
 
@@ -574,7 +574,7 @@ def get_version_information(self) -> list[str]:
         import sklearn
 
         major, minor, micro, _, _ = sys.version_info
-        python_version = "Python_{}.".format(".".join([str(major), str(minor), str(micro)]))
+        python_version = f"Python_{'.'.join([str(major), str(minor), str(micro)])}."
         sklearn_version = f"Sklearn_{sklearn.__version__}."
         numpy_version = f"NumPy_{numpy.__version__}."  # type: ignore
         scipy_version = f"SciPy_{scipy.__version__}."
@@ -628,7 +628,7 @@ def _get_sklearn_description(self, model: Any, char_lim: int = 1024) -> str:
         """
 
         def match_format(s):
-            return "{}\n{}\n".format(s, len(s) * "-")
+            return f"{s}\n{len(s) * '-'}\n"
 
         s = inspect.getdoc(model)
         if s is None:
@@ -680,7 +680,7 @@ def _extract_sklearn_parameter_docstring(self, model) -> None | str:
         """
 
         def match_format(s):
-            return "{}\n{}\n".format(s, len(s) * "-")
+            return f"{s}\n{len(s) * '-'}\n"
 
         s = inspect.getdoc(model)
         if s is None:
@@ -689,7 +689,7 @@ def match_format(s):
             index1 = s.index(match_format("Parameters"))
         except ValueError as e:
             # when sklearn docstring has no 'Parameters' section
-            logger.warning("{} {}".format(match_format("Parameters"), e))
+            logger.warning(f"{match_format('Parameters')} {e}")
             return None
 
         headings = ["Attributes", "Notes", "See also", "Note", "References"]
@@ -1151,7 +1151,7 @@ def _deserialize_model(
         recursion_depth: int,
         strict_version: bool = True,  # noqa: FBT002, FBT001
     ) -> Any:
-        logger.info("-{} deserialize {}".format("-" * recursion_depth, flow.name))
+        logger.info(f"-{'-' * recursion_depth} deserialize {flow.name}")
         model_name = flow.class_name
         self._check_dependencies(flow.dependencies, strict_version=strict_version)
 
@@ -1169,7 +1169,7 @@ def _deserialize_model(
         for name in parameters:
             value = parameters.get(name)
             logger.info(
-                "--{} flow_parameter={}, value={}".format("-" * recursion_depth, name, value)
+                f"--{'-' * recursion_depth} flow_parameter={name}, value={value}"
             )
             rval = self._deserialize_sklearn(
                 value,
@@ -1187,7 +1187,7 @@ def _deserialize_model(
                 continue
             value = components[name]
             logger.info(
-                "--{} flow_component={}, value={}".format("-" * recursion_depth, name, value)
+                f"--{'-' * recursion_depth} flow_component={name}, value={value}"
             )
             rval = self._deserialize_sklearn(
                 value,
diff --git a/openml/runs/functions.py b/openml/runs/functions.py
index b6f950020..f95a56675 100644
--- a/openml/runs/functions.py
+++ b/openml/runs/functions.py
@@ -1204,15 +1204,15 @@ def _list_runs(  # noqa: PLR0913
         for operator, value in kwargs.items():
             api_call += f"/{operator}/{value}"
     if id is not None:
-        api_call += "/run/{}".format(",".join([str(int(i)) for i in id]))
+        api_call += f"/run/{','.join([str(int(i)) for i in id])}"
     if task is not None:
-        api_call += "/task/{}".format(",".join([str(int(i)) for i in task]))
+        api_call += f"/task/{','.join([str(int(i)) for i in task])}"
     if setup is not None:
-        api_call += "/setup/{}".format(",".join([str(int(i)) for i in setup]))
+        api_call += f"/setup/{','.join([str(int(i)) for i in setup])}"
     if flow is not None:
-        api_call += "/flow/{}".format(",".join([str(int(i)) for i in flow]))
+        api_call += f"/flow/{','.join([str(int(i)) for i in flow])}"
     if uploader is not None:
-        api_call += "/uploader/{}".format(",".join([str(int(i)) for i in uploader]))
+        api_call += f"/uploader/{','.join([str(int(i)) for i in uploader])}"
     if study is not None:
         api_call += "/study/%d" % study
     if display_errors:
diff --git a/openml/setups/functions.py b/openml/setups/functions.py
index 877384636..11fc19573 100644
--- a/openml/setups/functions.py
+++ b/openml/setups/functions.py
@@ -212,7 +212,7 @@ def _list_setups(
     """
     api_call = "setup/list"
     if setup is not None:
-        api_call += "/setup/{}".format(",".join([str(int(i)) for i in setup]))
+        api_call += f"/setup/{','.join([str(int(i)) for i in setup])}"
     if kwargs is not None:
         for operator, value in kwargs.items():
             api_call += f"/{operator}/{value}"
diff --git a/openml/setups/setup.py b/openml/setups/setup.py
index e8dc059e7..e186e4d27 100644
--- a/openml/setups/setup.py
+++ b/openml/setups/setup.py
@@ -36,7 +36,7 @@ def __init__(self, setup_id: int, flow_id: int, parameters: dict[int, Any] | Non
 
     def __repr__(self) -> str:
         header = "OpenML Setup"
-        header = "{}\n{}\n".format(header, "=" * len(header))
+        header = f"{header}\n{'=' * len(header)}\n"
 
         fields = {
             "Setup ID": self.setup_id,
@@ -104,7 +104,7 @@ def __init__(  # noqa: PLR0913
 
     def __repr__(self) -> str:
         header = "OpenML Parameter"
-        header = "{}\n{}\n".format(header, "=" * len(header))
+        header = f"{header}\n{'=' * len(header)}\n"
 
         fields = {
             "ID": self.id,
@@ -116,7 +116,7 @@ def __repr__(self) -> str:
         }
         # indented prints for parameter attributes
         # indention = 2 spaces + 1 | + 2 underscores
-        indent = "{}|{}".format(" " * 2, "_" * 2)
+        indent = f"{' ' * 2}|{'_' * 2}"
         parameter_data_type = f"{indent}Data Type"
         fields[parameter_data_type] = self.data_type
         parameter_default = f"{indent}Default"
diff --git a/openml/tasks/functions.py b/openml/tasks/functions.py
index 54030422d..599b86efd 100644
--- a/openml/tasks/functions.py
+++ b/openml/tasks/functions.py
@@ -528,7 +528,7 @@ def _create_task_from_xml(xml: str) -> OpenMLTask:
         TaskType.LEARNING_CURVE: OpenMLLearningCurveTask,
     }.get(task_type)
     if cls is None:
-        raise NotImplementedError("Task type {} not supported.".format(common_kwargs["task_type"]))
+        raise NotImplementedError(f"Task type {common_kwargs['task_type']} not supported.")
     return cls(**common_kwargs)  # type: ignore
 
 
diff --git a/tests/conftest.py b/tests/conftest.py
index 79ee2bbd3..afa2cba6b 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -104,7 +104,7 @@ def delete_remote_files(tracker, flow_names) -> None:
 
     # deleting all collected entities published to test server
     # 'run's are deleted first to prevent dependency issue of entities on deletion
-    logger.info("Entity Types: {}".format(["run", "data", "flow", "task", "study"]))
+    logger.info(f"Entity Types: {['run', 'data', 'flow', 'task', 'study']}")
     for entity_type in ["run", "data", "flow", "task", "study"]:
         logger.info(f"Deleting {entity_type}s...")
         for _i, entity in enumerate(tracker[entity_type]):
diff --git a/tests/test_datasets/test_dataset_functions.py b/tests/test_datasets/test_dataset_functions.py
index a15100070..158dac8f2 100644
--- a/tests/test_datasets/test_dataset_functions.py
+++ b/tests/test_datasets/test_dataset_functions.py
@@ -61,7 +61,7 @@ def _remove_pickle_files(self):
         self.lock_path = os.path.join(openml.config.get_cache_directory(), "locks")
         for did in ["-1", "2"]:
             with lockutils.external_lock(
-                name="datasets.functions.get_dataset:%s" % did,
+                name=f"datasets.functions.get_dataset:{did}",
                 lock_path=self.lock_path,
             ):
                 pickle_path = os.path.join(
@@ -531,7 +531,7 @@ def test_publish_dataset(self):
         dataset.publish()
         TestBase._mark_entity_for_removal("data", dataset.dataset_id)
         TestBase.logger.info(
-            "collected from {}: {}".format(__file__.split("/")[-1], dataset.dataset_id),
+            f"collected from {__file__.split('/')[-1]}: {dataset.dataset_id}",
         )
         assert isinstance(dataset.dataset_id, int)
 
@@ -553,7 +553,7 @@ def test__retrieve_class_labels(self):
 
     def test_upload_dataset_with_url(self):
         dataset = OpenMLDataset(
-            "%s-UploadTestWithURL" % self._get_sentinel(),
+            f"{self._get_sentinel()}-UploadTestWithURL",
             "test",
             data_format="arff",
             version=1,
@@ -562,7 +562,7 @@ def test_upload_dataset_with_url(self):
         dataset.publish()
         TestBase._mark_entity_for_removal("data", dataset.dataset_id)
         TestBase.logger.info(
-            "collected from {}: {}".format(__file__.split("/")[-1], dataset.dataset_id),
+            f"collected from {__file__.split('/')[-1]}: {dataset.dataset_id}",
         )
         assert isinstance(dataset.dataset_id, int)
 
@@ -583,7 +583,7 @@ def _assert_status_of_dataset(self, *, did: int, status: str):
     @pytest.mark.flaky()
     def test_data_status(self):
         dataset = OpenMLDataset(
-            "%s-UploadTestWithURL" % self._get_sentinel(),
+            f"{self._get_sentinel()}-UploadTestWithURL",
             "test",
             "ARFF",
             version=1,
@@ -591,7 +591,7 @@ def test_data_status(self):
         )
         dataset.publish()
         TestBase._mark_entity_for_removal("data", dataset.id)
-        TestBase.logger.info("collected from {}: {}".format(__file__.split("/")[-1], dataset.id))
+        TestBase.logger.info(f"collected from {__file__.split('/')[-1]}: {dataset.id}")
         did = dataset.id
 
         # admin key for test server (only adminds can activate datasets.
@@ -678,7 +678,7 @@ def test_create_dataset_numpy(self):
         attributes = [(f"col_{i}", "REAL") for i in range(data.shape[1])]
 
         dataset = create_dataset(
-            name="%s-NumPy_testing_dataset" % self._get_sentinel(),
+            name=f"{self._get_sentinel()}-NumPy_testing_dataset",
             description="Synthetic dataset created from a NumPy array",
             creator="OpenML tester",
             contributor=None,
@@ -698,7 +698,7 @@ def test_create_dataset_numpy(self):
 
         dataset.publish()
         TestBase._mark_entity_for_removal("data", dataset.id)
-        TestBase.logger.info("collected from {}: {}".format(__file__.split("/")[-1], dataset.id))
+        TestBase.logger.info(f"collected from {__file__.split('/')[-1]}: {dataset.id}")
 
         assert (
             _get_online_dataset_arff(dataset.id) == dataset._dataset
@@ -733,7 +733,7 @@ def test_create_dataset_list(self):
         ]
 
         dataset = create_dataset(
-            name="%s-ModifiedWeather" % self._get_sentinel(),
+            name=f"{self._get_sentinel()}-ModifiedWeather",
             description=("Testing dataset upload when the data is a list of lists"),
             creator="OpenML test",
             contributor=None,
@@ -753,7 +753,7 @@ def test_create_dataset_list(self):
 
         dataset.publish()
         TestBase._mark_entity_for_removal("data", dataset.id)
-        TestBase.logger.info("collected from {}: {}".format(__file__.split("/")[-1], dataset.id))
+        TestBase.logger.info(f"collected from {__file__.split('/')[-1]}: {dataset.id}")
         assert (
             _get_online_dataset_arff(dataset.id) == dataset._dataset
         ), "Uploaded ARFF does not match original one"
@@ -775,7 +775,7 @@ def test_create_dataset_sparse(self):
         ]
 
         xor_dataset = create_dataset(
-            name="%s-XOR" % self._get_sentinel(),
+            name=f"{self._get_sentinel()}-XOR",
             description="Dataset representing the XOR operation",
             creator=None,
             contributor=None,
@@ -794,7 +794,7 @@ def test_create_dataset_sparse(self):
         xor_dataset.publish()
         TestBase._mark_entity_for_removal("data", xor_dataset.id)
         TestBase.logger.info(
-            "collected from {}: {}".format(__file__.split("/")[-1], xor_dataset.id),
+            f"collected from {__file__.split('/')[-1]}: {xor_dataset.id}",
         )
         assert (
             _get_online_dataset_arff(xor_dataset.id) == xor_dataset._dataset
@@ -807,7 +807,7 @@ def test_create_dataset_sparse(self):
         sparse_data = [{0: 0.0}, {1: 1.0, 2: 1.0}, {0: 1.0, 2: 1.0}, {0: 1.0, 1: 1.0}]
 
         xor_dataset = create_dataset(
-            name="%s-XOR" % self._get_sentinel(),
+            name=f"{self._get_sentinel()}-XOR",
             description="Dataset representing the XOR operation",
             creator=None,
             contributor=None,
@@ -826,7 +826,7 @@ def test_create_dataset_sparse(self):
         xor_dataset.publish()
         TestBase._mark_entity_for_removal("data", xor_dataset.id)
         TestBase.logger.info(
-            "collected from {}: {}".format(__file__.split("/")[-1], xor_dataset.id),
+            f"collected from {__file__.split('/')[-1]}: {xor_dataset.id}",
         )
         assert (
             _get_online_dataset_arff(xor_dataset.id) == xor_dataset._dataset
@@ -925,7 +925,7 @@ def test_create_dataset_pandas(self):
         df["windy"] = df["windy"].astype("bool")
         df["play"] = df["play"].astype("category")
         # meta-information
-        name = "%s-pandas_testing_dataset" % self._get_sentinel()
+        name = f"{self._get_sentinel()}-pandas_testing_dataset"
         description = "Synthetic dataset created from a Pandas DataFrame"
         creator = "OpenML tester"
         collection_date = "01-01-2018"
@@ -954,7 +954,7 @@ def test_create_dataset_pandas(self):
         )
         dataset.publish()
         TestBase._mark_entity_for_removal("data", dataset.id)
-        TestBase.logger.info("collected from {}: {}".format(__file__.split("/")[-1], dataset.id))
+        TestBase.logger.info(f"collected from {__file__.split('/')[-1]}: {dataset.id}")
         assert (
             _get_online_dataset_arff(dataset.id) == dataset._dataset
         ), "Uploaded ARFF does not match original one"
@@ -990,7 +990,7 @@ def test_create_dataset_pandas(self):
         )
         dataset.publish()
         TestBase._mark_entity_for_removal("data", dataset.id)
-        TestBase.logger.info("collected from {}: {}".format(__file__.split("/")[-1], dataset.id))
+        TestBase.logger.info(f"collected from {__file__.split('/')[-1]}: {dataset.id}")
         assert (
             _get_online_dataset_arff(dataset.id) == dataset._dataset
         ), "Uploaded ARFF does not match original one"
@@ -1022,7 +1022,7 @@ def test_create_dataset_pandas(self):
         )
         dataset.publish()
         TestBase._mark_entity_for_removal("data", dataset.id)
-        TestBase.logger.info("collected from {}: {}".format(__file__.split("/")[-1], dataset.id))
+        TestBase.logger.info(f"collected from {__file__.split('/')[-1]}: {dataset.id}")
         downloaded_data = _get_online_dataset_arff(dataset.id)
         assert downloaded_data == dataset._dataset, "Uploaded ARFF does not match original one"
         assert "@ATTRIBUTE rnd_str {a, b, c, d, e, f, g}" in downloaded_data
@@ -1049,7 +1049,7 @@ def test_ignore_attributes_dataset(self):
         df["windy"] = df["windy"].astype("bool")
         df["play"] = df["play"].astype("category")
         # meta-information
-        name = "%s-pandas_testing_dataset" % self._get_sentinel()
+        name = f"{self._get_sentinel()}-pandas_testing_dataset"
         description = "Synthetic dataset created from a Pandas DataFrame"
         creator = "OpenML tester"
         collection_date = "01-01-2018"
@@ -1150,7 +1150,7 @@ def test_publish_fetch_ignore_attribute(self):
         df["windy"] = df["windy"].astype("bool")
         df["play"] = df["play"].astype("category")
         # meta-information
-        name = "%s-pandas_testing_dataset" % self._get_sentinel()
+        name = f"{self._get_sentinel()}-pandas_testing_dataset"
         description = "Synthetic dataset created from a Pandas DataFrame"
         creator = "OpenML tester"
         collection_date = "01-01-2018"
@@ -1185,7 +1185,7 @@ def test_publish_fetch_ignore_attribute(self):
         # publish dataset
         dataset.publish()
         TestBase._mark_entity_for_removal("data", dataset.id)
-        TestBase.logger.info("collected from {}: {}".format(__file__.split("/")[-1], dataset.id))
+        TestBase.logger.info(f"collected from {__file__.split('/')[-1]}: {dataset.id}")
         # test if publish was successful
         assert isinstance(dataset.id, int)
 
@@ -1209,7 +1209,7 @@ def _wait_for_dataset_being_processed(
 
     def test_create_dataset_row_id_attribute_error(self):
         # meta-information
-        name = "%s-pandas_testing_dataset" % self._get_sentinel()
+        name = f"{self._get_sentinel()}-pandas_testing_dataset"
         description = "Synthetic dataset created from a Pandas DataFrame"
         creator = "OpenML tester"
         collection_date = "01-01-2018"
@@ -1247,7 +1247,7 @@ def test_create_dataset_row_id_attribute_error(self):
 
     def test_create_dataset_row_id_attribute_inference(self):
         # meta-information
-        name = "%s-pandas_testing_dataset" % self._get_sentinel()
+        name = f"{self._get_sentinel()}-pandas_testing_dataset"
         description = "Synthetic dataset created from a Pandas DataFrame"
         creator = "OpenML tester"
         collection_date = "01-01-2018"
@@ -1291,7 +1291,7 @@ def test_create_dataset_row_id_attribute_inference(self):
             dataset.publish()
             TestBase._mark_entity_for_removal("data", dataset.id)
             TestBase.logger.info(
-                "collected from {}: {}".format(__file__.split("/")[-1], dataset.id),
+                f"collected from {__file__.split('/')[-1]}: {dataset.id}",
             )
             arff_dataset = arff.loads(_get_online_dataset_arff(dataset.id))
             arff_data = np.array(arff_dataset["data"], dtype=object)
@@ -1667,7 +1667,7 @@ def test_delete_dataset(self):
         df["windy"] = df["windy"].astype("bool")
         df["play"] = df["play"].astype("category")
         # meta-information
-        name = "%s-pandas_testing_dataset" % self._get_sentinel()
+        name = f"{self._get_sentinel()}-pandas_testing_dataset"
         description = "Synthetic dataset created from a Pandas DataFrame"
         creator = "OpenML tester"
         collection_date = "01-01-2018"
diff --git a/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py b/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py
index e181aaa15..70024768b 100644
--- a/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py
+++ b/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py
@@ -837,7 +837,7 @@ def test_serialize_complex_flow(self):
         structure = serialized.get_structure("name")
         # OneHotEncoder was moved to _encoders module in 0.20
         module_name_encoder = "_encoders" if Version(sklearn.__version__) >= Version("0.20") else "data"
-        ohe_name = "sklearn.preprocessing.%s.OneHotEncoder" % module_name_encoder
+        ohe_name = f"sklearn.preprocessing.{module_name_encoder}.OneHotEncoder"
         scaler_name = "sklearn.preprocessing.{}.StandardScaler".format(
             "data" if Version(sklearn.__version__) < Version("0.22") else "_data",
         )
@@ -855,7 +855,7 @@ def test_serialize_complex_flow(self):
             boosting_name,
         )
         fixture_name = (
-            "sklearn.model_selection._search.RandomizedSearchCV" "(estimator=%s)" % pipeline_name
+            f"sklearn.model_selection._search.RandomizedSearchCV(estimator={pipeline_name})"
         )
         fixture_structure = {
             ohe_name: ["estimator", "ohe"],
@@ -1542,7 +1542,7 @@ def test_openml_param_name_to_sklearn(self):
         run = openml.runs.run_flow_on_task(flow, task)
         run = run.publish()
         TestBase._mark_entity_for_removal("run", run.run_id)
-        TestBase.logger.info("collected from {}: {}".format(__file__.split("/")[-1], run.run_id))
+        TestBase.logger.info(f"collected from {__file__.split('/')[-1]}: {run.run_id}")
         run = openml.runs.get_run(run.run_id)
         setup = openml.setups.get_setup(run.setup_id)
 
@@ -2105,7 +2105,7 @@ def test__extract_trace_data(self):
             assert len(trace_iteration.parameters) == len(param_grid)
             for param in param_grid:
                 # Prepend with the "parameter_" prefix
-                param_in_trace = "parameter_%s" % param
+                param_in_trace = f"parameter_{param}"
                 assert param_in_trace in trace_iteration.parameters
                 param_value = json.loads(trace_iteration.parameters[param_in_trace])
                 assert param_value in param_grid[param]
diff --git a/tests/test_flows/test_flow.py b/tests/test_flows/test_flow.py
index dafbeaf3c..7533cc0f2 100644
--- a/tests/test_flows/test_flow.py
+++ b/tests/test_flows/test_flow.py
@@ -196,7 +196,7 @@ def test_publish_flow(self):
 
         flow.publish()
         TestBase._mark_entity_for_removal("flow", flow.flow_id, flow.name)
-        TestBase.logger.info("collected from {}: {}".format(__file__.split("/")[-1], flow.flow_id))
+        TestBase.logger.info(f"collected from {__file__.split('/')[-1]}: {flow.flow_id}")
         assert isinstance(flow.flow_id, int)
 
     @pytest.mark.sklearn()
@@ -211,7 +211,7 @@ def test_publish_existing_flow(self, flow_exists_mock):
 
         TestBase._mark_entity_for_removal("flow", flow.flow_id, flow.name)
         TestBase.logger.info(
-            "collected from {}: {}".format(__file__.split("/")[-1], flow.flow_id),
+            f"collected from {__file__.split('/')[-1]}: {flow.flow_id}",
         )
 
     @pytest.mark.sklearn()
@@ -223,7 +223,7 @@ def test_publish_flow_with_similar_components(self):
         flow, _ = self._add_sentinel_to_flow_name(flow, None)
         flow.publish()
         TestBase._mark_entity_for_removal("flow", flow.flow_id, flow.name)
-        TestBase.logger.info("collected from {}: {}".format(__file__.split("/")[-1], flow.flow_id))
+        TestBase.logger.info(f"collected from {__file__.split('/')[-1]}: {flow.flow_id}")
         # For a flow where both components are published together, the upload
         # date should be equal
         assert flow.upload_date == flow.components["lr"].upload_date, (
@@ -238,7 +238,7 @@ def test_publish_flow_with_similar_components(self):
         flow1, sentinel = self._add_sentinel_to_flow_name(flow1, None)
         flow1.publish()
         TestBase._mark_entity_for_removal("flow", flow.flow_id, flow.name)
-        TestBase.logger.info("collected from {}: {}".format(__file__.split("/")[-1], flow1.flow_id))
+        TestBase.logger.info(f"collected from {__file__.split('/')[-1]}: {flow1.flow_id}")
 
         # In order to assign different upload times to the flows!
         time.sleep(1)
@@ -250,7 +250,7 @@ def test_publish_flow_with_similar_components(self):
         flow2, _ = self._add_sentinel_to_flow_name(flow2, sentinel)
         flow2.publish()
         TestBase._mark_entity_for_removal("flow", flow2.flow_id, flow2.name)
-        TestBase.logger.info("collected from {}: {}".format(__file__.split("/")[-1], flow2.flow_id))
+        TestBase.logger.info(f"collected from {__file__.split('/')[-1]}: {flow2.flow_id}")
         # If one component was published before the other, the components in
         # the flow should have different upload dates
         assert flow2.upload_date != flow2.components["dt"].upload_date
@@ -262,7 +262,7 @@ def test_publish_flow_with_similar_components(self):
         # correctly on the server should thus not check the child's parameters!
         flow3.publish()
         TestBase._mark_entity_for_removal("flow", flow3.flow_id, flow3.name)
-        TestBase.logger.info("collected from {}: {}".format(__file__.split("/")[-1], flow3.flow_id))
+        TestBase.logger.info(f"collected from {__file__.split('/')[-1]}: {flow3.flow_id}")
 
     @pytest.mark.sklearn()
     def test_semi_legal_flow(self):
@@ -284,7 +284,7 @@ def test_semi_legal_flow(self):
 
         flow.publish()
         TestBase._mark_entity_for_removal("flow", flow.flow_id, flow.name)
-        TestBase.logger.info("collected from {}: {}".format(__file__.split("/")[-1], flow.flow_id))
+        TestBase.logger.info(f"collected from {__file__.split('/')[-1]}: {flow.flow_id}")
 
     @pytest.mark.sklearn()
     @mock.patch("openml.flows.functions.get_flow")
@@ -337,7 +337,7 @@ def test_publish_error(self, api_call_mock, flow_exists_mock, get_flow_mock):
 
         TestBase._mark_entity_for_removal("flow", flow.flow_id, flow.name)
         TestBase.logger.info(
-            "collected from {}: {}".format(__file__.split("/")[-1], flow.flow_id),
+            f"collected from {__file__.split('/')[-1]}: {flow.flow_id}",
         )
 
         assert get_flow_mock.call_count == 2
@@ -362,7 +362,7 @@ def get_sentinel():
             md5 = hashlib.md5()
             md5.update(str(time.time()).encode("utf-8"))
             sentinel = md5.hexdigest()[:10]
-            return "TEST%s" % sentinel
+            return f"TEST{sentinel}"
 
         name = get_sentinel() + get_sentinel()
         version = get_sentinel()
@@ -397,7 +397,7 @@ def test_existing_flow_exists(self):
             flow = flow.publish()
             TestBase._mark_entity_for_removal("flow", flow.flow_id, flow.name)
             TestBase.logger.info(
-                "collected from {}: {}".format(__file__.split("/")[-1], flow.flow_id),
+                f"collected from {__file__.split('/')[-1]}: {flow.flow_id}",
             )
             # redownload the flow
             flow = openml.flows.get_flow(flow.flow_id)
@@ -460,7 +460,7 @@ def test_sklearn_to_upload_to_flow(self):
 
         flow.publish()
         TestBase._mark_entity_for_removal("flow", flow.flow_id, flow.name)
-        TestBase.logger.info("collected from {}: {}".format(__file__.split("/")[-1], flow.flow_id))
+        TestBase.logger.info(f"collected from {__file__.split('/')[-1]}: {flow.flow_id}")
         assert isinstance(flow.flow_id, int)
 
         # Check whether we can load the flow again
diff --git a/tests/test_flows/test_flow_functions.py b/tests/test_flows/test_flow_functions.py
index b3d5be1a6..86b45dfe7 100644
--- a/tests/test_flows/test_flow_functions.py
+++ b/tests/test_flows/test_flow_functions.py
@@ -288,7 +288,7 @@ def test_sklearn_to_flow_list_of_lists(self):
         self._add_sentinel_to_flow_name(flow)
         flow.publish()
         TestBase._mark_entity_for_removal("flow", flow.flow_id, flow.name)
-        TestBase.logger.info("collected from {}: {}".format(__file__.split("/")[-1], flow.flow_id))
+        TestBase.logger.info(f"collected from {__file__.split('/')[-1]}: {flow.flow_id}")
         # Test deserialization works
         server_flow = openml.flows.get_flow(flow.flow_id, reinstantiate=True)
         assert server_flow.parameters["categories"] == "[[0, 1], [0, 1]]"
@@ -309,7 +309,7 @@ def test_get_flow_reinstantiate_model(self):
         flow = extension.model_to_flow(model)
         flow.publish(raise_error_if_exists=False)
         TestBase._mark_entity_for_removal("flow", flow.flow_id, flow.name)
-        TestBase.logger.info("collected from {}: {}".format(__file__.split("/")[-1], flow.flow_id))
+        TestBase.logger.info(f"collected from {__file__.split('/')[-1]}: {flow.flow_id}")
 
         downloaded_flow = openml.flows.get_flow(flow.flow_id, reinstantiate=True)
         assert isinstance(downloaded_flow.model, sklearn.ensemble.RandomForestClassifier)
@@ -394,7 +394,7 @@ def test_get_flow_id(self):
             flow = openml.extensions.get_extension_by_model(clf).model_to_flow(clf).publish()
             TestBase._mark_entity_for_removal("flow", flow.flow_id, flow.name)
             TestBase.logger.info(
-                "collected from {}: {}".format(__file__.split("/")[-1], flow.flow_id),
+                f"collected from {__file__.split('/')[-1]}: {flow.flow_id}",
             )
 
             assert openml.flows.get_flow_id(model=clf, exact_version=True) == flow.flow_id
diff --git a/tests/test_runs/test_run.py b/tests/test_runs/test_run.py
index ce46b6548..0a7f6343a 100644
--- a/tests/test_runs/test_run.py
+++ b/tests/test_runs/test_run.py
@@ -149,7 +149,7 @@ def test_to_from_filesystem_vanilla(self):
         run_prime.publish()
         TestBase._mark_entity_for_removal("run", run_prime.run_id)
         TestBase.logger.info(
-            "collected from {}: {}".format(__file__.split("/")[-1], run_prime.run_id),
+            f"collected from {__file__.split('/')[-1]}: {run_prime.run_id}",
         )
 
     @pytest.mark.sklearn()
@@ -185,7 +185,7 @@ def test_to_from_filesystem_search(self):
         run_prime.publish()
         TestBase._mark_entity_for_removal("run", run_prime.run_id)
         TestBase.logger.info(
-            "collected from {}: {}".format(__file__.split("/")[-1], run_prime.run_id),
+            f"collected from {__file__.split('/')[-1]}: {run_prime.run_id}",
         )
 
     @pytest.mark.sklearn()
@@ -308,7 +308,7 @@ def test_publish_with_local_loaded_flow(self):
             # Clean up
             TestBase._mark_entity_for_removal("run", loaded_run.run_id)
             TestBase.logger.info(
-                "collected from {}: {}".format(__file__.split("/")[-1], loaded_run.run_id),
+                f"collected from {__file__.split('/')[-1]}: {loaded_run.run_id}",
             )
 
             # make sure the flow is published as part of publishing the run.
@@ -355,7 +355,7 @@ def test_offline_and_online_run_identical(self):
                 # Clean up
                 TestBase._mark_entity_for_removal("run", run.run_id)
                 TestBase.logger.info(
-                    "collected from {}: {}".format(__file__.split("/")[-1], loaded_run.run_id),
+                    f"collected from {__file__.split('/')[-1]}: {loaded_run.run_id}",
                 )
 
     def test_run_setup_string_included_in_xml(self):
diff --git a/tests/test_runs/test_run_functions.py b/tests/test_runs/test_run_functions.py
index 2bd9ee0ed..44e12df88 100644
--- a/tests/test_runs/test_run_functions.py
+++ b/tests/test_runs/test_run_functions.py
@@ -1682,7 +1682,7 @@ def test_run_flow_on_task_downloaded_flow(self):
 
         run.publish()
         TestBase._mark_entity_for_removal("run", run.run_id)
-        TestBase.logger.info("collected from {}: {}".format(__file__.split("/")[-1], run.run_id))
+        TestBase.logger.info(f"collected from {__file__.split('/')[-1]}: {run.run_id}")
 
     @pytest.mark.production()
     def test_format_prediction_non_supervised(self):
diff --git a/tests/test_setups/test_setup_functions.py b/tests/test_setups/test_setup_functions.py
index 259cb98b4..3a273ef9a 100644
--- a/tests/test_setups/test_setup_functions.py
+++ b/tests/test_setups/test_setup_functions.py
@@ -25,7 +25,7 @@ def get_sentinel():
     md5 = hashlib.md5()
     md5.update(str(time.time()).encode("utf-8"))
     sentinel = md5.hexdigest()[:10]
-    return "TEST%s" % sentinel
+    return f"TEST{sentinel}"
 
 
 class TestSetupFunctions(TestBase):
@@ -45,7 +45,7 @@ def test_nonexisting_setup_exists(self):
         flow.name = f"TEST{sentinel}{flow.name}"
         flow.publish()
         TestBase._mark_entity_for_removal("flow", flow.flow_id, flow.name)
-        TestBase.logger.info("collected from {}: {}".format(__file__.split("/")[-1], flow.flow_id))
+        TestBase.logger.info(f"collected from {__file__.split('/')[-1]}: {flow.flow_id}")
 
         # although the flow exists (created as of previous statement),
         # we can be sure there are no setups (yet) as it was just created
@@ -58,7 +58,7 @@ def _existing_setup_exists(self, classif):
         flow.name = f"TEST{get_sentinel()}{flow.name}"
         flow.publish()
         TestBase._mark_entity_for_removal("flow", flow.flow_id, flow.name)
-        TestBase.logger.info("collected from {}: {}".format(__file__.split("/")[-1], flow.flow_id))
+        TestBase.logger.info(f"collected from {__file__.split('/')[-1]}: {flow.flow_id}")
 
         # although the flow exists, we can be sure there are no
         # setups (yet) as it hasn't been ran
@@ -74,7 +74,7 @@ def _existing_setup_exists(self, classif):
         run.flow_id = flow.flow_id
         run.publish()
         TestBase._mark_entity_for_removal("run", run.run_id)
-        TestBase.logger.info("collected from {}: {}".format(__file__.split("/")[-1], run.run_id))
+        TestBase.logger.info(f"collected from {__file__.split('/')[-1]}: {run.run_id}")
         # download the run, as it contains the right setup id
         run = openml.runs.get_run(run.run_id)
 
diff --git a/tests/test_study/test_study_examples.py b/tests/test_study/test_study_examples.py
index 9e5cb4e5e..e3b21fc8c 100644
--- a/tests/test_study/test_study_examples.py
+++ b/tests/test_study/test_study_examples.py
@@ -72,6 +72,6 @@ def test_Figure1a(self):
             run.publish()  # publish the experiment on OpenML (optional)
             TestBase._mark_entity_for_removal("run", run.run_id)
             TestBase.logger.info(
-                "collected from {}: {}".format(__file__.split("/")[-1], run.run_id),
+                f"collected from {__file__.split('/')[-1]}: {run.run_id}",
             )
             TestBase.logger.info("URL for run: %s/run/%d" % (openml.config.server, run.run_id))
diff --git a/tests/test_study/test_study_functions.py b/tests/test_study/test_study_functions.py
index d01a1dcf4..963876a77 100644
--- a/tests/test_study/test_study_functions.py
+++ b/tests/test_study/test_study_functions.py
@@ -88,7 +88,7 @@ def test_publish_benchmark_suite(self):
         )
         study.publish()
         TestBase._mark_entity_for_removal("study", study.id)
-        TestBase.logger.info("collected from {}: {}".format(__file__.split("/")[-1], study.id))
+        TestBase.logger.info(f"collected from {__file__.split('/')[-1]}: {study.id}")
 
         assert study.id > 0
 
@@ -135,7 +135,7 @@ def _test_publish_empty_study_is_allowed(self, explicit: bool):
 
         study.publish()
         TestBase._mark_entity_for_removal("study", study.id)
-        TestBase.logger.info("collected from {}: {}".format(__file__.split("/")[-1], study.id))
+        TestBase.logger.info(f"collected from {__file__.split('/')[-1]}: {study.id}")
 
         assert study.id > 0
         study_downloaded = openml.study.get_study(study.id)
@@ -170,7 +170,7 @@ def test_publish_study(self):
         )
         study.publish()
         TestBase._mark_entity_for_removal("study", study.id)
-        TestBase.logger.info("collected from {}: {}".format(__file__.split("/")[-1], study.id))
+        TestBase.logger.info(f"collected from {__file__.split('/')[-1]}: {study.id}")
         assert study.id > 0
         study_downloaded = openml.study.get_study(study.id)
         assert study_downloaded.alias == fixt_alias
@@ -232,7 +232,7 @@ def test_study_attach_illegal(self):
         )
         study.publish()
         TestBase._mark_entity_for_removal("study", study.id)
-        TestBase.logger.info("collected from {}: {}".format(__file__.split("/")[-1], study.id))
+        TestBase.logger.info(f"collected from {__file__.split('/')[-1]}: {study.id}")
         study_original = openml.study.get_study(study.id)
 
         with pytest.raises(
diff --git a/tests/test_tasks/test_clustering_task.py b/tests/test_tasks/test_clustering_task.py
index bc59ad26c..bc0876228 100644
--- a/tests/test_tasks/test_clustering_task.py
+++ b/tests/test_tasks/test_clustering_task.py
@@ -50,7 +50,7 @@ def test_upload_task(self):
                 task = task.publish()
                 TestBase._mark_entity_for_removal("task", task.id)
                 TestBase.logger.info(
-                    "collected from {}: {}".format(__file__.split("/")[-1], task.id),
+                    f"collected from {__file__.split('/')[-1]}: {task.id}",
                 )
                 # success
                 break
diff --git a/tests/test_tasks/test_task.py b/tests/test_tasks/test_task.py
index ec5a8caf5..8ec8704c3 100644
--- a/tests/test_tasks/test_task.py
+++ b/tests/test_tasks/test_task.py
@@ -53,7 +53,7 @@ def test_upload_task(self):
                 task.publish()
                 TestBase._mark_entity_for_removal("task", task.id)
                 TestBase.logger.info(
-                    "collected from {}: {}".format(__file__.split("/")[-1], task.id),
+                    f"collected from {__file__.split('/')[-1]}: {task.id}",
                 )
                 # success
                 break

From 82603c10317121781b35e350b224595635d5dc06 Mon Sep 17 00:00:00 2001
From: SubhadityaMukherjee <msubhaditya@gmail.com>
Date: Tue, 17 Jun 2025 09:45:10 +0200
Subject: [PATCH 2/9] Ignore .venv in .gitignore

---
 .gitignore | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.gitignore b/.gitignore
index 90548b2c3..5687e41f1 100644
--- a/.gitignore
+++ b/.gitignore
@@ -94,3 +94,4 @@ dmypy.sock
 
 # Tests
 .pytest_cache
+.venv
\ No newline at end of file

From c7bc1a7ff91ac8294eda2b5218c343f9ef955904 Mon Sep 17 00:00:00 2001
From: Subhaditya Mukherjee
 <26865436+SubhadityaMukherjee@users.noreply.github.com>
Date: Tue, 17 Jun 2025 09:48:39 +0200
Subject: [PATCH 3/9] Update CONTRIBUTING.md

Minor formatting change to CONTRIBUTING.md for better readability: move the flynt command into its own fenced code block.
---
 CONTRIBUTING.md | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index da1beed04..698928af0 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -138,7 +138,10 @@ following rules before you submit a pull request:
     - Use the [`str.format`](https://docs.python.org/3/library/stdtypes.html#str.format) over [`printf`](https://docs.python.org/3/library/stdtypes.html#printf-style-string-formatting) style formatting.
      E.g. use `"{} {}".format('hello', 'world')` not `"%s %s" % ('hello', 'world')`.
      (note: old code may still use `printf`-formatting, this is work in progress.)
-     Try to use [`f-strings`](https://docs.python.org/3/library/stdtypes.html#formatted-string-literals-f-strings) if you can. You can use [flynt](https://github.com/ikamensh/flynt) to make sure your contributed code uses f-strings. `flynt {source_file_or_directory}`
+     Try to use [`f-strings`](https://docs.python.org/3/library/stdtypes.html#formatted-string-literals-f-strings) if you can. You can use [flynt](https://github.com/ikamensh/flynt) to make sure your contributed code uses f-strings.
+   ```python
+   flynt {source_file_or_directory}
+   ```
 
 -  If your pull request addresses an issue, please use the pull request title
    to describe the issue and mention the issue number in the pull request description. This will make sure a link back to the original issue is

From 414dd93525c7d92a53548c53f3323a999e39e007 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Tue, 17 Jun 2025 08:02:59 +0000
Subject: [PATCH 4/9] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 openml/extensions/sklearn/extension.py | 12 +++---------
 1 file changed, 3 insertions(+), 9 deletions(-)

diff --git a/openml/extensions/sklearn/extension.py b/openml/extensions/sklearn/extension.py
index b837c1a18..ec402e038 100644
--- a/openml/extensions/sklearn/extension.py
+++ b/openml/extensions/sklearn/extension.py
@@ -482,9 +482,7 @@ def _deserialize_sklearn(  # noqa: PLR0915, C901, PLR0912
             )
         else:
             raise TypeError(o)
-        logger.info(
-            f"-{'-' * recursion_depth} flow_to_sklearn END   o={o}, rval={rval}"
-        )
+        logger.info(f"-{'-' * recursion_depth} flow_to_sklearn END   o={o}, rval={rval}")
         return rval
 
     def model_to_flow(self, model: Any) -> OpenMLFlow:
@@ -1168,9 +1166,7 @@ def _deserialize_model(
 
         for name in parameters:
             value = parameters.get(name)
-            logger.info(
-                f"--{'-' * recursion_depth} flow_parameter={name}, value={value}"
-            )
+            logger.info(f"--{'-' * recursion_depth} flow_parameter={name}, value={value}")
             rval = self._deserialize_sklearn(
                 value,
                 components=components_,
@@ -1186,9 +1182,7 @@ def _deserialize_model(
             if name not in components_:
                 continue
             value = components[name]
-            logger.info(
-                f"--{'-' * recursion_depth} flow_component={name}, value={value}"
-            )
+            logger.info(f"--{'-' * recursion_depth} flow_component={name}, value={value}")
             rval = self._deserialize_sklearn(
                 value,
                 recursion_depth=recursion_depth + 1,

From d66cd4633b7bb1a71978b9957c8fb7af32267604 Mon Sep 17 00:00:00 2001
From: Subhaditya Mukherjee
 <26865436+SubhadityaMukherjee@users.noreply.github.com>
Date: Tue, 17 Jun 2025 11:29:17 +0200
Subject: [PATCH 5/9] Update CONTRIBUTING.md

Co-authored-by: Pieter Gijsbers <p.gijsbers@tue.nl>
---
 CONTRIBUTING.md | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 698928af0..f65e46ab7 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -135,13 +135,9 @@ following rules before you submit a pull request:
     - When creating a multi-line expression with binary operators, break before the operator.
     - Add type hints to all function signatures.
     (note: not all functions have type hints yet, this is work in progress.)
-    - Use the [`str.format`](https://docs.python.org/3/library/stdtypes.html#str.format) over [`printf`](https://docs.python.org/3/library/stdtypes.html#printf-style-string-formatting) style formatting.
-     E.g. use `"{} {}".format('hello', 'world')` not `"%s %s" % ('hello', 'world')`.
-     (note: old code may still use `printf`-formatting, this is work in progress.)
-     Try to use [`f-strings`](https://docs.python.org/3/library/stdtypes.html#formatted-string-literals-f-strings) if you can. You can use [flynt](https://github.com/ikamensh/flynt) to make sure your contributed code uses f-strings.
+    - Use [`f-strings`](https://docs.python.org/3/library/stdtypes.html#formatted-string-literals-f-strings) for text interpolation. You can use [flynt](https://github.com/ikamensh/flynt) to make sure your contributed code uses f-strings.
    ```python
    flynt {source_file_or_directory}
-   ```
 
 -  If your pull request addresses an issue, please use the pull request title
    to describe the issue and mention the issue number in the pull request description. This will make sure a link back to the original issue is

From efb143fd9028d6382b84fb7390d171eeed52b794 Mon Sep 17 00:00:00 2001
From: Subhaditya Mukherjee
 <26865436+SubhadityaMukherjee@users.noreply.github.com>
Date: Tue, 17 Jun 2025 17:38:39 +0200
Subject: [PATCH 6/9] Update openml/datasets/functions.py

Co-authored-by: Pieter Gijsbers <p.gijsbers@tue.nl>
---
 openml/datasets/functions.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/openml/datasets/functions.py b/openml/datasets/functions.py
index e1428dfce..ac5466a44 100644
--- a/openml/datasets/functions.py
+++ b/openml/datasets/functions.py
@@ -191,7 +191,7 @@ def _list_datasets(
             if value is not None:
                 api_call += f"/{operator}/{value}"
     if data_id is not None:
-        api_call += f"/data_id/{','.join([str(int(i)}" for i in data_id]))
+        api_call += f"/data_id/{','.join([str(int(i)) for i in data_id])}"
     return __list_datasets(api_call=api_call)
 
 

From ba780273a5e54b0befdcc998aaf93218516b7786 Mon Sep 17 00:00:00 2001
From: Pieter Gijsbers <p.gijsbers@tue.nl>
Date: Tue, 17 Jun 2025 17:39:19 +0200
Subject: [PATCH 7/9] Update openml/setups/functions.py

---
 openml/setups/functions.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/openml/setups/functions.py b/openml/setups/functions.py
index 6aeec7f98..374911901 100644
--- a/openml/setups/functions.py
+++ b/openml/setups/functions.py
@@ -207,7 +207,7 @@ def _list_setups(
     if offset is not None:
         api_call += f"/offset/{offset}"
     if setup is not None:
-        api_call += f"/setup/{','.join([str(int(i)}" for i in setup]))
+        api_call += f"/setup/{','.join([str(int(i)) for i in setup])}"
     if flow is not None:
         api_call += f"/flow/{flow}"
     if tag is not None:

From a98943f2decb807e282e56085646ec5dcc2cf61f Mon Sep 17 00:00:00 2001
From: SubhadityaMukherjee <msubhaditya@gmail.com>
Date: Tue, 17 Jun 2025 17:41:24 +0200
Subject: [PATCH 8/9] Fix malformed f-string replacement field in test

---
 .../test_sklearn_extension/test_sklearn_extension.py            | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py b/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py
index 6f8cf37c2..891ae7da3 100644
--- a/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py
+++ b/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py
@@ -904,7 +904,7 @@ def test_serialize_complex_flow(self):
             boosting_name,
         )
         fixture_name = (
-            f"sklearn.model_selection._search.RandomizedSearchCV(estimator={pipeline_name)"
+            f"sklearn.model_selection._search.RandomizedSearchCV(estimator={pipeline_name}"
         )
         fixture_structure = {
             ohe_name: ["estimator", "ohe"],

From 2cb58a0857053257b452074fc14bcfec6f91dbef Mon Sep 17 00:00:00 2001
From: Pieter Gijsbers <p.gijsbers@tue.nl>
Date: Wed, 18 Jun 2025 14:52:36 +0200
Subject: [PATCH 9/9] Update
 tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py

---
 .../test_sklearn_extension/test_sklearn_extension.py            | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py b/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py
index 891ae7da3..9913436e4 100644
--- a/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py
+++ b/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py
@@ -904,7 +904,7 @@ def test_serialize_complex_flow(self):
             boosting_name,
         )
         fixture_name = (
-            f"sklearn.model_selection._search.RandomizedSearchCV(estimator={pipeline_name}"
+            f"sklearn.model_selection._search.RandomizedSearchCV(estimator={pipeline_name})"
         )
         fixture_structure = {
             ohe_name: ["estimator", "ohe"],