From e2e746c6ef51075e4575b1d0b97eca8ebe1f8a36 Mon Sep 17 00:00:00 2001 From: SubhadityaMukherjee Date: Tue, 17 Jun 2025 09:44:52 +0200 Subject: [PATCH 1/9] f strings guidelines --- CONTRIBUTING.md | 1 + .../30_extended/fetch_evaluations_tutorial.py | 2 +- .../30_extended/fetch_runtimes_tutorial.py | 4 +- .../40_paper/2015_neurips_feurer_example.py | 2 +- .../40_paper/2018_neurips_perrone_example.py | 2 +- openml/base.py | 2 +- openml/datasets/functions.py | 2 +- openml/evaluations/evaluation.py | 2 +- openml/evaluations/functions.py | 10 ++-- openml/extensions/sklearn/extension.py | 18 +++---- openml/runs/functions.py | 10 ++-- openml/setups/functions.py | 2 +- openml/setups/setup.py | 6 +-- openml/tasks/functions.py | 2 +- tests/conftest.py | 2 +- tests/test_datasets/test_dataset_functions.py | 50 +++++++++---------- .../test_sklearn_extension.py | 8 +-- tests/test_flows/test_flow.py | 22 ++++---- tests/test_flows/test_flow_functions.py | 6 +-- tests/test_runs/test_run.py | 8 +-- tests/test_runs/test_run_functions.py | 2 +- tests/test_setups/test_setup_functions.py | 8 +-- tests/test_study/test_study_examples.py | 2 +- tests/test_study/test_study_functions.py | 8 +-- tests/test_tasks/test_clustering_task.py | 2 +- tests/test_tasks/test_task.py | 2 +- 26 files changed, 93 insertions(+), 92 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index cc8633f84..da1beed04 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -138,6 +138,7 @@ following rules before you submit a pull request: - Use the [`str.format`](https://docs.python.org/3/library/stdtypes.html#str.format) over [`printf`](https://docs.python.org/3/library/stdtypes.html#printf-style-string-formatting) style formatting. E.g. use `"{} {}".format('hello', 'world')` not `"%s %s" % ('hello', 'world')`. (note: old code may still use `printf`-formatting, this is work in progress.) + Try to use [`f-strings`](https://docs.python.org/3/library/stdtypes.html#formatted-string-literals-f-strings) if you can. You can use [flynt](https://github.com/ikamensh/flynt) to make sure your contributed code uses f-strings. `flynt {source_file_or_directory}` - If your pull request addresses an issue, please use the pull request title to describe the issue and mention the issue number in the pull request description. This will make sure a link back to the original issue is diff --git a/examples/30_extended/fetch_evaluations_tutorial.py b/examples/30_extended/fetch_evaluations_tutorial.py index 86302e2d1..9e9e0f0d9 100644 --- a/examples/30_extended/fetch_evaluations_tutorial.py +++ b/examples/30_extended/fetch_evaluations_tutorial.py @@ -94,7 +94,7 @@ def plot_cdf(values, metric="predictive_accuracy"): plt.minorticks_on() plt.grid(visible=True, which="minor", linestyle="--") plt.axvline(max_val, linestyle="--", color="gray") - plt.text(max_val, 0, "%.3f" % max_val, fontsize=9) + plt.text(max_val, 0, f"{max_val:.3f}", fontsize=9) plt.show() diff --git a/examples/30_extended/fetch_runtimes_tutorial.py b/examples/30_extended/fetch_runtimes_tutorial.py index 107adee79..8adf37d31 100644 --- a/examples/30_extended/fetch_runtimes_tutorial.py +++ b/examples/30_extended/fetch_runtimes_tutorial.py @@ -119,7 +119,7 @@ def print_compare_runtimes(measures): ) for repeat, val1 in measures["predictive_accuracy"].items(): for fold, val2 in val1.items(): - print("Repeat #{}-Fold #{}: {:.4f}".format(repeat, fold, val2)) + print(f"Repeat #{repeat}-Fold #{fold}: {val2:.4f}") print() ################################################################################ @@ -242,7 +242,7 @@ def print_compare_runtimes(measures): # the 2-fold (inner) CV search performed. # We earlier extracted the number of repeats and folds for this task: -print("# repeats: {}\n# folds: {}".format(n_repeats, n_folds)) +print(f"# repeats: {n_repeats}\n# folds: {n_folds}") # To extract the training runtime of the first repeat, first fold: print(run4.fold_evaluations["wall_clock_time_millis_training"][0][0]) diff --git a/examples/40_paper/2015_neurips_feurer_example.py b/examples/40_paper/2015_neurips_feurer_example.py index ae59c9ced..5b5f84a0d 100644 --- a/examples/40_paper/2015_neurips_feurer_example.py +++ b/examples/40_paper/2015_neurips_feurer_example.py @@ -68,7 +68,7 @@ task_ids = [] for did in dataset_ids: - tasks_ = list(tasks.query("did == {}".format(did)).tid) + tasks_ = list(tasks.query(f"did == {did}").tid) if len(tasks_) >= 1: # if there are multiple task, take the one with lowest ID (oldest). task_id = min(tasks_) else: diff --git a/examples/40_paper/2018_neurips_perrone_example.py b/examples/40_paper/2018_neurips_perrone_example.py index 0d72846ac..ec5fe3c60 100644 --- a/examples/40_paper/2018_neurips_perrone_example.py +++ b/examples/40_paper/2018_neurips_perrone_example.py @@ -206,7 +206,7 @@ def list_categorical_attributes(flow_type="svm"): model.fit(X, y) y_pred = model.predict(X) -print("Training RMSE : {:.5}".format(mean_squared_error(y, y_pred))) +print(f"Training RMSE : {mean_squared_error(y, y_pred):.5}") ############################################################################# diff --git a/openml/base.py b/openml/base.py index 37693a2ec..fbfb9dfc8 100644 --- a/openml/base.py +++ b/openml/base.py @@ -78,7 +78,7 @@ def _apply_repr_template( self.__class__.__name__[len("OpenML") :], ) header_text = f"OpenML {name_with_spaces}" - header = "{}\n{}\n".format(header_text, "=" * len(header_text)) + header = f"{header_text}\n{'=' * len(header_text)}\n" _body_fields: list[tuple[str, str | int | list[str]]] = [ (k, "None" if v is None else v) for k, v in body_fields diff --git a/openml/datasets/functions.py b/openml/datasets/functions.py index 3f3c709f9..d96db6507 100644 --- a/openml/datasets/functions.py +++ b/openml/datasets/functions.py @@ -253,7 +253,7 @@ def _list_datasets( for operator, value in kwargs.items(): api_call += f"/{operator}/{value}" if data_id is not None: - api_call += "/data_id/{}".format(",".join([str(int(i)) for i in data_id])) + api_call += f"/data_id/{','.join([str(int(i)) for i in data_id])}" return __list_datasets(api_call=api_call, output_format=output_format) diff --git a/openml/evaluations/evaluation.py b/openml/evaluations/evaluation.py index 3cf732f25..3b10e798a 100644 --- a/openml/evaluations/evaluation.py +++ b/openml/evaluations/evaluation.py @@ -80,7 +80,7 @@ def __init__( # noqa: PLR0913 def __repr__(self) -> str: header = "OpenML Evaluation" - header = "{}\n{}\n".format(header, "=" * len(header)) + header = f"{header}\n{'=' * len(header)}\n" fields = { "Upload Date": self.upload_time, diff --git a/openml/evaluations/functions.py b/openml/evaluations/functions.py index a39096a58..7f3c0a365 100644 --- a/openml/evaluations/functions.py +++ b/openml/evaluations/functions.py @@ -207,15 +207,15 @@ def _list_evaluations( for operator, value in kwargs.items(): api_call += f"/{operator}/{value}" if tasks is not None: - api_call += "/task/{}".format(",".join([str(int(i)) for i in tasks])) + api_call += f"/task/{','.join([str(int(i)) for i in tasks])}" if setups is not None: - api_call += "/setup/{}".format(",".join([str(int(i)) for i in setups])) + api_call += f"/setup/{','.join([str(int(i)) for i in setups])}" if flows is not None: - api_call += "/flow/{}".format(",".join([str(int(i)) for i in flows])) + api_call += f"/flow/{','.join([str(int(i)) for i in flows])}" if runs is not None: - api_call += "/run/{}".format(",".join([str(int(i)) for i in runs])) + api_call += f"/run/{','.join([str(int(i)) for i in runs])}" if uploaders is not None: - api_call += "/uploader/{}".format(",".join([str(int(i)) for i in uploaders])) + api_call += f"/uploader/{','.join([str(int(i)) for i in uploaders])}" if study is not None: api_call += "/study/%d" % study if sort_order is not None: diff --git a/openml/extensions/sklearn/extension.py b/openml/extensions/sklearn/extension.py index 2d40d03b8..b837c1a18 100644 --- a/openml/extensions/sklearn/extension.py +++ b/openml/extensions/sklearn/extension.py @@ -223,7 +223,7 @@ def remove_all_in_parentheses(string: str) -> str: # then the pipeline steps are formatted e.g.: # step1name=sklearn.submodule.ClassName,step2name... components = [component.split(".")[-1] for component in pipeline.split(",")] - pipeline = "{}({})".format(pipeline_class, ",".join(components)) + pipeline = f"{pipeline_class}({','.join(components)})" if len(short_name.format(pipeline)) > extra_trim_length: pipeline = f"{pipeline_class}(...,{components[-1]})" else: @@ -483,7 +483,7 @@ def _deserialize_sklearn( # noqa: PLR0915, C901, PLR0912 else: raise TypeError(o) logger.info( - "-{} flow_to_sklearn END o={}, rval={}".format("-" * recursion_depth, o, rval) + f"-{'-' * recursion_depth} flow_to_sklearn END o={o}, rval={rval}" ) return rval @@ -574,7 +574,7 @@ def get_version_information(self) -> list[str]: import sklearn major, minor, micro, _, _ = sys.version_info - python_version = "Python_{}.".format(".".join([str(major), str(minor), str(micro)])) + python_version = f"Python_{'.'.join([str(major), str(minor), str(micro)])}." sklearn_version = f"Sklearn_{sklearn.__version__}." numpy_version = f"NumPy_{numpy.__version__}." # type: ignore scipy_version = f"SciPy_{scipy.__version__}." @@ -628,7 +628,7 @@ def _get_sklearn_description(self, model: Any, char_lim: int = 1024) -> str: """ def match_format(s): - return "{}\n{}\n".format(s, len(s) * "-") + return f"{s}\n{len(s) * '-'}\n" s = inspect.getdoc(model) if s is None: @@ -680,7 +680,7 @@ def _extract_sklearn_parameter_docstring(self, model) -> None | str: """ def match_format(s): - return "{}\n{}\n".format(s, len(s) * "-") + return f"{s}\n{len(s) * '-'}\n" s = inspect.getdoc(model) if s is None: @@ -689,7 +689,7 @@ def match_format(s): index1 = s.index(match_format("Parameters")) except ValueError as e: # when sklearn docstring has no 'Parameters' section - logger.warning("{} {}".format(match_format("Parameters"), e)) + logger.warning(f"{match_format('Parameters')} {e}") return None headings = ["Attributes", "Notes", "See also", "Note", "References"] @@ -1151,7 +1151,7 @@ def _deserialize_model( recursion_depth: int, strict_version: bool = True, # noqa: FBT002, FBT001 ) -> Any: - logger.info("-{} deserialize {}".format("-" * recursion_depth, flow.name)) + logger.info(f"-{'-' * recursion_depth} deserialize {flow.name}") model_name = flow.class_name self._check_dependencies(flow.dependencies, strict_version=strict_version) @@ -1169,7 +1169,7 @@ def _deserialize_model( for name in parameters: value = parameters.get(name) logger.info( - "--{} flow_parameter={}, value={}".format("-" * recursion_depth, name, value) + f"--{'-' * recursion_depth} flow_parameter={name}, value={value}" ) rval = self._deserialize_sklearn( value, @@ -1187,7 +1187,7 @@ def _deserialize_model( continue value = components[name] logger.info( - "--{} flow_component={}, value={}".format("-" * recursion_depth, name, value) + f"--{'-' * recursion_depth} flow_component={name}, value={value}" ) rval = self._deserialize_sklearn( value, diff --git a/openml/runs/functions.py b/openml/runs/functions.py index b6f950020..f95a56675 100644 --- a/openml/runs/functions.py +++ b/openml/runs/functions.py @@ -1204,15 +1204,15 @@ def _list_runs( # noqa: PLR0913 for operator, value in kwargs.items(): api_call += f"/{operator}/{value}" if id is not None: - api_call += "/run/{}".format(",".join([str(int(i)) for i in id])) + api_call += f"/run/{','.join([str(int(i)) for i in id])}" if task is not None: - api_call += "/task/{}".format(",".join([str(int(i)) for i in task])) + api_call += f"/task/{','.join([str(int(i)) for i in task])}" if setup is not None: - api_call += "/setup/{}".format(",".join([str(int(i)) for i in setup])) + api_call += f"/setup/{','.join([str(int(i)) for i in setup])}" if flow is not None: - api_call += "/flow/{}".format(",".join([str(int(i)) for i in flow])) + api_call += f"/flow/{','.join([str(int(i)) for i in flow])}" if uploader is not None: - api_call += "/uploader/{}".format(",".join([str(int(i)) for i in uploader])) + api_call += f"/uploader/{','.join([str(int(i)) for i in uploader])}" if study is not None: api_call += "/study/%d" % study if display_errors: diff --git a/openml/setups/functions.py b/openml/setups/functions.py index 877384636..11fc19573 100644 --- a/openml/setups/functions.py +++ b/openml/setups/functions.py @@ -212,7 +212,7 @@ def _list_setups( """ api_call = "setup/list" if setup is not None: - api_call += "/setup/{}".format(",".join([str(int(i)) for i in setup])) + api_call += f"/setup/{','.join([str(int(i)) for i in setup])}" if kwargs is not None: for operator, value in kwargs.items(): api_call += f"/{operator}/{value}" diff --git a/openml/setups/setup.py b/openml/setups/setup.py index e8dc059e7..e186e4d27 100644 --- a/openml/setups/setup.py +++ b/openml/setups/setup.py @@ -36,7 +36,7 @@ def __init__(self, setup_id: int, flow_id: int, parameters: dict[int, Any] | Non def __repr__(self) -> str: header = "OpenML Setup" - header = "{}\n{}\n".format(header, "=" * len(header)) + header = f"{header}\n{'=' * len(header)}\n" fields = { "Setup ID": self.setup_id, @@ -104,7 +104,7 @@ def __init__( # noqa: PLR0913 def __repr__(self) -> str: header = "OpenML Parameter" - header = "{}\n{}\n".format(header, "=" * len(header)) + header = f"{header}\n{'=' * len(header)}\n" fields = { "ID": self.id, @@ -116,7 +116,7 @@ def __repr__(self) -> str: } # indented prints for parameter attributes # indention = 2 spaces + 1 | + 2 underscores - indent = "{}|{}".format(" " * 2, "_" * 2) + indent = f"{' ' * 2}|{'_' * 2}" parameter_data_type = f"{indent}Data Type" fields[parameter_data_type] = self.data_type parameter_default = f"{indent}Default" diff --git a/openml/tasks/functions.py b/openml/tasks/functions.py index 54030422d..599b86efd 100644 --- a/openml/tasks/functions.py +++ b/openml/tasks/functions.py @@ -528,7 +528,7 @@ def _create_task_from_xml(xml: str) -> OpenMLTask: TaskType.LEARNING_CURVE: OpenMLLearningCurveTask, }.get(task_type) if cls is None: - raise NotImplementedError("Task type {} not supported.".format(common_kwargs["task_type"])) + raise NotImplementedError(f"Task type {common_kwargs['task_type']} not supported.") return cls(**common_kwargs) # type: ignore diff --git a/tests/conftest.py b/tests/conftest.py index 79ee2bbd3..afa2cba6b 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -104,7 +104,7 @@ def delete_remote_files(tracker, flow_names) -> None: # deleting all collected entities published to test server # 'run's are deleted first to prevent dependency issue of entities on deletion - logger.info("Entity Types: {}".format(["run", "data", "flow", "task", "study"])) + logger.info(f"Entity Types: {['run', 'data', 'flow', 'task', 'study']}") for entity_type in ["run", "data", "flow", "task", "study"]: logger.info(f"Deleting {entity_type}s...") for _i, entity in enumerate(tracker[entity_type]): diff --git a/tests/test_datasets/test_dataset_functions.py b/tests/test_datasets/test_dataset_functions.py index a15100070..158dac8f2 100644 --- a/tests/test_datasets/test_dataset_functions.py +++ b/tests/test_datasets/test_dataset_functions.py @@ -61,7 +61,7 @@ def _remove_pickle_files(self): self.lock_path = os.path.join(openml.config.get_cache_directory(), "locks") for did in ["-1", "2"]: with lockutils.external_lock( - name="datasets.functions.get_dataset:%s" % did, + name=f"datasets.functions.get_dataset:{did}", lock_path=self.lock_path, ): pickle_path = os.path.join( @@ -531,7 +531,7 @@ def test_publish_dataset(self): dataset.publish() TestBase._mark_entity_for_removal("data", dataset.dataset_id) TestBase.logger.info( - "collected from {}: {}".format(__file__.split("/")[-1], dataset.dataset_id), + f"collected from {__file__.split('/')[-1]}: {dataset.dataset_id}", ) assert isinstance(dataset.dataset_id, int) @@ -553,7 +553,7 @@ def test__retrieve_class_labels(self): def test_upload_dataset_with_url(self): dataset = OpenMLDataset( - "%s-UploadTestWithURL" % self._get_sentinel(), + f"{self._get_sentinel()}-UploadTestWithURL", "test", data_format="arff", version=1, @@ -562,7 +562,7 @@ def test_upload_dataset_with_url(self): dataset.publish() TestBase._mark_entity_for_removal("data", dataset.dataset_id) TestBase.logger.info( - "collected from {}: {}".format(__file__.split("/")[-1], dataset.dataset_id), + f"collected from {__file__.split('/')[-1]}: {dataset.dataset_id}", ) assert isinstance(dataset.dataset_id, int) @@ -583,7 +583,7 @@ def _assert_status_of_dataset(self, *, did: int, status: str): @pytest.mark.flaky() def test_data_status(self): dataset = OpenMLDataset( - "%s-UploadTestWithURL" % self._get_sentinel(), + f"{self._get_sentinel()}-UploadTestWithURL", "test", "ARFF", version=1, @@ -591,7 +591,7 @@ def test_data_status(self): ) dataset.publish() TestBase._mark_entity_for_removal("data", dataset.id) - TestBase.logger.info("collected from {}: {}".format(__file__.split("/")[-1], dataset.id)) + TestBase.logger.info(f"collected from {__file__.split('/')[-1]}: {dataset.id}") did = dataset.id # admin key for test server (only adminds can activate datasets. @@ -678,7 +678,7 @@ def test_create_dataset_numpy(self): attributes = [(f"col_{i}", "REAL") for i in range(data.shape[1])] dataset = create_dataset( - name="%s-NumPy_testing_dataset" % self._get_sentinel(), + name=f"{self._get_sentinel()}-NumPy_testing_dataset", description="Synthetic dataset created from a NumPy array", creator="OpenML tester", contributor=None, @@ -698,7 +698,7 @@ def test_create_dataset_numpy(self): dataset.publish() TestBase._mark_entity_for_removal("data", dataset.id) - TestBase.logger.info("collected from {}: {}".format(__file__.split("/")[-1], dataset.id)) + TestBase.logger.info(f"collected from {__file__.split('/')[-1]}: {dataset.id}") assert ( _get_online_dataset_arff(dataset.id) == dataset._dataset @@ -733,7 +733,7 @@ def test_create_dataset_list(self): ] dataset = create_dataset( - name="%s-ModifiedWeather" % self._get_sentinel(), + name=f"{self._get_sentinel()}-ModifiedWeather", description=("Testing dataset upload when the data is a list of lists"), creator="OpenML test", contributor=None, @@ -753,7 +753,7 @@ def test_create_dataset_list(self): dataset.publish() TestBase._mark_entity_for_removal("data", dataset.id) - TestBase.logger.info("collected from {}: {}".format(__file__.split("/")[-1], dataset.id)) + TestBase.logger.info(f"collected from {__file__.split('/')[-1]}: {dataset.id}") assert ( _get_online_dataset_arff(dataset.id) == dataset._dataset ), "Uploaded ARFF does not match original one" @@ -775,7 +775,7 @@ def test_create_dataset_sparse(self): ] xor_dataset = create_dataset( - name="%s-XOR" % self._get_sentinel(), + name=f"{self._get_sentinel()}-XOR", description="Dataset representing the XOR operation", creator=None, contributor=None, @@ -794,7 +794,7 @@ def test_create_dataset_sparse(self): xor_dataset.publish() TestBase._mark_entity_for_removal("data", xor_dataset.id) TestBase.logger.info( - "collected from {}: {}".format(__file__.split("/")[-1], xor_dataset.id), + f"collected from {__file__.split('/')[-1]}: {xor_dataset.id}", ) assert ( _get_online_dataset_arff(xor_dataset.id) == xor_dataset._dataset @@ -807,7 +807,7 @@ def test_create_dataset_sparse(self): sparse_data = [{0: 0.0}, {1: 1.0, 2: 1.0}, {0: 1.0, 2: 1.0}, {0: 1.0, 1: 1.0}] xor_dataset = create_dataset( - name="%s-XOR" % self._get_sentinel(), + name=f"{self._get_sentinel()}-XOR", description="Dataset representing the XOR operation", creator=None, contributor=None, @@ -826,7 +826,7 @@ def test_create_dataset_sparse(self): xor_dataset.publish() TestBase._mark_entity_for_removal("data", xor_dataset.id) TestBase.logger.info( - "collected from {}: {}".format(__file__.split("/")[-1], xor_dataset.id), + f"collected from {__file__.split('/')[-1]}: {xor_dataset.id}", ) assert ( _get_online_dataset_arff(xor_dataset.id) == xor_dataset._dataset @@ -925,7 +925,7 @@ def test_create_dataset_pandas(self): df["windy"] = df["windy"].astype("bool") df["play"] = df["play"].astype("category") # meta-information - name = "%s-pandas_testing_dataset" % self._get_sentinel() + name = f"{self._get_sentinel()}-pandas_testing_dataset" description = "Synthetic dataset created from a Pandas DataFrame" creator = "OpenML tester" collection_date = "01-01-2018" @@ -954,7 +954,7 @@ def test_create_dataset_pandas(self): ) dataset.publish() TestBase._mark_entity_for_removal("data", dataset.id) - TestBase.logger.info("collected from {}: {}".format(__file__.split("/")[-1], dataset.id)) + TestBase.logger.info(f"collected from {__file__.split('/')[-1]}: {dataset.id}") assert ( _get_online_dataset_arff(dataset.id) == dataset._dataset ), "Uploaded ARFF does not match original one" @@ -990,7 +990,7 @@ def test_create_dataset_pandas(self): ) dataset.publish() TestBase._mark_entity_for_removal("data", dataset.id) - TestBase.logger.info("collected from {}: {}".format(__file__.split("/")[-1], dataset.id)) + TestBase.logger.info(f"collected from {__file__.split('/')[-1]}: {dataset.id}") assert ( _get_online_dataset_arff(dataset.id) == dataset._dataset ), "Uploaded ARFF does not match original one" @@ -1022,7 +1022,7 @@ def test_create_dataset_pandas(self): ) dataset.publish() TestBase._mark_entity_for_removal("data", dataset.id) - TestBase.logger.info("collected from {}: {}".format(__file__.split("/")[-1], dataset.id)) + TestBase.logger.info(f"collected from {__file__.split('/')[-1]}: {dataset.id}") downloaded_data = _get_online_dataset_arff(dataset.id) assert downloaded_data == dataset._dataset, "Uploaded ARFF does not match original one" assert "@ATTRIBUTE rnd_str {a, b, c, d, e, f, g}" in downloaded_data @@ -1049,7 +1049,7 @@ def test_ignore_attributes_dataset(self): df["windy"] = df["windy"].astype("bool") df["play"] = df["play"].astype("category") # meta-information - name = "%s-pandas_testing_dataset" % self._get_sentinel() + name = f"{self._get_sentinel()}-pandas_testing_dataset" description = "Synthetic dataset created from a Pandas DataFrame" creator = "OpenML tester" collection_date = "01-01-2018" @@ -1150,7 +1150,7 @@ def test_publish_fetch_ignore_attribute(self): df["windy"] = df["windy"].astype("bool") df["play"] = df["play"].astype("category") # meta-information - name = "%s-pandas_testing_dataset" % self._get_sentinel() + name = f"{self._get_sentinel()}-pandas_testing_dataset" description = "Synthetic dataset created from a Pandas DataFrame" creator = "OpenML tester" collection_date = "01-01-2018" @@ -1185,7 +1185,7 @@ def test_publish_fetch_ignore_attribute(self): # publish dataset dataset.publish() TestBase._mark_entity_for_removal("data", dataset.id) - TestBase.logger.info("collected from {}: {}".format(__file__.split("/")[-1], dataset.id)) + TestBase.logger.info(f"collected from {__file__.split('/')[-1]}: {dataset.id}") # test if publish was successful assert isinstance(dataset.id, int) @@ -1209,7 +1209,7 @@ def _wait_for_dataset_being_processed( def test_create_dataset_row_id_attribute_error(self): # meta-information - name = "%s-pandas_testing_dataset" % self._get_sentinel() + name = f"{self._get_sentinel()}-pandas_testing_dataset" description = "Synthetic dataset created from a Pandas DataFrame" creator = "OpenML tester" collection_date = "01-01-2018" @@ -1247,7 +1247,7 @@ def test_create_dataset_row_id_attribute_error(self): def test_create_dataset_row_id_attribute_inference(self): # meta-information - name = "%s-pandas_testing_dataset" % self._get_sentinel() + name = f"{self._get_sentinel()}-pandas_testing_dataset" description = "Synthetic dataset created from a Pandas DataFrame" creator = "OpenML tester" collection_date = "01-01-2018" @@ -1291,7 +1291,7 @@ def test_create_dataset_row_id_attribute_inference(self): dataset.publish() TestBase._mark_entity_for_removal("data", dataset.id) TestBase.logger.info( - "collected from {}: {}".format(__file__.split("/")[-1], dataset.id), + f"collected from {__file__.split('/')[-1]}: {dataset.id}", ) arff_dataset = arff.loads(_get_online_dataset_arff(dataset.id)) arff_data = np.array(arff_dataset["data"], dtype=object) @@ -1667,7 +1667,7 @@ def test_delete_dataset(self): df["windy"] = df["windy"].astype("bool") df["play"] = df["play"].astype("category") # meta-information - name = "%s-pandas_testing_dataset" % self._get_sentinel() + name = f"{self._get_sentinel()}-pandas_testing_dataset" description = "Synthetic dataset created from a Pandas DataFrame" creator = "OpenML tester" collection_date = "01-01-2018" diff --git a/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py b/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py index e181aaa15..70024768b 100644 --- a/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py +++ b/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py @@ -837,7 +837,7 @@ def test_serialize_complex_flow(self): structure = serialized.get_structure("name") # OneHotEncoder was moved to _encoders module in 0.20 module_name_encoder = "_encoders" if Version(sklearn.__version__) >= Version("0.20") else "data" - ohe_name = "sklearn.preprocessing.%s.OneHotEncoder" % module_name_encoder + ohe_name = f"sklearn.preprocessing.{module_name_encoder}.OneHotEncoder" scaler_name = "sklearn.preprocessing.{}.StandardScaler".format( "data" if Version(sklearn.__version__) < Version("0.22") else "_data", ) @@ -855,7 +855,7 @@ def test_serialize_complex_flow(self): boosting_name, ) fixture_name = ( - "sklearn.model_selection._search.RandomizedSearchCV" "(estimator=%s)" % pipeline_name + f"sklearn.model_selection._search.RandomizedSearchCV(estimator={pipeline_name})" ) fixture_structure = { ohe_name: ["estimator", "ohe"], @@ -1542,7 +1542,7 @@ def test_openml_param_name_to_sklearn(self): run = openml.runs.run_flow_on_task(flow, task) run = run.publish() TestBase._mark_entity_for_removal("run", run.run_id) - TestBase.logger.info("collected from {}: {}".format(__file__.split("/")[-1], run.run_id)) + TestBase.logger.info(f"collected from {__file__.split('/')[-1]}: {run.run_id}") run = openml.runs.get_run(run.run_id) setup = openml.setups.get_setup(run.setup_id) @@ -2105,7 +2105,7 @@ def test__extract_trace_data(self): assert len(trace_iteration.parameters) == len(param_grid) for param in param_grid: # Prepend with the "parameter_" prefix - param_in_trace = "parameter_%s" % param + param_in_trace = f"parameter_{param}" assert param_in_trace in trace_iteration.parameters param_value = json.loads(trace_iteration.parameters[param_in_trace]) assert param_value in param_grid[param] diff --git a/tests/test_flows/test_flow.py b/tests/test_flows/test_flow.py index dafbeaf3c..7533cc0f2 100644 --- a/tests/test_flows/test_flow.py +++ b/tests/test_flows/test_flow.py @@ -196,7 +196,7 @@ def test_publish_flow(self): flow.publish() TestBase._mark_entity_for_removal("flow", flow.flow_id, flow.name) - TestBase.logger.info("collected from {}: {}".format(__file__.split("/")[-1], flow.flow_id)) + TestBase.logger.info(f"collected from {__file__.split('/')[-1]}: {flow.flow_id}") assert isinstance(flow.flow_id, int) @pytest.mark.sklearn() @@ -211,7 +211,7 @@ def test_publish_existing_flow(self, flow_exists_mock): TestBase._mark_entity_for_removal("flow", flow.flow_id, flow.name) TestBase.logger.info( - "collected from {}: {}".format(__file__.split("/")[-1], flow.flow_id), + f"collected from {__file__.split('/')[-1]}: {flow.flow_id}", ) @pytest.mark.sklearn() @@ -223,7 +223,7 @@ def test_publish_flow_with_similar_components(self): flow, _ = self._add_sentinel_to_flow_name(flow, None) flow.publish() TestBase._mark_entity_for_removal("flow", flow.flow_id, flow.name) - TestBase.logger.info("collected from {}: {}".format(__file__.split("/")[-1], flow.flow_id)) + TestBase.logger.info(f"collected from {__file__.split('/')[-1]}: {flow.flow_id}") # For a flow where both components are published together, the upload # date should be equal assert flow.upload_date == flow.components["lr"].upload_date, ( @@ -238,7 +238,7 @@ def test_publish_flow_with_similar_components(self): flow1, sentinel = self._add_sentinel_to_flow_name(flow1, None) flow1.publish() TestBase._mark_entity_for_removal("flow", flow.flow_id, flow.name) - TestBase.logger.info("collected from {}: {}".format(__file__.split("/")[-1], flow1.flow_id)) + TestBase.logger.info(f"collected from {__file__.split('/')[-1]}: {flow1.flow_id}") # In order to assign different upload times to the flows! time.sleep(1) @@ -250,7 +250,7 @@ def test_publish_flow_with_similar_components(self): flow2, _ = self._add_sentinel_to_flow_name(flow2, sentinel) flow2.publish() TestBase._mark_entity_for_removal("flow", flow2.flow_id, flow2.name) - TestBase.logger.info("collected from {}: {}".format(__file__.split("/")[-1], flow2.flow_id)) + TestBase.logger.info(f"collected from {__file__.split('/')[-1]}: {flow2.flow_id}") # If one component was published before the other, the components in # the flow should have different upload dates assert flow2.upload_date != flow2.components["dt"].upload_date @@ -262,7 +262,7 @@ def test_publish_flow_with_similar_components(self): # correctly on the server should thus not check the child's parameters! flow3.publish() TestBase._mark_entity_for_removal("flow", flow3.flow_id, flow3.name) - TestBase.logger.info("collected from {}: {}".format(__file__.split("/")[-1], flow3.flow_id)) + TestBase.logger.info(f"collected from {__file__.split('/')[-1]}: {flow3.flow_id}") @pytest.mark.sklearn() def test_semi_legal_flow(self): @@ -284,7 +284,7 @@ def test_semi_legal_flow(self): flow.publish() TestBase._mark_entity_for_removal("flow", flow.flow_id, flow.name) - TestBase.logger.info("collected from {}: {}".format(__file__.split("/")[-1], flow.flow_id)) + TestBase.logger.info(f"collected from {__file__.split('/')[-1]}: {flow.flow_id}") @pytest.mark.sklearn() @mock.patch("openml.flows.functions.get_flow") @@ -337,7 +337,7 @@ def test_publish_error(self, api_call_mock, flow_exists_mock, get_flow_mock): TestBase._mark_entity_for_removal("flow", flow.flow_id, flow.name) TestBase.logger.info( - "collected from {}: {}".format(__file__.split("/")[-1], flow.flow_id), + f"collected from {__file__.split('/')[-1]}: {flow.flow_id}", ) assert get_flow_mock.call_count == 2 @@ -362,7 +362,7 @@ def get_sentinel(): md5 = hashlib.md5() md5.update(str(time.time()).encode("utf-8")) sentinel = md5.hexdigest()[:10] - return "TEST%s" % sentinel + return f"TEST{sentinel}" name = get_sentinel() + get_sentinel() version = get_sentinel() @@ -397,7 +397,7 @@ def test_existing_flow_exists(self): flow = flow.publish() TestBase._mark_entity_for_removal("flow", flow.flow_id, flow.name) TestBase.logger.info( - "collected from {}: {}".format(__file__.split("/")[-1], flow.flow_id), + f"collected from {__file__.split('/')[-1]}: {flow.flow_id}", ) # redownload the flow flow = openml.flows.get_flow(flow.flow_id) @@ -460,7 +460,7 @@ def test_sklearn_to_upload_to_flow(self): flow.publish() TestBase._mark_entity_for_removal("flow", flow.flow_id, flow.name) - TestBase.logger.info("collected from {}: {}".format(__file__.split("/")[-1], flow.flow_id)) + TestBase.logger.info(f"collected from {__file__.split('/')[-1]}: {flow.flow_id}") assert isinstance(flow.flow_id, int) # Check whether we can load the flow again diff --git a/tests/test_flows/test_flow_functions.py b/tests/test_flows/test_flow_functions.py index b3d5be1a6..86b45dfe7 100644 --- a/tests/test_flows/test_flow_functions.py +++ b/tests/test_flows/test_flow_functions.py @@ -288,7 +288,7 @@ def test_sklearn_to_flow_list_of_lists(self): self._add_sentinel_to_flow_name(flow) flow.publish() TestBase._mark_entity_for_removal("flow", flow.flow_id, flow.name) - TestBase.logger.info("collected from {}: {}".format(__file__.split("/")[-1], flow.flow_id)) + TestBase.logger.info(f"collected from {__file__.split('/')[-1]}: {flow.flow_id}") # Test deserialization works server_flow = openml.flows.get_flow(flow.flow_id, reinstantiate=True) assert server_flow.parameters["categories"] == "[[0, 1], [0, 1]]" @@ -309,7 +309,7 @@ def test_get_flow_reinstantiate_model(self): flow = extension.model_to_flow(model) flow.publish(raise_error_if_exists=False) TestBase._mark_entity_for_removal("flow", flow.flow_id, flow.name) - TestBase.logger.info("collected from {}: {}".format(__file__.split("/")[-1], flow.flow_id)) + TestBase.logger.info(f"collected from {__file__.split('/')[-1]}: {flow.flow_id}") downloaded_flow = openml.flows.get_flow(flow.flow_id, reinstantiate=True) assert isinstance(downloaded_flow.model, sklearn.ensemble.RandomForestClassifier) @@ -394,7 +394,7 @@ def test_get_flow_id(self): flow = openml.extensions.get_extension_by_model(clf).model_to_flow(clf).publish() TestBase._mark_entity_for_removal("flow", flow.flow_id, flow.name) TestBase.logger.info( - "collected from {}: {}".format(__file__.split("/")[-1], flow.flow_id), + f"collected from {__file__.split('/')[-1]}: {flow.flow_id}", ) assert openml.flows.get_flow_id(model=clf, exact_version=True) == flow.flow_id diff --git a/tests/test_runs/test_run.py b/tests/test_runs/test_run.py index ce46b6548..0a7f6343a 100644 --- a/tests/test_runs/test_run.py +++ b/tests/test_runs/test_run.py @@ -149,7 +149,7 @@ def test_to_from_filesystem_vanilla(self): run_prime.publish() TestBase._mark_entity_for_removal("run", run_prime.run_id) TestBase.logger.info( - "collected from {}: {}".format(__file__.split("/")[-1], run_prime.run_id), + f"collected from {__file__.split('/')[-1]}: {run_prime.run_id}", ) @pytest.mark.sklearn() @@ -185,7 +185,7 @@ def test_to_from_filesystem_search(self): run_prime.publish() TestBase._mark_entity_for_removal("run", run_prime.run_id) TestBase.logger.info( - "collected from {}: {}".format(__file__.split("/")[-1], run_prime.run_id), + f"collected from {__file__.split('/')[-1]}: {run_prime.run_id}", ) @pytest.mark.sklearn() @@ -308,7 +308,7 @@ def test_publish_with_local_loaded_flow(self): # Clean up TestBase._mark_entity_for_removal("run", loaded_run.run_id) TestBase.logger.info( - "collected from {}: {}".format(__file__.split("/")[-1], loaded_run.run_id), + f"collected from {__file__.split('/')[-1]}: {loaded_run.run_id}", ) # make sure the flow is published as part of publishing the run. @@ -355,7 +355,7 @@ def test_offline_and_online_run_identical(self): # Clean up TestBase._mark_entity_for_removal("run", run.run_id) TestBase.logger.info( - "collected from {}: {}".format(__file__.split("/")[-1], loaded_run.run_id), + f"collected from {__file__.split('/')[-1]}: {loaded_run.run_id}", ) def test_run_setup_string_included_in_xml(self): diff --git a/tests/test_runs/test_run_functions.py b/tests/test_runs/test_run_functions.py index 2bd9ee0ed..44e12df88 100644 --- a/tests/test_runs/test_run_functions.py +++ b/tests/test_runs/test_run_functions.py @@ -1682,7 +1682,7 @@ def test_run_flow_on_task_downloaded_flow(self): run.publish() TestBase._mark_entity_for_removal("run", run.run_id) - TestBase.logger.info("collected from {}: {}".format(__file__.split("/")[-1], run.run_id)) + TestBase.logger.info(f"collected from {__file__.split('/')[-1]}: {run.run_id}") @pytest.mark.production() def test_format_prediction_non_supervised(self): diff --git a/tests/test_setups/test_setup_functions.py b/tests/test_setups/test_setup_functions.py index 259cb98b4..3a273ef9a 100644 --- a/tests/test_setups/test_setup_functions.py +++ b/tests/test_setups/test_setup_functions.py @@ -25,7 +25,7 @@ def get_sentinel(): md5 = hashlib.md5() md5.update(str(time.time()).encode("utf-8")) sentinel = md5.hexdigest()[:10] - return "TEST%s" % sentinel + return f"TEST{sentinel}" class TestSetupFunctions(TestBase): @@ -45,7 +45,7 @@ def test_nonexisting_setup_exists(self): flow.name = f"TEST{sentinel}{flow.name}" flow.publish() TestBase._mark_entity_for_removal("flow", flow.flow_id, flow.name) - TestBase.logger.info("collected from {}: {}".format(__file__.split("/")[-1], flow.flow_id)) + TestBase.logger.info(f"collected from {__file__.split('/')[-1]}: {flow.flow_id}") # although the flow exists (created as of previous statement), # we can be sure there are no setups (yet) as it was just created @@ -58,7 +58,7 @@ def _existing_setup_exists(self, classif): flow.name = f"TEST{get_sentinel()}{flow.name}" flow.publish() TestBase._mark_entity_for_removal("flow", flow.flow_id, flow.name) - TestBase.logger.info("collected from {}: {}".format(__file__.split("/")[-1], flow.flow_id)) + TestBase.logger.info(f"collected from {__file__.split('/')[-1]}: {flow.flow_id}") # although the flow exists, we can be sure there are no # setups (yet) as it hasn't been ran @@ -74,7 +74,7 @@ def _existing_setup_exists(self, classif): run.flow_id = flow.flow_id run.publish() TestBase._mark_entity_for_removal("run", run.run_id) - TestBase.logger.info("collected from {}: {}".format(__file__.split("/")[-1], run.run_id)) + TestBase.logger.info(f"collected from {__file__.split('/')[-1]}: {run.run_id}") # download the run, as it contains the right setup id run = openml.runs.get_run(run.run_id) diff --git a/tests/test_study/test_study_examples.py b/tests/test_study/test_study_examples.py index 9e5cb4e5e..e3b21fc8c 100644 --- a/tests/test_study/test_study_examples.py +++ b/tests/test_study/test_study_examples.py @@ -72,6 +72,6 @@ def test_Figure1a(self): run.publish() # publish the experiment on OpenML (optional) TestBase._mark_entity_for_removal("run", run.run_id) TestBase.logger.info( - "collected from {}: {}".format(__file__.split("/")[-1], run.run_id), + f"collected from {__file__.split('/')[-1]}: {run.run_id}", ) TestBase.logger.info("URL for run: %s/run/%d" % (openml.config.server, run.run_id)) diff --git a/tests/test_study/test_study_functions.py b/tests/test_study/test_study_functions.py index d01a1dcf4..963876a77 100644 --- a/tests/test_study/test_study_functions.py +++ b/tests/test_study/test_study_functions.py @@ -88,7 +88,7 @@ def test_publish_benchmark_suite(self): ) study.publish() TestBase._mark_entity_for_removal("study", study.id) - TestBase.logger.info("collected from {}: {}".format(__file__.split("/")[-1], study.id)) + TestBase.logger.info(f"collected from {__file__.split('/')[-1]}: {study.id}") assert study.id > 0 @@ -135,7 +135,7 @@ def _test_publish_empty_study_is_allowed(self, explicit: bool): study.publish() TestBase._mark_entity_for_removal("study", study.id) - TestBase.logger.info("collected from {}: {}".format(__file__.split("/")[-1], study.id)) + TestBase.logger.info(f"collected from {__file__.split('/')[-1]}: {study.id}") assert study.id > 0 study_downloaded = openml.study.get_study(study.id) @@ -170,7 +170,7 @@ def test_publish_study(self): ) study.publish() TestBase._mark_entity_for_removal("study", study.id) - TestBase.logger.info("collected from {}: {}".format(__file__.split("/")[-1], study.id)) + TestBase.logger.info(f"collected from {__file__.split('/')[-1]}: {study.id}") assert study.id > 0 study_downloaded = openml.study.get_study(study.id) assert study_downloaded.alias == fixt_alias @@ -232,7 +232,7 @@ def test_study_attach_illegal(self): ) study.publish() TestBase._mark_entity_for_removal("study", study.id) - TestBase.logger.info("collected from {}: {}".format(__file__.split("/")[-1], study.id)) + TestBase.logger.info(f"collected from {__file__.split('/')[-1]}: {study.id}") study_original = openml.study.get_study(study.id) with pytest.raises( diff --git a/tests/test_tasks/test_clustering_task.py b/tests/test_tasks/test_clustering_task.py index bc59ad26c..bc0876228 100644 --- a/tests/test_tasks/test_clustering_task.py +++ b/tests/test_tasks/test_clustering_task.py @@ -50,7 +50,7 @@ def test_upload_task(self): task = task.publish() TestBase._mark_entity_for_removal("task", task.id) TestBase.logger.info( - "collected from {}: {}".format(__file__.split("/")[-1], task.id), + f"collected from {__file__.split('/')[-1]}: {task.id}", ) # success break diff --git a/tests/test_tasks/test_task.py b/tests/test_tasks/test_task.py index ec5a8caf5..8ec8704c3 100644 --- a/tests/test_tasks/test_task.py +++ b/tests/test_tasks/test_task.py @@ -53,7 +53,7 @@ def test_upload_task(self): task.publish() TestBase._mark_entity_for_removal("task", task.id) TestBase.logger.info( - "collected from {}: {}".format(__file__.split("/")[-1], task.id), + f"collected from {__file__.split('/')[-1]}: {task.id}", ) # success break From 82603c10317121781b35e350b224595635d5dc06 Mon Sep 17 00:00:00 2001 From: SubhadityaMukherjee Date: Tue, 17 Jun 2025 09:45:10 +0200 Subject: [PATCH 2/9] f strings guidelines --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 90548b2c3..5687e41f1 100644 --- a/.gitignore +++ b/.gitignore @@ -94,3 +94,4 @@ dmypy.sock # Tests .pytest_cache +.venv \ No newline at end of file From c7bc1a7ff91ac8294eda2b5218c343f9ef955904 Mon Sep 17 00:00:00 2001 From: Subhaditya Mukherjee <26865436+SubhadityaMukherjee@users.noreply.github.com> Date: Tue, 17 Jun 2025 09:48:39 +0200 Subject: [PATCH 3/9] Update CONTRIBUTING.md minor readme changes for better readability --- CONTRIBUTING.md | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index da1beed04..698928af0 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -138,7 +138,10 @@ following rules before you submit a pull request: - Use the [`str.format`](https://docs.python.org/3/library/stdtypes.html#str.format) over [`printf`](https://docs.python.org/3/library/stdtypes.html#printf-style-string-formatting) style formatting. E.g. use `"{} {}".format('hello', 'world')` not `"%s %s" % ('hello', 'world')`. (note: old code may still use `printf`-formatting, this is work in progress.) - Try to use [`f-strings`](https://docs.python.org/3/library/stdtypes.html#formatted-string-literals-f-strings) if you can. You can use [flynt](https://github.com/ikamensh/flynt) to make sure your contributed code uses f-strings. `flynt {source_file_or_directory}` + Try to use [`f-strings`](https://docs.python.org/3/library/stdtypes.html#formatted-string-literals-f-strings) if you can. You can use [flynt](https://github.com/ikamensh/flynt) to make sure your contributed code uses f-strings. + ```python + flynt {source_file_or_directory} + ``` - If your pull request addresses an issue, please use the pull request title to describe the issue and mention the issue number in the pull request description. This will make sure a link back to the original issue is From 414dd93525c7d92a53548c53f3323a999e39e007 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 17 Jun 2025 08:02:59 +0000 Subject: [PATCH 4/9] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- openml/extensions/sklearn/extension.py | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/openml/extensions/sklearn/extension.py b/openml/extensions/sklearn/extension.py index b837c1a18..ec402e038 100644 --- a/openml/extensions/sklearn/extension.py +++ b/openml/extensions/sklearn/extension.py @@ -482,9 +482,7 @@ def _deserialize_sklearn( # noqa: PLR0915, C901, PLR0912 ) else: raise TypeError(o) - logger.info( - f"-{'-' * recursion_depth} flow_to_sklearn END o={o}, rval={rval}" - ) + logger.info(f"-{'-' * recursion_depth} flow_to_sklearn END o={o}, rval={rval}") return rval def model_to_flow(self, model: Any) -> OpenMLFlow: @@ -1168,9 +1166,7 @@ def _deserialize_model( for name in parameters: value = parameters.get(name) - logger.info( - f"--{'-' * recursion_depth} flow_parameter={name}, value={value}" - ) + logger.info(f"--{'-' * recursion_depth} flow_parameter={name}, value={value}") rval = self._deserialize_sklearn( value, components=components_, @@ -1186,9 +1182,7 @@ def _deserialize_model( if name not in components_: continue value = components[name] - logger.info( - f"--{'-' * recursion_depth} flow_component={name}, value={value}" - ) + logger.info(f"--{'-' * recursion_depth} flow_component={name}, value={value}") rval = self._deserialize_sklearn( value, recursion_depth=recursion_depth + 1, From d66cd4633b7bb1a71978b9957c8fb7af32267604 Mon Sep 17 00:00:00 2001 From: Subhaditya Mukherjee <26865436+SubhadityaMukherjee@users.noreply.github.com> Date: Tue, 17 Jun 2025 11:29:17 +0200 Subject: [PATCH 5/9] Update CONTRIBUTING.md Co-authored-by: Pieter Gijsbers --- CONTRIBUTING.md | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 698928af0..f65e46ab7 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -135,13 +135,9 @@ following rules before you submit a pull request: - When creating a multi-line expression with binary operators, break before the operator. - Add type hints to all function signatures. (note: not all functions have type hints yet, this is work in progress.) - - Use the [`str.format`](https://docs.python.org/3/library/stdtypes.html#str.format) over [`printf`](https://docs.python.org/3/library/stdtypes.html#printf-style-string-formatting) style formatting. - E.g. use `"{} {}".format('hello', 'world')` not `"%s %s" % ('hello', 'world')`. - (note: old code may still use `printf`-formatting, this is work in progress.) - Try to use [`f-strings`](https://docs.python.org/3/library/stdtypes.html#formatted-string-literals-f-strings) if you can. You can use [flynt](https://github.com/ikamensh/flynt) to make sure your contributed code uses f-strings. + - Use [`f-strings`](https://docs.python.org/3/library/stdtypes.html#formatted-string-literals-f-strings) for text interpolation. You can use [flynt](https://github.com/ikamensh/flynt) to make sure your contributed code uses f-strings. ```python flynt {source_file_or_directory} - ``` - If your pull request addresses an issue, please use the pull request title to describe the issue and mention the issue number in the pull request description. This will make sure a link back to the original issue is From efb143fd9028d6382b84fb7390d171eeed52b794 Mon Sep 17 00:00:00 2001 From: Subhaditya Mukherjee <26865436+SubhadityaMukherjee@users.noreply.github.com> Date: Tue, 17 Jun 2025 17:38:39 +0200 Subject: [PATCH 6/9] Update openml/datasets/functions.py Co-authored-by: Pieter Gijsbers --- openml/datasets/functions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openml/datasets/functions.py b/openml/datasets/functions.py index e1428dfce..ac5466a44 100644 --- a/openml/datasets/functions.py +++ b/openml/datasets/functions.py @@ -191,7 +191,7 @@ def _list_datasets( if value is not None: api_call += f"/{operator}/{value}" if data_id is not None: - api_call += f"/data_id/{','.join([str(int(i)}" for i in data_id])) + api_call += f"/data_id/{','.join([str(int(i)) for i in data_id])}" return __list_datasets(api_call=api_call) From ba780273a5e54b0befdcc998aaf93218516b7786 Mon Sep 17 00:00:00 2001 From: Pieter Gijsbers Date: Tue, 17 Jun 2025 17:39:19 +0200 Subject: [PATCH 7/9] Update openml/setups/functions.py --- openml/setups/functions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openml/setups/functions.py b/openml/setups/functions.py index 6aeec7f98..374911901 100644 --- a/openml/setups/functions.py +++ b/openml/setups/functions.py @@ -207,7 +207,7 @@ def _list_setups( if offset is not None: api_call += f"/offset/{offset}" if setup is not None: - api_call += f"/setup/{','.join([str(int(i)}" for i in setup])) + api_call += f"/setup/{','.join([str(int(i)) for i in setup])}" if flow is not None: api_call += f"/flow/{flow}" if tag is not None: From a98943f2decb807e282e56085646ec5dcc2cf61f Mon Sep 17 00:00:00 2001 From: SubhadityaMukherjee Date: Tue, 17 Jun 2025 17:41:24 +0200 Subject: [PATCH 8/9] f string issue in test --- .../test_sklearn_extension/test_sklearn_extension.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py b/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py index 6f8cf37c2..891ae7da3 100644 --- a/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py +++ b/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py @@ -904,7 +904,7 @@ def test_serialize_complex_flow(self): boosting_name, ) fixture_name = ( - f"sklearn.model_selection._search.RandomizedSearchCV(estimator={pipeline_name)" + f"sklearn.model_selection._search.RandomizedSearchCV(estimator={pipeline_name}" ) fixture_structure = { ohe_name: ["estimator", "ohe"], From 2cb58a0857053257b452074fc14bcfec6f91dbef Mon Sep 17 00:00:00 2001 From: Pieter Gijsbers Date: Wed, 18 Jun 2025 14:52:36 +0200 Subject: [PATCH 9/9] Update tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py --- .../test_sklearn_extension/test_sklearn_extension.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py b/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py index 891ae7da3..9913436e4 100644 --- a/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py +++ b/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py @@ -904,7 +904,7 @@ def test_serialize_complex_flow(self): boosting_name, ) fixture_name = ( - f"sklearn.model_selection._search.RandomizedSearchCV(estimator={pipeline_name}" + f"sklearn.model_selection._search.RandomizedSearchCV(estimator={pipeline_name})" ) fixture_structure = { ohe_name: ["estimator", "ohe"],