Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion openml/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,7 @@ def _resolve_default_cache_dir() -> Path:
"apikey": "",
"server": "https://www.openml.org/api/v1/xml",
"cachedir": _resolve_default_cache_dir(),
"avoid_duplicate_runs": True,
"avoid_duplicate_runs": False,
"retry_policy": "human",
"connection_n_retries": 5,
"show_progress": False,
Expand Down
15 changes: 11 additions & 4 deletions openml/runs/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@
def run_model_on_task( # noqa: PLR0913
model: Any,
task: int | str | OpenMLTask,
avoid_duplicate_runs: bool = True, # noqa: FBT001, FBT002
avoid_duplicate_runs: bool | None = None,
flow_tags: list[str] | None = None,
seed: int | None = None,
add_local_measures: bool = True, # noqa: FBT001, FBT002
Expand All @@ -77,9 +77,10 @@ def run_model_on_task( # noqa: PLR0913
task : OpenMLTask or int or str
Task to perform or Task id.
This may be a model instead if the first argument is an OpenMLTask.
avoid_duplicate_runs : bool, optional (default=True)
avoid_duplicate_runs : bool, optional (default=None)
If True, the run will throw an error if the setup/task combination is already present on
the server. This feature requires an internet connection.
If not set, it will use the default from your openml configuration (False if unset).
flow_tags : List[str], optional (default=None)
A list of tags that the flow should have at creation.
seed: int, optional (default=None)
Expand All @@ -104,6 +105,8 @@ def run_model_on_task( # noqa: PLR0913
flow : OpenMLFlow (optional, only if `return_flow` is True).
Flow generated from the model.
"""
if avoid_duplicate_runs is None:
avoid_duplicate_runs = openml.config.avoid_duplicate_runs
if avoid_duplicate_runs and not config.apikey:
warnings.warn(
"avoid_duplicate_runs is set to True, but no API key is set. "
Expand Down Expand Up @@ -175,7 +178,7 @@ def get_task_and_type_conversion(_task: int | str | OpenMLTask) -> OpenMLTask:
def run_flow_on_task( # noqa: C901, PLR0912, PLR0915, PLR0913
flow: OpenMLFlow,
task: OpenMLTask,
avoid_duplicate_runs: bool = True, # noqa: FBT002, FBT001
avoid_duplicate_runs: bool | None = None,
flow_tags: list[str] | None = None,
seed: int | None = None,
add_local_measures: bool = True, # noqa: FBT001, FBT002
Expand All @@ -195,9 +198,10 @@ def run_flow_on_task( # noqa: C901, PLR0912, PLR0915, PLR0913
all supervised estimators of scikit learn follow this definition of a model.
task : OpenMLTask
Task to perform. This may be an OpenMLFlow instead if the first argument is an OpenMLTask.
avoid_duplicate_runs : bool, optional (default=True)
avoid_duplicate_runs : bool, optional (default=None)
If True, the run will throw an error if the setup/task combination is already present on
the server. This feature requires an internet connection.
If not set, it will use the default from your openml configuration (False if unset).
flow_tags : List[str], optional (default=None)
A list of tags that the flow should have at creation.
seed: int, optional (default=None)
Expand All @@ -221,6 +225,9 @@ def run_flow_on_task( # noqa: C901, PLR0912, PLR0915, PLR0913
if flow_tags is not None and not isinstance(flow_tags, list):
raise ValueError("flow_tags should be a list")

if avoid_duplicate_runs is None:
avoid_duplicate_runs = openml.config.avoid_duplicate_runs

# TODO: At some point in the future do not allow for arguments in old order (changed 6-2018).
# Flexibility currently still allowed due to code-snippet in OpenML100 paper (3-2019).
if isinstance(flow, OpenMLTask) and isinstance(task, OpenMLFlow):
Expand Down
1 change: 0 additions & 1 deletion openml/testing.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,6 @@ def setUp(self, n_levels: int = 1, tmpdir_suffix: str = "") -> None:
self.cached = True
openml.config.apikey = TestBase.apikey
self.production_server = "https://www.openml.org/api/v1/xml"
openml.config.avoid_duplicate_runs = False
openml.config.set_root_cache_directory(str(self.workdir))

# Increase the number of retries to avoid spurious server failures
Expand Down
9 changes: 5 additions & 4 deletions tests/test_openml/test_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,13 +175,14 @@ def test_configuration_file_not_overwritten_on_load():

def test_configuration_loads_booleans(tmp_path):
config_file_content = "avoid_duplicate_runs=true\nshow_progress=false"
with (tmp_path / "config").open("w") as config_file:
tmp_file = tmp_path / "config"
with tmp_file.open("w") as config_file:
config_file.write(config_file_content)
read_config = openml.config._parse_config(tmp_path)
read_config = openml.config._parse_config(tmp_file)
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This was passed a directory... which led to a silent error and an empty config, which went unnoticed because the values below happened to coincide with the old defaults :D


# Explicit test to avoid truthy/falsy modes of other types
assert True == read_config["avoid_duplicate_runs"]
assert False == read_config["show_progress"]
assert read_config["avoid_duplicate_runs"] is True
assert read_config["show_progress"] is False


def test_openml_cache_dir_env_var(tmp_path: Path) -> None:
Expand Down
4 changes: 0 additions & 4 deletions tests/test_runs/test_run.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,6 @@ def test_to_from_filesystem_vanilla(self):
model=model,
task=task,
add_local_measures=False,
avoid_duplicate_runs=False,
upload_flow=True,
)

Expand Down Expand Up @@ -174,7 +173,6 @@ def test_to_from_filesystem_search(self):
model=model,
task=task,
add_local_measures=False,
avoid_duplicate_runs=False,
)

cache_path = os.path.join(self.workdir, "runs", str(random.getrandbits(128)))
Expand Down Expand Up @@ -311,7 +309,6 @@ def test_publish_with_local_loaded_flow(self):
flow=flow,
task=task,
add_local_measures=False,
avoid_duplicate_runs=False,
upload_flow=False,
)

Expand Down Expand Up @@ -351,7 +348,6 @@ def test_offline_and_online_run_identical(self):
flow=flow,
task=task,
add_local_measures=False,
avoid_duplicate_runs=False,
upload_flow=False,
)

Expand Down
14 changes: 1 addition & 13 deletions tests/test_runs/test_run_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -181,14 +181,12 @@ def _rerun_model_and_compare_predictions(self, run_id, model_prime, seed, create
run_prime = openml.runs.run_model_on_task(
model=model_prime,
task=task,
avoid_duplicate_runs=False,
seed=seed,
)
else:
run_prime = openml.runs.run_model_on_task(
model=model_prime,
task=run.task_id,
avoid_duplicate_runs=False,
seed=seed,
)

Expand Down Expand Up @@ -278,7 +276,6 @@ def _remove_random_state(flow):
flow=flow,
task=task,
seed=seed,
avoid_duplicate_runs=openml.config.avoid_duplicate_runs,
)
run_ = run.publish()
TestBase._mark_entity_for_removal("run", run.run_id)
Expand Down Expand Up @@ -414,7 +411,6 @@ def test_run_regression_on_classif_task(self):
openml.runs.run_model_on_task(
model=clf,
task=task,
avoid_duplicate_runs=False,
)

@pytest.mark.sklearn()
Expand Down Expand Up @@ -969,7 +965,6 @@ def test_initialize_cv_from_run(self):
run = openml.runs.run_model_on_task(
model=randomsearch,
task=task,
avoid_duplicate_runs=False,
seed=1,
)
run_ = run.publish()
Expand Down Expand Up @@ -1026,7 +1021,6 @@ def test_local_run_swapped_parameter_order_model(self):
run = openml.runs.run_model_on_task(
task,
clf,
avoid_duplicate_runs=False,
upload_flow=False,
)

Expand Down Expand Up @@ -1055,7 +1049,6 @@ def test_local_run_swapped_parameter_order_flow(self):
run = openml.runs.run_flow_on_task(
task,
flow,
avoid_duplicate_runs=False,
upload_flow=False,
)

Expand Down Expand Up @@ -1083,7 +1076,6 @@ def test_local_run_metric_score(self):
run = openml.runs.run_model_on_task(
model=clf,
task=task,
avoid_duplicate_runs=False,
upload_flow=False,
)

Expand Down Expand Up @@ -1142,7 +1134,6 @@ def test_initialize_model_from_run(self):
run = openml.runs.run_model_on_task(
model=clf,
task=task,
avoid_duplicate_runs=False,
)
run_ = run.publish()
TestBase._mark_entity_for_removal("run", run_.run_id)
Expand Down Expand Up @@ -1251,7 +1242,6 @@ def test_run_with_illegal_flow_id_after_load(self):
run = openml.runs.run_flow_on_task(
task=task,
flow=flow,
avoid_duplicate_runs=False,
upload_flow=False,
)

Expand Down Expand Up @@ -1316,7 +1306,6 @@ def test_run_with_illegal_flow_id_1_after_load(self):
run = openml.runs.run_flow_on_task(
task=task,
flow=flow_new,
avoid_duplicate_runs=False,
upload_flow=False,
)

Expand Down Expand Up @@ -1664,7 +1653,6 @@ def test_run_flow_on_task_downloaded_flow(self):
run = openml.runs.run_flow_on_task(
flow=downloaded_flow,
task=task,
avoid_duplicate_runs=False,
upload_flow=False,
)

Expand Down Expand Up @@ -1913,7 +1901,7 @@ def test_delete_run(self):
task = openml.tasks.get_task(32) # diabetes; crossvalidation

run = openml.runs.run_model_on_task(
model=clf, task=task, seed=rs, avoid_duplicate_runs=False
model=clf, task=task, seed=rs,
)
run.publish()

Expand Down