|
32 | 32 |
|
33 | 33 | import sys |
34 | 34 |
|
35 | | -if sys.platform == "win32": # noqa |
| 35 | +if sys.platform == "win32": |
36 | 36 | print( |
37 | 37 | "The pyrfr library (requirement of fanova) can currently not be installed on Windows systems" |
38 | 38 | ) |
39 | | - exit() |
| 39 | + sys.exit() |
40 | 40 |
|
41 | 41 | # DEPRECATED EXAMPLE -- Avoid running this code in our CI/CD pipeline |
42 | 42 | print("This example is deprecated, remove the `if False` in this code to use it manually.") |
43 | 43 | if False: |
44 | 44 | import json |
| 45 | + |
45 | 46 | import fanova |
46 | 47 | import matplotlib.pyplot as plt |
47 | 48 | import pandas as pd |
48 | 49 | import seaborn as sns |
49 | 50 |
|
50 | 51 | import openml |
51 | 52 |
|
52 | | - |
53 | 53 | ############################################################################## |
54 | 54 | # With the advent of automated machine learning, automated hyperparameter |
55 | 55 | # optimization methods are by now routinely used in data mining. However, this |
|
80 | 80 | # important when it is put on a log-scale. All these simplifications can be |
81 | 81 | # addressed by defining a ConfigSpace. For a more elaborate example that uses |
82 | 82 | # this, please see: |
83 | | - # https://github.com/janvanrijn/openml-pimp/blob/d0a14f3eb480f2a90008889f00041bdccc7b9265/examples/plot/plot_fanova_aggregates.py # noqa F401 |
| 83 | + # https://github.com/janvanrijn/openml-pimp/blob/d0a14f3eb480f2a90008889f00041bdccc7b9265/examples/plot/plot_fanova_aggregates.py |
84 | 84 |
|
85 | 85 | suite = openml.study.get_suite("OpenML100") |
86 | 86 | flow_id = 7707 |
|
97 | 97 | if limit_nr_tasks is not None and idx >= limit_nr_tasks: |
98 | 98 | continue |
99 | 99 | print( |
100 | | - "Starting with task %d (%d/%d)" |
101 | | - % (task_id, idx + 1, len(suite.tasks) if limit_nr_tasks is None else limit_nr_tasks) |
| 100 | + f"Starting with task {task_id} ({idx + 1}/{len(suite.tasks) if limit_nr_tasks is None else limit_nr_tasks})" |
102 | 101 | ) |
103 | 102 | # note that we explicitly only include tasks from the benchmark suite that was specified (as per the for-loop) |
104 | 103 | evals = openml.evaluations.list_evaluations_setups( |
|
121 | 120 | [ |
122 | 121 | dict( |
123 | 122 | **{name: json.loads(value) for name, value in setup["parameters"].items()}, |
124 | | - **{performance_column: setup[performance_column]} |
| 123 | + **{performance_column: setup[performance_column]}, |
125 | 124 | ) |
126 | 125 | for _, setup in evals.iterrows() |
127 | 126 | ] |
128 | 127 | ) |
129 | 128 | except json.decoder.JSONDecodeError as e: |
130 | | - print("Task %d error: %s" % (task_id, e)) |
| 129 | + print(f"Task {task_id} error: {e}") |
131 | 130 | continue |
132 | 131 | # apply our filters, to have only the setups that comply with the hyperparameters we want |
133 | 132 | for filter_key, filter_value in parameter_filters.items(): |
|
156 | 155 | Y=setups_evals[performance_column].to_numpy(), |
157 | 156 | n_trees=n_trees, |
158 | 157 | ) |
159 | | - for idx, pname in enumerate(parameter_names): |
| 158 | + for idx, pname in enumerate(parameter_names): # noqa: PLW2901 |
160 | 159 | try: |
161 | 160 | fanova_results.append( |
162 | 161 | { |
163 | 162 | "hyperparameter": pname.split(".")[-1], |
164 | | - "fanova": evaluator.quantify_importance([idx])[(idx,)]["individual importance"], |
| 163 | + "fanova": evaluator.quantify_importance([idx])[(idx,)][ |
| 164 | + "individual importance" |
| 165 | + ], |
165 | 166 | } |
166 | 167 | ) |
167 | 168 | except RuntimeError as e: |
168 | 169 | # functional ANOVA sometimes crashes with a RuntimeError, e.g., on tasks where the performance is constant |
169 | 170 | # for all configurations (there is no variance). We will skip these tasks (like the authors did in the |
170 | 171 | # paper). |
171 | | - print("Task %d error: %s" % (task_id, e)) |
| 172 | + print(f"Task {task_id} error: {e}") |
172 | 173 | continue |
173 | 174 |
|
174 | 175 | # transform ``fanova_results`` from a list of dicts into a DataFrame |
|
0 commit comments