From dc92df367d72fd9ea9ff1a440b5756284cccaa70 Mon Sep 17 00:00:00 2001
From: nabenabe0928
Date: Tue, 7 Jan 2025 04:29:29 +0100
Subject: [PATCH 1/3] Add OptunaHub example

---
 .../30_extended/benchmark_with_optunahub.py | 84 +++++++++++++++++++
 1 file changed, 84 insertions(+)
 create mode 100644 examples/30_extended/benchmark_with_optunahub.py

diff --git a/examples/30_extended/benchmark_with_optunahub.py b/examples/30_extended/benchmark_with_optunahub.py
new file mode 100644
index 000000000..002ef3169
--- /dev/null
+++ b/examples/30_extended/benchmark_with_optunahub.py
@@ -0,0 +1,84 @@
+"""
====================================================
Hyperparameter Optimization Benchmark with OptunaHub
====================================================

In this tutorial, we walk through how to conduct hyperparameter optimization experiments using OpenML and OptunaHub.
"""
############################################################################
# We first import all the necessary modules.

# License: BSD 3-Clause

import openml
from openml.extensions.sklearn import cat
from openml.extensions.sklearn import cont
import optuna
import optunahub
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestClassifier
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder

############################################################################
# Prepare preprocessors and an OpenML task
# ========================================

# https://www.openml.org/search?type=study&study_type=task&id=218
task_id = 10101
seed = 42
categorical_preproc = ("categorical", OneHotEncoder(sparse_output=False, handle_unknown="ignore"), cat)
numerical_preproc = ("numerical", SimpleImputer(strategy="median"), cont)
preproc = ColumnTransformer([categorical_preproc, numerical_preproc])

############################################################################
# Define a pipeline for the hyperparameter optimization
# =====================================================

# Since we use `OptunaHub <https://hub.optuna.org/>`__ for benchmarking hyperparameter optimization,
# we follow the `Optuna <https://optuna.org/>`__ search space design.
# We can simply pass the parametrized pipeline to `run_model_on_task` to obtain its performance
# on the specified OpenML task.

def objective(trial: optuna.Trial) -> float:
    clf = RandomForestClassifier(
        max_depth=trial.suggest_int("max_depth", 2, 32, log=True),
        # As a fraction, scikit-learn requires min_samples_leaf to lie in (0.0, 0.5].
        min_samples_leaf=trial.suggest_float("min_samples_leaf", 1e-3, 0.5),
        random_state=seed,
    )
    pipe = Pipeline(steps=[("preproc", preproc), ("model", clf)])
    run = openml.runs.run_model_on_task(pipe, task=task_id, avoid_duplicate_runs=False)
    # Use the best fold accuracy of the first repeat as the objective value.
    accuracy = max(run.fold_evaluations["predictive_accuracy"][0].values())
    return accuracy

############################################################################
# Load a sampler from OptunaHub
# =============================

# OptunaHub is a feature-sharing platform for hyperparameter optimization methods.
# For example, we here load a state-of-the-art algorithm from OptunaHub:
# `HEBO <https://github.com/huawei-noah/HEBO>`__, the winning solution of the
# `NeurIPS 2020 Black-Box Optimisation Challenge <https://bbochallenge.com/>`__.

sampler = optunahub.load_module("samplers/hebo").HEBOSampler(seed=seed)
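# If OptunaHub cannot be reached (e.g., in an offline environment), any built-in
# Optuna sampler is a drop-in replacement. A minimal sketch, assuming the default
# TPE sampler is an acceptable substitute for HEBO:
#
#     sampler = optuna.samplers.TPESampler(seed=seed)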
############################################################################
# Optimize the pipeline
# =====================

# We now run the optimization. For more details about the Optuna API,
# please visit `the API reference <https://optuna.readthedocs.io/en/stable/reference/index.html>`__.

study = optuna.create_study(direction="maximize", sampler=sampler)
study.optimize(objective, n_trials=15)

############################################################################
# Visualize the optimization history
# ==================================

# The results are easy to visualize with the Optuna visualization module.
# For more details, please check `the API reference
# <https://optuna.readthedocs.io/en/stable/reference/visualization/index.html>`__.

fig = optuna.visualization.plot_optimization_history(study)
fig.show()

From c322a8fd14e0e813c1649855b535afb244da9997 Mon Sep 17 00:00:00 2001
From: nabenabe0928
Date: Tue, 7 Jan 2025 04:35:29 +0100
Subject: [PATCH 2/3] Add dependencies

---
 examples/30_extended/benchmark_with_optunahub.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/examples/30_extended/benchmark_with_optunahub.py b/examples/30_extended/benchmark_with_optunahub.py
index 002ef3169..6861b8e07 100644
--- a/examples/30_extended/benchmark_with_optunahub.py
+++ b/examples/30_extended/benchmark_with_optunahub.py
@@ -6,7 +6,9 @@
In this tutorial, we walk through how to conduct hyperparameter optimization experiments using OpenML and OptunaHub.
"""
############################################################################
-# We first import all the necessary modules.
+# Please make sure to install the dependencies with
+# ``pip install openml optunahub hebo`` and ``pip install --upgrade pymoo``.
+# Then we import all the necessary modules.

From 9485f5051b1042751f2b05fd4ecd25a7300dc99a Mon Sep 17 00:00:00 2001
From: SubhadityaMukherjee
Date: Wed, 19 Mar 2025 13:19:18 +0100
Subject: [PATCH 3/3] added publishing to openml

---
 .../30_extended/benchmark_with_optunahub.py | 21 +++++++++++++++++--
 1 file changed, 19 insertions(+), 2 deletions(-)

diff --git a/examples/30_extended/benchmark_with_optunahub.py b/examples/30_extended/benchmark_with_optunahub.py
index 6861b8e07..0fd4a63e5 100644
--- a/examples/30_extended/benchmark_with_optunahub.py
+++ b/examples/30_extended/benchmark_with_optunahub.py
@@ -23,6 +23,8 @@
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder

+# Set your OpenML API key if you want to publish the run.
+openml.config.apikey = ""
############################################################################
# Prepare preprocessors and an OpenML task
# ========================================

@@ -30,7 +32,11 @@
# https://www.openml.org/search?type=study&study_type=task&id=218
task_id = 10101
seed = 42
-categorical_preproc = ("categorical", OneHotEncoder(sparse_output=False, handle_unknown="ignore"), cat)
+categorical_preproc = (
+    "categorical",
+    OneHotEncoder(sparse_output=False, handle_unknown="ignore"),
+    cat,
+)
numerical_preproc = ("numerical", SimpleImputer(strategy="median"), cont)
preproc = ColumnTransformer([categorical_preproc, numerical_preproc])

############################################################################
# Define a pipeline for the hyperparameter optimization
# =====================================================

@@ -43,6 +49,7 @@
# We can simply pass the parametrized pipeline to `run_model_on_task` to obtain its performance
# on the specified OpenML task.
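+# `run_model_on_task` evaluates the pipeline locally on the task's predefined
+# cross-validation splits and returns a run object holding per-fold scores.
+# A minimal sketch of aggregating them (assuming, as below, the first repeat
+# of the task's cross-validation procedure):
+#
+#     scores = run.fold_evaluations["predictive_accuracy"][0]  # fold id -> accuracy
+#     mean_accuracy = sum(scores.values()) / len(scores)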
+

def objective(trial: optuna.Trial) -> float:
    clf = RandomForestClassifier(
        max_depth=trial.suggest_int("max_depth", 2, 32, log=True),
        # As a fraction, scikit-learn requires min_samples_leaf to lie in (0.0, 0.5].
        min_samples_leaf=trial.suggest_float("min_samples_leaf", 1e-3, 0.5),
        random_state=seed,
    )
    pipe = Pipeline(steps=[("preproc", preproc), ("model", clf)])
    run = openml.runs.run_model_on_task(pipe, task=task_id, avoid_duplicate_runs=False)
-    accuracy = max(run.fold_evaluations["predictive_accuracy"][0].values())
+    # Publish the run to OpenML only when an API key has been configured.
+    if openml.config.apikey != "":
+        try:
+            run.publish()
+        except Exception as e:
+            print(f"Could not publish the run: {e}")
+    else:
+        print(
+            "If you want to publish your results to OpenML, please set an API key "
+            "via `openml.config.apikey = '<your-api-key>'`."
+        )
+    # Use the best fold accuracy of the first repeat as the objective value.
+    accuracy = max(run.fold_evaluations["predictive_accuracy"][0].values())
    return accuracy

+
############################################################################
# Load a sampler from OptunaHub
# =============================
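############################################################################
# Inspect the best configuration
# ==============================

# Once `study.optimize` has finished, the best trial can be read back from the
# study object. A minimal sketch (the exact values depend on the optimization run):

# print(study.best_params)  # e.g. {"max_depth": ..., "min_samples_leaf": ...}
# print(study.best_value)   # the highest predictive accuracy observed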