From dc92df367d72fd9ea9ff1a440b5756284cccaa70 Mon Sep 17 00:00:00 2001
From: nabenabe0928
Date: Tue, 7 Jan 2025 04:29:29 +0100
Subject: [PATCH 1/3] Add OptunaHub example

---
 .../30_extended/benchmark_with_optunahub.py | 84 +++++++++++++++++++
 1 file changed, 84 insertions(+)
 create mode 100644 examples/30_extended/benchmark_with_optunahub.py

diff --git a/examples/30_extended/benchmark_with_optunahub.py b/examples/30_extended/benchmark_with_optunahub.py
new file mode 100644
index 000000000..002ef3169
--- /dev/null
+++ b/examples/30_extended/benchmark_with_optunahub.py
@@ -0,0 +1,84 @@
+"""
====================================================
Hyperparameter Optimization Benchmark with OptunaHub
====================================================

In this tutorial, we walk through how to conduct hyperparameter optimization experiments using OpenML and OptunaHub.
"""
############################################################################
# We first import all the necessary modules.

# License: BSD 3-Clause

import openml
from openml.extensions.sklearn import cat
from openml.extensions.sklearn import cont
import optuna
import optunahub
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestClassifier
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder

############################################################################
# Prepare preprocessors and an OpenML task
# ========================================

# https://www.openml.org/search?type=study&study_type=task&id=218
task_id = 10101
seed = 42
categorical_preproc = ("categorical", OneHotEncoder(sparse_output=False, handle_unknown="ignore"), cat)
numerical_preproc = ("numerical", SimpleImputer(strategy="median"), cont)
preproc = ColumnTransformer([categorical_preproc, numerical_preproc])

############################################################################
# Define a pipeline for the hyperparameter optimization
# =====================================================

# Since we use `OptunaHub <https://hub.optuna.org/>`__ for benchmarking hyperparameter optimization,
# we follow the `Optuna <https://optuna.org/>`__ search space design.
# We can simply pass the parametrized pipeline to `run_model_on_task` to obtain its performance
# on the specified OpenML task.

def objective(trial: optuna.Trial) -> float:
    clf = RandomForestClassifier(
        max_depth=trial.suggest_int("max_depth", 2, 32, log=True),
        # As a fraction, scikit-learn requires min_samples_leaf to lie in (0.0, 0.5].
        min_samples_leaf=trial.suggest_float("min_samples_leaf", 1e-3, 0.5),
        random_state=seed,
    )
    pipe = Pipeline(steps=[("preproc", preproc), ("model", clf)])
    run = openml.runs.run_model_on_task(pipe, task=task_id, avoid_duplicate_runs=False)
    # Use the best fold accuracy of the first repeat as the objective value.
    accuracy = max(run.fold_evaluations["predictive_accuracy"][0].values())
    return accuracy

############################################################################
# Load a sampler from OptunaHub
# =============================

# OptunaHub is a feature-sharing platform for hyperparameter optimization methods.
# For example, we here load a state-of-the-art algorithm from OptunaHub:
# `HEBO <https://github.com/huawei-noah/HEBO>`__, the winning solution of the
# `NeurIPS 2020 Black-Box Optimisation Challenge <https://bbochallenge.com/>`__.

sampler = optunahub.load_module("samplers/hebo").HEBOSampler(seed=seed)
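# If OptunaHub cannot be reached (e.g., in an offline environment), any built-in
# Optuna sampler is a drop-in replacement. A minimal sketch, assuming the default
# TPE sampler is an acceptable substitute for HEBO:
#
#     sampler = optuna.samplers.TPESampler(seed=seed)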
############################################################################
# Optimize the pipeline
# =====================

# We now run the optimization. For more details about the Optuna API,
# please visit `the API reference <https://optuna.readthedocs.io/en/stable/reference/index.html>`__.

study = optuna.create_study(direction="maximize", sampler=sampler)
study.optimize(objective, n_trials=15)

############################################################################
# Visualize the optimization history
# ==================================

# The results are easy to visualize with the Optuna visualization module.
# For more details, please check `the API reference
# <https://optuna.readthedocs.io/en/stable/reference/visualization/index.html>`__.

fig = optuna.visualization.plot_optimization_history(study)
fig.show()

From c322a8fd14e0e813c1649855b535afb244da9997 Mon Sep 17 00:00:00 2001
From: nabenabe0928
Date: Tue, 7 Jan 2025 04:35:29 +0100
Subject: [PATCH 2/3] Add dependencies

---
 examples/30_extended/benchmark_with_optunahub.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/examples/30_extended/benchmark_with_optunahub.py b/examples/30_extended/benchmark_with_optunahub.py
index 002ef3169..6861b8e07 100644
--- a/examples/30_extended/benchmark_with_optunahub.py
+++ b/examples/30_extended/benchmark_with_optunahub.py
@@ -6,7 +6,9 @@
In this tutorial, we walk through how to conduct hyperparameter optimization experiments using OpenML and OptunaHub.
"""
############################################################################
-# We first import all the necessary modules.
+# Please make sure to install the dependencies with
+# ``pip install openml optunahub hebo`` and ``pip install --upgrade pymoo``.
+# Then we import all the necessary modules.

From 9485f5051b1042751f2b05fd4ecd25a7300dc99a Mon Sep 17 00:00:00 2001
From: SubhadityaMukherjee
Date: Wed, 19 Mar 2025 13:19:18 +0100
Subject: [PATCH 3/3] added publishing to openml

---
 .../30_extended/benchmark_with_optunahub.py | 21 +++++++++++++++++--
 1 file changed, 19 insertions(+), 2 deletions(-)

diff --git a/examples/30_extended/benchmark_with_optunahub.py b/examples/30_extended/benchmark_with_optunahub.py
index 6861b8e07..0fd4a63e5 100644
--- a/examples/30_extended/benchmark_with_optunahub.py
+++ b/examples/30_extended/benchmark_with_optunahub.py
@@ -23,6 +23,8 @@
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder

+# Set your OpenML API key if you want to publish the run.
+openml.config.apikey = ""
############################################################################
# Prepare preprocessors and an OpenML task
# ========================================

@@ -30,7 +32,11 @@
# https://www.openml.org/search?type=study&study_type=task&id=218
task_id = 10101
seed = 42
-categorical_preproc = ("categorical", OneHotEncoder(sparse_output=False, handle_unknown="ignore"), cat)
+categorical_preproc = (
+    "categorical",
+    OneHotEncoder(sparse_output=False, handle_unknown="ignore"),
+    cat,
+)
numerical_preproc = ("numerical", SimpleImputer(strategy="median"), cont)
preproc = ColumnTransformer([categorical_preproc, numerical_preproc])

############################################################################
# Define a pipeline for the hyperparameter optimization
# =====================================================

@@ -43,6 +49,7 @@
# We can simply pass the parametrized pipeline to `run_model_on_task` to obtain its performance
# on the specified OpenML task.
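+# `run_model_on_task` evaluates the pipeline locally on the task's predefined
+# cross-validation splits and returns a run object holding per-fold scores.
+# A minimal sketch of aggregating them (assuming, as below, the first repeat
+# of the task's cross-validation procedure):
+#
+#     scores = run.fold_evaluations["predictive_accuracy"][0]  # fold id -> accuracy
+#     mean_accuracy = sum(scores.values()) / len(scores)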
+

def objective(trial: optuna.Trial) -> float:
    clf = RandomForestClassifier(
        max_depth=trial.suggest_int("max_depth", 2, 32, log=True),
        # As a fraction, scikit-learn requires min_samples_leaf to lie in (0.0, 0.5].
        min_samples_leaf=trial.suggest_float("min_samples_leaf", 1e-3, 0.5),
        random_state=seed,
    )
    pipe = Pipeline(steps=[("preproc", preproc), ("model", clf)])
    run = openml.runs.run_model_on_task(pipe, task=task_id, avoid_duplicate_runs=False)
-    accuracy = max(run.fold_evaluations["predictive_accuracy"][0].values())
+    # Publish the run to OpenML only when an API key has been configured.
+    if openml.config.apikey != "":
+        try:
+            run.publish()
+        except Exception as e:
+            print(f"Could not publish the run: {e}")
+    else:
+        print(
+            "If you want to publish your results to OpenML, please set an API key "
+            "via `openml.config.apikey = '<your-api-key>'`."
+        )
+    # Use the best fold accuracy of the first repeat as the objective value.
+    accuracy = max(run.fold_evaluations["predictive_accuracy"][0].values())
    return accuracy

+
############################################################################
# Load a sampler from OptunaHub
# =============================
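############################################################################
# Inspect the best configuration
# ==============================

# Once `study.optimize` has finished, the best trial can be read back from the
# study object. A minimal sketch (the exact values depend on the optimization run):

# print(study.best_params)  # e.g. {"max_depth": ..., "min_samples_leaf": ...}
# print(study.best_value)   # the highest predictive accuracy observed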