 from sklearn.gaussian_process import GaussianProcessRegressor
 from sklearn.ensemble import RandomForestRegressor
 from sklearn.gaussian_process.kernels import Matern
+from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
+
 import scipy.stats as st
 from joblib import Parallel, delayed
 from time import time
@@ -302,12 +304,25 @@ def close_shelve(self):
 
     # fit predict
     def surrogate_fit_predict(
-        self, X_train, y_train, X_test, return_std=False, return_pi=False
+        self, X_train, y_train, X_test, return_std=False, return_pi=False,
+        param_search_init_design=False, param_distributions=None, **kwargs
     ):
 
         if len(X_train.shape) == 1:
             X_train = X_train.reshape((-1, 1))
             X_test = X_test.reshape((-1, 1))
+
+        if X_train.shape[0] <= self.n_init and param_search_init_design == True:  # on initial design
+            try:
+                rs_obj = RandomizedSearchCV(self.surrogate_obj,
+                                            param_distributions=param_distributions,
+                                            random_state=42,
+                                            cv=3,
+                                            **kwargs)
+                rs_obj.fit(X_train, y_train)
+                self.surrogate_obj = rs_obj.best_estimator_
+            except Exception as e:
+                print(str(e))
 
         # Get mean and standard deviation (+ lower and upper for not GPs)
         assert (
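For reference, here is a minimal standalone sketch of the random-search step added above, with an illustrative (assumed, not part of this patch) `param_distributions` for a `GaussianProcessRegressor` surrogate:

    import numpy as np
    from scipy.stats import loguniform
    from sklearn.gaussian_process import GaussianProcessRegressor
    from sklearn.model_selection import RandomizedSearchCV

    # illustrative search space: tune the GP's noise regularization 'alpha'
    param_distributions = {"alpha": loguniform(1e-10, 1e-1)}

    X_train = np.random.rand(20, 2)  # stand-in for the initial design points
    y_train = np.random.rand(20)     # stand-in for the objective values

    rs_obj = RandomizedSearchCV(
        GaussianProcessRegressor(),  # stand-in for self.surrogate_obj
        param_distributions=param_distributions,
        n_iter=10,
        cv=3,
        random_state=42,
    )
    rs_obj.fit(X_train, y_train)
    best_surrogate = rs_obj.best_estimator_  # what surrogate_fit_predict keeps
    print(rs_obj.best_params_)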
@@ -458,6 +473,8 @@ def optimize(
         ucb_tol=None,
         min_budget=50,  # minimum budget for early stopping
         func_args=None,
+        param_search_init_design=False,
+        param_distributions=None
     ):
         """Launch optimization loop.
 
@@ -482,7 +499,17 @@ def optimize(
                 minimum number of iterations before early stopping controlled by `abs_tol`
 
             func_args: a list;
-                additional parameters for the objective function (if necessary)
+                additional parameters for the objective function (if necessary)
+
+            param_search_init_design: a boolean;
+                whether the surrogate's hyperparameters are tuned by random search on the initial design or not
+
+            param_distributions: dict or list of dicts;
+                dictionary with parameter names (str) as keys and distributions or lists of
+                parameters to try; distributions must provide an `rvs` method for sampling
+                (such as those from scipy.stats.distributions); if a list is given, it
+                is sampled uniformly; if a list of dicts is given, first a dict is sampled
+                uniformly, and then a parameter is sampled using that dict as above
 
         see also [Bayesian Optimization with GPopt](https://thierrymoudiki.github.io/blog/2021/04/16/python/misc/gpopt)
         and [Hyperparameters tuning with GPopt](https://thierrymoudiki.github.io/blog/2021/06/11/python/misc/hyperparam-tuning-gpopt)
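A usage sketch for the two new `optimize` arguments documented above (the toy objective, bounds, and `alpha` distribution are illustrative assumptions; the constructor argument names follow the GPopt blog posts linked above and may differ across versions):

    import numpy as np
    import GPopt as gp
    from scipy.stats import loguniform

    # toy objective to minimize (illustrative only)
    def obj(x):
        return np.sum((x - 0.5) ** 2)

    gp_opt = gp.GPOpt(
        objective_func=obj,
        lower_bound=np.array([0.0, 0.0]),
        upper_bound=np.array([1.0, 1.0]),
        n_init=10,
        n_iter=50,
    )

    # tune the surrogate by random search, on the initial design only,
    # using an assumed search space for the default GP surrogate's 'alpha'
    res = gp_opt.optimize(
        verbose=1,
        param_search_init_design=True,
        param_distributions={"alpha": loguniform(1e-10, 1e-1)},
    )
    print(res)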
@@ -601,64 +628,138 @@ def optimize(
 
         # current gp mean and std on initial design
         # /!\ if GP
-        if self.method == "bayesian":
-            self.posterior_ = "gaussian"
-            try:
-                y_mean, y_std = self.surrogate_fit_predict(
+        if param_search_init_design == False:
+
+            if self.method == "bayesian":
+                self.posterior_ = "gaussian"
+                try:
+                    y_mean, y_std = self.surrogate_fit_predict(
+                        np.asarray(self.parameters),
+                        np.asarray(self.scores),
+                        self.x_choices,
+                        return_std=True,
+                        return_pi=False,
+                    )
+                except ValueError:  # do not remove this
+                    preds_with_std = self.surrogate_fit_predict(
+                        np.asarray(self.parameters),
+                        np.asarray(self.scores),
+                        self.x_choices,
+                        return_std=True,
+                        return_pi=False,
+                    )
+                    y_mean, y_std = preds_with_std[0], preds_with_std[1]
+                self.y_mean = y_mean
+                self.y_std = np.maximum(2.220446049250313e-16, y_std)
+
+            elif self.method == "mc":
+
+                self.posterior_ = "mc"
+                assert self.surrogate_obj.__class__.__name__.startswith(
+                    "CustomRegressor"
+                ) or self.surrogate_obj.__class__.__name__.startswith(
+                    "PredictionInterval"
+                ), "for `method = 'mc'`, the surrogate must be a nnetsauce.CustomRegressor() or nnetsauce.PredictionInterval()"
+                assert (
+                    self.surrogate_obj.replications is not None
+                ), "for `method = 'mc'`, the surrogate must be a nnetsauce.CustomRegressor() with a number of 'replications' provided"
+                preds_with_std = self.surrogate_fit_predict(
                     np.asarray(self.parameters),
                     np.asarray(self.scores),
                     self.x_choices,
-                    return_std=True,
-                    return_pi=False,
+                    return_std=False,
+                    return_pi=True,
                 )
-            except ValueError:  # do not remove this
+                y_mean, y_std = preds_with_std[0], preds_with_std[1]
+                self.y_mean = y_mean
+                self.y_std = np.maximum(2.220446049250313e-16, y_std)
+
+            elif self.method == "splitconformal":
+                self.posterior_ = None
+                #assert self.surrogate_obj.__class__.__name__.startswith(
+                #    "PredictionInterval"
+                #), "for `method = 'splitconformal'`, the surrogate must be a nnetsauce.PredictionInterval()"
+                preds_with_pi = self.surrogate_fit_predict(
+                    np.asarray(self.parameters),
+                    np.asarray(self.scores),
+                    self.x_choices,
+                    return_std=False,
+                    return_pi=True,
+                )
+                y_lower = preds_with_pi[1]
+                self.lower = y_lower
+
+        else:
+
+            assert param_distributions is not None, \
+                "When 'param_search_init_design' is True, 'param_distributions' must be provided"
+
+            if self.method == "bayesian":
+                self.posterior_ = "gaussian"
+                try:
+                    y_mean, y_std = self.surrogate_fit_predict(
+                        np.asarray(self.parameters),
+                        np.asarray(self.scores),
+                        self.x_choices,
+                        return_std=True,
+                        return_pi=False,
+                        param_search_init_design=True,
+                        param_distributions=param_distributions
+                    )
+                except ValueError:  # do not remove this
+                    preds_with_std = self.surrogate_fit_predict(
+                        np.asarray(self.parameters),
+                        np.asarray(self.scores),
+                        self.x_choices,
+                        return_std=True,
+                        return_pi=False,
+                        param_search_init_design=True,
+                        param_distributions=param_distributions
+                    )
+                    y_mean, y_std = preds_with_std[0], preds_with_std[1]
+                self.y_mean = y_mean
+                self.y_std = np.maximum(2.220446049250313e-16, y_std)
+
+            elif self.method == "mc":
+
+                self.posterior_ = "mc"
+                assert self.surrogate_obj.__class__.__name__.startswith(
+                    "CustomRegressor"
+                ) or self.surrogate_obj.__class__.__name__.startswith(
+                    "PredictionInterval"
+                ), "for `method = 'mc'`, the surrogate must be a nnetsauce.CustomRegressor() or nnetsauce.PredictionInterval()"
+                assert (
+                    self.surrogate_obj.replications is not None
+                ), "for `method = 'mc'`, the surrogate must be a nnetsauce.CustomRegressor() with a number of 'replications' provided"
                 preds_with_std = self.surrogate_fit_predict(
                     np.asarray(self.parameters),
                     np.asarray(self.scores),
                     self.x_choices,
-                    return_std=True,
-                    return_pi=False,
+                    return_std=False,
+                    return_pi=True,
+                    param_search_init_design=True,
+                    param_distributions=param_distributions
                 )
                 y_mean, y_std = preds_with_std[0], preds_with_std[1]
-            self.y_mean = y_mean
-            self.y_std = np.maximum(2.220446049250313e-16, y_std)
+                self.y_mean = y_mean
+                self.y_std = np.maximum(2.220446049250313e-16, y_std)
 
-
-        elif self.method == "mc":
-            self.posterior_ = "mc"
-            assert self.surrogate_obj.__class__.__name__.startswith(
-                "CustomRegressor"
-            ) or self.surrogate_obj.__class__.__name__.startswith(
-                "PredictionInterval"
-            ), "for `method = 'mc'`, the surrogate must be a nnetsauce.CustomRegressor() or nnetsauce.PredictionInterval()"
-            assert (
-                self.surrogate_obj.replications is not None
-            ), "for `method = 'mc'`, the surrogate must be a nnetsauce.CustomRegressor() with a number of 'replications' provided"
-            preds_with_std = self.surrogate_fit_predict(
-                np.asarray(self.parameters),
-                np.asarray(self.scores),
-                self.x_choices,
-                return_std=False,
-                return_pi=True,
-            )
-            y_mean, y_std = preds_with_std[0], preds_with_std[1]
-            self.y_mean = y_mean
-            self.y_std = np.maximum(2.220446049250313e-16, y_std)
-
-        elif self.method == "splitconformal":
-            self.posterior_ = None
-            #assert self.surrogate_obj.__class__.__name__.startswith(
-            #    "PredictionInterval"
-            #), "for `method = 'splitconformal'`, the surrogate must be a nnetsauce.PredictionInterval()"
-            preds_with_pi = self.surrogate_fit_predict(
-                np.asarray(self.parameters),
-                np.asarray(self.scores),
-                self.x_choices,
-                return_std=False,
-                return_pi=True,
-            )
-            y_lower = preds_with_pi[1]
-            self.lower = y_lower
+            elif self.method == "splitconformal":
+                self.posterior_ = None
+                #assert self.surrogate_obj.__class__.__name__.startswith(
+                #    "PredictionInterval"
+                #), "for `method = 'splitconformal'`, the surrogate must be a nnetsauce.PredictionInterval()"
+                preds_with_pi = self.surrogate_fit_predict(
+                    np.asarray(self.parameters),
+                    np.asarray(self.scores),
+                    self.x_choices,
+                    return_std=False,
+                    return_pi=True,
+                    param_search_init_design=True,
+                    param_distributions=param_distributions
+                )
+                y_lower = preds_with_pi[1]
+                self.lower = y_lower
 
         # saving after initial design computation
         if self.save is not None: