22import pandas as pd
33from h2o .estimators import H2OGeneralizedLinearEstimator
44
5- # Removing skopt imports to prevent the ParameterGrid TypeError
6- # from skopt.space import Real, Categorical, Integer
5+ # --- FIX: Re-import skopt for Bayesian search compatibility ---
6+ from skopt .space import Real , Categorical
77
88from .H2OBaseClassifier import H2OBaseClassifier
99
@@ -43,23 +43,12 @@ def _prepare_fit(self, X, y):
4343 # Get the standard parameters from the base class
4444 train_h2o , x_vars , outcome_var , model_params = super ()._prepare_fit (X , y )
4545
46- # --- STRICT OVERRIDE (The "Triple-Lock") ---
47- # Regardless of what GridSearch/HyperOpt requested, we force these values
48- # to prevent the Java Backend Crash (NullPointerException).
49-
50- # 1. Force L_BFGS: The only solver robust against the index mismatch bug on this data
46+ # --- STRICT OVERRIDE ---
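47+ # Force L_BFGS (the only solver robust against the index mismatch bug on this data); also disable collinear-column removal (avoids the coefficient vector size change) and lambda search (if True, H2O ignores 'solver' and uses Coordinate Descent)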
5148 model_params ["solver" ] = "L_BFGS"
52-
53- # 2. Disable Collinear Removal: This prevents the coefficient vector size change
5449 model_params ["remove_collinear_columns" ] = False
55-
56- # 3. Disable Lambda Search: If True, H2O ignores 'solver' and uses Coordinate Descent
5750 model_params ["lambda_search" ] = False
5851
59- self .logger .info (
60- f"H2OGLMClassifier: Enforced stability params: solver={ model_params ['solver' ]} , lambda_search={ model_params ['lambda_search' ]} "
61- )
62-
6352 return train_h2o , x_vars , outcome_var , model_params
6453
6554 def fit (self , X : pd .DataFrame , y : pd .Series , ** kwargs ) -> "H2OGLMClassifier" :
@@ -80,33 +69,60 @@ def __init__(self, X=None, y=None, parameter_space_size="small"):
8069 # Instantiate the actual estimator wrapper
8170 self .algorithm_implementation = H2OGLMClassifier ()
8271
83- # Define the Hyperparameter Space
84- # FIX: Converted skopt distributions (Real, Categorical) to Lists
85- # to ensure compatibility with sklearn.model_selection.ParameterGrid
86-
87- if parameter_space_size == "xsmall" :
88- self .parameter_space = {
89- "alpha" : [0.0 , 0.5 , 1.0 ],
90- "lambda_" : [1e-3 , 1e-2 , 1e-1 ],
91- "family" : ["binomial" ],
92- "solver" : ["L_BFGS" ],
93- "standardize" : [True ],
94- }
95- elif parameter_space_size == "small" :
96- self .parameter_space = {
97- "alpha" : [0.0 , 0.25 , 0.5 , 0.75 , 1.0 ],
98- "lambda_" : np .logspace (- 4 , - 1 , 5 ).tolist (),
99- "family" : ["binomial" ],
100- "solver" : ["L_BFGS" ],
101- "standardize" : [True ],
102- }
72+ # --- FIX: Conditionally define parameter space for Bayes vs. Grid search ---
73+ from ml_grid .util .global_params import global_parameters
74+
75+ if global_parameters .bayessearch :
76+ # Use skopt spaces for Bayesian search
77+ if parameter_space_size == "xsmall" :
78+ self .parameter_space = {
79+ "alpha" : Real (0.0 , 1.0 ),
80+ "lambda_" : Real (1e-3 , 1e-1 , prior = "log-uniform" ),
81+ "family" : Categorical (["binomial" ]),
82+ "solver" : Categorical (["L_BFGS" ]),
83+ "standardize" : Categorical ([True ]),
84+ }
85+ elif parameter_space_size == "small" :
86+ self .parameter_space = {
87+ "alpha" : Real (0.0 , 1.0 ),
88+ "lambda_" : Real (1e-4 , 1e-1 , prior = "log-uniform" ),
89+ "family" : Categorical (["binomial" ]),
90+ "solver" : Categorical (["L_BFGS" ]),
91+ "standardize" : Categorical ([True ]),
92+ }
93+ else : # Medium/Large space
94+ self .parameter_space = {
95+ "alpha" : Real (0.0 , 1.0 ),
96+ "lambda_" : Real (1e-6 , 10.0 , prior = "log-uniform" ),
97+ "family" : Categorical (["binomial" ]),
98+ "solver" : Categorical (["L_BFGS" ]),
99+ "standardize" : Categorical ([True , False ]),
100+ "balance_classes" : Categorical ([True , False ]),
101+ }
103102 else :
104- # Medium/Large space
105- self .parameter_space = {
106- "alpha" : [0.0 , 0.1 , 0.3 , 0.5 , 0.7 , 0.9 , 1.0 ],
107- "lambda_" : np .logspace (- 6 , 1 , 8 ).tolist (),
108- "family" : ["binomial" ],
109- "solver" : ["L_BFGS" ],
110- "standardize" : [True , False ],
111- "balance_classes" : [True , False ],
112- }
103+ # Use lists for Grid/Random search
104+ if parameter_space_size == "xsmall" :
105+ self .parameter_space = {
106+ "alpha" : [0.0 , 0.5 , 1.0 ],
107+ "lambda_" : [1e-3 , 1e-2 , 1e-1 ],
108+ "family" : ["binomial" ],
109+ "solver" : ["L_BFGS" ],
110+ "standardize" : [True ],
111+ }
112+ elif parameter_space_size == "small" :
113+ self .parameter_space = {
114+ "alpha" : [0.0 , 0.25 , 0.5 , 0.75 , 1.0 ],
115+ "lambda_" : np .logspace (- 4 , - 1 , 5 ).tolist (),
116+ "family" : ["binomial" ],
117+ "solver" : ["L_BFGS" ],
118+ "standardize" : [True ],
119+ }
120+ else : # Medium/Large space
121+ self .parameter_space = {
122+ "alpha" : [0.0 , 0.1 , 0.3 , 0.5 , 0.7 , 0.9 , 1.0 ],
123+ "lambda_" : np .logspace (- 6 , 1 , 8 ).tolist (),
124+ "family" : ["binomial" ],
125+ "solver" : ["L_BFGS" ],
126+ "standardize" : [True , False ],
127+ "balance_classes" : [True , False ],
128+ }
0 commit comments