From cec9a508cb5808b7d89c0f24db751fdaf7ca6029 Mon Sep 17 00:00:00 2001 From: Giovanni Mesquita Micaroni Date: Wed, 3 Dec 2025 21:11:10 -0300 Subject: [PATCH 1/6] add option for predictions for all or selected models --- supervised/automl.py | 4 +- supervised/base_automl.py | 164 +++++++++++++++++++++++++++++++++++--- 2 files changed, 156 insertions(+), 12 deletions(-) diff --git a/supervised/automl.py b/supervised/automl.py index e7f3c954..5f886159 100644 --- a/supervised/automl.py +++ b/supervised/automl.py @@ -444,7 +444,7 @@ def fit( pass - def predict(self, X: Union[List, numpy.ndarray, pandas.DataFrame]) -> numpy.ndarray: + def predict(self, X: Union[List, numpy.ndarray, pandas.DataFrame], prediction_mode = 'best', n_models = 1, custom_models = []) -> numpy.ndarray: """ Computes predictions from AutoML best model. @@ -461,7 +461,7 @@ def predict(self, X: Union[List, numpy.ndarray, pandas.DataFrame]) -> numpy.ndar Raises: AutoMLException: Model has not yet been fitted. """ - return self._predict(X) + return self._predict(X, prediction_mode, n_models, custom_models) def predict_proba( self, X: Union[List, numpy.ndarray, pandas.DataFrame] diff --git a/supervised/base_automl.py b/supervised/base_automl.py index 40e883d9..8c004efc 100644 --- a/supervised/base_automl.py +++ b/supervised/base_automl.py @@ -1209,6 +1209,9 @@ def _fit(self, X, y, sample_weight=None, cv=None, sensitive_features=None): if not self._models: raise AutoMLException("No models produced.") + + #se der erro de compatibilidade fazer um self._ordered_models + self._models = sorted(self._models, key=lambda x: x.get_final_loss()) self._fit_level = "finished" self.save_progress() self.select_and_save_best(show_warnings=True) @@ -1434,6 +1437,23 @@ def models_needed_on_predict(self, required_model_name): + [required_model_name] ) ) + + def do_prediction_union(self, X, model_list = []): + predictions = [] + + for i, model in enumerate(model_list): + prediction = self._base_predict(X, model) + + if i > 0: + prediction = prediction.add_suffix(f"_{i}") + + predictions.append(prediction) + + df_union = pd.concat(predictions, axis=1) + + return df_union + + def _base_predict(self, X, model=None): if model is None: @@ -1504,16 +1524,140 @@ def _base_predict(self, X, model=None): else: return predictions - def _predict(self, X): - predictions = self._base_predict(X) - # Return predictions - # If classification task the result is in column 'label' - # If regression task the result is in column 'prediction' - return ( - predictions["label"].to_numpy() - if self._ml_task != REGRESSION - else predictions["prediction"].to_numpy() - ) + def _predict(self, X, prediction_mode='best', n_models=1, custom_models=[]): + """ + Generates predictions using one or multiple models based on the selected prediction mode. + + Parameters + ---------- + X : array-like, pandas.DataFrame + Input data to generate predictions for. + + prediction_mode : str, default='best' + Model selection strategy: + + - 'best': selects the top `n_models` models ranked by performance. + - 'custom': selects only the models explicitly listed in `custom_models`. + - 'all': uses all trained models. + + n_models : int, default=1 + Number of top models to select when using mode 'best'. + Must be > 0. + + custom_models : list, default=[] + List of model names to be used when `prediction_mode='custom'`. + Raises an exception if any provided model name does not exist. + + Returns + ------- + numpy.ndarray + - If a single model is selected: returns a 1D array of predictions. + - If multiple models are selected: returns a 2D array (n_samples x n_models), + containing predictions from each model side-by-side. + + Raises + ------ + AutoMLException + - If no models were selected. + - If invalid prediction mode is provided. + - If custom models are missing or invalid. + """ + + selected_models = [] + + # Model selection logic + match prediction_mode: + case 'best': + # Select the top n_models from self._models + for i in range(n_models): + selected_models.append(self._models[i]) + + case 'custom': + # Must specify custom model names + if not custom_models: + raise AutoMLException("No custom models were provided.") + + # Collect valid model names available in the system + available = {m.get_name() for m in self._models} + + # Detect invalid names passed by the user + invalid = [name for name in custom_models if name not in available] + + # If any invalid custom model name is found → raise detailed error + if invalid: + raise AutoMLException( + f"The following custom models are not available: {invalid}\n" + f"Available models are: {[m.get_name() for m in self._models]}" + ) + + # Select the models that match the requested names + filtered_models = [ + m for m in self._models if m.get_name() in custom_models + ] + + for model in filtered_models: + selected_models.append(model) + + case 'all': + # Use every available model + selected_models = self._models + + case _: + # Invalid prediction mode + raise AutoMLException(f"Invalid prediction mode '{prediction_mode}'.") + + n_selected = len(selected_models) + + if n_selected > 0: + selected_model_names = [m.get_name() for m in selected_models] + model_list_str = ", ".join(selected_model_names) + + if n_selected == 1: + print( + f"Prediction Mode: '{prediction_mode}'. " + f"Using 1 model: {model_list_str}." + ) + else: + print( + f"Prediction Mode: '{prediction_mode}'. " + f"Using {n_selected} models for multi-prediction output, and resulting array columns are formatted as follows: {model_list_str}" + ) + # ------------------------------------------------------------------ + # MULTI-MODEL PREDICTION (returns 2D array) + # ------------------------------------------------------------------ + if len(selected_models) > 1: + # Perform the union of predictions for all selected models + predictions = self.do_prediction_union(X, selected_models) + + # Select the correct output columns depending on the task + if self._ml_task != REGRESSION: + # Multi-class/binary classification → use "label" columns + cols = [c for c in predictions.columns if c.startswith("label")] + else: + # Regression → use "prediction" columns + cols = [c for c in predictions.columns if c.startswith("prediction")] + + # Return predictions as a 2D numpy array (n_samples x n_models) + return predictions[cols].to_numpy() + + # ------------------------------------------------------------------ + # SINGLE-MODEL PREDICTION (returns 1D array) + # ------------------------------------------------------------------ + elif len(selected_models) == 1: + predictions = self._base_predict(X, selected_models[0]) + + return ( + predictions["label"].to_numpy() + if self._ml_task != REGRESSION + else predictions["prediction"].to_numpy() + ) + + # ------------------------------------------------------------------ + # NO MODELS SELECTED → ERROR + # ------------------------------------------------------------------ + else: + raise AutoMLException("Prediction failed: no models were selected.") + def _predict_proba(self, X): # Check is task type is correct From e63b384aa4d10ddfea8e42b9ea813ed60be2287e Mon Sep 17 00:00:00 2001 From: Giovanni Mesquita Micaroni Date: Wed, 3 Dec 2025 21:14:09 -0300 Subject: [PATCH 2/6] add option for predictions for all or selected models. fix comment message --- supervised/base_automl.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/supervised/base_automl.py b/supervised/base_automl.py index 8c004efc..e44511e9 100644 --- a/supervised/base_automl.py +++ b/supervised/base_automl.py @@ -1210,7 +1210,7 @@ def _fit(self, X, y, sample_weight=None, cv=None, sensitive_features=None): if not self._models: raise AutoMLException("No models produced.") - #se der erro de compatibilidade fazer um self._ordered_models + #sorting models by lowest loss self._models = sorted(self._models, key=lambda x: x.get_final_loss()) self._fit_level = "finished" self.save_progress() From 2219b6b41326d100c5044a8227a9666042a3f1b6 Mon Sep 17 00:00:00 2001 From: Giovanni Mesquita Micaroni Date: Wed, 3 Dec 2025 21:23:48 -0300 Subject: [PATCH 3/6] add option for predictions for all or selected models. fix error handling --- supervised/base_automl.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/supervised/base_automl.py b/supervised/base_automl.py index e44511e9..cd128c48 100644 --- a/supervised/base_automl.py +++ b/supervised/base_automl.py @@ -1438,7 +1438,7 @@ def models_needed_on_predict(self, required_model_name): ) ) - def do_prediction_union(self, X, model_list = []): + def _do_prediction_union(self, X, model_list = []): predictions = [] for i, model in enumerate(model_list): @@ -1568,6 +1568,8 @@ def _predict(self, X, prediction_mode='best', n_models=1, custom_models=[]): # Model selection logic match prediction_mode: case 'best': + if n_models < 1 or n_models > len(self._models): + raise AutoMLException("Invalid number of models provided for prediction.") # Select the top n_models from self._models for i in range(n_models): selected_models.append(self._models[i]) @@ -1627,7 +1629,7 @@ def _predict(self, X, prediction_mode='best', n_models=1, custom_models=[]): # ------------------------------------------------------------------ if len(selected_models) > 1: # Perform the union of predictions for all selected models - predictions = self.do_prediction_union(X, selected_models) + predictions = self._do_prediction_union(X, selected_models) # Select the correct output columns depending on the task if self._ml_task != REGRESSION: From 366f85f7fccca6406a001e5c3b46b63c4ce81383 Mon Sep 17 00:00:00 2001 From: Giovanni Mesquita Micaroni Date: Thu, 4 Dec 2025 17:35:15 -0300 Subject: [PATCH 4/6] remove unnecessary else --- supervised/base_automl.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/supervised/base_automl.py b/supervised/base_automl.py index cd128c48..f4e10c4d 100644 --- a/supervised/base_automl.py +++ b/supervised/base_automl.py @@ -1653,12 +1653,7 @@ def _predict(self, X, prediction_mode='best', n_models=1, custom_models=[]): if self._ml_task != REGRESSION else predictions["prediction"].to_numpy() ) - - # ------------------------------------------------------------------ - # NO MODELS SELECTED → ERROR - # ------------------------------------------------------------------ - else: - raise AutoMLException("Prediction failed: no models were selected.") + def _predict_proba(self, X): From b0dfadbcc162a57929b8dc4088e9b51e31a812a0 Mon Sep 17 00:00:00 2001 From: Giovanni Mesquita Micaroni Date: Mon, 8 Dec 2025 18:05:36 -0300 Subject: [PATCH 5/6] refactor for PR --- supervised/automl.py | 4 +- supervised/base_automl.py | 85 ++++++++++++--------------------------- 2 files changed, 28 insertions(+), 61 deletions(-) diff --git a/supervised/automl.py b/supervised/automl.py index 5f886159..5c6ce6e3 100644 --- a/supervised/automl.py +++ b/supervised/automl.py @@ -444,7 +444,7 @@ def fit( pass - def predict(self, X: Union[List, numpy.ndarray, pandas.DataFrame], prediction_mode = 'best', n_models = 1, custom_models = []) -> numpy.ndarray: + def predict(self, X: Union[List, numpy.ndarray, pandas.DataFrame], models = []) -> numpy.ndarray: """ Computes predictions from AutoML best model. @@ -461,7 +461,7 @@ def predict(self, X: Union[List, numpy.ndarray, pandas.DataFrame], prediction_mo Raises: AutoMLException: Model has not yet been fitted. """ - return self._predict(X, prediction_mode, n_models, custom_models) + return self._predict(X, models) def predict_proba( self, X: Union[List, numpy.ndarray, pandas.DataFrame] diff --git a/supervised/base_automl.py b/supervised/base_automl.py index f4e10c4d..b884ac4c 100644 --- a/supervised/base_automl.py +++ b/supervised/base_automl.py @@ -1524,7 +1524,7 @@ def _base_predict(self, X, model=None): else: return predictions - def _predict(self, X, prediction_mode='best', n_models=1, custom_models=[]): + def _predict(self, X, models=[]): """ Generates predictions using one or multiple models based on the selected prediction mode. @@ -1537,14 +1537,14 @@ def _predict(self, X, prediction_mode='best', n_models=1, custom_models=[]): Model selection strategy: - 'best': selects the top `n_models` models ranked by performance. - - 'custom': selects only the models explicitly listed in `custom_models`. + - 'custom': selects only the models explicitly listed in `models`. - 'all': uses all trained models. n_models : int, default=1 Number of top models to select when using mode 'best'. Must be > 0. - custom_models : list, default=[] + models : list, default=[] List of model names to be used when `prediction_mode='custom'`. Raises an exception if any provided model name does not exist. @@ -1564,66 +1564,33 @@ def _predict(self, X, prediction_mode='best', n_models=1, custom_models=[]): """ selected_models = [] + n_models = len(models) + + if n_models >= 1: + # Collect valid model names available in the system + available = [m.get_name() for m in self._models] - # Model selection logic - match prediction_mode: - case 'best': - if n_models < 1 or n_models > len(self._models): - raise AutoMLException("Invalid number of models provided for prediction.") - # Select the top n_models from self._models - for i in range(n_models): - selected_models.append(self._models[i]) - - case 'custom': - # Must specify custom model names - if not custom_models: - raise AutoMLException("No custom models were provided.") - - # Collect valid model names available in the system - available = {m.get_name() for m in self._models} - - # Detect invalid names passed by the user - invalid = [name for name in custom_models if name not in available] - - # If any invalid custom model name is found → raise detailed error - if invalid: - raise AutoMLException( - f"The following custom models are not available: {invalid}\n" - f"Available models are: {[m.get_name() for m in self._models]}" - ) - - # Select the models that match the requested names - filtered_models = [ - m for m in self._models if m.get_name() in custom_models - ] - - for model in filtered_models: - selected_models.append(model) - - case 'all': - # Use every available model - selected_models = self._models + # Detect invalid names passed by the user + invalid = [name for name in models if name not in available] - case _: - # Invalid prediction mode - raise AutoMLException(f"Invalid prediction mode '{prediction_mode}'.") + # If any invalid custom model name is found → raise detailed error + if invalid: + raise AutoMLException( + f"The following custom models are not available: {invalid}\n" + f"Available models are: {available}" + ) - n_selected = len(selected_models) - - if n_selected > 0: - selected_model_names = [m.get_name() for m in selected_models] - model_list_str = ", ".join(selected_model_names) + # Select the models that match the requested names + filtered_models = [ + m for m in self._models if m.get_name() in models + ] - if n_selected == 1: - print( - f"Prediction Mode: '{prediction_mode}'. " - f"Using 1 model: {model_list_str}." - ) - else: - print( - f"Prediction Mode: '{prediction_mode}'. " - f"Using {n_selected} models for multi-prediction output, and resulting array columns are formatted as follows: {model_list_str}" - ) + for model in filtered_models: + selected_models.append(model) + else: + selected_models.append(self._best_model) + + print(f'Models being used for prediction: {[model.get_name() for model in selected_models]}') # ------------------------------------------------------------------ # MULTI-MODEL PREDICTION (returns 2D array) # ------------------------------------------------------------------ From 3f9a5a8a34e2125a6968e5ecf3c43eaa419437aa Mon Sep 17 00:00:00 2001 From: Giovanni Mesquita Micaroni Date: Mon, 8 Dec 2025 20:32:57 -0300 Subject: [PATCH 6/6] update docstring --- supervised/base_automl.py | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/supervised/base_automl.py b/supervised/base_automl.py index b884ac4c..71e4962d 100644 --- a/supervised/base_automl.py +++ b/supervised/base_automl.py @@ -1526,27 +1526,17 @@ def _base_predict(self, X, model=None): def _predict(self, X, models=[]): """ - Generates predictions using one or multiple models based on the selected prediction mode. + Generates predictions using one or multiple models based on the selected models. Parameters ---------- X : array-like, pandas.DataFrame Input data to generate predictions for. - prediction_mode : str, default='best' - Model selection strategy: - - - 'best': selects the top `n_models` models ranked by performance. - - 'custom': selects only the models explicitly listed in `models`. - - 'all': uses all trained models. - - n_models : int, default=1 - Number of top models to select when using mode 'best'. - Must be > 0. - models : list, default=[] - List of model names to be used when `prediction_mode='custom'`. + List of model names to be used. Raises an exception if any provided model name does not exist. + If no models are provided, use the best one. Returns -------