diff --git a/supervised/automl.py b/supervised/automl.py index e7f3c954..5c6ce6e3 100644 --- a/supervised/automl.py +++ b/supervised/automl.py @@ -444,7 +444,7 @@ def fit( pass - def predict(self, X: Union[List, numpy.ndarray, pandas.DataFrame]) -> numpy.ndarray: + def predict(self, X: Union[List, numpy.ndarray, pandas.DataFrame], models = []) -> numpy.ndarray: """ Computes predictions from AutoML best model. @@ -461,7 +461,7 @@ def predict(self, X: Union[List, numpy.ndarray, pandas.DataFrame]) -> numpy.ndar Raises: AutoMLException: Model has not yet been fitted. """ - return self._predict(X) + return self._predict(X, models) def predict_proba( self, X: Union[List, numpy.ndarray, pandas.DataFrame] diff --git a/supervised/base_automl.py b/supervised/base_automl.py index 40e883d9..71e4962d 100644 --- a/supervised/base_automl.py +++ b/supervised/base_automl.py @@ -1209,6 +1209,9 @@ def _fit(self, X, y, sample_weight=None, cv=None, sensitive_features=None): if not self._models: raise AutoMLException("No models produced.") + + #sorting models by lowest loss + self._models = sorted(self._models, key=lambda x: x.get_final_loss()) self._fit_level = "finished" self.save_progress() self.select_and_save_best(show_warnings=True) @@ -1434,6 +1437,23 @@ def models_needed_on_predict(self, required_model_name): + [required_model_name] ) ) + + def _do_prediction_union(self, X, model_list = []): + predictions = [] + + for i, model in enumerate(model_list): + prediction = self._base_predict(X, model) + + if i > 0: + prediction = prediction.add_suffix(f"_{i}") + + predictions.append(prediction) + + df_union = pd.concat(predictions, axis=1) + + return df_union + + def _base_predict(self, X, model=None): if model is None: @@ -1504,16 +1524,94 @@ def _base_predict(self, X, model=None): else: return predictions - def _predict(self, X): - predictions = self._base_predict(X) - # Return predictions - # If classification task the result is in column 'label' - # If regression task the result is in column 'prediction' - return ( - predictions["label"].to_numpy() - if self._ml_task != REGRESSION - else predictions["prediction"].to_numpy() - ) + def _predict(self, X, models=[]): + """ + Generates predictions using one or multiple models based on the selected models. + + Parameters + ---------- + X : array-like, pandas.DataFrame + Input data to generate predictions for. + + models : list, default=[] + List of model names to be used. + Raises an exception if any provided model name does not exist. + If no models are provided, use the best one. + + Returns + ------- + numpy.ndarray + - If a single model is selected: returns a 1D array of predictions. + - If multiple models are selected: returns a 2D array (n_samples x n_models), + containing predictions from each model side-by-side. + + Raises + ------ + AutoMLException + - If no models were selected. + - If invalid prediction mode is provided. + - If custom models are missing or invalid. + """ + + selected_models = [] + n_models = len(models) + + if n_models >= 1: + # Collect valid model names available in the system + available = [m.get_name() for m in self._models] + + # Detect invalid names passed by the user + invalid = [name for name in models if name not in available] + + # If any invalid custom model name is found → raise detailed error + if invalid: + raise AutoMLException( + f"The following custom models are not available: {invalid}\n" + f"Available models are: {available}" + ) + + # Select the models that match the requested names + filtered_models = [ + m for m in self._models if m.get_name() in models + ] + + for model in filtered_models: + selected_models.append(model) + else: + selected_models.append(self._best_model) + + print(f'Models being used for prediction: {[model.get_name() for model in selected_models]}') + # ------------------------------------------------------------------ + # MULTI-MODEL PREDICTION (returns 2D array) + # ------------------------------------------------------------------ + if len(selected_models) > 1: + # Perform the union of predictions for all selected models + predictions = self._do_prediction_union(X, selected_models) + + # Select the correct output columns depending on the task + if self._ml_task != REGRESSION: + # Multi-class/binary classification → use "label" columns + cols = [c for c in predictions.columns if c.startswith("label")] + else: + # Regression → use "prediction" columns + cols = [c for c in predictions.columns if c.startswith("prediction")] + + # Return predictions as a 2D numpy array (n_samples x n_models) + return predictions[cols].to_numpy() + + # ------------------------------------------------------------------ + # SINGLE-MODEL PREDICTION (returns 1D array) + # ------------------------------------------------------------------ + elif len(selected_models) == 1: + predictions = self._base_predict(X, selected_models[0]) + + return ( + predictions["label"].to_numpy() + if self._ml_task != REGRESSION + else predictions["prediction"].to_numpy() + ) + + def _predict_proba(self, X): # Check is task type is correct