@@ -732,8 +732,166 @@ def set_params(self, **params: Any) -> LightGBMForecaster: # noqa: ANN401
732732 return self
733733
734734
class XGBoostForecaster(BaseForecaster):
    """Feature-aware forecaster wrapping ``xgboost.XGBRegressor``.

    The second ADVANCED feature-aware tree model (MLZOO-C1). Structurally a
    twin of ``LightGBMForecaster``: it REQUIRES a non-``None`` exogenous ``X``
    for both ``fit`` and ``predict``; the estimator is gradient-boosted trees
    from the optional ``xgboost`` package.

    ``xgboost`` is imported LAZILY inside ``fit`` — never at module scope and
    never in ``__init__`` — so importing this module (which every forecasting
    code path does, baseline models included) never requires the optional
    ``ml-xgboost`` dependency. If the package is missing when ``fit`` runs,
    a ``ModuleNotFoundError`` naming the optional dependency is raised.

    Determinism: ``XGBRegressor`` has no ``deterministic`` switch (unlike
    LightGBM). Bit-reproducibility comes from ``n_jobs=1`` + ``tree_method="hist"``
    + a fixed ``random_state`` + ``subsample=1.0`` / ``colsample_bytree=1.0``
    (no stochastic row/column sampling) — all pinned explicitly in ``fit``.
    XGBoost tolerates ``NaN`` natively (``missing=np.nan``), which matters
    because the future feature frame leaves lag cells ``NaN`` when their
    source target lies in the un-observed horizon.

    Attributes:
        n_estimators: Number of boosting rounds.
        learning_rate: Gradient-boosting learning rate.
        max_depth: Maximum depth of each tree.
    """

    requires_features: ClassVar[bool] = True
    """A feature-aware model — ``fit``/``predict`` REQUIRE a non-None ``X``."""

    def __init__(
        self,
        *,
        n_estimators: int = 100,
        learning_rate: float = 0.1,
        max_depth: int = 6,
        random_state: int = 42,
    ) -> None:
        """Initialize the XGBoost forecaster.

        No estimator is built here; it is created in ``fit`` so that
        constructing the forecaster never imports ``xgboost``.

        Args:
            n_estimators: Number of boosting rounds.
            learning_rate: Gradient-boosting learning rate.
            max_depth: Maximum depth of each tree.
            random_state: Random seed for reproducibility (determinism).
        """
        super().__init__(random_state)
        self.n_estimators = n_estimators
        self.learning_rate = learning_rate
        self.max_depth = max_depth
        # Populated by ``fit``; ``None`` means "not fitted yet".
        self._estimator: Any = None

    def fit(
        self,
        y: np.ndarray[Any, np.dtype[np.floating[Any]]],
        X: np.ndarray[Any, np.dtype[np.floating[Any]]] | None = None,
    ) -> XGBoostForecaster:
        """Fit the gradient-boosted regressor on historical features.

        Args:
            y: Target values (1D array of shape ``[n_samples]``).
            X: Exogenous features (2D array of shape ``[n_samples, n_features]``).
                REQUIRED — unlike the baseline forecasters.

        Returns:
            self (for method chaining).

        Raises:
            ValueError: If ``X`` is ``None``, ``y`` is empty, or the row counts
                of ``X`` and ``y`` do not match.
            ModuleNotFoundError: If the optional ``xgboost`` package is not
                installed.
        """
        if X is None:
            raise ValueError("XGBoostForecaster requires exogenous features X for fit()")
        if len(y) == 0:
            raise ValueError("Cannot fit on empty array")
        if X.shape[0] != len(y):
            raise ValueError(
                f"X has {X.shape[0]} rows but y has {len(y)} — feature/target rows must match"
            )
        # LAZY import — the optional ``ml-xgboost`` dependency is only needed
        # the first time an XGBoost model is actually fitted.
        try:
            import xgboost as xgb
        except ModuleNotFoundError as exc:
            # Only rewrite the message when xgboost itself is the missing
            # module; a missing transitive dependency keeps its own error.
            if exc.name != "xgboost":
                raise
            raise ModuleNotFoundError(
                "XGBoostForecaster requires the optional 'xgboost' package "
                "(the ml-xgboost dependency); install it to fit this model",
                name="xgboost",
            ) from exc

        estimator: Any = xgb.XGBRegressor(
            n_estimators=self.n_estimators,
            learning_rate=self.learning_rate,
            max_depth=self.max_depth,
            random_state=self.random_state,
            n_jobs=1,  # single-threaded — removes float-summation non-determinism
            tree_method="hist",  # explicit; the default, and the reproducible path
            subsample=1.0,  # explicit library default: no stochastic row sampling
            colsample_bytree=1.0,  # explicit library default: no column sampling
            verbosity=0,  # silence XGBoost's training chatter
        )
        estimator.fit(X, y)
        self._estimator = estimator
        # Keep the final observed target as a length-1 float64 array.
        self._last_values = np.asarray(y[-1:], dtype=np.float64)
        self._is_fitted = True
        return self

    def predict(
        self,
        horizon: int,
        X: np.ndarray[Any, np.dtype[np.floating[Any]]] | None = None,
    ) -> np.ndarray[Any, np.dtype[np.floating[Any]]]:
        """Generate forecasts from a future feature frame.

        Args:
            horizon: Number of steps to forecast.
            X: Exogenous features for the forecast period, shape
                ``[horizon, n_features]``. REQUIRED.

        Returns:
            Array of forecasts with shape ``[horizon]``.

        Raises:
            RuntimeError: If the model has not been fitted.
            ValueError: If ``X`` is ``None`` or its row count is not ``horizon``.
        """
        if not self._is_fitted or self._estimator is None:
            raise RuntimeError("Model must be fitted before predict")
        if X is None:
            raise ValueError("XGBoostForecaster requires exogenous features X for predict()")
        if X.shape[0] != horizon:
            raise ValueError(f"X has {X.shape[0]} rows but horizon is {horizon} — they must match")
        predictions = self._estimator.predict(X)
        # Normalise the estimator's output to float64 for the caller.
        result: np.ndarray[Any, np.dtype[np.floating[Any]]] = np.asarray(
            predictions, dtype=np.float64
        )
        return result

    def get_params(self) -> dict[str, Any]:
        """Get model parameters.

        Returns:
            Dictionary with n_estimators, learning_rate, max_depth, random_state.
        """
        return {
            "n_estimators": self.n_estimators,
            "learning_rate": self.learning_rate,
            "max_depth": self.max_depth,
            "random_state": self.random_state,
        }

    def set_params(self, **params: Any) -> XGBoostForecaster:  # noqa: ANN401
        """Set model parameters.

        Each keyword is assigned as an attribute of the same name. An already
        fitted estimator is left untouched; new values take effect on the next
        ``fit`` call.

        Args:
            **params: Parameter names and values to set.

        Returns:
            self (for method chaining).
        """
        for key, value in params.items():
            setattr(self, key, value)
        return self
889+
890+
# Type alias for the model-type literals accepted by ``model_factory``.
ModelType = Literal[
    "naive", "seasonal_naive", "moving_average", "xgboost", "lightgbm", "regression"
]
737895
738896
739897def model_factory (config : ModelConfig , random_state : int = 42 ) -> BaseForecaster :
@@ -790,6 +948,21 @@ def model_factory(config: ModelConfig, random_state: int = 42) -> BaseForecaster
790948 random_state = random_state ,
791949 )
792950 raise ValueError ("Invalid config type for lightgbm" )
951+ elif model_type == "xgboost" :
952+ if not settings .forecast_enable_xgboost :
953+ raise ValueError (
954+ "XGBoost is not enabled. Set forecast_enable_xgboost=True in settings."
955+ )
956+ from app .features .forecasting .schemas import XGBoostModelConfig
957+
958+ if isinstance (config , XGBoostModelConfig ):
959+ return XGBoostForecaster (
960+ n_estimators = config .n_estimators ,
961+ learning_rate = config .learning_rate ,
962+ max_depth = config .max_depth ,
963+ random_state = random_state ,
964+ )
965+ raise ValueError ("Invalid config type for xgboost" )
793966 elif model_type == "regression" :
794967 from app .features .forecasting .schemas import RegressionModelConfig
795968
0 commit comments