Merged
4 changes: 2 additions & 2 deletions legateboost/callbacks.py
@@ -57,8 +57,8 @@ def after_iteration(


class EarlyStopping(TrainingCallback):
"""Callback for early stopping during training. The last evaluation dataset
is used for early stopping.
"""Callback for early stopping during training. The last evaluation dataset is
used for early stopping.

Args:
rounds (int): The number of rounds to wait for improvement before stopping.
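A hedged usage sketch of the early stopping described above. LBRegressor, eval_set, and EarlyStopping(rounds=...) appear elsewhere in this PR or its docstrings; the `callbacks` keyword on fit is an assumption made here for illustration only.

import numpy as np
import legateboost as lb
from legateboost.callbacks import EarlyStopping

rng = np.random.default_rng(0)
X = rng.normal(size=(1000, 10))
y = X[:, 0] + rng.normal(scale=0.1, size=1000)
X_train, y_train, X_valid, y_valid = X[:800], y[:800], X[800:], y[800:]

model = lb.LBRegressor(n_estimators=200)
model.fit(
    X_train,
    y_train,
    eval_set=[(X_valid, y_valid)],         # the last eval set drives early stopping
    callbacks=[EarlyStopping(rounds=10)],  # stop after 10 rounds without improvement (keyword assumed)
)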
44 changes: 20 additions & 24 deletions legateboost/legateboost.py
@@ -194,11 +194,7 @@ def _get_weighted_gradient(
learning_rate: float,
) -> Tuple[cn.ndarray, cn.ndarray]:
"""Computes the weighted gradient and Hessian for the given predictions
and labels.

Also applies a pre-rounding step to ensure reproducible floating
point summation.
"""
and labels."""
# check input dimensions are consistent
assert y.ndim == pred.ndim == 2, (y.shape, pred.shape)
g, h = self._objective_instance.gradient(
@@ -317,8 +313,8 @@ def update(
eval_result: EvalResult = {},
) -> Self:
"""Update a gradient boosting model from the training set (X, y). This
method does not add any new models to the ensemble, only updates
existing models to fit the new data.
method does not add any new models to the ensemble, only updates existing
models to fit the new data.

Parameters
----------
@@ -477,8 +473,8 @@ def __iter__(self) -> Any:
return iter(self.models_)

def __mul__(self, scalar: Any) -> Self:
"""Gradient boosted models are linear in the predictions before the
non-linear link function is applied. This means that the model can be
"""Gradient boosted models are linear in the predictions before the non-
linear link function is applied. This means that the model can be
multiplied by a scalar, which subsequently scales all raw output
predictions. This is useful for ensembling models.
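A brief hedged check of the scalar-scaling behaviour described in __mul__ above, assuming an identity link so the scaling is visible directly in predict; the docstring's statement that all raw output is scaled is taken at face value.

import numpy as np
import legateboost as lb

rng = np.random.default_rng(0)
X = rng.normal(size=(200, 5))
y = 2.0 * X[:, 0] + rng.normal(scale=0.1, size=200)

model = lb.LBRegressor(n_estimators=10).fit(X, y)
halved = model * 0.5            # scales all raw output predictions
print(model.predict(X[:3]))
print(halved.predict(X[:3]))    # expected to be roughly half of the line above
# For ensembling, scaled models can then be combined (e.g. averaged), if addition is supported.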

@@ -550,8 +546,8 @@ def global_attributions(
n_samples: int = 5,
check_efficiency: bool = False,
) -> Tuple[cn.array, cn.array]:
r"""Compute global feature attributions for the model. Global
attributions show the effect of a feature on a model's loss function.
r"""Compute global feature attributions for the model. Global attributions
show the effect of a feature on a model's loss function.

We use a Shapley value approach to compute the attributions:
:math:`Sh_i(v)=\frac{1}{|N|!} \sum_{\sigma \in \mathfrak{S}_d} \big[ v([\sigma]_{i-1} \cup\{i\}) - v([\sigma]_{i-1}) \big],`
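The permutation form above can be estimated by Monte Carlo sampling over feature orderings. A minimal NumPy sketch of that estimator for a generic value function v; an illustration of the formula, not legateboost's implementation.

import numpy as np

def sampled_shapley(v, d, n_samples=100, rng=None):
    # Estimate Sh_i(v) as the average marginal contribution of feature i
    # over randomly sampled orderings sigma.
    rng = np.random.default_rng(rng)
    attributions = np.zeros(d)
    for _ in range(n_samples):
        sigma = rng.permutation(d)
        included = set()
        prev = v(included)
        for i in sigma:
            included = included | {i}
            cur = v(included)
            attributions[i] += cur - prev   # v([sigma]_{i-1} u {i}) - v([sigma]_{i-1})
            prev = cur
    return attributions / n_samples

# Additive toy game: the Shapley values recover each feature's importance exactly.
importance = np.array([3.0, 1.0, 0.5])
v = lambda S: sum(importance[j] for j in S)
print(sampled_shapley(v, d=3, n_samples=50))   # [3.0, 1.0, 0.5]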
@@ -612,11 +608,10 @@ def local_attributions(
n_samples: int = 5,
check_efficiency: bool = False,
) -> Tuple[cn.array, cn.array]:
r"""Local feature attributions for model predictions. Shows the effect
of a feature on each output prediction. See the definition of Shapley
values in :func:`~legateboost.BaseModel.global_attributions`, where the
:math:`v` function is here the model prediction instead of the loss
function.
r"""Local feature attributions for model predictions. Shows the effect of a
feature on each output prediction. See the definition of Shapley values in
:func:`~legateboost.BaseModel.global_attributions`, where the :math:`v`
function is here the model prediction instead of the loss function.

Parameters
----------
@@ -750,8 +745,8 @@ def partial_fit(
eval_set: List[Tuple[cn.ndarray, ...]] = [],
eval_result: EvalResult = {},
) -> LBBase:
"""This method is used for incremental (online) training of the model.
An additional `n_estimators` models will be added to the ensemble.
"""This method is used for incremental (online) training of the model. An
additional `n_estimators` models will be added to the ensemble.

Parameters
----------
@@ -928,8 +923,8 @@ def partial_fit(
eval_result: EvalResult = {},
) -> LBBase:
"""This method is used for incremental fitting on a batch of samples.
Requires the classes to be provided up front, as they may not be
inferred from the first batch.
Requires the classes to be provided up front, as they may not be inferred
from the first batch.

Parameters
----------
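A hedged incremental-training sketch matching the partial_fit hunks above: each call adds n_estimators models fitted to the new batch. LBClassifier and the `classes` keyword are assumptions for illustration (the docstring only states that classes must be provided up front); predict_proba appears later in this diff.

import numpy as np
import legateboost as lb

rng = np.random.default_rng(0)
model = lb.LBClassifier(n_estimators=5)   # 5 new models are appended per call

for _ in range(4):
    X = rng.normal(size=(256, 8))
    y = (X[:, 0] + rng.normal(scale=0.5, size=256) > 0).astype(np.int64)
    # Classes are supplied up front because a given batch may not contain every label.
    model = model.partial_fit(X, y, classes=np.array([0, 1]))  # keyword assumed

print(model.predict_proba(X[:5]))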
@@ -1033,8 +1028,8 @@ def fit(
return self

def predict_raw(self, X: cn.ndarray) -> cn.ndarray:
"""Predict pre-transformed values for samples in X. E.g. before
applying a sigmoid function.
"""Predict pre-transformed values for samples in X. E.g. before applying a
sigmoid function.

Parameters
----------
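A plain-NumPy illustration (no legateboost calls) of the relationship the predict_raw docstring describes: raw, pre-transformed scores become probabilities once the link function is applied, a sigmoid in the binary case and a softmax-style transform in the multi-class case (the latter is an assumption here).

import numpy as np

def sigmoid(f):
    return 1.0 / (1.0 + np.exp(-f))

def softmax(f):
    e = np.exp(f - f.max(axis=-1, keepdims=True))
    return e / e.sum(axis=-1, keepdims=True)

raw_binary = np.array([-2.0, 0.0, 1.5])    # stand-in for pre-transformed (raw) scores
print(sigmoid(raw_binary))                 # probabilities after the sigmoid link

raw_multi = np.array([[0.2, 1.0, -0.5]])   # one sample, three classes (stand-in values)
print(softmax(raw_multi))                  # rows sum to 1 after the softmax-style link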
@@ -1064,7 +1059,7 @@ def predict_proba(self, X: cn.ndarray) -> cn.ndarray:
Returns
-------

y :
probabilities:
The predicted class probabilities for each sample in X.
"""
X = _lb_check_X(X)
@@ -1091,4 +1086,5 @@ def predict(self, X: cn.ndarray) -> cn.ndarray:
y :
The predicted class labels for each sample in X.
"""
return cn.argmax(self.predict_proba(X), axis=1)
check_is_fitted(self)
return self._objective_instance.output_class(self.predict_proba(X))
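The change above routes class prediction through the objective's output_class rather than a bare argmax, which matters once classes are not mutually exclusive (e.g. a multi-label objective). A hedged sketch of the distinction; these stand-ins are illustrative, not legateboost's actual implementations.

import numpy as np

def output_class_softmax(proba):
    # Mutually exclusive classes: pick the single most probable class.
    return np.argmax(proba, axis=1)

def output_class_multilabel(proba, threshold=0.5):
    # Independent labels: threshold each probability separately (threshold assumed).
    return (proba >= threshold).astype(np.int64)

proba = np.array([[0.7, 0.6, 0.1]])
print(output_class_softmax(proba))      # [0]         -- exactly one class
print(output_class_multilabel(proba))   # [[1, 1, 0]] -- several labels can be active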
31 changes: 26 additions & 5 deletions legateboost/metrics.py
@@ -17,6 +17,7 @@
"GammaDevianceMetric",
"QuantileMetric",
"LogLossMetric",
"MultiLabelMetric",
"ExponentialMetric",
]

@@ -144,8 +145,8 @@ def name(self) -> str:


class GammaLLMetric(BaseMetric):
"""The mean negative log likelihood of the labels, given parameters
predicted by the model."""
"""The mean negative log likelihood of the labels, given parameters predicted
by the model."""

@override
def metric(self, y: cn.ndarray, pred: cn.ndarray, w: cn.ndarray) -> cn.ndarray:
@@ -252,8 +253,8 @@ def name(self) -> str:


class LogLossMetric(BaseMetric):
"""Class for computing the logarithmic loss (logloss) metric between the
true labels and predicted labels.
"""Class for computing the logarithmic loss (logloss) metric between the true
labels and predicted labels.

For binary classification:

@@ -273,7 +274,7 @@ class LogLossMetric(BaseMetric):
def metric(self, y: cn.ndarray, pred: cn.ndarray, w: cn.ndarray) -> cn.ndarray:
y = y.squeeze()
eps = cn.finfo(pred.dtype).eps
cn.clip(pred, eps, 1 - eps, out=pred)
pred = cn.clip(pred, eps, 1 - eps)

w_sum = w.sum()

@@ -296,6 +297,26 @@ def name(self) -> str:
return "log_loss"


class MultiLabelMetric(BaseMetric):
"""Multi-label metric is a binary log-loss metric averaged over multiple
labels.

See also:
:class:`legateboost.objectives.MultiLabelObjective`
""" # noqa: E501

def metric(self, y: cn.ndarray, pred: cn.ndarray, w: cn.ndarray) -> cn.ndarray:
y = y.squeeze()
eps = cn.finfo(pred.dtype).eps
pred = cn.clip(pred, eps, 1 - eps)
w_sum = w.sum()
logloss = -(y * cn.log(pred) + (self.one - y) * cn.log(self.one - pred))
return (logloss * w[:, cn.newaxis]).sum() / w_sum

def name(self) -> str:
return "multi_label"


class ExponentialMetric(BaseMetric):
"""Class for computing the exponential loss metric.

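A plain-NumPy restatement of the MultiLabelMetric computation added above (per-label binary log loss, summed over labels and weight-averaged over samples), handy for sanity checks; self.one is replaced by a literal 1.0, and the out-of-place clip mirrors the LogLossMetric change in this diff.

import numpy as np

def multi_label_logloss(y, pred, w):
    eps = np.finfo(pred.dtype).eps
    pred = np.clip(pred, eps, 1 - eps)    # out-of-place clip, as in the updated metric
    logloss = -(y * np.log(pred) + (1.0 - y) * np.log(1.0 - pred))
    return (logloss * w[:, np.newaxis]).sum() / w.sum()

y = np.array([[1.0, 0.0], [0.0, 1.0]])
pred = np.array([[0.9, 0.2], [0.4, 0.7]])
w = np.ones(2)
print(multi_label_logloss(y, pred, w))    # ~0.598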
9 changes: 4 additions & 5 deletions legateboost/models/base_model.py
@@ -11,9 +11,9 @@
class BaseModel(PickleCupynumericMixin, ABC):
"""Base class for all models in LegateBoost.

Defines the interface for fitting, updating, and predicting a model,
as well as string representation and equality comparison. Implement
these methods to create a custom model.
Defines the interface for fitting, updating, and predicting a model, as well
as string representation and equality comparison. Implement these methods to
create a custom model.
"""

def set_random_state(self, random_state: np.random.RandomState) -> "BaseModel":
@@ -27,8 +27,7 @@ def fit(
g: cn.ndarray,
h: cn.ndarray,
) -> "BaseModel":
"""Fit the model to a second order Taylor expansion of the loss
function.
"""Fit the model to a second order Taylor expansion of the loss function.

Parameters
----------
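What fitting "a second order Taylor expansion of the loss" means concretely: given per-example gradients g and Hessians h, the best constant update minimises sum_i (g_i * f + 0.5 * h_i * f^2), giving f* = -sum(g) / sum(h). A minimal NumPy sketch of that closed form (the math behind a BaseModel fit, not an actual BaseModel subclass).

import numpy as np

def best_constant_update(g, h, l2=0.0):
    # Minimiser of sum_i (g_i * f + 0.5 * h_i * f^2) + 0.5 * l2 * f^2, per output column.
    return -g.sum(axis=0) / (h.sum(axis=0) + l2)

# Squared-error toy case: g = pred - y and h = 1, so the update is the mean residual.
y = np.array([1.0, 2.0, 3.0])
pred = np.zeros(3)
g = (pred - y)[:, np.newaxis]
h = np.ones_like(g)
print(best_constant_update(g, h))   # [2.0]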
6 changes: 3 additions & 3 deletions legateboost/models/krr.py
@@ -35,9 +35,9 @@ def rbf(x: cn.ndarray, sigma: float) -> cn.ndarray:


class KRR(BaseModel):
"""Kernel Ridge Regression model using the Nyström approximation. The
accuracy of the approximation is governed by the parameter `n_components`
<= `n`. Effectively, `n_components` rows will be randomly sampled (without
"""Kernel Ridge Regression model using the Nyström approximation. The accuracy
of the approximation is governed by the parameter `n_components` <= `n`.
Effectively, `n_components` rows will be randomly sampled (without
replacement) from X in each boosting iteration.

The kernel is fixed to be the RBF kernel:
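The RBF formula itself is truncated in this hunk, so here is a compact sketch of the general technique the docstring names: sample n_components landmark rows without replacement, build the cross- and landmark-kernel blocks, and solve a regularised least-squares system. The kernel parametrisation (sigma) and the regulariser alpha are assumptions; this mirrors textbook Nystrom kernel ridge regression, not legateboost's exact code.

import numpy as np

def rbf_kernel(A, B, sigma):
    d2 = ((A[:, None, :] - B[None, :, :]) ** 2).sum(-1)
    return np.exp(-d2 / (2.0 * sigma ** 2))

def nystrom_krr_fit(X, y, n_components, sigma, alpha=1e-3, rng=None):
    rng = np.random.default_rng(rng)
    idx = rng.choice(len(X), size=n_components, replace=False)  # sampled without replacement
    C = rbf_kernel(X, X[idx], sigma)       # (n, m) cross-kernel
    W = rbf_kernel(X[idx], X[idx], sigma)  # (m, m) landmark kernel
    beta = np.linalg.solve(C.T @ C + alpha * W, C.T @ y)
    return X[idx], beta

def nystrom_krr_predict(X_new, landmarks, beta, sigma):
    return rbf_kernel(X_new, landmarks, sigma) @ beta

rng = np.random.default_rng(0)
X = rng.uniform(-3, 3, size=(200, 1))
y = np.sin(X[:, 0]) + rng.normal(scale=0.1, size=200)
landmarks, beta = nystrom_krr_fit(X, y, n_components=30, sigma=1.0, rng=0)
print(nystrom_krr_predict(np.array([[0.0], [1.5]]), landmarks, beta, sigma=1.0))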
10 changes: 5 additions & 5 deletions legateboost/models/linear.py
@@ -9,11 +9,11 @@


class Linear(BaseModel):
"""Generalised linear model. Boosting linear models is equivalent to
fitting a single linear model where each boosting iteration is a newton
step. Note that the l2 penalty is applied to the weights of each model, as
opposed to the sum of all models. This can lead to different results when
compared to fitting a linear model with sklearn.
"""Generalised linear model. Boosting linear models is equivalent to fitting a
single linear model where each boosting iteration is a newton step. Note that
the l2 penalty is applied to the weights of each model, as opposed to the sum
of all models. This can lead to different results when compared to fitting a
linear model with sklearn.

It is recommended to normalize the data before fitting. This ensures
regularisation is evenly applied to all features and prevents numerical issues.
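The Newton-step view in the Linear docstring, as a plain-NumPy sketch: one boosting iteration solves a ridge-regularised normal-equation system for the increment, and because the l2 penalty applies to each increment rather than to the accumulated weights, repeated iterations drift toward the unpenalised least-squares fit, which is one way to see the caveat about results differing from sklearn. Illustrative only, assuming a single-output squared-error objective.

import numpy as np

def newton_step_linear(X, g, h, l2):
    # Minimise sum_i [g_i*(x_i @ d) + 0.5*h_i*(x_i @ d)**2] + 0.5*l2*||d||^2 over the increment d.
    A = X.T @ (h[:, None] * X) + l2 * np.eye(X.shape[1])
    return np.linalg.solve(A, -X.T @ g)

rng = np.random.default_rng(0)
X = rng.normal(size=(500, 3))
true_w = np.array([1.0, -2.0, 0.5])
y = X @ true_w + rng.normal(scale=0.1, size=500)

w = np.zeros(3)
for _ in range(5):                      # each boosting iteration is one Newton step
    g, h = X @ w - y, np.ones_like(y)   # gradient/Hessian of 0.5 * (pred - y)^2
    w = w + newton_step_linear(X, g, h, l2=1.0)
print(w)   # close to the unpenalised least-squares fit of true_w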