Merged
4 changes: 2 additions & 2 deletions legateboost/callbacks.py
@@ -57,8 +57,8 @@ def after_iteration(


class EarlyStopping(TrainingCallback):
"""Callback for early stopping during training. The last evaluation dataset
is used for early stopping.
"""Callback for early stopping during training. The last evaluation dataset is
used for early stopping.

Args:
rounds (int): The number of rounds to wait for improvement before stopping.
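A hedged usage sketch of the early stopping described above. LBRegressor, eval_set, and EarlyStopping(rounds=...) appear elsewhere in this PR or its docstrings; the `callbacks` keyword on fit is an assumption made here for illustration only.

import numpy as np
import legateboost as lb
from legateboost.callbacks import EarlyStopping

rng = np.random.default_rng(0)
X = rng.normal(size=(1000, 10))
y = X[:, 0] + rng.normal(scale=0.1, size=1000)
X_train, y_train, X_valid, y_valid = X[:800], y[:800], X[800:], y[800:]

model = lb.LBRegressor(n_estimators=200)
model.fit(
    X_train,
    y_train,
    eval_set=[(X_valid, y_valid)],         # the last eval set drives early stopping
    callbacks=[EarlyStopping(rounds=10)],  # stop after 10 rounds without improvement (keyword assumed)
)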
44 changes: 20 additions & 24 deletions legateboost/legateboost.py
@@ -194,11 +194,7 @@ def _get_weighted_gradient(
learning_rate: float,
) -> Tuple[cn.ndarray, cn.ndarray]:
"""Computes the weighted gradient and Hessian for the given predictions
and labels.

Also applies a pre-rounding step to ensure reproducible floating
point summation.
"""
and labels."""
# check input dimensions are consistent
assert y.ndim == pred.ndim == 2, (y.shape, pred.shape)
g, h = self._objective_instance.gradient(
@@ -317,8 +313,8 @@ def update(
eval_result: EvalResult = {},
) -> Self:
"""Update a gradient boosting model from the training set (X, y). This
method does not add any new models to the ensemble, only updates
existing models to fit the new data.
method does not add any new models to the ensemble, only updates existing
models to fit the new data.

Parameters
----------
@@ -477,8 +473,8 @@ def __iter__(self) -> Any:
return iter(self.models_)

def __mul__(self, scalar: Any) -> Self:
"""Gradient boosted models are linear in the predictions before the
non-linear link function is applied. This means that the model can be
"""Gradient boosted models are linear in the predictions before the non-
linear link function is applied. This means that the model can be
multiplied by a scalar, which subsequently scales all raw output
predictions. This is useful for ensembling models.
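A brief hedged check of the scalar-scaling behaviour described in __mul__ above, assuming an identity link so the scaling is visible directly in predict; the docstring's statement that all raw output is scaled is taken at face value.

import numpy as np
import legateboost as lb

rng = np.random.default_rng(0)
X = rng.normal(size=(200, 5))
y = 2.0 * X[:, 0] + rng.normal(scale=0.1, size=200)

model = lb.LBRegressor(n_estimators=10).fit(X, y)
halved = model * 0.5            # scales all raw output predictions
print(model.predict(X[:3]))
print(halved.predict(X[:3]))    # expected to be roughly half of the line above
# For ensembling, scaled models can then be combined (e.g. averaged), if addition is supported.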

@@ -550,8 +546,8 @@ def global_attributions(
n_samples: int = 5,
check_efficiency: bool = False,
) -> Tuple[cn.array, cn.array]:
r"""Compute global feature attributions for the model. Global
attributions show the effect of a feature on a model's loss function.
r"""Compute global feature attributions for the model. Global attributions
show the effect of a feature on a model's loss function.

We use a Shapley value approach to compute the attributions:
:math:`Sh_i(v)=\frac{1}{|N|!} \sum_{\sigma \in \mathfrak{S}_d} \big[ v([\sigma]_{i-1} \cup\{i\}) - v([\sigma]_{i-1}) \big],`
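The permutation form above can be estimated by Monte Carlo sampling over feature orderings. A minimal NumPy sketch of that estimator for a generic value function v; an illustration of the formula, not legateboost's implementation.

import numpy as np

def sampled_shapley(v, d, n_samples=100, rng=None):
    # Estimate Sh_i(v) as the average marginal contribution of feature i
    # over randomly sampled orderings sigma.
    rng = np.random.default_rng(rng)
    attributions = np.zeros(d)
    for _ in range(n_samples):
        sigma = rng.permutation(d)
        included = set()
        prev = v(included)
        for i in sigma:
            included = included | {i}
            cur = v(included)
            attributions[i] += cur - prev   # v([sigma]_{i-1} u {i}) - v([sigma]_{i-1})
            prev = cur
    return attributions / n_samples

# Additive toy game: the Shapley values recover each feature's importance exactly.
importance = np.array([3.0, 1.0, 0.5])
v = lambda S: sum(importance[j] for j in S)
print(sampled_shapley(v, d=3, n_samples=50))   # [3.0, 1.0, 0.5]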
@@ -612,11 +608,10 @@ def local_attributions(
n_samples: int = 5,
check_efficiency: bool = False,
) -> Tuple[cn.array, cn.array]:
r"""Local feature attributions for model predictions. Shows the effect
of a feature on each output prediction. See the definition of Shapley
values in :func:`~legateboost.BaseModel.global_attributions`, where the
:math:`v` function is here the model prediction instead of the loss
function.
r"""Local feature attributions for model predictions. Shows the effect of a
feature on each output prediction. See the definition of Shapley values in
:func:`~legateboost.BaseModel.global_attributions`, where the :math:`v`
function is here the model prediction instead of the loss function.

Parameters
----------
@@ -750,8 +745,8 @@ def partial_fit(
eval_set: List[Tuple[cn.ndarray, ...]] = [],
eval_result: EvalResult = {},
) -> LBBase:
"""This method is used for incremental (online) training of the model.
An additional `n_estimators` models will be added to the ensemble.
"""This method is used for incremental (online) training of the model. An
additional `n_estimators` models will be added to the ensemble.

Parameters
----------
@@ -928,8 +923,8 @@ def partial_fit(
eval_result: EvalResult = {},
) -> LBBase:
"""This method is used for incremental fitting on a batch of samples.
Requires the classes to be provided up front, as they may not be
inferred from the first batch.
Requires the classes to be provided up front, as they may not be inferred
from the first batch.

Parameters
----------
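A hedged incremental-training sketch matching the partial_fit hunks above: each call adds n_estimators models fitted to the new batch. LBClassifier and the `classes` keyword are assumptions for illustration (the docstring only states that classes must be provided up front); predict_proba appears later in this diff.

import numpy as np
import legateboost as lb

rng = np.random.default_rng(0)
model = lb.LBClassifier(n_estimators=5)   # 5 new models are appended per call

for _ in range(4):
    X = rng.normal(size=(256, 8))
    y = (X[:, 0] + rng.normal(scale=0.5, size=256) > 0).astype(np.int64)
    # Classes are supplied up front because a given batch may not contain every label.
    model = model.partial_fit(X, y, classes=np.array([0, 1]))  # keyword assumed

print(model.predict_proba(X[:5]))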
@@ -1033,8 +1028,8 @@ def fit(
return self

def predict_raw(self, X: cn.ndarray) -> cn.ndarray:
"""Predict pre-transformed values for samples in X. E.g. before
applying a sigmoid function.
"""Predict pre-transformed values for samples in X. E.g. before applying a
sigmoid function.

Parameters
----------
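A plain-NumPy illustration (no legateboost calls) of the relationship the predict_raw docstring describes: raw, pre-transformed scores become probabilities once the link function is applied, a sigmoid in the binary case and a softmax-style transform in the multi-class case (the latter is an assumption here).

import numpy as np

def sigmoid(f):
    return 1.0 / (1.0 + np.exp(-f))

def softmax(f):
    e = np.exp(f - f.max(axis=-1, keepdims=True))
    return e / e.sum(axis=-1, keepdims=True)

raw_binary = np.array([-2.0, 0.0, 1.5])    # stand-in for pre-transformed (raw) scores
print(sigmoid(raw_binary))                 # probabilities after the sigmoid link

raw_multi = np.array([[0.2, 1.0, -0.5]])   # one sample, three classes (stand-in values)
print(softmax(raw_multi))                  # rows sum to 1 after the softmax-style link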
@@ -1064,7 +1059,7 @@ def predict_proba(self, X: cn.ndarray) -> cn.ndarray:
Returns
-------

y :
probabilities:
The predicted class probabilities for each sample in X.
"""
X = _lb_check_X(X)
@@ -1091,4 +1086,5 @@ def predict(self, X: cn.ndarray) -> cn.ndarray:
y :
The predicted class labels for each sample in X.
"""
return cn.argmax(self.predict_proba(X), axis=1)
check_is_fitted(self)
return self._objective_instance.output_class(self.predict_proba(X))
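The change above routes class prediction through the objective's output_class rather than a bare argmax, which matters once classes are not mutually exclusive (e.g. a multi-label objective). A hedged sketch of the distinction; these stand-ins are illustrative, not legateboost's actual implementations.

import numpy as np

def output_class_softmax(proba):
    # Mutually exclusive classes: pick the single most probable class.
    return np.argmax(proba, axis=1)

def output_class_multilabel(proba, threshold=0.5):
    # Independent labels: threshold each probability separately (threshold assumed).
    return (proba >= threshold).astype(np.int64)

proba = np.array([[0.7, 0.6, 0.1]])
print(output_class_softmax(proba))      # [0]         -- exactly one class
print(output_class_multilabel(proba))   # [[1, 1, 0]] -- several labels can be active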
31 changes: 26 additions & 5 deletions legateboost/metrics.py
@@ -17,6 +17,7 @@
"GammaDevianceMetric",
"QuantileMetric",
"LogLossMetric",
"MultiLabelMetric",
"ExponentialMetric",
]

@@ -144,8 +145,8 @@ def name(self) -> str:


class GammaLLMetric(BaseMetric):
"""The mean negative log likelihood of the labels, given parameters
predicted by the model."""
"""The mean negative log likelihood of the labels, given parameters predicted
by the model."""

@override
def metric(self, y: cn.ndarray, pred: cn.ndarray, w: cn.ndarray) -> cn.ndarray:
@@ -252,8 +253,8 @@ def name(self) -> str:


class LogLossMetric(BaseMetric):
"""Class for computing the logarithmic loss (logloss) metric between the
true labels and predicted labels.
"""Class for computing the logarithmic loss (logloss) metric between the true
labels and predicted labels.

For binary classification:

@@ -273,7 +274,7 @@ class LogLossMetric(BaseMetric):
def metric(self, y: cn.ndarray, pred: cn.ndarray, w: cn.ndarray) -> cn.ndarray:
y = y.squeeze()
eps = cn.finfo(pred.dtype).eps
cn.clip(pred, eps, 1 - eps, out=pred)
pred = cn.clip(pred, eps, 1 - eps)

w_sum = w.sum()

@@ -296,6 +297,26 @@ def name(self) -> str:
return "log_loss"


class MultiLabelMetric(BaseMetric):
"""Multi-label metric is a binary log-loss metric averaged over multiple
labels.

See also:
:class:`legateboost.objectives.MultiLabelObjective`
""" # noqa: E501

def metric(self, y: cn.ndarray, pred: cn.ndarray, w: cn.ndarray) -> cn.ndarray:
y = y.squeeze()
eps = cn.finfo(pred.dtype).eps
pred = cn.clip(pred, eps, 1 - eps)
w_sum = w.sum()
logloss = -(y * cn.log(pred) + (self.one - y) * cn.log(self.one - pred))
return (logloss * w[:, cn.newaxis]).sum() / w_sum

def name(self) -> str:
return "multi_label"


class ExponentialMetric(BaseMetric):
"""Class for computing the exponential loss metric.

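A plain-NumPy restatement of the MultiLabelMetric computation added above (per-label binary log loss, summed over labels and weight-averaged over samples), handy for sanity checks; self.one is replaced by a literal 1.0, and the out-of-place clip mirrors the LogLossMetric change in this diff.

import numpy as np

def multi_label_logloss(y, pred, w):
    eps = np.finfo(pred.dtype).eps
    pred = np.clip(pred, eps, 1 - eps)    # out-of-place clip, as in the updated metric
    logloss = -(y * np.log(pred) + (1.0 - y) * np.log(1.0 - pred))
    return (logloss * w[:, np.newaxis]).sum() / w.sum()

y = np.array([[1.0, 0.0], [0.0, 1.0]])
pred = np.array([[0.9, 0.2], [0.4, 0.7]])
w = np.ones(2)
print(multi_label_logloss(y, pred, w))    # ~0.598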
9 changes: 4 additions & 5 deletions legateboost/models/base_model.py
@@ -11,9 +11,9 @@
class BaseModel(PickleCupynumericMixin, ABC):
"""Base class for all models in LegateBoost.

Defines the interface for fitting, updating, and predicting a model,
as well as string representation and equality comparison. Implement
these methods to create a custom model.
Defines the interface for fitting, updating, and predicting a model, as well
as string representation and equality comparison. Implement these methods to
create a custom model.
"""

def set_random_state(self, random_state: np.random.RandomState) -> "BaseModel":
@@ -27,8 +27,7 @@ def fit(
g: cn.ndarray,
h: cn.ndarray,
) -> "BaseModel":
"""Fit the model to a second order Taylor expansion of the loss
function.
"""Fit the model to a second order Taylor expansion of the loss function.

Parameters
----------
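What fitting "a second order Taylor expansion of the loss" means concretely: given per-example gradients g and Hessians h, the best constant update minimises sum_i (g_i * f + 0.5 * h_i * f^2), giving f* = -sum(g) / sum(h). A minimal NumPy sketch of that closed form (the math behind a BaseModel fit, not an actual BaseModel subclass).

import numpy as np

def best_constant_update(g, h, l2=0.0):
    # Minimiser of sum_i (g_i * f + 0.5 * h_i * f^2) + 0.5 * l2 * f^2, per output column.
    return -g.sum(axis=0) / (h.sum(axis=0) + l2)

# Squared-error toy case: g = pred - y and h = 1, so the update is the mean residual.
y = np.array([1.0, 2.0, 3.0])
pred = np.zeros(3)
g = (pred - y)[:, np.newaxis]
h = np.ones_like(g)
print(best_constant_update(g, h))   # [2.0]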
6 changes: 3 additions & 3 deletions legateboost/models/krr.py
@@ -35,9 +35,9 @@ def rbf(x: cn.ndarray, sigma: float) -> cn.ndarray:


class KRR(BaseModel):
"""Kernel Ridge Regression model using the Nyström approximation. The
accuracy of the approximation is governed by the parameter `n_components`
<= `n`. Effectively, `n_components` rows will be randomly sampled (without
"""Kernel Ridge Regression model using the Nyström approximation. The accuracy
of the approximation is governed by the parameter `n_components` <= `n`.
Effectively, `n_components` rows will be randomly sampled (without
replacement) from X in each boosting iteration.

The kernel is fixed to be the RBF kernel:
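The RBF formula itself is truncated in this hunk, so here is a compact sketch of the general technique the docstring names: sample n_components landmark rows without replacement, build the cross- and landmark-kernel blocks, and solve a regularised least-squares system. The kernel parametrisation (sigma) and the regulariser alpha are assumptions; this mirrors textbook Nystrom kernel ridge regression, not legateboost's exact code.

import numpy as np

def rbf_kernel(A, B, sigma):
    d2 = ((A[:, None, :] - B[None, :, :]) ** 2).sum(-1)
    return np.exp(-d2 / (2.0 * sigma ** 2))

def nystrom_krr_fit(X, y, n_components, sigma, alpha=1e-3, rng=None):
    rng = np.random.default_rng(rng)
    idx = rng.choice(len(X), size=n_components, replace=False)  # sampled without replacement
    C = rbf_kernel(X, X[idx], sigma)       # (n, m) cross-kernel
    W = rbf_kernel(X[idx], X[idx], sigma)  # (m, m) landmark kernel
    beta = np.linalg.solve(C.T @ C + alpha * W, C.T @ y)
    return X[idx], beta

def nystrom_krr_predict(X_new, landmarks, beta, sigma):
    return rbf_kernel(X_new, landmarks, sigma) @ beta

rng = np.random.default_rng(0)
X = rng.uniform(-3, 3, size=(200, 1))
y = np.sin(X[:, 0]) + rng.normal(scale=0.1, size=200)
landmarks, beta = nystrom_krr_fit(X, y, n_components=30, sigma=1.0, rng=0)
print(nystrom_krr_predict(np.array([[0.0], [1.5]]), landmarks, beta, sigma=1.0))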
10 changes: 5 additions & 5 deletions legateboost/models/linear.py
@@ -9,11 +9,11 @@


class Linear(BaseModel):
"""Generalised linear model. Boosting linear models is equivalent to
fitting a single linear model where each boosting iteration is a newton
step. Note that the l2 penalty is applied to the weights of each model, as
opposed to the sum of all models. This can lead to different results when
compared to fitting a linear model with sklearn.
"""Generalised linear model. Boosting linear models is equivalent to fitting a
single linear model where each boosting iteration is a newton step. Note that
the l2 penalty is applied to the weights of each model, as opposed to the sum
of all models. This can lead to different results when compared to fitting a
linear model with sklearn.

It is recommended to normalize the data before fitting. This ensures
regularisation is evenly applied to all features and prevents numerical issues.
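The Newton-step view in the Linear docstring, as a plain-NumPy sketch: one boosting iteration solves a ridge-regularised normal-equation system for the increment, and because the l2 penalty applies to each increment rather than to the accumulated weights, repeated iterations drift toward the unpenalised least-squares fit, which is one way to see the caveat about results differing from sklearn. Illustrative only, assuming a single-output squared-error objective.

import numpy as np

def newton_step_linear(X, g, h, l2):
    # Minimise sum_i [g_i*(x_i @ d) + 0.5*h_i*(x_i @ d)**2] + 0.5*l2*||d||^2 over the increment d.
    A = X.T @ (h[:, None] * X) + l2 * np.eye(X.shape[1])
    return np.linalg.solve(A, -X.T @ g)

rng = np.random.default_rng(0)
X = rng.normal(size=(500, 3))
true_w = np.array([1.0, -2.0, 0.5])
y = X @ true_w + rng.normal(scale=0.1, size=500)

w = np.zeros(3)
for _ in range(5):                      # each boosting iteration is one Newton step
    g, h = X @ w - y, np.ones_like(y)   # gradient/Hessian of 0.5 * (pred - y)^2
    w = w + newton_step_linear(X, g, h, l2=1.0)
print(w)   # close to the unpenalised least-squares fit of true_w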