2 changes: 1 addition & 1 deletion coremltools/_deps/__init__.py
@@ -94,7 +94,7 @@ def __get_sklearn_version(version):

# ---------------------------------------------------------------------------------------
_HAS_XGBOOST = True
_XGBOOST_MAX_VERSION = "1.4.2"
_XGBOOST_MAX_VERSION = "3.0.2"
try:
import xgboost
_warn_if_above_max_supported_version("XGBoost", xgboost.__version__, _XGBOOST_MAX_VERSION)
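
For context, _XGBOOST_MAX_VERSION only feeds the soft warning emitted at import time by _warn_if_above_max_supported_version. A minimal sketch of that kind of version gate, assuming packaging is installed; it approximates the coremltools helper rather than reproducing it:

    import warnings

    import xgboost
    from packaging.version import Version

    _XGBOOST_MAX_VERSION = "3.0.2"

    def warn_if_above_max_supported_version(pkg_name, current_version, max_version):
        # Warn, rather than fail, when the installed package is newer than the
        # latest version the converters have been tested against.
        if Version(current_version) > Version(max_version):
            warnings.warn(
                f"{pkg_name} version {current_version} has not been tested with "
                f"coremltools; the most recent tested version is {max_version}."
            )

    warn_if_above_max_supported_version("XGBoost", xgboost.__version__, _XGBOOST_MAX_VERSION)
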
12 changes: 9 additions & 3 deletions coremltools/converters/xgboost/_tree_ensemble.py
@@ -159,6 +159,7 @@ def convert_tree_ensemble(
import os

feature_map = None
base_score = None
if isinstance(
model, (_xgboost.core.Booster, _xgboost.XGBRegressor, _xgboost.XGBClassifier)
):
@@ -199,6 +200,8 @@ def convert_tree_ensemble(
model = model.get_booster()
else:
model = model.booster()
config = json.loads(model.save_config())
base_score = float(config['learner']['learner_model_param']['base_score'])

# Xgboost sometimes has feature names in there. Sometimes does not.
if (feature_names is None) and (model.feature_names is None):
@@ -212,6 +215,8 @@
# When XGboost model artifact does not have feature names
# (seems to be the default in new Xgboost releases),
# but the user provides them, use them as they are expecting later.
if not isinstance(feature_names, list):
feature_names = list(feature_names)
model.feature_names=feature_names

xgb_model_str = model.get_dump(with_stats=True, dump_format="json")
@@ -247,9 +252,10 @@
class_labels = range(n_classes)
if n_classes == 2:
# if we have only 2 classes we only have one sequence of estimators
base_prediction = [0.0]
base_prediction = [base_score if base_score is not None else 0.0]
else:
base_prediction = [0.0 for c in range(n_classes)]
bs = base_score if base_score is not None else 0.0
base_prediction = [bs for c in range(n_classes)]
# target here is the equivalent of output_features in scikit learn
mlkit_tree = TreeEnsembleClassifier(feature_names, class_labels, target)
mlkit_tree.set_default_prediction_value(base_prediction)
@@ -259,7 +265,7 @@
mlkit_tree.set_post_evaluation_transform("Classification_SoftMax")
else:
mlkit_tree = _TreeEnsembleRegressor(feature_names, target)
mlkit_tree.set_default_prediction_value(0.5)
mlkit_tree.set_default_prediction_value(base_score if base_score is not None else 0.5)

for xgb_tree_id, xgb_tree_str in enumerate(xgb_model_str):
if mode == "classifier" and n_classes > 2:
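
Note on the change above: the converter now reads base_score out of the booster's saved JSON config instead of assuming a hardcoded default. A minimal standalone sketch of that lookup; the synthetic data and n_estimators value are illustrative only, and, as the new test below notes, recent XGBoost releases estimate base_score from the training targets:

    import json

    import numpy as np
    import xgboost

    # Train a small regressor; newer XGBoost estimates the intercept
    # (base_score) from the targets instead of defaulting to 0.5.
    X = np.random.rand(200, 4)
    y = 10.0 + np.random.rand(200)  # targets nowhere near the old 0.5 default
    model = xgboost.XGBRegressor(n_estimators=5).fit(X, y)

    # Same lookup the converter now performs before building the Core ML tree.
    config = json.loads(model.get_booster().save_config())
    base_score = float(config["learner"]["learner_model_param"]["base_score"])
    print(base_score)  # roughly y.mean() here, not 0.5

Because the raw XGBoost prediction is this intercept plus the summed leaf values, carrying it into set_default_prediction_value keeps the converted model's outputs aligned with XGBoost instead of shifting every prediction by (base_score - 0.5).
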
@@ -307,3 +307,42 @@ def test_boston_housing_parameter_stress_test(self):
print("Testing a total of %s cases. This could take a while" % len(args))
for it, arg in enumerate(args):
self._train_convert_evaluate_assert(arg)

@unittest.skipIf(not _HAS_XGBOOST, "Missing xgboost. Skipping")
class XGBoostBaseScoreTest(unittest.TestCase):
@classmethod
def setUpClass(self):
scikit_data = load_boston()
self.X = scikit_data["data"]
self.target = scikit_data["target"]
self.feature_names = scikit_data["feature_names"]
self.output_name = "target"

def test_base_score_extraction(self):
"""Verify that the converter reads base_score from the model config
instead of using the hardcoded default (0.5)."""
import json

# Train a model - XGBoost 3.x auto-estimates base_score from data
xgb_model = xgboost.XGBRegressor(n_estimators=5)
xgb_model.fit(self.X, self.target)

# Extract the actual base_score from the booster config
booster = xgb_model.get_booster()
config = json.loads(booster.save_config())
actual_base_score = float(
config['learner']['learner_model_param']['base_score']
)

# Convert and check the default_prediction_value matches
spec = xgb_converter.convert(
xgb_model, self.feature_names, self.output_name, force_32bit_float=False
)
tree_ensemble = spec.get_spec().treeEnsembleRegressor.treeEnsemble
default_value = tree_ensemble.basePredictionValue[0]
self.assertAlmostEqual(
default_value,
actual_base_score,
places=5,
msg="Converter should use model's actual base_score, not hardcoded 0.5",
)
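
The new test covers the regressor path. A rough spot-check for the binary-classifier branch changed above could look like the sketch below; it assumes the public coremltools.converters.xgboost.convert wrapper forwards mode and n_classes to convert_tree_ensemble, and the data and feature names are made up:

    import json

    import numpy as np
    import xgboost
    from coremltools.converters import xgboost as xgb_converter

    # Small synthetic binary-classification problem (illustrative only).
    X = np.random.rand(200, 4)
    y = (X[:, 0] > 0.5).astype(int)
    clf = xgboost.XGBClassifier(n_estimators=5).fit(X, y)

    # base_score the converter is expected to pick up from the booster config.
    config = json.loads(clf.get_booster().save_config())
    base_score = float(config["learner"]["learner_model_param"]["base_score"])

    # Convert (assumed signature, mirroring convert_tree_ensemble's parameters)
    # and compare against the classifier spec's base prediction value.
    mlmodel = xgb_converter.convert(
        clf, feature_names=["f0", "f1", "f2", "f3"], mode="classifier", n_classes=2
    )
    tree = mlmodel.get_spec().treeEnsembleClassifier.treeEnsemble
    assert abs(tree.basePredictionValue[0] - base_score) < 1e-5
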
2 changes: 1 addition & 1 deletion reqs/test.pip
@@ -11,7 +11,7 @@ scipy==1.8.1; python_version >= '3.8' and python_version < '3.11' and platform_m
scipy==1.9.2; python_version == '3.11' and platform_machine != "arm64"
scipy; platform_machine == "arm64"

xgboost==1.4.2; platform_machine != "arm64"
xgboost==3.0.2; platform_machine != "arm64"

# coremltools.optimize.torch
filelock==3.6.0