From b0d9b3302ef3cb73b12349ecb0ca0bfd8cb4ea8d Mon Sep 17 00:00:00 2001 From: Siddharth Date: Thu, 5 Mar 2026 21:07:59 +0530 Subject: [PATCH 1/4] Update XGBoost max supported version to 3.0.2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The XGBoost converter is already compatible with 3.x — the JSON tree dump format and API (get_booster, get_dump, feature_names, copy) are unchanged. Tested with XGBRegressor, XGBClassifier (binary and multi-class), and raw Booster conversion. --- coremltools/_deps/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/coremltools/_deps/__init__.py b/coremltools/_deps/__init__.py index fdf090875..e47f65349 100644 --- a/coremltools/_deps/__init__.py +++ b/coremltools/_deps/__init__.py @@ -94,7 +94,7 @@ def __get_sklearn_version(version): # --------------------------------------------------------------------------------------- _HAS_XGBOOST = True -_XGBOOST_MAX_VERSION = "1.4.2" +_XGBOOST_MAX_VERSION = "3.0.2" try: import xgboost _warn_if_above_max_supported_version("XGBoost", xgboost.__version__, _XGBOOST_MAX_VERSION) From 694755bde6c7eaf7e3e2dd614d1de0110ca6c4dd Mon Sep 17 00:00:00 2001 From: Siddharth Date: Sat, 7 Mar 2026 06:50:49 +0530 Subject: [PATCH 2/4] Update XGBoost test dependency to 3.0.2 Update the pinned XGBoost version in test requirements to match the new max supported version. --- reqs/test.pip | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/reqs/test.pip b/reqs/test.pip index 9c26f3d61..3b6536d4d 100644 --- a/reqs/test.pip +++ b/reqs/test.pip @@ -11,7 +11,7 @@ scipy==1.8.1; python_version >= '3.8' and python_version < '3.11' and platform_m scipy==1.9.2; python_version == '3.11' and platform_machine != "arm64" scipy; platform_machine == "arm64" -xgboost==1.4.2; platform_machine != "arm64" +xgboost==3.0.2; platform_machine != "arm64" # coremltools.optimize.torch filelock==3.6.0 From 78cd8bec9586400d19b15d4077318989c3ee5b31 Mon Sep 17 00:00:00 2001 From: Siddharth Date: Sat, 7 Mar 2026 07:43:27 +0530 Subject: [PATCH 3/4] Fix XGBoost 3.x compatibility: base_score and feature_names - Read base_score from booster config instead of hardcoding 0.5/0.0 - Convert feature_names to list for XGBoost 3.x compatibility - Add test verifying base_score is correctly propagated --- .../converters/xgboost/_tree_ensemble.py | 15 +++++-- .../test_boosted_trees_regression_numeric.py | 39 +++++++++++++++++++ 2 files changed, 51 insertions(+), 3 deletions(-) diff --git a/coremltools/converters/xgboost/_tree_ensemble.py b/coremltools/converters/xgboost/_tree_ensemble.py index bc8c06b00..bbb55aba6 100644 --- a/coremltools/converters/xgboost/_tree_ensemble.py +++ b/coremltools/converters/xgboost/_tree_ensemble.py @@ -159,6 +159,7 @@ def convert_tree_ensemble( import os feature_map = None + base_score = None if isinstance( model, (_xgboost.core.Booster, _xgboost.XGBRegressor, _xgboost.XGBClassifier) ): @@ -199,6 +200,11 @@ def convert_tree_ensemble( model = model.get_booster() else: model = model.booster() + try: + config = json.loads(model.save_config()) + base_score = float(config['learner']['learner_model_param']['base_score']) + except (KeyError, ValueError, AttributeError): + pass # Xgboost sometimes has feature names in there. Sometimes does not. if (feature_names is None) and (model.feature_names is None): @@ -212,6 +218,8 @@ def convert_tree_ensemble( # When XGboost model artifact does not have feature names # (seems to be the default in new Xgboost releases), # but the user provides them, use them as they are expecting later. + if not isinstance(feature_names, list): + feature_names = list(feature_names) model.feature_names=feature_names xgb_model_str = model.get_dump(with_stats=True, dump_format="json") @@ -247,9 +255,10 @@ def convert_tree_ensemble( class_labels = range(n_classes) if n_classes == 2: # if we have only 2 classes we only have one sequence of estimators - base_prediction = [0.0] + base_prediction = [base_score if base_score is not None else 0.0] else: - base_prediction = [0.0 for c in range(n_classes)] + bs = base_score if base_score is not None else 0.0 + base_prediction = [bs for c in range(n_classes)] # target here is the equivalent of output_features in scikit learn mlkit_tree = TreeEnsembleClassifier(feature_names, class_labels, target) mlkit_tree.set_default_prediction_value(base_prediction) @@ -259,7 +268,7 @@ def convert_tree_ensemble( mlkit_tree.set_post_evaluation_transform("Classification_SoftMax") else: mlkit_tree = _TreeEnsembleRegressor(feature_names, target) - mlkit_tree.set_default_prediction_value(0.5) + mlkit_tree.set_default_prediction_value(base_score if base_score is not None else 0.5) for xgb_tree_id, xgb_tree_str in enumerate(xgb_model_str): if mode == "classifier" and n_classes > 2: diff --git a/coremltools/test/xgboost_tests/test_boosted_trees_regression_numeric.py b/coremltools/test/xgboost_tests/test_boosted_trees_regression_numeric.py index cc3b38aa3..bf6e72ecc 100644 --- a/coremltools/test/xgboost_tests/test_boosted_trees_regression_numeric.py +++ b/coremltools/test/xgboost_tests/test_boosted_trees_regression_numeric.py @@ -307,3 +307,42 @@ def test_boston_housing_parameter_stress_test(self): print("Testing a total of %s cases. This could take a while" % len(args)) for it, arg in enumerate(args): self._train_convert_evaluate_assert(arg) + +@unittest.skipIf(not _HAS_XGBOOST, "Missing xgboost. Skipping") +class XGBoostBaseScoreTest(unittest.TestCase): + @classmethod + def setUpClass(self): + scikit_data = load_boston() + self.X = scikit_data["data"] + self.target = scikit_data["target"] + self.feature_names = scikit_data["feature_names"] + self.output_name = "target" + + def test_base_score_extraction(self): + """Verify that the converter reads base_score from the model config + instead of using the hardcoded default (0.5).""" + import json + + # Train a model - XGBoost 3.x auto-estimates base_score from data + xgb_model = xgboost.XGBRegressor(n_estimators=5) + xgb_model.fit(self.X, self.target) + + # Extract the actual base_score from the booster config + booster = xgb_model.get_booster() + config = json.loads(booster.save_config()) + actual_base_score = float( + config['learner']['learner_model_param']['base_score'] + ) + + # Convert and check the default_prediction_value matches + spec = xgb_converter.convert( + xgb_model, self.feature_names, self.output_name, force_32bit_float=False + ) + tree_ensemble = spec.get_spec().treeEnsembleRegressor.treeEnsemble + default_value = tree_ensemble.basePredictionValue[0] + self.assertAlmostEqual( + default_value, + actual_base_score, + places=5, + msg="Converter should use model's actual base_score, not hardcoded 0.5", + ) From f1e799e61b90a5639f0a0887a4c1896bb5e2797d Mon Sep 17 00:00:00 2001 From: Siddharth Date: Tue, 10 Mar 2026 07:18:44 +0530 Subject: [PATCH 4/4] Remove unnecessary try/except around base_score extraction save_config() exists in all supported XGBoost versions (1.4.2+), so silencing exceptions would hide real breakage. --- coremltools/converters/xgboost/_tree_ensemble.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/coremltools/converters/xgboost/_tree_ensemble.py b/coremltools/converters/xgboost/_tree_ensemble.py index bbb55aba6..c617eae8c 100644 --- a/coremltools/converters/xgboost/_tree_ensemble.py +++ b/coremltools/converters/xgboost/_tree_ensemble.py @@ -200,11 +200,8 @@ def convert_tree_ensemble( model = model.get_booster() else: model = model.booster() - try: - config = json.loads(model.save_config()) - base_score = float(config['learner']['learner_model_param']['base_score']) - except (KeyError, ValueError, AttributeError): - pass + config = json.loads(model.save_config()) + base_score = float(config['learner']['learner_model_param']['base_score']) # Xgboost sometimes has feature names in there. Sometimes does not. if (feature_names is None) and (model.feature_names is None):