From 19eaf625023a35c7299ac505541876d1214fd473 Mon Sep 17 00:00:00 2001
From: xieofxie
Date: Mon, 29 Jun 2026 16:38:29 +0800
Subject: [PATCH] fix(build): honor loader.model_class on pretrained load path

The pretrained branch of _load_model dropped the explicit
config.loader.model_class when calling load_hf_model, so class
resolution fell back to the (model_type, task) lookup alone.

After #836 began threading config.loader.model_type as a build-variant
override, CLIP feature-extraction broke: the variant tag
'clip_text_model' is not a key in MODEL_CLASS_MAPPING (which is keyed on
the native 'clip'), so resolution fell through to AutoModel and loaded
the full CLIPModel. Export with text-only inputs then failed with
'You have to specify pixel_values'.

Forwarding model_class lets resolve_task take its Stage-0 user-class
path, which resolves CLIPTextModelWithProjection directly and ignores
the model_type override. Safe for the qwen3 transformer-only variant
since Stage 0 only activates when model_class is set.
---
 src/winml/modelkit/build/hf.py |  1 +
 tests/unit/build/test_hf.py    | 17 +++++++++++++++++
 2 files changed, 18 insertions(+)

diff --git a/src/winml/modelkit/build/hf.py b/src/winml/modelkit/build/hf.py
index b2897b952..a0ee7d22e 100644
--- a/src/winml/modelkit/build/hf.py
+++ b/src/winml/modelkit/build/hf.py
@@ -527,6 +527,7 @@ def _load_model(
         pytorch_model, _, _ = load_hf_model(
             model_name_or_path=model_id,
             task=task,
+            model_class=config.loader.model_class,
             trust_remote_code=effective_trust,
             hf_config=hf_config,
             model_type=model_type,
diff --git a/tests/unit/build/test_hf.py b/tests/unit/build/test_hf.py
index e59c419f4..5cbd3d358 100644
--- a/tests/unit/build/test_hf.py
+++ b/tests/unit/build/test_hf.py
@@ -322,6 +322,23 @@ def test_pretrained_weights_calls_load_model(
         call_args = mock_pipeline["load"].call_args
         assert call_args[0][1] == "bert-base"  # model_id
 
+    def test_pretrained_load_threads_model_class(self, sample_config) -> None:
+        """Pretrained load path forwards ``loader.model_class`` to load_hf_model.
+
+        Regression (#836): the pretrained branch dropped the explicit
+        ``model_class``, so e.g. CLIP feature-extraction resolved to the full
+        CLIPModel (which requires ``pixel_values``) instead of the configured
+        CLIPTextModelWithProjection, failing export with text-only inputs.
+        """
+        from winml.modelkit.build.hf import _load_model
+
+        with patch("winml.modelkit.loader.load_hf_model") as m_load:
+            m_load.return_value = (MagicMock(), MagicMock(), "image-classification")
+            _load_model(sample_config, "test-model", trust_remote_code=False)
+
+        m_load.assert_called_once()
+        assert m_load.call_args.kwargs["model_class"] == "AutoModelForImageClassification"
+
     def test_pre_loaded_model_skips_load(
         self, tmp_path: Path, sample_config, mock_pipeline
     ) -> None: