Patch summary: forward the configured ``loader.model_class`` from
``_load_model`` to ``load_hf_model`` and add a unit test asserting that the
keyword is passed through.

NOTE(review): this patch was recovered from a copy whose line breaks and
indentation had been collapsed. Line breaks are restored to match the hunk
line counts; the leading indentation of every context and added line is a
best-effort reconstruction -- confirm against the target files (applying may
need ``git apply --ignore-whitespace``).

diff --git a/src/winml/modelkit/build/hf.py b/src/winml/modelkit/build/hf.py
index b2897b952..a0ee7d22e 100644
--- a/src/winml/modelkit/build/hf.py
+++ b/src/winml/modelkit/build/hf.py
@@ -527,6 +527,7 @@ def _load_model(
     pytorch_model, _, _ = load_hf_model(
         model_name_or_path=model_id,
         task=task,
+        model_class=config.loader.model_class,
         trust_remote_code=effective_trust,
         hf_config=hf_config,
         model_type=model_type,
diff --git a/tests/unit/build/test_hf.py b/tests/unit/build/test_hf.py
index e59c419f4..5cbd3d358 100644
--- a/tests/unit/build/test_hf.py
+++ b/tests/unit/build/test_hf.py
@@ -322,6 +322,23 @@ def test_pretrained_weights_calls_load_model(
         call_args = mock_pipeline["load"].call_args
         assert call_args[0][1] == "bert-base"  # model_id
 
+    def test_pretrained_load_threads_model_class(self, sample_config) -> None:
+        """Pretrained load path forwards ``loader.model_class`` to load_hf_model.
+
+        Regression (#836): the pretrained branch dropped the explicit
+        ``model_class``, so e.g. CLIP feature-extraction resolved to the full
+        CLIPModel (which requires ``pixel_values``) instead of the configured
+        CLIPTextModelWithProjection, failing export with text-only inputs.
+        """
+        from winml.modelkit.build.hf import _load_model
+
+        with patch("winml.modelkit.loader.load_hf_model") as m_load:
+            m_load.return_value = (MagicMock(), MagicMock(), "image-classification")
+            _load_model(sample_config, "test-model", trust_remote_code=False)
+
+        m_load.assert_called_once()
+        assert m_load.call_args.kwargs["model_class"] == "AutoModelForImageClassification"
+
     def test_pre_loaded_model_skips_load(
         self, tmp_path: Path, sample_config, mock_pipeline
     ) -> None: