From ef42e477ca210de064439df768bc4130c965d630 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Mon, 11 May 2026 08:18:11 +0000
Subject: [PATCH 01/35] Initial plan

From 00571f01ef82f4cd4d821c0d7b391eac38904cef Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Mon, 11 May 2026 08:33:38 +0000
Subject: [PATCH 02/35] feat: add CLI test-model config for HF inputs

Agent-Logs-Url: https://github.com/microsoft/Olive/sessions/e2149604-9db1-438e-a31f-01a53886093d
Co-authored-by: xadupre <22452781+xadupre@users.noreply.github.com>
---
 olive/cli/base.py               |  7 +++++
 olive/common/hf/model_io.py     |  8 ++++--
 olive/common/hf/utils.py        | 49 ++++++++++++++++++++++++++++-----
 olive/model/handler/hf.py       | 11 ++++++--
 olive/model/handler/mixin/hf.py | 15 ++++++++--
 test/cli/test_base.py           | 16 +++++++++++
 test/cli/test_cli.py            | 19 +++++++++++++
 test/common/test_hf.py          | 21 ++++++++++++++
 8 files changed, 133 insertions(+), 13 deletions(-)

diff --git a/olive/cli/base.py b/olive/cli/base.py
index 75fd2816c7..9355a0d301 100644
--- a/olive/cli/base.py
+++ b/olive/cli/base.py
@@ -105,6 +105,8 @@ def _get_hf_input_model(args: Namespace, model_path: OLIVE_RESOURCE_ANNOTATIONS)
         input_model["adapter_path"] = args.adapter_path
     if getattr(args, "trust_remote_code", None) is not None:
         input_model["load_kwargs"]["trust_remote_code"] = args.trust_remote_code
+    if getattr(args, "test", False):
+        input_model["test_model_config"] = {"hidden_layers": 2}
     return input_model
 
 
@@ -371,6 +373,11 @@
     model_group.add_argument(
         "--trust_remote_code", action="store_true", help="Trust remote code when loading a huggingface model."
     )
+    model_group.add_argument(
+        "--test",
+        action="store_true",
+        help="Use a randomly initialized test model with the same Hugging Face architecture and 2 hidden layers.",
+    )
 
     if enable_hf_adapter:
         assert enable_hf, "enable_hf must be True when enable_hf_adapter is True."

diff --git a/olive/common/hf/model_io.py b/olive/common/hf/model_io.py
index a3ebd73058..7d66d57574 100644
--- a/olive/common/hf/model_io.py
+++ b/olive/common/hf/model_io.py
@@ -27,6 +27,7 @@ def get_model_io_config(
     model_name: str,
     task: str,
     model: Optional["PreTrainedModel"] = None,
+    test_model_config: Optional[dict[str, Any]] = None,
     **kwargs,
 ) -> Optional[dict[str, Any]]:
     """Get the input/output config for the model and task.
@@ -35,6 +36,7 @@ def get_model_io_config(
         model_name: The model name or path.
         task: The task type (e.g., "text-generation", "text-classification").
         model: Optional loaded model for input signature inspection.
+        test_model_config: Optional overrides for creating a lightweight random test model from the same config.
         **kwargs: Additional arguments including use_cache.
 
     Returns:
@@ -68,7 +70,7 @@ def get_model_io_config(
         return None
 
     # Get model config
-    model_config = get_model_config(model_name, **kwargs)
+    model_config = get_model_config(model_name, test_model_config=test_model_config, **kwargs)
 
     # Handle PEFT models
     actual_model = model
@@ -92,6 +94,7 @@ def get_model_dummy_input(
     model_name: str,
     task: str,
     model: Optional["PreTrainedModel"] = None,
+    test_model_config: Optional[dict[str, Any]] = None,
     **kwargs,
 ) -> Optional[dict[str, Any]]:
     """Get dummy inputs for the model and task.
@@ -100,6 +103,7 @@ def get_model_dummy_input(
         model_name: The model name or path.
         task: The task type.
         model: Optional loaded model for input signature inspection.
+        test_model_config: Optional overrides for creating a lightweight random test model from the same config.
         **kwargs: Additional arguments including use_cache, batch_size, sequence_length.
 
     Returns:
@@ -133,7 +137,7 @@ def get_model_dummy_input(
         return None
 
     # Get model config (handles MLflow paths)
-    model_config = get_model_config(model_name, **kwargs)
+    model_config = get_model_config(model_name, test_model_config=test_model_config, **kwargs)
 
     # Handle PEFT models
     actual_model = model

diff --git a/olive/common/hf/utils.py b/olive/common/hf/utils.py
index a070e85ac8..99a1a2d7e3 100644
--- a/olive/common/hf/utils.py
+++ b/olive/common/hf/utils.py
@@ -3,8 +3,9 @@
 # Licensed under the MIT License.
 # --------------------------------------------------------------------------
 import logging
+from copy import deepcopy
 from pathlib import Path
-from typing import TYPE_CHECKING, Optional, Union
+from typing import TYPE_CHECKING, Any, Optional, Union
 
 from transformers import AutoConfig, AutoModel, AutoTokenizer, GenerationConfig
 
@@ -18,7 +19,33 @@
 logger = logging.getLogger(__name__)
 
 
-def load_model_from_task(task: str, model_name_or_path: str, **kwargs) -> "PreTrainedModel":
+def _apply_test_model_config(
+    model_config: "PretrainedConfig", test_model_config: Optional[dict[str, Any]] = None
+) -> "PretrainedConfig":
+    """Apply lightweight test-model overrides to a model config."""
+    if not test_model_config:
+        return model_config
+
+    model_config = deepcopy(model_config)
+    hidden_layers = test_model_config.get("hidden_layers", test_model_config.get("num_hidden_layers", 2))
+    if hidden_layers < 1:
+        raise ValueError("test_model_config.hidden_layers must be greater than 0.")
+
+    updated = False
+    for attr_name in ("num_hidden_layers", "num_layers", "n_layer", "n_layers"):
+        if hasattr(model_config, attr_name):
+            setattr(model_config, attr_name, hidden_layers)
+            updated = True
+
+    if not updated:
+        raise ValueError("Unable to create a test model because the config does not expose a hidden-layer count.")
+
+    return model_config
+
+
+def load_model_from_task(
+    task: str, model_name_or_path: str, test_model_config: Optional[dict[str, Any]] = None, **kwargs
+) -> "PreTrainedModel":
     """Load huggingface model from task and model_name_or_path."""
     from transformers.pipelines import check_task
 
@@ -31,7 +58,7 @@ def load_model_from_task(task: str, model_name_or_path: str, **kwargs) -> "PreTr
     else:
         raise ValueError("unsupported transformers version")
 
-    model_config = get_model_config(model_name_or_path, **kwargs)
+    model_config = get_model_config(model_name_or_path, test_model_config=test_model_config, **kwargs)
     if getattr(model_config, "quantization_config", None):
         if not isinstance(model_config.quantization_config, dict):
             model_config.quantization_config = model_config.quantization_config.to_dict()
@@ -59,7 +86,13 @@ def load_model_from_task(task: str, model_name_or_path: str, **kwargs) -> "PreTr
     model = None
     for i, model_class in enumerate(class_tuple):
         try:
-            model = from_pretrained(model_class, model_name_or_path, "model", **kwargs)
+            if test_model_config:
+                try:
+                    model = model_class.from_config(model_config, trust_remote_code=kwargs.get("trust_remote_code"))
+                except TypeError:
+                    model = model_class.from_config(model_config)
+            else:
+                model = from_pretrained(model_class, model_name_or_path, "model", **kwargs)
             logger.debug("Loaded model %s with name_or_path %s", model_class, model_name_or_path)
             break
         except (OSError, ValueError) as e:
@@ -94,14 +127,16 @@ def from_pretrained(cls, model_name_or_path: str, mlflow_dir: str, **kwargs):
     return cls.from_pretrained(get_pretrained_name_or_path(model_name_or_path, mlflow_dir), **kwargs)
 
 
-def get_model_config(model_name_or_path: str, **kwargs) -> "PretrainedConfig":
+def get_model_config(
+    model_name_or_path: str, test_model_config: Optional[dict[str, Any]] = None, **kwargs
+) -> "PretrainedConfig":
     """Get HF Config for the given model_name_or_path."""
     model_config = from_pretrained(AutoConfig, model_name_or_path, "config", **kwargs)
 
     # add quantization config
     quantization_config = kwargs.get("quantization_config")
     if not quantization_config:
-        return model_config
+        return _apply_test_model_config(model_config, test_model_config)
 
     if hasattr(model_config, "quantization_config") and model_config.quantization_config:
         logger.warning(
@@ -111,7 +146,7 @@ def get_model_config(model_name_or_path: str, **kwargs) -> "PretrainedConfig":
         )
     else:
         model_config.quantization_config = quantization_config
-    return model_config
+    return _apply_test_model_config(model_config, test_model_config)
 
 
 def save_model_config(config: Union["PretrainedConfig", "GenerationConfig"], output_dir: str, **kwargs):

diff --git a/olive/model/handler/hf.py b/olive/model/handler/hf.py
index daac587bc5..0ed38a5ada 100644
--- a/olive/model/handler/hf.py
+++ b/olive/model/handler/hf.py
@@ -28,7 +28,7 @@
 @model_handler_registry("HFModel")
 class HfModelHandler(PyTorchModelHandlerBase, MLFlowTransformersMixin, HfMixin):  # pylint: disable=too-many-ancestors
     resource_keys: tuple[str, ...] = ("model_path", "adapter_path")
-    json_config_keys: tuple[str, ...] = ("task", "load_kwargs")
+    json_config_keys: tuple[str, ...] = ("task", "load_kwargs", "test_model_config")
 
     def __init__(
         self,
@@ -37,6 +37,7 @@ def __init__(
         load_kwargs: Union[dict[str, Any], HfLoadKwargs] = None,
         io_config: Union[dict[str, Any], IoConfig, str] = None,
         adapter_path: OLIVE_RESOURCE_ANNOTATIONS = None,
+        test_model_config: Optional[dict[str, Any]] = None,
         model_attributes: Optional[dict[str, Any]] = None,
     ):
         super().__init__(
@@ -48,6 +49,7 @@ def __init__(
         self.add_resources(locals())
         self.task = task
         self.load_kwargs = validate_config(load_kwargs, HfLoadKwargs, warn_unused_keys=False) if load_kwargs else None
+        self.test_model_config = test_model_config
 
         self.model_attributes = {**self.get_hf_model_config().to_dict(), **(self.model_attributes or {})}
 
@@ -72,7 +74,12 @@ def load_model(self, rank: int = None, cache_model: bool = True) -> "torch.nn.Mo
         if self.model:
             model = self.model
         else:
-            model = load_model_from_task(self.task, self.model_path, **self.get_load_kwargs())
+            model = load_model_from_task(
+                self.task,
+                self.model_path,
+                test_model_config=self.test_model_config,
+                **self.get_load_kwargs(),
+            )
 
         # we only have peft adapters for now
         if self.adapter_path:

diff --git a/olive/model/handler/mixin/hf.py b/olive/model/handler/mixin/hf.py
index 730d6bd6ec..3e400c64e7 100644
--- a/olive/model/handler/mixin/hf.py
+++ b/olive/model/handler/mixin/hf.py
@@ -39,7 +39,11 @@ def get_hf_model_config(self, exclude_load_keys: Optional[list[str]] = None) ->
         :param exclude_load_keys: list of keys to exclude from load_kwargs
         :return: model config
         """
-        return get_model_config(self.model_path, **self.get_load_kwargs(exclude_load_keys))
+        return get_model_config(
+            self.model_path,
+            test_model_config=getattr(self, "test_model_config", None),
+            **self.get_load_kwargs(exclude_load_keys),
+        )
 
     def get_hf_generation_config(self, exclude_load_keys: Optional[list[str]] = None) -> Optional["GenerationConfig"]:
         """Get generation config for the model if it exists.
@@ -114,7 +118,13 @@ def save_metadata(self, output_dir: str, exclude_load_keys: Optional[list[str]]
 
     def get_hf_io_config(self) -> Optional[dict[str, Any]]:
         """Get Io config for the model."""
-        return get_model_io_config(self.model_path, self.task, self.load_model(), **self.get_load_kwargs())
+        return get_model_io_config(
+            self.model_path,
+            self.task,
+            self.load_model(),
+            test_model_config=getattr(self, "test_model_config", None),
+            **self.get_load_kwargs(),
+        )
 
     def get_hf_dummy_inputs(self) -> Optional[dict[str, Any]]:
         """Get dummy inputs for the model."""
@@ -122,6 +132,7 @@ def get_hf_dummy_inputs(self) -> Optional[dict[str, Any]]:
             self.model_path,
             self.task,
             model=self.load_model(),
+            test_model_config=getattr(self, "test_model_config", None),
             **self.get_load_kwargs(),
         )

diff --git a/test/cli/test_base.py b/test/cli/test_base.py
index 38c96c6aa9..6327f1f335 100644
--- a/test/cli/test_base.py
+++ b/test/cli/test_base.py
@@ -229,6 +229,22 @@ def test_insert_input_model_invalid_hf_model_name():
         get_input_model_config(args)
 
 
+@patch("huggingface_hub.repo_exists", return_value=True)
+def test_get_input_model_config_hf_test_model(_):
+    args = SimpleNamespace(
+        model_name_or_path="hf_model",
+        trust_remote_code=False,
+        task="text-generation",
+        model_script=None,
+        script_dir=None,
+        test=True,
+    )
+
+    config = get_input_model_config(args)
+
+    assert config["test_model_config"] == {"hidden_layers": 2}
+
+
 def test_insert_input_model_cli_output_model():
     # setup
     model_path = str(Path(__file__).parent.resolve() / "output_model")

diff --git a/test/cli/test_cli.py b/test/cli/test_cli.py
index a7cb39e244..816147e063 100644
--- a/test/cli/test_cli.py
+++ b/test/cli/test_cli.py
@@ -190,6 +190,25 @@ def test_finetune_command(_, mock_run, tmp_path):
     assert mock_run.call_count == 1
 
 
+@patch("huggingface_hub.repo_exists", return_value=True)
+def test_optimize_command_test_model_config(_, tmp_path):
+    output_dir = tmp_path / "output_dir"
+    command_args = [
+        "optimize",
+        "-m",
+        "dummy-model-id",
+        "--test",
+        "--dry_run",
+        "-o",
+        str(output_dir),
+    ]
+
+    cli_main(command_args)
+
+    config = json.loads((output_dir / "config.json").read_text())
+    assert config["input_model"]["test_model_config"] == {"hidden_layers": 2}
+
+
 @patch("olive.workflows.run")
 @patch("olive.model.handler.diffusers.is_valid_diffusers_model", return_value=True)
 def test_diffusion_lora_command(_, mock_run, tmp_path):

diff --git a/test/common/test_hf.py b/test/common/test_hf.py
index 00b4cbd043..dfab798547 100644
--- a/test/common/test_hf.py
+++ b/test/common/test_hf.py
@@ -6,6 +6,7 @@
 
 import pytest
 import torch
+import transformers
 
 from olive.common.hf.model_io import get_model_dummy_input, get_model_io_config
 from olive.common.hf.utils import load_model_from_task
@@ -21,6 +22,26 @@ def test_load_model_from_task():
     assert isinstance(model, torch.nn.Module)
 
 
+def test_load_model_from_task_test_model_config():
+    model_config = transformers.BertConfig(num_hidden_layers=12)
+    created_model = MagicMock(spec=torch.nn.Module)
+
+    with (
+        patch("transformers.pipelines.check_task") as mock_check_task,
+        patch("olive.common.hf.utils.from_pretrained", return_value=model_config) as mock_from_pretrained,
+    ):
+        mock_model_class = MagicMock()
+        mock_model_class.from_config.return_value = created_model
+        mock_check_task.return_value = ("text-classification", {"pt": (mock_model_class,)}, None)
+
+        model = load_model_from_task("text-classification", "dummy-model", test_model_config={"hidden_layers": 2})
+
+    assert model is created_model
+    mock_from_pretrained.assert_called_once()
+    mock_model_class.from_config.assert_called_once()
+    assert mock_model_class.from_config.call_args.args[0].num_hidden_layers == 2
+
+
 @pytest.mark.parametrize(
     ("exceptions", "expected_exception", "expected_message"),
     [

From 485dfbf79b855f0bde0488de9c70c0357068a5a2 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Mon, 11 May 2026 08:36:02 +0000
Subject: [PATCH 03/35] test: broaden HF test-model coverage

Agent-Logs-Url: https://github.com/microsoft/Olive/sessions/e2149604-9db1-438e-a31f-01a53886093d
Co-authored-by: xadupre <22452781+xadupre@users.noreply.github.com>
---
 olive/common/hf/utils.py |  4 +++-
 test/common/test_hf.py   | 12 +++++++++---
 2 files changed, 12 insertions(+), 4 deletions(-)

diff --git a/olive/common/hf/utils.py b/olive/common/hf/utils.py
index 99a1a2d7e3..7c713ce1ce 100644
--- a/olive/common/hf/utils.py
+++ b/olive/common/hf/utils.py
@@ -27,11 +27,13 @@ def _apply_test_model_config(
         return model_config
 
     model_config = deepcopy(model_config)
-    hidden_layers = test_model_config.get("hidden_layers", test_model_config.get("num_hidden_layers", 2))
+    hidden_layers = test_model_config.get("hidden_layers") or test_model_config.get("num_hidden_layers") or 2
     if hidden_layers < 1:
         raise ValueError("test_model_config.hidden_layers must be greater than 0.")
 
     updated = False
+    # Common Hugging Face configs do not use a single canonical field:
+    # BERT-style models use num_hidden_layers while GPT-style models often use n_layer/n_layers/num_layers.
     for attr_name in ("num_hidden_layers", "num_layers", "n_layer", "n_layers"):
         if hasattr(model_config, attr_name):
             setattr(model_config, attr_name, hidden_layers)

diff --git a/test/common/test_hf.py b/test/common/test_hf.py
index dfab798547..dacc4ac3ad 100644
--- a/test/common/test_hf.py
+++ b/test/common/test_hf.py
@@ -22,8 +22,14 @@ def test_load_model_from_task():
     assert isinstance(model, torch.nn.Module)
 
 
-def test_load_model_from_task_test_model_config():
-    model_config = transformers.BertConfig(num_hidden_layers=12)
+@pytest.mark.parametrize(
+    ("model_config", "hidden_layers_attr"),
+    [
+        (transformers.BertConfig(num_hidden_layers=12), "num_hidden_layers"),
+        (transformers.GPT2Config(n_layer=12), "n_layer"),
+    ],
+)
+def test_load_model_from_task_test_model_config(model_config, hidden_layers_attr):
     created_model = MagicMock(spec=torch.nn.Module)
 
     with (
@@ -39,7 +45,7 @@ def test_load_model_from_task_test_model_config(model_config, hidden_layers_attr
     assert model is created_model
     mock_from_pretrained.assert_called_once()
     mock_model_class.from_config.assert_called_once()
-    assert mock_model_class.from_config.call_args.args[0].num_hidden_layers == 2
+    assert getattr(mock_model_class.from_config.call_args.args[0], hidden_layers_attr) == 2
 
 
 @pytest.mark.parametrize(

From a6fa34aa939fc21e9429253f611d70bf652420e1 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Mon, 11 May 2026 08:38:06 +0000
Subject: [PATCH 04/35] chore: polish test model config handling

Agent-Logs-Url: https://github.com/microsoft/Olive/sessions/e2149604-9db1-438e-a31f-01a53886093d
Co-authored-by: xadupre <22452781+xadupre@users.noreply.github.com>
---
 olive/common/hf/utils.py | 7 ++++++-
 test/common/test_hf.py   | 6 +++---
 2 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/olive/common/hf/utils.py b/olive/common/hf/utils.py
index 7c713ce1ce..110b4fe3ae 100644
--- a/olive/common/hf/utils.py
+++ b/olive/common/hf/utils.py
@@ -27,7 +27,12 @@ def _apply_test_model_config(
         return model_config
 
     model_config = deepcopy(model_config)
-    hidden_layers = test_model_config.get("hidden_layers") or test_model_config.get("num_hidden_layers") or 2
+    if "hidden_layers" in test_model_config:
+        hidden_layers = test_model_config["hidden_layers"]
+    elif "num_hidden_layers" in test_model_config:
+        hidden_layers = test_model_config["num_hidden_layers"]
+    else:
+        hidden_layers = 2
     if hidden_layers < 1:
         raise ValueError("test_model_config.hidden_layers must be greater than 0.")
 

diff --git a/test/common/test_hf.py b/test/common/test_hf.py
index dacc4ac3ad..5300ca118a 100644
--- a/test/common/test_hf.py
+++ b/test/common/test_hf.py
@@ -6,7 +6,7 @@
 
 import pytest
 import torch
-import transformers
+from transformers import BertConfig, GPT2Config
 
 from olive.common.hf.model_io import get_model_dummy_input, get_model_io_config
 from olive.common.hf.utils import load_model_from_task
@@ -25,8 +25,8 @@ def test_load_model_from_task():
 @pytest.mark.parametrize(
     ("model_config", "hidden_layers_attr"),
     [
-        (transformers.BertConfig(num_hidden_layers=12), "num_hidden_layers"),
-        (transformers.GPT2Config(n_layer=12), "n_layer"),
+        (BertConfig(num_hidden_layers=12), "num_hidden_layers"),
+        (GPT2Config(n_layer=12), "n_layer"),
     ],
 )
 def test_load_model_from_task_test_model_config(model_config, hidden_layers_attr):

From 273850c72e26934b91f7fb505c411952bde1e91f Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Mon, 11 May 2026 09:19:50 +0000
Subject: [PATCH 05/35] fix: fail fast for HF test model loading

Agent-Logs-Url: https://github.com/microsoft/Olive/sessions/d4221aaf-b4a3-4bed-85fe-30a12dcdfd6e
Co-authored-by: xadupre <22452781+xadupre@users.noreply.github.com>
---
 olive/common/hf/utils.py |  2 ++
 test/common/test_hf.py   | 20 ++++++++++++++++++++
 2 files changed, 22 insertions(+)

diff --git a/olive/common/hf/utils.py b/olive/common/hf/utils.py
index 110b4fe3ae..692019b8ab 100644
--- a/olive/common/hf/utils.py
+++ b/olive/common/hf/utils.py
@@ -103,6 +103,8 @@ def load_model_from_task(
             logger.debug("Loaded model %s with name_or_path %s", model_class, model_name_or_path)
             break
         except (OSError, ValueError) as e:
+            if test_model_config:
+                raise
             if i == len(class_tuple) - 1:
                 # len(class_tuple) == 1 covers most common tasks like text-generation, text-classification, etc
                 # error could be device OOM, device_map: "auto" not supported, etc

diff --git a/test/common/test_hf.py b/test/common/test_hf.py
index 5300ca118a..ab942ee399 100644
--- a/test/common/test_hf.py
+++ b/test/common/test_hf.py
@@ -48,6 +48,26 @@ def test_load_model_from_task_test_model_config(model_config, hidden_layers_attr
     assert getattr(mock_model_class.from_config.call_args.args[0], hidden_layers_attr) == 2
 
 
+def test_load_model_from_task_test_model_config_fails_without_fallback():
+    model_config = BertConfig(num_hidden_layers=12)
+
+    with (
+        patch("transformers.pipelines.check_task") as mock_check_task,
+        patch("olive.common.hf.utils.from_pretrained", return_value=model_config),
+    ):
+        first_model_class = MagicMock()
+        first_model_class.from_config.side_effect = ValueError("unexpected architecture")
+        second_model_class = MagicMock()
+        second_model_class.from_config.return_value = MagicMock(spec=torch.nn.Module)
+        mock_check_task.return_value = ("text-classification", {"pt": (first_model_class, second_model_class)}, None)
+
+        with pytest.raises(ValueError, match="unexpected architecture"):
+            load_model_from_task("text-classification", "dummy-model", test_model_config={"hidden_layers": 2})
+
+    first_model_class.from_config.assert_called_once()
+    second_model_class.from_config.assert_not_called()
+
+
 @pytest.mark.parametrize(
     ("exceptions", "expected_exception", "expected_message"),
     [

From 318fcbeefd65b4f5b2fc8c9ab42b953e7d7585e4 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Mon, 11 May 2026 09:35:06 +0000
Subject: [PATCH 06/35] refactor: remove nested try from HF test loading

Agent-Logs-Url: https://github.com/microsoft/Olive/sessions/57fd060e-5678-4015-b277-c7d14ec52145
Co-authored-by: xadupre <22452781+xadupre@users.noreply.github.com>
---
 olive/common/hf/utils.py | 18 ++++++++++++++----
 test/common/test_hf.py   | 15 ++++++++++++++-
 2 files changed, 28 insertions(+), 5 deletions(-)

diff --git a/olive/common/hf/utils.py b/olive/common/hf/utils.py
index 692019b8ab..91155e5340 100644
--- a/olive/common/hf/utils.py
+++ b/olive/common/hf/utils.py
@@ -2,6 +2,7 @@
 # Copyright (c) Microsoft Corporation. All rights reserved.
 # Licensed under the MIT License.
 # --------------------------------------------------------------------------
+import inspect
 import logging
 from copy import deepcopy
 from pathlib import Path
@@ -50,6 +51,18 @@ def _apply_test_model_config(
     return model_config
 
 
+def _load_test_model(model_class: type, model_config: "PretrainedConfig", trust_remote_code: Optional[bool] = None):
+    """Instantiate a random-initialized HF model from config for test mode."""
+    from_config_signature = inspect.signature(model_class.from_config)
+    supports_trust_remote_code = "trust_remote_code" in from_config_signature.parameters or any(
+        parameter.kind == inspect.Parameter.VAR_KEYWORD for parameter in from_config_signature.parameters.values()
+    )
+    from_config_kwargs = {}
+    if supports_trust_remote_code and trust_remote_code is not None:
+        from_config_kwargs["trust_remote_code"] = trust_remote_code
+    return model_class.from_config(model_config, **from_config_kwargs)
+
+
 def load_model_from_task(
     task: str, model_name_or_path: str, test_model_config: Optional[dict[str, Any]] = None, **kwargs
 ) -> "PreTrainedModel":
@@ -94,10 +107,7 @@ def load_model_from_task(
     for i, model_class in enumerate(class_tuple):
         try:
             if test_model_config:
-                try:
-                    model = model_class.from_config(model_config, trust_remote_code=kwargs.get("trust_remote_code"))
-                except TypeError:
-                    model = model_class.from_config(model_config)
+                model = _load_test_model(model_class, model_config, kwargs.get("trust_remote_code"))
             else:
                 model = from_pretrained(model_class, model_name_or_path, "model", **kwargs)
             logger.debug("Loaded model %s with name_or_path %s", model_class, model_name_or_path)

diff --git a/test/common/test_hf.py b/test/common/test_hf.py
index ab942ee399..7fa0c9801e 100644
--- a/test/common/test_hf.py
+++ b/test/common/test_hf.py
@@ -9,7 +9,7 @@
 from transformers import BertConfig, GPT2Config
 
 from olive.common.hf.model_io import get_model_dummy_input, get_model_io_config
-from olive.common.hf.utils import load_model_from_task
+from olive.common.hf.utils import _load_test_model, load_model_from_task
 
 
 def test_load_model_from_task():
@@ -68,6 +68,19 @@ def test_load_model_from_task_test_model_config_fails_without_fallback():
     second_model_class.from_config.assert_not_called()
 
 
+def test_load_test_model_omits_unsupported_trust_remote_code_kwarg():
+    model_config = BertConfig(num_hidden_layers=12)
+
+    class MockModelClass:
+        @staticmethod
+        def from_config(config):
+            return config
+
+    created_model = _load_test_model(MockModelClass, model_config, trust_remote_code=True)
+
+    assert created_model is model_config
+
+
 @pytest.mark.parametrize(
     ("exceptions", "expected_exception", "expected_message"),
     [

From 40b07407da32fb962d6e33932ef8b3e1688e771b Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Mon, 11 May 2026 09:37:26 +0000
Subject: [PATCH 07/35] test: cover trust_remote_code helper behavior

Agent-Logs-Url: https://github.com/microsoft/Olive/sessions/57fd060e-5678-4015-b277-c7d14ec52145
Co-authored-by: xadupre <22452781+xadupre@users.noreply.github.com>
---
 test/common/test_hf.py | 37 +++++++++++++++++++++++++++++++++++++
 1 file changed, 37 insertions(+)

diff --git a/test/common/test_hf.py b/test/common/test_hf.py
index 7fa0c9801e..7df0d2207d 100644
--- a/test/common/test_hf.py
+++ b/test/common/test_hf.py
@@ -70,15 +70,52 @@ def test_load_model_from_task_test_model_config_fails_without_fallback():
 
 def test_load_test_model_omits_unsupported_trust_remote_code_kwarg():
     model_config = BertConfig(num_hidden_layers=12)
+    captured = {}
 
     class MockModelClass:
         @staticmethod
         def from_config(config):
+            captured["config"] = config
             return config
 
     created_model = _load_test_model(MockModelClass, model_config, trust_remote_code=True)
 
     assert created_model is model_config
+    assert captured == {"config": model_config}
+
+
+def test_load_test_model_omits_none_trust_remote_code_kwarg():
+    model_config = BertConfig(num_hidden_layers=12)
+    captured = {}
+
+    class MockModelClass:
+        @staticmethod
+        def from_config(config, **kwargs):
+            captured["config"] = config
+            captured["kwargs"] = kwargs
+            return config
+
+    created_model = _load_test_model(MockModelClass, model_config)
+
+    assert created_model is model_config
+    assert captured == {"config": model_config, "kwargs": {}}
+
+
+def test_load_test_model_passes_supported_trust_remote_code_kwarg():
+    model_config = BertConfig(num_hidden_layers=12)
+    captured = {}
+
+    class MockModelClass:
+        @staticmethod
+        def from_config(config, trust_remote_code=None):
+            captured["config"] = config
+            captured["trust_remote_code"] = trust_remote_code
+            return config
+
+    created_model = _load_test_model(MockModelClass, model_config, trust_remote_code=True)
+
+    assert created_model is model_config
+    assert captured == {"config": model_config, "trust_remote_code": True}
 
 
 @pytest.mark.parametrize(

From 386ff01781c5f32edf811814efc6e33616c8003e Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Mon, 11 May 2026 10:56:58 +0000
Subject: [PATCH 08/35] feat: persist reusable HF test model path

Agent-Logs-Url: https://github.com/microsoft/Olive/sessions/cf4d7fdc-1de9-4098-adc3-3bedd436d7d0
Co-authored-by: xadupre <22452781+xadupre@users.noreply.github.com>
---
 olive/cli/base.py         | 16 +++++++++---
 olive/common/hf/utils.py  | 19 ++++++++++++--
 olive/model/handler/hf.py |  9 ++++++-
 test/cli/test_base.py     |  3 ++-
 test/cli/test_cli.py      |  3 +++
 test/common/test_hf.py    | 53 +++++++++++++++++++++++++++++++++++++++
 6 files changed, 96 insertions(+), 7 deletions(-)

diff --git a/olive/cli/base.py b/olive/cli/base.py
index 9355a0d301..dcc8be00b2 100644
--- a/olive/cli/base.py
+++ b/olive/cli/base.py
@@ -105,8 +105,13 @@ def _get_hf_input_model(args: Namespace, model_path: OLIVE_RESOURCE_ANNOTATIONS)
         input_model["adapter_path"] = args.adapter_path
     if getattr(args, "trust_remote_code", None) is not None:
         input_model["load_kwargs"]["trust_remote_code"] = args.trust_remote_code
-    if getattr(args, "test", False):
+    test_model_output_path = getattr(args, "test", None)
+    if test_model_output_path is not None and test_model_output_path is not False:
         input_model["test_model_config"] = {"hidden_layers": 2}
+        if test_model_output_path is True and getattr(args, "output_path", None):
+            test_model_output_path = str(Path(args.output_path) / "test_model")
+        if test_model_output_path is not True:
+            input_model["test_model_path"] = test_model_output_path
     return input_model
 
 
@@ -375,8 +380,13 @@
     )
     model_group.add_argument(
         "--test",
-        action="store_true",
-        help="Use a randomly initialized test model with the same Hugging Face architecture and 2 hidden layers.",
+        type=str,
+        nargs="?",
+        const=True,
+        help=(
+            "Use a randomly initialized test model with the same Hugging Face architecture and 2 hidden layers. "
+            "Optionally provide a folder where the generated test model should be saved and reused."
+        ),
     )
 
     if enable_hf_adapter:

diff --git a/olive/common/hf/utils.py b/olive/common/hf/utils.py
index 91155e5340..ff397c1bca 100644
--- a/olive/common/hf/utils.py
+++ b/olive/common/hf/utils.py
@@ -63,8 +63,18 @@ def _load_test_model(model_class: type, model_config: "PretrainedConfig", trust_
     return model_class.from_config(model_config, **from_config_kwargs)
 
 
+def _save_test_model(model: "PreTrainedModel", output_dir: str):
+    output_path = Path(output_dir)
+    output_path.mkdir(parents=True, exist_ok=True)
+    model.save_pretrained(str(output_path))
+
+
 def load_model_from_task(
-    task: str, model_name_or_path: str, test_model_config: Optional[dict[str, Any]] = None, **kwargs
+    task: str,
+    model_name_or_path: str,
+    test_model_config: Optional[dict[str, Any]] = None,
+    test_model_path: Optional[str] = None,
+    **kwargs,
 ) -> "PreTrainedModel":
     """Load huggingface model from task and model_name_or_path."""
     from transformers.pipelines import check_task
@@ -107,7 +117,12 @@ def load_model_from_task(
     for i, model_class in enumerate(class_tuple):
         try:
             if test_model_config:
-                model = _load_test_model(model_class, model_config, kwargs.get("trust_remote_code"))
+                if test_model_path and (Path(test_model_path) / "config.json").exists():
+                    model = from_pretrained(model_class, test_model_path, "model", **kwargs)
+                else:
+                    model = _load_test_model(model_class, model_config, kwargs.get("trust_remote_code"))
+                    if test_model_path:
+                        _save_test_model(model, test_model_path)
             else:
                 model = from_pretrained(model_class, model_name_or_path, "model", **kwargs)
             logger.debug("Loaded model %s with name_or_path %s", model_class, model_name_or_path)

diff --git a/olive/model/handler/hf.py b/olive/model/handler/hf.py
index 0ed38a5ada..fc659a57e6 100644
--- a/olive/model/handler/hf.py
+++ b/olive/model/handler/hf.py
@@ -27,7 +27,7 @@
 
 @model_handler_registry("HFModel")
 class HfModelHandler(PyTorchModelHandlerBase, MLFlowTransformersMixin, HfMixin):  # pylint: disable=too-many-ancestors
-    resource_keys: tuple[str, ...] = ("model_path", "adapter_path")
+    resource_keys: tuple[str, ...] = ("model_path", "adapter_path", "test_model_path")
     json_config_keys: tuple[str, ...] = ("task", "load_kwargs", "test_model_config")
 
     def __init__(
@@ -37,6 +37,7 @@ def __init__(
         load_kwargs: Union[dict[str, Any], HfLoadKwargs] = None,
         io_config: Union[dict[str, Any], IoConfig, str] = None,
         adapter_path: OLIVE_RESOURCE_ANNOTATIONS = None,
+        test_model_path: OLIVE_RESOURCE_ANNOTATIONS = None,
         test_model_config: Optional[dict[str, Any]] = None,
         model_attributes: Optional[dict[str, Any]] = None,
     ):
@@ -69,6 +70,11 @@ def adapter_path(self) -> str:
         """Return the path to the peft adapter."""
         return self.get_resource("adapter_path")
 
+    @property
+    def test_model_path(self) -> str:
+        """Return the optional path to a persisted lightweight test model."""
+        return self.get_resource("test_model_path")
+
     def load_model(self, rank: int = None, cache_model: bool = True) -> "torch.nn.Module":
         """Load the model from the model path."""
         if self.model:
@@ -78,6 +84,7 @@ def load_model(self, rank: int = None, cache_model: bool = True) -> "torch.nn.Mo
                 self.task,
                 self.model_path,
                 test_model_config=self.test_model_config,
+                test_model_path=self.test_model_path,
                 **self.get_load_kwargs(),
             )

diff --git a/test/cli/test_base.py b/test/cli/test_base.py
index 6327f1f335..55dabf65ba 100644
--- a/test/cli/test_base.py
+++ b/test/cli/test_base.py
@@ -237,12 +237,13 @@ def test_get_input_model_config_hf_test_model(_):
         task="text-generation",
         model_script=None,
         script_dir=None,
-        test=True,
+        test="saved_test_model",
     )
 
     config = get_input_model_config(args)
 
     assert config["test_model_config"] == {"hidden_layers": 2}
+    assert config["test_model_path"] == "saved_test_model"
 
 
 def test_insert_input_model_cli_output_model():

diff --git a/test/cli/test_cli.py b/test/cli/test_cli.py
index 816147e063..6ab59999a7 100644
--- a/test/cli/test_cli.py
+++ b/test/cli/test_cli.py
@@ -193,11 +193,13 @@ def test_finetune_command(_, mock_run, tmp_path):
 @patch("huggingface_hub.repo_exists", return_value=True)
 def test_optimize_command_test_model_config(_, tmp_path):
     output_dir = tmp_path / "output_dir"
+    test_model_dir = tmp_path / "saved_test_model"
     command_args = [
         "optimize",
         "-m",
         "dummy-model-id",
         "--test",
+        str(test_model_dir),
         "--dry_run",
         "-o",
         str(output_dir),
@@ -207,6 +209,7 @@ def test_optimize_command_test_model_config(_, tmp_path):
 
     config = json.loads((output_dir / "config.json").read_text())
     assert config["input_model"]["test_model_config"] == {"hidden_layers": 2}
+    assert config["input_model"]["test_model_path"] == str(test_model_dir)
 
 
 @patch("olive.workflows.run")

diff --git a/test/common/test_hf.py b/test/common/test_hf.py
index 7df0d2207d..f297d5ca2b 100644
--- a/test/common/test_hf.py
+++ b/test/common/test_hf.py
@@ -68,6 +68,59 @@ def test_load_model_from_task_test_model_config_fails_without_fallback():
     second_model_class.from_config.assert_not_called()
 
 
+def test_load_model_from_task_test_model_config_saves_model(tmp_path):
+    model_config = BertConfig(num_hidden_layers=12)
+    created_model = MagicMock()
+    test_model_path = tmp_path / "saved_test_model"
+
+    with (
+        patch("transformers.pipelines.check_task") as mock_check_task,
+        patch("olive.common.hf.utils.from_pretrained", return_value=model_config),
+    ):
+        mock_model_class = MagicMock()
+        mock_model_class.from_config.return_value = created_model
+        mock_check_task.return_value = ("text-classification", {"pt": (mock_model_class,)}, None)
+
+        model = load_model_from_task(
+            "text-classification",
+            "dummy-model",
+            test_model_config={"hidden_layers": 2},
+            test_model_path=str(test_model_path),
+        )
+
+    assert model is created_model
+    mock_model_class.from_config.assert_called_once()
+    created_model.save_pretrained.assert_called_once_with(str(test_model_path))
+
+
+def test_load_model_from_task_test_model_config_reuses_saved_model(tmp_path):
+    model_config = BertConfig(num_hidden_layers=12)
+    test_model_path = tmp_path / "saved_test_model"
+    test_model_path.mkdir()
+    (test_model_path / "config.json").write_text("{}")
+    loaded_model = MagicMock(spec=torch.nn.Module)
+
+    with (
+        patch("transformers.pipelines.check_task") as mock_check_task,
+        patch(
+            "olive.common.hf.utils.from_pretrained", side_effect=[model_config, loaded_model]
+        ) as mock_from_pretrained,
+    ):
+        mock_model_class = MagicMock()
+        mock_check_task.return_value = ("text-classification", {"pt": (mock_model_class,)}, None)
+
+        model = load_model_from_task(
+            "text-classification",
+            "dummy-model",
+            test_model_config={"hidden_layers": 2},
+            test_model_path=str(test_model_path),
+        )
+
+    assert model is loaded_model
+    mock_model_class.from_config.assert_not_called()
+    assert mock_from_pretrained.call_args_list[1].args[1] == str(test_model_path)
+
+
 def test_load_test_model_omits_unsupported_trust_remote_code_kwarg():
     model_config = BertConfig(num_hidden_layers=12)
     captured = {}

From 09fac8c37891c45bbf3d9f0c68e1b635e60d39f9 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Mon, 11 May 2026 11:00:00 +0000
Subject: [PATCH 09/35] fix: tighten HF test model path handling

Agent-Logs-Url: https://github.com/microsoft/Olive/sessions/cf4d7fdc-1de9-4098-adc3-3bedd436d7d0
Co-authored-by: xadupre <22452781+xadupre@users.noreply.github.com>
---
 olive/cli/base.py        |  4 +++-
 olive/common/hf/utils.py |  9 +++++++--
 test/cli/test_base.py    | 15 +++++++++++++++
 test/common/test_hf.py   |  4 ++--
 4 files changed, 27 insertions(+), 5 deletions(-)

diff --git a/olive/cli/base.py b/olive/cli/base.py
index dcc8be00b2..fe43a15a00 100644
--- a/olive/cli/base.py
+++ b/olive/cli/base.py
@@ -106,10 +106,12 @@ def _get_hf_input_model(args: Namespace, model_path: OLIVE_RESOURCE_ANNOTATIONS)
     if getattr(args, "trust_remote_code", None) is not None:
         input_model["load_kwargs"]["trust_remote_code"] = args.trust_remote_code
     test_model_output_path = getattr(args, "test", None)
-    if test_model_output_path is not None and test_model_output_path is not False:
+    if test_model_output_path not in (None, False):
         input_model["test_model_config"] = {"hidden_layers": 2}
         if test_model_output_path is True and getattr(args, "output_path", None):
             test_model_output_path = str(Path(args.output_path) / "test_model")
+        elif test_model_output_path is True:
+            raise ValueError("--test requires an explicit folder when output_path is not available.")
         if test_model_output_path is not True:
             input_model["test_model_path"] = test_model_output_path
     return input_model

diff --git a/olive/common/hf/utils.py b/olive/common/hf/utils.py
index ff397c1bca..aa144a24f3 100644
--- a/olive/common/hf/utils.py
+++ b/olive/common/hf/utils.py
@@ -65,8 +65,13 @@ def _load_test_model(model_class: type, model_config: "PretrainedConfig", trust_
 
 def _save_test_model(model: "PreTrainedModel", output_dir: str):
     output_path = Path(output_dir)
-    output_path.mkdir(parents=True, exist_ok=True)
-    model.save_pretrained(str(output_path))
+    try:
+        output_path.mkdir(parents=True, exist_ok=True)
+        logger.info("Saving generated test model to %s", output_path)
+        model.save_pretrained(str(output_path))
+    except Exception:
+        logger.exception("Failed to save generated test model to %s", output_path)
+        raise
 
 
 def load_model_from_task(

diff --git a/test/cli/test_base.py b/test/cli/test_base.py
index 55dabf65ba..bb34cef3f2 100644
--- a/test/cli/test_base.py
+++ b/test/cli/test_base.py
@@ -246,6 +246,21 @@ def test_get_input_model_config_hf_test_model(_):
     assert config["test_model_path"] == "saved_test_model"
 
 
+@patch("huggingface_hub.repo_exists", return_value=True)
+def test_get_input_model_config_hf_test_model_requires_path_without_output_path(_):
+    args = SimpleNamespace(
+        model_name_or_path="hf_model",
+        trust_remote_code=False,
+        task="text-generation",
+        model_script=None,
+        script_dir=None,
+        test=True,
+    )
+
+    with pytest.raises(ValueError, match=r"--test requires an explicit folder when output_path is not available\."):
+        get_input_model_config(args)
+
+
 def test_insert_input_model_cli_output_model():
     # setup
     model_path = str(Path(__file__).parent.resolve() / "output_model")

diff --git a/test/common/test_hf.py b/test/common/test_hf.py
index f297d5ca2b..bb89215c81 100644
--- a/test/common/test_hf.py
+++ b/test/common/test_hf.py
@@ -84,7 +84,7 @@ def test_load_model_from_task_test_model_config_saves_model(tmp_path):
         model = load_model_from_task(
             "text-classification",
             "dummy-model",
-            test_model_config={"hidden_layers": 2},
+            test_model_config={"num_hidden_layers": 2},
             test_model_path=str(test_model_path),
         )
 
@@ -112,7 +112,7 @@ def test_load_model_from_task_test_model_config_reuses_saved_model(tmp_path):
         model = load_model_from_task(
             "text-classification",
             "dummy-model",
-            test_model_config={"hidden_layers": 2},
+            test_model_config={"num_hidden_layers": 2},
             test_model_path=str(test_model_path),
         )
 

From 09df0a7e5f01c6d8ed92ba712ca2cb139f51a47c Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Mon, 11 May 2026 11:02:14 +0000
Subject: [PATCH 10/35] refactor: simplify test model path handling

Agent-Logs-Url: https://github.com/microsoft/Olive/sessions/cf4d7fdc-1de9-4098-adc3-3bedd436d7d0
Co-authored-by: xadupre <22452781+xadupre@users.noreply.github.com>
---
 olive/cli/base.py        | 12 ++++++------
 olive/common/hf/utils.py | 10 +++-------
 2 files changed, 9 insertions(+), 13 deletions(-)

diff --git a/olive/cli/base.py b/olive/cli/base.py
index fe43a15a00..ccf23ed957 100644
--- a/olive/cli/base.py
+++ b/olive/cli/base.py
@@ -108,12 +108,12 @@ def _get_hf_input_model(args: Namespace, model_path: OLIVE_RESOURCE_ANNOTATIONS)
     test_model_output_path = getattr(args, "test", None)
     if test_model_output_path not in (None, False):
         input_model["test_model_config"] = {"hidden_layers": 2}
-        if test_model_output_path is True and getattr(args, "output_path", None):
-            test_model_output_path = str(Path(args.output_path) / "test_model")
-        elif test_model_output_path is True:
-            raise ValueError("--test requires an explicit folder when output_path is not available.")
-        if test_model_output_path is not True:
-            input_model["test_model_path"] = test_model_output_path
+        if test_model_output_path is True:
+            output_path = getattr(args, "output_path", None)
+            if not output_path:
+                raise ValueError("--test requires an explicit folder when output_path is not available.")
+            test_model_output_path = str(Path(output_path) / "test_model")
+        input_model["test_model_path"] = test_model_output_path
     return input_model

diff --git a/olive/common/hf/utils.py b/olive/common/hf/utils.py
index aa144a24f3..3bc5c29999 100644
--- a/olive/common/hf/utils.py
+++ b/olive/common/hf/utils.py
@@ -65,13 +65,9 @@ def _load_test_model(model_class: type, model_config: "PretrainedConfig", trust_
 
 def _save_test_model(model: "PreTrainedModel", output_dir: str):
     output_path = Path(output_dir)
-    try:
-        output_path.mkdir(parents=True, exist_ok=True)
-        logger.info("Saving generated test model to %s", output_path)
-        model.save_pretrained(str(output_path))
-    except Exception:
-        logger.exception("Failed to save generated test model to %s", output_path)
-        raise
+    output_path.mkdir(parents=True, exist_ok=True)
+    logger.info("Saving generated test model to %s", output_path)
+    model.save_pretrained(str(output_path))
 
 
 def load_model_from_task(

From d4ebad5e79fb9482817bf981429ffa69f637c7fd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Xavier=20Dupr=C3=A9?=
Date: Mon, 11 May 2026 14:09:38 +0200
Subject: [PATCH 11/35] lintrunner

---
 test/common/test_hf.py | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/test/common/test_hf.py b/test/common/test_hf.py
index bb89215c81..7360309d6c 100644
--- a/test/common/test_hf.py
+++ b/test/common/test_hf.py
@@ -25,8 +25,8 @@ def test_load_model_from_task():
 @pytest.mark.parametrize(
     ("model_config", "hidden_layers_attr"),
     [
-        (BertConfig(num_hidden_layers=12), "num_hidden_layers"),
-        (GPT2Config(n_layer=12), "n_layer"),
+        (BertConfig(num_hidden_layers=12), "num_hidden_layers"),  # pylint: disable=unexpected-keyword-arg
+        (GPT2Config(n_layer=12), "n_layer"),  # pylint: disable=unexpected-keyword-arg
     ],
 )
 def test_load_model_from_task_test_model_config(model_config, hidden_layers_attr):
@@ -69,7 +69,7 @@ def test_load_model_from_task_test_model_config_fails_without_fallback():
 
 
 def test_load_model_from_task_test_model_config_saves_model(tmp_path):
-    model_config = BertConfig(num_hidden_layers=12)
+    model_config = BertConfig(num_hidden_layers=12)  # pylint: disable=unexpected-keyword-arg
     created_model = MagicMock()
     test_model_path = tmp_path / "saved_test_model"
 
@@ -94,7 +94,7 @@ def test_load_model_from_task_test_model_config_saves_model(tmp_path):
 
 
 def test_load_model_from_task_test_model_config_reuses_saved_model(tmp_path):
-    model_config = BertConfig(num_hidden_layers=12)
+    model_config = BertConfig(num_hidden_layers=12)  # pylint: disable=unexpected-keyword-arg
     test_model_path = tmp_path / "saved_test_model"
     test_model_path.mkdir()
     (test_model_path / "config.json").write_text("{}")
@@ -122,7 +122,7 @@ def test_load_model_from_task_test_model_config_reuses_saved_model(tmp_path):
 
 
 def test_load_test_model_omits_unsupported_trust_remote_code_kwarg():
-    model_config = BertConfig(num_hidden_layers=12)
+    model_config = BertConfig(num_hidden_layers=12)  # pylint: disable=unexpected-keyword-arg
     captured = {}
 
     class MockModelClass:
@@ -138,7 +138,7 @@ def test_load_test_model_omits_unsupported_trust_remote_code_kwarg():
 
 
 def test_load_test_model_omits_none_trust_remote_code_kwarg():
-    model_config = BertConfig(num_hidden_layers=12)
+    model_config = BertConfig(num_hidden_layers=12)  # pylint: disable=unexpected-keyword-arg
     captured = {}
 
     class MockModelClass:
@@ -155,7 +155,7 @@ def test_load_test_model_omits_none_trust_remote_code_kwarg():
 
 
 def test_load_test_model_passes_supported_trust_remote_code_kwarg():
-    model_config = BertConfig(num_hidden_layers=12)
+    model_config = BertConfig(num_hidden_layers=12)  # pylint: disable=unexpected-keyword-arg
     captured = {}
 
     class MockModelClass:

From 6321d32bb7910a1e48aaf6ca560626083d19aaa8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Xavier=20Dupr=C3=A9?=
Date: Mon, 11 May 2026 14:54:25 +0200
Subject: [PATCH 12/35] lint

---
 test/common/test_hf.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/common/test_hf.py b/test/common/test_hf.py
index 7360309d6c..32fa143b89 100644
--- a/test/common/test_hf.py
+++ b/test/common/test_hf.py
@@ -49,7 +49,7 @@ def test_load_model_from_task_test_model_config(model_config, hidden_layers_attr
 
 
 def test_load_model_from_task_test_model_config_fails_without_fallback():
-    model_config = BertConfig(num_hidden_layers=12)
+    model_config = BertConfig(num_hidden_layers=12)  # pylint: disable=unexpected-keyword-arg
 
     with (
         patch("transformers.pipelines.check_task") as mock_check_task,

From 6709852376abf12615bb896a94b77896dc19b28b Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Mon, 11 May 2026 13:05:29 +0000
Subject: [PATCH 13/35] docs: add phi test conversion how-to

Agent-Logs-Url: https://github.com/microsoft/Olive/sessions/48043731-ac61-43be-9857-ff5ff131fe93
Co-authored-by: xadupre <22452781+xadupre@users.noreply.github.com>
---
 .../source/how-to/cli/cli-convert-phi-test.md | 77 +++++++++++++++++++
 docs/source/how-to/index.md                   |  2 +
 2 files changed, 79 insertions(+)
 create mode 100644 docs/source/how-to/cli/cli-convert-phi-test.md

diff --git a/docs/source/how-to/cli/cli-convert-phi-test.md b/docs/source/how-to/cli/cli-convert-phi-test.md
new file mode 100644
index 0000000000..42e4d57cf8
--- /dev/null
+++ b/docs/source/how-to/cli/cli-convert-phi-test.md
@@ -0,0 +1,77 @@
+# How to convert a Phi model with a quick `--test` smoke check
+
+If you are converting a large language model, it is often useful to validate the Olive command, environment, and conversion recipe on a much smaller model before spending time on the full checkpoint.
+
+The `--test` option does that for Hugging Face models. Olive keeps the same model architecture, reduces it to a random 2-layer test model, saves it to the folder you provide, and reuses that folder on later runs.
+
+This example uses [`microsoft/Phi-3.5-mini-instruct`](https://huggingface.co/microsoft/Phi-3.5-mini-instruct), but the same pattern works for other supported Hugging Face LLMs.
+
+## Step 1: run a fast smoke test
+
+Start with a lightweight conversion pass that uses `--test` to create and reuse a reduced Phi model.
+
+```bash
+olive optimize \
+  --model_name_or_path microsoft/Phi-3.5-mini-instruct \
+  --device cpu \
+  --provider CPUExecutionProvider \
+  --precision int4 \
+  --test out/phi-test-model \
+  --output_path out/phi-smoke
+```
+
+What this does:
+
+- `--test out/phi-test-model` creates a reduced random Phi model and saves it in `out/phi-test-model`
+- later runs reuse the same saved test model instead of recreating it
+- `--output_path out/phi-smoke` stores the converted ONNX artifacts from the smoke test
+
+This is a quick way to confirm that:
+
+- Olive can load the source model
+- the selected optimization recipe is valid for your setup
+- the conversion path completes before you run the full model
+
+If you only want to inspect the generated workflow first, add `--dry_run`:
+
+```bash
+olive optimize \
+  --model_name_or_path microsoft/Phi-3.5-mini-instruct \
+  --device cpu \
+  --provider CPUExecutionProvider \
+  --precision int4 \
+  --test out/phi-test-model \
+  --dry_run \
+  --output_path out/phi-smoke
+```
+
+The generated `config.json` will include both `test_model_config` and `test_model_path`, so the same reduced model can be reused later.
+
+## Step 2: run the full conversion
+
+Once the smoke test succeeds, rerun the conversion on the full Phi checkpoint by removing `--test`.
+
+```bash
+olive optimize \
+  --model_name_or_path microsoft/Phi-3.5-mini-instruct \
+  --device cpu \
+  --provider CPUExecutionProvider \
+  --precision int4 \
+  --output_path out/phi-full
+```
+
+At this point you know the Olive command and the conversion recipe already worked on the lightweight test model, so you can focus on the full-model run instead of debugging both at once.
+
+## Why keep the test model folder?
+
+The saved test model is useful beyond the first smoke test:
+
+- you can rerun the reduced conversion quickly while iterating on options
+- you can reuse the same HF test model later when comparing the Hugging Face model against the exported ONNX model
+- you avoid recreating a new random test checkpoint every time
+
+## Related docs
+
+- [How to use the `olive optimize` command to optimize a Pytorch model](cli-optimize)
+- [How to write a new workflow from scratch](../configure-workflows/build-workflow)
+- [CLI reference](../../reference/cli)

diff --git a/docs/source/how-to/index.md b/docs/source/how-to/index.md
index 8ec16aba9e..d0b4a290a1 100644
--- a/docs/source/how-to/index.md
+++ b/docs/source/how-to/index.md
@@ -8,6 +8,7 @@
 The Olive CLI provides a set of primitives such as `quantize`, `finetune`, `onnx-graph-capture`, `auto-opt` that enable you to *easily* optimize select models and experiment with different cutting-edge optimization strategies without the need to define workflows.
 
 - [How to use the `olive optimize` command to optimize a Pytorch model](cli/cli-optimize)
+- [How to convert a Phi model with a quick `--test` smoke check](cli/cli-convert-phi-test)
 - [How to use the `olive auto-opt` command to take a PyTorch/Hugging Face model and turn it into an optimized ONNX model](cli/cli-auto-opt)
 - [how to use the `olive finetune` command to create (Q)LoRA adapters](cli/cli-finetune)
 - [How to use the `olive quantize` command to quantize your model with different precisions and techniques such as AWQ](cli/cli-quantize)
@@ -43,6 +44,7 @@
 installation
 
 cli/cli-optimize
+cli/cli-convert-phi-test
 cli/cli-auto-opt
 cli/cli-finetune
 cli/cli-quantize

From 3f8f8fc3425935233b3e85d05105d37bdea3a8c8 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Mon, 11 May 2026 13:31:45 +0000
Subject: [PATCH 14/35] feat: support run command test models

Agent-Logs-Url: https://github.com/microsoft/Olive/sessions/6e67b7b2-2cf7-46fa-8edc-04dbd263c56d
Co-authored-by: xadupre <22452781+xadupre@users.noreply.github.com>
---
 .../source/how-to/cli/cli-convert-phi-test.md | 43 ++++++++--------
 olive/cli/base.py                             | 25 ++++++----
 olive/cli/run.py                              |  9 ++++
 test/cli/test_cli.py                          | 49 ++++++++++++++++++-
 4 files changed, 92 insertions(+), 34 deletions(-)

diff --git a/docs/source/how-to/cli/cli-convert-phi-test.md b/docs/source/how-to/cli/cli-convert-phi-test.md
index 42e4d57cf8..b0fc17ec14 100644
--- a/docs/source/how-to/cli/cli-convert-phi-test.md
+++ b/docs/source/how-to/cli/cli-convert-phi-test.md
@@ -6,9 +6,9 @@ The `--test` option does that for Hugging Face models. Olive keeps the same mode
 
 This example uses [`microsoft/Phi-3.5-mini-instruct`](https://huggingface.co/microsoft/Phi-3.5-mini-instruct), but the same pattern works for other supported Hugging Face LLMs.
 
-## Step 1: run a fast smoke test
+## Step 1: generate the workflow config
 
-Start with a lightweight conversion pass that uses `--test` to create and reuse a reduced Phi model.
+Start by generating the config that Olive will run for the Phi conversion.
 
 ```bash
 olive optimize \
@@ -16,6 +16,19 @@ olive optimize \
   --device cpu \
   --provider CPUExecutionProvider \
   --precision int4 \
+  --dry_run \
+  --output_path out/phi-smoke
+```
+
+This creates `out/phi-smoke/config.json` without launching the full conversion yet.
+
+## Step 2: run a fast smoke test with `olive run --test`
+
+Use the generated config with `olive run` and pass `--test` so Olive swaps in a reduced random Phi model.
+
+```bash
+olive run \
+  --config out/phi-smoke/config.json \
   --test out/phi-test-model \
   --output_path out/phi-smoke
 ```
@@ -24,7 +37,7 @@ What this does:
 
 - `--test out/phi-test-model` creates a reduced random Phi model and saves it in `out/phi-test-model`
 - later runs reuse the same saved test model instead of recreating it
-- `--output_path out/phi-smoke` stores the converted ONNX artifacts from the smoke test
+- `--output_path out/phi-smoke` reuses the generated workflow and stores the converted ONNX artifacts from the smoke test
 
 This is a quick way to confirm that:
 
@@ -32,31 +45,15 @@ olive optimize \
 - the selected optimization recipe is valid for your setup
 - the conversion path completes before you run the full model
 
-If you only want to inspect the generated workflow first, add `--dry_run`:
+If you omit the folder and just pass `--test`, `olive run` will save the reduced model under /test_model.
 
-```bash
-olive optimize \
-  --model_name_or_path microsoft/Phi-3.5-mini-instruct \
-  --device cpu \
-  --provider CPUExecutionProvider \
-  --precision int4 \
-  --test out/phi-test-model \
-  --dry_run \
-  --output_path out/phi-smoke
-```
-
-The generated `config.json` will include both `test_model_config` and `test_model_path`, so the same reduced model can be reused later.
+## Step 3: run the full conversion
 
-## Step 2: run the full conversion
-
 Once the smoke test succeeds, rerun the conversion on the full Phi checkpoint by removing `--test`.
 
 ```bash
-olive optimize \
-  --model_name_or_path microsoft/Phi-3.5-mini-instruct \
-  --device cpu \
-  --provider CPUExecutionProvider \
-  --precision int4 \
+olive run \
+  --config out/phi-smoke/config.json \
   --output_path out/phi-full
 ```

diff --git a/olive/cli/base.py b/olive/cli/base.py
index ccf23ed957..4a8194460e 100644
--- a/olive/cli/base.py
+++ b/olive/cli/base.py
@@ -82,6 +82,20 @@ def run(self):
         raise NotImplementedError
 
 
+def add_hf_test_model_config(input_model: dict, test_value, output_path: Optional[str] = None) -> dict:
+    if test_value in (None, False):
+        return input_model
+
+    test_model_output_path = test_value
+    input_model["test_model_config"] = {"hidden_layers": 2}
+    if test_model_output_path is True:
+        if not output_path:
+            raise ValueError("--test requires an explicit folder when output_path is not available.")
+        test_model_output_path = str(Path(output_path) / "test_model")
+    input_model["test_model_path"] = test_model_output_path
+    return input_model
+
+
 def _get_hf_input_model(args: Namespace, model_path: OLIVE_RESOURCE_ANNOTATIONS) -> dict:
     """Get the input model config for HuggingFace model.
 
@@ -105,16 +119,7 @@ def _get_hf_input_model(args: Namespace, model_path: OLIVE_RESOURCE_ANNOTATIONS)
         input_model["adapter_path"] = args.adapter_path
     if getattr(args, "trust_remote_code", None) is not None:
         input_model["load_kwargs"]["trust_remote_code"] = args.trust_remote_code
-    test_model_output_path = getattr(args, "test", None)
-    if test_model_output_path not in (None, False):
-        input_model["test_model_config"] = {"hidden_layers": 2}
-        if test_model_output_path is True:
-            output_path = getattr(args, "output_path", None)
-            if not output_path:
-                raise ValueError("--test requires an explicit folder when output_path is not available.")
-            test_model_output_path = str(Path(output_path) / "test_model")
-        input_model["test_model_path"] = test_model_output_path
-    return input_model
+    return add_hf_test_model_config(input_model, getattr(args, "test", None), getattr(args, "output_path", None))
 
 
 def _get_onnx_input_model(args: Namespace, model_path: str) -> dict:

diff --git a/olive/cli/run.py b/olive/cli/run.py
index 6d2a831aef..c8ac0fb506 100644
--- a/olive/cli/run.py
+++ b/olive/cli/run.py
@@ -6,6 +6,7 @@
 
 from olive.cli.base import (
     BaseOliveCLICommand,
+    add_hf_test_model_config,
     add_input_model_options,
     add_logging_options,
     add_telemetry_options,
@@ -59,6 +60,14 @@ def run(self):
         if input_model_config := get_input_model_config(self.args, required=False):
             print("Replacing input model config in run config")
             run_config["input_model"] = input_model_config
+        elif getattr(self.args, "test", None) not in (None, False):
+            input_model = run_config.get("input_model")
+            if not isinstance(input_model, dict) or input_model.get("type") != "HfModel":
+                raise ValueError("--test for olive run requires a Hugging Face input_model in the run config.")
+            output_path = (
+                self.args.output_path or run_config.get("output_dir") or run_config.get("engine", {}).get("output_dir")
+            )
+            run_config["input_model"] = add_hf_test_model_config(input_model, self.args.test, output_path)
 
         for arg_key, rc_key in [("output_path", "output_dir"), ("log_level", "log_severity_level")]:
             if (arg_value := getattr(self.args, arg_key)) is not None:

diff --git a/test/cli/test_cli.py b/test/cli/test_cli.py
index a7cb39e244..816147e063 100644
--- a/test/cli/test_cli.py
+++ b/test/cli/test_cli.py
@@ -112,7 +112,8 @@ def test_workflow_run_command(mock_run, tempdir, list_required_packages, tmp_pat
 
 
 @patch("olive.workflows.run")
-def test_workflow_run_command_with_overrides(mock_run, tmp_path):
+@patch("huggingface_hub.repo_exists", return_value=True)
+def test_workflow_run_command_with_overrides(_, mock_run, tmp_path):
     # setup
     config_path = tmp_path / "config.json"
     config_path.write_text(
@@ -150,6 +151,52 @@ def test_workflow_run_command_with_overrides(mock_run, tmp_path):
     )
 
 
+@patch("olive.workflows.run")
+def test_workflow_run_command_with_test_override(mock_run, tmp_path):
+    config_path = tmp_path / "config.json"
+    config_path.write_text(
+        json.dumps(
+            {
+                "input_model": {
+                    "type": "HfModel",
+                    "model_path": "hf-internal-testing/tiny-random-LlamaForCausalLM",
+                    "load_kwargs": {"attn_implementation": "eager", "trust_remote_code": False},
+                },
+                "output_dir": str(tmp_path / "output"),
+            }
+        )
+    )
+    command_args = ["run", "--run-config", str(config_path), "--test"]
+
+    cli_main(command_args)
+
+    mock_run.assert_called_once_with(
+        {
+            "input_model": {
+                "type": "HfModel",
+                "model_path": "hf-internal-testing/tiny-random-LlamaForCausalLM",
+                "load_kwargs": {"attn_implementation": "eager", "trust_remote_code": False},
+                "test_model_config": {"hidden_layers": 2},
+                "test_model_path": str(tmp_path / "output" / "test_model"),
+            },
+            "output_dir": str(tmp_path / "output"),
+        },
+        list_required_packages=False,
+        package_config=None,
+        tempdir=None,
+    )
+
+
+def test_workflow_run_command_with_test_requires_hf_input_model(tmp_path):
+    config_path = tmp_path / "config.json"
+    config_path.write_text(json.dumps({"input_model": {"type": "OnnxModel", "model_path": "model.onnx"}}))
+
+    with pytest.raises(
+        ValueError, match=r"--test for olive run requires a Hugging Face input_model in the run config\."
+    ):
+        cli_main(["run", "--run-config", str(config_path), "--test"])
+
+
 @patch("olive.platform_sdk.qualcomm.configure.configure.configure")
 def test_configure_qualcomm_sdk_command(mock_configure):
     # setup

From 189289ad6c31247cfcf9af65b2480f18749dd355 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Mon, 11 May 2026 13:34:48 +0000
Subject: [PATCH 15/35] chore: address review nits for run test support

Agent-Logs-Url: https://github.com/microsoft/Olive/sessions/6e67b7b2-2cf7-46fa-8edc-04dbd263c56d
Co-authored-by: xadupre <22452781+xadupre@users.noreply.github.com>
---
 olive/cli/base.py    | 1 +
 olive/cli/run.py     | 7 +++++--
 test/cli/test_cli.py | 2 +-
 3 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/olive/cli/base.py b/olive/cli/base.py
index 4a8194460e..e31fb04ddf 100644
--- a/olive/cli/base.py
+++ b/olive/cli/base.py
@@ -87,6 +87,7 @@ def add_hf_test_model_config(input_model: dict, test_value, output_path: Optiona
         return input_model
 
     test_model_output_path = test_value
+    # Use 2 layers to keep the test model fast and lightweight while preserving the original architecture family.
     input_model["test_model_config"] = {"hidden_layers": 2}
     if test_model_output_path is True:
         if not output_path:

diff --git a/olive/cli/run.py b/olive/cli/run.py
index c8ac0fb506..ea533d78b9 100644
--- a/olive/cli/run.py
+++ b/olive/cli/run.py
@@ -57,17 +57,20 @@ def run(self):
         run_config = self.args.run_config
         if not isinstance(run_config, dict):
             run_config = load_config_file(run_config)
+        test_value = getattr(self.args, "test", None)
+        if test_value in (None, False):
+            test_value = None
 
         if input_model_config := get_input_model_config(self.args, required=False):
             print("Replacing input model config in run config")
             run_config["input_model"] = input_model_config
-        elif getattr(self.args, "test", None) not in (None, False):
+        elif test_value:
             input_model = run_config.get("input_model")
             if not isinstance(input_model, dict) or input_model.get("type") != "HfModel":
                 raise ValueError("--test for olive run requires a Hugging Face input_model in the run config.")
             output_path = (
                 self.args.output_path or run_config.get("output_dir") or run_config.get("engine", {}).get("output_dir")
             )
-            run_config["input_model"] = add_hf_test_model_config(input_model, self.args.test, output_path)
+            run_config["input_model"] = add_hf_test_model_config(input_model, test_value, output_path)
 
         for arg_key, rc_key in [("output_path", "output_dir"), ("log_level", "log_severity_level")]:
             if (arg_value := getattr(self.args, arg_key)) is not None:

diff --git a/test/cli/test_cli.py b/test/cli/test_cli.py
index 5e6e1877e6..310e23f561 100644
--- a/test/cli/test_cli.py
+++ b/test/cli/test_cli.py
@@ -113,7 +113,7 @@ def test_workflow_run_command(mock_run, tempdir, list_required_packages, tmp_pat
 
 @patch("olive.workflows.run")
 @patch("huggingface_hub.repo_exists", return_value=True)
-def test_workflow_run_command_with_overrides(_, mock_run, tmp_path):
+def test_workflow_run_command_with_overrides(mock_repo_exists, mock_run, tmp_path):
     # setup
     config_path = tmp_path / "config.json"
     config_path.write_text(

From c90c5201cb8b211c3f02391dee51e96ac6e67bc6 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Mon, 11 May 2026 13:36:38 +0000
Subject: [PATCH 16/35] chore: simplify run test override handling

Agent-Logs-Url: https://github.com/microsoft/Olive/sessions/6e67b7b2-2cf7-46fa-8edc-04dbd263c56d
Co-authored-by: xadupre <22452781+xadupre@users.noreply.github.com>
---
 olive/cli/run.py | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/olive/cli/run.py b/olive/cli/run.py
index ea533d78b9..25a7b7203d 100644
--- a/olive/cli/run.py
+++ b/olive/cli/run.py
@@ -57,13 +57,10 @@ def run(self):
         run_config = self.args.run_config
         if not isinstance(run_config, dict):
             run_config = load_config_file(run_config)
-        test_value = getattr(self.args, "test", None)
-        if test_value in (None, False):
-            test_value = None
 
         if input_model_config := get_input_model_config(self.args, required=False):
             print("Replacing input model config in run config")
             run_config["input_model"] = input_model_config
-        elif test_value:
+        elif (test_value := getattr(self.args, "test", None)) not in (None, False):
             input_model = run_config.get("input_model")
             if not isinstance(input_model, dict) or input_model.get("type") != "HfModel":
                 raise ValueError("--test for olive run requires a Hugging Face input_model in the run config.")

From 5cec47fea06e3bc5c98b6518488afd6627009b0f Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Mon, 11 May 2026 13:38:50 +0000
Subject: [PATCH 17/35] chore: polish run test support follow-up

Agent-Logs-Url: https://github.com/microsoft/Olive/sessions/6e67b7b2-2cf7-46fa-8edc-04dbd263c56d
Co-authored-by: xadupre <22452781+xadupre@users.noreply.github.com>
---
 olive/cli/run.py     | 4 ++--
 test/cli/test_cli.py | 1 +
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/olive/cli/run.py b/olive/cli/run.py
index 25a7b7203d..3d85522330 100644
--- a/olive/cli/run.py
+++ b/olive/cli/run.py
@@ -60,14 +60,14 @@ def run(self):
         if input_model_config := get_input_model_config(self.args, required=False):
             print("Replacing input model config in run config")
             run_config["input_model"] = input_model_config
-        elif (test_value := getattr(self.args, "test", None)) not in (None, False):
+        elif self.args.test not in (None, False):
             input_model = run_config.get("input_model")
             if not isinstance(input_model, dict) or input_model.get("type") != "HfModel":
                 raise ValueError("--test for olive run requires a Hugging Face input_model in the run config.")
             output_path = (
                 self.args.output_path or run_config.get("output_dir") or run_config.get("engine", {}).get("output_dir")
             )
-            run_config["input_model"] = add_hf_test_model_config(input_model, test_value, output_path)
+            run_config["input_model"] = add_hf_test_model_config(input_model, self.args.test, output_path)
 
         for arg_key, rc_key in [("output_path", "output_dir"), ("log_level", "log_severity_level")]:
             if (arg_value := getattr(self.args, arg_key)) is not None:

diff --git a/test/cli/test_cli.py b/test/cli/test_cli.py
index 310e23f561..48c607e2ad 100644
--- a/test/cli/test_cli.py
+++ b/test/cli/test_cli.py
@@ -115,6 +115,7 @@ def test_workflow_run_command(mock_run, tempdir, list_required_packages, tmp_pat
 @patch("huggingface_hub.repo_exists", return_value=True)
 def 
test_workflow_run_command_with_overrides(mock_repo_exists, mock_run, tmp_path): # setup + # Prevent a live Hugging Face repo lookup when the CLI resolves the HF input model override. config_path = tmp_path / "config.json" config_path.write_text( json.dumps({"input_model": {"key": "value"}, "engine": {"log_severity_level": 3}, "output_dir": "output"}) From eaf0a16db600f90567f5059cc182ccc7feff7b4d Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 11 May 2026 13:56:44 +0000 Subject: [PATCH 18/35] fix: use saved test checkpoint in model builder Agent-Logs-Url: https://github.com/microsoft/Olive/sessions/ccae26e4-35a5-4f0a-a20e-894d4d89d1f6 Co-authored-by: xadupre <22452781+xadupre@users.noreply.github.com> --- olive/passes/onnx/model_builder.py | 6 +++ test/passes/onnx/test_model_builder.py | 60 ++++++++++++++++++++++++++ 2 files changed, 66 insertions(+) diff --git a/olive/passes/onnx/model_builder.py b/olive/passes/onnx/model_builder.py index f704579ba2..3b743e1c5a 100644 --- a/olive/passes/onnx/model_builder.py +++ b/olive/passes/onnx/model_builder.py @@ -247,6 +247,12 @@ def _run_for_config( input_path = str(model.get_resource("model_path")) else: model_path = model.model_name_or_path + if model.test_model_config: + if not model.test_model_path: + raise ValueError("ModelBuilder requires test_model_path when test_model_config is provided.") + if not (Path(model.test_model_path) / "config.json").exists(): + model.load_model(cache_model=False) + model_path = model.test_model_path # provide the model path as input path, model builder uses input_path for quantized models input_path = model_path if model.adapter_path: diff --git a/test/passes/onnx/test_model_builder.py b/test/passes/onnx/test_model_builder.py index ba62005e4b..67a8f53ed4 100644 --- a/test/passes/onnx/test_model_builder.py +++ b/test/passes/onnx/test_model_builder.py @@ -2,12 +2,16 @@ # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. 
# -------------------------------------------------------------------------- +import sys +import types from pathlib import Path +from unittest.mock import MagicMock, patch import onnx import pytest from olive.model import ONNXModelHandler +from olive.model.handler.hf import HfModelHandler from olive.passes.olive_pass import create_pass_from_dict from olive.passes.onnx.model_builder import ModelBuilder from olive.passes.pytorch.rtn import Rtn @@ -100,3 +104,59 @@ def test_model_builder_layer_annotations(tmp_path, layer_annotations): assert len(node_names_with_metadata) > 0, ( "Expected nodes with metadata_props when layer_annotations are provided" ) + + +def test_model_builder_uses_saved_test_model_path(tmp_path): + test_model_path = tmp_path / "saved_test_model" + output_folder = tmp_path / "output_model" + + mock_cfg = MagicMock() + mock_cfg.to_dict.return_value = {} + with patch.object(HfModelHandler, "get_hf_model_config", return_value=mock_cfg): + input_model = HfModelHandler( + model_path="hf-internal-testing/tiny-random-LlamaForCausalLM", + test_model_config={"hidden_layers": 2}, + test_model_path=str(test_model_path), + ) + + def materialize_test_model(*_, **__): + test_model_path.mkdir(parents=True, exist_ok=True) + (test_model_path / "config.json").write_text("{}") + return MagicMock() + + def fake_create_model(*_, **kwargs): + output_dir = Path(kwargs["output_dir"]) + (output_dir / kwargs["filename"]).write_text("dummy onnx file") + (output_dir / "genai_config.json").write_text("{}") + + fake_builder = types.ModuleType("onnxruntime_genai.models.builder") + fake_builder.create_model = MagicMock(side_effect=fake_create_model) + fake_models = types.ModuleType("onnxruntime_genai.models") + fake_models.builder = fake_builder + fake_ort_genai = types.ModuleType("onnxruntime_genai") + fake_ort_genai.models = fake_models + fake_ort_genai.__version__ = "0.0.0" + + p = create_pass_from_dict(ModelBuilder, {"precision": "fp32"}, disable_search=True) + + with ( + patch.object(ModelBuilder, "maybe_patch_quant"), + patch.dict( + sys.modules, + { + "onnxruntime_genai": fake_ort_genai, + "onnxruntime_genai.models": fake_models, + "onnxruntime_genai.models.builder": fake_builder, + }, + ), + patch.object(input_model, "load_model", side_effect=materialize_test_model) as mock_load_model, + patch.object(input_model, "save_metadata", return_value=[]), + ): + output_model = p.run(input_model, output_folder) + + assert isinstance(output_model, ONNXModelHandler) + assert mock_load_model.call_count == 1 + assert Path(output_model.model_path).exists() + assert test_model_path.exists() + assert fake_builder.create_model.call_args.kwargs["model_name"] == str(test_model_path) + assert fake_builder.create_model.call_args.kwargs["input_path"] == str(test_model_path) From 17cb075145c187df4a8ca9b149a491f6a3df6e57 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 11 May 2026 13:58:39 +0000 Subject: [PATCH 19/35] chore: tidy model builder test fixture Agent-Logs-Url: https://github.com/microsoft/Olive/sessions/ccae26e4-35a5-4f0a-a20e-894d4d89d1f6 Co-authored-by: xadupre <22452781+xadupre@users.noreply.github.com> --- test/passes/onnx/test_model_builder.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/test/passes/onnx/test_model_builder.py b/test/passes/onnx/test_model_builder.py index 67a8f53ed4..5b86886d7a 100644 --- a/test/passes/onnx/test_model_builder.py +++ b/test/passes/onnx/test_model_builder.py @@ -17,6 +17,8 @@ from 
olive.passes.pytorch.rtn import Rtn from test.utils import make_local_tiny_llama +TEST_MODEL_ID = "hf-internal-testing/tiny-random-LlamaForCausalLM" + @pytest.mark.parametrize("metadata_only", [True, False]) def test_model_builder(tmp_path, metadata_only): @@ -114,7 +116,7 @@ def test_model_builder_uses_saved_test_model_path(tmp_path): mock_cfg.to_dict.return_value = {} with patch.object(HfModelHandler, "get_hf_model_config", return_value=mock_cfg): input_model = HfModelHandler( - model_path="hf-internal-testing/tiny-random-LlamaForCausalLM", + model_path=TEST_MODEL_ID, test_model_config={"hidden_layers": 2}, test_model_path=str(test_model_path), ) From 996f6331ae4614721093316e17597ff32b4218c8 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 11 May 2026 14:01:32 +0000 Subject: [PATCH 20/35] chore: clarify model builder test model errors Agent-Logs-Url: https://github.com/microsoft/Olive/sessions/ccae26e4-35a5-4f0a-a20e-894d4d89d1f6 Co-authored-by: xadupre <22452781+xadupre@users.noreply.github.com> --- olive/passes/onnx/model_builder.py | 5 ++++- test/passes/onnx/test_model_builder.py | 6 +++--- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/olive/passes/onnx/model_builder.py b/olive/passes/onnx/model_builder.py index 3b743e1c5a..ba29c0a9b3 100644 --- a/olive/passes/onnx/model_builder.py +++ b/olive/passes/onnx/model_builder.py @@ -249,7 +249,10 @@ def _run_for_config( model_path = model.model_name_or_path if model.test_model_config: if not model.test_model_path: - raise ValueError("ModelBuilder requires test_model_path when test_model_config is provided.") + raise ValueError( + "ModelBuilder requires test_model_path to be set when test_model_config is provided. " + "Please specify the path where the test model should be saved." 
+ ) if not (Path(model.test_model_path) / "config.json").exists(): model.load_model(cache_model=False) model_path = model.test_model_path diff --git a/test/passes/onnx/test_model_builder.py b/test/passes/onnx/test_model_builder.py index 5b86886d7a..0c838a3f7f 100644 --- a/test/passes/onnx/test_model_builder.py +++ b/test/passes/onnx/test_model_builder.py @@ -17,7 +17,7 @@ from olive.passes.pytorch.rtn import Rtn from test.utils import make_local_tiny_llama -TEST_MODEL_ID = "hf-internal-testing/tiny-random-LlamaForCausalLM" +TINY_RANDOM_LLAMA_MODEL_ID = "hf-internal-testing/tiny-random-LlamaForCausalLM" @pytest.mark.parametrize("metadata_only", [True, False]) @@ -116,12 +116,12 @@ def test_model_builder_uses_saved_test_model_path(tmp_path): mock_cfg.to_dict.return_value = {} with patch.object(HfModelHandler, "get_hf_model_config", return_value=mock_cfg): input_model = HfModelHandler( - model_path=TEST_MODEL_ID, + model_path=TINY_RANDOM_LLAMA_MODEL_ID, test_model_config={"hidden_layers": 2}, test_model_path=str(test_model_path), ) - def materialize_test_model(*_, **__): + def materialize_test_model(*args, **kwargs): test_model_path.mkdir(parents=True, exist_ok=True) (test_model_path / "config.json").write_text("{}") return MagicMock() From 00732b75fb86ed0aca5fc3803424c3ee00c3ca47 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Xavier=20Dupr=C3=A9?= Date: Mon, 11 May 2026 16:15:27 +0200 Subject: [PATCH 21/35] fix dtype: auto --- olive/common/hf/utils.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/olive/common/hf/utils.py b/olive/common/hf/utils.py index 3bc5c29999..fbe4ed7f10 100644 --- a/olive/common/hf/utils.py +++ b/olive/common/hf/utils.py @@ -48,6 +48,11 @@ def _apply_test_model_config( if not updated: raise ValueError("Unable to create a test model because the config does not expose a hidden-layer count.") + if "dtype" in model_config and model_config.dtype == "auto": + # This is not allowed anymore with transformers >=4.57, + # we select float16 instead. + model_config.dtype = "float16" + return model_config From 76ee4efb7acf93a6b4fb0d921cf05c417b01eb59 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Xavier=20Dupr=C3=A9?= Date: Mon, 11 May 2026 16:26:28 +0200 Subject: [PATCH 22/35] update documentation --- docs/source/how-to/cli/cli-convert-phi-test.md | 2 +- docs/source/how-to/index.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/source/how-to/cli/cli-convert-phi-test.md b/docs/source/how-to/cli/cli-convert-phi-test.md index b0fc17ec14..c64209b1cd 100644 --- a/docs/source/how-to/cli/cli-convert-phi-test.md +++ b/docs/source/how-to/cli/cli-convert-phi-test.md @@ -16,8 +16,8 @@ olive optimize \ --device cpu \ --provider CPUExecutionProvider \ --precision int4 \ - --dry_run \ --output_path out/phi-smoke + --dry_run ``` This creates `out/phi-smoke/config.json` without launching the full conversion yet. diff --git a/docs/source/how-to/index.md b/docs/source/how-to/index.md index d0b4a290a1..35a6074237 100644 --- a/docs/source/how-to/index.md +++ b/docs/source/how-to/index.md @@ -8,11 +8,11 @@ The Olive CLI provides a set of primitives such as `quantize`, `finetune`, `onnx-graph-capture`, `auto-opt` that enable you to *easily* optimize select models and experiment with different cutting-edge optimization strategies without the need to define workflows. 
- [How to use the `olive optimize` command to optimize a Pytorch model](cli/cli-optimize) -- [How to convert a Phi model with a quick `--test` smoke check](cli/cli-convert-phi-test) - [How to use the `olive auto-opt` command to take a PyTorch/Hugging Face model and turn it into an optimized ONNX model](cli/cli-auto-opt) - [how to use the `olive finetune` command to create (Q)LoRA adapters](cli/cli-finetune) - [How to use the `olive quantize` command to quantize your model with different precisions and techniques such as AWQ](cli/cli-quantize) - [How to use the `olive run` command to execute an Olive workflow.](cli/cli-run) +- [How to convert a Phi model with a quick `--test` smoke check](cli/cli-convert-phi-test) # Olive Python API From 9ba38c7c1cb20585eb8622bb2fbe9ab4d6806ded Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 11 May 2026 14:42:05 +0000 Subject: [PATCH 23/35] docs: clarify phi smoke test output path Agent-Logs-Url: https://github.com/microsoft/Olive/sessions/c25a0cd6-d252-4e16-8783-52a5be97f14e Co-authored-by: xadupre <22452781+xadupre@users.noreply.github.com> --- docs/source/how-to/cli/cli-convert-phi-test.md | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/docs/source/how-to/cli/cli-convert-phi-test.md b/docs/source/how-to/cli/cli-convert-phi-test.md index c64209b1cd..f17fb3c425 100644 --- a/docs/source/how-to/cli/cli-convert-phi-test.md +++ b/docs/source/how-to/cli/cli-convert-phi-test.md @@ -16,7 +16,7 @@ olive optimize \ --device cpu \ --provider CPUExecutionProvider \ --precision int4 \ - --output_path out/phi-smoke + --output_path out/phi-smoke \ --dry_run ``` @@ -30,14 +30,16 @@ Use the generated config with `olive run` and pass `--test` so Olive swaps in a olive run \ --config out/phi-smoke/config.json \ --test out/phi-test-model \ - --output_path out/phi-smoke + --output_path out/phi-smoke-run ``` What this does: - `--test out/phi-test-model` creates a reduced random Phi model and saves it in `out/phi-test-model` - later runs reuse the same saved test model instead of recreating it -- `--output_path out/phi-smoke` reuses the generated workflow and stores the converted ONNX artifacts from the smoke test +- `--output_path out/phi-smoke-run` gives the smoke test its own output folder, so the generated ONNX artifacts are easy to find + +After the smoke test finishes, look under `out/phi-smoke-run` for the exported ONNX model and related files. 
This is a quick way to confirm that: From fa6bee7c4736b639b4addd666ca3938f378166d7 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 11 May 2026 14:46:18 +0000 Subject: [PATCH 24/35] docs: switch smoke test how-to to qwen Agent-Logs-Url: https://github.com/microsoft/Olive/sessions/acc48c65-2f6d-44c0-80c1-271e83838437 Co-authored-by: xadupre <22452781+xadupre@users.noreply.github.com> --- ...t-phi-test.md => cli-convert-qwen-test.md} | 32 +++++++++---------- docs/source/how-to/index.md | 4 +-- 2 files changed, 18 insertions(+), 18 deletions(-) rename docs/source/how-to/cli/{cli-convert-phi-test.md => cli-convert-qwen-test.md} (62%) diff --git a/docs/source/how-to/cli/cli-convert-phi-test.md b/docs/source/how-to/cli/cli-convert-qwen-test.md similarity index 62% rename from docs/source/how-to/cli/cli-convert-phi-test.md rename to docs/source/how-to/cli/cli-convert-qwen-test.md index f17fb3c425..a26be99a59 100644 --- a/docs/source/how-to/cli/cli-convert-phi-test.md +++ b/docs/source/how-to/cli/cli-convert-qwen-test.md @@ -1,45 +1,45 @@ -# How to convert a Phi model with a quick `--test` smoke check +# How to convert a Qwen model with a quick `--test` smoke check If you are converting a large language model, it is often useful to validate the Olive command, environment, and conversion recipe on a much smaller model before spending time on the full checkpoint. The `--test` option does that for Hugging Face models. Olive keeps the same model architecture, reduces it to a random 2-layer test model, saves it to the folder you provide, and reuses that folder on later runs. -This example uses [`microsoft/Phi-3.5-mini-instruct`](https://huggingface.co/microsoft/Phi-3.5-mini-instruct), but the same pattern works for other supported Hugging Face LLMs. +This example uses [`Qwen/Qwen3-0.6B`](https://huggingface.co/Qwen/Qwen3-0.6B), but the same pattern works for other supported Hugging Face LLMs. ## Step 1: generate the workflow config -Start by generating the config that Olive will run for the Phi conversion. +Start by generating the config that Olive will run for the Qwen conversion. ```bash olive optimize \ - --model_name_or_path microsoft/Phi-3.5-mini-instruct \ + --model_name_or_path Qwen/Qwen3-0.6B \ --device cpu \ --provider CPUExecutionProvider \ --precision int4 \ - --output_path out/phi-smoke \ + --output_path out/qwen-smoke \ --dry_run ``` -This creates `out/phi-smoke/config.json` without launching the full conversion yet. +This creates `out/qwen-smoke/config.json` without launching the full conversion yet. ## Step 2: run a fast smoke test with `olive run --test` -Use the generated config with `olive run` and pass `--test` so Olive swaps in a reduced random Phi model. +Use the generated config with `olive run` and pass `--test` so Olive swaps in a reduced random Qwen model. 
```bash olive run \ - --config out/phi-smoke/config.json \ - --test out/phi-test-model \ - --output_path out/phi-smoke-run + --config out/qwen-smoke/config.json \ + --test out/qwen-test-model \ + --output_path out/qwen-smoke-run ``` What this does: -- `--test out/phi-test-model` creates a reduced random Phi model and saves it in `out/phi-test-model` +- `--test out/qwen-test-model` creates a reduced random Qwen model and saves it in `out/qwen-test-model` - later runs reuse the same saved test model instead of recreating it -- `--output_path out/phi-smoke-run` gives the smoke test its own output folder, so the generated ONNX artifacts are easy to find +- `--output_path out/qwen-smoke-run` gives the smoke test its own output folder, so the generated ONNX artifacts are easy to find -After the smoke test finishes, look under `out/phi-smoke-run` for the exported ONNX model and related files. +After the smoke test finishes, look under `out/qwen-smoke-run` for the exported ONNX model and related files. This is a quick way to confirm that: @@ -51,12 +51,12 @@ If you omit the folder and just pass `--test`, `olive run` will save the reduced ## Step 3: run the full conversion -Once the smoke test succeeds, rerun the conversion on the full Phi checkpoint by removing `--test`. +Once the smoke test succeeds, rerun the conversion on the full Qwen checkpoint by removing `--test`. ```bash olive run \ - --config out/phi-smoke/config.json \ - --output_path out/phi-full + --config out/qwen-smoke/config.json \ + --output_path out/qwen-full ``` At this point you know the Olive command and the conversion recipe already worked on the lightweight test model, so you can focus on the full-model run instead of debugging both at once. diff --git a/docs/source/how-to/index.md b/docs/source/how-to/index.md index 35a6074237..4e00636e7d 100644 --- a/docs/source/how-to/index.md +++ b/docs/source/how-to/index.md @@ -12,7 +12,7 @@ The Olive CLI provides a set of primitives such as `quantize`, `finetune`, `onnx - [how to use the `olive finetune` command to create (Q)LoRA adapters](cli/cli-finetune) - [How to use the `olive quantize` command to quantize your model with different precisions and techniques such as AWQ](cli/cli-quantize) - [How to use the `olive run` command to execute an Olive workflow.](cli/cli-run) -- [How to convert a Phi model with a quick `--test` smoke check](cli/cli-convert-phi-test) +- [How to convert a Qwen model with a quick `--test` smoke check](cli/cli-convert-qwen-test) # Olive Python API @@ -44,7 +44,7 @@ The Olive CLI provides a set of primitives such as `quantize`, `finetune`, `onnx installation cli/cli-optimize -cli/cli-convert-phi-test +cli/cli-convert-qwen-test cli/cli-auto-opt cli/cli-finetune cli/cli-quantize From ffda0dc05edd2388104794d7aef39ee959061c5d Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 11 May 2026 15:14:56 +0000 Subject: [PATCH 25/35] test: cover documented llm smoke flow Agent-Logs-Url: https://github.com/microsoft/Olive/sessions/c421adc3-0615-4d10-bf15-b21d632d70b2 Co-authored-by: xadupre <22452781+xadupre@users.noreply.github.com> --- test/cli/test_cli.py | 92 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 92 insertions(+) diff --git a/test/cli/test_cli.py b/test/cli/test_cli.py index 48c607e2ad..e76e3fb1fa 100644 --- a/test/cli/test_cli.py +++ b/test/cli/test_cli.py @@ -260,6 +260,98 @@ def test_optimize_command_test_model_config(_, tmp_path): assert config["input_model"]["test_model_path"] == 
str(test_model_dir) +@patch("huggingface_hub.repo_exists", return_value=True) +def test_documented_smoke_test_commands_produce_onnx_with_tiny_random_llama(_, tmp_path): + import types + from unittest.mock import MagicMock + + from olive.model.config.model_config import ModelConfig + from olive.model.handler.hf import HfModelHandler + from olive.passes.onnx.model_builder import ModelBuilder + from olive.passes.pytorch.gptq import Gptq + + model_id = "hf-internal-testing/tiny-random-LlamaForCausalLM" + config_output_dir = tmp_path / "qwen-smoke" + test_model_dir = tmp_path / "qwen-test-model" + run_output_dir = tmp_path / "qwen-smoke-run" + + cli_main( + [ + "optimize", + "-m", + model_id, + "--device", + "cpu", + "--provider", + "CPUExecutionProvider", + "--precision", + "int4", + "--output_path", + str(config_output_dir), + "--dry_run", + ] + ) + + config_path = config_output_dir / "config.json" + assert config_path.exists() + + def materialize_test_model(self, *args, **kwargs): + Path(self.test_model_path).mkdir(parents=True, exist_ok=True) + (Path(self.test_model_path) / "config.json").write_text("{}") + return MagicMock() + + def fake_gptq_run(self, model, config, output_model_path): + return model + + def fake_create_model(*_, **kwargs): + output_dir = Path(kwargs["output_dir"]) + output_dir.mkdir(parents=True, exist_ok=True) + (output_dir / kwargs["filename"]).write_text("dummy onnx file") + (output_dir / "genai_config.json").write_text("{}") + + fake_builder = types.ModuleType("onnxruntime_genai.models.builder") + fake_builder.create_model = MagicMock(side_effect=fake_create_model) + fake_models = types.ModuleType("onnxruntime_genai.models") + fake_models.builder = fake_builder + fake_ort_genai = types.ModuleType("onnxruntime_genai") + fake_ort_genai.models = fake_models + fake_ort_genai.__version__ = "0.0.0" + mock_cfg = MagicMock() + mock_cfg.to_dict.return_value = {} + + with ( + patch.object(ModelConfig, "get_model_identifier", return_value="tiny-random-llama"), + patch.object(HfModelHandler, "get_hf_model_config", return_value=mock_cfg), + patch.object(HfModelHandler, "load_model", new=materialize_test_model), + patch.object(HfModelHandler, "save_metadata", return_value=[]), + patch.object(Gptq, "_run_for_config", autospec=True, side_effect=fake_gptq_run), + patch.object(ModelBuilder, "maybe_patch_quant"), + patch.dict( + sys.modules, + { + "onnxruntime_genai": fake_ort_genai, + "onnxruntime_genai.models": fake_models, + "onnxruntime_genai.models.builder": fake_builder, + }, + ), + ): + cli_main( + [ + "run", + "--config", + str(config_path), + "--test", + str(test_model_dir), + "--output_path", + str(run_output_dir), + ] + ) + + assert (test_model_dir / "config.json").exists() + assert (run_output_dir / "model.onnx").exists() + assert (run_output_dir / "genai_config.json").exists() + + @patch("olive.workflows.run") @patch("olive.model.handler.diffusers.is_valid_diffusers_model", return_value=True) def test_diffusion_lora_command(_, mock_run, tmp_path): From d0f868f7c5a66f23f1f18d91007fb865a9b14c79 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 11 May 2026 15:17:13 +0000 Subject: [PATCH 26/35] test: polish documented smoke flow test Agent-Logs-Url: https://github.com/microsoft/Olive/sessions/c421adc3-0615-4d10-bf15-b21d632d70b2 Co-authored-by: xadupre <22452781+xadupre@users.noreply.github.com> --- test/cli/test_cli.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/test/cli/test_cli.py 
b/test/cli/test_cli.py index e76e3fb1fa..a17950eaa4 100644 --- a/test/cli/test_cli.py +++ b/test/cli/test_cli.py @@ -271,9 +271,9 @@ def test_documented_smoke_test_commands_produce_onnx_with_tiny_random_llama(_, t from olive.passes.pytorch.gptq import Gptq model_id = "hf-internal-testing/tiny-random-LlamaForCausalLM" - config_output_dir = tmp_path / "qwen-smoke" - test_model_dir = tmp_path / "qwen-test-model" - run_output_dir = tmp_path / "qwen-smoke-run" + config_output_dir = tmp_path / "test-smoke" + test_model_dir = tmp_path / "test-model" + run_output_dir = tmp_path / "test-smoke-run" cli_main( [ @@ -306,7 +306,7 @@ def fake_gptq_run(self, model, config, output_model_path): def fake_create_model(*_, **kwargs): output_dir = Path(kwargs["output_dir"]) output_dir.mkdir(parents=True, exist_ok=True) - (output_dir / kwargs["filename"]).write_text("dummy onnx file") + (output_dir / kwargs["filename"]).write_text("dummy ONNX file") (output_dir / "genai_config.json").write_text("{}") fake_builder = types.ModuleType("onnxruntime_genai.models.builder") From a408b6391188bddcbafe7efd03a4c7efd78b53f2 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 11 May 2026 15:19:22 +0000 Subject: [PATCH 27/35] test: rename smoke flow cli test Agent-Logs-Url: https://github.com/microsoft/Olive/sessions/c421adc3-0615-4d10-bf15-b21d632d70b2 Co-authored-by: xadupre <22452781+xadupre@users.noreply.github.com> --- test/cli/test_cli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/cli/test_cli.py b/test/cli/test_cli.py index a17950eaa4..67a39a71c9 100644 --- a/test/cli/test_cli.py +++ b/test/cli/test_cli.py @@ -261,7 +261,7 @@ def test_optimize_command_test_model_config(_, tmp_path): @patch("huggingface_hub.repo_exists", return_value=True) -def test_documented_smoke_test_commands_produce_onnx_with_tiny_random_llama(_, tmp_path): +def test_optimize_dry_run_then_run_with_test_model(_, tmp_path): import types from unittest.mock import MagicMock From e272c2a8221f99a5befe016b3209d420b914a214 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 11 May 2026 15:21:38 +0000 Subject: [PATCH 28/35] test: refine smoke flow workflow stubs Agent-Logs-Url: https://github.com/microsoft/Olive/sessions/c421adc3-0615-4d10-bf15-b21d632d70b2 Co-authored-by: xadupre <22452781+xadupre@users.noreply.github.com> --- test/cli/test_cli.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/cli/test_cli.py b/test/cli/test_cli.py index 67a39a71c9..3c00aff9cd 100644 --- a/test/cli/test_cli.py +++ b/test/cli/test_cli.py @@ -297,7 +297,7 @@ def test_optimize_dry_run_then_run_with_test_model(_, tmp_path): def materialize_test_model(self, *args, **kwargs): Path(self.test_model_path).mkdir(parents=True, exist_ok=True) - (Path(self.test_model_path) / "config.json").write_text("{}") + (Path(self.test_model_path) / "config.json").write_text(json.dumps({"model_type": "llama"})) return MagicMock() def fake_gptq_run(self, model, config, output_model_path): @@ -315,7 +315,7 @@ def fake_create_model(*_, **kwargs): fake_models.builder = fake_builder fake_ort_genai = types.ModuleType("onnxruntime_genai") fake_ort_genai.models = fake_models - fake_ort_genai.__version__ = "0.0.0" + fake_ort_genai.__version__ = "0.10.0" mock_cfg = MagicMock() mock_cfg.to_dict.return_value = {} From c901b636e962e150a2cb490f39dbdec7d48fc594 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" 
<198982749+Copilot@users.noreply.github.com> Date: Mon, 11 May 2026 15:23:44 +0000 Subject: [PATCH 29/35] test: tidy smoke flow helper names Agent-Logs-Url: https://github.com/microsoft/Olive/sessions/c421adc3-0615-4d10-bf15-b21d632d70b2 Co-authored-by: xadupre <22452781+xadupre@users.noreply.github.com> --- test/cli/test_cli.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test/cli/test_cli.py b/test/cli/test_cli.py index 3c00aff9cd..40a203b9b9 100644 --- a/test/cli/test_cli.py +++ b/test/cli/test_cli.py @@ -295,7 +295,7 @@ def test_optimize_dry_run_then_run_with_test_model(_, tmp_path): config_path = config_output_dir / "config.json" assert config_path.exists() - def materialize_test_model(self, *args, **kwargs): + def setup_test_model_files(self, *args, **kwargs): Path(self.test_model_path).mkdir(parents=True, exist_ok=True) (Path(self.test_model_path) / "config.json").write_text(json.dumps({"model_type": "llama"})) return MagicMock() @@ -303,7 +303,7 @@ def materialize_test_model(self, *args, **kwargs): def fake_gptq_run(self, model, config, output_model_path): return model - def fake_create_model(*_, **kwargs): + def fake_create_model(**kwargs): output_dir = Path(kwargs["output_dir"]) output_dir.mkdir(parents=True, exist_ok=True) (output_dir / kwargs["filename"]).write_text("dummy ONNX file") @@ -322,7 +322,7 @@ def fake_create_model(*_, **kwargs): with ( patch.object(ModelConfig, "get_model_identifier", return_value="tiny-random-llama"), patch.object(HfModelHandler, "get_hf_model_config", return_value=mock_cfg), - patch.object(HfModelHandler, "load_model", new=materialize_test_model), + patch.object(HfModelHandler, "load_model", new=setup_test_model_files), patch.object(HfModelHandler, "save_metadata", return_value=[]), patch.object(Gptq, "_run_for_config", autospec=True, side_effect=fake_gptq_run), patch.object(ModelBuilder, "maybe_patch_quant"), From 36410cdf054ad007daa11f863ed67b121b69aad9 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 11 May 2026 15:26:08 +0000 Subject: [PATCH 30/35] test: clarify smoke flow mocks Agent-Logs-Url: https://github.com/microsoft/Olive/sessions/c421adc3-0615-4d10-bf15-b21d632d70b2 Co-authored-by: xadupre <22452781+xadupre@users.noreply.github.com> --- test/cli/test_cli.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test/cli/test_cli.py b/test/cli/test_cli.py index 40a203b9b9..04904551a0 100644 --- a/test/cli/test_cli.py +++ b/test/cli/test_cli.py @@ -295,12 +295,12 @@ def test_optimize_dry_run_then_run_with_test_model(_, tmp_path): config_path = config_output_dir / "config.json" assert config_path.exists() - def setup_test_model_files(self, *args, **kwargs): + def fake_load_model(self, *args, **kwargs): Path(self.test_model_path).mkdir(parents=True, exist_ok=True) (Path(self.test_model_path) / "config.json").write_text(json.dumps({"model_type": "llama"})) return MagicMock() - def fake_gptq_run(self, model, config, output_model_path): + def fake_gptq_run(self, model, _config, _output_model_path): return model def fake_create_model(**kwargs): @@ -322,7 +322,7 @@ def fake_create_model(**kwargs): with ( patch.object(ModelConfig, "get_model_identifier", return_value="tiny-random-llama"), patch.object(HfModelHandler, "get_hf_model_config", return_value=mock_cfg), - patch.object(HfModelHandler, "load_model", new=setup_test_model_files), + patch.object(HfModelHandler, "load_model", new=fake_load_model), patch.object(HfModelHandler, 
"save_metadata", return_value=[]), patch.object(Gptq, "_run_for_config", autospec=True, side_effect=fake_gptq_run), patch.object(ModelBuilder, "maybe_patch_quant"), From e16cb826fa332e472b6f00007e228446dc179981 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 11 May 2026 15:28:31 +0000 Subject: [PATCH 31/35] test: polish documented smoke flow test naming Agent-Logs-Url: https://github.com/microsoft/Olive/sessions/c421adc3-0615-4d10-bf15-b21d632d70b2 Co-authored-by: xadupre <22452781+xadupre@users.noreply.github.com> --- test/cli/test_cli.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/test/cli/test_cli.py b/test/cli/test_cli.py index 04904551a0..ef2c9840dd 100644 --- a/test/cli/test_cli.py +++ b/test/cli/test_cli.py @@ -261,7 +261,7 @@ def test_optimize_command_test_model_config(_, tmp_path): @patch("huggingface_hub.repo_exists", return_value=True) -def test_optimize_dry_run_then_run_with_test_model(_, tmp_path): +def test_optimize_dry_run_then_run_with_test_model(mock_repo_exists, tmp_path): import types from unittest.mock import MagicMock @@ -294,13 +294,15 @@ def test_optimize_dry_run_then_run_with_test_model(_, tmp_path): config_path = config_output_dir / "config.json" assert config_path.exists() + assert mock_repo_exists.called - def fake_load_model(self, *args, **kwargs): - Path(self.test_model_path).mkdir(parents=True, exist_ok=True) - (Path(self.test_model_path) / "config.json").write_text(json.dumps({"model_type": "llama"})) + def fake_load_model(handler, *args, **kwargs): + Path(handler.test_model_path).mkdir(parents=True, exist_ok=True) + (Path(handler.test_model_path) / "config.json").write_text(json.dumps({"model_type": "llama"})) return MagicMock() - def fake_gptq_run(self, model, _config, _output_model_path): + def fake_gptq_run(self, model, pass_config, output_model_path): + del pass_config, output_model_path return model def fake_create_model(**kwargs): From 750760406f64b2d34df3ff18d1efdf4adcf4dc94 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 11 May 2026 15:30:48 +0000 Subject: [PATCH 32/35] test: lift smoke flow imports and mock defaults Agent-Logs-Url: https://github.com/microsoft/Olive/sessions/c421adc3-0615-4d10-bf15-b21d632d70b2 Co-authored-by: xadupre <22452781+xadupre@users.noreply.github.com> --- test/cli/test_cli.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/test/cli/test_cli.py b/test/cli/test_cli.py index ef2c9840dd..8489b4586f 100644 --- a/test/cli/test_cli.py +++ b/test/cli/test_cli.py @@ -5,8 +5,9 @@ import json import subprocess import sys +import types from pathlib import Path -from unittest.mock import patch +from unittest.mock import MagicMock, patch import pytest @@ -262,9 +263,6 @@ def test_optimize_command_test_model_config(_, tmp_path): @patch("huggingface_hub.repo_exists", return_value=True) def test_optimize_dry_run_then_run_with_test_model(mock_repo_exists, tmp_path): - import types - from unittest.mock import MagicMock - from olive.model.config.model_config import ModelConfig from olive.model.handler.hf import HfModelHandler from olive.passes.onnx.model_builder import ModelBuilder @@ -327,7 +325,7 @@ def fake_create_model(**kwargs): patch.object(HfModelHandler, "load_model", new=fake_load_model), patch.object(HfModelHandler, "save_metadata", return_value=[]), patch.object(Gptq, "_run_for_config", autospec=True, side_effect=fake_gptq_run), - 
patch.object(ModelBuilder, "maybe_patch_quant"), + patch.object(ModelBuilder, "maybe_patch_quant", return_value=None), patch.dict( sys.modules, { From ac7840f189cafed6aef2d86c214449a7fc019940 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 11 May 2026 16:57:29 +0000 Subject: [PATCH 33/35] fix: keep qwen test layer types in sync Agent-Logs-Url: https://github.com/microsoft/Olive/sessions/5bd5199d-42fa-4109-94a3-c7995abe72a2 Co-authored-by: xadupre <22452781+xadupre@users.noreply.github.com> --- olive/common/hf/utils.py | 4 ++++ test/common/test_hf.py | 19 +++++++++++++++++-- 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/olive/common/hf/utils.py b/olive/common/hf/utils.py index fbe4ed7f10..c2c2110c2c 100644 --- a/olive/common/hf/utils.py +++ b/olive/common/hf/utils.py @@ -48,6 +48,10 @@ def _apply_test_model_config( if not updated: raise ValueError("Unable to create a test model because the config does not expose a hidden-layer count.") + layer_types = getattr(model_config, "layer_types", None) + if isinstance(layer_types, (list, tuple)): + model_config.layer_types = layer_types[:hidden_layers] + if "dtype" in model_config and model_config.dtype == "auto": # This is not allowed anymore with transformers >=4.57, # we select float16 instead. diff --git a/test/common/test_hf.py b/test/common/test_hf.py index 32fa143b89..cab00e3df3 100644 --- a/test/common/test_hf.py +++ b/test/common/test_hf.py @@ -6,10 +6,10 @@ import pytest import torch -from transformers import BertConfig, GPT2Config +from transformers import BertConfig, GPT2Config, Qwen3Config from olive.common.hf.model_io import get_model_dummy_input, get_model_io_config -from olive.common.hf.utils import _load_test_model, load_model_from_task +from olive.common.hf.utils import _apply_test_model_config, _load_test_model, load_model_from_task def test_load_model_from_task(): @@ -121,6 +121,21 @@ def test_load_model_from_task_test_model_config_reuses_saved_model(tmp_path): assert mock_from_pretrained.call_args_list[1].args[1] == str(test_model_path) +def test_apply_test_model_config_updates_qwen3_layer_types(): + model_config = Qwen3Config() + model_config.num_hidden_layers = 4 + model_config.layer_types = model_config.layer_types[:4] + + updated_config = _apply_test_model_config(model_config, {"hidden_layers": 2}) + + assert updated_config.num_hidden_layers == 2 + assert updated_config.layer_types == model_config.layer_types[:2] + reloaded_config = Qwen3Config(**updated_config.to_dict()) + assert reloaded_config.num_hidden_layers == 2 + assert len(reloaded_config.layer_types) == 2 + assert reloaded_config.layer_types == model_config.layer_types[:2] + + def test_load_test_model_omits_unsupported_trust_remote_code_kwarg(): model_config = BertConfig(num_hidden_layers=12) # pylint: disable=unexpected-keyword-arg captured = {} From f165dda8b883920fab82cb7b5d2755a08b7cb7e6 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 12 May 2026 09:54:28 +0000 Subject: [PATCH 34/35] Merge origin/main and resolve model builder test conflict Agent-Logs-Url: https://github.com/microsoft/Olive/sessions/2f777346-5b6a-423f-89fd-f40de41d2b70 Co-authored-by: xadupre <22452781+xadupre@users.noreply.github.com> --- .../source/how-to/extending/custom-scripts.md | 2 +- mcp/uv.lock | 6 +- olive/cache.py | 10 +- olive/cli/auto_opt.py | 14 ++- olive/evaluator/lmeval_ort.py | 18 ++-- olive/passes/onnx/kquant_quantization.py | 10 +- 
olive/passes/onnx/model_builder.py | 99 ++++++++++++----- test/passes/onnx/test_model_builder.py | 102 +++++++++++++++++- 8 files changed, 215 insertions(+), 46 deletions(-) diff --git a/docs/source/how-to/extending/custom-scripts.md b/docs/source/how-to/extending/custom-scripts.md index 8e8961a5b6..5e78149fe1 100644 --- a/docs/source/how-to/extending/custom-scripts.md +++ b/docs/source/how-to/extending/custom-scripts.md @@ -36,7 +36,7 @@ class MyDataLoader: @Registry.register_dataloader() def my_dataloader(dataset, batch_size): - return MyDataloader(dataset, batch_size) + return MyDataLoader(dataset, batch_size) @Registry.register_post_process() def my_post_process(output): diff --git a/mcp/uv.lock b/mcp/uv.lock index b7995e5533..50c363efdb 100644 --- a/mcp/uv.lock +++ b/mcp/uv.lock @@ -594,11 +594,11 @@ wheels = [ [[package]] name = "python-multipart" -version = "0.0.26" +version = "0.0.27" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/88/71/b145a380824a960ebd60e1014256dbb7d2253f2316ff2d73dfd8928ec2c3/python_multipart-0.0.26.tar.gz", hash = "sha256:08fadc45918cd615e26846437f50c5d6d23304da32c341f289a617127b081f17", size = 43501, upload-time = "2026-04-10T14:09:59.473Z" } +sdist = { url = "https://files.pythonhosted.org/packages/69/9b/f23807317a113dc36e74e75eb265a02dd1a4d9082abc3c1064acd22997c4/python_multipart-0.0.27.tar.gz", hash = "sha256:9870a6a8c5a20a5bf4f07c017bd1489006ff8836cff097b6933355ee2b49b602", size = 44043, upload-time = "2026-04-27T10:51:26.649Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/9a/22/f1925cdda983ab66fc8ec6ec8014b959262747e58bdca26a4e3d1da29d56/python_multipart-0.0.26-py3-none-any.whl", hash = "sha256:c0b169f8c4484c13b0dcf2ef0ec3a4adb255c4b7d18d8e420477d2b1dd03f185", size = 28847, upload-time = "2026-04-10T14:09:58.131Z" }, + { url = "https://files.pythonhosted.org/packages/99/78/4126abcbdbd3c559d43e0db7f7b9173fc6befe45d39a2856cc0b8ec2a5a6/python_multipart-0.0.27-py3-none-any.whl", hash = "sha256:6fccfad17a27334bd0193681b369f476eda3409f17381a2d65aa7df3f7275645", size = 29254, upload-time = "2026-04-27T10:51:24.997Z" }, ] [[package]] diff --git a/olive/cache.py b/olive/cache.py index 22b13eae5b..ceb64e1528 100644 --- a/olive/cache.py +++ b/olive/cache.py @@ -439,13 +439,19 @@ def save_model( else: from olive.passes.onnx.common import resave_model + component_output_name = ( + component_name + if Path(component_name).suffix == ".onnx" + else f"{component_name}.onnx" + ) + resave_model( ModelConfig.model_validate(component_model_json).create_model().model_path, - actual_output_dir / f"{component_name}.onnx", + actual_output_dir / component_output_name, saved_external_files=saved_external_files, ) component_model_json["config"][resource_name] = str(actual_output_dir) - component_model_json["config"]["onnx_file_name"] = f"{component_name}.onnx" + component_model_json["config"]["onnx_file_name"] = component_output_name copied_components.append(component_model_json) diff --git a/olive/cli/auto_opt.py b/olive/cli/auto_opt.py index 2e0f73444f..ef8f5fed8d 100644 --- a/olive/cli/auto_opt.py +++ b/olive/cli/auto_opt.py @@ -2,6 +2,7 @@ # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. 
# -------------------------------------------------------------------------- +import logging from argparse import ArgumentParser from collections import OrderedDict from copy import deepcopy @@ -25,13 +26,19 @@ from olive.package_config import OlivePackageConfig from olive.telemetry import action +logger = logging.getLogger(__name__) + class AutoOptCommand(BaseOliveCLICommand): @staticmethod def register_subcommand(parser: ArgumentParser): sub_parser = parser.add_parser( "auto-opt", - help="Automatically optimize the performance of the input model.", + help=( + "Automatically optimize the performance of the input model.\n" + "**** DEPRECATION WARNING ****\n" + '"auto-opt" command is deprecated in favor of "optimize".' + ), ) # Model options @@ -174,6 +181,11 @@ def register_subcommand(parser: ArgumentParser): @action def run(self): + logger.warning( + "**** DEPRECATION WARNING ****\n" + '"auto-opt" command is deprecated in favor of "optimize". Please switch to using "optimize".\n' + "Deprecated commands will be removed entirely in future release." + ) return self._run_workflow() def _get_run_config(self, tempdir) -> dict: diff --git a/olive/evaluator/lmeval_ort.py b/olive/evaluator/lmeval_ort.py index 50d1f1289d..c4a158533d 100644 --- a/olive/evaluator/lmeval_ort.py +++ b/olive/evaluator/lmeval_ort.py @@ -551,27 +551,25 @@ def model_call(self, input_ids: torch.Tensor, cont_len: int = 0) -> torch.Tensor self.params.set_search_options(batch_size=batch_size) generator = og.Generator(self.model, self.params) - if self._returns_full_logits: - generator.append_tokens(input_ids.tolist()) - return torch.from_numpy(generator.get_output("logits")).to(self.device) - - # Model only returns logits for the last appended position. if batch_size > 1 and cont_len > 1: raise ValueError( - "batch_size > 1 is not supported when the model returns single-position logits" + "batch_size > 1 is not supported when using incremental get_logits() retrieval" " and continuation length > 1. Right-padding misaligns continuation positions across" " batch elements. Use batch_size=1 instead." ) - # Bulk-append context tokens, then step through the last cont_len tokens - # one at a time to collect only the logits we actually need. + # Use incremental token appending with get_logits() to avoid copying + # the full logits tensor from GPU to CPU. get_output("logits") copies + # seq_len * vocab_size * 2 bytes (e.g. 472MB for 900 tokens with + # 262K vocab), while get_logits() copies only vocab_size * 4 bytes + # (~1MB) per position. n_logits = max(cont_len, 1) prefix_len = seq_len - n_logits generator.append_tokens(input_ids[:, : prefix_len + 1].tolist()) - all_logits = [torch.from_numpy(generator.get_output("logits")).to(self.device)] + all_logits = [torch.from_numpy(generator.get_logits()).to(self.device)] for i in range(prefix_len + 1, seq_len): generator.append_tokens(input_ids[:, i : i + 1].tolist()) - all_logits.append(torch.from_numpy(generator.get_output("logits")).to(self.device)) + all_logits.append(torch.from_numpy(generator.get_logits()).to(self.device)) # No need to pad to [batch, seq_len, vocab]. 
The slicing in _loglikelihood_tokens computes # ctx_len = inplen + (logits.shape[0] - padding_len_inp), which adjusts for the shorter diff --git a/olive/passes/onnx/kquant_quantization.py b/olive/passes/onnx/kquant_quantization.py index 5d75016ecc..4263406afd 100644 --- a/olive/passes/onnx/kquant_quantization.py +++ b/olive/passes/onnx/kquant_quantization.py @@ -256,7 +256,15 @@ def _default_config(cls, accelerator_spec: AcceleratorSpec) -> dict[str, PassCon def _run_for_config( self, model: ONNXModelHandler, config: type[BasePassConfig], output_model_path: str ) -> ONNXModelHandler: - output_model_path = resolve_onnx_path(output_model_path, Path(model.model_path).name) + # For composite model components (e.g., Whisper encoder.onnx/decoder.onnx), + # output_model_path already includes .onnx extension. Strip it so ir.save doesn't + # create a double extension (.onnx.onnx). For other cases, resolve normally. + output_path_obj = Path(output_model_path) + if output_path_obj.suffix == ".onnx": + output_model_path = str(output_path_obj.with_suffix("")) + else: + output_model_path = resolve_onnx_path(output_model_path, Path(model.model_path).name) + ir_model = model.load_ir_model() ir.external_data.load_to_model(ir_model) ir_model.graph.opset_imports[MSFT_DOMAIN] = 1 diff --git a/olive/passes/onnx/model_builder.py b/olive/passes/onnx/model_builder.py index ba29c0a9b3..24f3c27785 100644 --- a/olive/passes/onnx/model_builder.py +++ b/olive/passes/onnx/model_builder.py @@ -20,7 +20,7 @@ from olive.constants import Precision from olive.hardware.accelerator import AcceleratorSpec, Device from olive.hardware.constants import ExecutionProvider -from olive.model import HfModelHandler, ONNXModelHandler +from olive.model import CompositeModelHandler, HfModelHandler, ONNXModelHandler from olive.model.utils import resolve_onnx_path from olive.passes import Pass from olive.passes.olive_pass import PassConfigParam @@ -273,8 +273,9 @@ def _run_for_config( if config.extra_options: extra_args.update(config.extra_options) - # Ensure output_model_filepath matches the final filename in extra_args - output_model_filepath = Path(output_model_path) / extra_args["filename"] + # Ensure output_model_filepath matches the final filename in extra_args while preserving + # the resolved output directory selected above. + output_model_filepath = output_model_filepath.parent / extra_args["filename"] model_attributes = copy.deepcopy(model.model_attributes or {}) @@ -292,26 +293,6 @@ def _run_for_config( **extra_args, ) - # Apply post-processing annotations (split assignments and/or layer annotations) - # in a single load/save cycle to avoid redundant disk I/O. 
- split_assignments = model_attributes.get("split_assignments") if not metadata_only else None - layer_annotations = model_attributes.get("layer_annotations") if not metadata_only else None - - if split_assignments or layer_annotations: - model_proto = onnx.load(output_model_filepath, load_external_data=False) - - if split_assignments: - # NOTE: currently the model builder renames modules to it's own naming convention - # so the assignments for the renamed modules won't match - split_assignment_str = ";".join([f"{k}={v}" for k, v in split_assignments.items()]) - onnx.helper.set_model_props(model_proto, {"split_assignments": split_assignment_str}) - - if layer_annotations: - from olive.passes.onnx.layer_annotation import annotate_proto_model - - annotate_proto_model(model_proto, layer_annotations) - - onnx.save(model_proto, output_model_filepath) except Exception: # if model building fails, clean up the intermediate files in the cache_dir cache_dir = Path(HF_HUB_CACHE) @@ -337,6 +318,58 @@ def _run_for_config( # tokenizer and generation configs are skipped since they are already saved by the model builder model.save_metadata(output_model_filepath.parent) + generated_onnx_files = sorted(output_model_filepath.parent.glob("*.onnx")) if not metadata_only else [] + + # For multi-file models (e.g., Whisper), preserve component file names and process each file independently + # in subsequent passes by returning a CompositeModelHandler. + is_multi_file_model = not metadata_only and len(generated_onnx_files) > 1 + resolved_single_model_filepath = output_model_filepath + if ( + not metadata_only + and not is_multi_file_model + and not output_model_filepath.exists() + and len(generated_onnx_files) == 1 + ): + logger.info( + "ONNX model file %s does not exist, using %s instead", + output_model_filepath, + generated_onnx_files[0].name, + ) + resolved_single_model_filepath = generated_onnx_files[0] + + # Apply post-processing annotations (split assignments and/or layer annotations) + # in a single load/save cycle to avoid redundant disk I/O. 
+ split_assignments = model_attributes.get("split_assignments") if not metadata_only else None + layer_annotations = model_attributes.get("layer_annotations") if not metadata_only else None + if is_multi_file_model: + primary_onnx_files = generated_onnx_files + elif resolved_single_model_filepath.exists(): + primary_onnx_files = [resolved_single_model_filepath] + else: + primary_onnx_files = [] + if split_assignments or layer_annotations: + if primary_onnx_files: + for primary_onnx_file in primary_onnx_files: + model_proto = onnx.load(primary_onnx_file, load_external_data=False) + + if split_assignments: + # NOTE: currently the model builder renames modules to it's own naming convention + # so the assignments for the renamed modules won't match + split_assignment_str = ";".join([f"{k}={v}" for k, v in split_assignments.items()]) + onnx.helper.set_model_props(model_proto, {"split_assignments": split_assignment_str}) + + if layer_annotations: + from olive.passes.onnx.layer_annotation import annotate_proto_model + + annotate_proto_model(model_proto, layer_annotations) + + onnx.save(model_proto, primary_onnx_file) + else: + logger.warning( + "Skipping split_assignments/layer_annotations because no ONNX file was generated in %s.", + output_model_filepath.parent, + ) + # add additional files generated by model builder to model_attributes additional_files = model_attributes.get("additional_files") or [] if metadata_only: @@ -347,20 +380,36 @@ def _run_for_config( str(output_model_filepath.parent / "genai_config.json"), ] else: + primary_model_paths = {str(fp) for fp in primary_onnx_files} model_attributes["additional_files"] = sorted( set(additional_files) # all files in the output directory except the model and model.data files | {str(fp) for fp in output_model_filepath.parent.iterdir()} - - {str(output_model_filepath), str(output_model_filepath) + ".data"} + - primary_model_paths + - {f"{path}.data" for path in primary_model_paths} ) if metadata_only: output_model = copy.copy(model) output_model.model_attributes = model_attributes + elif is_multi_file_model: + # Use the ONNX filenames as component names so child passes write back to encoder.onnx/decoder.onnx + # instead of defaulting to model.onnx. + component_names = [fp.name for fp in generated_onnx_files] + components = [ + ONNXModelHandler(output_model_filepath.parent, onnx_file_name=component_name) + for component_name in component_names + ] + output_model = CompositeModelHandler( + components, + component_names, + model_path=output_model_filepath.parent, + model_attributes=model_attributes, + ) else: output_model = ONNXModelHandler( output_model_filepath.parent, - onnx_file_name=output_model_filepath.name, + onnx_file_name=resolved_single_model_filepath.name, model_attributes=model_attributes, ) diff --git a/test/passes/onnx/test_model_builder.py b/test/passes/onnx/test_model_builder.py index 0c838a3f7f..0f71535db9 100644 --- a/test/passes/onnx/test_model_builder.py +++ b/test/passes/onnx/test_model_builder.py @@ -2,16 +2,16 @@ # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. 
 # --------------------------------------------------------------------------
+import json
 import sys
 import types
 from pathlib import Path
-from unittest.mock import MagicMock, patch
+from unittest.mock import MagicMock, Mock, patch
 
 import onnx
 import pytest
 
-from olive.model import ONNXModelHandler
-from olive.model.handler.hf import HfModelHandler
+from olive.model import CompositeModelHandler, HfModelHandler, ONNXModelHandler
 from olive.passes.olive_pass import create_pass_from_dict
 from olive.passes.onnx.model_builder import ModelBuilder
 from olive.passes.pytorch.rtn import Rtn
@@ -20,6 +20,29 @@
 TINY_RANDOM_LLAMA_MODEL_ID = "hf-internal-testing/tiny-random-LlamaForCausalLM"
 
 
+def _create_test_onnx_model(model_path: Path, node_name: str):
+    input_info = onnx.helper.make_tensor_value_info("input", onnx.TensorProto.FLOAT, [1, 1])
+    output_info = onnx.helper.make_tensor_value_info("output", onnx.TensorProto.FLOAT, [1, 1])
+    node = onnx.helper.make_node("Identity", ["input"], ["output"], name=node_name)
+    graph = onnx.helper.make_graph([node], "test_graph", [input_info], [output_info])
+    model = onnx.helper.make_model(graph)
+    onnx.save(model, model_path)
+
+
+def _mock_genai_builder(monkeypatch, create_model_fn):
+    builder_module = types.ModuleType("onnxruntime_genai.models.builder")
+    builder_module.create_model = create_model_fn
+    models_module = types.ModuleType("onnxruntime_genai.models")
+    models_module.builder = builder_module
+    genai_module = types.ModuleType("onnxruntime_genai")
+    genai_module.__version__ = "0.8.0"
+    genai_module.models = models_module
+    monkeypatch.setitem(sys.modules, "onnxruntime_genai", genai_module)
+    monkeypatch.setitem(sys.modules, "onnxruntime_genai.models", models_module)
+    monkeypatch.setitem(sys.modules, "onnxruntime_genai.models.builder", builder_module)
+    monkeypatch.setattr(ModelBuilder, "maybe_patch_quant", staticmethod(lambda: None))
+
+
 @pytest.mark.parametrize("metadata_only", [True, False])
 def test_model_builder(tmp_path, metadata_only):
     input_model = make_local_tiny_llama(tmp_path / "input_model", "onnx" if metadata_only else "hf")
@@ -162,3 +185,76 @@ def fake_create_model(*_, **kwargs):
     assert test_model_path.exists()
     assert fake_builder.create_model.call_args.kwargs["model_name"] == str(test_model_path)
     assert fake_builder.create_model.call_args.kwargs["input_path"] == str(test_model_path)
+
+
+def test_model_builder_apply_annotations_on_single_file_fallback(tmp_path, monkeypatch):
+    def fake_create_model(
+        model_name, input_path, output_dir, precision, execution_provider, cache_dir, filename, **kwargs
+    ):
+        output_dir = Path(output_dir)
+        output_dir.mkdir(parents=True, exist_ok=True)
+        _create_test_onnx_model(output_dir / "actual.onnx", "test_node")
+        (output_dir / "actual.onnx.data").write_text("external_data")
+        (output_dir / "tokenizer.json").write_text("{}")
+        (output_dir / "genai_config.json").write_text(json.dumps({"search": {}}))
+
+    _mock_genai_builder(monkeypatch, fake_create_model)
+    input_model = Mock(spec=HfModelHandler)
+    input_model.model_name_or_path = "dummy-model"
+    input_model.adapter_path = None
+    input_model.test_model_config = None
+    input_model.test_model_path = None
+    input_model.model_attributes = {"split_assignments": {"model.layers.0": 1}}
+
+    p = create_pass_from_dict(
+        ModelBuilder, {"precision": "fp32", "extra_options": {"filename": "expected.onnx"}}, disable_search=True
+    )
+    output_folder = tmp_path / "output_model"
+    output_model = p.run(input_model, output_folder)
+
+    assert isinstance(output_model, ONNXModelHandler)
+    assert output_model.onnx_file_name == "actual.onnx"
+    model_proto = onnx.load(output_folder / "actual.onnx", load_external_data=False)
+    metadata_props = {prop.key: prop.value for prop in model_proto.metadata_props}
+    assert metadata_props["split_assignments"] == "model.layers.0=1"
+    assert str(output_folder / "actual.onnx") not in output_model.model_attributes["additional_files"]
+    assert str(output_folder / "actual.onnx.data") not in output_model.model_attributes["additional_files"]
+    assert str(output_folder / "tokenizer.json") in output_model.model_attributes["additional_files"]
+
+
+def test_model_builder_multi_file_output_preserves_component_filenames(tmp_path, monkeypatch):
+    def fake_create_model(
+        model_name, input_path, output_dir, precision, execution_provider, cache_dir, filename, **kwargs
+    ):
+        output_dir = Path(output_dir)
+        output_dir.mkdir(parents=True, exist_ok=True)
+        _create_test_onnx_model(output_dir / "encoder.onnx", "encoder_node")
+        _create_test_onnx_model(output_dir / "decoder.onnx", "decoder_node")
+        (output_dir / "encoder.onnx.data").write_text("encoder_data")
+        (output_dir / "decoder.onnx.data").write_text("decoder_data")
+        (output_dir / "tokenizer.json").write_text("{}")
+        (output_dir / "genai_config.json").write_text(json.dumps({"search": {}}))
+
+    _mock_genai_builder(monkeypatch, fake_create_model)
+    input_model = Mock(spec=HfModelHandler)
+    input_model.model_name_or_path = "dummy-model"
+    input_model.adapter_path = None
+    input_model.test_model_config = None
+    input_model.test_model_path = None
+    input_model.model_attributes = {}
+
+    p = create_pass_from_dict(ModelBuilder, {"precision": "fp32"}, disable_search=True)
+    output_folder = tmp_path / "output_model"
+    output_model = p.run(input_model, output_folder)
+
+    assert isinstance(output_model, CompositeModelHandler)
+    expected_component_names = sorted(["encoder.onnx", "decoder.onnx"])
+    assert output_model.model_component_names == expected_component_names
+    component_onnx_files = [component.onnx_file_name for component in output_model.model_components]
+    assert component_onnx_files == output_model.model_component_names
+    additional_files = output_model.model_attributes["additional_files"]
+    assert str(output_folder / "encoder.onnx") not in additional_files
+    assert str(output_folder / "decoder.onnx") not in additional_files
+    assert str(output_folder / "encoder.onnx.data") not in additional_files
+    assert str(output_folder / "decoder.onnx.data") not in additional_files
+    assert str(output_folder / "tokenizer.json") in additional_files

From 8941efbed4c153b3ddb8804be313ac3e9fa7ff46 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Xavier=20Dupr=C3=A9?=
Date: Tue, 12 May 2026 12:10:52 +0200
Subject: [PATCH 35/35] Potential fix for pull request finding

Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com>
---
 olive/common/hf/utils.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/olive/common/hf/utils.py b/olive/common/hf/utils.py
index c2c2110c2c..2abc534eb0 100644
--- a/olive/common/hf/utils.py
+++ b/olive/common/hf/utils.py
@@ -52,7 +52,8 @@ def _apply_test_model_config(
         if isinstance(layer_types, (list, tuple)):
             model_config.layer_types = layer_types[:hidden_layers]
 
-    if "dtype" in model_config and model_config.dtype == "auto":
+    dtype = getattr(model_config, "dtype", None)
+    if dtype == "auto":
         # This is not allowed anymore with transformers >=4.57,
         # we select float16 instead.
         model_config.dtype = "float16"
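
A note on the final fix above: a membership test like "dtype" in model_config only works when the
config type implements the container protocol, and a plain Python object with neither __contains__
nor __iter__ raises TypeError on "in"; getattr sidesteps the question entirely. A minimal
illustration of the difference, using a hypothetical stand-in class rather than a real transformers
config:

class StubConfig:
    """Stand-in for a config object that has attributes but no container protocol."""

    def __init__(self):
        self.dtype = "auto"


cfg = StubConfig()

# The old check raises on objects like this one:
#     "dtype" in cfg  ->  TypeError: argument of type 'StubConfig' is not iterable

# The patched check degrades to None when the attribute is missing.
dtype = getattr(cfg, "dtype", None)
if dtype == "auto":
    # Per the comment in the patch, transformers >= 4.57 no longer accepts
    # dtype="auto" here, so a concrete dtype is substituted.
    cfg.dtype = "float16"

assert cfg.dtype == "float16"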
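
Looking back at the model builder changes earlier in this series, the multi-file branch reduces to
one construction: one ONNXModelHandler per generated file, with the file names doubling as the
component names. A condensed standalone sketch, assuming out_dir already holds encoder.onnx and
decoder.onnx (the directory name is hypothetical; the handler signatures are the ones used in the
diff):

from pathlib import Path

from olive.model import CompositeModelHandler, ONNXModelHandler

out_dir = Path("output_model")  # hypothetical model builder output directory

# sorted() keeps the component order deterministic, as in the pass.
onnx_files = sorted(out_dir.glob("*.onnx"))

# Naming each component after its file is what lets downstream passes write
# back to encoder.onnx/decoder.onnx instead of defaulting to model.onnx.
component_names = [fp.name for fp in onnx_files]
components = [ONNXModelHandler(out_dir, onnx_file_name=name) for name in component_names]

composite = CompositeModelHandler(components, component_names, model_path=out_dir)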
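
The split_assignments annotation round-trips through ONNX model metadata as a ";"-joined list of
module=assignment pairs. A sketch of reading it back from a saved model, mirroring what the
single-file test asserts (the path is hypothetical, and values come back as strings, not ints):

import onnx

# load_external_data=False keeps the load cheap, as in the pass itself.
model_proto = onnx.load("output_model/actual.onnx", load_external_data=False)

# metadata_props is a repeated StringStringEntryProto; rebuild a plain dict.
metadata = {prop.key: prop.value for prop in model_proto.metadata_props}

# Invert the ";".join(f"{k}={v}") encoding performed by the pass.
split_assignments = dict(pair.split("=", 1) for pair in metadata["split_assignments"].split(";"))
# e.g. {"model.layers.0": "1"}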
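
The single-file fallback added to the pass can likewise be read as one small rule: use the requested
file if it exists, otherwise fall back only when exactly one .onnx file was produced. A sketch under
those assumptions (resolve_onnx_file is a hypothetical helper, not part of the pass):

from pathlib import Path


def resolve_onnx_file(expected: Path) -> Path:
    """Pick the ONNX file to report, preferring the requested path."""
    if expected.exists():
        return expected
    generated = sorted(expected.parent.glob("*.onnx"))
    # Only an unambiguous single file is a safe fallback; the multi-file case
    # is handled separately via CompositeModelHandler.
    if len(generated) == 1:
        return generated[0]
    return expected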