From ef42e477ca210de064439df768bc4130c965d630 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Mon, 11 May 2026 08:18:11 +0000
Subject: [PATCH 01/35] Initial plan

From 00571f01ef82f4cd4d821c0d7b391eac38904cef Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Mon, 11 May 2026 08:33:38 +0000
Subject: [PATCH 02/35] feat: add CLI test-model config for HF inputs

Agent-Logs-Url: https://github.com/microsoft/Olive/sessions/e2149604-9db1-438e-a31f-01a53886093d
Co-authored-by: xadupre <22452781+xadupre@users.noreply.github.com>
---
 olive/cli/base.py               |  7 +++++
 olive/common/hf/model_io.py     |  8 ++++--
 olive/common/hf/utils.py        | 49 ++++++++++++++++++++++++++++-----
 olive/model/handler/hf.py       | 11 ++++++--
 olive/model/handler/mixin/hf.py | 15 ++++++++--
 test/cli/test_base.py           | 16 +++++++++++
 test/cli/test_cli.py            | 19 +++++++++++++
 test/common/test_hf.py          | 21 ++++++++++++++
 8 files changed, 133 insertions(+), 13 deletions(-)

diff --git a/olive/cli/base.py b/olive/cli/base.py
index 75fd2816c7..9355a0d301 100644
--- a/olive/cli/base.py
+++ b/olive/cli/base.py
@@ -105,6 +105,8 @@ def _get_hf_input_model(args: Namespace, model_path: OLIVE_RESOURCE_ANNOTATIONS)
         input_model["adapter_path"] = args.adapter_path
     if getattr(args, "trust_remote_code", None) is not None:
         input_model["load_kwargs"]["trust_remote_code"] = args.trust_remote_code
+    if getattr(args, "test", False):
+        input_model["test_model_config"] = {"hidden_layers": 2}
     return input_model
 
 
@@ -371,6 +373,11 @@
     model_group.add_argument(
         "--trust_remote_code", action="store_true", help="Trust remote code when loading a huggingface model."
     )
+    model_group.add_argument(
+        "--test",
+        action="store_true",
+        help="Use a randomly initialized test model with the same Hugging Face architecture and 2 hidden layers.",
+    )
 
     if enable_hf_adapter:
         assert enable_hf, "enable_hf must be True when enable_hf_adapter is True."

diff --git a/olive/common/hf/model_io.py b/olive/common/hf/model_io.py
index a3ebd73058..7d66d57574 100644
--- a/olive/common/hf/model_io.py
+++ b/olive/common/hf/model_io.py
@@ -27,6 +27,7 @@ def get_model_io_config(
     model_name: str,
     task: str,
     model: Optional["PreTrainedModel"] = None,
+    test_model_config: Optional[dict[str, Any]] = None,
     **kwargs,
 ) -> Optional[dict[str, Any]]:
     """Get the input/output config for the model and task.
@@ -35,6 +36,7 @@ def get_model_io_config(
         model_name: The model name or path.
         task: The task type (e.g., "text-generation", "text-classification").
         model: Optional loaded model for input signature inspection.
+        test_model_config: Optional overrides for creating a lightweight random test model from the same config.
         **kwargs: Additional arguments including use_cache.
 
     Returns:
@@ -68,7 +70,7 @@ def get_model_io_config(
         return None
 
     # Get model config
-    model_config = get_model_config(model_name, **kwargs)
+    model_config = get_model_config(model_name, test_model_config=test_model_config, **kwargs)
 
     # Handle PEFT models
     actual_model = model
@@ -92,6 +94,7 @@ def get_model_dummy_input(
     model_name: str,
     task: str,
     model: Optional["PreTrainedModel"] = None,
+    test_model_config: Optional[dict[str, Any]] = None,
     **kwargs,
 ) -> Optional[dict[str, Any]]:
     """Get dummy inputs for the model and task.
@@ -100,6 +103,7 @@ def get_model_dummy_input(
         model_name: The model name or path.
         task: The task type.
         model: Optional loaded model for input signature inspection.
+        test_model_config: Optional overrides for creating a lightweight random test model from the same config.
         **kwargs: Additional arguments including use_cache, batch_size, sequence_length.
 
     Returns:
@@ -133,7 +137,7 @@ def get_model_dummy_input(
         return None
 
     # Get model config (handles MLflow paths)
-    model_config = get_model_config(model_name, **kwargs)
+    model_config = get_model_config(model_name, test_model_config=test_model_config, **kwargs)
 
     # Handle PEFT models
     actual_model = model

diff --git a/olive/common/hf/utils.py b/olive/common/hf/utils.py
index a070e85ac8..99a1a2d7e3 100644
--- a/olive/common/hf/utils.py
+++ b/olive/common/hf/utils.py
@@ -3,8 +3,9 @@
 # Licensed under the MIT License.
 # --------------------------------------------------------------------------
 import logging
+from copy import deepcopy
 from pathlib import Path
-from typing import TYPE_CHECKING, Optional, Union
+from typing import TYPE_CHECKING, Any, Optional, Union
 
 from transformers import AutoConfig, AutoModel, AutoTokenizer, GenerationConfig
 
@@ -18,7 +19,33 @@
 logger = logging.getLogger(__name__)
 
 
-def load_model_from_task(task: str, model_name_or_path: str, **kwargs) -> "PreTrainedModel":
+def _apply_test_model_config(
+    model_config: "PretrainedConfig", test_model_config: Optional[dict[str, Any]] = None
+) -> "PretrainedConfig":
+    """Apply lightweight test-model overrides to a model config."""
+    if not test_model_config:
+        return model_config
+
+    model_config = deepcopy(model_config)
+    hidden_layers = test_model_config.get("hidden_layers", test_model_config.get("num_hidden_layers", 2))
+    if hidden_layers < 1:
+        raise ValueError("test_model_config.hidden_layers must be greater than 0.")
+
+    updated = False
+    for attr_name in ("num_hidden_layers", "num_layers", "n_layer", "n_layers"):
+        if hasattr(model_config, attr_name):
+            setattr(model_config, attr_name, hidden_layers)
+            updated = True
+
+    if not updated:
+        raise ValueError("Unable to create a test model because the config does not expose a hidden-layer count.")
+
+    return model_config
+
+
+def load_model_from_task(
+    task: str, model_name_or_path: str, test_model_config: Optional[dict[str, Any]] = None, **kwargs
+) -> "PreTrainedModel":
     """Load huggingface model from task and model_name_or_path."""
     from transformers.pipelines import check_task
 
@@ -31,7 +58,7 @@ def load_model_from_task(task: str, model_name_or_path: str, **kwargs) -> "PreTr
     else:
         raise ValueError("unsupported transformers version")
 
-    model_config = get_model_config(model_name_or_path, **kwargs)
+    model_config = get_model_config(model_name_or_path, test_model_config=test_model_config, **kwargs)
     if getattr(model_config, "quantization_config", None):
         if not isinstance(model_config.quantization_config, dict):
             model_config.quantization_config = model_config.quantization_config.to_dict()
@@ -59,7 +86,13 @@ def load_model_from_task(task: str, model_name_or_path: str, **kwargs) -> "PreTr
     model = None
     for i, model_class in enumerate(class_tuple):
         try:
-            model = from_pretrained(model_class, model_name_or_path, "model", **kwargs)
+            if test_model_config:
+                try:
+                    model = model_class.from_config(model_config, trust_remote_code=kwargs.get("trust_remote_code"))
+                except TypeError:
+                    model = model_class.from_config(model_config)
+            else:
+                model = from_pretrained(model_class, model_name_or_path, "model", **kwargs)
             logger.debug("Loaded model %s with name_or_path %s", model_class, model_name_or_path)
             break
         except (OSError, ValueError) as e:
@@ -94,14 +127,16 @@ def from_pretrained(cls, model_name_or_path: str, mlflow_dir: str, **kwargs):
     return cls.from_pretrained(get_pretrained_name_or_path(model_name_or_path, mlflow_dir), **kwargs)
 
 
-def get_model_config(model_name_or_path: str, **kwargs) -> "PretrainedConfig":
+def get_model_config(
+    model_name_or_path: str, test_model_config: Optional[dict[str, Any]] = None, **kwargs
+) -> "PretrainedConfig":
     """Get HF Config for the given model_name_or_path."""
     model_config = from_pretrained(AutoConfig, model_name_or_path, "config", **kwargs)
 
     # add quantization config
     quantization_config = kwargs.get("quantization_config")
     if not quantization_config:
-        return model_config
+        return _apply_test_model_config(model_config, test_model_config)
 
     if hasattr(model_config, "quantization_config") and model_config.quantization_config:
         logger.warning(
@@ -111,7 +146,7 @@ def get_model_config(model_name_or_path: str, **kwargs) -> "PretrainedConfig":
         )
     else:
         model_config.quantization_config = quantization_config
-    return model_config
+    return _apply_test_model_config(model_config, test_model_config)
 
 
 def save_model_config(config: Union["PretrainedConfig", "GenerationConfig"], output_dir: str, **kwargs):

diff --git a/olive/model/handler/hf.py b/olive/model/handler/hf.py
index daac587bc5..0ed38a5ada 100644
--- a/olive/model/handler/hf.py
+++ b/olive/model/handler/hf.py
@@ -28,7 +28,7 @@
 @model_handler_registry("HFModel")
 class HfModelHandler(PyTorchModelHandlerBase, MLFlowTransformersMixin, HfMixin):  # pylint: disable=too-many-ancestors
     resource_keys: tuple[str, ...] = ("model_path", "adapter_path")
-    json_config_keys: tuple[str, ...] = ("task", "load_kwargs")
+    json_config_keys: tuple[str, ...] = ("task", "load_kwargs", "test_model_config")
 
     def __init__(
         self,
@@ -37,6 +37,7 @@ def __init__(
         load_kwargs: Union[dict[str, Any], HfLoadKwargs] = None,
         io_config: Union[dict[str, Any], IoConfig, str] = None,
         adapter_path: OLIVE_RESOURCE_ANNOTATIONS = None,
+        test_model_config: Optional[dict[str, Any]] = None,
         model_attributes: Optional[dict[str, Any]] = None,
     ):
         super().__init__(
@@ -48,6 +49,7 @@ def __init__(
         self.add_resources(locals())
         self.task = task
         self.load_kwargs = validate_config(load_kwargs, HfLoadKwargs, warn_unused_keys=False) if load_kwargs else None
+        self.test_model_config = test_model_config
 
         self.model_attributes = {**self.get_hf_model_config().to_dict(), **(self.model_attributes or {})}
 
@@ -72,7 +74,12 @@ def load_model(self, rank: int = None, cache_model: bool = True) -> "torch.nn.Mo
         if self.model:
             model = self.model
         else:
-            model = load_model_from_task(self.task, self.model_path, **self.get_load_kwargs())
+            model = load_model_from_task(
+                self.task,
+                self.model_path,
+                test_model_config=self.test_model_config,
+                **self.get_load_kwargs(),
+            )
 
         # we only have peft adapters for now
         if self.adapter_path:

diff --git a/olive/model/handler/mixin/hf.py b/olive/model/handler/mixin/hf.py
index 730d6bd6ec..3e400c64e7 100644
--- a/olive/model/handler/mixin/hf.py
+++ b/olive/model/handler/mixin/hf.py
@@ -39,7 +39,11 @@ def get_hf_model_config(self, exclude_load_keys: Optional[list[str]] = None) ->
         :param exclude_load_keys: list of keys to exclude from load_kwargs
         :return: model config
         """
-        return get_model_config(self.model_path, **self.get_load_kwargs(exclude_load_keys))
+        return get_model_config(
+            self.model_path,
+            test_model_config=getattr(self, "test_model_config", None),
+            **self.get_load_kwargs(exclude_load_keys),
+        )
 
     def get_hf_generation_config(self, exclude_load_keys: Optional[list[str]] = None) -> Optional["GenerationConfig"]:
         """Get generation config for the model if it exists.
@@ -114,7 +118,13 @@ def save_metadata(self, output_dir: str, exclude_load_keys: Optional[list[str]]
 
     def get_hf_io_config(self) -> Optional[dict[str, Any]]:
         """Get Io config for the model."""
-        return get_model_io_config(self.model_path, self.task, self.load_model(), **self.get_load_kwargs())
+        return get_model_io_config(
+            self.model_path,
+            self.task,
+            self.load_model(),
+            test_model_config=getattr(self, "test_model_config", None),
+            **self.get_load_kwargs(),
+        )
 
     def get_hf_dummy_inputs(self) -> Optional[dict[str, Any]]:
         """Get dummy inputs for the model."""
@@ -122,6 +132,7 @@ def get_hf_dummy_inputs(self) -> Optional[dict[str, Any]]:
             self.model_path,
             self.task,
             model=self.load_model(),
+            test_model_config=getattr(self, "test_model_config", None),
             **self.get_load_kwargs(),
         )

diff --git a/test/cli/test_base.py b/test/cli/test_base.py
index 38c96c6aa9..6327f1f335 100644
--- a/test/cli/test_base.py
+++ b/test/cli/test_base.py
@@ -229,6 +229,22 @@ def test_insert_input_model_invalid_hf_model_name():
         get_input_model_config(args)
 
 
+@patch("huggingface_hub.repo_exists", return_value=True)
+def test_get_input_model_config_hf_test_model(_):
+    args = SimpleNamespace(
+        model_name_or_path="hf_model",
+        trust_remote_code=False,
+        task="text-generation",
+        model_script=None,
+        script_dir=None,
+        test=True,
+    )
+
+    config = get_input_model_config(args)
+
+    assert config["test_model_config"] == {"hidden_layers": 2}
+
+
 def test_insert_input_model_cli_output_model():
     # setup
     model_path = str(Path(__file__).parent.resolve() / "output_model")

diff --git a/test/cli/test_cli.py b/test/cli/test_cli.py
index a7cb39e244..816147e063 100644
--- a/test/cli/test_cli.py
+++ b/test/cli/test_cli.py
@@ -190,6 +190,25 @@ def test_finetune_command(_, mock_run, tmp_path):
     assert mock_run.call_count == 1
 
 
+@patch("huggingface_hub.repo_exists", return_value=True)
+def test_optimize_command_test_model_config(_, tmp_path):
+    output_dir = tmp_path / "output_dir"
+    command_args = [
+        "optimize",
+        "-m",
+        "dummy-model-id",
+        "--test",
+        "--dry_run",
+        "-o",
+        str(output_dir),
+    ]
+
+    cli_main(command_args)
+
+    config = json.loads((output_dir / "config.json").read_text())
+    assert config["input_model"]["test_model_config"] == {"hidden_layers": 2}
+
+
 @patch("olive.workflows.run")
 @patch("olive.model.handler.diffusers.is_valid_diffusers_model", return_value=True)
 def test_diffusion_lora_command(_, mock_run, tmp_path):

diff --git a/test/common/test_hf.py b/test/common/test_hf.py
index 00b4cbd043..dfab798547 100644
--- a/test/common/test_hf.py
+++ b/test/common/test_hf.py
@@ -6,6 +6,7 @@
 
 import pytest
 import torch
+import transformers
 
 from olive.common.hf.model_io import get_model_dummy_input, get_model_io_config
 from olive.common.hf.utils import load_model_from_task
@@ -21,6 +22,26 @@ def test_load_model_from_task():
     assert isinstance(model, torch.nn.Module)
 
 
+def test_load_model_from_task_test_model_config():
+    model_config = transformers.BertConfig(num_hidden_layers=12)
+    created_model = MagicMock(spec=torch.nn.Module)
+
+    with (
+        patch("transformers.pipelines.check_task") as mock_check_task,
+        patch("olive.common.hf.utils.from_pretrained", return_value=model_config) as mock_from_pretrained,
+    ):
+        mock_model_class = MagicMock()
+        mock_model_class.from_config.return_value = created_model
+        mock_check_task.return_value = ("text-classification", {"pt": (mock_model_class,)}, None)
+
+        model = load_model_from_task("text-classification", "dummy-model", test_model_config={"hidden_layers": 2})
+
+    assert model is created_model
+    mock_from_pretrained.assert_called_once()
+    mock_model_class.from_config.assert_called_once()
+    assert mock_model_class.from_config.call_args.args[0].num_hidden_layers == 2
+
+
 @pytest.mark.parametrize(
     ("exceptions", "expected_exception", "expected_message"),
     [

From 485dfbf79b855f0bde0488de9c70c0357068a5a2 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Mon, 11 May 2026 08:36:02 +0000
Subject: [PATCH 03/35] test: broaden HF test-model coverage

Agent-Logs-Url: https://github.com/microsoft/Olive/sessions/e2149604-9db1-438e-a31f-01a53886093d
Co-authored-by: xadupre <22452781+xadupre@users.noreply.github.com>
---
 olive/common/hf/utils.py |  4 +++-
 test/common/test_hf.py   | 12 +++++++++---
 2 files changed, 12 insertions(+), 4 deletions(-)

diff --git a/olive/common/hf/utils.py b/olive/common/hf/utils.py
index 99a1a2d7e3..7c713ce1ce 100644
--- a/olive/common/hf/utils.py
+++ b/olive/common/hf/utils.py
@@ -27,11 +27,13 @@ def _apply_test_model_config(
         return model_config
 
     model_config = deepcopy(model_config)
-    hidden_layers = test_model_config.get("hidden_layers", test_model_config.get("num_hidden_layers", 2))
+    hidden_layers = test_model_config.get("hidden_layers") or test_model_config.get("num_hidden_layers") or 2
     if hidden_layers < 1:
         raise ValueError("test_model_config.hidden_layers must be greater than 0.")
 
     updated = False
+    # Common Hugging Face configs do not use a single canonical field:
+    # BERT-style models use num_hidden_layers while GPT-style models often use n_layer/n_layers/num_layers.
     for attr_name in ("num_hidden_layers", "num_layers", "n_layer", "n_layers"):
         if hasattr(model_config, attr_name):
             setattr(model_config, attr_name, hidden_layers)

diff --git a/test/common/test_hf.py b/test/common/test_hf.py
index dfab798547..dacc4ac3ad 100644
--- a/test/common/test_hf.py
+++ b/test/common/test_hf.py
@@ -22,8 +22,14 @@ def test_load_model_from_task():
     assert isinstance(model, torch.nn.Module)
 
 
-def test_load_model_from_task_test_model_config():
-    model_config = transformers.BertConfig(num_hidden_layers=12)
+@pytest.mark.parametrize(
+    ("model_config", "hidden_layers_attr"),
+    [
+        (transformers.BertConfig(num_hidden_layers=12), "num_hidden_layers"),
+        (transformers.GPT2Config(n_layer=12), "n_layer"),
+    ],
+)
+def test_load_model_from_task_test_model_config(model_config, hidden_layers_attr):
     created_model = MagicMock(spec=torch.nn.Module)
 
     with (
@@ -39,7 +45,7 @@ def test_load_model_from_task_test_model_config(model_config, hidden_layers_attr
     assert model is created_model
     mock_from_pretrained.assert_called_once()
     mock_model_class.from_config.assert_called_once()
-    assert mock_model_class.from_config.call_args.args[0].num_hidden_layers == 2
+    assert getattr(mock_model_class.from_config.call_args.args[0], hidden_layers_attr) == 2
 
 
 @pytest.mark.parametrize(

From a6fa34aa939fc21e9429253f611d70bf652420e1 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Mon, 11 May 2026 08:38:06 +0000
Subject: [PATCH 04/35] chore: polish test model config handling

Agent-Logs-Url: https://github.com/microsoft/Olive/sessions/e2149604-9db1-438e-a31f-01a53886093d
Co-authored-by: xadupre <22452781+xadupre@users.noreply.github.com>
---
 olive/common/hf/utils.py | 7 ++++++-
 test/common/test_hf.py   | 6 +++---
 2 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/olive/common/hf/utils.py b/olive/common/hf/utils.py
index 7c713ce1ce..110b4fe3ae 100644
--- a/olive/common/hf/utils.py
+++ b/olive/common/hf/utils.py
@@ -27,7 +27,12 @@ def _apply_test_model_config(
         return model_config
 
     model_config = deepcopy(model_config)
-    hidden_layers = test_model_config.get("hidden_layers") or test_model_config.get("num_hidden_layers") or 2
+    if "hidden_layers" in test_model_config:
+        hidden_layers = test_model_config["hidden_layers"]
+    elif "num_hidden_layers" in test_model_config:
+        hidden_layers = test_model_config["num_hidden_layers"]
+    else:
+        hidden_layers = 2
     if hidden_layers < 1:
         raise ValueError("test_model_config.hidden_layers must be greater than 0.")
 

diff --git a/test/common/test_hf.py b/test/common/test_hf.py
index dacc4ac3ad..5300ca118a 100644
--- a/test/common/test_hf.py
+++ b/test/common/test_hf.py
@@ -6,7 +6,7 @@
 
 import pytest
 import torch
-import transformers
+from transformers import BertConfig, GPT2Config
 
 from olive.common.hf.model_io import get_model_dummy_input, get_model_io_config
 from olive.common.hf.utils import load_model_from_task
@@ -25,8 +25,8 @@ def test_load_model_from_task():
 @pytest.mark.parametrize(
     ("model_config", "hidden_layers_attr"),
     [
-        (transformers.BertConfig(num_hidden_layers=12), "num_hidden_layers"),
-        (transformers.GPT2Config(n_layer=12), "n_layer"),
+        (BertConfig(num_hidden_layers=12), "num_hidden_layers"),
+        (GPT2Config(n_layer=12), "n_layer"),
     ],
 )
 def test_load_model_from_task_test_model_config(model_config, hidden_layers_attr):

From 273850c72e26934b91f7fb505c411952bde1e91f Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Mon, 11 May 2026 09:19:50 +0000
Subject: [PATCH 05/35] fix: fail fast for HF test model loading

Agent-Logs-Url: https://github.com/microsoft/Olive/sessions/d4221aaf-b4a3-4bed-85fe-30a12dcdfd6e
Co-authored-by: xadupre <22452781+xadupre@users.noreply.github.com>
---
 olive/common/hf/utils.py |  2 ++
 test/common/test_hf.py   | 20 ++++++++++++++++++++
 2 files changed, 22 insertions(+)

diff --git a/olive/common/hf/utils.py b/olive/common/hf/utils.py
index 110b4fe3ae..692019b8ab 100644
--- a/olive/common/hf/utils.py
+++ b/olive/common/hf/utils.py
@@ -103,6 +103,8 @@ def load_model_from_task(
             logger.debug("Loaded model %s with name_or_path %s", model_class, model_name_or_path)
             break
         except (OSError, ValueError) as e:
+            if test_model_config:
+                raise
             if i == len(class_tuple) - 1:
                 # len(class_tuple) == 1 covers most common tasks like text-generation, text-classification, etc
                 # error could be device OOM, device_map: "auto" not supported, etc

diff --git a/test/common/test_hf.py b/test/common/test_hf.py
index 5300ca118a..ab942ee399 100644
--- a/test/common/test_hf.py
+++ b/test/common/test_hf.py
@@ -48,6 +48,26 @@ def test_load_model_from_task_test_model_config(model_config, hidden_layers_attr
     assert getattr(mock_model_class.from_config.call_args.args[0], hidden_layers_attr) == 2
 
 
+def test_load_model_from_task_test_model_config_fails_without_fallback():
+    model_config = BertConfig(num_hidden_layers=12)
+
+    with (
+        patch("transformers.pipelines.check_task") as mock_check_task,
+        patch("olive.common.hf.utils.from_pretrained", return_value=model_config),
+    ):
+        first_model_class = MagicMock()
+        first_model_class.from_config.side_effect = ValueError("unexpected architecture")
+        second_model_class = MagicMock()
+        second_model_class.from_config.return_value = MagicMock(spec=torch.nn.Module)
+        mock_check_task.return_value = ("text-classification", {"pt": (first_model_class, second_model_class)}, None)
+
+        with pytest.raises(ValueError, match="unexpected architecture"):
+            load_model_from_task("text-classification", "dummy-model", test_model_config={"hidden_layers": 2})
+
+    first_model_class.from_config.assert_called_once()
+    second_model_class.from_config.assert_not_called()
+
+
 @pytest.mark.parametrize(
     ("exceptions", "expected_exception", "expected_message"),
     [

From 318fcbeefd65b4f5b2fc8c9ab42b953e7d7585e4 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Mon, 11 May 2026 09:35:06 +0000
Subject: [PATCH 06/35] refactor: remove nested try from HF test loading

Agent-Logs-Url: https://github.com/microsoft/Olive/sessions/57fd060e-5678-4015-b277-c7d14ec52145
Co-authored-by: xadupre <22452781+xadupre@users.noreply.github.com>
---
 olive/common/hf/utils.py | 18 ++++++++++++++----
 test/common/test_hf.py   | 15 ++++++++++++++-
 2 files changed, 28 insertions(+), 5 deletions(-)

diff --git a/olive/common/hf/utils.py b/olive/common/hf/utils.py
index 692019b8ab..91155e5340 100644
--- a/olive/common/hf/utils.py
+++ b/olive/common/hf/utils.py
@@ -2,6 +2,7 @@
 # Copyright (c) Microsoft Corporation. All rights reserved.
 # Licensed under the MIT License.
 # --------------------------------------------------------------------------
+import inspect
 import logging
 from copy import deepcopy
 from pathlib import Path
@@ -50,6 +51,18 @@ def _apply_test_model_config(
     return model_config
 
 
+def _load_test_model(model_class: type, model_config: "PretrainedConfig", trust_remote_code: Optional[bool] = None):
+    """Instantiate a random-initialized HF model from config for test mode."""
+    from_config_signature = inspect.signature(model_class.from_config)
+    supports_trust_remote_code = "trust_remote_code" in from_config_signature.parameters or any(
+        parameter.kind == inspect.Parameter.VAR_KEYWORD for parameter in from_config_signature.parameters.values()
+    )
+    from_config_kwargs = {}
+    if supports_trust_remote_code and trust_remote_code is not None:
+        from_config_kwargs["trust_remote_code"] = trust_remote_code
+    return model_class.from_config(model_config, **from_config_kwargs)
+
+
 def load_model_from_task(
     task: str, model_name_or_path: str, test_model_config: Optional[dict[str, Any]] = None, **kwargs
 ) -> "PreTrainedModel":
@@ -94,10 +107,7 @@ def load_model_from_task(
     for i, model_class in enumerate(class_tuple):
         try:
             if test_model_config:
-                try:
-                    model = model_class.from_config(model_config, trust_remote_code=kwargs.get("trust_remote_code"))
-                except TypeError:
-                    model = model_class.from_config(model_config)
+                model = _load_test_model(model_class, model_config, kwargs.get("trust_remote_code"))
             else:
                 model = from_pretrained(model_class, model_name_or_path, "model", **kwargs)
             logger.debug("Loaded model %s with name_or_path %s", model_class, model_name_or_path)

diff --git a/test/common/test_hf.py b/test/common/test_hf.py
index ab942ee399..7fa0c9801e 100644
--- a/test/common/test_hf.py
+++ b/test/common/test_hf.py
@@ -9,7 +9,7 @@
 from transformers import BertConfig, GPT2Config
 
 from olive.common.hf.model_io import get_model_dummy_input, get_model_io_config
-from olive.common.hf.utils import load_model_from_task
+from olive.common.hf.utils import _load_test_model, load_model_from_task
 
 
 def test_load_model_from_task():
@@ -68,6 +68,19 @@ def test_load_model_from_task_test_model_config_fails_without_fallback():
     second_model_class.from_config.assert_not_called()
 
 
+def test_load_test_model_omits_unsupported_trust_remote_code_kwarg():
+    model_config = BertConfig(num_hidden_layers=12)
+
+    class MockModelClass:
+        @staticmethod
+        def from_config(config):
+            return config
+
+    created_model = _load_test_model(MockModelClass, model_config, trust_remote_code=True)
+
+    assert created_model is model_config
+
+
 @pytest.mark.parametrize(
     ("exceptions", "expected_exception", "expected_message"),
     [

From 40b07407da32fb962d6e33932ef8b3e1688e771b Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Mon, 11 May 2026 09:37:26 +0000
Subject: [PATCH 07/35] test: cover trust_remote_code helper behavior

Agent-Logs-Url: https://github.com/microsoft/Olive/sessions/57fd060e-5678-4015-b277-c7d14ec52145
Co-authored-by: xadupre <22452781+xadupre@users.noreply.github.com>
---
 test/common/test_hf.py | 37 +++++++++++++++++++++++++++++++++++++
 1 file changed, 37 insertions(+)

diff --git a/test/common/test_hf.py b/test/common/test_hf.py
index 7fa0c9801e..7df0d2207d 100644
--- a/test/common/test_hf.py
+++ b/test/common/test_hf.py
@@ -70,15 +70,52 @@ def test_load_model_from_task_test_model_config_fails_without_fallback():
 
 def test_load_test_model_omits_unsupported_trust_remote_code_kwarg():
     model_config = BertConfig(num_hidden_layers=12)
+    captured = {}
 
     class MockModelClass:
         @staticmethod
         def from_config(config):
+            captured["config"] = config
             return config
 
     created_model = _load_test_model(MockModelClass, model_config, trust_remote_code=True)
 
     assert created_model is model_config
+    assert captured == {"config": model_config}
+
+
+def test_load_test_model_omits_none_trust_remote_code_kwarg():
+    model_config = BertConfig(num_hidden_layers=12)
+    captured = {}
+
+    class MockModelClass:
+        @staticmethod
+        def from_config(config, **kwargs):
+            captured["config"] = config
+            captured["kwargs"] = kwargs
+            return config
+
+    created_model = _load_test_model(MockModelClass, model_config)
+
+    assert created_model is model_config
+    assert captured == {"config": model_config, "kwargs": {}}
+
+
+def test_load_test_model_passes_supported_trust_remote_code_kwarg():
+    model_config = BertConfig(num_hidden_layers=12)
+    captured = {}
+
+    class MockModelClass:
+        @staticmethod
+        def from_config(config, trust_remote_code=None):
+            captured["config"] = config
+            captured["trust_remote_code"] = trust_remote_code
+            return config
+
+    created_model = _load_test_model(MockModelClass, model_config, trust_remote_code=True)
+
+    assert created_model is model_config
+    assert captured == {"config": model_config, "trust_remote_code": True}
 
 
 @pytest.mark.parametrize(

From 386ff01781c5f32edf811814efc6e33616c8003e Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Mon, 11 May 2026 10:56:58 +0000
Subject: [PATCH 08/35] feat: persist reusable HF test model path

Agent-Logs-Url: https://github.com/microsoft/Olive/sessions/cf4d7fdc-1de9-4098-adc3-3bedd436d7d0
Co-authored-by: xadupre <22452781+xadupre@users.noreply.github.com>
---
 olive/cli/base.py         | 16 +++++++++---
 olive/common/hf/utils.py  | 19 ++++++++++++--
 olive/model/handler/hf.py |  9 ++++++-
 test/cli/test_base.py     |  3 ++-
 test/cli/test_cli.py      |  3 +++
 test/common/test_hf.py    | 53 +++++++++++++++++++++++++++++++++++++++
 6 files changed, 96 insertions(+), 7 deletions(-)

diff --git a/olive/cli/base.py b/olive/cli/base.py
index 9355a0d301..dcc8be00b2 100644
--- a/olive/cli/base.py
+++ b/olive/cli/base.py
@@ -105,8 +105,13 @@ def _get_hf_input_model(args: Namespace, model_path: OLIVE_RESOURCE_ANNOTATIONS)
         input_model["adapter_path"] = args.adapter_path
     if getattr(args, "trust_remote_code", None) is not None:
         input_model["load_kwargs"]["trust_remote_code"] = args.trust_remote_code
-    if getattr(args, "test", False):
+    test_model_output_path = getattr(args, "test", None)
+    if test_model_output_path is not None and test_model_output_path is not False:
         input_model["test_model_config"] = {"hidden_layers": 2}
+        if test_model_output_path is True and getattr(args, "output_path", None):
+            test_model_output_path = str(Path(args.output_path) / "test_model")
+        if test_model_output_path is not True:
+            input_model["test_model_path"] = test_model_output_path
     return input_model
 
 
@@ -375,8 +380,13 @@
     )
     model_group.add_argument(
         "--test",
-        action="store_true",
-        help="Use a randomly initialized test model with the same Hugging Face architecture and 2 hidden layers.",
+        type=str,
+        nargs="?",
+        const=True,
+        help=(
+            "Use a randomly initialized test model with the same Hugging Face architecture and 2 hidden layers. "
+            "Optionally provide a folder where the generated test model should be saved and reused."
+        ),
     )
 
     if enable_hf_adapter:

diff --git a/olive/common/hf/utils.py b/olive/common/hf/utils.py
index 91155e5340..ff397c1bca 100644
--- a/olive/common/hf/utils.py
+++ b/olive/common/hf/utils.py
@@ -63,8 +63,18 @@ def _load_test_model(model_class: type, model_config: "PretrainedConfig", trust_
     return model_class.from_config(model_config, **from_config_kwargs)
 
 
+def _save_test_model(model: "PreTrainedModel", output_dir: str):
+    output_path = Path(output_dir)
+    output_path.mkdir(parents=True, exist_ok=True)
+    model.save_pretrained(str(output_path))
+
+
 def load_model_from_task(
-    task: str, model_name_or_path: str, test_model_config: Optional[dict[str, Any]] = None, **kwargs
+    task: str,
+    model_name_or_path: str,
+    test_model_config: Optional[dict[str, Any]] = None,
+    test_model_path: Optional[str] = None,
+    **kwargs,
 ) -> "PreTrainedModel":
     """Load huggingface model from task and model_name_or_path."""
     from transformers.pipelines import check_task
@@ -107,7 +117,12 @@ def load_model_from_task(
     for i, model_class in enumerate(class_tuple):
         try:
             if test_model_config:
-                model = _load_test_model(model_class, model_config, kwargs.get("trust_remote_code"))
+                if test_model_path and (Path(test_model_path) / "config.json").exists():
+                    model = from_pretrained(model_class, test_model_path, "model", **kwargs)
+                else:
+                    model = _load_test_model(model_class, model_config, kwargs.get("trust_remote_code"))
+                    if test_model_path:
+                        _save_test_model(model, test_model_path)
             else:
                 model = from_pretrained(model_class, model_name_or_path, "model", **kwargs)
             logger.debug("Loaded model %s with name_or_path %s", model_class, model_name_or_path)

diff --git a/olive/model/handler/hf.py b/olive/model/handler/hf.py
index 0ed38a5ada..fc659a57e6 100644
--- a/olive/model/handler/hf.py
+++ b/olive/model/handler/hf.py
@@ -27,7 +27,7 @@
 
 @model_handler_registry("HFModel")
 class HfModelHandler(PyTorchModelHandlerBase, MLFlowTransformersMixin, HfMixin):  # pylint: disable=too-many-ancestors
-    resource_keys: tuple[str, ...] = ("model_path", "adapter_path")
+    resource_keys: tuple[str, ...] = ("model_path", "adapter_path", "test_model_path")
     json_config_keys: tuple[str, ...] = ("task", "load_kwargs", "test_model_config")
 
     def __init__(
@@ -37,6 +37,7 @@ def __init__(
         load_kwargs: Union[dict[str, Any], HfLoadKwargs] = None,
         io_config: Union[dict[str, Any], IoConfig, str] = None,
         adapter_path: OLIVE_RESOURCE_ANNOTATIONS = None,
+        test_model_path: OLIVE_RESOURCE_ANNOTATIONS = None,
         test_model_config: Optional[dict[str, Any]] = None,
         model_attributes: Optional[dict[str, Any]] = None,
     ):
@@ -69,6 +70,11 @@ def adapter_path(self) -> str:
         """Return the path to the peft adapter."""
         return self.get_resource("adapter_path")
 
+    @property
+    def test_model_path(self) -> str:
+        """Return the optional path to a persisted lightweight test model."""
+        return self.get_resource("test_model_path")
+
     def load_model(self, rank: int = None, cache_model: bool = True) -> "torch.nn.Module":
         """Load the model from the model path."""
         if self.model:
@@ -78,6 +84,7 @@ def load_model(self, rank: int = None, cache_model: bool = True) -> "torch.nn.Mo
                 self.task,
                 self.model_path,
                 test_model_config=self.test_model_config,
+                test_model_path=self.test_model_path,
                 **self.get_load_kwargs(),
             )

diff --git a/test/cli/test_base.py b/test/cli/test_base.py
index 6327f1f335..55dabf65ba 100644
--- a/test/cli/test_base.py
+++ b/test/cli/test_base.py
@@ -237,12 +237,13 @@ def test_get_input_model_config_hf_test_model(_):
         task="text-generation",
         model_script=None,
         script_dir=None,
-        test=True,
+        test="saved_test_model",
     )
 
     config = get_input_model_config(args)
 
     assert config["test_model_config"] == {"hidden_layers": 2}
+    assert config["test_model_path"] == "saved_test_model"
 
 
 def test_insert_input_model_cli_output_model():

diff --git a/test/cli/test_cli.py b/test/cli/test_cli.py
index 816147e063..6ab59999a7 100644
--- a/test/cli/test_cli.py
+++ b/test/cli/test_cli.py
@@ -193,11 +193,13 @@ def test_finetune_command(_, mock_run, tmp_path):
 @patch("huggingface_hub.repo_exists", return_value=True)
 def test_optimize_command_test_model_config(_, tmp_path):
     output_dir = tmp_path / "output_dir"
+    test_model_dir = tmp_path / "saved_test_model"
     command_args = [
         "optimize",
         "-m",
         "dummy-model-id",
         "--test",
+        str(test_model_dir),
         "--dry_run",
         "-o",
         str(output_dir),
@@ -207,6 +209,7 @@ def test_optimize_command_test_model_config(_, tmp_path):
 
     config = json.loads((output_dir / "config.json").read_text())
     assert config["input_model"]["test_model_config"] == {"hidden_layers": 2}
+    assert config["input_model"]["test_model_path"] == str(test_model_dir)
 
 
 @patch("olive.workflows.run")

diff --git a/test/common/test_hf.py b/test/common/test_hf.py
index 7df0d2207d..f297d5ca2b 100644
--- a/test/common/test_hf.py
+++ b/test/common/test_hf.py
@@ -68,6 +68,59 @@ def test_load_model_from_task_test_model_config_fails_without_fallback():
     second_model_class.from_config.assert_not_called()
 
 
+def test_load_model_from_task_test_model_config_saves_model(tmp_path):
+    model_config = BertConfig(num_hidden_layers=12)
+    created_model = MagicMock()
+    test_model_path = tmp_path / "saved_test_model"
+
+    with (
+        patch("transformers.pipelines.check_task") as mock_check_task,
+        patch("olive.common.hf.utils.from_pretrained", return_value=model_config),
+    ):
+        mock_model_class = MagicMock()
+        mock_model_class.from_config.return_value = created_model
+        mock_check_task.return_value = ("text-classification", {"pt": (mock_model_class,)}, None)
+
+        model = load_model_from_task(
+            "text-classification",
+            "dummy-model",
+            test_model_config={"hidden_layers": 2},
+            test_model_path=str(test_model_path),
+        )
+
+    assert model is created_model
+    mock_model_class.from_config.assert_called_once()
+    created_model.save_pretrained.assert_called_once_with(str(test_model_path))
+
+
+def test_load_model_from_task_test_model_config_reuses_saved_model(tmp_path):
+    model_config = BertConfig(num_hidden_layers=12)
+    test_model_path = tmp_path / "saved_test_model"
+    test_model_path.mkdir()
+    (test_model_path / "config.json").write_text("{}")
+    loaded_model = MagicMock(spec=torch.nn.Module)
+
+    with (
+        patch("transformers.pipelines.check_task") as mock_check_task,
+        patch(
+            "olive.common.hf.utils.from_pretrained", side_effect=[model_config, loaded_model]
+        ) as mock_from_pretrained,
+    ):
+        mock_model_class = MagicMock()
+        mock_check_task.return_value = ("text-classification", {"pt": (mock_model_class,)}, None)
+
+        model = load_model_from_task(
+            "text-classification",
+            "dummy-model",
+            test_model_config={"hidden_layers": 2},
+            test_model_path=str(test_model_path),
+        )
+
+    assert model is loaded_model
+    mock_model_class.from_config.assert_not_called()
+    assert mock_from_pretrained.call_args_list[1].args[1] == str(test_model_path)
+
+
 def test_load_test_model_omits_unsupported_trust_remote_code_kwarg():
     model_config = BertConfig(num_hidden_layers=12)
     captured = {}

From 09fac8c37891c45bbf3d9f0c68e1b635e60d39f9 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Mon, 11 May 2026 11:00:00 +0000
Subject: [PATCH 09/35] fix: tighten HF test model path handling

Agent-Logs-Url: https://github.com/microsoft/Olive/sessions/cf4d7fdc-1de9-4098-adc3-3bedd436d7d0
Co-authored-by: xadupre <22452781+xadupre@users.noreply.github.com>
---
 olive/cli/base.py        |  4 +++-
 olive/common/hf/utils.py |  9 +++++++--
 test/cli/test_base.py    | 15 +++++++++++++++
 test/common/test_hf.py   |  4 ++--
 4 files changed, 27 insertions(+), 5 deletions(-)

diff --git a/olive/cli/base.py b/olive/cli/base.py
index dcc8be00b2..fe43a15a00 100644
--- a/olive/cli/base.py
+++ b/olive/cli/base.py
@@ -106,10 +106,12 @@ def _get_hf_input_model(args: Namespace, model_path: OLIVE_RESOURCE_ANNOTATIONS)
     if getattr(args, "trust_remote_code", None) is not None:
         input_model["load_kwargs"]["trust_remote_code"] = args.trust_remote_code
     test_model_output_path = getattr(args, "test", None)
-    if test_model_output_path is not None and test_model_output_path is not False:
+    if test_model_output_path not in (None, False):
         input_model["test_model_config"] = {"hidden_layers": 2}
         if test_model_output_path is True and getattr(args, "output_path", None):
             test_model_output_path = str(Path(args.output_path) / "test_model")
+        elif test_model_output_path is True:
+            raise ValueError("--test requires an explicit folder when output_path is not available.")
         if test_model_output_path is not True:
             input_model["test_model_path"] = test_model_output_path
     return input_model

diff --git a/olive/common/hf/utils.py b/olive/common/hf/utils.py
index ff397c1bca..aa144a24f3 100644
--- a/olive/common/hf/utils.py
+++ b/olive/common/hf/utils.py
@@ -65,8 +65,13 @@ def _load_test_model(model_class: type, model_config: "PretrainedConfig", trust_
 
 def _save_test_model(model: "PreTrainedModel", output_dir: str):
     output_path = Path(output_dir)
-    output_path.mkdir(parents=True, exist_ok=True)
-    model.save_pretrained(str(output_path))
+    try:
+        output_path.mkdir(parents=True, exist_ok=True)
+        logger.info("Saving generated test model to %s", output_path)
+        model.save_pretrained(str(output_path))
+    except Exception:
+        logger.exception("Failed to save generated test model to %s", output_path)
+        raise
 
 
 def load_model_from_task(

diff --git a/test/cli/test_base.py b/test/cli/test_base.py
index 55dabf65ba..bb34cef3f2 100644
--- a/test/cli/test_base.py
+++ b/test/cli/test_base.py
@@ -246,6 +246,21 @@ def test_get_input_model_config_hf_test_model(_):
     assert config["test_model_path"] == "saved_test_model"
 
 
+@patch("huggingface_hub.repo_exists", return_value=True)
+def test_get_input_model_config_hf_test_model_requires_path_without_output_path(_):
+    args = SimpleNamespace(
+        model_name_or_path="hf_model",
+        trust_remote_code=False,
+        task="text-generation",
+        model_script=None,
+        script_dir=None,
+        test=True,
+    )
+
+    with pytest.raises(ValueError, match=r"--test requires an explicit folder when output_path is not available\."):
+        get_input_model_config(args)
+
+
 def test_insert_input_model_cli_output_model():
     # setup
     model_path = str(Path(__file__).parent.resolve() / "output_model")

diff --git a/test/common/test_hf.py b/test/common/test_hf.py
index f297d5ca2b..bb89215c81 100644
--- a/test/common/test_hf.py
+++ b/test/common/test_hf.py
@@ -84,7 +84,7 @@ def test_load_model_from_task_test_model_config_saves_model(tmp_path):
         model = load_model_from_task(
             "text-classification",
             "dummy-model",
-            test_model_config={"hidden_layers": 2},
+            test_model_config={"num_hidden_layers": 2},
             test_model_path=str(test_model_path),
         )
 
@@ -112,7 +112,7 @@ def test_load_model_from_task_test_model_config_reuses_saved_model(tmp_path):
         model = load_model_from_task(
             "text-classification",
             "dummy-model",
-            test_model_config={"hidden_layers": 2},
+            test_model_config={"num_hidden_layers": 2},
             test_model_path=str(test_model_path),
         )
 

From 09df0a7e5f01c6d8ed92ba712ca2cb139f51a47c Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Mon, 11 May 2026 11:02:14 +0000
Subject: [PATCH 10/35] refactor: simplify test model path handling

Agent-Logs-Url: https://github.com/microsoft/Olive/sessions/cf4d7fdc-1de9-4098-adc3-3bedd436d7d0
Co-authored-by: xadupre <22452781+xadupre@users.noreply.github.com>
---
 olive/cli/base.py        | 12 ++++++------
 olive/common/hf/utils.py | 10 +++-------
 2 files changed, 9 insertions(+), 13 deletions(-)

diff --git a/olive/cli/base.py b/olive/cli/base.py
index fe43a15a00..ccf23ed957 100644
--- a/olive/cli/base.py
+++ b/olive/cli/base.py
@@ -108,12 +108,12 @@ def _get_hf_input_model(args: Namespace, model_path: OLIVE_RESOURCE_ANNOTATIONS)
     test_model_output_path = getattr(args, "test", None)
     if test_model_output_path not in (None, False):
         input_model["test_model_config"] = {"hidden_layers": 2}
-        if test_model_output_path is True and getattr(args, "output_path", None):
-            test_model_output_path = str(Path(args.output_path) / "test_model")
-        elif test_model_output_path is True:
-            raise ValueError("--test requires an explicit folder when output_path is not available.")
-        if test_model_output_path is not True:
-            input_model["test_model_path"] = test_model_output_path
+        if test_model_output_path is True:
+            output_path = getattr(args, "output_path", None)
+            if not output_path:
+                raise ValueError("--test requires an explicit folder when output_path is not available.")
+            test_model_output_path = str(Path(output_path) / "test_model")
+        input_model["test_model_path"] = test_model_output_path
     return input_model

diff --git a/olive/common/hf/utils.py b/olive/common/hf/utils.py
index aa144a24f3..3bc5c29999 100644
--- a/olive/common/hf/utils.py
+++ b/olive/common/hf/utils.py
@@ -65,13 +65,9 @@ def _load_test_model(model_class: type, model_config: "PretrainedConfig", trust_
 
 def _save_test_model(model: "PreTrainedModel", output_dir: str):
     output_path = Path(output_dir)
-    try:
-        output_path.mkdir(parents=True, exist_ok=True)
-        logger.info("Saving generated test model to %s", output_path)
-        model.save_pretrained(str(output_path))
-    except Exception:
-        logger.exception("Failed to save generated test model to %s", output_path)
-        raise
+    output_path.mkdir(parents=True, exist_ok=True)
+    logger.info("Saving generated test model to %s", output_path)
+    model.save_pretrained(str(output_path))
 
 
 def load_model_from_task(

From d4ebad5e79fb9482817bf981429ffa69f637c7fd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Xavier=20Dupr=C3=A9?=
Date: Mon, 11 May 2026 14:09:38 +0200
Subject: [PATCH 11/35] lintrunner

---
 test/common/test_hf.py | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/test/common/test_hf.py b/test/common/test_hf.py
index bb89215c81..7360309d6c 100644
--- a/test/common/test_hf.py
+++ b/test/common/test_hf.py
@@ -25,8 +25,8 @@ def test_load_model_from_task():
 @pytest.mark.parametrize(
     ("model_config", "hidden_layers_attr"),
     [
-        (BertConfig(num_hidden_layers=12), "num_hidden_layers"),
-        (GPT2Config(n_layer=12), "n_layer"),
+        (BertConfig(num_hidden_layers=12), "num_hidden_layers"),  # pylint: disable=unexpected-keyword-arg
+        (GPT2Config(n_layer=12), "n_layer"),  # pylint: disable=unexpected-keyword-arg
     ],
 )
 def test_load_model_from_task_test_model_config(model_config, hidden_layers_attr):
@@ -69,7 +69,7 @@ def test_load_model_from_task_test_model_config_fails_without_fallback():
 
 
 def test_load_model_from_task_test_model_config_saves_model(tmp_path):
-    model_config = BertConfig(num_hidden_layers=12)
+    model_config = BertConfig(num_hidden_layers=12)  # pylint: disable=unexpected-keyword-arg
     created_model = MagicMock()
     test_model_path = tmp_path / "saved_test_model"
 
@@ -94,7 +94,7 @@ def test_load_model_from_task_test_model_config_saves_model(tmp_path):
 
 
 def test_load_model_from_task_test_model_config_reuses_saved_model(tmp_path):
-    model_config = BertConfig(num_hidden_layers=12)
+    model_config = BertConfig(num_hidden_layers=12)  # pylint: disable=unexpected-keyword-arg
     test_model_path = tmp_path / "saved_test_model"
     test_model_path.mkdir()
     (test_model_path / "config.json").write_text("{}")
@@ -122,7 +122,7 @@ def test_load_model_from_task_test_model_config_reuses_saved_model(tmp_path):
 
 
 def test_load_test_model_omits_unsupported_trust_remote_code_kwarg():
-    model_config = BertConfig(num_hidden_layers=12)
+    model_config = BertConfig(num_hidden_layers=12)  # pylint: disable=unexpected-keyword-arg
     captured = {}
 
     class MockModelClass:
@@ -138,7 +138,7 @@ def test_load_test_model_omits_unsupported_trust_remote_code_kwarg():
 
 
 def test_load_test_model_omits_none_trust_remote_code_kwarg():
-    model_config = BertConfig(num_hidden_layers=12)
+    model_config = BertConfig(num_hidden_layers=12)  # pylint: disable=unexpected-keyword-arg
     captured = {}
 
     class MockModelClass:
@@ -155,7 +155,7 @@ def test_load_test_model_omits_none_trust_remote_code_kwarg():
 
 
 def test_load_test_model_passes_supported_trust_remote_code_kwarg():
-    model_config = BertConfig(num_hidden_layers=12)
+    model_config = BertConfig(num_hidden_layers=12)  # pylint: disable=unexpected-keyword-arg
     captured = {}
 
     class MockModelClass:

From 6321d32bb7910a1e48aaf6ca560626083d19aaa8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Xavier=20Dupr=C3=A9?=
Date: Mon, 11 May 2026 14:54:25 +0200
Subject: [PATCH 12/35] lint

---
 test/common/test_hf.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/common/test_hf.py b/test/common/test_hf.py
index 7360309d6c..32fa143b89 100644
--- a/test/common/test_hf.py
+++ b/test/common/test_hf.py
@@ -49,7 +49,7 @@ def test_load_model_from_task_test_model_config(model_config, hidden_layers_attr
 
 
 def test_load_model_from_task_test_model_config_fails_without_fallback():
-    model_config = BertConfig(num_hidden_layers=12)
+    model_config = BertConfig(num_hidden_layers=12)  # pylint: disable=unexpected-keyword-arg
 
     with (
         patch("transformers.pipelines.check_task") as mock_check_task,

From 6709852376abf12615bb896a94b77896dc19b28b Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Mon, 11 May 2026 13:05:29 +0000
Subject: [PATCH 13/35] docs: add phi test conversion how-to

Agent-Logs-Url: https://github.com/microsoft/Olive/sessions/48043731-ac61-43be-9857-ff5ff131fe93
Co-authored-by: xadupre <22452781+xadupre@users.noreply.github.com>
---
 .../source/how-to/cli/cli-convert-phi-test.md | 77 +++++++++++++++++++
 docs/source/how-to/index.md                   |  2 +
 2 files changed, 79 insertions(+)
 create mode 100644 docs/source/how-to/cli/cli-convert-phi-test.md

diff --git a/docs/source/how-to/cli/cli-convert-phi-test.md b/docs/source/how-to/cli/cli-convert-phi-test.md
new file mode 100644
index 0000000000..42e4d57cf8
--- /dev/null
+++ b/docs/source/how-to/cli/cli-convert-phi-test.md
@@ -0,0 +1,77 @@
+# How to convert a Phi model with a quick `--test` smoke check
+
+If you are converting a large language model, it is often useful to validate the Olive command, environment, and conversion recipe on a much smaller model before spending time on the full checkpoint.
+
+The `--test` option does that for Hugging Face models. Olive keeps the same model architecture, reduces it to a random 2-layer test model, saves it to the folder you provide, and reuses that folder on later runs.
+
+This example uses [`microsoft/Phi-3.5-mini-instruct`](https://huggingface.co/microsoft/Phi-3.5-mini-instruct), but the same pattern works for other supported Hugging Face LLMs.
+
+## Step 1: run a fast smoke test
+
+Start with a lightweight conversion pass that uses `--test` to create and reuse a reduced Phi model.
+
+```bash
+olive optimize \
+  --model_name_or_path microsoft/Phi-3.5-mini-instruct \
+  --device cpu \
+  --provider CPUExecutionProvider \
+  --precision int4 \
+  --test out/phi-test-model \
+  --output_path out/phi-smoke
+```
+
+What this does:
+
+- `--test out/phi-test-model` creates a reduced random Phi model and saves it in `out/phi-test-model`
+- later runs reuse the same saved test model instead of recreating it
+- `--output_path out/phi-smoke` stores the converted ONNX artifacts from the smoke test
+
+This is a quick way to confirm that:
+
+- Olive can load the source model
+- the selected optimization recipe is valid for your setup
+- the conversion path completes before you run the full model
+
+If you only want to inspect the generated workflow first, add `--dry_run`:
+
+```bash
+olive optimize \
+  --model_name_or_path microsoft/Phi-3.5-mini-instruct \
+  --device cpu \
+  --provider CPUExecutionProvider \
+  --precision int4 \
+  --test out/phi-test-model \
+  --dry_run \
+  --output_path out/phi-smoke
+```
+
+The generated `config.json` will include both `test_model_config` and `test_model_path`, so the same reduced model can be reused later.
+
+## Step 2: run the full conversion
+
+Once the smoke test succeeds, rerun the conversion on the full Phi checkpoint by removing `--test`.
+
+```bash
+olive optimize \
+  --model_name_or_path microsoft/Phi-3.5-mini-instruct \
+  --device cpu \
+  --provider CPUExecutionProvider \
+  --precision int4 \
+  --output_path out/phi-full
+```
+
+At this point you know the Olive command and the conversion recipe already worked on the lightweight test model, so you can focus on the full-model run instead of debugging both at once.
+
+## Why keep the test model folder?
+
+The saved test model is useful beyond the first smoke test:
+
+- you can rerun the reduced conversion quickly while iterating on options
+- you can reuse the same HF test model later when comparing the Hugging Face model against the exported ONNX model
+- you avoid recreating a new random test checkpoint every time
+
+## Related docs
+
+- [How to use the `olive optimize` command to optimize a Pytorch model](cli-optimize)
+- [How to write a new workflow from scratch](../configure-workflows/build-workflow)
+- [CLI reference](../../reference/cli)

diff --git a/docs/source/how-to/index.md b/docs/source/how-to/index.md
index 8ec16aba9e..d0b4a290a1 100644
--- a/docs/source/how-to/index.md
+++ b/docs/source/how-to/index.md
@@ -8,6 +8,7 @@
 The Olive CLI provides a set of primitives such as `quantize`, `finetune`, `onnx-graph-capture`, `auto-opt` that enable you to *easily* optimize select models and experiment with different cutting-edge optimization strategies without the need to define workflows.
 
 - [How to use the `olive optimize` command to optimize a Pytorch model](cli/cli-optimize)
+- [How to convert a Phi model with a quick `--test` smoke check](cli/cli-convert-phi-test)
 - [How to use the `olive auto-opt` command to take a PyTorch/Hugging Face model and turn it into an optimized ONNX model](cli/cli-auto-opt)
 - [how to use the `olive finetune` command to create (Q)LoRA adapters](cli/cli-finetune)
 - [How to use the `olive quantize` command to quantize your model with different precisions and techniques such as AWQ](cli/cli-quantize)
@@ -43,6 +44,7 @@
 installation
 
 cli/cli-optimize
+cli/cli-convert-phi-test
 cli/cli-auto-opt
 cli/cli-finetune
 cli/cli-quantize

From 3f8f8fc3425935233b3e85d05105d37bdea3a8c8 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Mon, 11 May 2026 13:31:45 +0000
Subject: [PATCH 14/35] feat: support run command test models

Agent-Logs-Url: https://github.com/microsoft/Olive/sessions/6e67b7b2-2cf7-46fa-8edc-04dbd263c56d
Co-authored-by: xadupre <22452781+xadupre@users.noreply.github.com>
---
 .../source/how-to/cli/cli-convert-phi-test.md | 43 ++++++++--------
 olive/cli/base.py                             | 25 ++++++----
 olive/cli/run.py                              |  9 ++++
 test/cli/test_cli.py                          | 49 ++++++++++++++++++-
 4 files changed, 92 insertions(+), 34 deletions(-)

diff --git a/docs/source/how-to/cli/cli-convert-phi-test.md b/docs/source/how-to/cli/cli-convert-phi-test.md
index 42e4d57cf8..b0fc17ec14 100644
--- a/docs/source/how-to/cli/cli-convert-phi-test.md
+++ b/docs/source/how-to/cli/cli-convert-phi-test.md
@@ -6,9 +6,9 @@ The `--test` option does that for Hugging Face models. Olive keeps the same mode
 
 This example uses [`microsoft/Phi-3.5-mini-instruct`](https://huggingface.co/microsoft/Phi-3.5-mini-instruct), but the same pattern works for other supported Hugging Face LLMs.
 
-## Step 1: run a fast smoke test
+## Step 1: generate the workflow config
 
-Start with a lightweight conversion pass that uses `--test` to create and reuse a reduced Phi model.
+Start by generating the config that Olive will run for the Phi conversion.
 
 ```bash
 olive optimize \
@@ -16,6 +16,19 @@ olive optimize \
   --device cpu \
   --provider CPUExecutionProvider \
   --precision int4 \
+  --dry_run \
+  --output_path out/phi-smoke
+```
+
+This creates `out/phi-smoke/config.json` without launching the full conversion yet.
+
+## Step 2: run a fast smoke test with `olive run --test`
+
+Use the generated config with `olive run` and pass `--test` so Olive swaps in a reduced random Phi model.
+
+```bash
+olive run \
+  --config out/phi-smoke/config.json \
   --test out/phi-test-model \
   --output_path out/phi-smoke
 ```
@@ -24,7 +37,7 @@ What this does:
 
 - `--test out/phi-test-model` creates a reduced random Phi model and saves it in `out/phi-test-model`
 - later runs reuse the same saved test model instead of recreating it
-- `--output_path out/phi-smoke` stores the converted ONNX artifacts from the smoke test
+- `--output_path out/phi-smoke` reuses the generated workflow and stores the converted ONNX artifacts from the smoke test
 
 This is a quick way to confirm that:
 
@@ -32,31 +45,15 @@ olive optimize \
 - the selected optimization recipe is valid for your setup
 - the conversion path completes before you run the full model
 
-If you only want to inspect the generated workflow first, add `--dry_run`:
+If you omit the folder and just pass `--test`, `olive run` will save the reduced model under /test_model.
 
-```bash
-olive optimize \
-  --model_name_or_path microsoft/Phi-3.5-mini-instruct \
-  --device cpu \
-  --provider CPUExecutionProvider \
-  --precision int4 \
-  --test out/phi-test-model \
-  --dry_run \
-  --output_path out/phi-smoke
-```
-
-The generated `config.json` will include both `test_model_config` and `test_model_path`, so the same reduced model can be reused later.
+## Step 3: run the full conversion
 
-## Step 2: run the full conversion
-
 Once the smoke test succeeds, rerun the conversion on the full Phi checkpoint by removing `--test`.
 
 ```bash
-olive optimize \
-  --model_name_or_path microsoft/Phi-3.5-mini-instruct \
-  --device cpu \
-  --provider CPUExecutionProvider \
-  --precision int4 \
+olive run \
+  --config out/phi-smoke/config.json \
   --output_path out/phi-full
 ```

diff --git a/olive/cli/base.py b/olive/cli/base.py
index ccf23ed957..4a8194460e 100644
--- a/olive/cli/base.py
+++ b/olive/cli/base.py
@@ -82,6 +82,20 @@ def run(self):
         raise NotImplementedError
 
 
+def add_hf_test_model_config(input_model: dict, test_value, output_path: Optional[str] = None) -> dict:
+    if test_value in (None, False):
+        return input_model
+
+    test_model_output_path = test_value
+    input_model["test_model_config"] = {"hidden_layers": 2}
+    if test_model_output_path is True:
+        if not output_path:
+            raise ValueError("--test requires an explicit folder when output_path is not available.")
+        test_model_output_path = str(Path(output_path) / "test_model")
+    input_model["test_model_path"] = test_model_output_path
+    return input_model
+
+
 def _get_hf_input_model(args: Namespace, model_path: OLIVE_RESOURCE_ANNOTATIONS) -> dict:
     """Get the input model config for HuggingFace model.
 
@@ -105,16 +119,7 @@ def _get_hf_input_model(args: Namespace, model_path: OLIVE_RESOURCE_ANNOTATIONS)
         input_model["adapter_path"] = args.adapter_path
     if getattr(args, "trust_remote_code", None) is not None:
         input_model["load_kwargs"]["trust_remote_code"] = args.trust_remote_code
-    test_model_output_path = getattr(args, "test", None)
-    if test_model_output_path not in (None, False):
-        input_model["test_model_config"] = {"hidden_layers": 2}
-        if test_model_output_path is True:
-            output_path = getattr(args, "output_path", None)
-            if not output_path:
-                raise ValueError("--test requires an explicit folder when output_path is not available.")
-            test_model_output_path = str(Path(output_path) / "test_model")
-        input_model["test_model_path"] = test_model_output_path
-    return input_model
+    return add_hf_test_model_config(input_model, getattr(args, "test", None), getattr(args, "output_path", None))
 
 
 def _get_onnx_input_model(args: Namespace, model_path: str) -> dict:

diff --git a/olive/cli/run.py b/olive/cli/run.py
index 6d2a831aef..c8ac0fb506 100644
--- a/olive/cli/run.py
+++ b/olive/cli/run.py
@@ -6,6 +6,7 @@
 
 from olive.cli.base import (
     BaseOliveCLICommand,
+    add_hf_test_model_config,
     add_input_model_options,
     add_logging_options,
     add_telemetry_options,
@@ -59,6 +60,14 @@ def run(self):
         if input_model_config := get_input_model_config(self.args, required=False):
             print("Replacing input model config in run config")
             run_config["input_model"] = input_model_config
+        elif getattr(self.args, "test", None) not in (None, False):
+            input_model = run_config.get("input_model")
+            if not isinstance(input_model, dict) or input_model.get("type") != "HfModel":
+                raise ValueError("--test for olive run requires a Hugging Face input_model in the run config.")
+            output_path = (
+                self.args.output_path or run_config.get("output_dir") or run_config.get("engine", {}).get("output_dir")
+            )
+            run_config["input_model"] = add_hf_test_model_config(input_model, self.args.test, output_path)
 
         for arg_key, rc_key in [("output_path", "output_dir"), ("log_level", "log_severity_level")]:
             if (arg_value := getattr(self.args, arg_key)) is not None:

diff --git a/test/cli/test_cli.py b/test/cli/test_cli.py
index a7cb39e244..816147e063 100644
--- a/test/cli/test_cli.py
+++ b/test/cli/test_cli.py
@@ -112,7 +112,8 @@ def test_workflow_run_command(mock_run, tempdir, list_required_packages, tmp_pat
 
 
 @patch("olive.workflows.run")
-def test_workflow_run_command_with_overrides(mock_run, tmp_path):
+@patch("huggingface_hub.repo_exists", return_value=True)
+def test_workflow_run_command_with_overrides(_, mock_run, tmp_path):
     # setup
     config_path = tmp_path / "config.json"
     config_path.write_text(
@@ -150,6 +151,52 @@ def test_workflow_run_command_with_overrides(mock_run, tmp_path):
     )
 
 
+@patch("olive.workflows.run")
+def test_workflow_run_command_with_test_override(mock_run, tmp_path):
+    config_path = tmp_path / "config.json"
+    config_path.write_text(
+        json.dumps(
+            {
+                "input_model": {
+                    "type": "HfModel",
+                    "model_path": "hf-internal-testing/tiny-random-LlamaForCausalLM",
+                    "load_kwargs": {"attn_implementation": "eager", "trust_remote_code": False},
+                },
+                "output_dir": str(tmp_path / "output"),
+            }
+        )
+    )
+    command_args = ["run", "--run-config", str(config_path), "--test"]
+
+    cli_main(command_args)
+
+    mock_run.assert_called_once_with(
+        {
+            "input_model": {
+                "type": "HfModel",
+                "model_path": "hf-internal-testing/tiny-random-LlamaForCausalLM",
+                "load_kwargs": {"attn_implementation": "eager", "trust_remote_code": False},
+                "test_model_config": {"hidden_layers": 2},
+                "test_model_path": str(tmp_path / "output" / "test_model"),
+            },
+            "output_dir": str(tmp_path / "output"),
+        },
+        list_required_packages=False,
+        package_config=None,
+        tempdir=None,
+    )
+
+
+def test_workflow_run_command_with_test_requires_hf_input_model(tmp_path):
+    config_path = tmp_path / "config.json"
+    config_path.write_text(json.dumps({"input_model": {"type": "OnnxModel", "model_path": "model.onnx"}}))
+
+    with pytest.raises(
+        ValueError, match=r"--test for olive run requires a Hugging Face input_model in the run config\."
+    ):
+        cli_main(["run", "--run-config", str(config_path), "--test"])
+
+
 @patch("olive.platform_sdk.qualcomm.configure.configure.configure")
 def test_configure_qualcomm_sdk_command(mock_configure):
     # setup

From 189289ad6c31247cfcf9af65b2480f18749dd355 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Mon, 11 May 2026 13:34:48 +0000
Subject: [PATCH 15/35] chore: address review nits for run test support

Agent-Logs-Url: https://github.com/microsoft/Olive/sessions/6e67b7b2-2cf7-46fa-8edc-04dbd263c56d
Co-authored-by: xadupre <22452781+xadupre@users.noreply.github.com>
---
 olive/cli/base.py    | 1 +
 olive/cli/run.py     | 7 +++++--
 test/cli/test_cli.py | 2 +-
 3 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/olive/cli/base.py b/olive/cli/base.py
index 4a8194460e..e31fb04ddf 100644
--- a/olive/cli/base.py
+++ b/olive/cli/base.py
@@ -87,6 +87,7 @@ def add_hf_test_model_config(input_model: dict, test_value, output_path: Optiona
         return input_model
 
     test_model_output_path = test_value
+    # Use 2 layers to keep the test model fast and lightweight while preserving the original architecture family.
     input_model["test_model_config"] = {"hidden_layers": 2}
     if test_model_output_path is True:
         if not output_path:

diff --git a/olive/cli/run.py b/olive/cli/run.py
index c8ac0fb506..ea533d78b9 100644
--- a/olive/cli/run.py
+++ b/olive/cli/run.py
@@ -57,17 +57,20 @@ def run(self):
         run_config = self.args.run_config
         if not isinstance(run_config, dict):
             run_config = load_config_file(run_config)
+        test_value = getattr(self.args, "test", None)
+        if test_value in (None, False):
+            test_value = None
 
         if input_model_config := get_input_model_config(self.args, required=False):
             print("Replacing input model config in run config")
             run_config["input_model"] = input_model_config
-        elif getattr(self.args, "test", None) not in (None, False):
+        elif test_value:
             input_model = run_config.get("input_model")
             if not isinstance(input_model, dict) or input_model.get("type") != "HfModel":
                 raise ValueError("--test for olive run requires a Hugging Face input_model in the run config.")
             output_path = (
                 self.args.output_path or run_config.get("output_dir") or run_config.get("engine", {}).get("output_dir")
             )
-            run_config["input_model"] = add_hf_test_model_config(input_model, self.args.test, output_path)
+            run_config["input_model"] = add_hf_test_model_config(input_model, test_value, output_path)
 
         for arg_key, rc_key in [("output_path", "output_dir"), ("log_level", "log_severity_level")]:
             if (arg_value := getattr(self.args, arg_key)) is not None:

diff --git a/test/cli/test_cli.py b/test/cli/test_cli.py
index 5e6e1877e6..310e23f561 100644
--- a/test/cli/test_cli.py
+++ b/test/cli/test_cli.py
@@ -113,7 +113,7 @@ def test_workflow_run_command(mock_run, tempdir, list_required_packages, tmp_pat
 
 @patch("olive.workflows.run")
 @patch("huggingface_hub.repo_exists", return_value=True)
-def test_workflow_run_command_with_overrides(_, mock_run, tmp_path):
+def test_workflow_run_command_with_overrides(mock_repo_exists, mock_run, tmp_path):
     # setup
     config_path = tmp_path / "config.json"
     config_path.write_text(

From c90c5201cb8b211c3f02391dee51e96ac6e67bc6 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Mon, 11 May 2026 13:36:38 +0000
Subject: [PATCH 16/35] chore: simplify run test override handling

Agent-Logs-Url: https://github.com/microsoft/Olive/sessions/6e67b7b2-2cf7-46fa-8edc-04dbd263c56d
Co-authored-by: xadupre <22452781+xadupre@users.noreply.github.com>
---
 olive/cli/run.py | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/olive/cli/run.py b/olive/cli/run.py
index ea533d78b9..25a7b7203d 100644
--- a/olive/cli/run.py
+++ b/olive/cli/run.py
@@ -57,13 +57,10 @@ def run(self):
         run_config = self.args.run_config
         if not isinstance(run_config, dict):
             run_config = load_config_file(run_config)
-        test_value = getattr(self.args, "test", None)
-        if test_value in (None, False):
-            test_value = None
 
         if input_model_config := get_input_model_config(self.args, required=False):
             print("Replacing input model config in run config")
             run_config["input_model"] = input_model_config
-        elif test_value:
+        elif (test_value := getattr(self.args, "test", None)) not in (None, False):
             input_model = run_config.get("input_model")
             if not isinstance(input_model, dict) or input_model.get("type") != "HfModel":
                 raise ValueError("--test for olive run requires a Hugging Face input_model in the run config.")

From 5cec47fea06e3bc5c98b6518488afd6627009b0f Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Mon, 11 May 2026 13:38:50 +0000
Subject: [PATCH 17/35] chore: polish run test support follow-up

Agent-Logs-Url: https://github.com/microsoft/Olive/sessions/6e67b7b2-2cf7-46fa-8edc-04dbd263c56d
Co-authored-by: xadupre <22452781+xadupre@users.noreply.github.com>
---
 olive/cli/run.py     | 4 ++--
 test/cli/test_cli.py | 1 +
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/olive/cli/run.py b/olive/cli/run.py
index 25a7b7203d..3d85522330 100644
--- a/olive/cli/run.py
+++ b/olive/cli/run.py
@@ -60,14 +60,14 @@ def run(self):
         if input_model_config := get_input_model_config(self.args, required=False):
             print("Replacing input model config in run config")
             run_config["input_model"] = input_model_config
-        elif (test_value := getattr(self.args, "test", None)) not in (None, False):
+        elif self.args.test not in (None, False):
             input_model = run_config.get("input_model")
             if not isinstance(input_model, dict) or input_model.get("type") != "HfModel":
                 raise ValueError("--test for olive run requires a Hugging Face input_model in the run config.")
             output_path = (
                 self.args.output_path or run_config.get("output_dir") or run_config.get("engine", {}).get("output_dir")
             )
-            run_config["input_model"] = add_hf_test_model_config(input_model, test_value, output_path)
+            run_config["input_model"] = add_hf_test_model_config(input_model, self.args.test, output_path)
 
         for arg_key, rc_key in [("output_path", "output_dir"), ("log_level", "log_severity_level")]:
             if (arg_value := getattr(self.args, arg_key)) is not None:

diff --git a/test/cli/test_cli.py b/test/cli/test_cli.py
index 310e23f561..48c607e2ad 100644
--- a/test/cli/test_cli.py
+++ b/test/cli/test_cli.py
@@ -115,6 +115,7 @@ def test_workflow_run_command(mock_run, tempdir, list_required_packages, tmp_pat
 @patch("huggingface_hub.repo_exists", return_value=True)
 def 
test_workflow_run_command_with_overrides(mock_repo_exists, mock_run, tmp_path): # setup + # Prevent a live Hugging Face repo lookup when the CLI resolves the HF input model override. config_path = tmp_path / "config.json" config_path.write_text( json.dumps({"input_model": {"key": "value"}, "engine": {"log_severity_level": 3}, "output_dir": "output"}) From eaf0a16db600f90567f5059cc182ccc7feff7b4d Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 11 May 2026 13:56:44 +0000 Subject: [PATCH 18/35] fix: use saved test checkpoint in model builder Agent-Logs-Url: https://github.com/microsoft/Olive/sessions/ccae26e4-35a5-4f0a-a20e-894d4d89d1f6 Co-authored-by: xadupre <22452781+xadupre@users.noreply.github.com> --- olive/passes/onnx/model_builder.py | 6 +++ test/passes/onnx/test_model_builder.py | 60 ++++++++++++++++++++++++++ 2 files changed, 66 insertions(+) diff --git a/olive/passes/onnx/model_builder.py b/olive/passes/onnx/model_builder.py index f704579ba2..3b743e1c5a 100644 --- a/olive/passes/onnx/model_builder.py +++ b/olive/passes/onnx/model_builder.py @@ -247,6 +247,12 @@ def _run_for_config( input_path = str(model.get_resource("model_path")) else: model_path = model.model_name_or_path + if model.test_model_config: + if not model.test_model_path: + raise ValueError("ModelBuilder requires test_model_path when test_model_config is provided.") + if not (Path(model.test_model_path) / "config.json").exists(): + model.load_model(cache_model=False) + model_path = model.test_model_path # provide the model path as input path, model builder uses input_path for quantized models input_path = model_path if model.adapter_path: diff --git a/test/passes/onnx/test_model_builder.py b/test/passes/onnx/test_model_builder.py index ba62005e4b..67a8f53ed4 100644 --- a/test/passes/onnx/test_model_builder.py +++ b/test/passes/onnx/test_model_builder.py @@ -2,12 +2,16 @@ # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. 
# -------------------------------------------------------------------------- +import sys +import types from pathlib import Path +from unittest.mock import MagicMock, patch import onnx import pytest from olive.model import ONNXModelHandler +from olive.model.handler.hf import HfModelHandler from olive.passes.olive_pass import create_pass_from_dict from olive.passes.onnx.model_builder import ModelBuilder from olive.passes.pytorch.rtn import Rtn @@ -100,3 +104,59 @@ def test_model_builder_layer_annotations(tmp_path, layer_annotations): assert len(node_names_with_metadata) > 0, ( "Expected nodes with metadata_props when layer_annotations are provided" ) + + +def test_model_builder_uses_saved_test_model_path(tmp_path): + test_model_path = tmp_path / "saved_test_model" + output_folder = tmp_path / "output_model" + + mock_cfg = MagicMock() + mock_cfg.to_dict.return_value = {} + with patch.object(HfModelHandler, "get_hf_model_config", return_value=mock_cfg): + input_model = HfModelHandler( + model_path="hf-internal-testing/tiny-random-LlamaForCausalLM", + test_model_config={"hidden_layers": 2}, + test_model_path=str(test_model_path), + ) + + def materialize_test_model(*_, **__): + test_model_path.mkdir(parents=True, exist_ok=True) + (test_model_path / "config.json").write_text("{}") + return MagicMock() + + def fake_create_model(*_, **kwargs): + output_dir = Path(kwargs["output_dir"]) + (output_dir / kwargs["filename"]).write_text("dummy onnx file") + (output_dir / "genai_config.json").write_text("{}") + + fake_builder = types.ModuleType("onnxruntime_genai.models.builder") + fake_builder.create_model = MagicMock(side_effect=fake_create_model) + fake_models = types.ModuleType("onnxruntime_genai.models") + fake_models.builder = fake_builder + fake_ort_genai = types.ModuleType("onnxruntime_genai") + fake_ort_genai.models = fake_models + fake_ort_genai.__version__ = "0.0.0" + + p = create_pass_from_dict(ModelBuilder, {"precision": "fp32"}, disable_search=True) + + with ( + patch.object(ModelBuilder, "maybe_patch_quant"), + patch.dict( + sys.modules, + { + "onnxruntime_genai": fake_ort_genai, + "onnxruntime_genai.models": fake_models, + "onnxruntime_genai.models.builder": fake_builder, + }, + ), + patch.object(input_model, "load_model", side_effect=materialize_test_model) as mock_load_model, + patch.object(input_model, "save_metadata", return_value=[]), + ): + output_model = p.run(input_model, output_folder) + + assert isinstance(output_model, ONNXModelHandler) + assert mock_load_model.call_count == 1 + assert Path(output_model.model_path).exists() + assert test_model_path.exists() + assert fake_builder.create_model.call_args.kwargs["model_name"] == str(test_model_path) + assert fake_builder.create_model.call_args.kwargs["input_path"] == str(test_model_path) From 17cb075145c187df4a8ca9b149a491f6a3df6e57 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 11 May 2026 13:58:39 +0000 Subject: [PATCH 19/35] chore: tidy model builder test fixture Agent-Logs-Url: https://github.com/microsoft/Olive/sessions/ccae26e4-35a5-4f0a-a20e-894d4d89d1f6 Co-authored-by: xadupre <22452781+xadupre@users.noreply.github.com> --- test/passes/onnx/test_model_builder.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/test/passes/onnx/test_model_builder.py b/test/passes/onnx/test_model_builder.py index 67a8f53ed4..5b86886d7a 100644 --- a/test/passes/onnx/test_model_builder.py +++ b/test/passes/onnx/test_model_builder.py @@ -17,6 +17,8 @@ from 
olive.passes.pytorch.rtn import Rtn from test.utils import make_local_tiny_llama +TEST_MODEL_ID = "hf-internal-testing/tiny-random-LlamaForCausalLM" + @pytest.mark.parametrize("metadata_only", [True, False]) def test_model_builder(tmp_path, metadata_only): @@ -114,7 +116,7 @@ def test_model_builder_uses_saved_test_model_path(tmp_path): mock_cfg.to_dict.return_value = {} with patch.object(HfModelHandler, "get_hf_model_config", return_value=mock_cfg): input_model = HfModelHandler( - model_path="hf-internal-testing/tiny-random-LlamaForCausalLM", + model_path=TEST_MODEL_ID, test_model_config={"hidden_layers": 2}, test_model_path=str(test_model_path), ) From 996f6331ae4614721093316e17597ff32b4218c8 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 11 May 2026 14:01:32 +0000 Subject: [PATCH 20/35] chore: clarify model builder test model errors Agent-Logs-Url: https://github.com/microsoft/Olive/sessions/ccae26e4-35a5-4f0a-a20e-894d4d89d1f6 Co-authored-by: xadupre <22452781+xadupre@users.noreply.github.com> --- olive/passes/onnx/model_builder.py | 5 ++++- test/passes/onnx/test_model_builder.py | 6 +++--- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/olive/passes/onnx/model_builder.py b/olive/passes/onnx/model_builder.py index 3b743e1c5a..ba29c0a9b3 100644 --- a/olive/passes/onnx/model_builder.py +++ b/olive/passes/onnx/model_builder.py @@ -249,7 +249,10 @@ def _run_for_config( model_path = model.model_name_or_path if model.test_model_config: if not model.test_model_path: - raise ValueError("ModelBuilder requires test_model_path when test_model_config is provided.") + raise ValueError( + "ModelBuilder requires test_model_path to be set when test_model_config is provided. " + "Please specify the path where the test model should be saved." 
+ ) if not (Path(model.test_model_path) / "config.json").exists(): model.load_model(cache_model=False) model_path = model.test_model_path diff --git a/test/passes/onnx/test_model_builder.py b/test/passes/onnx/test_model_builder.py index 5b86886d7a..0c838a3f7f 100644 --- a/test/passes/onnx/test_model_builder.py +++ b/test/passes/onnx/test_model_builder.py @@ -17,7 +17,7 @@ from olive.passes.pytorch.rtn import Rtn from test.utils import make_local_tiny_llama -TEST_MODEL_ID = "hf-internal-testing/tiny-random-LlamaForCausalLM" +TINY_RANDOM_LLAMA_MODEL_ID = "hf-internal-testing/tiny-random-LlamaForCausalLM" @pytest.mark.parametrize("metadata_only", [True, False]) @@ -116,12 +116,12 @@ def test_model_builder_uses_saved_test_model_path(tmp_path): mock_cfg.to_dict.return_value = {} with patch.object(HfModelHandler, "get_hf_model_config", return_value=mock_cfg): input_model = HfModelHandler( - model_path=TEST_MODEL_ID, + model_path=TINY_RANDOM_LLAMA_MODEL_ID, test_model_config={"hidden_layers": 2}, test_model_path=str(test_model_path), ) - def materialize_test_model(*_, **__): + def materialize_test_model(*args, **kwargs): test_model_path.mkdir(parents=True, exist_ok=True) (test_model_path / "config.json").write_text("{}") return MagicMock() From 00732b75fb86ed0aca5fc3803424c3ee00c3ca47 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Xavier=20Dupr=C3=A9?= Date: Mon, 11 May 2026 16:15:27 +0200 Subject: [PATCH 21/35] fix dtype: auto --- olive/common/hf/utils.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/olive/common/hf/utils.py b/olive/common/hf/utils.py index 3bc5c29999..fbe4ed7f10 100644 --- a/olive/common/hf/utils.py +++ b/olive/common/hf/utils.py @@ -48,6 +48,11 @@ def _apply_test_model_config( if not updated: raise ValueError("Unable to create a test model because the config does not expose a hidden-layer count.") + if "dtype" in model_config and model_config.dtype == "auto": + # This is not allowed anymore with transformers >=4.57, + # we select float16 instead. + model_config.dtype = "float16" + return model_config From 76ee4efb7acf93a6b4fb0d921cf05c417b01eb59 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Xavier=20Dupr=C3=A9?= Date: Mon, 11 May 2026 16:26:28 +0200 Subject: [PATCH 22/35] update documentation --- docs/source/how-to/cli/cli-convert-phi-test.md | 2 +- docs/source/how-to/index.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/source/how-to/cli/cli-convert-phi-test.md b/docs/source/how-to/cli/cli-convert-phi-test.md index b0fc17ec14..c64209b1cd 100644 --- a/docs/source/how-to/cli/cli-convert-phi-test.md +++ b/docs/source/how-to/cli/cli-convert-phi-test.md @@ -16,8 +16,8 @@ olive optimize \ --device cpu \ --provider CPUExecutionProvider \ --precision int4 \ - --dry_run \ --output_path out/phi-smoke + --dry_run ``` This creates `out/phi-smoke/config.json` without launching the full conversion yet. diff --git a/docs/source/how-to/index.md b/docs/source/how-to/index.md index d0b4a290a1..35a6074237 100644 --- a/docs/source/how-to/index.md +++ b/docs/source/how-to/index.md @@ -8,11 +8,11 @@ The Olive CLI provides a set of primitives such as `quantize`, `finetune`, `onnx-graph-capture`, `auto-opt` that enable you to *easily* optimize select models and experiment with different cutting-edge optimization strategies without the need to define workflows. 
- [How to use the `olive optimize` command to optimize a Pytorch model](cli/cli-optimize) -- [How to convert a Phi model with a quick `--test` smoke check](cli/cli-convert-phi-test) - [How to use the `olive auto-opt` command to take a PyTorch/Hugging Face model and turn it into an optimized ONNX model](cli/cli-auto-opt) - [how to use the `olive finetune` command to create (Q)LoRA adapters](cli/cli-finetune) - [How to use the `olive quantize` command to quantize your model with different precisions and techniques such as AWQ](cli/cli-quantize) - [How to use the `olive run` command to execute an Olive workflow.](cli/cli-run) +- [How to convert a Phi model with a quick `--test` smoke check](cli/cli-convert-phi-test) # Olive Python API From 9ba38c7c1cb20585eb8622bb2fbe9ab4d6806ded Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 11 May 2026 14:42:05 +0000 Subject: [PATCH 23/35] docs: clarify phi smoke test output path Agent-Logs-Url: https://github.com/microsoft/Olive/sessions/c25a0cd6-d252-4e16-8783-52a5be97f14e Co-authored-by: xadupre <22452781+xadupre@users.noreply.github.com> --- docs/source/how-to/cli/cli-convert-phi-test.md | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/docs/source/how-to/cli/cli-convert-phi-test.md b/docs/source/how-to/cli/cli-convert-phi-test.md index c64209b1cd..f17fb3c425 100644 --- a/docs/source/how-to/cli/cli-convert-phi-test.md +++ b/docs/source/how-to/cli/cli-convert-phi-test.md @@ -16,7 +16,7 @@ olive optimize \ --device cpu \ --provider CPUExecutionProvider \ --precision int4 \ - --output_path out/phi-smoke + --output_path out/phi-smoke \ --dry_run ``` @@ -30,14 +30,16 @@ Use the generated config with `olive run` and pass `--test` so Olive swaps in a olive run \ --config out/phi-smoke/config.json \ --test out/phi-test-model \ - --output_path out/phi-smoke + --output_path out/phi-smoke-run ``` What this does: - `--test out/phi-test-model` creates a reduced random Phi model and saves it in `out/phi-test-model` - later runs reuse the same saved test model instead of recreating it -- `--output_path out/phi-smoke` reuses the generated workflow and stores the converted ONNX artifacts from the smoke test +- `--output_path out/phi-smoke-run` gives the smoke test its own output folder, so the generated ONNX artifacts are easy to find + +After the smoke test finishes, look under `out/phi-smoke-run` for the exported ONNX model and related files. 
This is a quick way to confirm that: From fa6bee7c4736b639b4addd666ca3938f378166d7 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 11 May 2026 14:46:18 +0000 Subject: [PATCH 24/35] docs: switch smoke test how-to to qwen Agent-Logs-Url: https://github.com/microsoft/Olive/sessions/acc48c65-2f6d-44c0-80c1-271e83838437 Co-authored-by: xadupre <22452781+xadupre@users.noreply.github.com> --- ...t-phi-test.md => cli-convert-qwen-test.md} | 32 +++++++++---------- docs/source/how-to/index.md | 4 +-- 2 files changed, 18 insertions(+), 18 deletions(-) rename docs/source/how-to/cli/{cli-convert-phi-test.md => cli-convert-qwen-test.md} (62%) diff --git a/docs/source/how-to/cli/cli-convert-phi-test.md b/docs/source/how-to/cli/cli-convert-qwen-test.md similarity index 62% rename from docs/source/how-to/cli/cli-convert-phi-test.md rename to docs/source/how-to/cli/cli-convert-qwen-test.md index f17fb3c425..a26be99a59 100644 --- a/docs/source/how-to/cli/cli-convert-phi-test.md +++ b/docs/source/how-to/cli/cli-convert-qwen-test.md @@ -1,45 +1,45 @@ -# How to convert a Phi model with a quick `--test` smoke check +# How to convert a Qwen model with a quick `--test` smoke check If you are converting a large language model, it is often useful to validate the Olive command, environment, and conversion recipe on a much smaller model before spending time on the full checkpoint. The `--test` option does that for Hugging Face models. Olive keeps the same model architecture, reduces it to a random 2-layer test model, saves it to the folder you provide, and reuses that folder on later runs. -This example uses [`microsoft/Phi-3.5-mini-instruct`](https://huggingface.co/microsoft/Phi-3.5-mini-instruct), but the same pattern works for other supported Hugging Face LLMs. +This example uses [`Qwen/Qwen3-0.6B`](https://huggingface.co/Qwen/Qwen3-0.6B), but the same pattern works for other supported Hugging Face LLMs. ## Step 1: generate the workflow config -Start by generating the config that Olive will run for the Phi conversion. +Start by generating the config that Olive will run for the Qwen conversion. ```bash olive optimize \ - --model_name_or_path microsoft/Phi-3.5-mini-instruct \ + --model_name_or_path Qwen/Qwen3-0.6B \ --device cpu \ --provider CPUExecutionProvider \ --precision int4 \ - --output_path out/phi-smoke \ + --output_path out/qwen-smoke \ --dry_run ``` -This creates `out/phi-smoke/config.json` without launching the full conversion yet. +This creates `out/qwen-smoke/config.json` without launching the full conversion yet. ## Step 2: run a fast smoke test with `olive run --test` -Use the generated config with `olive run` and pass `--test` so Olive swaps in a reduced random Phi model. +Use the generated config with `olive run` and pass `--test` so Olive swaps in a reduced random Qwen model. 
```bash olive run \ - --config out/phi-smoke/config.json \ - --test out/phi-test-model \ - --output_path out/phi-smoke-run + --config out/qwen-smoke/config.json \ + --test out/qwen-test-model \ + --output_path out/qwen-smoke-run ``` What this does: -- `--test out/phi-test-model` creates a reduced random Phi model and saves it in `out/phi-test-model` +- `--test out/qwen-test-model` creates a reduced random Qwen model and saves it in `out/qwen-test-model` - later runs reuse the same saved test model instead of recreating it -- `--output_path out/phi-smoke-run` gives the smoke test its own output folder, so the generated ONNX artifacts are easy to find +- `--output_path out/qwen-smoke-run` gives the smoke test its own output folder, so the generated ONNX artifacts are easy to find -After the smoke test finishes, look under `out/phi-smoke-run` for the exported ONNX model and related files. +After the smoke test finishes, look under `out/qwen-smoke-run` for the exported ONNX model and related files. This is a quick way to confirm that: @@ -51,12 +51,12 @@ If you omit the folder and just pass `--test`, `olive run` will save the reduced ## Step 3: run the full conversion -Once the smoke test succeeds, rerun the conversion on the full Phi checkpoint by removing `--test`. +Once the smoke test succeeds, rerun the conversion on the full Qwen checkpoint by removing `--test`. ```bash olive run \ - --config out/phi-smoke/config.json \ - --output_path out/phi-full + --config out/qwen-smoke/config.json \ + --output_path out/qwen-full ``` At this point you know the Olive command and the conversion recipe already worked on the lightweight test model, so you can focus on the full-model run instead of debugging both at once. diff --git a/docs/source/how-to/index.md b/docs/source/how-to/index.md index 35a6074237..4e00636e7d 100644 --- a/docs/source/how-to/index.md +++ b/docs/source/how-to/index.md @@ -12,7 +12,7 @@ The Olive CLI provides a set of primitives such as `quantize`, `finetune`, `onnx - [how to use the `olive finetune` command to create (Q)LoRA adapters](cli/cli-finetune) - [How to use the `olive quantize` command to quantize your model with different precisions and techniques such as AWQ](cli/cli-quantize) - [How to use the `olive run` command to execute an Olive workflow.](cli/cli-run) -- [How to convert a Phi model with a quick `--test` smoke check](cli/cli-convert-phi-test) +- [How to convert a Qwen model with a quick `--test` smoke check](cli/cli-convert-qwen-test) # Olive Python API @@ -44,7 +44,7 @@ The Olive CLI provides a set of primitives such as `quantize`, `finetune`, `onnx installation cli/cli-optimize -cli/cli-convert-phi-test +cli/cli-convert-qwen-test cli/cli-auto-opt cli/cli-finetune cli/cli-quantize From ffda0dc05edd2388104794d7aef39ee959061c5d Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 11 May 2026 15:14:56 +0000 Subject: [PATCH 25/35] test: cover documented llm smoke flow Agent-Logs-Url: https://github.com/microsoft/Olive/sessions/c421adc3-0615-4d10-bf15-b21d632d70b2 Co-authored-by: xadupre <22452781+xadupre@users.noreply.github.com> --- test/cli/test_cli.py | 92 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 92 insertions(+) diff --git a/test/cli/test_cli.py b/test/cli/test_cli.py index 48c607e2ad..e76e3fb1fa 100644 --- a/test/cli/test_cli.py +++ b/test/cli/test_cli.py @@ -260,6 +260,98 @@ def test_optimize_command_test_model_config(_, tmp_path): assert config["input_model"]["test_model_path"] == 
str(test_model_dir) +@patch("huggingface_hub.repo_exists", return_value=True) +def test_documented_smoke_test_commands_produce_onnx_with_tiny_random_llama(_, tmp_path): + import types + from unittest.mock import MagicMock + + from olive.model.config.model_config import ModelConfig + from olive.model.handler.hf import HfModelHandler + from olive.passes.onnx.model_builder import ModelBuilder + from olive.passes.pytorch.gptq import Gptq + + model_id = "hf-internal-testing/tiny-random-LlamaForCausalLM" + config_output_dir = tmp_path / "qwen-smoke" + test_model_dir = tmp_path / "qwen-test-model" + run_output_dir = tmp_path / "qwen-smoke-run" + + cli_main( + [ + "optimize", + "-m", + model_id, + "--device", + "cpu", + "--provider", + "CPUExecutionProvider", + "--precision", + "int4", + "--output_path", + str(config_output_dir), + "--dry_run", + ] + ) + + config_path = config_output_dir / "config.json" + assert config_path.exists() + + def materialize_test_model(self, *args, **kwargs): + Path(self.test_model_path).mkdir(parents=True, exist_ok=True) + (Path(self.test_model_path) / "config.json").write_text("{}") + return MagicMock() + + def fake_gptq_run(self, model, config, output_model_path): + return model + + def fake_create_model(*_, **kwargs): + output_dir = Path(kwargs["output_dir"]) + output_dir.mkdir(parents=True, exist_ok=True) + (output_dir / kwargs["filename"]).write_text("dummy onnx file") + (output_dir / "genai_config.json").write_text("{}") + + fake_builder = types.ModuleType("onnxruntime_genai.models.builder") + fake_builder.create_model = MagicMock(side_effect=fake_create_model) + fake_models = types.ModuleType("onnxruntime_genai.models") + fake_models.builder = fake_builder + fake_ort_genai = types.ModuleType("onnxruntime_genai") + fake_ort_genai.models = fake_models + fake_ort_genai.__version__ = "0.0.0" + mock_cfg = MagicMock() + mock_cfg.to_dict.return_value = {} + + with ( + patch.object(ModelConfig, "get_model_identifier", return_value="tiny-random-llama"), + patch.object(HfModelHandler, "get_hf_model_config", return_value=mock_cfg), + patch.object(HfModelHandler, "load_model", new=materialize_test_model), + patch.object(HfModelHandler, "save_metadata", return_value=[]), + patch.object(Gptq, "_run_for_config", autospec=True, side_effect=fake_gptq_run), + patch.object(ModelBuilder, "maybe_patch_quant"), + patch.dict( + sys.modules, + { + "onnxruntime_genai": fake_ort_genai, + "onnxruntime_genai.models": fake_models, + "onnxruntime_genai.models.builder": fake_builder, + }, + ), + ): + cli_main( + [ + "run", + "--config", + str(config_path), + "--test", + str(test_model_dir), + "--output_path", + str(run_output_dir), + ] + ) + + assert (test_model_dir / "config.json").exists() + assert (run_output_dir / "model.onnx").exists() + assert (run_output_dir / "genai_config.json").exists() + + @patch("olive.workflows.run") @patch("olive.model.handler.diffusers.is_valid_diffusers_model", return_value=True) def test_diffusion_lora_command(_, mock_run, tmp_path): From d0f868f7c5a66f23f1f18d91007fb865a9b14c79 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 11 May 2026 15:17:13 +0000 Subject: [PATCH 26/35] test: polish documented smoke flow test Agent-Logs-Url: https://github.com/microsoft/Olive/sessions/c421adc3-0615-4d10-bf15-b21d632d70b2 Co-authored-by: xadupre <22452781+xadupre@users.noreply.github.com> --- test/cli/test_cli.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/test/cli/test_cli.py 
b/test/cli/test_cli.py index e76e3fb1fa..a17950eaa4 100644 --- a/test/cli/test_cli.py +++ b/test/cli/test_cli.py @@ -271,9 +271,9 @@ def test_documented_smoke_test_commands_produce_onnx_with_tiny_random_llama(_, t from olive.passes.pytorch.gptq import Gptq model_id = "hf-internal-testing/tiny-random-LlamaForCausalLM" - config_output_dir = tmp_path / "qwen-smoke" - test_model_dir = tmp_path / "qwen-test-model" - run_output_dir = tmp_path / "qwen-smoke-run" + config_output_dir = tmp_path / "test-smoke" + test_model_dir = tmp_path / "test-model" + run_output_dir = tmp_path / "test-smoke-run" cli_main( [ @@ -306,7 +306,7 @@ def fake_gptq_run(self, model, config, output_model_path): def fake_create_model(*_, **kwargs): output_dir = Path(kwargs["output_dir"]) output_dir.mkdir(parents=True, exist_ok=True) - (output_dir / kwargs["filename"]).write_text("dummy onnx file") + (output_dir / kwargs["filename"]).write_text("dummy ONNX file") (output_dir / "genai_config.json").write_text("{}") fake_builder = types.ModuleType("onnxruntime_genai.models.builder") From a408b6391188bddcbafe7efd03a4c7efd78b53f2 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 11 May 2026 15:19:22 +0000 Subject: [PATCH 27/35] test: rename smoke flow cli test Agent-Logs-Url: https://github.com/microsoft/Olive/sessions/c421adc3-0615-4d10-bf15-b21d632d70b2 Co-authored-by: xadupre <22452781+xadupre@users.noreply.github.com> --- test/cli/test_cli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/cli/test_cli.py b/test/cli/test_cli.py index a17950eaa4..67a39a71c9 100644 --- a/test/cli/test_cli.py +++ b/test/cli/test_cli.py @@ -261,7 +261,7 @@ def test_optimize_command_test_model_config(_, tmp_path): @patch("huggingface_hub.repo_exists", return_value=True) -def test_documented_smoke_test_commands_produce_onnx_with_tiny_random_llama(_, tmp_path): +def test_optimize_dry_run_then_run_with_test_model(_, tmp_path): import types from unittest.mock import MagicMock From e272c2a8221f99a5befe016b3209d420b914a214 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 11 May 2026 15:21:38 +0000 Subject: [PATCH 28/35] test: refine smoke flow workflow stubs Agent-Logs-Url: https://github.com/microsoft/Olive/sessions/c421adc3-0615-4d10-bf15-b21d632d70b2 Co-authored-by: xadupre <22452781+xadupre@users.noreply.github.com> --- test/cli/test_cli.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/cli/test_cli.py b/test/cli/test_cli.py index 67a39a71c9..3c00aff9cd 100644 --- a/test/cli/test_cli.py +++ b/test/cli/test_cli.py @@ -297,7 +297,7 @@ def test_optimize_dry_run_then_run_with_test_model(_, tmp_path): def materialize_test_model(self, *args, **kwargs): Path(self.test_model_path).mkdir(parents=True, exist_ok=True) - (Path(self.test_model_path) / "config.json").write_text("{}") + (Path(self.test_model_path) / "config.json").write_text(json.dumps({"model_type": "llama"})) return MagicMock() def fake_gptq_run(self, model, config, output_model_path): @@ -315,7 +315,7 @@ def fake_create_model(*_, **kwargs): fake_models.builder = fake_builder fake_ort_genai = types.ModuleType("onnxruntime_genai") fake_ort_genai.models = fake_models - fake_ort_genai.__version__ = "0.0.0" + fake_ort_genai.__version__ = "0.10.0" mock_cfg = MagicMock() mock_cfg.to_dict.return_value = {} From c901b636e962e150a2cb490f39dbdec7d48fc594 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" 
<198982749+Copilot@users.noreply.github.com> Date: Mon, 11 May 2026 15:23:44 +0000 Subject: [PATCH 29/35] test: tidy smoke flow helper names Agent-Logs-Url: https://github.com/microsoft/Olive/sessions/c421adc3-0615-4d10-bf15-b21d632d70b2 Co-authored-by: xadupre <22452781+xadupre@users.noreply.github.com> --- test/cli/test_cli.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test/cli/test_cli.py b/test/cli/test_cli.py index 3c00aff9cd..40a203b9b9 100644 --- a/test/cli/test_cli.py +++ b/test/cli/test_cli.py @@ -295,7 +295,7 @@ def test_optimize_dry_run_then_run_with_test_model(_, tmp_path): config_path = config_output_dir / "config.json" assert config_path.exists() - def materialize_test_model(self, *args, **kwargs): + def setup_test_model_files(self, *args, **kwargs): Path(self.test_model_path).mkdir(parents=True, exist_ok=True) (Path(self.test_model_path) / "config.json").write_text(json.dumps({"model_type": "llama"})) return MagicMock() @@ -303,7 +303,7 @@ def materialize_test_model(self, *args, **kwargs): def fake_gptq_run(self, model, config, output_model_path): return model - def fake_create_model(*_, **kwargs): + def fake_create_model(**kwargs): output_dir = Path(kwargs["output_dir"]) output_dir.mkdir(parents=True, exist_ok=True) (output_dir / kwargs["filename"]).write_text("dummy ONNX file") @@ -322,7 +322,7 @@ def fake_create_model(*_, **kwargs): with ( patch.object(ModelConfig, "get_model_identifier", return_value="tiny-random-llama"), patch.object(HfModelHandler, "get_hf_model_config", return_value=mock_cfg), - patch.object(HfModelHandler, "load_model", new=materialize_test_model), + patch.object(HfModelHandler, "load_model", new=setup_test_model_files), patch.object(HfModelHandler, "save_metadata", return_value=[]), patch.object(Gptq, "_run_for_config", autospec=True, side_effect=fake_gptq_run), patch.object(ModelBuilder, "maybe_patch_quant"), From 36410cdf054ad007daa11f863ed67b121b69aad9 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 11 May 2026 15:26:08 +0000 Subject: [PATCH 30/35] test: clarify smoke flow mocks Agent-Logs-Url: https://github.com/microsoft/Olive/sessions/c421adc3-0615-4d10-bf15-b21d632d70b2 Co-authored-by: xadupre <22452781+xadupre@users.noreply.github.com> --- test/cli/test_cli.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test/cli/test_cli.py b/test/cli/test_cli.py index 40a203b9b9..04904551a0 100644 --- a/test/cli/test_cli.py +++ b/test/cli/test_cli.py @@ -295,12 +295,12 @@ def test_optimize_dry_run_then_run_with_test_model(_, tmp_path): config_path = config_output_dir / "config.json" assert config_path.exists() - def setup_test_model_files(self, *args, **kwargs): + def fake_load_model(self, *args, **kwargs): Path(self.test_model_path).mkdir(parents=True, exist_ok=True) (Path(self.test_model_path) / "config.json").write_text(json.dumps({"model_type": "llama"})) return MagicMock() - def fake_gptq_run(self, model, config, output_model_path): + def fake_gptq_run(self, model, _config, _output_model_path): return model def fake_create_model(**kwargs): @@ -322,7 +322,7 @@ def fake_create_model(**kwargs): with ( patch.object(ModelConfig, "get_model_identifier", return_value="tiny-random-llama"), patch.object(HfModelHandler, "get_hf_model_config", return_value=mock_cfg), - patch.object(HfModelHandler, "load_model", new=setup_test_model_files), + patch.object(HfModelHandler, "load_model", new=fake_load_model), patch.object(HfModelHandler, 
"save_metadata", return_value=[]), patch.object(Gptq, "_run_for_config", autospec=True, side_effect=fake_gptq_run), patch.object(ModelBuilder, "maybe_patch_quant"), From e16cb826fa332e472b6f00007e228446dc179981 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 11 May 2026 15:28:31 +0000 Subject: [PATCH 31/35] test: polish documented smoke flow test naming Agent-Logs-Url: https://github.com/microsoft/Olive/sessions/c421adc3-0615-4d10-bf15-b21d632d70b2 Co-authored-by: xadupre <22452781+xadupre@users.noreply.github.com> --- test/cli/test_cli.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/test/cli/test_cli.py b/test/cli/test_cli.py index 04904551a0..ef2c9840dd 100644 --- a/test/cli/test_cli.py +++ b/test/cli/test_cli.py @@ -261,7 +261,7 @@ def test_optimize_command_test_model_config(_, tmp_path): @patch("huggingface_hub.repo_exists", return_value=True) -def test_optimize_dry_run_then_run_with_test_model(_, tmp_path): +def test_optimize_dry_run_then_run_with_test_model(mock_repo_exists, tmp_path): import types from unittest.mock import MagicMock @@ -294,13 +294,15 @@ def test_optimize_dry_run_then_run_with_test_model(_, tmp_path): config_path = config_output_dir / "config.json" assert config_path.exists() + assert mock_repo_exists.called - def fake_load_model(self, *args, **kwargs): - Path(self.test_model_path).mkdir(parents=True, exist_ok=True) - (Path(self.test_model_path) / "config.json").write_text(json.dumps({"model_type": "llama"})) + def fake_load_model(handler, *args, **kwargs): + Path(handler.test_model_path).mkdir(parents=True, exist_ok=True) + (Path(handler.test_model_path) / "config.json").write_text(json.dumps({"model_type": "llama"})) return MagicMock() - def fake_gptq_run(self, model, _config, _output_model_path): + def fake_gptq_run(self, model, pass_config, output_model_path): + del pass_config, output_model_path return model def fake_create_model(**kwargs): From 750760406f64b2d34df3ff18d1efdf4adcf4dc94 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 11 May 2026 15:30:48 +0000 Subject: [PATCH 32/35] test: lift smoke flow imports and mock defaults Agent-Logs-Url: https://github.com/microsoft/Olive/sessions/c421adc3-0615-4d10-bf15-b21d632d70b2 Co-authored-by: xadupre <22452781+xadupre@users.noreply.github.com> --- test/cli/test_cli.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/test/cli/test_cli.py b/test/cli/test_cli.py index ef2c9840dd..8489b4586f 100644 --- a/test/cli/test_cli.py +++ b/test/cli/test_cli.py @@ -5,8 +5,9 @@ import json import subprocess import sys +import types from pathlib import Path -from unittest.mock import patch +from unittest.mock import MagicMock, patch import pytest @@ -262,9 +263,6 @@ def test_optimize_command_test_model_config(_, tmp_path): @patch("huggingface_hub.repo_exists", return_value=True) def test_optimize_dry_run_then_run_with_test_model(mock_repo_exists, tmp_path): - import types - from unittest.mock import MagicMock - from olive.model.config.model_config import ModelConfig from olive.model.handler.hf import HfModelHandler from olive.passes.onnx.model_builder import ModelBuilder @@ -327,7 +325,7 @@ def fake_create_model(**kwargs): patch.object(HfModelHandler, "load_model", new=fake_load_model), patch.object(HfModelHandler, "save_metadata", return_value=[]), patch.object(Gptq, "_run_for_config", autospec=True, side_effect=fake_gptq_run), - 
patch.object(ModelBuilder, "maybe_patch_quant"), + patch.object(ModelBuilder, "maybe_patch_quant", return_value=None), patch.dict( sys.modules, { From ac7840f189cafed6aef2d86c214449a7fc019940 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 11 May 2026 16:57:29 +0000 Subject: [PATCH 33/35] fix: keep qwen test layer types in sync Agent-Logs-Url: https://github.com/microsoft/Olive/sessions/5bd5199d-42fa-4109-94a3-c7995abe72a2 Co-authored-by: xadupre <22452781+xadupre@users.noreply.github.com> --- olive/common/hf/utils.py | 4 ++++ test/common/test_hf.py | 19 +++++++++++++++++-- 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/olive/common/hf/utils.py b/olive/common/hf/utils.py index fbe4ed7f10..c2c2110c2c 100644 --- a/olive/common/hf/utils.py +++ b/olive/common/hf/utils.py @@ -48,6 +48,10 @@ def _apply_test_model_config( if not updated: raise ValueError("Unable to create a test model because the config does not expose a hidden-layer count.") + layer_types = getattr(model_config, "layer_types", None) + if isinstance(layer_types, (list, tuple)): + model_config.layer_types = layer_types[:hidden_layers] + if "dtype" in model_config and model_config.dtype == "auto": # This is not allowed anymore with transformers >=4.57, # we select float16 instead. diff --git a/test/common/test_hf.py b/test/common/test_hf.py index 32fa143b89..cab00e3df3 100644 --- a/test/common/test_hf.py +++ b/test/common/test_hf.py @@ -6,10 +6,10 @@ import pytest import torch -from transformers import BertConfig, GPT2Config +from transformers import BertConfig, GPT2Config, Qwen3Config from olive.common.hf.model_io import get_model_dummy_input, get_model_io_config -from olive.common.hf.utils import _load_test_model, load_model_from_task +from olive.common.hf.utils import _apply_test_model_config, _load_test_model, load_model_from_task def test_load_model_from_task(): @@ -121,6 +121,21 @@ def test_load_model_from_task_test_model_config_reuses_saved_model(tmp_path): assert mock_from_pretrained.call_args_list[1].args[1] == str(test_model_path) +def test_apply_test_model_config_updates_qwen3_layer_types(): + model_config = Qwen3Config() + model_config.num_hidden_layers = 4 + model_config.layer_types = model_config.layer_types[:4] + + updated_config = _apply_test_model_config(model_config, {"hidden_layers": 2}) + + assert updated_config.num_hidden_layers == 2 + assert updated_config.layer_types == model_config.layer_types[:2] + reloaded_config = Qwen3Config(**updated_config.to_dict()) + assert reloaded_config.num_hidden_layers == 2 + assert len(reloaded_config.layer_types) == 2 + assert reloaded_config.layer_types == model_config.layer_types[:2] + + def test_load_test_model_omits_unsupported_trust_remote_code_kwarg(): model_config = BertConfig(num_hidden_layers=12) # pylint: disable=unexpected-keyword-arg captured = {} From f165dda8b883920fab82cb7b5d2755a08b7cb7e6 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 12 May 2026 09:54:28 +0000 Subject: [PATCH 34/35] Merge origin/main and resolve model builder test conflict Agent-Logs-Url: https://github.com/microsoft/Olive/sessions/2f777346-5b6a-423f-89fd-f40de41d2b70 Co-authored-by: xadupre <22452781+xadupre@users.noreply.github.com> --- .../source/how-to/extending/custom-scripts.md | 2 +- mcp/uv.lock | 6 +- olive/cache.py | 10 +- olive/cli/auto_opt.py | 14 ++- olive/evaluator/lmeval_ort.py | 18 ++-- olive/passes/onnx/kquant_quantization.py | 10 +- 
olive/passes/onnx/model_builder.py | 99 ++++++++++++----- test/passes/onnx/test_model_builder.py | 102 +++++++++++++++++- 8 files changed, 215 insertions(+), 46 deletions(-) diff --git a/docs/source/how-to/extending/custom-scripts.md b/docs/source/how-to/extending/custom-scripts.md index 8e8961a5b6..5e78149fe1 100644 --- a/docs/source/how-to/extending/custom-scripts.md +++ b/docs/source/how-to/extending/custom-scripts.md @@ -36,7 +36,7 @@ class MyDataLoader: @Registry.register_dataloader() def my_dataloader(dataset, batch_size): - return MyDataloader(dataset, batch_size) + return MyDataLoader(dataset, batch_size) @Registry.register_post_process() def my_post_process(output): diff --git a/mcp/uv.lock b/mcp/uv.lock index b7995e5533..50c363efdb 100644 --- a/mcp/uv.lock +++ b/mcp/uv.lock @@ -594,11 +594,11 @@ wheels = [ [[package]] name = "python-multipart" -version = "0.0.26" +version = "0.0.27" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/88/71/b145a380824a960ebd60e1014256dbb7d2253f2316ff2d73dfd8928ec2c3/python_multipart-0.0.26.tar.gz", hash = "sha256:08fadc45918cd615e26846437f50c5d6d23304da32c341f289a617127b081f17", size = 43501, upload-time = "2026-04-10T14:09:59.473Z" } +sdist = { url = "https://files.pythonhosted.org/packages/69/9b/f23807317a113dc36e74e75eb265a02dd1a4d9082abc3c1064acd22997c4/python_multipart-0.0.27.tar.gz", hash = "sha256:9870a6a8c5a20a5bf4f07c017bd1489006ff8836cff097b6933355ee2b49b602", size = 44043, upload-time = "2026-04-27T10:51:26.649Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/9a/22/f1925cdda983ab66fc8ec6ec8014b959262747e58bdca26a4e3d1da29d56/python_multipart-0.0.26-py3-none-any.whl", hash = "sha256:c0b169f8c4484c13b0dcf2ef0ec3a4adb255c4b7d18d8e420477d2b1dd03f185", size = 28847, upload-time = "2026-04-10T14:09:58.131Z" }, + { url = "https://files.pythonhosted.org/packages/99/78/4126abcbdbd3c559d43e0db7f7b9173fc6befe45d39a2856cc0b8ec2a5a6/python_multipart-0.0.27-py3-none-any.whl", hash = "sha256:6fccfad17a27334bd0193681b369f476eda3409f17381a2d65aa7df3f7275645", size = 29254, upload-time = "2026-04-27T10:51:24.997Z" }, ] [[package]] diff --git a/olive/cache.py b/olive/cache.py index 22b13eae5b..ceb64e1528 100644 --- a/olive/cache.py +++ b/olive/cache.py @@ -439,13 +439,19 @@ def save_model( else: from olive.passes.onnx.common import resave_model + component_output_name = ( + component_name + if Path(component_name).suffix == ".onnx" + else f"{component_name}.onnx" + ) + resave_model( ModelConfig.model_validate(component_model_json).create_model().model_path, - actual_output_dir / f"{component_name}.onnx", + actual_output_dir / component_output_name, saved_external_files=saved_external_files, ) component_model_json["config"][resource_name] = str(actual_output_dir) - component_model_json["config"]["onnx_file_name"] = f"{component_name}.onnx" + component_model_json["config"]["onnx_file_name"] = component_output_name copied_components.append(component_model_json) diff --git a/olive/cli/auto_opt.py b/olive/cli/auto_opt.py index 2e0f73444f..ef8f5fed8d 100644 --- a/olive/cli/auto_opt.py +++ b/olive/cli/auto_opt.py @@ -2,6 +2,7 @@ # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. 
# -------------------------------------------------------------------------- +import logging from argparse import ArgumentParser from collections import OrderedDict from copy import deepcopy @@ -25,13 +26,19 @@ from olive.package_config import OlivePackageConfig from olive.telemetry import action +logger = logging.getLogger(__name__) + class AutoOptCommand(BaseOliveCLICommand): @staticmethod def register_subcommand(parser: ArgumentParser): sub_parser = parser.add_parser( "auto-opt", - help="Automatically optimize the performance of the input model.", + help=( + "Automatically optimize the performance of the input model.\n" + "**** DEPRECATION WARNING ****\n" + '"auto-opt" command is deprecated in favor of "optimize".' + ), ) # Model options @@ -174,6 +181,11 @@ def register_subcommand(parser: ArgumentParser): @action def run(self): + logger.warning( + "**** DEPRECATION WARNING ****\n" + '"auto-opt" command is deprecated in favor of "optimize". Please switch to using "optimize".\n' + "Deprecated commands will be removed entirely in future release." + ) return self._run_workflow() def _get_run_config(self, tempdir) -> dict: diff --git a/olive/evaluator/lmeval_ort.py b/olive/evaluator/lmeval_ort.py index 50d1f1289d..c4a158533d 100644 --- a/olive/evaluator/lmeval_ort.py +++ b/olive/evaluator/lmeval_ort.py @@ -551,27 +551,25 @@ def model_call(self, input_ids: torch.Tensor, cont_len: int = 0) -> torch.Tensor self.params.set_search_options(batch_size=batch_size) generator = og.Generator(self.model, self.params) - if self._returns_full_logits: - generator.append_tokens(input_ids.tolist()) - return torch.from_numpy(generator.get_output("logits")).to(self.device) - - # Model only returns logits for the last appended position. if batch_size > 1 and cont_len > 1: raise ValueError( - "batch_size > 1 is not supported when the model returns single-position logits" + "batch_size > 1 is not supported when using incremental get_logits() retrieval" " and continuation length > 1. Right-padding misaligns continuation positions across" " batch elements. Use batch_size=1 instead." ) - # Bulk-append context tokens, then step through the last cont_len tokens - # one at a time to collect only the logits we actually need. + # Use incremental token appending with get_logits() to avoid copying + # the full logits tensor from GPU to CPU. get_output("logits") copies + # seq_len * vocab_size * 2 bytes (e.g. 472MB for 900 tokens with + # 262K vocab), while get_logits() copies only vocab_size * 4 bytes + # (~1MB) per position. n_logits = max(cont_len, 1) prefix_len = seq_len - n_logits generator.append_tokens(input_ids[:, : prefix_len + 1].tolist()) - all_logits = [torch.from_numpy(generator.get_output("logits")).to(self.device)] + all_logits = [torch.from_numpy(generator.get_logits()).to(self.device)] for i in range(prefix_len + 1, seq_len): generator.append_tokens(input_ids[:, i : i + 1].tolist()) - all_logits.append(torch.from_numpy(generator.get_output("logits")).to(self.device)) + all_logits.append(torch.from_numpy(generator.get_logits()).to(self.device)) # No need to pad to [batch, seq_len, vocab]. 
The slicing in _loglikelihood_tokens computes # ctx_len = inplen + (logits.shape[0] - padding_len_inp), which adjusts for the shorter diff --git a/olive/passes/onnx/kquant_quantization.py b/olive/passes/onnx/kquant_quantization.py index 5d75016ecc..4263406afd 100644 --- a/olive/passes/onnx/kquant_quantization.py +++ b/olive/passes/onnx/kquant_quantization.py @@ -256,7 +256,15 @@ def _default_config(cls, accelerator_spec: AcceleratorSpec) -> dict[str, PassCon def _run_for_config( self, model: ONNXModelHandler, config: type[BasePassConfig], output_model_path: str ) -> ONNXModelHandler: - output_model_path = resolve_onnx_path(output_model_path, Path(model.model_path).name) + # For composite model components (e.g., Whisper encoder.onnx/decoder.onnx), + # output_model_path already includes .onnx extension. Strip it so ir.save doesn't + # create a double extension (.onnx.onnx). For other cases, resolve normally. + output_path_obj = Path(output_model_path) + if output_path_obj.suffix == ".onnx": + output_model_path = str(output_path_obj.with_suffix("")) + else: + output_model_path = resolve_onnx_path(output_model_path, Path(model.model_path).name) + ir_model = model.load_ir_model() ir.external_data.load_to_model(ir_model) ir_model.graph.opset_imports[MSFT_DOMAIN] = 1 diff --git a/olive/passes/onnx/model_builder.py b/olive/passes/onnx/model_builder.py index ba29c0a9b3..24f3c27785 100644 --- a/olive/passes/onnx/model_builder.py +++ b/olive/passes/onnx/model_builder.py @@ -20,7 +20,7 @@ from olive.constants import Precision from olive.hardware.accelerator import AcceleratorSpec, Device from olive.hardware.constants import ExecutionProvider -from olive.model import HfModelHandler, ONNXModelHandler +from olive.model import CompositeModelHandler, HfModelHandler, ONNXModelHandler from olive.model.utils import resolve_onnx_path from olive.passes import Pass from olive.passes.olive_pass import PassConfigParam @@ -273,8 +273,9 @@ def _run_for_config( if config.extra_options: extra_args.update(config.extra_options) - # Ensure output_model_filepath matches the final filename in extra_args - output_model_filepath = Path(output_model_path) / extra_args["filename"] + # Ensure output_model_filepath matches the final filename in extra_args while preserving + # the resolved output directory selected above. + output_model_filepath = output_model_filepath.parent / extra_args["filename"] model_attributes = copy.deepcopy(model.model_attributes or {}) @@ -292,26 +293,6 @@ def _run_for_config( **extra_args, ) - # Apply post-processing annotations (split assignments and/or layer annotations) - # in a single load/save cycle to avoid redundant disk I/O. 
- split_assignments = model_attributes.get("split_assignments") if not metadata_only else None - layer_annotations = model_attributes.get("layer_annotations") if not metadata_only else None - - if split_assignments or layer_annotations: - model_proto = onnx.load(output_model_filepath, load_external_data=False) - - if split_assignments: - # NOTE: currently the model builder renames modules to it's own naming convention - # so the assignments for the renamed modules won't match - split_assignment_str = ";".join([f"{k}={v}" for k, v in split_assignments.items()]) - onnx.helper.set_model_props(model_proto, {"split_assignments": split_assignment_str}) - - if layer_annotations: - from olive.passes.onnx.layer_annotation import annotate_proto_model - - annotate_proto_model(model_proto, layer_annotations) - - onnx.save(model_proto, output_model_filepath) except Exception: # if model building fails, clean up the intermediate files in the cache_dir cache_dir = Path(HF_HUB_CACHE) @@ -337,6 +318,58 @@ def _run_for_config( # tokenizer and generation configs are skipped since they are already saved by the model builder model.save_metadata(output_model_filepath.parent) + generated_onnx_files = sorted(output_model_filepath.parent.glob("*.onnx")) if not metadata_only else [] + + # For multi-file models (e.g., Whisper), preserve component file names and process each file independently + # in subsequent passes by returning a CompositeModelHandler. + is_multi_file_model = not metadata_only and len(generated_onnx_files) > 1 + resolved_single_model_filepath = output_model_filepath + if ( + not metadata_only + and not is_multi_file_model + and not output_model_filepath.exists() + and len(generated_onnx_files) == 1 + ): + logger.info( + "ONNX model file %s does not exist, using %s instead", + output_model_filepath, + generated_onnx_files[0].name, + ) + resolved_single_model_filepath = generated_onnx_files[0] + + # Apply post-processing annotations (split assignments and/or layer annotations) + # in a single load/save cycle to avoid redundant disk I/O. 
+ split_assignments = model_attributes.get("split_assignments") if not metadata_only else None + layer_annotations = model_attributes.get("layer_annotations") if not metadata_only else None + if is_multi_file_model: + primary_onnx_files = generated_onnx_files + elif resolved_single_model_filepath.exists(): + primary_onnx_files = [resolved_single_model_filepath] + else: + primary_onnx_files = [] + if split_assignments or layer_annotations: + if primary_onnx_files: + for primary_onnx_file in primary_onnx_files: + model_proto = onnx.load(primary_onnx_file, load_external_data=False) + + if split_assignments: + # NOTE: currently the model builder renames modules to it's own naming convention + # so the assignments for the renamed modules won't match + split_assignment_str = ";".join([f"{k}={v}" for k, v in split_assignments.items()]) + onnx.helper.set_model_props(model_proto, {"split_assignments": split_assignment_str}) + + if layer_annotations: + from olive.passes.onnx.layer_annotation import annotate_proto_model + + annotate_proto_model(model_proto, layer_annotations) + + onnx.save(model_proto, primary_onnx_file) + else: + logger.warning( + "Skipping split_assignments/layer_annotations because no ONNX file was generated in %s.", + output_model_filepath.parent, + ) + # add additional files generated by model builder to model_attributes additional_files = model_attributes.get("additional_files") or [] if metadata_only: @@ -347,20 +380,36 @@ def _run_for_config( str(output_model_filepath.parent / "genai_config.json"), ] else: + primary_model_paths = {str(fp) for fp in primary_onnx_files} model_attributes["additional_files"] = sorted( set(additional_files) # all files in the output directory except the model and model.data files | {str(fp) for fp in output_model_filepath.parent.iterdir()} - - {str(output_model_filepath), str(output_model_filepath) + ".data"} + - primary_model_paths + - {f"{path}.data" for path in primary_model_paths} ) if metadata_only: output_model = copy.copy(model) output_model.model_attributes = model_attributes + elif is_multi_file_model: + # Use the ONNX filenames as component names so child passes write back to encoder.onnx/decoder.onnx + # instead of defaulting to model.onnx. + component_names = [fp.name for fp in generated_onnx_files] + components = [ + ONNXModelHandler(output_model_filepath.parent, onnx_file_name=component_name) + for component_name in component_names + ] + output_model = CompositeModelHandler( + components, + component_names, + model_path=output_model_filepath.parent, + model_attributes=model_attributes, + ) else: output_model = ONNXModelHandler( output_model_filepath.parent, - onnx_file_name=output_model_filepath.name, + onnx_file_name=resolved_single_model_filepath.name, model_attributes=model_attributes, ) diff --git a/test/passes/onnx/test_model_builder.py b/test/passes/onnx/test_model_builder.py index 0c838a3f7f..0f71535db9 100644 --- a/test/passes/onnx/test_model_builder.py +++ b/test/passes/onnx/test_model_builder.py @@ -2,16 +2,16 @@ # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. 
 # --------------------------------------------------------------------------
+import json
 import sys
 import types
 from pathlib import Path
-from unittest.mock import MagicMock, patch
+from unittest.mock import MagicMock, Mock, patch
 
 import onnx
 import pytest
 
-from olive.model import ONNXModelHandler
-from olive.model.handler.hf import HfModelHandler
+from olive.model import CompositeModelHandler, HfModelHandler, ONNXModelHandler
 from olive.passes.olive_pass import create_pass_from_dict
 from olive.passes.onnx.model_builder import ModelBuilder
 from olive.passes.pytorch.rtn import Rtn
@@ -20,6 +20,29 @@
 TINY_RANDOM_LLAMA_MODEL_ID = "hf-internal-testing/tiny-random-LlamaForCausalLM"
 
 
+def _create_test_onnx_model(model_path: Path, node_name: str):
+    input_info = onnx.helper.make_tensor_value_info("input", onnx.TensorProto.FLOAT, [1, 1])
+    output_info = onnx.helper.make_tensor_value_info("output", onnx.TensorProto.FLOAT, [1, 1])
+    node = onnx.helper.make_node("Identity", ["input"], ["output"], name=node_name)
+    graph = onnx.helper.make_graph([node], "test_graph", [input_info], [output_info])
+    model = onnx.helper.make_model(graph)
+    onnx.save(model, model_path)
+
+
+def _mock_genai_builder(monkeypatch, create_model_fn):
+    builder_module = types.ModuleType("onnxruntime_genai.models.builder")
+    builder_module.create_model = create_model_fn
+    models_module = types.ModuleType("onnxruntime_genai.models")
+    models_module.builder = builder_module
+    genai_module = types.ModuleType("onnxruntime_genai")
+    genai_module.__version__ = "0.8.0"
+    genai_module.models = models_module
+    monkeypatch.setitem(sys.modules, "onnxruntime_genai", genai_module)
+    monkeypatch.setitem(sys.modules, "onnxruntime_genai.models", models_module)
+    monkeypatch.setitem(sys.modules, "onnxruntime_genai.models.builder", builder_module)
+    monkeypatch.setattr(ModelBuilder, "maybe_patch_quant", staticmethod(lambda: None))
+
+
 @pytest.mark.parametrize("metadata_only", [True, False])
 def test_model_builder(tmp_path, metadata_only):
     input_model = make_local_tiny_llama(tmp_path / "input_model", "onnx" if metadata_only else "hf")
@@ -162,3 +185,76 @@ def fake_create_model(*_, **kwargs):
     assert test_model_path.exists()
     assert fake_builder.create_model.call_args.kwargs["model_name"] == str(test_model_path)
     assert fake_builder.create_model.call_args.kwargs["input_path"] == str(test_model_path)
+
+
+def test_model_builder_apply_annotations_on_single_file_fallback(tmp_path, monkeypatch):
+    def fake_create_model(
+        model_name, input_path, output_dir, precision, execution_provider, cache_dir, filename, **kwargs
+    ):
+        output_dir = Path(output_dir)
+        output_dir.mkdir(parents=True, exist_ok=True)
+        _create_test_onnx_model(output_dir / "actual.onnx", "test_node")
+        (output_dir / "actual.onnx.data").write_text("external_data")
+        (output_dir / "tokenizer.json").write_text("{}")
+        (output_dir / "genai_config.json").write_text(json.dumps({"search": {}}))
+
+    _mock_genai_builder(monkeypatch, fake_create_model)
+    input_model = Mock(spec=HfModelHandler)
+    input_model.model_name_or_path = "dummy-model"
+    input_model.adapter_path = None
+    input_model.test_model_config = None
+    input_model.test_model_path = None
+    input_model.model_attributes = {"split_assignments": {"model.layers.0": 1}}
+
+    p = create_pass_from_dict(
+        ModelBuilder, {"precision": "fp32", "extra_options": {"filename": "expected.onnx"}}, disable_search=True
+    )
+    output_folder = tmp_path / "output_model"
+    output_model = p.run(input_model, output_folder)
+
+    assert isinstance(output_model, ONNXModelHandler)
+    assert output_model.onnx_file_name == "actual.onnx"
+    model_proto = onnx.load(output_folder / "actual.onnx", load_external_data=False)
+    metadata_props = {prop.key: prop.value for prop in model_proto.metadata_props}
+    assert metadata_props["split_assignments"] == "model.layers.0=1"
+    assert str(output_folder / "actual.onnx") not in output_model.model_attributes["additional_files"]
+    assert str(output_folder / "actual.onnx.data") not in output_model.model_attributes["additional_files"]
+    assert str(output_folder / "tokenizer.json") in output_model.model_attributes["additional_files"]
+
+
+def test_model_builder_multi_file_output_preserves_component_filenames(tmp_path, monkeypatch):
+    def fake_create_model(
+        model_name, input_path, output_dir, precision, execution_provider, cache_dir, filename, **kwargs
+    ):
+        output_dir = Path(output_dir)
+        output_dir.mkdir(parents=True, exist_ok=True)
+        _create_test_onnx_model(output_dir / "encoder.onnx", "encoder_node")
+        _create_test_onnx_model(output_dir / "decoder.onnx", "decoder_node")
+        (output_dir / "encoder.onnx.data").write_text("encoder_data")
+        (output_dir / "decoder.onnx.data").write_text("decoder_data")
+        (output_dir / "tokenizer.json").write_text("{}")
+        (output_dir / "genai_config.json").write_text(json.dumps({"search": {}}))
+
+    _mock_genai_builder(monkeypatch, fake_create_model)
+    input_model = Mock(spec=HfModelHandler)
+    input_model.model_name_or_path = "dummy-model"
+    input_model.adapter_path = None
+    input_model.test_model_config = None
+    input_model.test_model_path = None
+    input_model.model_attributes = {}
+
+    p = create_pass_from_dict(ModelBuilder, {"precision": "fp32"}, disable_search=True)
+    output_folder = tmp_path / "output_model"
+    output_model = p.run(input_model, output_folder)
+
+    assert isinstance(output_model, CompositeModelHandler)
+    expected_component_names = sorted(["encoder.onnx", "decoder.onnx"])
+    assert output_model.model_component_names == expected_component_names
+    component_onnx_files = [component.onnx_file_name for component in output_model.model_components]
+    assert component_onnx_files == output_model.model_component_names
+    additional_files = output_model.model_attributes["additional_files"]
+    assert str(output_folder / "encoder.onnx") not in additional_files
+    assert str(output_folder / "decoder.onnx") not in additional_files
+    assert str(output_folder / "encoder.onnx.data") not in additional_files
+    assert str(output_folder / "decoder.onnx.data") not in additional_files
+    assert str(output_folder / "tokenizer.json") in additional_files

From 8941efbed4c153b3ddb8804be313ac3e9fa7ff46 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Xavier=20Dupr=C3=A9?=
Date: Tue, 12 May 2026 12:10:52 +0200
Subject: [PATCH 35/35] Potential fix for pull request finding

Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com>
---
 olive/common/hf/utils.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/olive/common/hf/utils.py b/olive/common/hf/utils.py
index c2c2110c2c..2abc534eb0 100644
--- a/olive/common/hf/utils.py
+++ b/olive/common/hf/utils.py
@@ -52,7 +52,8 @@ def _apply_test_model_config(
         if isinstance(layer_types, (list, tuple)):
             model_config.layer_types = layer_types[:hidden_layers]
 
-    if "dtype" in model_config and model_config.dtype == "auto":
+    dtype = getattr(model_config, "dtype", None)
+    if dtype == "auto":
         # This is not allowed anymore with transformers >=4.57,
         # we select float16 instead.
         model_config.dtype = "float16"
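
A note on the final fix above: a membership test like "dtype" in model_config only works when the
config type implements the container protocol, and a plain Python object with neither __contains__
nor __iter__ raises TypeError on "in"; getattr sidesteps the question entirely. A minimal
illustration of the difference, using a hypothetical stand-in class rather than a real transformers
config:

class StubConfig:
    """Stand-in for a config object that has attributes but no container protocol."""

    def __init__(self):
        self.dtype = "auto"


cfg = StubConfig()

# The old check raises on objects like this one:
#     "dtype" in cfg  ->  TypeError: argument of type 'StubConfig' is not iterable

# The patched check degrades to None when the attribute is missing.
dtype = getattr(cfg, "dtype", None)
if dtype == "auto":
    # Per the comment in the patch, transformers >= 4.57 no longer accepts
    # dtype="auto" here, so a concrete dtype is substituted.
    cfg.dtype = "float16"

assert cfg.dtype == "float16"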
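
Looking back at the model builder changes earlier in this series, the multi-file branch reduces to
one construction: one ONNXModelHandler per generated file, with the file names doubling as the
component names. A condensed standalone sketch, assuming out_dir already holds encoder.onnx and
decoder.onnx (the directory name is hypothetical; the handler signatures are the ones used in the
diff):

from pathlib import Path

from olive.model import CompositeModelHandler, ONNXModelHandler

out_dir = Path("output_model")  # hypothetical model builder output directory

# sorted() keeps the component order deterministic, as in the pass.
onnx_files = sorted(out_dir.glob("*.onnx"))

# Naming each component after its file is what lets downstream passes write
# back to encoder.onnx/decoder.onnx instead of defaulting to model.onnx.
component_names = [fp.name for fp in onnx_files]
components = [ONNXModelHandler(out_dir, onnx_file_name=name) for name in component_names]

composite = CompositeModelHandler(components, component_names, model_path=out_dir)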
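
The split_assignments annotation round-trips through ONNX model metadata as a ";"-joined list of
module=assignment pairs. A sketch of reading it back from a saved model, mirroring what the
single-file test asserts (the path is hypothetical, and values come back as strings, not ints):

import onnx

# load_external_data=False keeps the load cheap, as in the pass itself.
model_proto = onnx.load("output_model/actual.onnx", load_external_data=False)

# metadata_props is a repeated StringStringEntryProto; rebuild a plain dict.
metadata = {prop.key: prop.value for prop in model_proto.metadata_props}

# Invert the ";".join(f"{k}={v}") encoding performed by the pass.
split_assignments = dict(pair.split("=", 1) for pair in metadata["split_assignments"].split(";"))
# e.g. {"model.layers.0": "1"}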
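
The single-file fallback added to the pass can likewise be read as one small rule: use the requested
file if it exists, otherwise fall back only when exactly one .onnx file was produced. A sketch under
those assumptions (resolve_onnx_file is a hypothetical helper, not part of the pass):

from pathlib import Path


def resolve_onnx_file(expected: Path) -> Path:
    """Pick the ONNX file to report, preferring the requested path."""
    if expected.exists():
        return expected
    generated = sorted(expected.parent.glob("*.onnx"))
    # Only an unambiguous single file is a safe fallback; the multi-file case
    # is handled separately via CompositeModelHandler.
    if len(generated) == 1:
        return generated[0]
    return expected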