diff --git a/docs/source/how-to/cli/cli-convert-qwen-test.md b/docs/source/how-to/cli/cli-convert-qwen-test.md
new file mode 100644
index 0000000000..a26be99a59
--- /dev/null
+++ b/docs/source/how-to/cli/cli-convert-qwen-test.md
@@ -0,0 +1,76 @@
+# How to convert a Qwen model with a quick `--test` smoke check
+
+When you convert a large language model, it is often useful to validate the Olive command, environment, and conversion recipe on a much smaller model before spending time on the full checkpoint.
+
+The `--test` option does exactly that for Hugging Face models. Olive keeps the same model architecture but reduces it to a randomly initialized 2-layer test model, saves it to the folder you provide, and reuses that folder on later runs.
+
+This example uses [`Qwen/Qwen3-0.6B`](https://huggingface.co/Qwen/Qwen3-0.6B), but the same pattern works for other supported Hugging Face LLMs.
+
+## Step 1: generate the workflow config
+
+Start by generating the config that Olive will run for the Qwen conversion.
+
+```bash
+olive optimize \
+    --model_name_or_path Qwen/Qwen3-0.6B \
+    --device cpu \
+    --provider CPUExecutionProvider \
+    --precision int4 \
+    --output_path out/qwen-smoke \
+    --dry_run
+```
+
+This creates `out/qwen-smoke/config.json` without launching the full conversion yet.
+
+## Step 2: run a fast smoke test with `olive run --test`
+
+Use the generated config with `olive run` and pass `--test` so Olive swaps in a reduced random Qwen model.
+
+```bash
+olive run \
+    --config out/qwen-smoke/config.json \
+    --test out/qwen-test-model \
+    --output_path out/qwen-smoke-run
+```
+
+What this does:
+
+- `--test out/qwen-test-model` creates a reduced random Qwen model and saves it in `out/qwen-test-model`
+- later runs reuse the same saved test model instead of recreating it
+- `--output_path out/qwen-smoke-run` gives the smoke test its own output folder, so the generated ONNX artifacts are easy to find
+
+After the smoke test finishes, look under `out/qwen-smoke-run` for the exported ONNX model and related files.
+
+This is a quick way to confirm that:
+
+- Olive can load the source model
+- the selected optimization recipe is valid for your setup
+- the conversion path completes before you run the full model
+
+If you omit the folder and just pass `--test`, `olive run` saves the reduced model in a `test_model` subfolder of the run's output directory.
+
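+As an optional sanity check (not required by Olive, and assuming the `out/qwen-test-model` path used above), note that the saved folder is a regular Hugging Face model directory, so you can load its config and confirm that the reduced model keeps the Qwen3 architecture with only 2 hidden layers:
+
+```python
+from transformers import AutoConfig
+
+# Load the config of the reduced test model that `olive run --test` saved in Step 2.
+config = AutoConfig.from_pretrained("out/qwen-test-model")
+
+# The architecture family is unchanged; only the hidden-layer count is reduced.
+print(config.model_type)         # expected: qwen3
+print(config.num_hidden_layers)  # expected: 2
+```
+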
+## Step 3: run the full conversion
+
+Once the smoke test succeeds, rerun the conversion on the full Qwen checkpoint by removing `--test`.
+
+```bash
+olive run \
+    --config out/qwen-smoke/config.json \
+    --output_path out/qwen-full
+```
+
+At this point you already know that the Olive command and the conversion recipe work on the lightweight test model, so you can focus on the full-model run instead of debugging both at once.
+
+## Why keep the test model folder?
+
+The saved test model is useful beyond the first smoke test:
+
+- you can rerun the reduced conversion quickly while iterating on options
+- you can reuse the same reduced Hugging Face model later when comparing its outputs against the exported ONNX model
+- you avoid recreating a new random test checkpoint every time
+
+## Related docs
+
+- [How to use the `olive optimize` command to optimize a PyTorch model](cli-optimize)
+- [How to write a new workflow from scratch](../configure-workflows/build-workflow)
+- [CLI reference](../../reference/cli)
diff --git a/docs/source/how-to/index.md b/docs/source/how-to/index.md
index 8ec16aba9e..4e00636e7d 100644
--- a/docs/source/how-to/index.md
+++ b/docs/source/how-to/index.md
@@ -12,6 +12,7 @@ The Olive CLI provides a set of primitives such as `quantize`, `finetune`, `onnx
 - [how to use the `olive finetune` command to create (Q)LoRA adapters](cli/cli-finetune)
 - [How to use the `olive quantize` command to quantize your model with different precisions and techniques such as AWQ](cli/cli-quantize)
 - [How to use the `olive run` command to execute an Olive workflow.](cli/cli-run)
+- [How to convert a Qwen model with a quick `--test` smoke check](cli/cli-convert-qwen-test)
 
 # Olive Python API
 
@@ -43,6 +44,7 @@ The Olive CLI provides a set of primitives such as `quantize`, `finetune`, `onnx
 
 installation
 cli/cli-optimize
+cli/cli-convert-qwen-test
 cli/cli-auto-opt
 cli/cli-finetune
 cli/cli-quantize
diff --git a/olive/cli/base.py b/olive/cli/base.py
index 75fd2816c7..e31fb04ddf 100644
--- a/olive/cli/base.py
+++ b/olive/cli/base.py
@@ -82,6 +82,21 @@ def run(self):
         raise NotImplementedError
 
 
+def add_hf_test_model_config(input_model: dict, test_value, output_path: Optional[str] = None) -> dict:
+    if test_value in (None, False):
+        return input_model
+
+    test_model_output_path = test_value
+    # Use 2 layers to keep the test model fast and lightweight while preserving the original architecture family.
+    input_model["test_model_config"] = {"hidden_layers": 2}
+    if test_model_output_path is True:
+        if not output_path:
+            raise ValueError("--test requires an explicit folder when output_path is not available.")
+        test_model_output_path = str(Path(output_path) / "test_model")
+    input_model["test_model_path"] = test_model_output_path
+    return input_model
+
+
 def _get_hf_input_model(args: Namespace, model_path: OLIVE_RESOURCE_ANNOTATIONS) -> dict:
     """Get the input model config for HuggingFace model.
 
@@ -105,7 +120,7 @@ def _get_hf_input_model(args: Namespace, model_path: OLIVE_RESOURCE_ANNOTATIONS)
         input_model["adapter_path"] = args.adapter_path
     if getattr(args, "trust_remote_code", None) is not None:
         input_model["load_kwargs"]["trust_remote_code"] = args.trust_remote_code
-    return input_model
+    return add_hf_test_model_config(input_model, getattr(args, "test", None), getattr(args, "output_path", None))
 
 
 def _get_onnx_input_model(args: Namespace, model_path: str) -> dict:
@@ -371,6 +386,16 @@ def add_input_model_options(
     model_group.add_argument(
         "--trust_remote_code", action="store_true", help="Trust remote code when loading a huggingface model."
     )
+    model_group.add_argument(
+        "--test",
+        type=str,
+        nargs="?",
+        const=True,
+        help=(
+            "Use a randomly initialized test model with the same Hugging Face architecture and 2 hidden layers. "
+            "Optionally provide a folder where the generated test model should be saved and reused."
+        ),
+    )
 
     if enable_hf_adapter:
         assert enable_hf, "enable_hf must be True when enable_hf_adapter is True."
diff --git a/olive/cli/run.py b/olive/cli/run.py index 6d2a831aef..3d85522330 100644 --- a/olive/cli/run.py +++ b/olive/cli/run.py @@ -6,6 +6,7 @@ from olive.cli.base import ( BaseOliveCLICommand, + add_hf_test_model_config, add_input_model_options, add_logging_options, add_telemetry_options, @@ -59,6 +60,14 @@ def run(self): if input_model_config := get_input_model_config(self.args, required=False): print("Replacing input model config in run config") run_config["input_model"] = input_model_config + elif self.args.test not in (None, False): + input_model = run_config.get("input_model") + if not isinstance(input_model, dict) or input_model.get("type") != "HfModel": + raise ValueError("--test for olive run requires a Hugging Face input_model in the run config.") + output_path = ( + self.args.output_path or run_config.get("output_dir") or run_config.get("engine", {}).get("output_dir") + ) + run_config["input_model"] = add_hf_test_model_config(input_model, self.args.test, output_path) for arg_key, rc_key in [("output_path", "output_dir"), ("log_level", "log_severity_level")]: if (arg_value := getattr(self.args, arg_key)) is not None: diff --git a/olive/common/hf/model_io.py b/olive/common/hf/model_io.py index a3ebd73058..7d66d57574 100644 --- a/olive/common/hf/model_io.py +++ b/olive/common/hf/model_io.py @@ -27,6 +27,7 @@ def get_model_io_config( model_name: str, task: str, model: Optional["PreTrainedModel"] = None, + test_model_config: Optional[dict[str, Any]] = None, **kwargs, ) -> Optional[dict[str, Any]]: """Get the input/output config for the model and task. @@ -35,6 +36,7 @@ def get_model_io_config( model_name: The model name or path. task: The task type (e.g., "text-generation", "text-classification"). model: Optional loaded model for input signature inspection. + test_model_config: Optional overrides for creating a lightweight random test model from the same config. **kwargs: Additional arguments including use_cache. Returns: @@ -68,7 +70,7 @@ def get_model_io_config( return None # Get model config - model_config = get_model_config(model_name, **kwargs) + model_config = get_model_config(model_name, test_model_config=test_model_config, **kwargs) # Handle PEFT models actual_model = model @@ -92,6 +94,7 @@ def get_model_dummy_input( model_name: str, task: str, model: Optional["PreTrainedModel"] = None, + test_model_config: Optional[dict[str, Any]] = None, **kwargs, ) -> Optional[dict[str, Any]]: """Get dummy inputs for the model and task. @@ -100,6 +103,7 @@ def get_model_dummy_input( model_name: The model name or path. task: The task type. model: Optional loaded model for input signature inspection. + test_model_config: Optional overrides for creating a lightweight random test model from the same config. **kwargs: Additional arguments including use_cache, batch_size, sequence_length. Returns: @@ -133,7 +137,7 @@ def get_model_dummy_input( return None # Get model config (handles MLflow paths) - model_config = get_model_config(model_name, **kwargs) + model_config = get_model_config(model_name, test_model_config=test_model_config, **kwargs) # Handle PEFT models actual_model = model diff --git a/olive/common/hf/utils.py b/olive/common/hf/utils.py index a070e85ac8..2abc534eb0 100644 --- a/olive/common/hf/utils.py +++ b/olive/common/hf/utils.py @@ -2,9 +2,11 @@ # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. 
# -------------------------------------------------------------------------- +import inspect import logging +from copy import deepcopy from pathlib import Path -from typing import TYPE_CHECKING, Optional, Union +from typing import TYPE_CHECKING, Any, Optional, Union from transformers import AutoConfig, AutoModel, AutoTokenizer, GenerationConfig @@ -18,7 +20,73 @@ logger = logging.getLogger(__name__) -def load_model_from_task(task: str, model_name_or_path: str, **kwargs) -> "PreTrainedModel": +def _apply_test_model_config( + model_config: "PretrainedConfig", test_model_config: Optional[dict[str, Any]] = None +) -> "PretrainedConfig": + """Apply lightweight test-model overrides to a model config.""" + if not test_model_config: + return model_config + + model_config = deepcopy(model_config) + if "hidden_layers" in test_model_config: + hidden_layers = test_model_config["hidden_layers"] + elif "num_hidden_layers" in test_model_config: + hidden_layers = test_model_config["num_hidden_layers"] + else: + hidden_layers = 2 + if hidden_layers < 1: + raise ValueError("test_model_config.hidden_layers must be greater than 0.") + + updated = False + # Common Hugging Face configs do not use a single canonical field: + # BERT-style models use num_hidden_layers while GPT-style models often use n_layer/n_layers/num_layers. + for attr_name in ("num_hidden_layers", "num_layers", "n_layer", "n_layers"): + if hasattr(model_config, attr_name): + setattr(model_config, attr_name, hidden_layers) + updated = True + + if not updated: + raise ValueError("Unable to create a test model because the config does not expose a hidden-layer count.") + + layer_types = getattr(model_config, "layer_types", None) + if isinstance(layer_types, (list, tuple)): + model_config.layer_types = layer_types[:hidden_layers] + + dtype = getattr(model_config, "dtype", None) + if dtype == "auto": + # This is not allowed anymore with transformers >=4.57, + # we select float16 instead. 
+ model_config.dtype = "float16" + + return model_config + + +def _load_test_model(model_class: type, model_config: "PretrainedConfig", trust_remote_code: Optional[bool] = None): + """Instantiate a random-initialized HF model from config for test mode.""" + from_config_signature = inspect.signature(model_class.from_config) + supports_trust_remote_code = "trust_remote_code" in from_config_signature.parameters or any( + parameter.kind == inspect.Parameter.VAR_KEYWORD for parameter in from_config_signature.parameters.values() + ) + from_config_kwargs = {} + if supports_trust_remote_code and trust_remote_code is not None: + from_config_kwargs["trust_remote_code"] = trust_remote_code + return model_class.from_config(model_config, **from_config_kwargs) + + +def _save_test_model(model: "PreTrainedModel", output_dir: str): + output_path = Path(output_dir) + output_path.mkdir(parents=True, exist_ok=True) + logger.info("Saving generated test model to %s", output_path) + model.save_pretrained(str(output_path)) + + +def load_model_from_task( + task: str, + model_name_or_path: str, + test_model_config: Optional[dict[str, Any]] = None, + test_model_path: Optional[str] = None, + **kwargs, +) -> "PreTrainedModel": """Load huggingface model from task and model_name_or_path.""" from transformers.pipelines import check_task @@ -31,7 +99,7 @@ def load_model_from_task(task: str, model_name_or_path: str, **kwargs) -> "PreTr else: raise ValueError("unsupported transformers version") - model_config = get_model_config(model_name_or_path, **kwargs) + model_config = get_model_config(model_name_or_path, test_model_config=test_model_config, **kwargs) if getattr(model_config, "quantization_config", None): if not isinstance(model_config.quantization_config, dict): model_config.quantization_config = model_config.quantization_config.to_dict() @@ -59,10 +127,20 @@ def load_model_from_task(task: str, model_name_or_path: str, **kwargs) -> "PreTr model = None for i, model_class in enumerate(class_tuple): try: - model = from_pretrained(model_class, model_name_or_path, "model", **kwargs) + if test_model_config: + if test_model_path and (Path(test_model_path) / "config.json").exists(): + model = from_pretrained(model_class, test_model_path, "model", **kwargs) + else: + model = _load_test_model(model_class, model_config, kwargs.get("trust_remote_code")) + if test_model_path: + _save_test_model(model, test_model_path) + else: + model = from_pretrained(model_class, model_name_or_path, "model", **kwargs) logger.debug("Loaded model %s with name_or_path %s", model_class, model_name_or_path) break except (OSError, ValueError) as e: + if test_model_config: + raise if i == len(class_tuple) - 1: # len(class_tuple) == 1 covers most common tasks like text-generation, text-classification, etc # error could be device OOM, device_map: "auto" not supported, etc @@ -94,14 +172,16 @@ def from_pretrained(cls, model_name_or_path: str, mlflow_dir: str, **kwargs): return cls.from_pretrained(get_pretrained_name_or_path(model_name_or_path, mlflow_dir), **kwargs) -def get_model_config(model_name_or_path: str, **kwargs) -> "PretrainedConfig": +def get_model_config( + model_name_or_path: str, test_model_config: Optional[dict[str, Any]] = None, **kwargs +) -> "PretrainedConfig": """Get HF Config for the given model_name_or_path.""" model_config = from_pretrained(AutoConfig, model_name_or_path, "config", **kwargs) # add quantization config quantization_config = kwargs.get("quantization_config") if not quantization_config: - return model_config + return 
_apply_test_model_config(model_config, test_model_config) if hasattr(model_config, "quantization_config") and model_config.quantization_config: logger.warning( @@ -111,7 +191,7 @@ def get_model_config(model_name_or_path: str, **kwargs) -> "PretrainedConfig": ) else: model_config.quantization_config = quantization_config - return model_config + return _apply_test_model_config(model_config, test_model_config) def save_model_config(config: Union["PretrainedConfig", "GenerationConfig"], output_dir: str, **kwargs): diff --git a/olive/model/handler/hf.py b/olive/model/handler/hf.py index daac587bc5..fc659a57e6 100644 --- a/olive/model/handler/hf.py +++ b/olive/model/handler/hf.py @@ -27,8 +27,8 @@ @model_handler_registry("HFModel") class HfModelHandler(PyTorchModelHandlerBase, MLFlowTransformersMixin, HfMixin): # pylint: disable=too-many-ancestors - resource_keys: tuple[str, ...] = ("model_path", "adapter_path") - json_config_keys: tuple[str, ...] = ("task", "load_kwargs") + resource_keys: tuple[str, ...] = ("model_path", "adapter_path", "test_model_path") + json_config_keys: tuple[str, ...] = ("task", "load_kwargs", "test_model_config") def __init__( self, @@ -37,6 +37,8 @@ def __init__( load_kwargs: Union[dict[str, Any], HfLoadKwargs] = None, io_config: Union[dict[str, Any], IoConfig, str] = None, adapter_path: OLIVE_RESOURCE_ANNOTATIONS = None, + test_model_path: OLIVE_RESOURCE_ANNOTATIONS = None, + test_model_config: Optional[dict[str, Any]] = None, model_attributes: Optional[dict[str, Any]] = None, ): super().__init__( @@ -48,6 +50,7 @@ def __init__( self.add_resources(locals()) self.task = task self.load_kwargs = validate_config(load_kwargs, HfLoadKwargs, warn_unused_keys=False) if load_kwargs else None + self.test_model_config = test_model_config self.model_attributes = {**self.get_hf_model_config().to_dict(), **(self.model_attributes or {})} @@ -67,12 +70,23 @@ def adapter_path(self) -> str: """Return the path to the peft adapter.""" return self.get_resource("adapter_path") + @property + def test_model_path(self) -> str: + """Return the optional path to a persisted lightweight test model.""" + return self.get_resource("test_model_path") + def load_model(self, rank: int = None, cache_model: bool = True) -> "torch.nn.Module": """Load the model from the model path.""" if self.model: model = self.model else: - model = load_model_from_task(self.task, self.model_path, **self.get_load_kwargs()) + model = load_model_from_task( + self.task, + self.model_path, + test_model_config=self.test_model_config, + test_model_path=self.test_model_path, + **self.get_load_kwargs(), + ) # we only have peft adapters for now if self.adapter_path: diff --git a/olive/model/handler/mixin/hf.py b/olive/model/handler/mixin/hf.py index 730d6bd6ec..3e400c64e7 100644 --- a/olive/model/handler/mixin/hf.py +++ b/olive/model/handler/mixin/hf.py @@ -39,7 +39,11 @@ def get_hf_model_config(self, exclude_load_keys: Optional[list[str]] = None) -> :param exclude_load_keys: list of keys to exclude from load_kwargs :return: model config """ - return get_model_config(self.model_path, **self.get_load_kwargs(exclude_load_keys)) + return get_model_config( + self.model_path, + test_model_config=getattr(self, "test_model_config", None), + **self.get_load_kwargs(exclude_load_keys), + ) def get_hf_generation_config(self, exclude_load_keys: Optional[list[str]] = None) -> Optional["GenerationConfig"]: """Get generation config for the model if it exists. 
@@ -114,7 +118,13 @@ def save_metadata(self, output_dir: str, exclude_load_keys: Optional[list[str]] def get_hf_io_config(self) -> Optional[dict[str, Any]]: """Get Io config for the model.""" - return get_model_io_config(self.model_path, self.task, self.load_model(), **self.get_load_kwargs()) + return get_model_io_config( + self.model_path, + self.task, + self.load_model(), + test_model_config=getattr(self, "test_model_config", None), + **self.get_load_kwargs(), + ) def get_hf_dummy_inputs(self) -> Optional[dict[str, Any]]: """Get dummy inputs for the model.""" @@ -122,6 +132,7 @@ def get_hf_dummy_inputs(self) -> Optional[dict[str, Any]]: self.model_path, self.task, model=self.load_model(), + test_model_config=getattr(self, "test_model_config", None), **self.get_load_kwargs(), ) diff --git a/olive/passes/onnx/model_builder.py b/olive/passes/onnx/model_builder.py index e1062e02b9..24f3c27785 100644 --- a/olive/passes/onnx/model_builder.py +++ b/olive/passes/onnx/model_builder.py @@ -247,6 +247,15 @@ def _run_for_config( input_path = str(model.get_resource("model_path")) else: model_path = model.model_name_or_path + if model.test_model_config: + if not model.test_model_path: + raise ValueError( + "ModelBuilder requires test_model_path to be set when test_model_config is provided. " + "Please specify the path where the test model should be saved." + ) + if not (Path(model.test_model_path) / "config.json").exists(): + model.load_model(cache_model=False) + model_path = model.test_model_path # provide the model path as input path, model builder uses input_path for quantized models input_path = model_path if model.adapter_path: diff --git a/test/cli/test_base.py b/test/cli/test_base.py index 38c96c6aa9..bb34cef3f2 100644 --- a/test/cli/test_base.py +++ b/test/cli/test_base.py @@ -229,6 +229,38 @@ def test_insert_input_model_invalid_hf_model_name(): get_input_model_config(args) +@patch("huggingface_hub.repo_exists", return_value=True) +def test_get_input_model_config_hf_test_model(_): + args = SimpleNamespace( + model_name_or_path="hf_model", + trust_remote_code=False, + task="text-generation", + model_script=None, + script_dir=None, + test="saved_test_model", + ) + + config = get_input_model_config(args) + + assert config["test_model_config"] == {"hidden_layers": 2} + assert config["test_model_path"] == "saved_test_model" + + +@patch("huggingface_hub.repo_exists", return_value=True) +def test_get_input_model_config_hf_test_model_requires_path_without_output_path(_): + args = SimpleNamespace( + model_name_or_path="hf_model", + trust_remote_code=False, + task="text-generation", + model_script=None, + script_dir=None, + test=True, + ) + + with pytest.raises(ValueError, match=r"--test requires an explicit folder when output_path is not available\."): + get_input_model_config(args) + + def test_insert_input_model_cli_output_model(): # setup model_path = str(Path(__file__).parent.resolve() / "output_model") diff --git a/test/cli/test_cli.py b/test/cli/test_cli.py index a7cb39e244..8489b4586f 100644 --- a/test/cli/test_cli.py +++ b/test/cli/test_cli.py @@ -5,8 +5,9 @@ import json import subprocess import sys +import types from pathlib import Path -from unittest.mock import patch +from unittest.mock import MagicMock, patch import pytest @@ -112,8 +113,10 @@ def test_workflow_run_command(mock_run, tempdir, list_required_packages, tmp_pat @patch("olive.workflows.run") -def test_workflow_run_command_with_overrides(mock_run, tmp_path): +@patch("huggingface_hub.repo_exists", return_value=True) +def 
test_workflow_run_command_with_overrides(mock_repo_exists, mock_run, tmp_path): # setup + # Prevent a live Hugging Face repo lookup when the CLI resolves the HF input model override. config_path = tmp_path / "config.json" config_path.write_text( json.dumps({"input_model": {"key": "value"}, "engine": {"log_severity_level": 3}, "output_dir": "output"}) @@ -150,6 +153,52 @@ def test_workflow_run_command_with_overrides(mock_run, tmp_path): ) +@patch("olive.workflows.run") +def test_workflow_run_command_with_test_override(mock_run, tmp_path): + config_path = tmp_path / "config.json" + config_path.write_text( + json.dumps( + { + "input_model": { + "type": "HfModel", + "model_path": "hf-internal-testing/tiny-random-LlamaForCausalLM", + "load_kwargs": {"attn_implementation": "eager", "trust_remote_code": False}, + }, + "output_dir": str(tmp_path / "output"), + } + ) + ) + command_args = ["run", "--run-config", str(config_path), "--test"] + + cli_main(command_args) + + mock_run.assert_called_once_with( + { + "input_model": { + "type": "HfModel", + "model_path": "hf-internal-testing/tiny-random-LlamaForCausalLM", + "load_kwargs": {"attn_implementation": "eager", "trust_remote_code": False}, + "test_model_config": {"hidden_layers": 2}, + "test_model_path": str(tmp_path / "output" / "test_model"), + }, + "output_dir": str(tmp_path / "output"), + }, + list_required_packages=False, + package_config=None, + tempdir=None, + ) + + +def test_workflow_run_command_with_test_requires_hf_input_model(tmp_path): + config_path = tmp_path / "config.json" + config_path.write_text(json.dumps({"input_model": {"type": "OnnxModel", "model_path": "model.onnx"}})) + + with pytest.raises( + ValueError, match=r"--test for olive run requires a Hugging Face input_model in the run config\." 
+ ): + cli_main(["run", "--run-config", str(config_path), "--test"]) + + @patch("olive.platform_sdk.qualcomm.configure.configure.configure") def test_configure_qualcomm_sdk_command(mock_configure): # setup @@ -190,6 +239,119 @@ def test_finetune_command(_, mock_run, tmp_path): assert mock_run.call_count == 1 +@patch("huggingface_hub.repo_exists", return_value=True) +def test_optimize_command_test_model_config(_, tmp_path): + output_dir = tmp_path / "output_dir" + test_model_dir = tmp_path / "saved_test_model" + command_args = [ + "optimize", + "-m", + "dummy-model-id", + "--test", + str(test_model_dir), + "--dry_run", + "-o", + str(output_dir), + ] + + cli_main(command_args) + + config = json.loads((output_dir / "config.json").read_text()) + assert config["input_model"]["test_model_config"] == {"hidden_layers": 2} + assert config["input_model"]["test_model_path"] == str(test_model_dir) + + +@patch("huggingface_hub.repo_exists", return_value=True) +def test_optimize_dry_run_then_run_with_test_model(mock_repo_exists, tmp_path): + from olive.model.config.model_config import ModelConfig + from olive.model.handler.hf import HfModelHandler + from olive.passes.onnx.model_builder import ModelBuilder + from olive.passes.pytorch.gptq import Gptq + + model_id = "hf-internal-testing/tiny-random-LlamaForCausalLM" + config_output_dir = tmp_path / "test-smoke" + test_model_dir = tmp_path / "test-model" + run_output_dir = tmp_path / "test-smoke-run" + + cli_main( + [ + "optimize", + "-m", + model_id, + "--device", + "cpu", + "--provider", + "CPUExecutionProvider", + "--precision", + "int4", + "--output_path", + str(config_output_dir), + "--dry_run", + ] + ) + + config_path = config_output_dir / "config.json" + assert config_path.exists() + assert mock_repo_exists.called + + def fake_load_model(handler, *args, **kwargs): + Path(handler.test_model_path).mkdir(parents=True, exist_ok=True) + (Path(handler.test_model_path) / "config.json").write_text(json.dumps({"model_type": "llama"})) + return MagicMock() + + def fake_gptq_run(self, model, pass_config, output_model_path): + del pass_config, output_model_path + return model + + def fake_create_model(**kwargs): + output_dir = Path(kwargs["output_dir"]) + output_dir.mkdir(parents=True, exist_ok=True) + (output_dir / kwargs["filename"]).write_text("dummy ONNX file") + (output_dir / "genai_config.json").write_text("{}") + + fake_builder = types.ModuleType("onnxruntime_genai.models.builder") + fake_builder.create_model = MagicMock(side_effect=fake_create_model) + fake_models = types.ModuleType("onnxruntime_genai.models") + fake_models.builder = fake_builder + fake_ort_genai = types.ModuleType("onnxruntime_genai") + fake_ort_genai.models = fake_models + fake_ort_genai.__version__ = "0.10.0" + mock_cfg = MagicMock() + mock_cfg.to_dict.return_value = {} + + with ( + patch.object(ModelConfig, "get_model_identifier", return_value="tiny-random-llama"), + patch.object(HfModelHandler, "get_hf_model_config", return_value=mock_cfg), + patch.object(HfModelHandler, "load_model", new=fake_load_model), + patch.object(HfModelHandler, "save_metadata", return_value=[]), + patch.object(Gptq, "_run_for_config", autospec=True, side_effect=fake_gptq_run), + patch.object(ModelBuilder, "maybe_patch_quant", return_value=None), + patch.dict( + sys.modules, + { + "onnxruntime_genai": fake_ort_genai, + "onnxruntime_genai.models": fake_models, + "onnxruntime_genai.models.builder": fake_builder, + }, + ), + ): + cli_main( + [ + "run", + "--config", + str(config_path), + "--test", + 
str(test_model_dir), + "--output_path", + str(run_output_dir), + ] + ) + + assert (test_model_dir / "config.json").exists() + assert (run_output_dir / "model.onnx").exists() + assert (run_output_dir / "genai_config.json").exists() + + @patch("olive.workflows.run") @patch("olive.model.handler.diffusers.is_valid_diffusers_model", return_value=True) def test_diffusion_lora_command(_, mock_run, tmp_path): diff --git a/test/common/test_hf.py b/test/common/test_hf.py index 00b4cbd043..cab00e3df3 100644 --- a/test/common/test_hf.py +++ b/test/common/test_hf.py @@ -6,9 +6,10 @@ import pytest import torch +from transformers import BertConfig, GPT2Config, Qwen3Config from olive.common.hf.model_io import get_model_dummy_input, get_model_io_config -from olive.common.hf.utils import load_model_from_task +from olive.common.hf.utils import _apply_test_model_config, _load_test_model, load_model_from_task def test_load_model_from_task(): @@ -21,6 +22,170 @@ def test_load_model_from_task(): assert isinstance(model, torch.nn.Module) +@pytest.mark.parametrize( + ("model_config", "hidden_layers_attr"), + [ + (BertConfig(num_hidden_layers=12), "num_hidden_layers"), # pylint: disable=unexpected-keyword-arg + (GPT2Config(n_layer=12), "n_layer"), # pylint: disable=unexpected-keyword-arg + ], +) +def test_load_model_from_task_test_model_config(model_config, hidden_layers_attr): + created_model = MagicMock(spec=torch.nn.Module) + + with ( + patch("transformers.pipelines.check_task") as mock_check_task, + patch("olive.common.hf.utils.from_pretrained", return_value=model_config) as mock_from_pretrained, + ): + mock_model_class = MagicMock() + mock_model_class.from_config.return_value = created_model + mock_check_task.return_value = ("text-classification", {"pt": (mock_model_class,)}, None) + + model = load_model_from_task("text-classification", "dummy-model", test_model_config={"hidden_layers": 2}) + + assert model is created_model + mock_from_pretrained.assert_called_once() + mock_model_class.from_config.assert_called_once() + assert getattr(mock_model_class.from_config.call_args.args[0], hidden_layers_attr) == 2 + + +def test_load_model_from_task_test_model_config_fails_without_fallback(): + model_config = BertConfig(num_hidden_layers=12) # pylint: disable=unexpected-keyword-arg + + with ( + patch("transformers.pipelines.check_task") as mock_check_task, + patch("olive.common.hf.utils.from_pretrained", return_value=model_config), + ): + first_model_class = MagicMock() + first_model_class.from_config.side_effect = ValueError("unexpected architecture") + second_model_class = MagicMock() + second_model_class.from_config.return_value = MagicMock(spec=torch.nn.Module) + mock_check_task.return_value = ("text-classification", {"pt": (first_model_class, second_model_class)}, None) + + with pytest.raises(ValueError, match="unexpected architecture"): + load_model_from_task("text-classification", "dummy-model", test_model_config={"hidden_layers": 2}) + + first_model_class.from_config.assert_called_once() + second_model_class.from_config.assert_not_called() + + +def test_load_model_from_task_test_model_config_saves_model(tmp_path): + model_config = BertConfig(num_hidden_layers=12) # pylint: disable=unexpected-keyword-arg + created_model = MagicMock() + test_model_path = tmp_path / "saved_test_model" + + with ( + patch("transformers.pipelines.check_task") as mock_check_task, + patch("olive.common.hf.utils.from_pretrained", return_value=model_config), + ): + mock_model_class = MagicMock() + 
mock_model_class.from_config.return_value = created_model + mock_check_task.return_value = ("text-classification", {"pt": (mock_model_class,)}, None) + + model = load_model_from_task( + "text-classification", + "dummy-model", + test_model_config={"num_hidden_layers": 2}, + test_model_path=str(test_model_path), + ) + + assert model is created_model + mock_model_class.from_config.assert_called_once() + created_model.save_pretrained.assert_called_once_with(str(test_model_path)) + + +def test_load_model_from_task_test_model_config_reuses_saved_model(tmp_path): + model_config = BertConfig(num_hidden_layers=12) # pylint: disable=unexpected-keyword-arg + test_model_path = tmp_path / "saved_test_model" + test_model_path.mkdir() + (test_model_path / "config.json").write_text("{}") + loaded_model = MagicMock(spec=torch.nn.Module) + + with ( + patch("transformers.pipelines.check_task") as mock_check_task, + patch( + "olive.common.hf.utils.from_pretrained", side_effect=[model_config, loaded_model] + ) as mock_from_pretrained, + ): + mock_model_class = MagicMock() + mock_check_task.return_value = ("text-classification", {"pt": (mock_model_class,)}, None) + + model = load_model_from_task( + "text-classification", + "dummy-model", + test_model_config={"num_hidden_layers": 2}, + test_model_path=str(test_model_path), + ) + + assert model is loaded_model + mock_model_class.from_config.assert_not_called() + assert mock_from_pretrained.call_args_list[1].args[1] == str(test_model_path) + + +def test_apply_test_model_config_updates_qwen3_layer_types(): + model_config = Qwen3Config() + model_config.num_hidden_layers = 4 + model_config.layer_types = model_config.layer_types[:4] + + updated_config = _apply_test_model_config(model_config, {"hidden_layers": 2}) + + assert updated_config.num_hidden_layers == 2 + assert updated_config.layer_types == model_config.layer_types[:2] + reloaded_config = Qwen3Config(**updated_config.to_dict()) + assert reloaded_config.num_hidden_layers == 2 + assert len(reloaded_config.layer_types) == 2 + assert reloaded_config.layer_types == model_config.layer_types[:2] + + +def test_load_test_model_omits_unsupported_trust_remote_code_kwarg(): + model_config = BertConfig(num_hidden_layers=12) # pylint: disable=unexpected-keyword-arg + captured = {} + + class MockModelClass: + @staticmethod + def from_config(config): + captured["config"] = config + return config + + created_model = _load_test_model(MockModelClass, model_config, trust_remote_code=True) + + assert created_model is model_config + assert captured == {"config": model_config} + + +def test_load_test_model_omits_none_trust_remote_code_kwarg(): + model_config = BertConfig(num_hidden_layers=12) # pylint: disable=unexpected-keyword-arg + captured = {} + + class MockModelClass: + @staticmethod + def from_config(config, **kwargs): + captured["config"] = config + captured["kwargs"] = kwargs + return config + + created_model = _load_test_model(MockModelClass, model_config) + + assert created_model is model_config + assert captured == {"config": model_config, "kwargs": {}} + + +def test_load_test_model_passes_supported_trust_remote_code_kwarg(): + model_config = BertConfig(num_hidden_layers=12) # pylint: disable=unexpected-keyword-arg + captured = {} + + class MockModelClass: + @staticmethod + def from_config(config, trust_remote_code=None): + captured["config"] = config + captured["trust_remote_code"] = trust_remote_code + return config + + created_model = _load_test_model(MockModelClass, model_config, trust_remote_code=True) + + assert 
created_model is model_config + assert captured == {"config": model_config, "trust_remote_code": True} + + @pytest.mark.parametrize( ("exceptions", "expected_exception", "expected_message"), [ diff --git a/test/passes/onnx/test_model_builder.py b/test/passes/onnx/test_model_builder.py index be5b728c65..0f71535db9 100644 --- a/test/passes/onnx/test_model_builder.py +++ b/test/passes/onnx/test_model_builder.py @@ -6,7 +6,7 @@ import sys import types from pathlib import Path -from unittest.mock import Mock +from unittest.mock import MagicMock, Mock, patch import onnx import pytest @@ -17,6 +17,8 @@ from olive.passes.pytorch.rtn import Rtn from test.utils import make_local_tiny_llama +TINY_RANDOM_LLAMA_MODEL_ID = "hf-internal-testing/tiny-random-LlamaForCausalLM" + def _create_test_onnx_model(model_path: Path, node_name: str): input_info = onnx.helper.make_tensor_value_info("input", onnx.TensorProto.FLOAT, [1, 1]) @@ -129,6 +131,62 @@ def test_model_builder_layer_annotations(tmp_path, layer_annotations): ) +def test_model_builder_uses_saved_test_model_path(tmp_path): + test_model_path = tmp_path / "saved_test_model" + output_folder = tmp_path / "output_model" + + mock_cfg = MagicMock() + mock_cfg.to_dict.return_value = {} + with patch.object(HfModelHandler, "get_hf_model_config", return_value=mock_cfg): + input_model = HfModelHandler( + model_path=TINY_RANDOM_LLAMA_MODEL_ID, + test_model_config={"hidden_layers": 2}, + test_model_path=str(test_model_path), + ) + + def materialize_test_model(*args, **kwargs): + test_model_path.mkdir(parents=True, exist_ok=True) + (test_model_path / "config.json").write_text("{}") + return MagicMock() + + def fake_create_model(*_, **kwargs): + output_dir = Path(kwargs["output_dir"]) + (output_dir / kwargs["filename"]).write_text("dummy onnx file") + (output_dir / "genai_config.json").write_text("{}") + + fake_builder = types.ModuleType("onnxruntime_genai.models.builder") + fake_builder.create_model = MagicMock(side_effect=fake_create_model) + fake_models = types.ModuleType("onnxruntime_genai.models") + fake_models.builder = fake_builder + fake_ort_genai = types.ModuleType("onnxruntime_genai") + fake_ort_genai.models = fake_models + fake_ort_genai.__version__ = "0.0.0" + + p = create_pass_from_dict(ModelBuilder, {"precision": "fp32"}, disable_search=True) + + with ( + patch.object(ModelBuilder, "maybe_patch_quant"), + patch.dict( + sys.modules, + { + "onnxruntime_genai": fake_ort_genai, + "onnxruntime_genai.models": fake_models, + "onnxruntime_genai.models.builder": fake_builder, + }, + ), + patch.object(input_model, "load_model", side_effect=materialize_test_model) as mock_load_model, + patch.object(input_model, "save_metadata", return_value=[]), + ): + output_model = p.run(input_model, output_folder) + + assert isinstance(output_model, ONNXModelHandler) + assert mock_load_model.call_count == 1 + assert Path(output_model.model_path).exists() + assert test_model_path.exists() + assert fake_builder.create_model.call_args.kwargs["model_name"] == str(test_model_path) + assert fake_builder.create_model.call_args.kwargs["input_path"] == str(test_model_path) + + def test_model_builder_apply_annotations_on_single_file_fallback(tmp_path, monkeypatch): def fake_create_model( model_name, input_path, output_dir, precision, execution_provider, cache_dir, filename, **kwargs @@ -144,6 +202,8 @@ def fake_create_model( input_model = Mock(spec=HfModelHandler) input_model.model_name_or_path = "dummy-model" input_model.adapter_path = None + input_model.test_model_config = None + 
input_model.test_model_path = None input_model.model_attributes = {"split_assignments": {"model.layers.0": 1}} p = create_pass_from_dict( @@ -179,6 +239,8 @@ def fake_create_model( input_model = Mock(spec=HfModelHandler) input_model.model_name_or_path = "dummy-model" input_model.adapter_path = None + input_model.test_model_config = None + input_model.test_model_path = None input_model.model_attributes = {} p = create_pass_from_dict(ModelBuilder, {"precision": "fp32"}, disable_search=True)