37 commits
ef42e47
Initial plan
Copilot May 11, 2026
00571f0
feat: add CLI test-model config for HF inputs
Copilot May 11, 2026
485dfbf
test: broaden HF test-model coverage
Copilot May 11, 2026
a6fa34a
chore: polish test model config handling
Copilot May 11, 2026
273850c
fix: fail fast for HF test model loading
Copilot May 11, 2026
318fcbe
refactor: remove nested try from HF test loading
Copilot May 11, 2026
40b0740
test: cover trust_remote_code helper behavior
Copilot May 11, 2026
386ff01
feat: persist reusable HF test model path
Copilot May 11, 2026
09fac8c
fix: tighten HF test model path handling
Copilot May 11, 2026
09df0a7
refactor: simplify test model path handling
Copilot May 11, 2026
d4ebad5
lintrunner
xadupre May 11, 2026
6321d32
lint
xadupre May 11, 2026
6709852
docs: add phi test conversion how-to
Copilot May 11, 2026
3f8f8fc
feat: support run command test models
Copilot May 11, 2026
189289a
chore: address review nits for run test support
Copilot May 11, 2026
c90c520
chore: simplify run test override handling
Copilot May 11, 2026
5cec47f
chore: polish run test support follow-up
Copilot May 11, 2026
eaf0a16
fix: use saved test checkpoint in model builder
Copilot May 11, 2026
17cb075
chore: tidy model builder test fixture
Copilot May 11, 2026
996f633
chore: clarify model builder test model errors
Copilot May 11, 2026
00732b7
fix dtype: auto
xadupre May 11, 2026
76ee4ef
update documentation
xadupre May 11, 2026
9ba38c7
docs: clarify phi smoke test output path
Copilot May 11, 2026
fa6bee7
docs: switch smoke test how-to to qwen
Copilot May 11, 2026
ffda0dc
test: cover documented llm smoke flow
Copilot May 11, 2026
d0f868f
test: polish documented smoke flow test
Copilot May 11, 2026
a408b63
test: rename smoke flow cli test
Copilot May 11, 2026
e272c2a
test: refine smoke flow workflow stubs
Copilot May 11, 2026
c901b63
test: tidy smoke flow helper names
Copilot May 11, 2026
36410cd
test: clarify smoke flow mocks
Copilot May 11, 2026
e16cb82
test: polish documented smoke flow test naming
Copilot May 11, 2026
7507604
test: lift smoke flow imports and mock defaults
Copilot May 11, 2026
ac7840f
fix: keep qwen test layer types in sync
Copilot May 11, 2026
f165dda
Merge origin/main and resolve model builder test conflict
Copilot May 12, 2026
5daba5d
Merge origin/main into copilot/fr-add-model-to-config-json
Copilot May 12, 2026
8941efb
Potential fix for pull request finding
xadupre May 12, 2026
be35ef4
Merge branch 'main' into copilot/fr-add-model-to-config-json
xadupre May 13, 2026
76 changes: 76 additions & 0 deletions docs/source/how-to/cli/cli-convert-qwen-test.md
@@ -0,0 +1,76 @@
# How to convert a Qwen model with a quick `--test` smoke check

If you are converting a large language model, it is often useful to validate the Olive command, environment, and conversion recipe on a much smaller model before spending time on the full checkpoint.

The `--test` option handles this for Hugging Face models. Olive keeps the same model architecture, builds a randomly initialized 2-layer test model from it, saves that model to the folder you provide, and reuses the folder on later runs.

This example uses [`Qwen/Qwen3-0.6B`](https://huggingface.co/Qwen/Qwen3-0.6B), but the same pattern works for other supported Hugging Face LLMs.

## Step 1: generate the workflow config

Start by generating the config that Olive will run for the Qwen conversion.

```bash
olive optimize \
--model_name_or_path Qwen/Qwen3-0.6B \
--device cpu \
--provider CPUExecutionProvider \
--precision int4 \
--output_path out/qwen-smoke \
--dry_run
```

This creates `out/qwen-smoke/config.json` without launching the full conversion yet.
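If you want to double-check what was generated before moving on, a quick sketch like this prints the input model section (it assumes the generated file keeps the usual top-level `input_model` entry):

```python
# Inspect the config produced by the dry run; the path matches --output_path above.
import json

with open("out/qwen-smoke/config.json") as f:
    config = json.load(f)

# Expect an HfModel entry pointing at Qwen/Qwen3-0.6B.
print(json.dumps(config["input_model"], indent=2))
```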

## Step 2: run a fast smoke test with `olive run --test`

Use the generated config with `olive run` and pass `--test` so Olive swaps in a reduced random Qwen model.

```bash
olive run \
--config out/qwen-smoke/config.json \
--test out/qwen-test-model \
--output_path out/qwen-smoke-run
```

What this does:

- `--test out/qwen-test-model` creates a reduced random Qwen model and saves it in `out/qwen-test-model`
- later runs reuse the same saved test model instead of recreating it
- `--output_path out/qwen-smoke-run` gives the smoke test its own output folder, so the generated ONNX artifacts are easy to find

After the smoke test finishes, look under `out/qwen-smoke-run` for the exported ONNX model and related files.
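If you are unsure which file is the exported model, a short sketch like this lists the ONNX artifacts in that folder:

```python
# List the ONNX files produced by the smoke test run.
from pathlib import Path

for onnx_file in sorted(Path("out/qwen-smoke-run").rglob("*.onnx")):
    print(onnx_file)
```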

This is a quick way to confirm that:

- Olive can load the source model
- the selected optimization recipe is valid for your setup
- the conversion path completes before you run the full model

If you omit the folder and just pass `--test`, `olive run` will save the reduced model under `<output_path>/test_model`.

## Step 3: run the full conversion

Once the smoke test succeeds, rerun the conversion on the full Qwen checkpoint by removing `--test`.

```bash
olive run \
--config out/qwen-smoke/config.json \
--output_path out/qwen-full
```

At this point you know the Olive command and the conversion recipe already worked on the lightweight test model, so you can focus on the full-model run instead of debugging both at once.

## Why keep the test model folder?

The saved test model is useful beyond the first smoke test:

- you can rerun the reduced conversion quickly while iterating on options
- you can reuse the same HF test model later when comparing the Hugging Face model against the exported ONNX model (a sketch of this check follows below)
- you avoid recreating a new random test checkpoint every time
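
The sketch below illustrates that comparison workflow; the folder names come from the commands above, and the ONNX side is only outlined because the exported model's input and output names depend on the recipe you chose:

```python
# Load the saved 2-layer test model and compute reference logits for a prompt.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen3-0.6B")
model = AutoModelForCausalLM.from_pretrained("out/qwen-test-model")
model.eval()

print(model.config.num_hidden_layers)  # expected: 2

inputs = tokenizer("Hello from Olive", return_tensors="pt")
with torch.no_grad():
    reference_logits = model(**inputs).logits
print(reference_logits.shape)

# To compare against the export, feed the same token ids to the ONNX model in
# out/qwen-smoke-run with onnxruntime and diff the logits.
```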

## Related docs

- [How to use the `olive optimize` command to optimize a PyTorch model](cli-optimize)
- [How to write a new workflow from scratch](../configure-workflows/build-workflow)
- [CLI reference](../../reference/cli)
2 changes: 2 additions & 0 deletions docs/source/how-to/index.md
@@ -12,6 +12,7 @@ The Olive CLI provides a set of primitives such as `quantize`, `finetune`, `onnx
- [how to use the `olive finetune` command to create (Q)LoRA adapters](cli/cli-finetune)
- [How to use the `olive quantize` command to quantize your model with different precisions and techniques such as AWQ](cli/cli-quantize)
- [How to use the `olive run` command to execute an Olive workflow.](cli/cli-run)
- [How to convert a Qwen model with a quick `--test` smoke check](cli/cli-convert-qwen-test)

# Olive Python API

@@ -43,6 +44,7 @@

installation
cli/cli-optimize
cli/cli-convert-qwen-test
cli/cli-auto-opt
cli/cli-finetune
cli/cli-quantize
27 changes: 26 additions & 1 deletion olive/cli/base.py
@@ -82,6 +82,21 @@ def run(self):
raise NotImplementedError


def add_hf_test_model_config(input_model: dict, test_value, output_path: Optional[str] = None) -> dict:
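"""Attach test-model settings to an HF input-model config when --test is requested."""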
if test_value in (None, False):
return input_model

test_model_output_path = test_value
# Use 2 layers to keep the test model fast and lightweight while preserving the original architecture family.
input_model["test_model_config"] = {"hidden_layers": 2}
if test_model_output_path is True:
if not output_path:
raise ValueError("--test requires an explicit folder when output_path is not available.")
test_model_output_path = str(Path(output_path) / "test_model")
input_model["test_model_path"] = test_model_output_path
return input_model


def _get_hf_input_model(args: Namespace, model_path: OLIVE_RESOURCE_ANNOTATIONS) -> dict:
"""Get the input model config for HuggingFace model.

@@ -105,7 +120,7 @@ def _get_hf_input_model(args: Namespace, model_path: OLIVE_RESOURCE_ANNOTATIONS)
input_model["adapter_path"] = args.adapter_path
if getattr(args, "trust_remote_code", None) is not None:
input_model["load_kwargs"]["trust_remote_code"] = args.trust_remote_code
return input_model
return add_hf_test_model_config(input_model, getattr(args, "test", None), getattr(args, "output_path", None))


def _get_onnx_input_model(args: Namespace, model_path: str) -> dict:
@@ -371,6 +386,16 @@ def add_input_model_options(
model_group.add_argument(
"--trust_remote_code", action="store_true", help="Trust remote code when loading a huggingface model."
)
model_group.add_argument(
"--test",
type=str,
nargs="?",
const=True,
help=(
"Use a randomly initialized test model with the same Hugging Face architecture and 2 hidden layers. "
"Optionally provide a folder where the generated test model should be saved and reused."
),
)

if enable_hf_adapter:
assert enable_hf, "enable_hf must be True when enable_hf_adapter is True."
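A small usage sketch of the new `add_hf_test_model_config` helper above (the dictionary values are illustrative, not taken from a real run):

```python
# Illustrative only: shows how the CLI layer uses add_hf_test_model_config.
from olive.cli.base import add_hf_test_model_config

input_model = {"type": "HfModel", "model_path": "Qwen/Qwen3-0.6B", "load_kwargs": {}}

# --test with an explicit folder: the reduced model is saved and reused there.
with_folder = add_hf_test_model_config(dict(input_model), "out/qwen-test-model")
assert with_folder["test_model_config"] == {"hidden_layers": 2}
assert with_folder["test_model_path"] == "out/qwen-test-model"

# Bare --test (const=True): the path is derived from the output folder instead.
bare = add_hf_test_model_config(dict(input_model), True, output_path="out/qwen-smoke-run")
assert bare["test_model_path"].endswith("test_model")
```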
9 changes: 9 additions & 0 deletions olive/cli/run.py
Expand Up @@ -6,6 +6,7 @@

from olive.cli.base import (
BaseOliveCLICommand,
add_hf_test_model_config,
add_input_model_options,
add_logging_options,
add_telemetry_options,
@@ -59,6 +60,14 @@ def run(self):
if input_model_config := get_input_model_config(self.args, required=False):
print("Replacing input model config in run config")
run_config["input_model"] = input_model_config
elif self.args.test not in (None, False):
input_model = run_config.get("input_model")
if not isinstance(input_model, dict) or input_model.get("type") != "HfModel":
raise ValueError("--test for olive run requires a Hugging Face input_model in the run config.")
output_path = (
self.args.output_path or run_config.get("output_dir") or run_config.get("engine", {}).get("output_dir")
)
run_config["input_model"] = add_hf_test_model_config(input_model, self.args.test, output_path)

for arg_key, rc_key in [("output_path", "output_dir"), ("log_level", "log_severity_level")]:
if (arg_value := getattr(self.args, arg_key)) is not None:
8 changes: 6 additions & 2 deletions olive/common/hf/model_io.py
@@ -27,6 +27,7 @@ def get_model_io_config(
model_name: str,
task: str,
model: Optional["PreTrainedModel"] = None,
test_model_config: Optional[dict[str, Any]] = None,
**kwargs,
) -> Optional[dict[str, Any]]:
"""Get the input/output config for the model and task.
@@ -35,6 +36,7 @@
model_name: The model name or path.
task: The task type (e.g., "text-generation", "text-classification").
model: Optional loaded model for input signature inspection.
test_model_config: Optional overrides for creating a lightweight random test model from the same config.
**kwargs: Additional arguments including use_cache.

Returns:
@@ -68,7 +70,7 @@
return None

# Get model config
model_config = get_model_config(model_name, **kwargs)
model_config = get_model_config(model_name, test_model_config=test_model_config, **kwargs)

# Handle PEFT models
actual_model = model
@@ -92,6 +94,7 @@ def get_model_dummy_input(
model_name: str,
task: str,
model: Optional["PreTrainedModel"] = None,
test_model_config: Optional[dict[str, Any]] = None,
**kwargs,
) -> Optional[dict[str, Any]]:
"""Get dummy inputs for the model and task.
@@ -100,6 +103,7 @@
model_name: The model name or path.
task: The task type.
model: Optional loaded model for input signature inspection.
test_model_config: Optional overrides for creating a lightweight random test model from the same config.
**kwargs: Additional arguments including use_cache, batch_size, sequence_length.

Returns:
@@ -133,7 +137,7 @@
return None

# Get model config (handles MLflow paths)
model_config = get_model_config(model_name, **kwargs)
model_config = get_model_config(model_name, test_model_config=test_model_config, **kwargs)

# Handle PEFT models
actual_model = model
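A hypothetical call showing how the new `test_model_config` parameter threads through these helpers (the model name, task, and omission of other keyword arguments are assumptions for illustration):

```python
# Illustrative: request dummy inputs for a reduced 2-layer version of the model.
from olive.common.hf.model_io import get_model_dummy_input

dummy_inputs = get_model_dummy_input(
    "Qwen/Qwen3-0.6B",
    "text-generation",
    test_model_config={"hidden_layers": 2},
)
```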
94 changes: 87 additions & 7 deletions olive/common/hf/utils.py
@@ -2,9 +2,11 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
# --------------------------------------------------------------------------
import inspect
import logging
from copy import deepcopy
from pathlib import Path
from typing import TYPE_CHECKING, Optional, Union
from typing import TYPE_CHECKING, Any, Optional, Union

from transformers import AutoConfig, AutoModel, AutoTokenizer, GenerationConfig

@@ -18,7 +20,73 @@
logger = logging.getLogger(__name__)


def load_model_from_task(task: str, model_name_or_path: str, **kwargs) -> "PreTrainedModel":
def _apply_test_model_config(
model_config: "PretrainedConfig", test_model_config: Optional[dict[str, Any]] = None
) -> "PretrainedConfig":
"""Apply lightweight test-model overrides to a model config."""
if not test_model_config:
return model_config

model_config = deepcopy(model_config)
if "hidden_layers" in test_model_config:
hidden_layers = test_model_config["hidden_layers"]
elif "num_hidden_layers" in test_model_config:
hidden_layers = test_model_config["num_hidden_layers"]
else:
hidden_layers = 2
if hidden_layers < 1:
raise ValueError("test_model_config.hidden_layers must be greater than 0.")

updated = False
# Common Hugging Face configs do not use a single canonical field:
# BERT-style models use num_hidden_layers while GPT-style models often use n_layer/n_layers/num_layers.
for attr_name in ("num_hidden_layers", "num_layers", "n_layer", "n_layers"):
if hasattr(model_config, attr_name):
setattr(model_config, attr_name, hidden_layers)
updated = True

if not updated:
raise ValueError("Unable to create a test model because the config does not expose a hidden-layer count.")

layer_types = getattr(model_config, "layer_types", None)
if isinstance(layer_types, (list, tuple)):
model_config.layer_types = layer_types[:hidden_layers]

dtype = getattr(model_config, "dtype", None)
if dtype == "auto":
# dtype="auto" is no longer accepted by transformers >= 4.57,
# so fall back to float16.
model_config.dtype = "float16"

return model_config


def _load_test_model(model_class: type, model_config: "PretrainedConfig", trust_remote_code: Optional[bool] = None):
"""Instantiate a random-initialized HF model from config for test mode."""
from_config_signature = inspect.signature(model_class.from_config)
supports_trust_remote_code = "trust_remote_code" in from_config_signature.parameters or any(
parameter.kind == inspect.Parameter.VAR_KEYWORD for parameter in from_config_signature.parameters.values()
)
from_config_kwargs = {}
if supports_trust_remote_code and trust_remote_code is not None:
from_config_kwargs["trust_remote_code"] = trust_remote_code
return model_class.from_config(model_config, **from_config_kwargs)


def _save_test_model(model: "PreTrainedModel", output_dir: str):
output_path = Path(output_dir)
output_path.mkdir(parents=True, exist_ok=True)
logger.info("Saving generated test model to %s", output_path)
model.save_pretrained(str(output_path))


def load_model_from_task(
task: str,
model_name_or_path: str,
test_model_config: Optional[dict[str, Any]] = None,
test_model_path: Optional[str] = None,
**kwargs,
) -> "PreTrainedModel":
"""Load huggingface model from task and model_name_or_path."""
from transformers.pipelines import check_task

@@ -31,7 +99,7 @@ def load_model_from_task(task: str, model_name_or_path: str, **kwargs) -> "PreTr
else:
raise ValueError("unsupported transformers version")

model_config = get_model_config(model_name_or_path, **kwargs)
model_config = get_model_config(model_name_or_path, test_model_config=test_model_config, **kwargs)
if getattr(model_config, "quantization_config", None):
if not isinstance(model_config.quantization_config, dict):
model_config.quantization_config = model_config.quantization_config.to_dict()
@@ -59,10 +127,20 @@ def load_model_from_task(task: str, model_name_or_path: str, **kwargs) -> "PreTr
model = None
for i, model_class in enumerate(class_tuple):
try:
model = from_pretrained(model_class, model_name_or_path, "model", **kwargs)
if test_model_config:
if test_model_path and (Path(test_model_path) / "config.json").exists():
model = from_pretrained(model_class, test_model_path, "model", **kwargs)
else:
model = _load_test_model(model_class, model_config, kwargs.get("trust_remote_code"))
if test_model_path:
_save_test_model(model, test_model_path)
else:
model = from_pretrained(model_class, model_name_or_path, "model", **kwargs)
logger.debug("Loaded model %s with name_or_path %s", model_class, model_name_or_path)
break
except (OSError, ValueError) as e:
if test_model_config:
raise
if i == len(class_tuple) - 1:
# len(class_tuple) == 1 covers most common tasks like text-generation, text-classification, etc
# error could be device OOM, device_map: "auto" not supported, etc
@@ -94,14 +172,16 @@ def from_pretrained(cls, model_name_or_path: str, mlflow_dir: str, **kwargs):
return cls.from_pretrained(get_pretrained_name_or_path(model_name_or_path, mlflow_dir), **kwargs)


def get_model_config(model_name_or_path: str, **kwargs) -> "PretrainedConfig":
def get_model_config(
model_name_or_path: str, test_model_config: Optional[dict[str, Any]] = None, **kwargs
) -> "PretrainedConfig":
"""Get HF Config for the given model_name_or_path."""
model_config = from_pretrained(AutoConfig, model_name_or_path, "config", **kwargs)

# add quantization config
quantization_config = kwargs.get("quantization_config")
if not quantization_config:
return model_config
return _apply_test_model_config(model_config, test_model_config)

if hasattr(model_config, "quantization_config") and model_config.quantization_config:
logger.warning(
@@ -111,7 +191,7 @@ def get_model_config(model_name_or_path: str, **kwargs) -> "PretrainedConfig":
)
else:
model_config.quantization_config = quantization_config
return model_config
return _apply_test_model_config(model_config, test_model_config)


def save_model_config(config: Union["PretrainedConfig", "GenerationConfig"], output_dir: str, **kwargs):
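To make the reduction concrete, here is a rough sketch (not part of the PR) of what `_apply_test_model_config` does to a Qwen-style config:

```python
# Illustrative: reduce a Qwen config to 2 hidden layers, as the helper above does.
from transformers import AutoConfig

from olive.common.hf.utils import _apply_test_model_config

config = AutoConfig.from_pretrained("Qwen/Qwen3-0.6B")
reduced = _apply_test_model_config(config, {"hidden_layers": 2})

print(reduced.num_hidden_layers)  # 2
# If the config defines layer_types, it is trimmed to the first 2 entries, and a
# dtype of "auto" is replaced with "float16" so newer transformers accept it.
```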