diff --git a/docs/source/how-to/cli/cli-convert-qwen-test.md b/docs/source/how-to/cli/cli-convert-qwen-test.md
new file mode 100644
index 0000000000..a26be99a59
--- /dev/null
+++ b/docs/source/how-to/cli/cli-convert-qwen-test.md
@@ -0,0 +1,76 @@
+# How to convert a Qwen model with a quick `--test` smoke check
+
+When you convert a large language model, it is often useful to validate the Olive command, environment, and conversion recipe on a much smaller model before spending time on the full checkpoint.
+
+The `--test` option does exactly that for Hugging Face models. Olive keeps the same model architecture but reduces it to a randomly initialized 2-layer test model, saves it to the folder you provide, and reuses that folder on later runs.
+
+This example uses [`Qwen/Qwen3-0.6B`](https://huggingface.co/Qwen/Qwen3-0.6B), but the same pattern works for other supported Hugging Face LLMs.
+
+## Step 1: generate the workflow config
+
+Start by generating the config that Olive will run for the Qwen conversion.
+
+```bash
+olive optimize \
+    --model_name_or_path Qwen/Qwen3-0.6B \
+    --device cpu \
+    --provider CPUExecutionProvider \
+    --precision int4 \
+    --output_path out/qwen-smoke \
+    --dry_run
+```
+
+This creates `out/qwen-smoke/config.json` without launching the full conversion yet.
+
+## Step 2: run a fast smoke test with `olive run --test`
+
+Use the generated config with `olive run` and pass `--test` so Olive swaps in a reduced random Qwen model.
+
+```bash
+olive run \
+    --config out/qwen-smoke/config.json \
+    --test out/qwen-test-model \
+    --output_path out/qwen-smoke-run
+```
+
+What this does:
+
+- `--test out/qwen-test-model` creates a reduced random Qwen model and saves it in `out/qwen-test-model`
+- later runs reuse the same saved test model instead of recreating it
+- `--output_path out/qwen-smoke-run` gives the smoke test its own output folder, so the generated ONNX artifacts are easy to find
+
+After the smoke test finishes, look under `out/qwen-smoke-run` for the exported ONNX model and related files.
+
+This is a quick way to confirm that:
+
+- Olive can load the source model
+- the selected optimization recipe is valid for your setup
+- the conversion path completes before you run the full model
+
+If you omit the folder and just pass `--test`, `olive run` saves the reduced model in a `test_model` subfolder of the run's output directory.
+
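+As an optional sanity check (not required by Olive, and assuming the `out/qwen-test-model` path used above), note that the saved folder is a regular Hugging Face model directory, so you can load its config and confirm that the reduced model keeps the Qwen3 architecture with only 2 hidden layers:
+
+```python
+from transformers import AutoConfig
+
+# Load the config of the reduced test model that `olive run --test` saved in Step 2.
+config = AutoConfig.from_pretrained("out/qwen-test-model")
+
+# The architecture family is unchanged; only the hidden-layer count is reduced.
+print(config.model_type)         # expected: qwen3
+print(config.num_hidden_layers)  # expected: 2
+```
+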
+## Step 3: run the full conversion
+
+Once the smoke test succeeds, rerun the conversion on the full Qwen checkpoint by removing `--test`.
+
+```bash
+olive run \
+    --config out/qwen-smoke/config.json \
+    --output_path out/qwen-full
+```
+
+At this point you already know that the Olive command and the conversion recipe work on the lightweight test model, so you can focus on the full-model run instead of debugging both at once.
+
+## Why keep the test model folder?
+
+The saved test model is useful beyond the first smoke test:
+
+- you can rerun the reduced conversion quickly while iterating on options
+- you can reuse the same reduced Hugging Face model later when comparing its outputs against the exported ONNX model
+- you avoid recreating a new random test checkpoint every time
+
+## Related docs
+
+- [How to use the `olive optimize` command to optimize a PyTorch model](cli-optimize)
+- [How to write a new workflow from scratch](../configure-workflows/build-workflow)
+- [CLI reference](../../reference/cli)
diff --git a/docs/source/how-to/index.md b/docs/source/how-to/index.md
index 8ec16aba9e..4e00636e7d 100644
--- a/docs/source/how-to/index.md
+++ b/docs/source/how-to/index.md
@@ -12,6 +12,7 @@ The Olive CLI provides a set of primitives such as `quantize`, `finetune`, `onnx
 - [how to use the `olive finetune` command to create (Q)LoRA adapters](cli/cli-finetune)
 - [How to use the `olive quantize` command to quantize your model with different precisions and techniques such as AWQ](cli/cli-quantize)
 - [How to use the `olive run` command to execute an Olive workflow.](cli/cli-run)
+- [How to convert a Qwen model with a quick `--test` smoke check](cli/cli-convert-qwen-test)
 
 # Olive Python API
 
@@ -43,6 +44,7 @@ The Olive CLI provides a set of primitives such as `quantize`, `finetune`, `onnx
 
 installation
 cli/cli-optimize
+cli/cli-convert-qwen-test
 cli/cli-auto-opt
 cli/cli-finetune
 cli/cli-quantize
diff --git a/olive/cli/base.py b/olive/cli/base.py
index 75fd2816c7..e31fb04ddf 100644
--- a/olive/cli/base.py
+++ b/olive/cli/base.py
@@ -82,6 +82,21 @@ def run(self):
         raise NotImplementedError
 
 
+def add_hf_test_model_config(input_model: dict, test_value, output_path: Optional[str] = None) -> dict:
+    if test_value in (None, False):
+        return input_model
+
+    test_model_output_path = test_value
+    # Use 2 layers to keep the test model fast and lightweight while preserving the original architecture family.
+    input_model["test_model_config"] = {"hidden_layers": 2}
+    if test_model_output_path is True:
+        if not output_path:
+            raise ValueError("--test requires an explicit folder when output_path is not available.")
+        test_model_output_path = str(Path(output_path) / "test_model")
+    input_model["test_model_path"] = test_model_output_path
+    return input_model
+
+
 def _get_hf_input_model(args: Namespace, model_path: OLIVE_RESOURCE_ANNOTATIONS) -> dict:
     """Get the input model config for HuggingFace model.
 
@@ -105,7 +120,7 @@ def _get_hf_input_model(args: Namespace, model_path: OLIVE_RESOURCE_ANNOTATIONS)
         input_model["adapter_path"] = args.adapter_path
     if getattr(args, "trust_remote_code", None) is not None:
         input_model["load_kwargs"]["trust_remote_code"] = args.trust_remote_code
-    return input_model
+    return add_hf_test_model_config(input_model, getattr(args, "test", None), getattr(args, "output_path", None))
 
 
 def _get_onnx_input_model(args: Namespace, model_path: str) -> dict:
@@ -371,6 +386,16 @@ def add_input_model_options(
     model_group.add_argument(
         "--trust_remote_code", action="store_true", help="Trust remote code when loading a huggingface model."
     )
+    model_group.add_argument(
+        "--test",
+        type=str,
+        nargs="?",
+        const=True,
+        help=(
+            "Use a randomly initialized test model with the same Hugging Face architecture and 2 hidden layers. "
+            "Optionally provide a folder where the generated test model should be saved and reused."
+        ),
+    )
 
     if enable_hf_adapter:
         assert enable_hf, "enable_hf must be True when enable_hf_adapter is True."
diff --git a/olive/cli/run.py b/olive/cli/run.py index 6d2a831aef..3d85522330 100644 --- a/olive/cli/run.py +++ b/olive/cli/run.py @@ -6,6 +6,7 @@ from olive.cli.base import ( BaseOliveCLICommand, + add_hf_test_model_config, add_input_model_options, add_logging_options, add_telemetry_options, @@ -59,6 +60,14 @@ def run(self): if input_model_config := get_input_model_config(self.args, required=False): print("Replacing input model config in run config") run_config["input_model"] = input_model_config + elif self.args.test not in (None, False): + input_model = run_config.get("input_model") + if not isinstance(input_model, dict) or input_model.get("type") != "HfModel": + raise ValueError("--test for olive run requires a Hugging Face input_model in the run config.") + output_path = ( + self.args.output_path or run_config.get("output_dir") or run_config.get("engine", {}).get("output_dir") + ) + run_config["input_model"] = add_hf_test_model_config(input_model, self.args.test, output_path) for arg_key, rc_key in [("output_path", "output_dir"), ("log_level", "log_severity_level")]: if (arg_value := getattr(self.args, arg_key)) is not None: diff --git a/olive/common/hf/model_io.py b/olive/common/hf/model_io.py index a3ebd73058..7d66d57574 100644 --- a/olive/common/hf/model_io.py +++ b/olive/common/hf/model_io.py @@ -27,6 +27,7 @@ def get_model_io_config( model_name: str, task: str, model: Optional["PreTrainedModel"] = None, + test_model_config: Optional[dict[str, Any]] = None, **kwargs, ) -> Optional[dict[str, Any]]: """Get the input/output config for the model and task. @@ -35,6 +36,7 @@ def get_model_io_config( model_name: The model name or path. task: The task type (e.g., "text-generation", "text-classification"). model: Optional loaded model for input signature inspection. + test_model_config: Optional overrides for creating a lightweight random test model from the same config. **kwargs: Additional arguments including use_cache. Returns: @@ -68,7 +70,7 @@ def get_model_io_config( return None # Get model config - model_config = get_model_config(model_name, **kwargs) + model_config = get_model_config(model_name, test_model_config=test_model_config, **kwargs) # Handle PEFT models actual_model = model @@ -92,6 +94,7 @@ def get_model_dummy_input( model_name: str, task: str, model: Optional["PreTrainedModel"] = None, + test_model_config: Optional[dict[str, Any]] = None, **kwargs, ) -> Optional[dict[str, Any]]: """Get dummy inputs for the model and task. @@ -100,6 +103,7 @@ def get_model_dummy_input( model_name: The model name or path. task: The task type. model: Optional loaded model for input signature inspection. + test_model_config: Optional overrides for creating a lightweight random test model from the same config. **kwargs: Additional arguments including use_cache, batch_size, sequence_length. Returns: @@ -133,7 +137,7 @@ def get_model_dummy_input( return None # Get model config (handles MLflow paths) - model_config = get_model_config(model_name, **kwargs) + model_config = get_model_config(model_name, test_model_config=test_model_config, **kwargs) # Handle PEFT models actual_model = model diff --git a/olive/common/hf/utils.py b/olive/common/hf/utils.py index a070e85ac8..2abc534eb0 100644 --- a/olive/common/hf/utils.py +++ b/olive/common/hf/utils.py @@ -2,9 +2,11 @@ # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. 
# -------------------------------------------------------------------------- +import inspect import logging +from copy import deepcopy from pathlib import Path -from typing import TYPE_CHECKING, Optional, Union +from typing import TYPE_CHECKING, Any, Optional, Union from transformers import AutoConfig, AutoModel, AutoTokenizer, GenerationConfig @@ -18,7 +20,73 @@ logger = logging.getLogger(__name__) -def load_model_from_task(task: str, model_name_or_path: str, **kwargs) -> "PreTrainedModel": +def _apply_test_model_config( + model_config: "PretrainedConfig", test_model_config: Optional[dict[str, Any]] = None +) -> "PretrainedConfig": + """Apply lightweight test-model overrides to a model config.""" + if not test_model_config: + return model_config + + model_config = deepcopy(model_config) + if "hidden_layers" in test_model_config: + hidden_layers = test_model_config["hidden_layers"] + elif "num_hidden_layers" in test_model_config: + hidden_layers = test_model_config["num_hidden_layers"] + else: + hidden_layers = 2 + if hidden_layers < 1: + raise ValueError("test_model_config.hidden_layers must be greater than 0.") + + updated = False + # Common Hugging Face configs do not use a single canonical field: + # BERT-style models use num_hidden_layers while GPT-style models often use n_layer/n_layers/num_layers. + for attr_name in ("num_hidden_layers", "num_layers", "n_layer", "n_layers"): + if hasattr(model_config, attr_name): + setattr(model_config, attr_name, hidden_layers) + updated = True + + if not updated: + raise ValueError("Unable to create a test model because the config does not expose a hidden-layer count.") + + layer_types = getattr(model_config, "layer_types", None) + if isinstance(layer_types, (list, tuple)): + model_config.layer_types = layer_types[:hidden_layers] + + dtype = getattr(model_config, "dtype", None) + if dtype == "auto": + # This is not allowed anymore with transformers >=4.57, + # we select float16 instead. 
+ model_config.dtype = "float16" + + return model_config + + +def _load_test_model(model_class: type, model_config: "PretrainedConfig", trust_remote_code: Optional[bool] = None): + """Instantiate a random-initialized HF model from config for test mode.""" + from_config_signature = inspect.signature(model_class.from_config) + supports_trust_remote_code = "trust_remote_code" in from_config_signature.parameters or any( + parameter.kind == inspect.Parameter.VAR_KEYWORD for parameter in from_config_signature.parameters.values() + ) + from_config_kwargs = {} + if supports_trust_remote_code and trust_remote_code is not None: + from_config_kwargs["trust_remote_code"] = trust_remote_code + return model_class.from_config(model_config, **from_config_kwargs) + + +def _save_test_model(model: "PreTrainedModel", output_dir: str): + output_path = Path(output_dir) + output_path.mkdir(parents=True, exist_ok=True) + logger.info("Saving generated test model to %s", output_path) + model.save_pretrained(str(output_path)) + + +def load_model_from_task( + task: str, + model_name_or_path: str, + test_model_config: Optional[dict[str, Any]] = None, + test_model_path: Optional[str] = None, + **kwargs, +) -> "PreTrainedModel": """Load huggingface model from task and model_name_or_path.""" from transformers.pipelines import check_task @@ -31,7 +99,7 @@ def load_model_from_task(task: str, model_name_or_path: str, **kwargs) -> "PreTr else: raise ValueError("unsupported transformers version") - model_config = get_model_config(model_name_or_path, **kwargs) + model_config = get_model_config(model_name_or_path, test_model_config=test_model_config, **kwargs) if getattr(model_config, "quantization_config", None): if not isinstance(model_config.quantization_config, dict): model_config.quantization_config = model_config.quantization_config.to_dict() @@ -59,10 +127,20 @@ def load_model_from_task(task: str, model_name_or_path: str, **kwargs) -> "PreTr model = None for i, model_class in enumerate(class_tuple): try: - model = from_pretrained(model_class, model_name_or_path, "model", **kwargs) + if test_model_config: + if test_model_path and (Path(test_model_path) / "config.json").exists(): + model = from_pretrained(model_class, test_model_path, "model", **kwargs) + else: + model = _load_test_model(model_class, model_config, kwargs.get("trust_remote_code")) + if test_model_path: + _save_test_model(model, test_model_path) + else: + model = from_pretrained(model_class, model_name_or_path, "model", **kwargs) logger.debug("Loaded model %s with name_or_path %s", model_class, model_name_or_path) break except (OSError, ValueError) as e: + if test_model_config: + raise if i == len(class_tuple) - 1: # len(class_tuple) == 1 covers most common tasks like text-generation, text-classification, etc # error could be device OOM, device_map: "auto" not supported, etc @@ -94,14 +172,16 @@ def from_pretrained(cls, model_name_or_path: str, mlflow_dir: str, **kwargs): return cls.from_pretrained(get_pretrained_name_or_path(model_name_or_path, mlflow_dir), **kwargs) -def get_model_config(model_name_or_path: str, **kwargs) -> "PretrainedConfig": +def get_model_config( + model_name_or_path: str, test_model_config: Optional[dict[str, Any]] = None, **kwargs +) -> "PretrainedConfig": """Get HF Config for the given model_name_or_path.""" model_config = from_pretrained(AutoConfig, model_name_or_path, "config", **kwargs) # add quantization config quantization_config = kwargs.get("quantization_config") if not quantization_config: - return model_config + return 
_apply_test_model_config(model_config, test_model_config) if hasattr(model_config, "quantization_config") and model_config.quantization_config: logger.warning( @@ -111,7 +191,7 @@ def get_model_config(model_name_or_path: str, **kwargs) -> "PretrainedConfig": ) else: model_config.quantization_config = quantization_config - return model_config + return _apply_test_model_config(model_config, test_model_config) def save_model_config(config: Union["PretrainedConfig", "GenerationConfig"], output_dir: str, **kwargs): diff --git a/olive/model/handler/hf.py b/olive/model/handler/hf.py index daac587bc5..fc659a57e6 100644 --- a/olive/model/handler/hf.py +++ b/olive/model/handler/hf.py @@ -27,8 +27,8 @@ @model_handler_registry("HFModel") class HfModelHandler(PyTorchModelHandlerBase, MLFlowTransformersMixin, HfMixin): # pylint: disable=too-many-ancestors - resource_keys: tuple[str, ...] = ("model_path", "adapter_path") - json_config_keys: tuple[str, ...] = ("task", "load_kwargs") + resource_keys: tuple[str, ...] = ("model_path", "adapter_path", "test_model_path") + json_config_keys: tuple[str, ...] = ("task", "load_kwargs", "test_model_config") def __init__( self, @@ -37,6 +37,8 @@ def __init__( load_kwargs: Union[dict[str, Any], HfLoadKwargs] = None, io_config: Union[dict[str, Any], IoConfig, str] = None, adapter_path: OLIVE_RESOURCE_ANNOTATIONS = None, + test_model_path: OLIVE_RESOURCE_ANNOTATIONS = None, + test_model_config: Optional[dict[str, Any]] = None, model_attributes: Optional[dict[str, Any]] = None, ): super().__init__( @@ -48,6 +50,7 @@ def __init__( self.add_resources(locals()) self.task = task self.load_kwargs = validate_config(load_kwargs, HfLoadKwargs, warn_unused_keys=False) if load_kwargs else None + self.test_model_config = test_model_config self.model_attributes = {**self.get_hf_model_config().to_dict(), **(self.model_attributes or {})} @@ -67,12 +70,23 @@ def adapter_path(self) -> str: """Return the path to the peft adapter.""" return self.get_resource("adapter_path") + @property + def test_model_path(self) -> str: + """Return the optional path to a persisted lightweight test model.""" + return self.get_resource("test_model_path") + def load_model(self, rank: int = None, cache_model: bool = True) -> "torch.nn.Module": """Load the model from the model path.""" if self.model: model = self.model else: - model = load_model_from_task(self.task, self.model_path, **self.get_load_kwargs()) + model = load_model_from_task( + self.task, + self.model_path, + test_model_config=self.test_model_config, + test_model_path=self.test_model_path, + **self.get_load_kwargs(), + ) # we only have peft adapters for now if self.adapter_path: diff --git a/olive/model/handler/mixin/hf.py b/olive/model/handler/mixin/hf.py index 730d6bd6ec..3e400c64e7 100644 --- a/olive/model/handler/mixin/hf.py +++ b/olive/model/handler/mixin/hf.py @@ -39,7 +39,11 @@ def get_hf_model_config(self, exclude_load_keys: Optional[list[str]] = None) -> :param exclude_load_keys: list of keys to exclude from load_kwargs :return: model config """ - return get_model_config(self.model_path, **self.get_load_kwargs(exclude_load_keys)) + return get_model_config( + self.model_path, + test_model_config=getattr(self, "test_model_config", None), + **self.get_load_kwargs(exclude_load_keys), + ) def get_hf_generation_config(self, exclude_load_keys: Optional[list[str]] = None) -> Optional["GenerationConfig"]: """Get generation config for the model if it exists. 
@@ -114,7 +118,13 @@ def save_metadata(self, output_dir: str, exclude_load_keys: Optional[list[str]] def get_hf_io_config(self) -> Optional[dict[str, Any]]: """Get Io config for the model.""" - return get_model_io_config(self.model_path, self.task, self.load_model(), **self.get_load_kwargs()) + return get_model_io_config( + self.model_path, + self.task, + self.load_model(), + test_model_config=getattr(self, "test_model_config", None), + **self.get_load_kwargs(), + ) def get_hf_dummy_inputs(self) -> Optional[dict[str, Any]]: """Get dummy inputs for the model.""" @@ -122,6 +132,7 @@ def get_hf_dummy_inputs(self) -> Optional[dict[str, Any]]: self.model_path, self.task, model=self.load_model(), + test_model_config=getattr(self, "test_model_config", None), **self.get_load_kwargs(), ) diff --git a/olive/passes/onnx/model_builder.py b/olive/passes/onnx/model_builder.py index e1062e02b9..24f3c27785 100644 --- a/olive/passes/onnx/model_builder.py +++ b/olive/passes/onnx/model_builder.py @@ -247,6 +247,15 @@ def _run_for_config( input_path = str(model.get_resource("model_path")) else: model_path = model.model_name_or_path + if model.test_model_config: + if not model.test_model_path: + raise ValueError( + "ModelBuilder requires test_model_path to be set when test_model_config is provided. " + "Please specify the path where the test model should be saved." + ) + if not (Path(model.test_model_path) / "config.json").exists(): + model.load_model(cache_model=False) + model_path = model.test_model_path # provide the model path as input path, model builder uses input_path for quantized models input_path = model_path if model.adapter_path: diff --git a/test/cli/test_base.py b/test/cli/test_base.py index 38c96c6aa9..bb34cef3f2 100644 --- a/test/cli/test_base.py +++ b/test/cli/test_base.py @@ -229,6 +229,38 @@ def test_insert_input_model_invalid_hf_model_name(): get_input_model_config(args) +@patch("huggingface_hub.repo_exists", return_value=True) +def test_get_input_model_config_hf_test_model(_): + args = SimpleNamespace( + model_name_or_path="hf_model", + trust_remote_code=False, + task="text-generation", + model_script=None, + script_dir=None, + test="saved_test_model", + ) + + config = get_input_model_config(args) + + assert config["test_model_config"] == {"hidden_layers": 2} + assert config["test_model_path"] == "saved_test_model" + + +@patch("huggingface_hub.repo_exists", return_value=True) +def test_get_input_model_config_hf_test_model_requires_path_without_output_path(_): + args = SimpleNamespace( + model_name_or_path="hf_model", + trust_remote_code=False, + task="text-generation", + model_script=None, + script_dir=None, + test=True, + ) + + with pytest.raises(ValueError, match=r"--test requires an explicit folder when output_path is not available\."): + get_input_model_config(args) + + def test_insert_input_model_cli_output_model(): # setup model_path = str(Path(__file__).parent.resolve() / "output_model") diff --git a/test/cli/test_cli.py b/test/cli/test_cli.py index a7cb39e244..8489b4586f 100644 --- a/test/cli/test_cli.py +++ b/test/cli/test_cli.py @@ -5,8 +5,9 @@ import json import subprocess import sys +import types from pathlib import Path -from unittest.mock import patch +from unittest.mock import MagicMock, patch import pytest @@ -112,8 +113,10 @@ def test_workflow_run_command(mock_run, tempdir, list_required_packages, tmp_pat @patch("olive.workflows.run") -def test_workflow_run_command_with_overrides(mock_run, tmp_path): +@patch("huggingface_hub.repo_exists", return_value=True) +def 
test_workflow_run_command_with_overrides(mock_repo_exists, mock_run, tmp_path): # setup + # Prevent a live Hugging Face repo lookup when the CLI resolves the HF input model override. config_path = tmp_path / "config.json" config_path.write_text( json.dumps({"input_model": {"key": "value"}, "engine": {"log_severity_level": 3}, "output_dir": "output"}) @@ -150,6 +153,52 @@ def test_workflow_run_command_with_overrides(mock_run, tmp_path): ) +@patch("olive.workflows.run") +def test_workflow_run_command_with_test_override(mock_run, tmp_path): + config_path = tmp_path / "config.json" + config_path.write_text( + json.dumps( + { + "input_model": { + "type": "HfModel", + "model_path": "hf-internal-testing/tiny-random-LlamaForCausalLM", + "load_kwargs": {"attn_implementation": "eager", "trust_remote_code": False}, + }, + "output_dir": str(tmp_path / "output"), + } + ) + ) + command_args = ["run", "--run-config", str(config_path), "--test"] + + cli_main(command_args) + + mock_run.assert_called_once_with( + { + "input_model": { + "type": "HfModel", + "model_path": "hf-internal-testing/tiny-random-LlamaForCausalLM", + "load_kwargs": {"attn_implementation": "eager", "trust_remote_code": False}, + "test_model_config": {"hidden_layers": 2}, + "test_model_path": str(tmp_path / "output" / "test_model"), + }, + "output_dir": str(tmp_path / "output"), + }, + list_required_packages=False, + package_config=None, + tempdir=None, + ) + + +def test_workflow_run_command_with_test_requires_hf_input_model(tmp_path): + config_path = tmp_path / "config.json" + config_path.write_text(json.dumps({"input_model": {"type": "OnnxModel", "model_path": "model.onnx"}})) + + with pytest.raises( + ValueError, match=r"--test for olive run requires a Hugging Face input_model in the run config\." 
+ ): + cli_main(["run", "--run-config", str(config_path), "--test"]) + + @patch("olive.platform_sdk.qualcomm.configure.configure.configure") def test_configure_qualcomm_sdk_command(mock_configure): # setup @@ -190,6 +239,119 @@ def test_finetune_command(_, mock_run, tmp_path): assert mock_run.call_count == 1 +@patch("huggingface_hub.repo_exists", return_value=True) +def test_optimize_command_test_model_config(_, tmp_path): + output_dir = tmp_path / "output_dir" + test_model_dir = tmp_path / "saved_test_model" + command_args = [ + "optimize", + "-m", + "dummy-model-id", + "--test", + str(test_model_dir), + "--dry_run", + "-o", + str(output_dir), + ] + + cli_main(command_args) + + config = json.loads((output_dir / "config.json").read_text()) + assert config["input_model"]["test_model_config"] == {"hidden_layers": 2} + assert config["input_model"]["test_model_path"] == str(test_model_dir) + + +@patch("huggingface_hub.repo_exists", return_value=True) +def test_optimize_dry_run_then_run_with_test_model(mock_repo_exists, tmp_path): + from olive.model.config.model_config import ModelConfig + from olive.model.handler.hf import HfModelHandler + from olive.passes.onnx.model_builder import ModelBuilder + from olive.passes.pytorch.gptq import Gptq + + model_id = "hf-internal-testing/tiny-random-LlamaForCausalLM" + config_output_dir = tmp_path / "test-smoke" + test_model_dir = tmp_path / "test-model" + run_output_dir = tmp_path / "test-smoke-run" + + cli_main( + [ + "optimize", + "-m", + model_id, + "--device", + "cpu", + "--provider", + "CPUExecutionProvider", + "--precision", + "int4", + "--output_path", + str(config_output_dir), + "--dry_run", + ] + ) + + config_path = config_output_dir / "config.json" + assert config_path.exists() + assert mock_repo_exists.called + + def fake_load_model(handler, *args, **kwargs): + Path(handler.test_model_path).mkdir(parents=True, exist_ok=True) + (Path(handler.test_model_path) / "config.json").write_text(json.dumps({"model_type": "llama"})) + return MagicMock() + + def fake_gptq_run(self, model, pass_config, output_model_path): + del pass_config, output_model_path + return model + + def fake_create_model(**kwargs): + output_dir = Path(kwargs["output_dir"]) + output_dir.mkdir(parents=True, exist_ok=True) + (output_dir / kwargs["filename"]).write_text("dummy ONNX file") + (output_dir / "genai_config.json").write_text("{}") + + fake_builder = types.ModuleType("onnxruntime_genai.models.builder") + fake_builder.create_model = MagicMock(side_effect=fake_create_model) + fake_models = types.ModuleType("onnxruntime_genai.models") + fake_models.builder = fake_builder + fake_ort_genai = types.ModuleType("onnxruntime_genai") + fake_ort_genai.models = fake_models + fake_ort_genai.__version__ = "0.10.0" + mock_cfg = MagicMock() + mock_cfg.to_dict.return_value = {} + + with ( + patch.object(ModelConfig, "get_model_identifier", return_value="tiny-random-llama"), + patch.object(HfModelHandler, "get_hf_model_config", return_value=mock_cfg), + patch.object(HfModelHandler, "load_model", new=fake_load_model), + patch.object(HfModelHandler, "save_metadata", return_value=[]), + patch.object(Gptq, "_run_for_config", autospec=True, side_effect=fake_gptq_run), + patch.object(ModelBuilder, "maybe_patch_quant", return_value=None), + patch.dict( + sys.modules, + { + "onnxruntime_genai": fake_ort_genai, + "onnxruntime_genai.models": fake_models, + "onnxruntime_genai.models.builder": fake_builder, + }, + ), + ): + cli_main( + [ + "run", + "--config", + str(config_path), + "--test", + 
str(test_model_dir), + "--output_path", + str(run_output_dir), + ] + ) + + assert (test_model_dir / "config.json").exists() + assert (run_output_dir / "model.onnx").exists() + assert (run_output_dir / "genai_config.json").exists() + + @patch("olive.workflows.run") @patch("olive.model.handler.diffusers.is_valid_diffusers_model", return_value=True) def test_diffusion_lora_command(_, mock_run, tmp_path): diff --git a/test/common/test_hf.py b/test/common/test_hf.py index 00b4cbd043..cab00e3df3 100644 --- a/test/common/test_hf.py +++ b/test/common/test_hf.py @@ -6,9 +6,10 @@ import pytest import torch +from transformers import BertConfig, GPT2Config, Qwen3Config from olive.common.hf.model_io import get_model_dummy_input, get_model_io_config -from olive.common.hf.utils import load_model_from_task +from olive.common.hf.utils import _apply_test_model_config, _load_test_model, load_model_from_task def test_load_model_from_task(): @@ -21,6 +22,170 @@ def test_load_model_from_task(): assert isinstance(model, torch.nn.Module) +@pytest.mark.parametrize( + ("model_config", "hidden_layers_attr"), + [ + (BertConfig(num_hidden_layers=12), "num_hidden_layers"), # pylint: disable=unexpected-keyword-arg + (GPT2Config(n_layer=12), "n_layer"), # pylint: disable=unexpected-keyword-arg + ], +) +def test_load_model_from_task_test_model_config(model_config, hidden_layers_attr): + created_model = MagicMock(spec=torch.nn.Module) + + with ( + patch("transformers.pipelines.check_task") as mock_check_task, + patch("olive.common.hf.utils.from_pretrained", return_value=model_config) as mock_from_pretrained, + ): + mock_model_class = MagicMock() + mock_model_class.from_config.return_value = created_model + mock_check_task.return_value = ("text-classification", {"pt": (mock_model_class,)}, None) + + model = load_model_from_task("text-classification", "dummy-model", test_model_config={"hidden_layers": 2}) + + assert model is created_model + mock_from_pretrained.assert_called_once() + mock_model_class.from_config.assert_called_once() + assert getattr(mock_model_class.from_config.call_args.args[0], hidden_layers_attr) == 2 + + +def test_load_model_from_task_test_model_config_fails_without_fallback(): + model_config = BertConfig(num_hidden_layers=12) # pylint: disable=unexpected-keyword-arg + + with ( + patch("transformers.pipelines.check_task") as mock_check_task, + patch("olive.common.hf.utils.from_pretrained", return_value=model_config), + ): + first_model_class = MagicMock() + first_model_class.from_config.side_effect = ValueError("unexpected architecture") + second_model_class = MagicMock() + second_model_class.from_config.return_value = MagicMock(spec=torch.nn.Module) + mock_check_task.return_value = ("text-classification", {"pt": (first_model_class, second_model_class)}, None) + + with pytest.raises(ValueError, match="unexpected architecture"): + load_model_from_task("text-classification", "dummy-model", test_model_config={"hidden_layers": 2}) + + first_model_class.from_config.assert_called_once() + second_model_class.from_config.assert_not_called() + + +def test_load_model_from_task_test_model_config_saves_model(tmp_path): + model_config = BertConfig(num_hidden_layers=12) # pylint: disable=unexpected-keyword-arg + created_model = MagicMock() + test_model_path = tmp_path / "saved_test_model" + + with ( + patch("transformers.pipelines.check_task") as mock_check_task, + patch("olive.common.hf.utils.from_pretrained", return_value=model_config), + ): + mock_model_class = MagicMock() + 
mock_model_class.from_config.return_value = created_model + mock_check_task.return_value = ("text-classification", {"pt": (mock_model_class,)}, None) + + model = load_model_from_task( + "text-classification", + "dummy-model", + test_model_config={"num_hidden_layers": 2}, + test_model_path=str(test_model_path), + ) + + assert model is created_model + mock_model_class.from_config.assert_called_once() + created_model.save_pretrained.assert_called_once_with(str(test_model_path)) + + +def test_load_model_from_task_test_model_config_reuses_saved_model(tmp_path): + model_config = BertConfig(num_hidden_layers=12) # pylint: disable=unexpected-keyword-arg + test_model_path = tmp_path / "saved_test_model" + test_model_path.mkdir() + (test_model_path / "config.json").write_text("{}") + loaded_model = MagicMock(spec=torch.nn.Module) + + with ( + patch("transformers.pipelines.check_task") as mock_check_task, + patch( + "olive.common.hf.utils.from_pretrained", side_effect=[model_config, loaded_model] + ) as mock_from_pretrained, + ): + mock_model_class = MagicMock() + mock_check_task.return_value = ("text-classification", {"pt": (mock_model_class,)}, None) + + model = load_model_from_task( + "text-classification", + "dummy-model", + test_model_config={"num_hidden_layers": 2}, + test_model_path=str(test_model_path), + ) + + assert model is loaded_model + mock_model_class.from_config.assert_not_called() + assert mock_from_pretrained.call_args_list[1].args[1] == str(test_model_path) + + +def test_apply_test_model_config_updates_qwen3_layer_types(): + model_config = Qwen3Config() + model_config.num_hidden_layers = 4 + model_config.layer_types = model_config.layer_types[:4] + + updated_config = _apply_test_model_config(model_config, {"hidden_layers": 2}) + + assert updated_config.num_hidden_layers == 2 + assert updated_config.layer_types == model_config.layer_types[:2] + reloaded_config = Qwen3Config(**updated_config.to_dict()) + assert reloaded_config.num_hidden_layers == 2 + assert len(reloaded_config.layer_types) == 2 + assert reloaded_config.layer_types == model_config.layer_types[:2] + + +def test_load_test_model_omits_unsupported_trust_remote_code_kwarg(): + model_config = BertConfig(num_hidden_layers=12) # pylint: disable=unexpected-keyword-arg + captured = {} + + class MockModelClass: + @staticmethod + def from_config(config): + captured["config"] = config + return config + + created_model = _load_test_model(MockModelClass, model_config, trust_remote_code=True) + + assert created_model is model_config + assert captured == {"config": model_config} + + +def test_load_test_model_omits_none_trust_remote_code_kwarg(): + model_config = BertConfig(num_hidden_layers=12) # pylint: disable=unexpected-keyword-arg + captured = {} + + class MockModelClass: + @staticmethod + def from_config(config, **kwargs): + captured["config"] = config + captured["kwargs"] = kwargs + return config + + created_model = _load_test_model(MockModelClass, model_config) + + assert created_model is model_config + assert captured == {"config": model_config, "kwargs": {}} + + +def test_load_test_model_passes_supported_trust_remote_code_kwarg(): + model_config = BertConfig(num_hidden_layers=12) # pylint: disable=unexpected-keyword-arg + captured = {} + + class MockModelClass: + @staticmethod + def from_config(config, trust_remote_code=None): + captured["config"] = config + captured["trust_remote_code"] = trust_remote_code + return config + + created_model = _load_test_model(MockModelClass, model_config, trust_remote_code=True) + + assert 
created_model is model_config + assert captured == {"config": model_config, "trust_remote_code": True} + + @pytest.mark.parametrize( ("exceptions", "expected_exception", "expected_message"), [ diff --git a/test/passes/onnx/test_model_builder.py b/test/passes/onnx/test_model_builder.py index be5b728c65..0f71535db9 100644 --- a/test/passes/onnx/test_model_builder.py +++ b/test/passes/onnx/test_model_builder.py @@ -6,7 +6,7 @@ import sys import types from pathlib import Path -from unittest.mock import Mock +from unittest.mock import MagicMock, Mock, patch import onnx import pytest @@ -17,6 +17,8 @@ from olive.passes.pytorch.rtn import Rtn from test.utils import make_local_tiny_llama +TINY_RANDOM_LLAMA_MODEL_ID = "hf-internal-testing/tiny-random-LlamaForCausalLM" + def _create_test_onnx_model(model_path: Path, node_name: str): input_info = onnx.helper.make_tensor_value_info("input", onnx.TensorProto.FLOAT, [1, 1]) @@ -129,6 +131,62 @@ def test_model_builder_layer_annotations(tmp_path, layer_annotations): ) +def test_model_builder_uses_saved_test_model_path(tmp_path): + test_model_path = tmp_path / "saved_test_model" + output_folder = tmp_path / "output_model" + + mock_cfg = MagicMock() + mock_cfg.to_dict.return_value = {} + with patch.object(HfModelHandler, "get_hf_model_config", return_value=mock_cfg): + input_model = HfModelHandler( + model_path=TINY_RANDOM_LLAMA_MODEL_ID, + test_model_config={"hidden_layers": 2}, + test_model_path=str(test_model_path), + ) + + def materialize_test_model(*args, **kwargs): + test_model_path.mkdir(parents=True, exist_ok=True) + (test_model_path / "config.json").write_text("{}") + return MagicMock() + + def fake_create_model(*_, **kwargs): + output_dir = Path(kwargs["output_dir"]) + (output_dir / kwargs["filename"]).write_text("dummy onnx file") + (output_dir / "genai_config.json").write_text("{}") + + fake_builder = types.ModuleType("onnxruntime_genai.models.builder") + fake_builder.create_model = MagicMock(side_effect=fake_create_model) + fake_models = types.ModuleType("onnxruntime_genai.models") + fake_models.builder = fake_builder + fake_ort_genai = types.ModuleType("onnxruntime_genai") + fake_ort_genai.models = fake_models + fake_ort_genai.__version__ = "0.0.0" + + p = create_pass_from_dict(ModelBuilder, {"precision": "fp32"}, disable_search=True) + + with ( + patch.object(ModelBuilder, "maybe_patch_quant"), + patch.dict( + sys.modules, + { + "onnxruntime_genai": fake_ort_genai, + "onnxruntime_genai.models": fake_models, + "onnxruntime_genai.models.builder": fake_builder, + }, + ), + patch.object(input_model, "load_model", side_effect=materialize_test_model) as mock_load_model, + patch.object(input_model, "save_metadata", return_value=[]), + ): + output_model = p.run(input_model, output_folder) + + assert isinstance(output_model, ONNXModelHandler) + assert mock_load_model.call_count == 1 + assert Path(output_model.model_path).exists() + assert test_model_path.exists() + assert fake_builder.create_model.call_args.kwargs["model_name"] == str(test_model_path) + assert fake_builder.create_model.call_args.kwargs["input_path"] == str(test_model_path) + + def test_model_builder_apply_annotations_on_single_file_fallback(tmp_path, monkeypatch): def fake_create_model( model_name, input_path, output_dir, precision, execution_provider, cache_dir, filename, **kwargs @@ -144,6 +202,8 @@ def fake_create_model( input_model = Mock(spec=HfModelHandler) input_model.model_name_or_path = "dummy-model" input_model.adapter_path = None + input_model.test_model_config = None + 
input_model.test_model_path = None input_model.model_attributes = {"split_assignments": {"model.layers.0": 1}} p = create_pass_from_dict( @@ -179,6 +239,8 @@ def fake_create_model( input_model = Mock(spec=HfModelHandler) input_model.model_name_or_path = "dummy-model" input_model.adapter_path = None + input_model.test_model_config = None + input_model.test_model_path = None input_model.model_attributes = {} p = create_pass_from_dict(ModelBuilder, {"precision": "fp32"}, disable_search=True)