From d82e7915bf42404fd750d568bf1cd3e157d6f5b2 Mon Sep 17 00:00:00 2001 From: vbaddi Date: Wed, 18 Mar 2026 09:30:31 +0000 Subject: [PATCH 1/4] nit: fix the onnx_path issue and transforms check Signed-off-by: vbaddi --- QEfficient/base/modeling_qeff.py | 44 ++++++++++++++++++++---------- QEfficient/base/onnx_transforms.py | 16 +++++++++-- tests/test_model_quickcheck.py | 34 +++++++++++++++++++++++ 3 files changed, 78 insertions(+), 16 deletions(-) diff --git a/QEfficient/base/modeling_qeff.py b/QEfficient/base/modeling_qeff.py index f3581d281..8aa2bb3ee 100644 --- a/QEfficient/base/modeling_qeff.py +++ b/QEfficient/base/modeling_qeff.py @@ -18,7 +18,12 @@ import onnx import torch -from QEfficient.base.onnx_transforms import BaseOnnxTransform, OnnxTransformPipeline +from QEfficient.base.onnx_transforms import ( + BaseOnnxTransform, + FP16ClipTransform, + OnnxTransformPipeline, + SplitTensorsTransform, +) from QEfficient.base.pytorch_transforms import PytorchTransform from QEfficient.compile.qnn_compiler import compile as qnn_compile from QEfficient.generation.cloud_infer import QAICInferenceSession @@ -49,9 +54,8 @@ class QEFFBaseModel(ABC): _pytorch_transforms: List[PytorchTransform] _onnx_transforms = [BaseOnnxTransform] - @classmethod - def _transform_names(cls) -> List[str]: - return [x.__name__ for x in cls._pytorch_transforms + cls._onnx_transforms] + def _transform_names(self) -> List[str]: + return [x.__name__ for x in self._pytorch_transforms + self._onnx_transforms] def __init__(self, model: torch.nn.Module, **kwargs) -> None: super().__init__() @@ -236,15 +240,20 @@ def _export( # Return early if ONNX already exists if onnx_path.is_file(): - self.onnx_path = onnx_path - return onnx_path + try: + _ = onnx.load(onnx_path, load_external_data=False) + self.onnx_path = onnx_path + return onnx_path + except Exception as e: + logger.warning(f"Invalid cached ONNX found at {onnx_path}, re-exporting: {e}") + onnx_path.unlink(missing_ok=True) # check if the model is 
in meta state or weights are offloaded self._model_offloaded_check() - # Export directly into export_dir so any external data files are retained. - export_dir.mkdir(parents=True, exist_ok=True) - tmp_onnx_path = onnx_path + # Setup temporary paths + tmp_onnx_dir = export_dir / "onnx_tmp" + tmp_onnx_dir.mkdir(parents=True, exist_ok=True) # Create input_names from example_inputs input_names = [] @@ -274,7 +283,7 @@ def _export( torch.onnx.export( self.model, (example_inputs,), - str(tmp_onnx_path), + str(onnx_path), input_names=input_names, output_names=output_names, dynamic_axes=dynamic_axes, @@ -283,11 +292,13 @@ def _export( ) logger.info("PyTorch export successful") _ = self._offload_model_weights(offload_pt_weights) - model = onnx.load(tmp_onnx_path, load_external_data=False) + model = onnx.load(onnx_path, load_external_data=False) - # Clear temporary references + needs_external_tensor_data = any( + transform in self._onnx_transforms for transform in (FP16ClipTransform, SplitTensorsTransform) + ) transform_kwargs = { - "onnx_base_dir": str(export_dir), + "onnx_base_dir": str(export_dir) if needs_external_tensor_data else None, "model_name": self.model_name, } if onnx_transform_kwargs is not None: @@ -302,7 +313,9 @@ def _export( ) logger.info("ONNX transforms applied") - onnx.save(model, onnx_path) + onnx_path_tmp = onnx_path.with_suffix(onnx_path.suffix + ".tmp") + onnx.save(model, onnx_path_tmp) + onnx_path_tmp.replace(onnx_path) del model gc.collect() logger.info("Transformed ONNX saved") @@ -311,6 +324,9 @@ def _export( logger.error(f"ONNX export or transforms failed: {e}") raise e + finally: + shutil.rmtree(tmp_onnx_dir, ignore_errors=True) + self.onnx_path = onnx_path return onnx_path diff --git a/QEfficient/base/onnx_transforms.py b/QEfficient/base/onnx_transforms.py index 16697cec9..3993a2ec1 100644 --- a/QEfficient/base/onnx_transforms.py +++ b/QEfficient/base/onnx_transforms.py @@ -106,16 +106,27 @@ class CustomOpTransform(BaseOnnxTransform): 
@classmethod def apply(cls, model: ModelProto) -> bool: op_applied = False + + # Register with PyTorch ONNX exporter (for export time) for op_name, (func_class, _) in cls._custom_ops.items(): if hasattr(func_class, "symbolic"): torch.onnx.register_custom_op_symbolic(f"::{op_name}", func_class.symbolic, ONNX_EXPORT_OPSET) + used_op_types = {node.op_type for node in model.graph.node} + for function_proto in model.functions: + used_op_types.update(node.op_type for node in function_proto.node) + + # Add function prototypes to model existing = {f.name for f in model.functions} - for _, onnxscript_func in cls._custom_ops.values(): + + for func_name, onnxscript_func in cls._custom_ops.values(): proto = onnxscript_func.to_function_proto() + if proto.name not in used_op_types: + continue if proto.name not in existing: model.functions.append(proto) op_applied = True + return op_applied @@ -228,7 +239,8 @@ def apply( do_split = SplitTensorsTransform in requested fp16_min, fp16_max = np.finfo(np.float16).min, np.finfo(np.float16).max file_num_tracker = {"num": 0, "size": 0} - external_data_helper.load_external_data_for_model(model, onnx_base_dir) + if onnx_base_dir is not None: + external_data_helper.load_external_data_for_model(model, onnx_base_dir) if do_fp16 or do_split: for tensor in external_data_helper._get_all_tensors(model): diff --git a/tests/test_model_quickcheck.py b/tests/test_model_quickcheck.py index 9a26580a5..bff68ec62 100644 --- a/tests/test_model_quickcheck.py +++ b/tests/test_model_quickcheck.py @@ -444,6 +444,40 @@ def test_causal_subfunction_export_smoke(tmp_path): assert not any("QEffGPT2Block" in name for name in without_names) +@pytest.mark.parametrize( + ("model_type", "model_id"), + sorted(CAUSAL_RUNTIME_MODEL_IDS.items()), + ids=sorted(CAUSAL_RUNTIME_MODEL_IDS), +) +def test_causal_compile_smoke_invokes_compile_path_with_subfunctions(monkeypatch, model_type, model_id, tmp_path): + del model_type + compile_calls = {} + + def _fake_compile(self, 
onnx_path=None, compile_dir=None, **kwargs): + compile_calls["onnx_path"] = onnx_path + compile_calls["compile_dir"] = compile_dir + compile_calls["kwargs"] = kwargs + qpc_path = tmp_path / "qpc" + qpc_path.mkdir(parents=True, exist_ok=True) + return str(qpc_path) + + monkeypatch.setattr(QEFFAutoModelForCausalLM, "_compile", _fake_compile, raising=False) + + try: + qeff_model = QEFFAutoModelForCausalLM.from_pretrained(model_id, trust_remote_code=True) + except Exception as exc: + _skip_on_model_fetch_error(exc, model_id) + + qpc = qeff_model.compile(prefill_seq_len=8, ctx_len=32, use_onnx_subfunctions=True) + + assert Path(qpc).name == "qpc" + assert compile_calls["kwargs"]["use_onnx_subfunctions"] is True + assert compile_calls["kwargs"]["specializations"][0]["seq_len"] == 8 + assert compile_calls["kwargs"]["specializations"][0]["ctx_len"] == 32 + assert compile_calls["kwargs"]["compile_only"] is True + assert compile_calls["kwargs"]["retained_state"] is True + + @pytest.mark.llm_model @pytest.mark.parametrize( ("model_type", "model_id"), From c25e97ab937b5e1060b9febdd76febfa32195c00 Mon Sep 17 00:00:00 2001 From: vbaddi Date: Wed, 18 Mar 2026 10:19:59 +0000 Subject: [PATCH 2/4] nit: remove the cache validation warning block from modeling_qeff Signed-off-by: vbaddi --- QEfficient/base/modeling_qeff.py | 9 ++------ tests/test_model_quickcheck.py | 37 +++++++++++++++----------------- 2 files changed, 19 insertions(+), 27 deletions(-) diff --git a/QEfficient/base/modeling_qeff.py b/QEfficient/base/modeling_qeff.py index 8aa2bb3ee..fd50fe3e6 100644 --- a/QEfficient/base/modeling_qeff.py +++ b/QEfficient/base/modeling_qeff.py @@ -240,13 +240,8 @@ def _export( # Return early if ONNX already exists if onnx_path.is_file(): - try: - _ = onnx.load(onnx_path, load_external_data=False) - self.onnx_path = onnx_path - return onnx_path - except Exception as e: - logger.warning(f"Invalid cached ONNX found at {onnx_path}, re-exporting: {e}") - onnx_path.unlink(missing_ok=True) + 
self.onnx_path = onnx_path + return onnx_path # check if the model is in meta state or weights are offloaded self._model_offloaded_check() diff --git a/tests/test_model_quickcheck.py b/tests/test_model_quickcheck.py index bff68ec62..1c7b74c2b 100644 --- a/tests/test_model_quickcheck.py +++ b/tests/test_model_quickcheck.py @@ -444,38 +444,35 @@ def test_causal_subfunction_export_smoke(tmp_path): assert not any("QEffGPT2Block" in name for name in without_names) +@pytest.mark.llm_model @pytest.mark.parametrize( ("model_type", "model_id"), sorted(CAUSAL_RUNTIME_MODEL_IDS.items()), ids=sorted(CAUSAL_RUNTIME_MODEL_IDS), ) -def test_causal_compile_smoke_invokes_compile_path_with_subfunctions(monkeypatch, model_type, model_id, tmp_path): +def test_causal_compile_with_subfunctions_all_models(model_type, model_id, tmp_path): del model_type - compile_calls = {} - - def _fake_compile(self, onnx_path=None, compile_dir=None, **kwargs): - compile_calls["onnx_path"] = onnx_path - compile_calls["compile_dir"] = compile_dir - compile_calls["kwargs"] = kwargs - qpc_path = tmp_path / "qpc" - qpc_path.mkdir(parents=True, exist_ok=True) - return str(qpc_path) - - monkeypatch.setattr(QEFFAutoModelForCausalLM, "_compile", _fake_compile, raising=False) - try: qeff_model = QEFFAutoModelForCausalLM.from_pretrained(model_id, trust_remote_code=True) except Exception as exc: _skip_on_model_fetch_error(exc, model_id) - qpc = qeff_model.compile(prefill_seq_len=8, ctx_len=32, use_onnx_subfunctions=True) + try: + qpc = qeff_model.compile( + prefill_seq_len=8, + ctx_len=32, + use_onnx_subfunctions=True, + compile_dir=tmp_path / "compile-with-subfunctions", + ) + except Exception as exc: + pytest.skip( + f"Skipping compile for {model_id}: compile backend unavailable or unsupported in this environment " + f"({type(exc).__name__}: {exc})" + ) - assert Path(qpc).name == "qpc" - assert compile_calls["kwargs"]["use_onnx_subfunctions"] is True - assert 
compile_calls["kwargs"]["specializations"][0]["seq_len"] == 8 - assert compile_calls["kwargs"]["specializations"][0]["ctx_len"] == 32 - assert compile_calls["kwargs"]["compile_only"] is True - assert compile_calls["kwargs"]["retained_state"] is True + qpc_path = Path(qpc) + assert qpc_path.name == "qpc" + assert qpc_path.is_dir() @pytest.mark.llm_model From 8722394a078ca0e3eb4b76c6e68ca4bcdecf5c44 Mon Sep 17 00:00:00 2001 From: vbaddi Date: Thu, 19 Mar 2026 07:55:11 +0000 Subject: [PATCH 3/4] chore(export): remove unused onnx_tmp staging and silence empty-transform warning Signed-off-by: vbaddi --- QEfficient/base/modeling_qeff.py | 7 +------ QEfficient/base/onnx_transforms.py | 3 --- 2 files changed, 1 insertion(+), 9 deletions(-) diff --git a/QEfficient/base/modeling_qeff.py b/QEfficient/base/modeling_qeff.py index fd50fe3e6..6f22e867e 100644 --- a/QEfficient/base/modeling_qeff.py +++ b/QEfficient/base/modeling_qeff.py @@ -246,9 +246,7 @@ def _export( # check if the model is in meta state or weights are offloaded self._model_offloaded_check() - # Setup temporary paths - tmp_onnx_dir = export_dir / "onnx_tmp" - tmp_onnx_dir.mkdir(parents=True, exist_ok=True) + export_dir.mkdir(parents=True, exist_ok=True) # Create input_names from example_inputs input_names = [] @@ -319,9 +317,6 @@ def _export( logger.error(f"ONNX export or transforms failed: {e}") raise e - finally: - shutil.rmtree(tmp_onnx_dir, ignore_errors=True) - self.onnx_path = onnx_path return onnx_path diff --git a/QEfficient/base/onnx_transforms.py b/QEfficient/base/onnx_transforms.py index 3993a2ec1..2ba53829a 100644 --- a/QEfficient/base/onnx_transforms.py +++ b/QEfficient/base/onnx_transforms.py @@ -7,7 +7,6 @@ import logging import os -import warnings from concurrent.futures import ThreadPoolExecutor, as_completed from typing import Any, Dict, List, Optional, Tuple, Type @@ -213,8 +212,6 @@ class OnnxTransformPipeline(BaseOnnxTransform): """Pipeline to apply multiple ONNX transformations in 
sequence.""" def __init__(self, transforms: List[Type[BaseOnnxTransform]]): - if not transforms: - warnings.warn("Transform list is empty. No transformations will be applied.") self.transforms = transforms def apply( From 590784de979fd8cf2bd1d2eef5fb7c0523868e04 Mon Sep 17 00:00:00 2001 From: vbaddi Date: Thu, 19 Mar 2026 08:01:17 +0000 Subject: [PATCH 4/4] ci(quickcheck): add PR workflow for parallel model quickcheck Adds a dedicated GitHub Actions workflow to run the quickcheck suite on every PR for faster regression visibility. - New workflow: quickcheck.yml - Triggers: - pull_request - workflow_dispatch (manual run) - Job config: - ubuntu-latest - Python 3.10 via actions/setup-python@v5 - pip caching enabled - timeout-minutes: 90 - concurrency cancellation enabled per PR/ref - Installs required deps: - python -m pip install -e .[test] - python -m pip install pytest-xdist - Runs: - python -m pytest -q tests/test_model_quickcheck.py -n auto Signed-off-by: vbaddi --- .github/workflows/quickcheck.yml | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) create mode 100644 .github/workflows/quickcheck.yml diff --git a/.github/workflows/quickcheck.yml b/.github/workflows/quickcheck.yml new file mode 100644 index 000000000..ff9591420 --- /dev/null +++ b/.github/workflows/quickcheck.yml @@ -0,0 +1,32 @@ +name: Quickcheck + +on: + pull_request: + workflow_dispatch: + +concurrency: + group: quickcheck-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true + +jobs: + quickcheck: + runs-on: ubuntu-latest + timeout-minutes: 90 + steps: + - name: Checkout Repo + uses: actions/checkout@v4 + + - name: Setup Python + uses: actions/setup-python@v5 + with: + python-version: "3.10" + cache: "pip" + + - name: Install Dependencies + run: | + python -m pip install --upgrade pip + python -m pip install -e .[test] + python -m pip install pytest-xdist + + - name: Run Quickcheck + run: python -m pytest -q tests/test_model_quickcheck.py -n auto