From d82e7915bf42404fd750d568bf1cd3e157d6f5b2 Mon Sep 17 00:00:00 2001
From: vbaddi <vbaddi@qti.qualcomm.com>
Date: Wed, 18 Mar 2026 09:30:31 +0000
Subject: [PATCH 1/4] nit: fix the onnx_path issue and transforms check

Signed-off-by: vbaddi <vbaddi@qti.qualcomm.com>
---
 QEfficient/base/modeling_qeff.py   | 44 ++++++++++++++++++++----------
 QEfficient/base/onnx_transforms.py | 16 +++++++++--
 tests/test_model_quickcheck.py     | 34 +++++++++++++++++++++++
 3 files changed, 78 insertions(+), 16 deletions(-)

diff --git a/QEfficient/base/modeling_qeff.py b/QEfficient/base/modeling_qeff.py
index f3581d281..8aa2bb3ee 100644
--- a/QEfficient/base/modeling_qeff.py
+++ b/QEfficient/base/modeling_qeff.py
@@ -18,7 +18,12 @@
 import onnx
 import torch
 
-from QEfficient.base.onnx_transforms import BaseOnnxTransform, OnnxTransformPipeline
+from QEfficient.base.onnx_transforms import (
+    BaseOnnxTransform,
+    FP16ClipTransform,
+    OnnxTransformPipeline,
+    SplitTensorsTransform,
+)
 from QEfficient.base.pytorch_transforms import PytorchTransform
 from QEfficient.compile.qnn_compiler import compile as qnn_compile
 from QEfficient.generation.cloud_infer import QAICInferenceSession
@@ -49,9 +54,8 @@ class QEFFBaseModel(ABC):
     _pytorch_transforms: List[PytorchTransform]
     _onnx_transforms = [BaseOnnxTransform]
 
-    @classmethod
-    def _transform_names(cls) -> List[str]:
-        return [x.__name__ for x in cls._pytorch_transforms + cls._onnx_transforms]
+    def _transform_names(self) -> List[str]:
+        return [x.__name__ for x in self._pytorch_transforms + self._onnx_transforms]
 
     def __init__(self, model: torch.nn.Module, **kwargs) -> None:
         super().__init__()
@@ -236,15 +240,20 @@ def _export(
 
         # Return early if ONNX already exists
         if onnx_path.is_file():
-            self.onnx_path = onnx_path
-            return onnx_path
+            try:
+                _ = onnx.load(onnx_path, load_external_data=False)
+                self.onnx_path = onnx_path
+                return onnx_path
+            except Exception as e:
+                logger.warning(f"Invalid cached ONNX found at {onnx_path}, re-exporting: {e}")
+                onnx_path.unlink(missing_ok=True)
 
         # check if the model is in meta state or weights are offloaded
         self._model_offloaded_check()
 
-        # Export directly into export_dir so any external data files are retained.
-        export_dir.mkdir(parents=True, exist_ok=True)
-        tmp_onnx_path = onnx_path
+        # Setup temporary paths
+        tmp_onnx_dir = export_dir / "onnx_tmp"
+        tmp_onnx_dir.mkdir(parents=True, exist_ok=True)
 
         # Create input_names from example_inputs
         input_names = []
@@ -274,7 +283,7 @@ def _export(
             torch.onnx.export(
                 self.model,
                 (example_inputs,),
-                str(tmp_onnx_path),
+                str(onnx_path),
                 input_names=input_names,
                 output_names=output_names,
                 dynamic_axes=dynamic_axes,
@@ -283,11 +292,13 @@ def _export(
             )
             logger.info("PyTorch export successful")
             _ = self._offload_model_weights(offload_pt_weights)
-            model = onnx.load(tmp_onnx_path, load_external_data=False)
+            model = onnx.load(onnx_path, load_external_data=False)
 
-            # Clear temporary references
+            needs_external_tensor_data = any(
+                transform in self._onnx_transforms for transform in (FP16ClipTransform, SplitTensorsTransform)
+            )
             transform_kwargs = {
-                "onnx_base_dir": str(export_dir),
+                "onnx_base_dir": str(export_dir) if needs_external_tensor_data else None,
                 "model_name": self.model_name,
             }
             if onnx_transform_kwargs is not None:
@@ -302,7 +313,9 @@ def _export(
             )
             logger.info("ONNX transforms applied")
 
-            onnx.save(model, onnx_path)
+            onnx_path_tmp = onnx_path.with_suffix(onnx_path.suffix + ".tmp")
+            onnx.save(model, onnx_path_tmp)
+            onnx_path_tmp.replace(onnx_path)
             del model
             gc.collect()
             logger.info("Transformed ONNX saved")
@@ -311,6 +324,9 @@ def _export(
             logger.error(f"ONNX export or transforms failed: {e}")
             raise e
 
+        finally:
+            shutil.rmtree(tmp_onnx_dir, ignore_errors=True)
+
         self.onnx_path = onnx_path
         return onnx_path
 
diff --git a/QEfficient/base/onnx_transforms.py b/QEfficient/base/onnx_transforms.py
index 16697cec9..3993a2ec1 100644
--- a/QEfficient/base/onnx_transforms.py
+++ b/QEfficient/base/onnx_transforms.py
@@ -106,16 +106,27 @@ class CustomOpTransform(BaseOnnxTransform):
     @classmethod
     def apply(cls, model: ModelProto) -> bool:
         op_applied = False
+
+        # Register with PyTorch ONNX exporter (for export time)
         for op_name, (func_class, _) in cls._custom_ops.items():
             if hasattr(func_class, "symbolic"):
                 torch.onnx.register_custom_op_symbolic(f"::{op_name}", func_class.symbolic, ONNX_EXPORT_OPSET)
 
+        used_op_types = {node.op_type for node in model.graph.node}
+        for function_proto in model.functions:
+            used_op_types.update(node.op_type for node in function_proto.node)
+
+        # Add function prototypes to model
         existing = {f.name for f in model.functions}
-        for _, onnxscript_func in cls._custom_ops.values():
+
+        for func_name, onnxscript_func in cls._custom_ops.values():
             proto = onnxscript_func.to_function_proto()
+            if proto.name not in used_op_types:
+                continue
             if proto.name not in existing:
                 model.functions.append(proto)
                 op_applied = True
+
         return op_applied
 
 
@@ -228,7 +239,8 @@ def apply(
         do_split = SplitTensorsTransform in requested
         fp16_min, fp16_max = np.finfo(np.float16).min, np.finfo(np.float16).max
         file_num_tracker = {"num": 0, "size": 0}
-        external_data_helper.load_external_data_for_model(model, onnx_base_dir)
+        if onnx_base_dir is not None:
+            external_data_helper.load_external_data_for_model(model, onnx_base_dir)
 
         if do_fp16 or do_split:
             for tensor in external_data_helper._get_all_tensors(model):
diff --git a/tests/test_model_quickcheck.py b/tests/test_model_quickcheck.py
index 9a26580a5..bff68ec62 100644
--- a/tests/test_model_quickcheck.py
+++ b/tests/test_model_quickcheck.py
@@ -444,6 +444,40 @@ def test_causal_subfunction_export_smoke(tmp_path):
     assert not any("QEffGPT2Block" in name for name in without_names)
 
 
+@pytest.mark.parametrize(
+    ("model_type", "model_id"),
+    sorted(CAUSAL_RUNTIME_MODEL_IDS.items()),
+    ids=sorted(CAUSAL_RUNTIME_MODEL_IDS),
+)
+def test_causal_compile_smoke_invokes_compile_path_with_subfunctions(monkeypatch, model_type, model_id, tmp_path):
+    del model_type
+    compile_calls = {}
+
+    def _fake_compile(self, onnx_path=None, compile_dir=None, **kwargs):
+        compile_calls["onnx_path"] = onnx_path
+        compile_calls["compile_dir"] = compile_dir
+        compile_calls["kwargs"] = kwargs
+        qpc_path = tmp_path / "qpc"
+        qpc_path.mkdir(parents=True, exist_ok=True)
+        return str(qpc_path)
+
+    monkeypatch.setattr(QEFFAutoModelForCausalLM, "_compile", _fake_compile, raising=False)
+
+    try:
+        qeff_model = QEFFAutoModelForCausalLM.from_pretrained(model_id, trust_remote_code=True)
+    except Exception as exc:
+        _skip_on_model_fetch_error(exc, model_id)
+
+    qpc = qeff_model.compile(prefill_seq_len=8, ctx_len=32, use_onnx_subfunctions=True)
+
+    assert Path(qpc).name == "qpc"
+    assert compile_calls["kwargs"]["use_onnx_subfunctions"] is True
+    assert compile_calls["kwargs"]["specializations"][0]["seq_len"] == 8
+    assert compile_calls["kwargs"]["specializations"][0]["ctx_len"] == 32
+    assert compile_calls["kwargs"]["compile_only"] is True
+    assert compile_calls["kwargs"]["retained_state"] is True
+
+
 @pytest.mark.llm_model
 @pytest.mark.parametrize(
     ("model_type", "model_id"),

From c25e97ab937b5e1060b9febdd76febfa32195c00 Mon Sep 17 00:00:00 2001
From: vbaddi <vbaddi@qti.qualcomm.com>
Date: Wed, 18 Mar 2026 10:19:59 +0000
Subject: [PATCH 2/4] nit: remove the cache validation warning block from
 modeling_qeff

Signed-off-by: vbaddi <vbaddi@qti.qualcomm.com>
---
 QEfficient/base/modeling_qeff.py |  9 ++------
 tests/test_model_quickcheck.py   | 37 +++++++++++++++-----------------
 2 files changed, 19 insertions(+), 27 deletions(-)

diff --git a/QEfficient/base/modeling_qeff.py b/QEfficient/base/modeling_qeff.py
index 8aa2bb3ee..fd50fe3e6 100644
--- a/QEfficient/base/modeling_qeff.py
+++ b/QEfficient/base/modeling_qeff.py
@@ -240,13 +240,8 @@ def _export(
 
         # Return early if ONNX already exists
         if onnx_path.is_file():
-            try:
-                _ = onnx.load(onnx_path, load_external_data=False)
-                self.onnx_path = onnx_path
-                return onnx_path
-            except Exception as e:
-                logger.warning(f"Invalid cached ONNX found at {onnx_path}, re-exporting: {e}")
-                onnx_path.unlink(missing_ok=True)
+            self.onnx_path = onnx_path
+            return onnx_path
 
         # check if the model is in meta state or weights are offloaded
         self._model_offloaded_check()
diff --git a/tests/test_model_quickcheck.py b/tests/test_model_quickcheck.py
index bff68ec62..1c7b74c2b 100644
--- a/tests/test_model_quickcheck.py
+++ b/tests/test_model_quickcheck.py
@@ -444,38 +444,35 @@ def test_causal_subfunction_export_smoke(tmp_path):
     assert not any("QEffGPT2Block" in name for name in without_names)
 
 
+@pytest.mark.llm_model
 @pytest.mark.parametrize(
     ("model_type", "model_id"),
     sorted(CAUSAL_RUNTIME_MODEL_IDS.items()),
     ids=sorted(CAUSAL_RUNTIME_MODEL_IDS),
 )
-def test_causal_compile_smoke_invokes_compile_path_with_subfunctions(monkeypatch, model_type, model_id, tmp_path):
+def test_causal_compile_with_subfunctions_all_models(model_type, model_id, tmp_path):
     del model_type
-    compile_calls = {}
-
-    def _fake_compile(self, onnx_path=None, compile_dir=None, **kwargs):
-        compile_calls["onnx_path"] = onnx_path
-        compile_calls["compile_dir"] = compile_dir
-        compile_calls["kwargs"] = kwargs
-        qpc_path = tmp_path / "qpc"
-        qpc_path.mkdir(parents=True, exist_ok=True)
-        return str(qpc_path)
-
-    monkeypatch.setattr(QEFFAutoModelForCausalLM, "_compile", _fake_compile, raising=False)
-
     try:
         qeff_model = QEFFAutoModelForCausalLM.from_pretrained(model_id, trust_remote_code=True)
     except Exception as exc:
         _skip_on_model_fetch_error(exc, model_id)
 
-    qpc = qeff_model.compile(prefill_seq_len=8, ctx_len=32, use_onnx_subfunctions=True)
+    try:
+        qpc = qeff_model.compile(
+            prefill_seq_len=8,
+            ctx_len=32,
+            use_onnx_subfunctions=True,
+            compile_dir=tmp_path / "compile-with-subfunctions",
+        )
+    except Exception as exc:
+        pytest.skip(
+            f"Skipping compile for {model_id}: compile backend unavailable or unsupported in this environment "
+            f"({type(exc).__name__}: {exc})"
+        )
 
-    assert Path(qpc).name == "qpc"
-    assert compile_calls["kwargs"]["use_onnx_subfunctions"] is True
-    assert compile_calls["kwargs"]["specializations"][0]["seq_len"] == 8
-    assert compile_calls["kwargs"]["specializations"][0]["ctx_len"] == 32
-    assert compile_calls["kwargs"]["compile_only"] is True
-    assert compile_calls["kwargs"]["retained_state"] is True
+    qpc_path = Path(qpc)
+    assert qpc_path.name == "qpc"
+    assert qpc_path.is_dir()
 
 
 @pytest.mark.llm_model

From 8722394a078ca0e3eb4b76c6e68ca4bcdecf5c44 Mon Sep 17 00:00:00 2001
From: vbaddi <vbaddi@qti.qualcomm.com>
Date: Thu, 19 Mar 2026 07:55:11 +0000
Subject: [PATCH 3/4] chore(export): remove unused onnx_tmp staging and silence
 empty-transform warning

Signed-off-by: vbaddi <vbaddi@qti.qualcomm.com>
---
 QEfficient/base/modeling_qeff.py   | 7 +------
 QEfficient/base/onnx_transforms.py | 3 ---
 2 files changed, 1 insertion(+), 9 deletions(-)

diff --git a/QEfficient/base/modeling_qeff.py b/QEfficient/base/modeling_qeff.py
index fd50fe3e6..6f22e867e 100644
--- a/QEfficient/base/modeling_qeff.py
+++ b/QEfficient/base/modeling_qeff.py
@@ -246,9 +246,7 @@ def _export(
         # check if the model is in meta state or weights are offloaded
         self._model_offloaded_check()
 
-        # Setup temporary paths
-        tmp_onnx_dir = export_dir / "onnx_tmp"
-        tmp_onnx_dir.mkdir(parents=True, exist_ok=True)
+        export_dir.mkdir(parents=True, exist_ok=True)
 
         # Create input_names from example_inputs
         input_names = []
@@ -319,9 +317,6 @@ def _export(
             logger.error(f"ONNX export or transforms failed: {e}")
             raise e
 
-        finally:
-            shutil.rmtree(tmp_onnx_dir, ignore_errors=True)
-
         self.onnx_path = onnx_path
         return onnx_path
 
diff --git a/QEfficient/base/onnx_transforms.py b/QEfficient/base/onnx_transforms.py
index 3993a2ec1..2ba53829a 100644
--- a/QEfficient/base/onnx_transforms.py
+++ b/QEfficient/base/onnx_transforms.py
@@ -7,7 +7,6 @@
 
 import logging
 import os
-import warnings
 from concurrent.futures import ThreadPoolExecutor, as_completed
 from typing import Any, Dict, List, Optional, Tuple, Type
 
@@ -213,8 +212,6 @@ class OnnxTransformPipeline(BaseOnnxTransform):
     """Pipeline to apply multiple ONNX transformations in sequence."""
 
     def __init__(self, transforms: List[Type[BaseOnnxTransform]]):
-        if not transforms:
-            warnings.warn("Transform list is empty. No transformations will be applied.")
         self.transforms = transforms
 
     def apply(

From 590784de979fd8cf2bd1d2eef5fb7c0523868e04 Mon Sep 17 00:00:00 2001
From: vbaddi <vbaddi@qti.qualcomm.com>
Date: Thu, 19 Mar 2026 08:01:17 +0000
Subject: [PATCH 4/4] ci(quickcheck): add PR workflow for parallel model
 quickcheck

Add a dedicated GitHub Actions workflow that runs the quickcheck suite on every pull request so regressions surface sooner.

  - New workflow: .github/workflows/quickcheck.yml
  - Triggers:
      - pull_request
      - workflow_dispatch (manual run)
  - Job config:
      - ubuntu-latest
      - Python 3.10 via actions/setup-python@v5
      - pip caching enabled
      - timeout-minutes: 90
      - concurrency cancellation enabled per PR/ref

  - Installs required deps:
      - python -m pip install -e .[test]
      - python -m pip install pytest-xdist
  - Runs:
      - python -m pytest -q tests/test_model_quickcheck.py -n auto

Signed-off-by: vbaddi <vbaddi@qti.qualcomm.com>
---
 .github/workflows/quickcheck.yml | 32 ++++++++++++++++++++++++++++++++
 1 file changed, 32 insertions(+)
 create mode 100644 .github/workflows/quickcheck.yml

diff --git a/.github/workflows/quickcheck.yml b/.github/workflows/quickcheck.yml
new file mode 100644
index 000000000..ff9591420
--- /dev/null
+++ b/.github/workflows/quickcheck.yml
@@ -0,0 +1,32 @@
+name: Quickcheck
+
+on:
+  pull_request:
+  workflow_dispatch:
+
+concurrency:
+  group: quickcheck-${{ github.event.pull_request.number || github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  quickcheck:
+    runs-on: ubuntu-latest
+    timeout-minutes: 90
+    steps:
+      - name: Checkout Repo
+        uses: actions/checkout@v4
+
+      - name: Setup Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.10"
+          cache: "pip"
+
+      - name: Install Dependencies
+        run: |
+          python -m pip install --upgrade pip
+          python -m pip install -e .[test]
+          python -m pip install pytest-xdist
+
+      - name: Run Quickcheck
+        run: python -m pytest -q tests/test_model_quickcheck.py -n auto