78 commits
f81ef6e
General disagg fix for prefill-only model (#698)
ochougul Jan 6, 2026
c57392d
Adding Vae Decoder in Wan (#688)
mohiso22 Jan 9, 2026
75367b1
Evaluating the values of CCL lists for different scenarios (#710)
vjanfaza Jan 9, 2026
1e63710
Updating 2-layer instruction for Wan (#715)
tv-karthikeya Jan 12, 2026
1ef9935
Updated finetune docs for MULTI NODE Training (#717)
quic-akuruvil Jan 13, 2026
c76d5ea
Adding support for multi-node DDP training (#708)
smedhe Jan 13, 2026
7a39933
Updating MDP partition config: prioritizing dump over load (#720)
asmigosw Jan 13, 2026
08bce2c
Updated docs (#722)
quic-akuruvil Jan 13, 2026
8b00c1b
HOTFIX: changes in alpaca and grammar dataset utils (#724)
smedhe Jan 13, 2026
b074af0
Fixing the default value of CCL in infer.py (#725)
vjanfaza Jan 15, 2026
5fdde19
Adding support for multi-node PP+DDP (#726)
smedhe Jan 16, 2026
1f2ac51
Added default NPI file (#657)
quic-akuruvil Jan 19, 2026
dcbb7be
Release 1.21 docs (#718)
tv-karthikeya Jan 19, 2026
1ec3975
HOTFIX : Added support for repeat kv heads aligned Bias scaling for A…
quic-dhirajku Jan 20, 2026
e61a1a3
Removed OpenGVLab/InternVL2_5-1B and OpenGVLab/InternVL3_5-1B (#736)
quic-rishinr Jan 20, 2026
47a0fec
Qeff versioning (#741)
quic-rishinr Jan 20, 2026
3a8e5e9
Revert "Qeff versioning" (#746)
quic-rishinr Jan 21, 2026
0ffa4ea
Fix for Qwen 2.5 VL with subfunction (#733)
abhishek-singh591 Jan 21, 2026
32f30c0
Fixed torch patch for subfunction with VLMs (#750)
abhishek-singh591 Jan 22, 2026
eb74758
Added support of subfunction for VLMs (#699)
abhishek-singh591 Jan 23, 2026
742b7bd
Updated reduce sum calculation to use einsum for gpt_oss (#754)
asmigosw Jan 27, 2026
5a129c7
Updating pytest config for InternVL (#758)
tv-karthikeya Jan 28, 2026
b777e8b
Wan support to skip compilation (#734)
tv-karthikeya Jan 28, 2026
75bf976
Fixing SW issue in Gemma3 (#740)
qcdipankar Jan 28, 2026
3751f7e
Fix documentation of Multinode FT (#764)
quic-akuruvil Jan 29, 2026
27ebe8e
Adding support for gemma3 in continous batching script for CI (#763)
qcdipankar Jan 30, 2026
536e3fc
Subfunction Fix (#766)
abhishek-singh591 Feb 1, 2026
f64f703
Mainline version update (#752)
quic-rishinr Feb 2, 2026
1a3e09c
Updated compile from qaic-exec to qaic-compile (#703)
asmigosw Feb 3, 2026
e8e5c43
Fix for Diffusers subfunction (#759)
tv-karthikeya Feb 9, 2026
fc42332
Added One hot fix for MOE model with subfunction (#777)
abhishek-singh591 Feb 12, 2026
544327a
Adding support of QEFFAutoModelForSequenceClassification (#729)
quic-amitraj Feb 13, 2026
facae5f
CI test optimization (#751)
quic-rishinr Feb 13, 2026
4bd2239
Adding the support of dense models distilled from moe models with the…
vjanfaza Feb 20, 2026
a8a008d
Fix for CB incosistency for qwen2_5_vl (#765)
asmigosw Feb 24, 2026
c74b0bd
Fixing the issue of CCL support during the decoding phase of Disaggre…
vjanfaza Feb 25, 2026
a6f2dd4
Fixed Granite_moe and added to CI (#771)
quic-akuruvil Feb 26, 2026
69c83c2
removed duplication of `mdp_json_path` in compilation command (#706) …
ochougul Feb 27, 2026
471de6f
[Proxy]: Adding support for exporting proxy Model (#620)
abukhoy Mar 2, 2026
9bcab61
Gemma3 NPI File Update (#810)
quic-hemagnih Mar 3, 2026
33c8ff7
Updated FT docs (#822)
quic-akuruvil Mar 4, 2026
94f233e
Daily PR report workflow and email notification system (#824)
quic-rishinr Mar 5, 2026
ab920b2
Updated SMPT server (#830)
quic-rishinr Mar 5, 2026
300b252
Removed git workflow and email test changes (#836)
quic-rishinr Mar 9, 2026
85b0cf0
Upgrade python version from 3.10 to 3.12 (#782)
quic-rishinr Mar 9, 2026
3d0d663
Adding dissagg mode support to Qwen3Moe (#682)
qcdipankar Mar 10, 2026
815309e
fix(cloud.infer): reduce Qwen3-MoE export OOM risk (#821)
jd316 Mar 11, 2026
652351b
Removed urllib and multidict (#846)
quic-rishinr Mar 13, 2026
2f9675c
CPU pytest unit test suite (#852)
quic-rishinr Mar 17, 2026
575571f
[QEff. Finetune]: Added logger and its test cases. (#644)
quic-meetkuma Nov 28, 2025
20e5b13
[QEff. Finetune]: Added component registry and factory functionality.…
quic-meetkuma Nov 28, 2025
36044be
[QEff. Finetune]: Adding optimizer registry and its test cases (#649)
tchawada Dec 5, 2025
f736d93
[QEff. Finetune]: Added Base dataset class and SFT dataset classes al…
quic-dhirajku Dec 5, 2025
a85b687
[QEff. Finetune] Adding callback and its test cases. (#652)
tchawada Dec 8, 2025
7dcb29b
"[QEff.finetuning] Adding config_manager and its test cases." (#656)
tchawada Dec 15, 2025
86df5aa
Revert " "[QEff.finetuning] Adding config_manager and its test cases.…
quic-akuruvil Dec 15, 2025
e50ac64
"[QEff.finetuning} Rebasing: hf_config_mananger." (#667)
tchawada Dec 15, 2025
b9ce749
[QEff. Finetune]: Adding base class and HF class (#658)
quic-swatia Dec 25, 2025
f87c0a7
Added Trainer classes and tests for FT (#697)
quic-dhirajku Jan 2, 2026
400f911
[QEff.finetuning] Adding sample config and ReadMe file (#692)
tchawada Feb 5, 2026
263f152
['QEff.finetuning'] Changing some params from training config to mode…
tchawada Feb 5, 2026
529dc2c
[QEff. Finetuning] Adding text field and some other changes in datase…
quic-swatia Feb 9, 2026
b56770b
[QEff. Finetuning]: Adding FinetuningPipeline (finetune_experiemental…
quic-swatia Feb 15, 2026
72e93b5
Ft experimental rebasing with main (#793)
quic-akuruvil Feb 16, 2026
a34da25
Aligning with main (#794)
quic-akuruvil Feb 17, 2026
5b2db2c
[QEff. Finetuning]: Adding PP support in HF trainer stack (#813)
quic-swatia Feb 27, 2026
5f2d4b2
[QEff.finetuning] Hf config update (#795)
tchawada Mar 4, 2026
6dbbbfe
Restructure and added info in docs
Mar 5, 2026
5062d96
Cleanup
Mar 5, 2026
dfe8a9f
Cleanup
Mar 5, 2026
59d785a
[QEff.finetune]Test finetune (#826)
tchawada Mar 6, 2026
6002e0a
Docs Updated (#833)
quic-akuruvil Mar 8, 2026
2c51672
[QEff. Finetuning]: adding example scripts to demonstrate custom data…
smedhe Mar 9, 2026
92882be
Revert "[QEff. finetuning]: Rebasing ft_experimental into main" (#840)
quic-akuruvil Mar 10, 2026
429b39b
[QEff. Finetuning]: Fixed Data Parallel issue (#845)
quic-swatia Mar 11, 2026
56cece4
[QEff.finetune] FT logger (#851)
tchawada Mar 16, 2026
65e033f
Updated terminal logs (#862)
quic-akuruvil Mar 17, 2026
aa3203d
Merge branch 'ft_experimental' into rebase_main
smedhe Mar 18, 2026
26 changes: 20 additions & 6 deletions QEfficient/cloud/infer.py
@@ -139,6 +139,7 @@ def main(
qnn_config: Optional[str] = None,
trust_remote_code: Optional[bool] = False,
ccl_enabled: Optional[bool] = False,
use_onnx_subfunctions: bool = False,
**kwargs,
) -> None:
"""
@@ -205,6 +206,8 @@ def main(
Path of the QNN Config parameters file. Default is None.
trust_remote_code : bool, optional
If True, trusts remote code when loading models from HuggingFace. Default is False.
use_onnx_subfunctions : bool, optional
Enables ONNX subfunctions during export and compile. Default is False.
**kwargs :
Additional compiler options passed directly to `qaic-compile`. Any flag supported by
`qaic-compile` can be passed. Parameters are converted to flags as follows:
@@ -231,12 +234,14 @@
"""
cache_dir = check_and_assign_cache_dir(local_model_dir, cache_dir)

    if "--mxfp6" in sys.argv and mxfp6:
        logger.warning("mxfp6 is going to be deprecated in a future release, use -mxfp6_matmul instead.")
    if "--mxint8" in sys.argv and mxint8:
        logger.warning("mxint8 is going to be deprecated in a future release, use -mxint8_kv_cache instead.")

qaic_config = {"ccl_enabled": True} if ccl_enabled else None

@@ -280,6 +285,7 @@ def main(
allow_mxint8_mdp_io=allow_mxint8_mdp_io,
enable_qnn=enable_qnn,
qnn_config=qnn_config,
use_onnx_subfunctions=use_onnx_subfunctions,
**kwargs,
)

@@ -382,6 +388,14 @@ def main(
action="store_true",
help="Compress Present/Past KV to MXINT8 using CustomIO config, default is False",
)
parser.add_argument(
"--use-onnx-subfunctions",
"--use_onnx_subfunctions",
dest="use_onnx_subfunctions",
action="store_true",
default=False,
help="Enable ONNX subfunctions during export/compile.",
)
parser.add_argument(
"--num_cores", "--num-cores", type=int, required=True, help="Number of cores to compile on Cloud AI 100"
)
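The new `--use-onnx-subfunctions` argument registers two spellings for one destination. A minimal standalone `argparse` sketch (flag names copied from the diff) shows that the hyphenated and underscored forms both set the same boolean:

```python
import argparse

# Minimal sketch of the dual-spelling flag: both option strings share one dest.
parser = argparse.ArgumentParser()
parser.add_argument(
    "--use-onnx-subfunctions",
    "--use_onnx_subfunctions",
    dest="use_onnx_subfunctions",
    action="store_true",
    default=False,
    help="Enable ONNX subfunctions during export/compile.",
)

print(parser.parse_args([]).use_onnx_subfunctions)                           # → False
print(parser.parse_args(["--use-onnx-subfunctions"]).use_onnx_subfunctions)  # → True
print(parser.parse_args(["--use_onnx_subfunctions"]).use_onnx_subfunctions)  # → True
```

Registering both spellings keeps backward compatibility with the underscored style used by older flags such as `--num_cores`.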
13 changes: 13 additions & 0 deletions QEfficient/proxy/__init__.py
@@ -0,0 +1,13 @@
# -----------------------------------------------------------------------------
#
# Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries.
# SPDX-License-Identifier: BSD-3-Clause
#
# ----------------------------------------------------------------------------

from QEfficient.proxy.proxy_transform import QeffProxyEmbedding, QeffProxyLinear

__all__ = [
"QeffProxyEmbedding",
"QeffProxyLinear",
]
27 changes: 27 additions & 0 deletions QEfficient/proxy/proxy_transform.py
@@ -0,0 +1,27 @@
# -----------------------------------------------------------------------------
#
# Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries.
# SPDX-License-Identifier: BSD-3-Clause
#
# ----------------------------------------------------------------------------
import torch
from torch import nn


class QeffProxyEmbedding(nn.Module):
    """Proxy stand-in for ``nn.Embedding`` that skips the real lookup table."""

    def __init__(self, num_embeddings, embedding_dim):
        super().__init__()  # required before assigning submodules/attributes on nn.Module
        self.embed_tokens = None
        self.num_embeddings = num_embeddings
        self.embedding_dim = embedding_dim

    def forward(self, hidden_states, past_key_values_length=None):
        # Broadcast the float token ids across the embedding dimension
        # instead of performing a real embedding lookup: (B, S) -> (B, S, D).
        inputs_embeds = torch.unsqueeze(hidden_states.float(), 2).expand(-1, -1, self.embedding_dim)
        return inputs_embeds


class QeffProxyLinear(nn.Module):
    """Proxy stand-in for ``nn.Linear`` that passes activations through unchanged."""

    def __init__(self, in_features, out_features, bias=False):
        # in_features/out_features/bias are kept for nn.Linear signature compatibility.
        super().__init__()  # required before assigning submodules/attributes on nn.Module
        self.lm_head = None

    def forward(self, hidden_states):
        return hidden_states
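The proxy embedding's forward pass can be exercised in isolation. A minimal sketch (tensor values and `embedding_dim` are illustrative, not from the PR) of the broadcast it performs in place of a real lookup:

```python
import torch

# Illustrative input: float "token ids" of shape (batch=1, seq=3).
hidden_states = torch.tensor([[1.0, 2.0, 3.0]])
embedding_dim = 4  # assumed value for the sketch

# Same broadcast as QeffProxyEmbedding.forward: (B, S) -> (B, S, D).
inputs_embeds = torch.unsqueeze(hidden_states, 2).expand(-1, -1, embedding_dim)
print(inputs_embeds.shape)  # → torch.Size([1, 3, 4])
```

Each token id is simply repeated along the embedding dimension, which keeps tensor shapes export-compatible without carrying the embedding weights.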
22 changes: 22 additions & 0 deletions QEfficient/proxy/pytorch_transform.py
@@ -0,0 +1,22 @@
# -----------------------------------------------------------------------------
#
# Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries.
# SPDX-License-Identifier: BSD-3-Clause
#
# ----------------------------------------------------------------------------

import torch.nn as nn

from QEfficient.base.pytorch_transforms import ProxyModuleMappingTransform
from QEfficient.proxy import QeffProxyEmbedding, QeffProxyLinear


class QeffProxyModuleTransform(ProxyModuleMappingTransform):
    """
    Replaces the original ``nn.Embedding`` and ``nn.Linear`` modules with their QEfficient proxy counterparts.
    """

_module_mapping = {
nn.Embedding: QeffProxyEmbedding,
nn.Linear: QeffProxyLinear,
}
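`ProxyModuleMappingTransform` lives inside QEfficient and is not shown in this diff. As a rough sketch of what a module-mapping transform does (an assumption about the mechanism, not the library's actual code), one can walk the module tree and swap any child whose type appears in the mapping:

```python
import torch.nn as nn

def apply_module_mapping(model: nn.Module, mapping: dict) -> nn.Module:
    """Generic sketch: recursively replace children whose type is in `mapping`."""
    for name, child in model.named_children():
        build = mapping.get(type(child))
        if build is not None:
            # setattr re-registers the new module under the same child name.
            setattr(model, name, build(child))
        else:
            apply_module_mapping(child, mapping)
    return model

# Example mapping: swap every nn.Linear for nn.Identity (a trivial stand-in).
model = nn.Sequential(nn.Linear(4, 4), nn.ReLU(), nn.Linear(4, 2))
apply_module_mapping(model, {nn.Linear: lambda m: nn.Identity()})
print(model)
```

The real transform presumably builds each proxy from the original module's attributes (e.g. `num_embeddings`, `embedding_dim`), which is why the proxy constructors mirror the `nn.Embedding`/`nn.Linear` signatures.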