@@ -723,7 +723,7 @@ class QEffCausalLMForTextImageToTextModel(QEFFBaseModel):
723723 ]
724724 _onnx_transforms = [FP16ClipTransform , SplitTensorsTransform ]
725725
726- def __init__ (self , model , continuous_batching : bool = False , qaic_config : Optional [ dict ] = None , ** kwargs ):
726+ def __init__ (self , model , ** kwargs ):
727727 """
728728 Initializes the language decoder component for multimodal models.
729729
@@ -872,13 +872,10 @@ def __init__(
872872 ----------
873873 model : nn.Module
874874 The full HuggingFace multimodal model.
875+ qaic_config : dict, optional
876+ A dictionary for QAIC-specific configurations.
875877 **kwargs :
876- Additional keyword arguments. `full_batch_size` is not supported here.
877-
878- Raises
879- ------
880- NotImplementedError
881- If `full_batch_size` is provided.
878+ Additional keyword arguments.
882879 """
883880 if kwargs .pop ("full_batch_size" , None ):
884881 continuous_batching = True
@@ -891,7 +888,7 @@ def __init__(
891888 self .comp_ctx_lengths_prefill , self .comp_ctx_lengths_decode = process_ccl_specializations (qaic_config )
892889
893890 self .vision_model = QEffVisionEncoderForTextImageToTextModel (model , ** kwargs )
894- self .lang_model = QEffCausalLMForTextImageToTextModel (model , continuous_batching = continuous_batching , ** kwargs )
891+ self .lang_model = QEffCausalLMForTextImageToTextModel (model , ** kwargs )
895892 self .continuous_batching = continuous_batching
896893 self .lang_model .model .qaic_config = qaic_config
897894 self .input_shapes , self .output_names = None , None
@@ -1577,15 +1574,13 @@ def __init__(
15771574 Raises
15781575 ------
15791576 NotImplementedError
1580- If `full_batch_size` is provided or `continuous_batching` is True or ` include_sampler` is True.
1577+ If `full_batch_size` is provided or `include_sampler` is True.
15811578 """
15821579 if kwargs .pop ("full_batch_size" , None ):
15831580 warnings .warn (
15841581 "full_batch_size argument is deprecated. Use continuous_batching=True instead." , DeprecationWarning , 2
15851582 )
15861583 raise NotImplementedError ("Continuous batching is not supported for image-text-to-text models yet." )
1587- if kwargs .pop ("continuous_batching" , None ):
1588- raise NotImplementedError ("Continuous batching is not supported for image-text-to-text models yet." )
15891584 if qaic_config is not None and qaic_config .pop ("include_sampler" , False ):
15901585 raise NotImplementedError ("On-device sampling is not supported for single QPC multimodal models yet." )
15911586 super ().__init__ (model , ** kwargs )
@@ -2189,10 +2184,6 @@ def from_pretrained(
21892184 If None, the default behavior of the internal classes is used (typically dual QPC).
21902185 qaic_config : dict, optional
21912186 A dictionary for QAIC-specific configurations.
2192- Only the following keys are supported by the text model of the dual QPC multimodal model:
2193- - **include_sampler** (bool): If True, enables on-device sampling of next tokens.
2194- - **max_top_k_ids** (int): Maximum number of top K tokens (<= vocab size) to consider during sampling.
2195- Additional keys will be ignored.
21962187 **kwargs :
21972188 Additional arguments passed to HuggingFace's ``from_pretrained``.
21982189
0 commit comments