@@ -723,7 +723,7 @@ class QEffCausalLMForTextImageToTextModel(QEFFBaseModel):
723723 ]
724724 _onnx_transforms = [FP16ClipTransform , SplitTensorsTransform ]
725725
726- def __init__ (self , model , continuous_batching : bool = False , qaic_config : Optional [ dict ] = None , ** kwargs ):
726+ def __init__ (self , model , ** kwargs ):
727727 """
728728 Initializes the language decoder component for multimodal models.
729729
@@ -872,6 +872,8 @@ def __init__(
872872 ----------
873873 model : nn.Module
874874 The full HuggingFace multimodal model.
875+ qaic_config : dict, optional
876+ A dictionary for QAIC-specific configurations.
875877 **kwargs :
876878 Additional keyword arguments. `full_batch_size` is not supported here.
877879
@@ -891,7 +893,7 @@ def __init__(
891893 self .comp_ctx_lengths_prefill , self .comp_ctx_lengths_decode = process_ccl_specializations (qaic_config )
892894
893895 self .vision_model = QEffVisionEncoderForTextImageToTextModel (model , ** kwargs )
894- self .lang_model = QEffCausalLMForTextImageToTextModel (model , continuous_batching = continuous_batching , ** kwargs )
896+ self .lang_model = QEffCausalLMForTextImageToTextModel (model , ** kwargs )
895897 self .continuous_batching = continuous_batching
896898 self .lang_model .model .qaic_config = qaic_config
897899 self .input_shapes , self .output_names = None , None
@@ -1577,15 +1579,13 @@ def __init__(
15771579 Raises
15781580 ------
15791581 NotImplementedError
1580- If `full_batch_size` is provided or `continuous_batching` is True or ` include_sampler` is True.
1582+ If `full_batch_size` is provided or `include_sampler` is True.
15811583 """
15821584 if kwargs .pop ("full_batch_size" , None ):
15831585 warnings .warn (
15841586 "full_batch_size argument is deprecated. Use continuous_batching=True instead." , DeprecationWarning , 2
15851587 )
15861588 raise NotImplementedError ("Continuous batching is not supported for image-text-to-text models yet." )
1587- if kwargs .pop ("continuous_batching" , None ):
1588- raise NotImplementedError ("Continuous batching is not supported for image-text-to-text models yet." )
15891589 if qaic_config is not None and qaic_config .pop ("include_sampler" , False ):
15901590 raise NotImplementedError ("On-device sampling is not supported for single QPC multimodal models yet." )
15911591 super ().__init__ (model , ** kwargs )
@@ -2189,10 +2189,6 @@ def from_pretrained(
21892189 If None, the default behavior of the internal classes is used (typically dual QPC).
21902190 qaic_config : dict, optional
21912191 A dictionary for QAIC-specific configurations.
2192- Only the following keys are supported by the text model of the dual QPC multimodal model:
2193- - **include_sampler** (bool): If True, enables on-device sampling of next tokens.
2194- - **max_top_k_ids** (int): Maximum number of top K tokens (<= vocab size) to consider during sampling.
2195- Additional keys will be ignored.
21962192 **kwargs :
21972193 Additional arguments passed to HuggingFace's ``from_pretrained``.
21982194
0 commit comments