File tree Expand file tree Collapse file tree 1 file changed +8
-7
lines changed
QEfficient/transformers/models Expand file tree Collapse file tree 1 file changed +8
-7
lines changed Original file line number Diff line number Diff line change @@ -3061,6 +3061,14 @@ def compile(
30613061 If `prefill_seq_len` is less than `num_speculative_tokens + 1` for TLM models.
30623062
30633063 """
3064+ if prefill_only is None or not prefill_only :
3065+ if self .continuous_batching and full_batch_size is None :
3066+ raise TypeError ("`full_batch_size` is required when `continuous_batching=True`." )
3067+ if kv_cache_batch_size and not full_batch_size :
3068+ raise ValueError (
3069+ "KV caching requires continuous batching. Please set `full_batch_size` and "
3070+ "enable `continuous_batching=True` in `from_pretrained`."
3071+ )
30643072
30653073 # if ccl_enabled is True read Compute-Context-Length lists
30663074 if self .ccl_enabled :
@@ -3147,13 +3155,6 @@ def compile(
31473155 )
31483156
31493157 if prefill_only is None or not prefill_only :
3150- if self .continuous_batching and full_batch_size is None :
3151- raise TypeError ("`full_batch_size` is required when `continuous_batching=True`." )
3152- if kv_cache_batch_size and not full_batch_size :
3153- raise ValueError (
3154- "KV caching requires continuous batching. Please set `full_batch_size` and "
3155- "enable `continuous_batching=True` in `from_pretrained`."
3156- )
31573158 if self .comp_ctx_lengths_decode is not None :
31583159 # Adding elements from self.comp_ctx_lengths_decode to decode_specialization
31593160 for i in range (0 , len (self .comp_ctx_lengths_decode )):
You can’t perform that action at this time.
0 commit comments