File tree Expand file tree Collapse file tree 1 file changed +8
-7
lines changed
QEfficient/transformers/models Expand file tree Collapse file tree 1 file changed +8
-7
lines changed Original file line number Diff line number Diff line change @@ -3003,6 +3003,14 @@ def compile(
30033003 If `prefill_seq_len` is less than `num_speculative_tokens + 1` for TLM models.
30043004
30053005 """
3006+ if prefill_only is None or not prefill_only :
3007+ if self .continuous_batching and full_batch_size is None :
3008+ raise TypeError ("`full_batch_size` is required when `continuous_batching=True`." )
3009+ if kv_cache_batch_size and not full_batch_size :
3010+ raise ValueError (
3011+ "KV caching requires continuous batching. Please set `full_batch_size` and "
3012+ "enable `continuous_batching=True` in `from_pretrained`."
3013+ )
30063014
30073015 # For supporting VLLM and Disaggregated with CCL
30083016 if "comp_ctx_lengths_prefill" in compiler_options and "comp_ctx_lengths_decode" in compiler_options :
@@ -3078,13 +3086,6 @@ def compile(
30783086 )
30793087
30803088 if prefill_only is None or not prefill_only :
3081- if self .continuous_batching and full_batch_size is None :
3082- raise TypeError ("`full_batch_size` is required when `continuous_batching=True`." )
3083- if kv_cache_batch_size and not full_batch_size :
3084- raise ValueError (
3085- "KV caching requires continuous batching. Please set `full_batch_size` and "
3086- "enable `continuous_batching=True` in `from_pretrained`."
3087- )
30883089 if self .comp_ctx_lengths_decode is not None :
30893090 # Adding elements from self.comp_ctx_lengths_decode to decode_specialization
30903091 for i in range (0 , len (self .comp_ctx_lengths_decode )):
You can’t perform that action at this time.
0 commit comments