File tree: 1 file changed (+10, -0 lines changed)
QEfficient/transformers/models: 1 file changed (+10, -0 lines changed)
Original file line number | Diff line number | Diff line change
@@ -3011,6 +3011,16 @@ def compile(
30113011 "KV caching requires continuous batching. Please set `full_batch_size` and "
30123012 "enable `continuous_batching=True` in `from_pretrained`."
30133013 )
3014+ else :
3015+ if self .continuous_batching :
3016+ if not enable_chunking :
3017+ raise NotImplementedError (
3018+ "Looks like you are trying to run prefix-caching without chunking, this feature is not available yet!"
3019+ )
3020+ if not isinstance (kv_cache_batch_size , int ):
3021+ raise ValueError (
3022+ "Please pass valid integer for kv_cache_batch_size as continuous_batching is enabled for prefill-only model"
3023+ )
30143024
30153025 # For supporting VLLM and Disaggregated with CCL
30163026 if "comp_ctx_lengths_prefill" in compiler_options and "comp_ctx_lengths_decode" in compiler_options :
You can’t perform that action at this time.
0 commit comments