File tree Expand file tree Collapse file tree 1 file changed +10
-0
lines changed
QEfficient/transformers/models Expand file tree Collapse file tree 1 file changed +10
-0
lines changed Original file line number Diff line number Diff line change @@ -3069,6 +3069,16 @@ def compile(
30693069 "KV caching requires continuous batching. Please set `full_batch_size` and "
30703070 "enable `continuous_batching=True` in `from_pretrained`."
30713071 )
3072+ else :
3073+ if self .continuous_batching :
3074+ if not enable_chunking :
3075+ raise NotImplementedError (
3076+ "Looks like you are trying to run prefix-caching without chunking, this feature is not available yet!"
3077+ )
3078+ if not isinstance (kv_cache_batch_size , int ):
3079+ raise ValueError (
3080+ "Please pass valid integer for kv_cache_batch_size as continuous_batching is enabled for prefill-only model"
3081+ )
30723082
30733083 # if ccl_enabled is True read Compute-Context-Length lists
30743084 if self .ccl_enabled :
You can’t perform that action at this time.
0 commit comments