Add validation errors for prefill-only mode with continuous batching

ochougul · ochougul · commit d856cd9cd5c3 · 2025-12-09T12:39:24.000Z
Signed-off-by: Onkar Chougule <ochougul@qti.qualcomm.com>
diff --git a/QEfficient/transformers/models/modeling_auto.py b/QEfficient/transformers/models/modeling_auto.py
@@ -3011,6 +3011,16 @@ def compile(
                     "KV caching requires continuous batching. Please set `full_batch_size` and "
                     "enable `continuous_batching=True` in `from_pretrained`."
                 )
+        else:
+            if self.continuous_batching:
+                if not enable_chunking:
+                    raise NotImplementedError(
+                        "Looks like you are trying to run prefix-caching without chunking, this feature is not available yet!"
+                    )
+                if not isinstance(kv_cache_batch_size, int):
+                    raise ValueError(
+                        "Please pass valid integer for kv_cache_batch_size as continuous_batching is enabled for prefill-only model"
+                    )
 
         # For supporting VLLM and Disaggregated with CCL
         if "comp_ctx_lengths_prefill" in compiler_options and "comp_ctx_lengths_decode" in compiler_options: