Remove error: move `full_batch_size` / `kv_cache_batch_size` validation to the start of `compile()`

ochougul · ochougul · commit 1b60a5f3f489 · 2025-12-09T12:55:25.000Z
Signed-off-by: Onkar Chougule <ochougul@qti.qualcomm.com>
diff --git a/QEfficient/transformers/models/modeling_auto.py b/QEfficient/transformers/models/modeling_auto.py
@@ -3061,6 +3061,14 @@ def compile(
             If `prefill_seq_len` is less than `num_speculative_tokens + 1` for TLM models.
 
         """
+        if prefill_only is None or not prefill_only:
+            if self.continuous_batching and full_batch_size is None:
+                raise TypeError("`full_batch_size` is required when `continuous_batching=True`.")
+            if kv_cache_batch_size and not full_batch_size:
+                raise ValueError(
+                    "KV caching requires continuous batching. Please set `full_batch_size` and "
+                    "enable `continuous_batching=True` in `from_pretrained`."
+                )
 
         # if ccl_enabled is True read Compute-Context-Length lists
         if self.ccl_enabled:
@@ -3147,13 +3155,6 @@ def compile(
                 )
 
         if prefill_only is None or not prefill_only:
-            if self.continuous_batching and full_batch_size is None:
-                raise TypeError("`full_batch_size` is required when `continuous_batching=True`.")
-            if kv_cache_batch_size and not full_batch_size:
-                raise ValueError(
-                    "KV caching requires continuous batching. Please set `full_batch_size` and "
-                    "enable `continuous_batching=True` in `from_pretrained`."
-                )
             if self.comp_ctx_lengths_decode is not None:
                 # Adding elements from self.comp_ctx_lengths_decode to decode_specialization
                 for i in range(0, len(self.comp_ctx_lengths_decode)):