removed error

ochougul · ochougul · commit 74ae0642a3de · 2025-12-09T12:25:33.000Z
Signed-off-by: Onkar Chougule &lt;ochougul@qti.qualcomm.com&gt;
diff --git a/QEfficient/transformers/models/modeling_auto.py b/QEfficient/transformers/models/modeling_auto.py
@@ -3003,6 +3003,14 @@ def compile(
             If `prefill_seq_len` is less than `num_speculative_tokens + 1` for TLM models.
 
         """
+        if prefill_only is None or not prefill_only:
+            if self.continuous_batching and full_batch_size is None:
+                raise TypeError("`full_batch_size` is required when `continuous_batching=True`.")
+            if kv_cache_batch_size and not full_batch_size:
+                raise ValueError(
+                    "KV caching requires continuous batching. Please set `full_batch_size` and "
+                    "enable `continuous_batching=True` in `from_pretrained`."
+                )
 
         # For supporting VLLM and Disaggregated with CCL
         if "comp_ctx_lengths_prefill" in compiler_options and "comp_ctx_lengths_decode" in compiler_options:
@@ -3078,13 +3086,6 @@ def compile(
                 )
 
         if prefill_only is None or not prefill_only:
-            if self.continuous_batching and full_batch_size is None:
-                raise TypeError("`full_batch_size` is required when `continuous_batching=True`.")
-            if kv_cache_batch_size and not full_batch_size:
-                raise ValueError(
-                    "KV caching requires continuous batching. Please set `full_batch_size` and "
-                    "enable `continuous_batching=True` in `from_pretrained`."
-                )
             if self.comp_ctx_lengths_decode is not None:
                 # Adding elements from self.comp_ctx_lengths_decode to decode_specialization
                 for i in range(0, len(self.comp_ctx_lengths_decode)):