diff --git a/README.md b/README.md
index 37f4ff462..02de709af 100644
--- a/README.md
+++ b/README.md
@@ -147,14 +147,23 @@ nixmac uses separate models for **evolution** (config changes via tool use) and
 
 | Variable | Default | Description |
 |----------|---------|-------------|
-| `EVOLVE_PROVIDER` | `openrouter` | `openrouter`, `openai`, or `ollama` |
+| `EVOLVE_PROVIDER` | `openrouter` | `openrouter`, `openai`, `ollama`, or `vllm` |
 | `EVOLVE_MODEL` | `anthropic/claude-sonnet-4` | Model for config evolution |
 | `SUMMARY_AI_PROVIDER` | `openrouter` | Provider for summarization |
 | `SUMMARY_MODEL` | `openai/gpt-4o-mini` | Model for summaries |
 | `OLLAMA_API_BASE` | `http://localhost:11434` | Ollama endpoint |
+| `VLLM_API_BASE` | unset | OpenAI-compatible vLLM endpoint, for example `http://localhost:8000/v1` |
+| `VLLM_API_KEY` | unset | Optional vLLM API key |
 
 For fully local operation: `EVOLVE_PROVIDER=ollama SUMMARY_AI_PROVIDER=ollama devenv up`
 
+Evolution calls request up to 32,768 output tokens by default. For self-hosted vLLM,
+open **Settings → AI Models → Evolution Limits** and set **Max output tokens** low enough
+to leave room for the prompt inside your model's context window. For example, a model
+with a 65,536-token context window needs fewer than 65,536 output tokens, because the prompt
+shares that window; 32,768 is a safe starting point for typical prompts. For CLI runs, pass
+`nixmac evolve --max-output-tokens <tokens>`, which also saves the value as the new setting.
+
 > **Note:** Models under ~70B parameters tend to struggle with the multi-tool evolution workflow.
 
 ## CLI
@@ -167,6 +176,7 @@ nixmac evolve "install ripgrep and fd"
 nixmac evolve "enable Touch ID for sudo" \
   --config ~/.darwin \
   --max-iterations 10 \
+  --max-output-tokens 32768 \
   --evolve-provider ollama \
   --evolve-model qwen3-coder:30b
 
diff --git a/apps/native/.storybook/mocks/tauri-runtime.ts b/apps/native/.storybook/mocks/tauri-runtime.ts
index 88bb3264d..0a45bd6d4 100644
--- a/apps/native/.storybook/mocks/tauri-runtime.ts
+++ b/apps/native/.storybook/mocks/tauri-runtime.ts
@@ -60,6 +60,7 @@ const prefs = {
   evolveModel: "gpt-5",
   maxIterations: 25,
   maxBuildAttempts: 3,
+  maxOutputTokens: 32768,
   sendDiagnostics: true,
   confirmBuild: false,
   confirmClear: false,
diff --git a/apps/native/src-tauri/src/cli.rs b/apps/native/src-tauri/src/cli.rs
index 0cb4a5c52..4dc91981d 100644
--- a/apps/native/src-tauri/src/cli.rs
+++ b/apps/native/src-tauri/src/cli.rs
@@ -24,6 +24,7 @@ pub struct EvolveConfig {
     pub prompt: String,
     pub config: Option<PathBuf>,
     pub max_iterations: Option<usize>,
+    pub max_output_tokens: Option<usize>,
     pub evolve_provider: Option<String>,
     pub evolve_model: Option<String>,
     pub summary_provider: Option<String>,
@@ -58,6 +59,10 @@ pub enum Commands {
         #[arg(short, long)]
         max_iterations: Option<usize>,
 
+        /// Maximum output tokens requested per evolution model call
+        #[arg(long)]
+        max_output_tokens: Option<usize>,
+
         /// Provider for evolution (e.g., openai, openrouter, ollama)
         #[arg(long)]
         evolve_provider: Option<String>,
@@ -102,6 +107,7 @@ pub async fn handle_evolve_command(app: &AppHandle, cfg: EvolveConfig) -> Result
         prompt,
         config,
         max_iterations,
+        max_output_tokens,
         evolve_provider,
         evolve_model,
         summary_provider,
@@ -192,6 +198,11 @@ pub async fn handle_evolve_command(app: &AppHandle, cfg: EvolveConfig) -> Result
         None => crate::storage::store::get_max_iterations(app)
             .unwrap_or(crate::storage::store::DEFAULT_MAX_ITERATIONS),
     };
+    let effective_max_output_tokens: usize = match max_output_tokens {
+        Some(v) => v,
+        None => crate::storage::store::get_max_output_tokens(app)
+            .unwrap_or(crate::storage::store::DEFAULT_MAX_OUTPUT_TOKENS),
+    };
 
     // Max iterations
     if let Some(iterations) = max_iterations {
@@ -199,6 +210,11 @@ pub async fn handle_evolve_command(app: &AppHandle, cfg: EvolveConfig) -> Result
             .map_err(|e| format!("Failed to set max iterations: {}", e))?;
     }
 
+    if let Some(output_tokens) = max_output_tokens {
+        crate::storage::store::set_max_output_tokens(app, output_tokens)
+            .map_err(|e| format!("Failed to set max output tokens: {}", e))?;
+    }
+
     // Host
     if let Some(ref host_attr) = host {
         crate::storage::store::set_host_attr(app, host_attr)
@@ -256,6 +272,7 @@ pub async fn handle_evolve_command(app: &AppHandle, cfg: EvolveConfig) -> Result
             "state": state_str,
             "prompt": prompt,
             "maxIterations": effective_max_iterations,
+            "maxOutputTokens": effective_max_output_tokens,
             "evolveProvider": effective_evolve_provider,
             "evolveModel": effective_evolve_model,
             "summaryProvider": effective_summary_provider,
diff --git a/apps/native/src-tauri/src/commands/ui_prefs.rs b/apps/native/src-tauri/src/commands/ui_prefs.rs
index 3ec05a443..3f4722a6e 100644
--- a/apps/native/src-tauri/src/commands/ui_prefs.rs
+++ b/apps/native/src-tauri/src/commands/ui_prefs.rs
@@ -30,6 +30,8 @@ pub async fn ui_get_prefs(app: AppHandle) -> Result<shared_types::UiPrefs, Strin
     let max_iterations =
         Some(store::get_max_iterations(&app).unwrap_or(store::DEFAULT_MAX_ITERATIONS));
     let max_build_attempts = Some(store::get_max_build_attempts(&app).unwrap_or(5));
+    let max_output_tokens =
+        Some(store::get_max_output_tokens(&app).unwrap_or(store::DEFAULT_MAX_OUTPUT_TOKENS));
     let ollama_api_base_url: Option<String> =
         wrap_result_and_capture_err("ui_get_prefs", store::get_ollama_api_base_url(&app))?;
     let vllm_api_base_url: Option<String> =
@@ -89,6 +91,7 @@ pub async fn ui_get_prefs(app: AppHandle) -> Result<shared_types::UiPrefs, Strin
 
         max_iterations,
         max_build_attempts,
+        max_output_tokens,
 
         ollama_api_base_url,
         vllm_api_base_url,
@@ -144,6 +147,10 @@ pub async fn ui_set_prefs(
         store::set_max_build_attempts(&app, max_build_attempts)
             .map_err(|e| capture_err("ui_set_prefs", e))?;
     }
+    if let Some(max_output_tokens) = prefs.max_output_tokens {
+        store::set_max_output_tokens(&app, max_output_tokens)
+            .map_err(|e| capture_err("ui_set_prefs", e))?;
+    }
     if let Some(ollama_api_base_url) = prefs.ollama_api_base_url {
         store::set_ollama_api_base_url(&app, &ollama_api_base_url)
             .map_err(|e| capture_err("ui_set_prefs", e))?;
diff --git a/apps/native/src-tauri/src/evolve/mod.rs b/apps/native/src-tauri/src/evolve/mod.rs
index 3d9da27ef..f74e4235d 100644
--- a/apps/native/src-tauri/src/evolve/mod.rs
+++ b/apps/native/src-tauri/src/evolve/mod.rs
@@ -57,6 +57,10 @@ use providers::{AiProvider, CliProvider, OllamaProvider, OpenAIProvider, Provide
 
 use self::types::FileEdit;
 
+fn normalize_max_output_tokens(value: usize) -> u32 {
+    value.clamp(1, u32::MAX as usize) as u32 // clamped first, so the narrowing cast is lossless
+}
+
 /// Return short hex prefix for correlation of error messages without risking sensitive content exposure.
 fn short_hash(s: &str) -> String {
     let mut h = Sha256::new();
@@ -358,6 +362,9 @@ pub async fn generate_evolution<R: Runtime>(
     info!("📝 Prompt: {}", prompt);
 
     let store_model = store::get_evolve_model(app).ok().flatten();
+    let max_output_tokens =
+        store::get_max_output_tokens(app).unwrap_or(store::DEFAULT_MAX_OUTPUT_TOKENS);
+    let max_output_tokens_for_request = normalize_max_output_tokens(max_output_tokens);
 
     // Select provider implementation
     let provider: Arc<dyn AiProvider> = if provider_type == "ollama" {
@@ -370,10 +377,14 @@ pub async fn generate_evolution<R: Runtime>(
             .or_else(|| std::env::var("OLLAMA_API_BASE").ok())
             .unwrap_or_else(|| DEFAULT_OLLAMA_API_BASE.to_string());
         info!(
-            "Using Ollama provider | Model: {} | URL: {}",
-            model, base_url
+            "Using Ollama provider | Model: {} | URL: {} | Max output tokens: {}",
+            model, base_url, max_output_tokens_for_request
         );
-        Arc::new(OllamaProvider::new(base_url, model))
+        Arc::new(OllamaProvider::new(
+            base_url,
+            model,
+            max_output_tokens_for_request,
+        ))
     } else if matches!(provider_type.as_str(), "claude" | "codex" | "opencode") {
         let tool = match provider_type.as_str() {
             "claude" => crate::ai::providers::cli::CliTool::Claude,
@@ -395,8 +406,16 @@ pub async fn generate_evolution<R: Runtime>(
             .or_else(|| std::env::var("VLLM_API_BASE").ok())
             .ok_or_else(|| anyhow!("No vLLM base URL configured. Please set it in Settings."))?;
         let api_key = store::get_effective_vllm_api_key(app)?.unwrap_or_else(|| "none".to_string());
-        info!("Using vLLM provider | Model: {} | URL: {}", model, base_url);
-        Arc::new(OpenAIProvider::new(api_key, base_url, model))
+        info!(
+            "Using vLLM provider | Model: {} | URL: {} | Max output tokens: {}",
+            model, base_url, max_output_tokens_for_request
+        );
+        Arc::new(OpenAIProvider::new(
+            api_key,
+            base_url,
+            model,
+            max_output_tokens_for_request,
+        ))
     } else {
         let (api_key, base_url) = store::get_effective_openai_compatible_credential(app)?
             .ok_or_else(|| {
@@ -417,8 +436,16 @@ pub async fn generate_evolution<R: Runtime>(
         } else {
             "OpenAI"
         };
-        info!("Using {} provider | Model: {}", provider_name, model);
-        Arc::new(OpenAIProvider::new(api_key, base_url.to_string(), model))
+        info!(
+            "Using {} provider | Model: {} | Max output tokens: {}",
+            provider_name, model, max_output_tokens_for_request
+        );
+        Arc::new(OpenAIProvider::new(
+            api_key,
+            base_url.to_string(),
+            model,
+            max_output_tokens_for_request,
+        ))
     };
 
     // Emit start event
@@ -446,11 +473,12 @@ pub async fn generate_evolution<R: Runtime>(
     let max_build_attempts =
         store::get_max_build_attempts(app).unwrap_or(DEFAULT_MAX_BUILD_ATTEMPTS);
     info!(
-        "Limits: max_iterations={}, max_iterations_before_edit={} ({}%), max_build_attempts={}",
+        "Limits: max_iterations={}, max_iterations_before_edit={} ({}%), max_build_attempts={}, max_output_tokens={}",
         max_iterations,
         max_iterations_before_edit,
         MAX_ITERATIONS_BEFORE_EDIT_PERCENT,
-        max_build_attempts
+        max_build_attempts,
+        max_output_tokens
     );
 
     let tools = create_tools(banned_tools);
diff --git a/apps/native/src-tauri/src/evolve/providers/mod.rs b/apps/native/src-tauri/src/evolve/providers/mod.rs
index 16ee93b7b..e1d96c237 100644
--- a/apps/native/src-tauri/src/evolve/providers/mod.rs
+++ b/apps/native/src-tauri/src/evolve/providers/mod.rs
@@ -75,6 +75,20 @@ pub enum ProviderError {
     Other(AnyhowError),
 }
 
+fn looks_like_context_window_error(body: &str) -> bool {
+    const TOKEN_HINTS: [&str; 7] = [
+        "max_tokens",
+        "max_output_tokens",
+        "max tokens",
+        "max completion",
+        "output tokens",
+        "token limit",
+        "requested",
+    ];
+    let lowered = body.to_ascii_lowercase(); // match case-insensitively
+    lowered.contains("context") && TOKEN_HINTS.iter().any(|&hint| lowered.contains(hint))
+}
+
 impl ProviderError {
     /// Return a user-friendly error message suitable for display in the UI.
     ///
@@ -85,6 +99,9 @@ impl ProviderError {
     /// `Http { status, body }` before reaching this method.
     pub fn user_message(&self) -> String {
         match self {
+            ProviderError::Http { body, .. } if looks_like_context_window_error(body) => {
+                "The AI provider rejected the request because the configured max output tokens exceed the model's context window. Lower Max output tokens in Settings or switch to a model with a larger context window.".to_string()
+            }
             ProviderError::Http { status, .. } => friendly_provider_error(status.as_u16()),
             ProviderError::Other(e) => {
                 let msg = format!("{:#}", e);
@@ -104,3 +121,27 @@ impl ProviderError {
         }
     }
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn recognizes_context_window_token_errors() {
+        let rejection = "This model's maximum context length is 65536 tokens. However, you requested 65000 output tokens.";
+        assert!(looks_like_context_window_error(rejection));
+    }
+
+    #[test]
+    fn context_window_errors_suggest_token_setting() {
+        // A 400 whose body names both the context length and max_tokens gets the tailored hint.
+        let body = "maximum context length is 65536 tokens; requested max_tokens is too high";
+        let rejected = ProviderError::Http {
+            status: StatusCode::BAD_REQUEST,
+            body: body.to_string(),
+        };
+        let hint = rejected.user_message();
+        assert!(hint.contains("Max output tokens"));
+        assert!(hint.contains("Lower"));
+    }
+}
diff --git a/apps/native/src-tauri/src/evolve/providers/ollama.rs b/apps/native/src-tauri/src/evolve/providers/ollama.rs
index 6e88a7fff..b5081296d 100644
--- a/apps/native/src-tauri/src/evolve/providers/ollama.rs
+++ b/apps/native/src-tauri/src/evolve/providers/ollama.rs
@@ -11,14 +11,16 @@ pub struct OllamaProvider {
     client: reqwest::Client,
     base_url: String,
     model: String,
+    max_output_tokens: u32,
 }
 
 impl OllamaProvider {
-    pub fn new(base_url: String, model: String) -> Self {
+    pub fn new(base_url: String, model: String, max_output_tokens: u32) -> Self {
         Self {
             client: reqwest::Client::new(),
             base_url: base_url.trim_end_matches('/').to_string(),
             model,
+            max_output_tokens,
         }
     }
 }
@@ -28,10 +30,16 @@ struct ChatRequest {
     model: String,
     messages: Vec<OllamaMessage>,
     stream: bool,
+    options: OllamaOptions,
     #[serde(skip_serializing_if = "Vec::is_empty")]
     tools: Vec<OllamaTool>,
 }
 
+#[derive(Clone, Debug, Serialize)]
+struct OllamaOptions {
+    num_predict: u32, // Ollama's per-response generation cap; filled from `max_output_tokens`
+}
+
 #[derive(Clone, Debug, Serialize, Deserialize)]
 struct OllamaMessage {
     role: String,
@@ -100,6 +108,9 @@ impl AiProvider for OllamaProvider {
                 model: self.model.clone(),
                 messages: ollama_messages.clone(),
                 stream: false,
+                options: OllamaOptions {
+                    num_predict: self.max_output_tokens,
+                },
                 tools: ollama_tools.clone(),
             };
 
diff --git a/apps/native/src-tauri/src/evolve/providers/openai.rs b/apps/native/src-tauri/src/evolve/providers/openai.rs
index f060013ce..90f635169 100644
--- a/apps/native/src-tauri/src/evolve/providers/openai.rs
+++ b/apps/native/src-tauri/src/evolve/providers/openai.rs
@@ -20,11 +20,12 @@ use reqwest::StatusCode;
 pub struct OpenAIProvider {
     client: Client<OpenAIConfig>,
     model: String,
+    max_output_tokens: u32,
     record_completions: bool,
 }
 
 impl OpenAIProvider {
-    pub fn new(api_key: String, api_base: String, model: String) -> Self {
+    pub fn new(api_key: String, api_base: String, model: String, max_output_tokens: u32) -> Self {
         let config = OpenAIConfig::new()
             .with_api_key(api_key)
             .with_api_base(api_base);
@@ -36,6 +37,7 @@ impl OpenAIProvider {
         Self {
             client,
             model,
+            max_output_tokens,
             record_completions,
         }
     }
@@ -62,11 +64,7 @@ impl AiProvider for OpenAIProvider {
             .tools(openai_tools)
             .temperature(0.2);
 
-        // Some models support this, others don't. For OpenAI/Claude it is usually supported/required for long checks.
-        // But let's check if we can make it optional or robust.
-        // For now, hardcode max_tokens as in original mod.rs
-        // const MAX_TOKENS: u32 = 65_000;
-        request_builder.max_completion_tokens(65000u32);
+        request_builder.max_completion_tokens(self.max_output_tokens);
 
         let request = request_builder
             .build()
diff --git a/apps/native/src-tauri/src/main.rs b/apps/native/src-tauri/src/main.rs
index e178f7add..2ca2f2cb5 100644
--- a/apps/native/src-tauri/src/main.rs
+++ b/apps/native/src-tauri/src/main.rs
@@ -302,6 +302,7 @@ fn run_cli_mode(context: tauri::Context<tauri::Wry>) -> i32 {
             prompt,
             config,
             max_iterations,
+            max_output_tokens,
             evolve_provider,
             evolve_model,
             summary_provider,
@@ -349,6 +350,7 @@ fn run_cli_mode(context: tauri::Context<tauri::Wry>) -> i32 {
                     prompt,
                     config,
                     max_iterations,
+                    max_output_tokens,
                     evolve_provider,
                     evolve_model,
                     summary_provider,
diff --git a/apps/native/src-tauri/src/shared_types/prefs.rs b/apps/native/src-tauri/src/shared_types/prefs.rs
index 4e4da630d..cf4bc7ba7 100644
--- a/apps/native/src-tauri/src/shared_types/prefs.rs
+++ b/apps/native/src-tauri/src/shared_types/prefs.rs
@@ -36,6 +36,8 @@ pub struct UiPrefs {
     pub max_iterations: Option<usize>,
     /// Maximum build attempts per evolution.
     pub max_build_attempts: Option<usize>,
+    /// Maximum output tokens requested per evolution model call.
+    pub max_output_tokens: Option<usize>,
     /// Whether diagnostic feedback may be sent.
     pub send_diagnostics: bool,
     /// Whether to confirm before running build/apply.
@@ -79,6 +81,8 @@ pub struct UiPrefsUpdate {
     pub max_iterations: Option<usize>,
     /// Maximum build-attempt count update.
     pub max_build_attempts: Option<usize>,
+    /// Maximum output token count update.
+    pub max_output_tokens: Option<usize>,
     /// Ollama base URL update.
     pub ollama_api_base_url: Option<String>,
     /// vLLM base URL update.
diff --git a/apps/native/src-tauri/src/storage/store.rs b/apps/native/src-tauri/src/storage/store.rs
index 9abbbc181..33c59bd01 100644
--- a/apps/native/src-tauri/src/storage/store.rs
+++ b/apps/native/src-tauri/src/storage/store.rs
@@ -44,6 +44,7 @@ pub const PINNED_VERSION_KEY: &str = "pinnedVersion";
 pub const UPDATE_CHANNEL_KEY: &str = "updateChannel";
 
 pub const DEFAULT_MAX_ITERATIONS: usize = 25;
+pub const DEFAULT_MAX_OUTPUT_TOKENS: usize = 32_768;
 const KEYCHAIN_SERVICE: &str = "com.darkmatter.nixmac";
 
 fn e2e_mock_system_enabled() -> bool {
@@ -522,6 +523,18 @@ pub fn set_max_build_attempts<R: Runtime>(app: &AppHandle<R>, max: usize) -> Res
     Ok(())
 }
 
+/// Gets the max output tokens per evolution model call, defaulting to `DEFAULT_MAX_OUTPUT_TOKENS`.
+pub fn get_max_output_tokens<R: Runtime>(app: &AppHandle<R>) -> Result<usize> {
+    Ok(get_usize_pref(app, "maxOutputTokens")?.unwrap_or(DEFAULT_MAX_OUTPUT_TOKENS))
+}
+
+pub fn set_max_output_tokens<R: Runtime>(app: &AppHandle<R>, max: usize) -> Result<()> {
+    let store = get_store(app)?;
+    store.set("maxOutputTokens", serde_json::json!(max)); // same key the getter reads
+    store.save()?; // a failed save is returned to the caller rather than ignored
+    Ok(())
+}
+
 // =============================================================================
 // Model Cache
 // =============================================================================
diff --git a/apps/native/src/components/widget/settings/__snapshots__/ai-models-tab.stories.tsx.snap b/apps/native/src/components/widget/settings/__snapshots__/ai-models-tab.stories.tsx.snap
index 05175b255..6caa2e75c 100644
--- a/apps/native/src/components/widget/settings/__snapshots__/ai-models-tab.stories.tsx.snap
+++ b/apps/native/src/components/widget/settings/__snapshots__/ai-models-tab.stories.tsx.snap
@@ -1,3 +1,3 @@
 // Vitest Snapshot v1, https://vitest.dev/guide/snapshot.html
 
-exports[`Cli Providers 1`] = `"<div class="w-[560px] rounded-lg border bg-background p-6"><div class="space-y-6"><div><h2 class="mb-4 font-semibold text-base">AI Models</h2><p class="mb-4 text-muted-foreground text-xs">OpenRouter is the supported cloud provider in the main UI. Previously saved direct OpenAI keys still work as a legacy fallback, but they are no longer shown in Settings.</p><div class="space-y-6"><div class="space-y-4"><h3 class="font-medium text-sm">Evolution Model</h3><p class="text-muted-foreground text-xs">Model used to plan and apply configuration changes in Nix</p><div class="grid gap-4"><div class="space-y-2"><label class="text-xs font-medium text-muted-foreground" for="evolveProvider">Provider</label><button type="button" role="combobox" aria-controls="radix-_r_0_" aria-expanded="false" aria-autocomplete="none" dir="ltr" data-state="closed" class="flex w-fit items-center justify-between gap-2 whitespace-nowrap rounded-md border border-input bg-transparent px-3 py-2 text-sm shadow-xs outline-none transition-[color,box-shadow] focus-visible:border-ring focus-visible:ring-[3px] focus-visible:ring-ring/50 disabled:cursor-not-allowed disabled:opacity-50 aria-invalid:border-destructive aria-invalid:ring-destructive/20 data-[size=default]:h-9 data-[size=sm]:h-8 data-[placeholder]:text-muted-foreground *:data-[slot=select-value]:line-clamp-1 *:data-[slot=select-value]:flex *:data-[slot=select-value]:items-center *:data-[slot=select-value]:gap-2 dark:bg-input/30 dark:aria-invalid:ring-destructive/40 dark:hover:bg-input/50 [&amp;_svg:not([class*='size-'])]:size-4 [&amp;_svg:not([class*='text-'])]:text-muted-foreground [&amp;_svg]:pointer-events-none [&amp;_svg]:shrink-0" data-size="default" data-slot="select-trigger" id="evolveProvider"><span data-slot="select-value" style="pointer-events: none;">Codex CLI</span><svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" 
stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-chevron-down size-4 opacity-50" aria-hidden="true"><path d="m6 9 6 6 6-6"></path></svg></button></div><div class="space-y-2"><label class="text-xs font-medium text-muted-foreground" for="evolveModel">Model Name (optional)</label><input class="flex h-9 w-full rounded-md border border-input bg-transparent px-3 py-1 text-base shadow-sm transition-colors file:border-0 file:bg-transparent file:text-sm file:font-medium file:text-foreground placeholder:text-muted-foreground focus-visible:outline-none focus-visible:ring-1 focus-visible:ring-ring disabled:cursor-not-allowed disabled:opacity-50 md:text-sm" id="evolveModel" placeholder="Leave empty for CLI default" value=""></div></div></div><div class="space-y-4 pt-4 border-t border-border"><h3 class="font-medium text-sm">Summary Model</h3><p class="text-muted-foreground text-xs">Model used to explain and summarize changes</p><div class="grid gap-4"><div class="space-y-2"><label class="text-xs font-medium text-muted-foreground" for="summaryProvider">Provider</label><button type="button" role="combobox" aria-controls="radix-_r_1_" aria-expanded="false" aria-autocomplete="none" dir="ltr" data-state="closed" class="flex w-fit items-center justify-between gap-2 whitespace-nowrap rounded-md border border-input bg-transparent px-3 py-2 text-sm shadow-xs outline-none transition-[color,box-shadow] focus-visible:border-ring focus-visible:ring-[3px] focus-visible:ring-ring/50 disabled:cursor-not-allowed disabled:opacity-50 aria-invalid:border-destructive aria-invalid:ring-destructive/20 data-[size=default]:h-9 data-[size=sm]:h-8 data-[placeholder]:text-muted-foreground *:data-[slot=select-value]:line-clamp-1 *:data-[slot=select-value]:flex *:data-[slot=select-value]:items-center *:data-[slot=select-value]:gap-2 dark:bg-input/30 dark:aria-invalid:ring-destructive/40 dark:hover:bg-input/50 [&amp;_svg:not([class*='size-'])]:size-4 
[&amp;_svg:not([class*='text-'])]:text-muted-foreground [&amp;_svg]:pointer-events-none [&amp;_svg]:shrink-0" data-size="default" data-slot="select-trigger" id="summaryProvider"><span data-slot="select-value" style="pointer-events: none;">Claude CLI</span><svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-chevron-down size-4 opacity-50" aria-hidden="true"><path d="m6 9 6 6 6-6"></path></svg></button></div><div class="space-y-2"><label class="text-xs font-medium text-muted-foreground" for="summaryModel">Model Name (optional)</label><input class="flex h-9 w-full rounded-md border border-input bg-transparent px-3 py-1 text-base shadow-sm transition-colors file:border-0 file:bg-transparent file:text-sm file:font-medium file:text-foreground placeholder:text-muted-foreground focus-visible:outline-none focus-visible:ring-1 focus-visible:ring-ring disabled:cursor-not-allowed disabled:opacity-50 md:text-sm" id="summaryModel" placeholder="Leave empty for CLI default" value=""></div></div></div><div class="space-y-4 pt-4 border-t border-border"><h3 class="font-medium text-sm">Evolution Limits</h3><p class="text-muted-foreground text-xs">Control how long the AI will try before giving up</p><div class="grid grid-cols-2 gap-4"><div class="space-y-2"><div class="flex items-center gap-2"><label class="text-xs font-medium text-muted-foreground" for="maxIterations">Max Iterations</label><button type="button" class="inline-flex h-4 w-4 items-center justify-center rounded-sm text-muted-foreground transition-colors hover:text-foreground/70" aria-label="Max iterations info" data-state="closed" data-slot="tooltip-trigger"><svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-info h-3.5 w-3.5" 
aria-hidden="true"><circle cx="12" cy="12" r="10"></circle><path d="M12 16v-4"></path><path d="M12 8h.01"></path></svg></button></div><input class="flex h-9 w-full rounded-md border border-input bg-transparent px-3 py-1 text-base shadow-sm transition-colors file:border-0 file:bg-transparent file:text-sm file:font-medium file:text-foreground placeholder:text-muted-foreground focus-visible:outline-none focus-visible:ring-1 focus-visible:ring-ring disabled:cursor-not-allowed disabled:opacity-50 md:text-sm" id="maxIterations" min="10" max="200" type="number" value="25"></div><div class="space-y-2"><div class="flex items-center gap-2"><label class="text-xs font-medium text-muted-foreground" for="maxBuildAttempts">Max Build Attempts</label><button type="button" class="inline-flex h-4 w-4 items-center justify-center rounded-sm text-muted-foreground transition-colors hover:text-foreground/70" aria-label="Max build attempts info" data-state="closed" data-slot="tooltip-trigger"><svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-info h-3.5 w-3.5" aria-hidden="true"><circle cx="12" cy="12" r="10"></circle><path d="M12 16v-4"></path><path d="M12 8h.01"></path></svg></button></div><input class="flex h-9 w-full rounded-md border border-input bg-transparent px-3 py-1 text-base shadow-sm transition-colors file:border-0 file:bg-transparent file:text-sm file:font-medium file:text-foreground placeholder:text-muted-foreground focus-visible:outline-none focus-visible:ring-1 focus-visible:ring-ring disabled:cursor-not-allowed disabled:opacity-50 md:text-sm" id="maxBuildAttempts" min="1" max="20" type="number" value="5"></div></div></div></div></div></div></div>"`;
+exports[`Cli Providers 1`] = `"<div class="w-[560px] rounded-lg border bg-background p-6"><div class="space-y-6"><div><h2 class="mb-4 font-semibold text-base">AI Models</h2><p class="mb-4 text-muted-foreground text-xs">OpenRouter is the supported cloud provider in the main UI. Previously saved direct OpenAI keys still work as a legacy fallback, but they are no longer shown in Settings.</p><div class="space-y-6"><div class="space-y-4"><h3 class="font-medium text-sm">Evolution Model</h3><p class="text-muted-foreground text-xs">Model used to plan and apply configuration changes in Nix</p><div class="grid gap-4"><div class="space-y-2"><label class="text-xs font-medium text-muted-foreground" for="evolveProvider">Provider</label><button type="button" role="combobox" aria-controls="radix-_r_0_" aria-expanded="false" aria-autocomplete="none" dir="ltr" data-state="closed" class="flex w-fit items-center justify-between gap-2 whitespace-nowrap rounded-md border border-input bg-transparent px-3 py-2 text-sm shadow-xs outline-none transition-[color,box-shadow] focus-visible:border-ring focus-visible:ring-[3px] focus-visible:ring-ring/50 disabled:cursor-not-allowed disabled:opacity-50 aria-invalid:border-destructive aria-invalid:ring-destructive/20 data-[size=default]:h-9 data-[size=sm]:h-8 data-[placeholder]:text-muted-foreground *:data-[slot=select-value]:line-clamp-1 *:data-[slot=select-value]:flex *:data-[slot=select-value]:items-center *:data-[slot=select-value]:gap-2 dark:bg-input/30 dark:aria-invalid:ring-destructive/40 dark:hover:bg-input/50 [&amp;_svg:not([class*='size-'])]:size-4 [&amp;_svg:not([class*='text-'])]:text-muted-foreground [&amp;_svg]:pointer-events-none [&amp;_svg]:shrink-0" data-size="default" data-slot="select-trigger" id="evolveProvider"><span data-slot="select-value" style="pointer-events: none;">Codex CLI</span><svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" 
stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-chevron-down size-4 opacity-50" aria-hidden="true"><path d="m6 9 6 6 6-6"></path></svg></button></div><div class="space-y-2"><label class="text-xs font-medium text-muted-foreground" for="evolveModel">Model Name (optional)</label><input class="flex h-9 w-full rounded-md border border-input bg-transparent px-3 py-1 text-base shadow-sm transition-colors file:border-0 file:bg-transparent file:text-sm file:font-medium file:text-foreground placeholder:text-muted-foreground focus-visible:outline-none focus-visible:ring-1 focus-visible:ring-ring disabled:cursor-not-allowed disabled:opacity-50 md:text-sm" id="evolveModel" placeholder="Leave empty for CLI default" value=""></div></div></div><div class="space-y-4 pt-4 border-t border-border"><h3 class="font-medium text-sm">Summary Model</h3><p class="text-muted-foreground text-xs">Model used to explain and summarize changes</p><div class="grid gap-4"><div class="space-y-2"><label class="text-xs font-medium text-muted-foreground" for="summaryProvider">Provider</label><button type="button" role="combobox" aria-controls="radix-_r_1_" aria-expanded="false" aria-autocomplete="none" dir="ltr" data-state="closed" class="flex w-fit items-center justify-between gap-2 whitespace-nowrap rounded-md border border-input bg-transparent px-3 py-2 text-sm shadow-xs outline-none transition-[color,box-shadow] focus-visible:border-ring focus-visible:ring-[3px] focus-visible:ring-ring/50 disabled:cursor-not-allowed disabled:opacity-50 aria-invalid:border-destructive aria-invalid:ring-destructive/20 data-[size=default]:h-9 data-[size=sm]:h-8 data-[placeholder]:text-muted-foreground *:data-[slot=select-value]:line-clamp-1 *:data-[slot=select-value]:flex *:data-[slot=select-value]:items-center *:data-[slot=select-value]:gap-2 dark:bg-input/30 dark:aria-invalid:ring-destructive/40 dark:hover:bg-input/50 [&amp;_svg:not([class*='size-'])]:size-4 
[&amp;_svg:not([class*='text-'])]:text-muted-foreground [&amp;_svg]:pointer-events-none [&amp;_svg]:shrink-0" data-size="default" data-slot="select-trigger" id="summaryProvider"><span data-slot="select-value" style="pointer-events: none;">Claude CLI</span><svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-chevron-down size-4 opacity-50" aria-hidden="true"><path d="m6 9 6 6 6-6"></path></svg></button></div><div class="space-y-2"><label class="text-xs font-medium text-muted-foreground" for="summaryModel">Model Name (optional)</label><input class="flex h-9 w-full rounded-md border border-input bg-transparent px-3 py-1 text-base shadow-sm transition-colors file:border-0 file:bg-transparent file:text-sm file:font-medium file:text-foreground placeholder:text-muted-foreground focus-visible:outline-none focus-visible:ring-1 focus-visible:ring-ring disabled:cursor-not-allowed disabled:opacity-50 md:text-sm" id="summaryModel" placeholder="Leave empty for CLI default" value=""></div></div></div><div class="space-y-4 pt-4 border-t border-border"><h3 class="font-medium text-sm">Evolution Limits</h3><p class="text-muted-foreground text-xs">Control how long the AI will try before giving up</p><div class="grid grid-cols-2 gap-4"><div class="space-y-2"><div class="flex items-center gap-2"><label class="text-xs font-medium text-muted-foreground" for="maxIterations">Max Iterations</label><button type="button" class="inline-flex h-4 w-4 items-center justify-center rounded-sm text-muted-foreground transition-colors hover:text-foreground/70" aria-label="Max iterations info" data-state="closed" data-slot="tooltip-trigger"><svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-info h-3.5 w-3.5" 
aria-hidden="true"><circle cx="12" cy="12" r="10"></circle><path d="M12 16v-4"></path><path d="M12 8h.01"></path></svg></button></div><input class="flex h-9 w-full rounded-md border border-input bg-transparent px-3 py-1 text-base shadow-sm transition-colors file:border-0 file:bg-transparent file:text-sm file:font-medium file:text-foreground placeholder:text-muted-foreground focus-visible:outline-none focus-visible:ring-1 focus-visible:ring-ring disabled:cursor-not-allowed disabled:opacity-50 md:text-sm" id="maxIterations" min="10" max="200" type="number" value="25"></div><div class="space-y-2"><div class="flex items-center gap-2"><label class="text-xs font-medium text-muted-foreground" for="maxOutputTokens">Max output tokens</label><button type="button" class="inline-flex h-4 w-4 items-center justify-center rounded-sm text-muted-foreground transition-colors hover:text-foreground/70" aria-label="Max output tokens info" data-state="closed" data-slot="tooltip-trigger"><svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-info h-3.5 w-3.5" aria-hidden="true"><circle cx="12" cy="12" r="10"></circle><path d="M12 16v-4"></path><path d="M12 8h.01"></path></svg></button></div><input class="flex h-9 w-full rounded-md border border-input bg-transparent px-3 py-1 text-base shadow-sm transition-colors file:border-0 file:bg-transparent file:text-sm file:font-medium file:text-foreground placeholder:text-muted-foreground focus-visible:outline-none focus-visible:ring-1 focus-visible:ring-ring disabled:cursor-not-allowed disabled:opacity-50 md:text-sm" id="maxOutputTokens" min="1024" max="262144" step="1024" type="number" value="32768"></div><div class="space-y-2"><div class="flex items-center gap-2"><label class="text-xs font-medium text-muted-foreground" for="maxBuildAttempts">Max Build Attempts</label><button type="button" 
class="inline-flex h-4 w-4 items-center justify-center rounded-sm text-muted-foreground transition-colors hover:text-foreground/70" aria-label="Max build attempts info" data-state="closed" data-slot="tooltip-trigger"><svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-info h-3.5 w-3.5" aria-hidden="true"><circle cx="12" cy="12" r="10"></circle><path d="M12 16v-4"></path><path d="M12 8h.01"></path></svg></button></div><input class="flex h-9 w-full rounded-md border border-input bg-transparent px-3 py-1 text-base shadow-sm transition-colors file:border-0 file:bg-transparent file:text-sm file:font-medium file:text-foreground placeholder:text-muted-foreground focus-visible:outline-none focus-visible:ring-1 focus-visible:ring-ring disabled:cursor-not-allowed disabled:opacity-50 md:text-sm" id="maxBuildAttempts" min="1" max="20" type="number" value="5"></div></div></div></div></div></div></div>"`;
diff --git a/apps/native/src/components/widget/settings/ai-models-tab.stories.tsx b/apps/native/src/components/widget/settings/ai-models-tab.stories.tsx
index ccf415c10..2a20f6adc 100644
--- a/apps/native/src/components/widget/settings/ai-models-tab.stories.tsx
+++ b/apps/native/src/components/widget/settings/ai-models-tab.stories.tsx
@@ -10,6 +10,7 @@ type ModelValues = {
   summaryModel: string;
   maxIterations: number;
   maxBuildAttempts: number;
+  maxOutputTokens: number;
   openrouterApiKey: string;
   openaiApiKey: string;
   vllmApiBaseUrl: string;
@@ -23,6 +24,7 @@ function AiModelsTabFixture() {
     summaryModel: "",
     maxIterations: 25,
     maxBuildAttempts: 5,
+    maxOutputTokens: 32768,
     openrouterApiKey: "",
     openaiApiKey: "",
     vllmApiBaseUrl: "",
@@ -55,6 +57,7 @@ function AiModelsTabFixture() {
         form={form as any}
         maxBuildAttemptsField={field("maxBuildAttempts")}
         maxIterationsField={field("maxIterations")}
+        maxOutputTokensField={field("maxOutputTokens")}
         summaryModelField={field("summaryModel")}
         summaryProviderField={field("summaryProvider")}
       />
diff --git a/apps/native/src/components/widget/settings/ai-models-tab.tsx b/apps/native/src/components/widget/settings/ai-models-tab.tsx
index a6e0bd884..e100acd5b 100644
--- a/apps/native/src/components/widget/settings/ai-models-tab.tsx
+++ b/apps/native/src/components/widget/settings/ai-models-tab.tsx
@@ -9,7 +9,7 @@ import {
 } from "@/components/ui/select";
 import { ModelCombobox } from "@/components/widget/controls/model-combobox";
 import { getProviderConfigInvalidReason, isCliProvider } from "@/lib/ai-provider-validation";
-import { DEFAULT_MAX_ITERATIONS } from "@/lib/constants";
+import { DEFAULT_MAX_ITERATIONS, DEFAULT_MAX_OUTPUT_TOKENS } from "@/lib/constants";
 import { tauriAPI } from "@/ipc/api";
 import type { CliToolsState } from "@/ipc/types";
 import type { AnyFieldApi, ReactFormExtendedApi } from "@tanstack/react-form";
@@ -30,6 +30,8 @@ interface AiModelsTabProps {
   // biome-ignore lint/suspicious/noExplicitAny: tanstack form types are complex
   maxBuildAttemptsField: AnyFieldApi;
   // biome-ignore lint/suspicious/noExplicitAny: tanstack form types are complex
+  maxOutputTokensField: AnyFieldApi;
+  // biome-ignore lint/suspicious/noExplicitAny: tanstack form types are complex
   form: ReactFormExtendedApi<any, any, any, any, any, any, any, any, any, any, any, any>;
 }
 
@@ -108,6 +110,7 @@ export function AiModelsTab({
   summaryModelField,
   maxIterationsField,
   maxBuildAttemptsField,
+  maxOutputTokensField,
   form,
 }: AiModelsTabProps) {
   const cliStatus = useCliToolStatus();
@@ -381,6 +384,55 @@ export function AiModelsTab({
                   onBlur={maxIterationsField.handleBlur}
                 />
               </div>
+              <div className="space-y-2">
+                <div className="flex items-center gap-2">
+                  <label
+                    className="text-xs font-medium text-muted-foreground"
+                    htmlFor="maxOutputTokens"
+                  >
+                    Max output tokens
+                  </label>
+                  <Tooltip>
+                    <TooltipTrigger asChild>
+                      <button
+                        type="button"
+                        className="inline-flex h-4 w-4 items-center justify-center rounded-sm text-muted-foreground transition-colors hover:text-foreground/70"
+                        aria-label="Max output tokens info"
+                      >
+                        <Info className="h-3.5 w-3.5" />
+                      </button>
+                    </TooltipTrigger>
+                    <TooltipContent side="right" className="max-w-xs text-xs">
+                      <p>Completion tokens requested from the evolution model.</p>
+                      <p className="mt-1">
+                        Default: {DEFAULT_MAX_OUTPUT_TOKENS}. Lower this if local vLLM rejects
+                        requests for exceeding the model context window.
+                      </p>
+                    </TooltipContent>
+                  </Tooltip>
+                </div>
+                <Input
+                  id="maxOutputTokens"
+                  type="number"
+                  min={1024}
+                  max={262144}
+                  step={1024}
+                  value={maxOutputTokensField.state.value}
+                  onChange={async (e) => {
+                    const value =
+                      Number.parseInt(e.target.value, 10) || DEFAULT_MAX_OUTPUT_TOKENS;
+                    maxOutputTokensField.handleChange(value);
+                    // min/max do not stop typed values from leaving the range, so only
+                    // persist values that fall within the input's own declared bounds.
+                    const min = Number(e.target.min);
+                    const max = Number(e.target.max);
+                    if (value >= min && value <= max) {
+                      await tauriAPI.ui.setPrefs({ maxOutputTokens: value });
+                    }
+                  }}
+                  onBlur={maxOutputTokensField.handleBlur}
+                />
+              </div>
               <div className="space-y-2">
                 <div className="flex items-center gap-2">
                   <label
diff --git a/apps/native/src/components/widget/settings/settings-dialog.tsx b/apps/native/src/components/widget/settings/settings-dialog.tsx
index e0b06abf2..dd6eaf011 100644
--- a/apps/native/src/components/widget/settings/settings-dialog.tsx
+++ b/apps/native/src/components/widget/settings/settings-dialog.tsx
@@ -2,7 +2,7 @@ import { Button } from "@/components/ui/button";
 import { useDarwinConfig } from "@/hooks/use-darwin-config";
 import { cn } from "@/lib/utils";
 import { type SettingsTab, useWidgetStore } from "@/stores/widget-store";
-import { DEFAULT_MAX_ITERATIONS } from "@/lib/constants";
+import { DEFAULT_MAX_ITERATIONS, DEFAULT_MAX_OUTPUT_TOKENS } from "@/lib/constants";
 import { tauriAPI } from "@/ipc/api";
 import { useForm } from "@tanstack/react-form";
 import { Bot, FolderOpen, Key, Settings2, SlidersHorizontal, Wrench } from "lucide-react";
@@ -129,6 +129,7 @@ export function SettingsDialog() {
       evolveModel: "anthropic/claude-sonnet-4",
       maxIterations: DEFAULT_MAX_ITERATIONS,
       maxBuildAttempts: 5,
+      maxOutputTokens: DEFAULT_MAX_OUTPUT_TOKENS,
       sendDiagnostics: false,
     },
   });
@@ -151,6 +152,7 @@ export function SettingsDialog() {
           form.setFieldValue("evolveModel", prefs.evolveModel ?? "anthropic/claude-sonnet-4");
           form.setFieldValue("maxIterations", prefs.maxIterations ?? DEFAULT_MAX_ITERATIONS);
           form.setFieldValue("maxBuildAttempts", prefs.maxBuildAttempts ?? 5);
+          form.setFieldValue("maxOutputTokens", prefs.maxOutputTokens ?? DEFAULT_MAX_OUTPUT_TOKENS);
           form.setFieldValue("sendDiagnostics", prefs.sendDiagnostics ?? false);
 
           setOpenrouterKeyStatus(prefs.openrouterApiKey ? "valid" : "idle");
@@ -307,15 +309,20 @@ export function SettingsDialog() {
                                 {(maxIterationsField) => (
                                   <form.Field name="maxBuildAttempts">
                                     {(maxBuildAttemptsField) => (
-                                      <AiModelsTab
-                                        evolveModelField={evolveModelField}
-                                        evolveProviderField={evolveProviderField}
-                                        form={form}
-                                        summaryModelField={summaryModelField}
-                                        summaryProviderField={summaryProviderField}
-                                        maxIterationsField={maxIterationsField}
-                                        maxBuildAttemptsField={maxBuildAttemptsField}
-                                      />
+                                      <form.Field name="maxOutputTokens">
+                                        {(maxOutputTokensField) => (
+                                          <AiModelsTab
+                                            evolveModelField={evolveModelField}
+                                            evolveProviderField={evolveProviderField}
+                                            form={form}
+                                            summaryModelField={summaryModelField}
+                                            summaryProviderField={summaryProviderField}
+                                            maxIterationsField={maxIterationsField}
+                                            maxBuildAttemptsField={maxBuildAttemptsField}
+                                            maxOutputTokensField={maxOutputTokensField}
+                                          />
+                                        )}
+                                      </form.Field>
                                     )}
                                   </form.Field>
                                 )}
diff --git a/apps/native/src/ipc/types.ts b/apps/native/src/ipc/types.ts
index a992464c5..5ca2e9876 100644
--- a/apps/native/src/ipc/types.ts
+++ b/apps/native/src/ipc/types.ts
@@ -1412,6 +1412,10 @@ maxIterations: number | null;
  * Maximum build attempts per evolution.
  */
 maxBuildAttempts: number | null; 
+/**
+ * Maximum output tokens requested per evolution model call.
+ */
+maxOutputTokens: number | null;
 /**
  * Whether diagnostic feedback may be sent.
  */
@@ -1490,6 +1494,10 @@ maxIterations: number | null;
  * Maximum build-attempt count update.
  */
 maxBuildAttempts: number | null; 
+/**
+ * Maximum output token count update.
+ */
+maxOutputTokens: number | null;
 /**
  * Ollama base URL update.
  */
diff --git a/apps/native/src/lib/constants.ts b/apps/native/src/lib/constants.ts
index bfba3e7bc..fcd8eabc1 100644
--- a/apps/native/src/lib/constants.ts
+++ b/apps/native/src/lib/constants.ts
@@ -1,4 +1,5 @@
 // Must match session_control::EVOLUTION_CANCELLED_MSG in src-tauri.
 export const EVOLUTION_CANCELLED_MSG = "Evolution cancelled by user";
 export const DEFAULT_MAX_ITERATIONS = 25;
+export const DEFAULT_MAX_OUTPUT_TOKENS = 32_768;
 export const EVOLVE_EVENT_CHANNEL = "darwin:evolve:event";