From 1717a7964987e109530ca15b875f9e32e0de734e Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Mon, 1 Jun 2026 06:27:05 +0000 Subject: [PATCH 1/2] Add configurable evolution output token limit Co-authored-by: cooper --- README.md | 12 ++++- apps/native/.storybook/mocks/tauri-runtime.ts | 1 + apps/native/src-tauri/src/cli.rs | 17 +++++++ .../native/src-tauri/src/commands/ui_prefs.rs | 7 +++ apps/native/src-tauri/src/evolve/mod.rs | 46 ++++++++++++++---- .../src-tauri/src/evolve/providers/mod.rs | 41 ++++++++++++++++ .../src-tauri/src/evolve/providers/ollama.rs | 13 ++++- .../src-tauri/src/evolve/providers/openai.rs | 10 ++-- apps/native/src-tauri/src/main.rs | 2 + .../src-tauri/src/shared_types/prefs.rs | 4 ++ apps/native/src-tauri/src/storage/store.rs | 13 +++++ .../widget/settings/ai-models-tab.stories.tsx | 3 ++ .../widget/settings/ai-models-tab.tsx | 48 ++++++++++++++++++- .../widget/settings/settings-dialog.tsx | 27 +++++++---- apps/native/src/ipc/types.ts | 8 ++++ apps/native/src/lib/constants.ts | 1 + 16 files changed, 225 insertions(+), 28 deletions(-) diff --git a/README.md b/README.md index 37f4ff462..02de709af 100644 --- a/README.md +++ b/README.md @@ -147,14 +147,23 @@ nixmac uses separate models for **evolution** (config changes via tool use) and | Variable | Default | Description | |----------|---------|-------------| -| `EVOLVE_PROVIDER` | `openrouter` | `openrouter`, `openai`, or `ollama` | +| `EVOLVE_PROVIDER` | `openrouter` | `openrouter`, `openai`, `ollama`, or `vllm` | | `EVOLVE_MODEL` | `anthropic/claude-sonnet-4` | Model for config evolution | | `SUMMARY_AI_PROVIDER` | `openrouter` | Provider for summarization | | `SUMMARY_MODEL` | `openai/gpt-4o-mini` | Model for summaries | | `OLLAMA_API_BASE` | `http://localhost:11434` | Ollama endpoint | +| `VLLM_API_BASE` | unset | OpenAI-compatible vLLM endpoint, for example `http://localhost:8000/v1` | +| `VLLM_API_KEY` | unset | Optional vLLM API key | For fully local operation: `EVOLVE_PROVIDER=ollama 
SUMMARY_AI_PROVIDER=ollama devenv up` +Evolution calls request up to 32,768 output tokens by default. For self-hosted vLLM, +open **Settings → AI Models → Evolution Limits** and set **Max output tokens** low enough +to leave room for the prompt inside your model's context window. For example, a model +with a 65,536-token context window should use less than 65,536 output tokens; 32,768 is +a safe starting point for typical prompts. The same value can be set for CLI runs with +`nixmac evolve --max-output-tokens <tokens>`. + > **Note:** Models under ~70B parameters tend to struggle with the multi-tool evolution workflow. ## CLI @@ -167,6 +176,7 @@ nixmac evolve "install ripgrep and fd" nixmac evolve "enable Touch ID for sudo" \ --config ~/.darwin \ --max-iterations 10 \ + --max-output-tokens 32768 \ --evolve-provider ollama \ --evolve-model qwen3-coder:30b diff --git a/apps/native/.storybook/mocks/tauri-runtime.ts b/apps/native/.storybook/mocks/tauri-runtime.ts index 88bb3264d..0a45bd6d4 100644 --- a/apps/native/.storybook/mocks/tauri-runtime.ts +++ b/apps/native/.storybook/mocks/tauri-runtime.ts @@ -60,6 +60,7 @@ const prefs = { evolveModel: "gpt-5", maxIterations: 25, maxBuildAttempts: 3, + maxOutputTokens: 32768, sendDiagnostics: true, confirmBuild: false, confirmClear: false, diff --git a/apps/native/src-tauri/src/cli.rs b/apps/native/src-tauri/src/cli.rs index 0cb4a5c52..4dc91981d 100644 --- a/apps/native/src-tauri/src/cli.rs +++ b/apps/native/src-tauri/src/cli.rs @@ -24,6 +24,7 @@ pub struct EvolveConfig { pub prompt: String, pub config: Option, pub max_iterations: Option, + pub max_output_tokens: Option, pub evolve_provider: Option, pub evolve_model: Option, pub summary_provider: Option, @@ -58,6 +59,10 @@ pub enum Commands { #[arg(short, long)] max_iterations: Option, + /// Maximum output tokens requested per evolution model call + #[arg(long)] + max_output_tokens: Option, + /// Provider for evolution (e.g., openai, openrouter, ollama) #[arg(long)] evolve_provider: 
Option, @@ -102,6 +107,7 @@ pub async fn handle_evolve_command(app: &AppHandle, cfg: EvolveConfig) -> Result prompt, config, max_iterations, + max_output_tokens, evolve_provider, evolve_model, summary_provider, @@ -192,6 +198,11 @@ pub async fn handle_evolve_command(app: &AppHandle, cfg: EvolveConfig) -> Result None => crate::storage::store::get_max_iterations(app) .unwrap_or(crate::storage::store::DEFAULT_MAX_ITERATIONS), }; + let effective_max_output_tokens: usize = match max_output_tokens { + Some(v) => v, + None => crate::storage::store::get_max_output_tokens(app) + .unwrap_or(crate::storage::store::DEFAULT_MAX_OUTPUT_TOKENS), + }; // Max iterations if let Some(iterations) = max_iterations { @@ -199,6 +210,11 @@ pub async fn handle_evolve_command(app: &AppHandle, cfg: EvolveConfig) -> Result .map_err(|e| format!("Failed to set max iterations: {}", e))?; } + if let Some(output_tokens) = max_output_tokens { + crate::storage::store::set_max_output_tokens(app, output_tokens) + .map_err(|e| format!("Failed to set max output tokens: {}", e))?; + } + // Host if let Some(ref host_attr) = host { crate::storage::store::set_host_attr(app, host_attr) @@ -256,6 +272,7 @@ pub async fn handle_evolve_command(app: &AppHandle, cfg: EvolveConfig) -> Result "state": state_str, "prompt": prompt, "maxIterations": effective_max_iterations, + "maxOutputTokens": effective_max_output_tokens, "evolveProvider": effective_evolve_provider, "evolveModel": effective_evolve_model, "summaryProvider": effective_summary_provider, diff --git a/apps/native/src-tauri/src/commands/ui_prefs.rs b/apps/native/src-tauri/src/commands/ui_prefs.rs index 3ec05a443..3f4722a6e 100644 --- a/apps/native/src-tauri/src/commands/ui_prefs.rs +++ b/apps/native/src-tauri/src/commands/ui_prefs.rs @@ -30,6 +30,8 @@ pub async fn ui_get_prefs(app: AppHandle) -> Result = wrap_result_and_capture_err("ui_get_prefs", store::get_ollama_api_base_url(&app))?; let vllm_api_base_url: Option = @@ -89,6 +91,7 @@ pub async fn 
ui_get_prefs(app: AppHandle) -> Result u32 { + value.max(1).min(u32::MAX as usize) as u32 +} + /// Return short hex prefix for correlation of error messages without risking sensitive content exposure. fn short_hash(s: &str) -> String { let mut h = Sha256::new(); @@ -358,6 +362,9 @@ pub async fn generate_evolution( info!("📝 Prompt: {}", prompt); let store_model = store::get_evolve_model(app).ok().flatten(); + let max_output_tokens = + store::get_max_output_tokens(app).unwrap_or(store::DEFAULT_MAX_OUTPUT_TOKENS); + let max_output_tokens_for_request = normalize_max_output_tokens(max_output_tokens); // Select provider implementation let provider: Arc = if provider_type == "ollama" { @@ -370,10 +377,14 @@ pub async fn generate_evolution( .or_else(|| std::env::var("OLLAMA_API_BASE").ok()) .unwrap_or_else(|| DEFAULT_OLLAMA_API_BASE.to_string()); info!( - "Using Ollama provider | Model: {} | URL: {}", - model, base_url + "Using Ollama provider | Model: {} | URL: {} | Max output tokens: {}", + model, base_url, max_output_tokens_for_request ); - Arc::new(OllamaProvider::new(base_url, model)) + Arc::new(OllamaProvider::new( + base_url, + model, + max_output_tokens_for_request, + )) } else if matches!(provider_type.as_str(), "claude" | "codex" | "opencode") { let tool = match provider_type.as_str() { "claude" => crate::ai::providers::cli::CliTool::Claude, @@ -395,8 +406,16 @@ pub async fn generate_evolution( .or_else(|| std::env::var("VLLM_API_BASE").ok()) .ok_or_else(|| anyhow!("No vLLM base URL configured. 
Please set it in Settings."))?; let api_key = store::get_effective_vllm_api_key(app)?.unwrap_or_else(|| "none".to_string()); - info!("Using vLLM provider | Model: {} | URL: {}", model, base_url); - Arc::new(OpenAIProvider::new(api_key, base_url, model)) + info!( + "Using vLLM provider | Model: {} | URL: {} | Max output tokens: {}", + model, base_url, max_output_tokens_for_request + ); + Arc::new(OpenAIProvider::new( + api_key, + base_url, + model, + max_output_tokens_for_request, + )) } else { let (api_key, base_url) = store::get_effective_openai_compatible_credential(app)? .ok_or_else(|| { @@ -417,8 +436,16 @@ pub async fn generate_evolution( } else { "OpenAI" }; - info!("Using {} provider | Model: {}", provider_name, model); - Arc::new(OpenAIProvider::new(api_key, base_url.to_string(), model)) + info!( + "Using {} provider | Model: {} | Max output tokens: {}", + provider_name, model, max_output_tokens_for_request + ); + Arc::new(OpenAIProvider::new( + api_key, + base_url.to_string(), + model, + max_output_tokens_for_request, + )) }; // Emit start event @@ -446,11 +473,12 @@ pub async fn generate_evolution( let max_build_attempts = store::get_max_build_attempts(app).unwrap_or(DEFAULT_MAX_BUILD_ATTEMPTS); info!( - "Limits: max_iterations={}, max_iterations_before_edit={} ({}%), max_build_attempts={}", + "Limits: max_iterations={}, max_iterations_before_edit={} ({}%), max_build_attempts={}, max_output_tokens={}", max_iterations, max_iterations_before_edit, MAX_ITERATIONS_BEFORE_EDIT_PERCENT, - max_build_attempts + max_build_attempts, + max_output_tokens ); let tools = create_tools(banned_tools); diff --git a/apps/native/src-tauri/src/evolve/providers/mod.rs b/apps/native/src-tauri/src/evolve/providers/mod.rs index 16ee93b7b..e1d96c237 100644 --- a/apps/native/src-tauri/src/evolve/providers/mod.rs +++ b/apps/native/src-tauri/src/evolve/providers/mod.rs @@ -75,6 +75,20 @@ pub enum ProviderError { Other(AnyhowError), } +fn looks_like_context_window_error(body: &str) -> 
bool { + let body = body.to_ascii_lowercase(); + (body.contains("context") + || body.contains("maximum context") + || body.contains("context length")) + && (body.contains("max_tokens") + || body.contains("max_output_tokens") + || body.contains("max tokens") + || body.contains("max completion") + || body.contains("output tokens") + || body.contains("token limit") + || body.contains("requested")) +} + impl ProviderError { /// Return a user-friendly error message suitable for display in the UI. /// @@ -85,6 +99,9 @@ impl ProviderError { /// `Http { status, body }` before reaching this method. pub fn user_message(&self) -> String { match self { + ProviderError::Http { status, body } if looks_like_context_window_error(body) => { + "The AI provider rejected the request because the configured max output tokens exceed the model's context window. Lower Max output tokens in Settings or switch to a model with a larger context window.".to_string() + } ProviderError::Http { status, .. } => friendly_provider_error(status.as_u16()), ProviderError::Other(e) => { let msg = format!("{:#}", e); @@ -104,3 +121,27 @@ impl ProviderError { } } } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn recognizes_context_window_token_errors() { + let body = "This model's maximum context length is 65536 tokens. 
However, you requested 65000 output tokens."; + assert!(looks_like_context_window_error(body)); + } + + #[test] + fn context_window_errors_suggest_token_setting() { + let err = ProviderError::Http { + status: StatusCode::BAD_REQUEST, + body: "maximum context length is 65536 tokens; requested max_tokens is too high" + .to_string(), + }; + + let msg = err.user_message(); + assert!(msg.contains("Max output tokens")); + assert!(msg.contains("Lower")); + } +} diff --git a/apps/native/src-tauri/src/evolve/providers/ollama.rs b/apps/native/src-tauri/src/evolve/providers/ollama.rs index 6e88a7fff..b5081296d 100644 --- a/apps/native/src-tauri/src/evolve/providers/ollama.rs +++ b/apps/native/src-tauri/src/evolve/providers/ollama.rs @@ -11,14 +11,16 @@ pub struct OllamaProvider { client: reqwest::Client, base_url: String, model: String, + max_output_tokens: u32, } impl OllamaProvider { - pub fn new(base_url: String, model: String) -> Self { + pub fn new(base_url: String, model: String, max_output_tokens: u32) -> Self { Self { client: reqwest::Client::new(), base_url: base_url.trim_end_matches('/').to_string(), model, + max_output_tokens, } } } @@ -28,10 +30,16 @@ struct ChatRequest { model: String, messages: Vec, stream: bool, + options: OllamaOptions, #[serde(skip_serializing_if = "Vec::is_empty")] tools: Vec, } +#[derive(Clone, Serialize)] +struct OllamaOptions { + num_predict: u32, +} + #[derive(Clone, Debug, Serialize, Deserialize)] struct OllamaMessage { role: String, @@ -100,6 +108,9 @@ impl AiProvider for OllamaProvider { model: self.model.clone(), messages: ollama_messages.clone(), stream: false, + options: OllamaOptions { + num_predict: self.max_output_tokens, + }, tools: ollama_tools.clone(), }; diff --git a/apps/native/src-tauri/src/evolve/providers/openai.rs b/apps/native/src-tauri/src/evolve/providers/openai.rs index f060013ce..90f635169 100644 --- a/apps/native/src-tauri/src/evolve/providers/openai.rs +++ b/apps/native/src-tauri/src/evolve/providers/openai.rs @@ 
-20,11 +20,12 @@ use reqwest::StatusCode; pub struct OpenAIProvider { client: Client, model: String, + max_output_tokens: u32, record_completions: bool, } impl OpenAIProvider { - pub fn new(api_key: String, api_base: String, model: String) -> Self { + pub fn new(api_key: String, api_base: String, model: String, max_output_tokens: u32) -> Self { let config = OpenAIConfig::new() .with_api_key(api_key) .with_api_base(api_base); @@ -36,6 +37,7 @@ impl OpenAIProvider { Self { client, model, + max_output_tokens, record_completions, } } @@ -62,11 +64,7 @@ impl AiProvider for OpenAIProvider { .tools(openai_tools) .temperature(0.2); - // Some models support this, others don't. For OpenAI/Claude it is usually supported/required for long checks. - // But let's check if we can make it optional or robust. - // For now, hardcode max_tokens as in original mod.rs - // const MAX_TOKENS: u32 = 65_000; - request_builder.max_completion_tokens(65000u32); + request_builder.max_completion_tokens(self.max_output_tokens); let request = request_builder .build() diff --git a/apps/native/src-tauri/src/main.rs b/apps/native/src-tauri/src/main.rs index e178f7add..2ca2f2cb5 100644 --- a/apps/native/src-tauri/src/main.rs +++ b/apps/native/src-tauri/src/main.rs @@ -302,6 +302,7 @@ fn run_cli_mode(context: tauri::Context) -> i32 { prompt, config, max_iterations, + max_output_tokens, evolve_provider, evolve_model, summary_provider, @@ -349,6 +350,7 @@ fn run_cli_mode(context: tauri::Context) -> i32 { prompt, config, max_iterations, + max_output_tokens, evolve_provider, evolve_model, summary_provider, diff --git a/apps/native/src-tauri/src/shared_types/prefs.rs b/apps/native/src-tauri/src/shared_types/prefs.rs index 4e4da630d..cf4bc7ba7 100644 --- a/apps/native/src-tauri/src/shared_types/prefs.rs +++ b/apps/native/src-tauri/src/shared_types/prefs.rs @@ -36,6 +36,8 @@ pub struct UiPrefs { pub max_iterations: Option, /// Maximum build attempts per evolution. 
pub max_build_attempts: Option, + /// Maximum output tokens requested per evolution model call. + pub max_output_tokens: Option, /// Whether diagnostic feedback may be sent. pub send_diagnostics: bool, /// Whether to confirm before running build/apply. @@ -79,6 +81,8 @@ pub struct UiPrefsUpdate { pub max_iterations: Option, /// Maximum build-attempt count update. pub max_build_attempts: Option, + /// Maximum output token count update. + pub max_output_tokens: Option, /// Ollama base URL update. pub ollama_api_base_url: Option, /// vLLM base URL update. diff --git a/apps/native/src-tauri/src/storage/store.rs b/apps/native/src-tauri/src/storage/store.rs index 9abbbc181..33c59bd01 100644 --- a/apps/native/src-tauri/src/storage/store.rs +++ b/apps/native/src-tauri/src/storage/store.rs @@ -44,6 +44,7 @@ pub const PINNED_VERSION_KEY: &str = "pinnedVersion"; pub const UPDATE_CHANNEL_KEY: &str = "updateChannel"; pub const DEFAULT_MAX_ITERATIONS: usize = 25; +pub const DEFAULT_MAX_OUTPUT_TOKENS: usize = 32_768; const KEYCHAIN_SERVICE: &str = "com.darkmatter.nixmac"; fn e2e_mock_system_enabled() -> bool { @@ -522,6 +523,18 @@ pub fn set_max_build_attempts(app: &AppHandle, max: usize) -> Res Ok(()) } +/// Gets the maximum output tokens requested per evolution model call. 
+pub fn get_max_output_tokens(app: &AppHandle) -> Result { + Ok(get_usize_pref(app, "maxOutputTokens")?.unwrap_or(DEFAULT_MAX_OUTPUT_TOKENS)) +} + +pub fn set_max_output_tokens(app: &AppHandle, max: usize) -> Result<()> { + let store = get_store(app)?; + store.set("maxOutputTokens", serde_json::json!(max)); + store.save()?; + Ok(()) +} + // ============================================================================= // Model Cache // ============================================================================= diff --git a/apps/native/src/components/widget/settings/ai-models-tab.stories.tsx b/apps/native/src/components/widget/settings/ai-models-tab.stories.tsx index ccf415c10..2a20f6adc 100644 --- a/apps/native/src/components/widget/settings/ai-models-tab.stories.tsx +++ b/apps/native/src/components/widget/settings/ai-models-tab.stories.tsx @@ -10,6 +10,7 @@ type ModelValues = { summaryModel: string; maxIterations: number; maxBuildAttempts: number; + maxOutputTokens: number; openrouterApiKey: string; openaiApiKey: string; vllmApiBaseUrl: string; @@ -23,6 +24,7 @@ function AiModelsTabFixture() { summaryModel: "", maxIterations: 25, maxBuildAttempts: 5, + maxOutputTokens: 32768, openrouterApiKey: "", openaiApiKey: "", vllmApiBaseUrl: "", @@ -55,6 +57,7 @@ function AiModelsTabFixture() { form={form as any} maxBuildAttemptsField={field("maxBuildAttempts")} maxIterationsField={field("maxIterations")} + maxOutputTokensField={field("maxOutputTokens")} summaryModelField={field("summaryModel")} summaryProviderField={field("summaryProvider")} /> diff --git a/apps/native/src/components/widget/settings/ai-models-tab.tsx b/apps/native/src/components/widget/settings/ai-models-tab.tsx index a6e0bd884..e100acd5b 100644 --- a/apps/native/src/components/widget/settings/ai-models-tab.tsx +++ b/apps/native/src/components/widget/settings/ai-models-tab.tsx @@ -9,7 +9,7 @@ import { } from "@/components/ui/select"; import { ModelCombobox } from 
"@/components/widget/controls/model-combobox"; import { getProviderConfigInvalidReason, isCliProvider } from "@/lib/ai-provider-validation"; -import { DEFAULT_MAX_ITERATIONS } from "@/lib/constants"; +import { DEFAULT_MAX_ITERATIONS, DEFAULT_MAX_OUTPUT_TOKENS } from "@/lib/constants"; import { tauriAPI } from "@/ipc/api"; import type { CliToolsState } from "@/ipc/types"; import type { AnyFieldApi, ReactFormExtendedApi } from "@tanstack/react-form"; @@ -30,6 +30,8 @@ interface AiModelsTabProps { // biome-ignore lint/suspicious/noExplicitAny: tanstack form types are complex maxBuildAttemptsField: AnyFieldApi; // biome-ignore lint/suspicious/noExplicitAny: tanstack form types are complex + maxOutputTokensField: AnyFieldApi; + // biome-ignore lint/suspicious/noExplicitAny: tanstack form types are complex form: ReactFormExtendedApi; } @@ -108,6 +110,7 @@ export function AiModelsTab({ summaryModelField, maxIterationsField, maxBuildAttemptsField, + maxOutputTokensField, form, }: AiModelsTabProps) { const cliStatus = useCliToolStatus(); @@ -381,6 +384,49 @@ export function AiModelsTab({ onBlur={maxIterationsField.handleBlur} /> +
+
+ + + + + + +

Completion tokens requested from the evolution model.

+

+ Default: {DEFAULT_MAX_OUTPUT_TOKENS}. Lower this if local vLLM rejects + requests for exceeding the model context window. +

+
+
+
+ { + const value = + Number.parseInt(e.target.value, 10) || DEFAULT_MAX_OUTPUT_TOKENS; + maxOutputTokensField.handleChange(value); + await tauriAPI.ui.setPrefs({ maxOutputTokens: value }); + }} + onBlur={maxOutputTokensField.handleBlur} + /> +