diff --git a/README.md b/README.md index 37f4ff462..02de709af 100644 --- a/README.md +++ b/README.md @@ -147,14 +147,23 @@ nixmac uses separate models for **evolution** (config changes via tool use) and | Variable | Default | Description | |----------|---------|-------------| -| `EVOLVE_PROVIDER` | `openrouter` | `openrouter`, `openai`, or `ollama` | +| `EVOLVE_PROVIDER` | `openrouter` | `openrouter`, `openai`, `ollama`, or `vllm` | | `EVOLVE_MODEL` | `anthropic/claude-sonnet-4` | Model for config evolution | | `SUMMARY_AI_PROVIDER` | `openrouter` | Provider for summarization | | `SUMMARY_MODEL` | `openai/gpt-4o-mini` | Model for summaries | | `OLLAMA_API_BASE` | `http://localhost:11434` | Ollama endpoint | +| `VLLM_API_BASE` | unset | OpenAI-compatible vLLM endpoint, for example `http://localhost:8000/v1` | +| `VLLM_API_KEY` | unset | Optional vLLM API key | For fully local operation: `EVOLVE_PROVIDER=ollama SUMMARY_AI_PROVIDER=ollama devenv up` +Evolution calls request up to 32,768 output tokens by default. For self-hosted vLLM, +open **Settings → AI Models → Evolution Limits** and set **Max output tokens** low enough +to leave room for the prompt inside your model's context window. For example, a model +with a 65,536-token context window must request fewer than 65,536 output tokens; 32,768 is +a safe starting point for typical prompts. The same value can be set for CLI runs with +`nixmac evolve --max-output-tokens <N>`. + > **Note:** Models under ~70B parameters tend to struggle with the multi-tool evolution workflow.
## CLI @@ -167,6 +176,7 @@ nixmac evolve "install ripgrep and fd" nixmac evolve "enable Touch ID for sudo" \ --config ~/.darwin \ --max-iterations 10 \ + --max-output-tokens 32768 \ --evolve-provider ollama \ --evolve-model qwen3-coder:30b diff --git a/apps/native/.storybook/mocks/tauri-runtime.ts b/apps/native/.storybook/mocks/tauri-runtime.ts index 88bb3264d..0a45bd6d4 100644 --- a/apps/native/.storybook/mocks/tauri-runtime.ts +++ b/apps/native/.storybook/mocks/tauri-runtime.ts @@ -60,6 +60,7 @@ const prefs = { evolveModel: "gpt-5", maxIterations: 25, maxBuildAttempts: 3, + maxOutputTokens: 32768, sendDiagnostics: true, confirmBuild: false, confirmClear: false, diff --git a/apps/native/src-tauri/src/cli.rs b/apps/native/src-tauri/src/cli.rs index 0cb4a5c52..4dc91981d 100644 --- a/apps/native/src-tauri/src/cli.rs +++ b/apps/native/src-tauri/src/cli.rs @@ -24,6 +24,7 @@ pub struct EvolveConfig { pub prompt: String, pub config: Option, pub max_iterations: Option, + pub max_output_tokens: Option, pub evolve_provider: Option, pub evolve_model: Option, pub summary_provider: Option, @@ -58,6 +59,10 @@ pub enum Commands { #[arg(short, long)] max_iterations: Option, + /// Maximum output tokens requested per evolution model call + #[arg(long)] + max_output_tokens: Option, + /// Provider for evolution (e.g., openai, openrouter, ollama) #[arg(long)] evolve_provider: Option, @@ -102,6 +107,7 @@ pub async fn handle_evolve_command(app: &AppHandle, cfg: EvolveConfig) -> Result prompt, config, max_iterations, + max_output_tokens, evolve_provider, evolve_model, summary_provider, @@ -192,6 +198,11 @@ pub async fn handle_evolve_command(app: &AppHandle, cfg: EvolveConfig) -> Result None => crate::storage::store::get_max_iterations(app) .unwrap_or(crate::storage::store::DEFAULT_MAX_ITERATIONS), }; + let effective_max_output_tokens: usize = match max_output_tokens { + Some(v) => v, + None => crate::storage::store::get_max_output_tokens(app) + 
.unwrap_or(crate::storage::store::DEFAULT_MAX_OUTPUT_TOKENS), + }; // Max iterations if let Some(iterations) = max_iterations { @@ -199,6 +210,11 @@ pub async fn handle_evolve_command(app: &AppHandle, cfg: EvolveConfig) -> Result .map_err(|e| format!("Failed to set max iterations: {}", e))?; } + if let Some(output_tokens) = max_output_tokens { + crate::storage::store::set_max_output_tokens(app, output_tokens) + .map_err(|e| format!("Failed to set max output tokens: {}", e))?; + } + // Host if let Some(ref host_attr) = host { crate::storage::store::set_host_attr(app, host_attr) @@ -256,6 +272,7 @@ pub async fn handle_evolve_command(app: &AppHandle, cfg: EvolveConfig) -> Result "state": state_str, "prompt": prompt, "maxIterations": effective_max_iterations, + "maxOutputTokens": effective_max_output_tokens, "evolveProvider": effective_evolve_provider, "evolveModel": effective_evolve_model, "summaryProvider": effective_summary_provider, diff --git a/apps/native/src-tauri/src/commands/ui_prefs.rs b/apps/native/src-tauri/src/commands/ui_prefs.rs index 3ec05a443..3f4722a6e 100644 --- a/apps/native/src-tauri/src/commands/ui_prefs.rs +++ b/apps/native/src-tauri/src/commands/ui_prefs.rs @@ -30,6 +30,8 @@ pub async fn ui_get_prefs(app: AppHandle) -> Result = wrap_result_and_capture_err("ui_get_prefs", store::get_ollama_api_base_url(&app))?; let vllm_api_base_url: Option = @@ -89,6 +91,7 @@ pub async fn ui_get_prefs(app: AppHandle) -> Result u32 { + value.max(1).min(u32::MAX as usize) as u32 +} + /// Return short hex prefix for correlation of error messages without risking sensitive content exposure. 
fn short_hash(s: &str) -> String { let mut h = Sha256::new(); @@ -358,6 +362,9 @@ pub async fn generate_evolution( info!("📝 Prompt: {}", prompt); let store_model = store::get_evolve_model(app).ok().flatten(); + let max_output_tokens = + store::get_max_output_tokens(app).unwrap_or(store::DEFAULT_MAX_OUTPUT_TOKENS); + let max_output_tokens_for_request = normalize_max_output_tokens(max_output_tokens); // Select provider implementation let provider: Arc = if provider_type == "ollama" { @@ -370,10 +377,14 @@ pub async fn generate_evolution( .or_else(|| std::env::var("OLLAMA_API_BASE").ok()) .unwrap_or_else(|| DEFAULT_OLLAMA_API_BASE.to_string()); info!( - "Using Ollama provider | Model: {} | URL: {}", - model, base_url + "Using Ollama provider | Model: {} | URL: {} | Max output tokens: {}", + model, base_url, max_output_tokens_for_request ); - Arc::new(OllamaProvider::new(base_url, model)) + Arc::new(OllamaProvider::new( + base_url, + model, + max_output_tokens_for_request, + )) } else if matches!(provider_type.as_str(), "claude" | "codex" | "opencode") { let tool = match provider_type.as_str() { "claude" => crate::ai::providers::cli::CliTool::Claude, @@ -395,8 +406,16 @@ pub async fn generate_evolution( .or_else(|| std::env::var("VLLM_API_BASE").ok()) .ok_or_else(|| anyhow!("No vLLM base URL configured. Please set it in Settings."))?; let api_key = store::get_effective_vllm_api_key(app)?.unwrap_or_else(|| "none".to_string()); - info!("Using vLLM provider | Model: {} | URL: {}", model, base_url); - Arc::new(OpenAIProvider::new(api_key, base_url, model)) + info!( + "Using vLLM provider | Model: {} | URL: {} | Max output tokens: {}", + model, base_url, max_output_tokens_for_request + ); + Arc::new(OpenAIProvider::new( + api_key, + base_url, + model, + max_output_tokens_for_request, + )) } else { let (api_key, base_url) = store::get_effective_openai_compatible_credential(app)? 
.ok_or_else(|| { @@ -417,8 +436,16 @@ pub async fn generate_evolution( } else { "OpenAI" }; - info!("Using {} provider | Model: {}", provider_name, model); - Arc::new(OpenAIProvider::new(api_key, base_url.to_string(), model)) + info!( + "Using {} provider | Model: {} | Max output tokens: {}", + provider_name, model, max_output_tokens_for_request + ); + Arc::new(OpenAIProvider::new( + api_key, + base_url.to_string(), + model, + max_output_tokens_for_request, + )) }; // Emit start event @@ -446,11 +473,12 @@ pub async fn generate_evolution( let max_build_attempts = store::get_max_build_attempts(app).unwrap_or(DEFAULT_MAX_BUILD_ATTEMPTS); info!( - "Limits: max_iterations={}, max_iterations_before_edit={} ({}%), max_build_attempts={}", + "Limits: max_iterations={}, max_iterations_before_edit={} ({}%), max_build_attempts={}, max_output_tokens={}", max_iterations, max_iterations_before_edit, MAX_ITERATIONS_BEFORE_EDIT_PERCENT, - max_build_attempts + max_build_attempts, + max_output_tokens ); let tools = create_tools(banned_tools); diff --git a/apps/native/src-tauri/src/evolve/providers/mod.rs b/apps/native/src-tauri/src/evolve/providers/mod.rs index 16ee93b7b..e1d96c237 100644 --- a/apps/native/src-tauri/src/evolve/providers/mod.rs +++ b/apps/native/src-tauri/src/evolve/providers/mod.rs @@ -75,6 +75,20 @@ pub enum ProviderError { Other(AnyhowError), } +fn looks_like_context_window_error(body: &str) -> bool { + let body = body.to_ascii_lowercase(); + (body.contains("context") + || body.contains("maximum context") + || body.contains("context length")) + && (body.contains("max_tokens") + || body.contains("max_output_tokens") + || body.contains("max tokens") + || body.contains("max completion") + || body.contains("output tokens") + || body.contains("token limit") + || body.contains("requested")) +} + impl ProviderError { /// Return a user-friendly error message suitable for display in the UI. 
/// @@ -85,6 +99,9 @@ impl ProviderError { /// `Http { status, body }` before reaching this method. pub fn user_message(&self) -> String { match self { + ProviderError::Http { status, body } if looks_like_context_window_error(body) => { + "The AI provider rejected the request because the configured max output tokens exceed the model's context window. Lower Max output tokens in Settings or switch to a model with a larger context window.".to_string() + } ProviderError::Http { status, .. } => friendly_provider_error(status.as_u16()), ProviderError::Other(e) => { let msg = format!("{:#}", e); @@ -104,3 +121,27 @@ impl ProviderError { } } } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn recognizes_context_window_token_errors() { + let body = "This model's maximum context length is 65536 tokens. However, you requested 65000 output tokens."; + assert!(looks_like_context_window_error(body)); + } + + #[test] + fn context_window_errors_suggest_token_setting() { + let err = ProviderError::Http { + status: StatusCode::BAD_REQUEST, + body: "maximum context length is 65536 tokens; requested max_tokens is too high" + .to_string(), + }; + + let msg = err.user_message(); + assert!(msg.contains("Max output tokens")); + assert!(msg.contains("Lower")); + } +} diff --git a/apps/native/src-tauri/src/evolve/providers/ollama.rs b/apps/native/src-tauri/src/evolve/providers/ollama.rs index 6e88a7fff..b5081296d 100644 --- a/apps/native/src-tauri/src/evolve/providers/ollama.rs +++ b/apps/native/src-tauri/src/evolve/providers/ollama.rs @@ -11,14 +11,16 @@ pub struct OllamaProvider { client: reqwest::Client, base_url: String, model: String, + max_output_tokens: u32, } impl OllamaProvider { - pub fn new(base_url: String, model: String) -> Self { + pub fn new(base_url: String, model: String, max_output_tokens: u32) -> Self { Self { client: reqwest::Client::new(), base_url: base_url.trim_end_matches('/').to_string(), model, + max_output_tokens, } } } @@ -28,10 +30,16 @@ struct 
ChatRequest { model: String, messages: Vec, stream: bool, + options: OllamaOptions, #[serde(skip_serializing_if = "Vec::is_empty")] tools: Vec, } +#[derive(Clone, Serialize)] +struct OllamaOptions { + num_predict: u32, +} + #[derive(Clone, Debug, Serialize, Deserialize)] struct OllamaMessage { role: String, @@ -100,6 +108,9 @@ impl AiProvider for OllamaProvider { model: self.model.clone(), messages: ollama_messages.clone(), stream: false, + options: OllamaOptions { + num_predict: self.max_output_tokens, + }, tools: ollama_tools.clone(), }; diff --git a/apps/native/src-tauri/src/evolve/providers/openai.rs b/apps/native/src-tauri/src/evolve/providers/openai.rs index f060013ce..90f635169 100644 --- a/apps/native/src-tauri/src/evolve/providers/openai.rs +++ b/apps/native/src-tauri/src/evolve/providers/openai.rs @@ -20,11 +20,12 @@ use reqwest::StatusCode; pub struct OpenAIProvider { client: Client, model: String, + max_output_tokens: u32, record_completions: bool, } impl OpenAIProvider { - pub fn new(api_key: String, api_base: String, model: String) -> Self { + pub fn new(api_key: String, api_base: String, model: String, max_output_tokens: u32) -> Self { let config = OpenAIConfig::new() .with_api_key(api_key) .with_api_base(api_base); @@ -36,6 +37,7 @@ impl OpenAIProvider { Self { client, model, + max_output_tokens, record_completions, } } @@ -62,11 +64,7 @@ impl AiProvider for OpenAIProvider { .tools(openai_tools) .temperature(0.2); - // Some models support this, others don't. For OpenAI/Claude it is usually supported/required for long checks. - // But let's check if we can make it optional or robust. 
- // For now, hardcode max_tokens as in original mod.rs - // const MAX_TOKENS: u32 = 65_000; - request_builder.max_completion_tokens(65000u32); + request_builder.max_completion_tokens(self.max_output_tokens); let request = request_builder .build() diff --git a/apps/native/src-tauri/src/main.rs b/apps/native/src-tauri/src/main.rs index e178f7add..2ca2f2cb5 100644 --- a/apps/native/src-tauri/src/main.rs +++ b/apps/native/src-tauri/src/main.rs @@ -302,6 +302,7 @@ fn run_cli_mode(context: tauri::Context) -> i32 { prompt, config, max_iterations, + max_output_tokens, evolve_provider, evolve_model, summary_provider, @@ -349,6 +350,7 @@ fn run_cli_mode(context: tauri::Context) -> i32 { prompt, config, max_iterations, + max_output_tokens, evolve_provider, evolve_model, summary_provider, diff --git a/apps/native/src-tauri/src/shared_types/prefs.rs b/apps/native/src-tauri/src/shared_types/prefs.rs index 4e4da630d..cf4bc7ba7 100644 --- a/apps/native/src-tauri/src/shared_types/prefs.rs +++ b/apps/native/src-tauri/src/shared_types/prefs.rs @@ -36,6 +36,8 @@ pub struct UiPrefs { pub max_iterations: Option, /// Maximum build attempts per evolution. pub max_build_attempts: Option, + /// Maximum output tokens requested per evolution model call. + pub max_output_tokens: Option, /// Whether diagnostic feedback may be sent. pub send_diagnostics: bool, /// Whether to confirm before running build/apply. @@ -79,6 +81,8 @@ pub struct UiPrefsUpdate { pub max_iterations: Option, /// Maximum build-attempt count update. pub max_build_attempts: Option, + /// Maximum output token count update. + pub max_output_tokens: Option, /// Ollama base URL update. pub ollama_api_base_url: Option, /// vLLM base URL update. 
diff --git a/apps/native/src-tauri/src/storage/store.rs b/apps/native/src-tauri/src/storage/store.rs index 9abbbc181..33c59bd01 100644 --- a/apps/native/src-tauri/src/storage/store.rs +++ b/apps/native/src-tauri/src/storage/store.rs @@ -44,6 +44,7 @@ pub const PINNED_VERSION_KEY: &str = "pinnedVersion"; pub const UPDATE_CHANNEL_KEY: &str = "updateChannel"; pub const DEFAULT_MAX_ITERATIONS: usize = 25; +pub const DEFAULT_MAX_OUTPUT_TOKENS: usize = 32_768; const KEYCHAIN_SERVICE: &str = "com.darkmatter.nixmac"; fn e2e_mock_system_enabled() -> bool { @@ -522,6 +523,18 @@ pub fn set_max_build_attempts(app: &AppHandle, max: usize) -> Res Ok(()) } +/// Gets the maximum output tokens requested per evolution model call. +pub fn get_max_output_tokens(app: &AppHandle) -> Result { + Ok(get_usize_pref(app, "maxOutputTokens")?.unwrap_or(DEFAULT_MAX_OUTPUT_TOKENS)) +} + +pub fn set_max_output_tokens(app: &AppHandle, max: usize) -> Result<()> { + let store = get_store(app)?; + store.set("maxOutputTokens", serde_json::json!(max)); + store.save()?; + Ok(()) +} + // ============================================================================= // Model Cache // ============================================================================= diff --git a/apps/native/src/components/widget/settings/__snapshots__/ai-models-tab.stories.tsx.snap b/apps/native/src/components/widget/settings/__snapshots__/ai-models-tab.stories.tsx.snap index 05175b255..6caa2e75c 100644 --- a/apps/native/src/components/widget/settings/__snapshots__/ai-models-tab.stories.tsx.snap +++ b/apps/native/src/components/widget/settings/__snapshots__/ai-models-tab.stories.tsx.snap @@ -1,3 +1,3 @@ // Vitest Snapshot v1, https://vitest.dev/guide/snapshot.html -exports[`Cli Providers 1`] = `"

AI Models

OpenRouter is the supported cloud provider in the main UI. Previously saved direct OpenAI keys still work as a legacy fallback, but they are no longer shown in Settings.

Evolution Model

Model used to plan and apply configuration changes in Nix

Summary Model

Model used to explain and summarize changes

Evolution Limits

Control how long the AI will try before giving up

"`; +exports[`Cli Providers 1`] = `"

AI Models

OpenRouter is the supported cloud provider in the main UI. Previously saved direct OpenAI keys still work as a legacy fallback, but they are no longer shown in Settings.

Evolution Model

Model used to plan and apply configuration changes in Nix

Summary Model

Model used to explain and summarize changes

Evolution Limits

Control how long the AI will try before giving up

"`; diff --git a/apps/native/src/components/widget/settings/ai-models-tab.stories.tsx b/apps/native/src/components/widget/settings/ai-models-tab.stories.tsx index ccf415c10..2a20f6adc 100644 --- a/apps/native/src/components/widget/settings/ai-models-tab.stories.tsx +++ b/apps/native/src/components/widget/settings/ai-models-tab.stories.tsx @@ -10,6 +10,7 @@ type ModelValues = { summaryModel: string; maxIterations: number; maxBuildAttempts: number; + maxOutputTokens: number; openrouterApiKey: string; openaiApiKey: string; vllmApiBaseUrl: string; @@ -23,6 +24,7 @@ function AiModelsTabFixture() { summaryModel: "", maxIterations: 25, maxBuildAttempts: 5, + maxOutputTokens: 32768, openrouterApiKey: "", openaiApiKey: "", vllmApiBaseUrl: "", @@ -55,6 +57,7 @@ function AiModelsTabFixture() { form={form as any} maxBuildAttemptsField={field("maxBuildAttempts")} maxIterationsField={field("maxIterations")} + maxOutputTokensField={field("maxOutputTokens")} summaryModelField={field("summaryModel")} summaryProviderField={field("summaryProvider")} /> diff --git a/apps/native/src/components/widget/settings/ai-models-tab.tsx b/apps/native/src/components/widget/settings/ai-models-tab.tsx index a6e0bd884..e100acd5b 100644 --- a/apps/native/src/components/widget/settings/ai-models-tab.tsx +++ b/apps/native/src/components/widget/settings/ai-models-tab.tsx @@ -9,7 +9,7 @@ import { } from "@/components/ui/select"; import { ModelCombobox } from "@/components/widget/controls/model-combobox"; import { getProviderConfigInvalidReason, isCliProvider } from "@/lib/ai-provider-validation"; -import { DEFAULT_MAX_ITERATIONS } from "@/lib/constants"; +import { DEFAULT_MAX_ITERATIONS, DEFAULT_MAX_OUTPUT_TOKENS } from "@/lib/constants"; import { tauriAPI } from "@/ipc/api"; import type { CliToolsState } from "@/ipc/types"; import type { AnyFieldApi, ReactFormExtendedApi } from "@tanstack/react-form"; @@ -30,6 +30,8 @@ interface AiModelsTabProps { // biome-ignore lint/suspicious/noExplicitAny: tanstack 
form types are complex maxBuildAttemptsField: AnyFieldApi; // biome-ignore lint/suspicious/noExplicitAny: tanstack form types are complex + maxOutputTokensField: AnyFieldApi; + // biome-ignore lint/suspicious/noExplicitAny: tanstack form types are complex form: ReactFormExtendedApi; } @@ -108,6 +110,7 @@ export function AiModelsTab({ summaryModelField, maxIterationsField, maxBuildAttemptsField, + maxOutputTokensField, form, }: AiModelsTabProps) { const cliStatus = useCliToolStatus(); @@ -381,6 +384,49 @@ export function AiModelsTab({ onBlur={maxIterationsField.handleBlur} /> +
+
+ + + + + + +

Completion tokens requested from the evolution model.

+

+ Default: {DEFAULT_MAX_OUTPUT_TOKENS}. Lower this if local vLLM rejects + requests for exceeding the model context window. +

+
+
+
+ { + const value = + Number.parseInt(e.target.value, 10) || DEFAULT_MAX_OUTPUT_TOKENS; + maxOutputTokensField.handleChange(value); + await tauriAPI.ui.setPrefs({ maxOutputTokens: value }); + }} + onBlur={maxOutputTokensField.handleBlur} + /> +