Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 11 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -147,14 +147,23 @@ nixmac uses separate models for **evolution** (config changes via tool use) and

| Variable | Default | Description |
|----------|---------|-------------|
| `EVOLVE_PROVIDER` | `openrouter` | `openrouter`, `openai`, or `ollama` |
| `EVOLVE_PROVIDER` | `openrouter` | `openrouter`, `openai`, `ollama`, or `vllm` |
| `EVOLVE_MODEL` | `anthropic/claude-sonnet-4` | Model for config evolution |
| `SUMMARY_AI_PROVIDER` | `openrouter` | Provider for summarization |
| `SUMMARY_MODEL` | `openai/gpt-4o-mini` | Model for summaries |
| `OLLAMA_API_BASE` | `http://localhost:11434` | Ollama endpoint |
| `VLLM_API_BASE` | unset | OpenAI-compatible vLLM endpoint, for example `http://localhost:8000/v1` |
| `VLLM_API_KEY` | unset | Optional vLLM API key |

For fully local operation: `EVOLVE_PROVIDER=ollama SUMMARY_AI_PROVIDER=ollama devenv up`

Evolution calls request up to 32,768 output tokens by default. For self-hosted vLLM,
open **Settings → AI Models → Evolution Limits** and set **Max output tokens** low enough
that the prompt and the requested output together fit inside your model's context window.
For example, a model with a 65,536-token context window must use fewer than 65,536 output
tokens; 32,768 is a safe starting point for typical prompts. The same value can be set for
CLI runs with `nixmac evolve --max-output-tokens <tokens>`.

> **Note:** Models under ~70B parameters tend to struggle with the multi-tool evolution workflow.

## CLI
Expand All @@ -167,6 +176,7 @@ nixmac evolve "install ripgrep and fd"
nixmac evolve "enable Touch ID for sudo" \
--config ~/.darwin \
--max-iterations 10 \
--max-output-tokens 32768 \
--evolve-provider ollama \
--evolve-model qwen3-coder:30b

Expand Down
1 change: 1 addition & 0 deletions apps/native/.storybook/mocks/tauri-runtime.ts
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ const prefs = {
evolveModel: "gpt-5",
maxIterations: 25,
maxBuildAttempts: 3,
maxOutputTokens: 32768,
sendDiagnostics: true,
confirmBuild: false,
confirmClear: false,
Expand Down
17 changes: 17 additions & 0 deletions apps/native/src-tauri/src/cli.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ pub struct EvolveConfig {
pub prompt: String,
pub config: Option<PathBuf>,
pub max_iterations: Option<usize>,
pub max_output_tokens: Option<usize>,
pub evolve_provider: Option<String>,
pub evolve_model: Option<String>,
pub summary_provider: Option<String>,
Expand Down Expand Up @@ -58,6 +59,10 @@ pub enum Commands {
#[arg(short, long)]
max_iterations: Option<usize>,

/// Maximum output tokens requested per evolution model call
#[arg(long)]
max_output_tokens: Option<usize>,

/// Provider for evolution (e.g., openai, openrouter, ollama)
#[arg(long)]
evolve_provider: Option<String>,
Expand Down Expand Up @@ -102,6 +107,7 @@ pub async fn handle_evolve_command(app: &AppHandle, cfg: EvolveConfig) -> Result
prompt,
config,
max_iterations,
max_output_tokens,
evolve_provider,
evolve_model,
summary_provider,
Expand Down Expand Up @@ -192,13 +198,23 @@ pub async fn handle_evolve_command(app: &AppHandle, cfg: EvolveConfig) -> Result
None => crate::storage::store::get_max_iterations(app)
.unwrap_or(crate::storage::store::DEFAULT_MAX_ITERATIONS),
};
let effective_max_output_tokens: usize = match max_output_tokens {
Some(v) => v,
None => crate::storage::store::get_max_output_tokens(app)
.unwrap_or(crate::storage::store::DEFAULT_MAX_OUTPUT_TOKENS),
};

// Max iterations
if let Some(iterations) = max_iterations {
crate::storage::store::set_max_iterations(app, iterations)
.map_err(|e| format!("Failed to set max iterations: {}", e))?;
}

if let Some(output_tokens) = max_output_tokens {
crate::storage::store::set_max_output_tokens(app, output_tokens)
.map_err(|e| format!("Failed to set max output tokens: {}", e))?;
}

// Host
if let Some(ref host_attr) = host {
crate::storage::store::set_host_attr(app, host_attr)
Expand Down Expand Up @@ -256,6 +272,7 @@ pub async fn handle_evolve_command(app: &AppHandle, cfg: EvolveConfig) -> Result
"state": state_str,
"prompt": prompt,
"maxIterations": effective_max_iterations,
"maxOutputTokens": effective_max_output_tokens,
"evolveProvider": effective_evolve_provider,
"evolveModel": effective_evolve_model,
"summaryProvider": effective_summary_provider,
Expand Down
7 changes: 7 additions & 0 deletions apps/native/src-tauri/src/commands/ui_prefs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@ pub async fn ui_get_prefs(app: AppHandle) -> Result<shared_types::UiPrefs, Strin
let max_iterations =
Some(store::get_max_iterations(&app).unwrap_or(store::DEFAULT_MAX_ITERATIONS));
let max_build_attempts = Some(store::get_max_build_attempts(&app).unwrap_or(5));
let max_output_tokens =
Some(store::get_max_output_tokens(&app).unwrap_or(store::DEFAULT_MAX_OUTPUT_TOKENS));
let ollama_api_base_url: Option<String> =
wrap_result_and_capture_err("ui_get_prefs", store::get_ollama_api_base_url(&app))?;
let vllm_api_base_url: Option<String> =
Expand Down Expand Up @@ -89,6 +91,7 @@ pub async fn ui_get_prefs(app: AppHandle) -> Result<shared_types::UiPrefs, Strin

max_iterations,
max_build_attempts,
max_output_tokens,

ollama_api_base_url,
vllm_api_base_url,
Expand Down Expand Up @@ -144,6 +147,10 @@ pub async fn ui_set_prefs(
store::set_max_build_attempts(&app, max_build_attempts)
.map_err(|e| capture_err("ui_set_prefs", e))?;
}
if let Some(max_output_tokens) = prefs.max_output_tokens {
store::set_max_output_tokens(&app, max_output_tokens)
.map_err(|e| capture_err("ui_set_prefs", e))?;
}
if let Some(ollama_api_base_url) = prefs.ollama_api_base_url {
store::set_ollama_api_base_url(&app, &ollama_api_base_url)
.map_err(|e| capture_err("ui_set_prefs", e))?;
Expand Down
46 changes: 37 additions & 9 deletions apps/native/src-tauri/src/evolve/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,10 @@ use providers::{AiProvider, CliProvider, OllamaProvider, OpenAIProvider, Provide

use self::types::FileEdit;

/// Converts a max-output-token count into the `u32` handed to the providers.
///
/// A value of zero is raised to 1, and anything at or above `u32::MAX`
/// saturates at `u32::MAX`; every other value is passed through unchanged.
fn normalize_max_output_tokens(value: usize) -> u32 {
    match value {
        // A zero-token budget is bumped to the smallest allowed request.
        0 => 1,
        // Saturate instead of letting the narrowing cast wrap around.
        too_big if too_big >= u32::MAX as usize => u32::MAX,
        // Guarded above: the value is known to fit in a u32 here.
        in_range => in_range as u32,
    }
}

/// Return short hex prefix for correlation of error messages without risking sensitive content exposure.
fn short_hash(s: &str) -> String {
let mut h = Sha256::new();
Expand Down Expand Up @@ -358,6 +362,9 @@ pub async fn generate_evolution<R: Runtime>(
info!("📝 Prompt: {}", prompt);

let store_model = store::get_evolve_model(app).ok().flatten();
let max_output_tokens =
store::get_max_output_tokens(app).unwrap_or(store::DEFAULT_MAX_OUTPUT_TOKENS);
let max_output_tokens_for_request = normalize_max_output_tokens(max_output_tokens);

// Select provider implementation
let provider: Arc<dyn AiProvider> = if provider_type == "ollama" {
Expand All @@ -370,10 +377,14 @@ pub async fn generate_evolution<R: Runtime>(
.or_else(|| std::env::var("OLLAMA_API_BASE").ok())
.unwrap_or_else(|| DEFAULT_OLLAMA_API_BASE.to_string());
info!(
"Using Ollama provider | Model: {} | URL: {}",
model, base_url
"Using Ollama provider | Model: {} | URL: {} | Max output tokens: {}",
model, base_url, max_output_tokens_for_request
);
Arc::new(OllamaProvider::new(base_url, model))
Arc::new(OllamaProvider::new(
base_url,
model,
max_output_tokens_for_request,
))
} else if matches!(provider_type.as_str(), "claude" | "codex" | "opencode") {
let tool = match provider_type.as_str() {
"claude" => crate::ai::providers::cli::CliTool::Claude,
Expand All @@ -395,8 +406,16 @@ pub async fn generate_evolution<R: Runtime>(
.or_else(|| std::env::var("VLLM_API_BASE").ok())
.ok_or_else(|| anyhow!("No vLLM base URL configured. Please set it in Settings."))?;
let api_key = store::get_effective_vllm_api_key(app)?.unwrap_or_else(|| "none".to_string());
info!("Using vLLM provider | Model: {} | URL: {}", model, base_url);
Arc::new(OpenAIProvider::new(api_key, base_url, model))
info!(
"Using vLLM provider | Model: {} | URL: {} | Max output tokens: {}",
model, base_url, max_output_tokens_for_request
);
Arc::new(OpenAIProvider::new(
api_key,
base_url,
model,
max_output_tokens_for_request,
))
} else {
let (api_key, base_url) = store::get_effective_openai_compatible_credential(app)?
.ok_or_else(|| {
Expand All @@ -417,8 +436,16 @@ pub async fn generate_evolution<R: Runtime>(
} else {
"OpenAI"
};
info!("Using {} provider | Model: {}", provider_name, model);
Arc::new(OpenAIProvider::new(api_key, base_url.to_string(), model))
info!(
"Using {} provider | Model: {} | Max output tokens: {}",
provider_name, model, max_output_tokens_for_request
);
Arc::new(OpenAIProvider::new(
api_key,
base_url.to_string(),
model,
max_output_tokens_for_request,
))
};

// Emit start event
Expand Down Expand Up @@ -446,11 +473,12 @@ pub async fn generate_evolution<R: Runtime>(
let max_build_attempts =
store::get_max_build_attempts(app).unwrap_or(DEFAULT_MAX_BUILD_ATTEMPTS);
info!(
"Limits: max_iterations={}, max_iterations_before_edit={} ({}%), max_build_attempts={}",
"Limits: max_iterations={}, max_iterations_before_edit={} ({}%), max_build_attempts={}, max_output_tokens={}",
max_iterations,
max_iterations_before_edit,
MAX_ITERATIONS_BEFORE_EDIT_PERCENT,
max_build_attempts
max_build_attempts,
max_output_tokens
);

let tools = create_tools(banned_tools);
Expand Down
41 changes: 41 additions & 0 deletions apps/native/src-tauri/src/evolve/providers/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,20 @@ pub enum ProviderError {
Other(AnyhowError),
}

/// Heuristically decides whether a provider error body describes a request
/// whose token budget does not fit inside the model's context window.
///
/// Matching is ASCII case-insensitive. The body must contain at least one
/// context-window marker AND at least one token-limit marker.
fn looks_like_context_window_error(body: &str) -> bool {
    // Note: "context" alone already matches the two longer phrases; they are
    // kept to document the provider wordings this heuristic targets.
    const CONTEXT_MARKERS: &[&str] = &["context", "maximum context", "context length"];
    const TOKEN_MARKERS: &[&str] = &[
        "max_tokens",
        "max_output_tokens",
        "max tokens",
        "max completion",
        "output tokens",
        "token limit",
        "requested",
    ];

    let lowered = body.to_ascii_lowercase();
    let mentions_any = |markers: &[&str]| markers.iter().any(|marker| lowered.contains(*marker));

    mentions_any(CONTEXT_MARKERS) && mentions_any(TOKEN_MARKERS)
}

impl ProviderError {
/// Return a user-friendly error message suitable for display in the UI.
///
Expand All @@ -85,6 +99,9 @@ impl ProviderError {
/// `Http { status, body }` before reaching this method.
pub fn user_message(&self) -> String {
match self {
ProviderError::Http { status, body } if looks_like_context_window_error(body) => {
"The AI provider rejected the request because the configured max output tokens exceed the model's context window. Lower Max output tokens in Settings or switch to a model with a larger context window.".to_string()
}
ProviderError::Http { status, .. } => friendly_provider_error(status.as_u16()),
ProviderError::Other(e) => {
let msg = format!("{:#}", e);
Expand All @@ -104,3 +121,27 @@ impl ProviderError {
}
}
}

#[cfg(test)]
mod tests {
    //! Unit tests for the context-window error heuristic and the user-facing
    //! message it triggers.

    use super::*;

    /// A typical "context length exceeded" body is detected.
    #[test]
    fn recognizes_context_window_token_errors() {
        let body = "This model's maximum context length is 65536 tokens. However, you requested 65000 output tokens.";
        assert!(looks_like_context_window_error(body));
    }

    /// Bodies lacking either the context marker or the token marker must not
    /// match, so unrelated provider failures keep their own messages.
    #[test]
    fn ignores_unrelated_error_bodies() {
        assert!(!looks_like_context_window_error(""));
        assert!(!looks_like_context_window_error(
            "401 Unauthorized: invalid API key"
        ));
        // Mentions "context" but nothing about token limits.
        assert!(!looks_like_context_window_error("context canceled"));
        // Mentions a token limit but nothing about the context window.
        assert!(!looks_like_context_window_error("max_tokens must be positive"));
    }

    /// An HTTP error carrying a context-window body points the user at the
    /// Max output tokens setting.
    #[test]
    fn context_window_errors_suggest_token_setting() {
        let err = ProviderError::Http {
            status: StatusCode::BAD_REQUEST,
            body: "maximum context length is 65536 tokens; requested max_tokens is too high"
                .to_string(),
        };

        let msg = err.user_message();
        assert!(msg.contains("Max output tokens"));
        assert!(msg.contains("Lower"));
    }
}
13 changes: 12 additions & 1 deletion apps/native/src-tauri/src/evolve/providers/ollama.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,14 +11,16 @@ pub struct OllamaProvider {
client: reqwest::Client,
base_url: String,
model: String,
max_output_tokens: u32,
}

impl OllamaProvider {
pub fn new(base_url: String, model: String) -> Self {
pub fn new(base_url: String, model: String, max_output_tokens: u32) -> Self {
Self {
client: reqwest::Client::new(),
base_url: base_url.trim_end_matches('/').to_string(),
model,
max_output_tokens,
}
}
}
Expand All @@ -28,10 +30,16 @@ struct ChatRequest {
model: String,
messages: Vec<OllamaMessage>,
stream: bool,
options: OllamaOptions,
#[serde(skip_serializing_if = "Vec::is_empty")]
tools: Vec<OllamaTool>,
}

/// Generation options serialized as the `options` field of `ChatRequest`.
#[derive(Clone, Serialize)]
struct OllamaOptions {
/// Ollama's `num_predict` option; filled from the provider's configured
/// `max_output_tokens` when a chat request is built.
num_predict: u32,
}

#[derive(Clone, Debug, Serialize, Deserialize)]
struct OllamaMessage {
role: String,
Expand Down Expand Up @@ -100,6 +108,9 @@ impl AiProvider for OllamaProvider {
model: self.model.clone(),
messages: ollama_messages.clone(),
stream: false,
options: OllamaOptions {
num_predict: self.max_output_tokens,
},
tools: ollama_tools.clone(),
};

Expand Down
10 changes: 4 additions & 6 deletions apps/native/src-tauri/src/evolve/providers/openai.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,12 @@ use reqwest::StatusCode;
pub struct OpenAIProvider {
client: Client<OpenAIConfig>,
model: String,
max_output_tokens: u32,
record_completions: bool,
}

impl OpenAIProvider {
pub fn new(api_key: String, api_base: String, model: String) -> Self {
pub fn new(api_key: String, api_base: String, model: String, max_output_tokens: u32) -> Self {
let config = OpenAIConfig::new()
.with_api_key(api_key)
.with_api_base(api_base);
Expand All @@ -36,6 +37,7 @@ impl OpenAIProvider {
Self {
client,
model,
max_output_tokens,
record_completions,
}
}
Expand All @@ -62,11 +64,7 @@ impl AiProvider for OpenAIProvider {
.tools(openai_tools)
.temperature(0.2);

// Some models support this, others don't. For OpenAI/Claude it is usually supported/required for long checks.
// But let's check if we can make it optional or robust.
// For now, hardcode max_tokens as in original mod.rs
// const MAX_TOKENS: u32 = 65_000;
request_builder.max_completion_tokens(65000u32);
request_builder.max_completion_tokens(self.max_output_tokens);

let request = request_builder
.build()
Expand Down
2 changes: 2 additions & 0 deletions apps/native/src-tauri/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -302,6 +302,7 @@ fn run_cli_mode(context: tauri::Context<tauri::Wry>) -> i32 {
prompt,
config,
max_iterations,
max_output_tokens,
evolve_provider,
evolve_model,
summary_provider,
Expand Down Expand Up @@ -349,6 +350,7 @@ fn run_cli_mode(context: tauri::Context<tauri::Wry>) -> i32 {
prompt,
config,
max_iterations,
max_output_tokens,
evolve_provider,
evolve_model,
summary_provider,
Expand Down
4 changes: 4 additions & 0 deletions apps/native/src-tauri/src/shared_types/prefs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@ pub struct UiPrefs {
pub max_iterations: Option<usize>,
/// Maximum build attempts per evolution.
pub max_build_attempts: Option<usize>,
/// Maximum output tokens requested per evolution model call.
pub max_output_tokens: Option<usize>,
/// Whether diagnostic feedback may be sent.
pub send_diagnostics: bool,
/// Whether to confirm before running build/apply.
Expand Down Expand Up @@ -79,6 +81,8 @@ pub struct UiPrefsUpdate {
pub max_iterations: Option<usize>,
/// Maximum build-attempt count update.
pub max_build_attempts: Option<usize>,
/// Maximum output token count update.
pub max_output_tokens: Option<usize>,
/// Ollama base URL update.
pub ollama_api_base_url: Option<String>,
/// vLLM base URL update.
Expand Down
Loading
Loading