diff --git a/src/openhuman/channels/providers/web.rs b/src/openhuman/channels/providers/web.rs index b0be66866c..7ce4509649 100644 --- a/src/openhuman/channels/providers/web.rs +++ b/src/openhuman/channels/providers/web.rs @@ -228,16 +228,123 @@ fn with_provider_detail(summary: &str, err: &str) -> String { } } +/// Extract a Retry-After / retry_after seconds hint from a free-form +/// error string. Mirrors the typed [`crate::openhuman::inference:: +/// provider::reliable::parse_retry_after_ms`] helper but operates on +/// the already-flattened `String` that reaches the channel-classifier +/// layer. +/// +/// Returns `Some(n)` when a non-negative integer or fractional value +/// follows one of the canonical headers; fractional values are +/// rounded up so the user is never told to retry sooner than the +/// upstream actually allows. +fn parse_retry_after_secs_from_str(err: &str) -> Option { + // Normalise quoted JSON-key wrappers ("retry_after": 30) by + // stripping double quotes before scanning for prefixes + // (CodeRabbit review on #2371). A serialised provider body like + // `{"retry_after": 30}` would otherwise miss every prefix and + // the user would lose the retry hint the provider supplied. + let normalized = err.to_ascii_lowercase().replace('"', ""); + for prefix in &[ + "retry-after:", + "retry_after:", + "retry-after ", + "retry_after ", + ] { + if let Some(pos) = normalized.find(prefix) { + let after = &normalized[pos + prefix.len()..]; + let num_str: String = after + .trim() + .chars() + .take_while(|c| c.is_ascii_digit() || *c == '.') + .collect(); + if let Ok(secs) = num_str.parse::() { + if secs.is_finite() && secs >= 0.0 { + return Some(secs.ceil() as u64); + } + } + } + } + None +} + +/// Format the retry-after hint as a short user-friendly suffix +/// (`" Try again in 30 seconds."`). Returns an empty string when no +/// hint is available so callers can `format!("{summary}{hint}")` +/// without branching on `Option`. +fn retry_after_hint(secs: Option) -> String { + match secs { + Some(0) => " You can retry immediately.".to_string(), + Some(1) => " Try again in 1 second.".to_string(), + Some(n) if n < 90 => format!(" Try again in {n} seconds."), + Some(n) => { + // Round UP — never tell the user to retry sooner than + // the upstream actually allows. 90–119s used to render + // as "about 1 minutes" both because of integer flooring + // and missing singular/plural handling (CodeRabbit + // review on #2371). + let mins = (n / 60) + u64::from(n % 60 != 0); + let unit = if mins == 1 { "minute" } else { "minutes" }; + format!(" Try again in about {mins} {unit}.") + } + None => String::new(), + } +} + +/// Detect the SecurityPolicy global hourly action-budget signal +/// emitted by the built-in tools (`web_fetch`, `curl`, `http_request`, +/// `polymarket`, `composio`, etc.) — see `src/openhuman/security/ +/// policy.rs::SecurityPolicy::is_rate_limited`. +/// +/// We match the canonical English strings those tools emit. This is +/// load-bearing for issue #2364: before this check ran, any string +/// containing "rate limit" was misclassified as a provider 429 and +/// the user saw the generic "You're being rate-limited" copy, which +/// hides that the cap is OpenHuman's own per-hour safety budget, +/// not the upstream LLM provider. +fn is_action_budget_exhausted(err_lower: &str) -> bool { + err_lower.contains("rate limit exceeded: action budget exhausted") + || err_lower.contains("rate limit exceeded: too many actions in the last hour") + || err_lower.contains("action blocked: rate limit exceeded") +} + fn classify_inference_error(err: &str) -> (&'static str, String) { let lower = err.to_lowercase(); - if lower.contains("rate limit") || lower.contains("429") { + // Order matters: the SecurityPolicy hourly cap and the + // agent-loop max-iterations error both surface as strings that + // contain "rate limit" / "iteration", so they MUST be checked + // before the generic provider-429 branch — otherwise users see + // a confusing "your AI provider is rate-limiting you" message + // for limits OpenHuman itself enforced (issue #2364). + if is_action_budget_exhausted(&lower) { + ( + "action_budget_exceeded", + with_provider_detail( + "You've hit OpenHuman's per-hour action budget — this is a local safety cap, \ + not your AI provider. The window decays gradually; you can keep chatting in \ + this thread and tool-heavy steps will resume as the budget refills.", + err, + ), + ) + } else if crate::openhuman::agent::error::is_max_iterations_error(err) { ( - "rate_limited", + "max_iterations", with_provider_detail( - "You're being rate-limited. Please wait a moment and try again.", + "The agent ran the maximum number of tool steps for one turn without \ + finishing. This usually means a tool kept failing (often a rate limit on a \ + web fetch). You can retry the same question in this thread once the \ + underlying limit clears.", err, ), ) + } else if lower.contains("rate limit") || lower.contains("429") { + let retry = parse_retry_after_secs_from_str(err); + let summary = format!( + "Your AI provider is rate-limiting requests. This is a transient upstream \ + limit, not a thread-level block — you can retry in this thread.{}", + retry_after_hint(retry) + ); + ("rate_limited", with_provider_detail(summary.as_str(), err)) } else if lower.contains("timeout") || lower.contains("timed out") { ( "timeout", diff --git a/src/openhuman/channels/providers/web_tests.rs b/src/openhuman/channels/providers/web_tests.rs index d2551eb0ad..b18ad63556 100644 --- a/src/openhuman/channels/providers/web_tests.rs +++ b/src/openhuman/channels/providers/web_tests.rs @@ -207,6 +207,151 @@ fn classify_inference_error_surfaces_provider_config_rejection_actionably() { } } +// ── #2364: rate-limit classification + retry-after surfacing ──── + +#[test] +fn classify_inference_error_distinguishes_action_budget_from_provider_429() { + // SecurityPolicy hourly cap (web_fetch / curl / http_request emit + // these strings). Before #2364 these were misclassified as a + // provider 429 and the user saw the "your AI provider is rate- + // limiting you" copy — which is wrong, the limit is OpenHuman's + // own per-hour safety budget. + for raw in [ + "Rate limit exceeded: action budget exhausted", + "Rate limit exceeded: too many actions in the last hour", + "Action blocked: rate limit exceeded", + ] { + let (category, message) = classify_inference_error(raw); + assert_eq!( + category, "action_budget_exceeded", + "action-budget signal must NOT classify as provider rate_limited: {raw}" + ); + assert!( + message.contains("local safety cap"), + "must clarify the limit is OpenHuman-local, not upstream: {message}" + ); + assert!( + message.contains("can keep chatting in this thread"), + "must tell the user the thread isn't blocked: {message}" + ); + } +} + +#[test] +fn classify_inference_error_max_iterations_gets_dedicated_branch() { + // The agent loop's MaxIterationsExceeded variant renders as + // "Agent exceeded maximum tool iterations (N)". Before #2364 + // this fell through to the generic `inference` bucket and the + // user saw a vague "something went wrong" copy. Now it gets a + // specific message that says retrying in the same thread is OK. + let raw = "run_chat_task failed client_id=abc thread_id=t1 \ + error=Agent exceeded maximum tool iterations (10)"; + let (category, message) = classify_inference_error(raw); + assert_eq!(category, "max_iterations"); + assert!( + message.contains("maximum number of tool steps"), + "must explain the cap: {message}" + ); + assert!( + message.contains("retry the same question in this thread"), + "must reassure same-thread recovery: {message}" + ); +} + +#[test] +fn classify_inference_error_rate_limited_surfaces_retry_after_seconds() { + let raw = "openrouter API error (429 Too Many Requests): Retry-After: 30"; + let (category, message) = classify_inference_error(raw); + assert_eq!(category, "rate_limited"); + assert!( + message.contains("Try again in 30 seconds"), + "must surface the parsed retry-after window: {message}" + ); + assert!( + message.contains("retry in this thread"), + "must clarify the thread isn't blocked: {message}" + ); +} + +#[test] +fn classify_inference_error_rate_limited_no_retry_after_omits_hint() { + let raw = "openrouter API error (429 Too Many Requests)"; + let (category, message) = classify_inference_error(raw); + assert_eq!(category, "rate_limited"); + // Generic copy must still describe the situation accurately. + assert!(message.contains("transient upstream limit")); + // No hallucinated countdown when none was parsed. + assert!( + !message.contains("Try again in"), + "must NOT invent a retry-after when none was parsed: {message}" + ); +} + +#[test] +fn classify_inference_error_rate_limited_handles_fractional_and_minute_windows() { + // Fractional seconds round up — never tell the user to retry + // sooner than the upstream actually allows. + let (_, message) = classify_inference_error("429 Too Many Requests: retry_after: 2.4"); + assert!( + message.contains("Try again in 3 seconds"), + "fractional 2.4 must round up to 3: {message}" + ); + + // Long windows switch to a "minutes" rendering at the 90s + // threshold so the user gets a less precise but more readable + // hint. + let (_, message) = classify_inference_error("429 Too Many Requests: Retry-After: 180"); + assert!( + message.contains("about 3 minutes"), + "180s must render as minutes: {message}" + ); +} + +#[test] +fn classify_inference_error_rate_limited_minute_window_uses_singular_and_rounds_up() { + // CodeRabbit on #2371: the 90–119s band used to render + // "about 1 minutes" (floor + missing plural handling). Round + // up + singular/plural now produces "about 2 minutes" for 90s + // (since 90s ceils to 2 minutes) and "about 2 minutes" for + // 119s (ditto). 60s lands in the seconds band; 61s is the + // smallest minute-band input but still <90 so seconds; 90s is + // the first true minute-band input. + let (_, m_90) = classify_inference_error("429 Too Many Requests: Retry-After: 90"); + assert!( + m_90.contains("about 2 minutes"), + "90s must round up to 2 minutes (not floor to 1): {m_90}" + ); + let (_, m_119) = classify_inference_error("429 Too Many Requests: Retry-After: 119"); + assert!( + m_119.contains("about 2 minutes"), + "119s must round up to 2 minutes: {m_119}" + ); + // Exactly 60-multiple inputs above the 90s threshold render as + // exact minutes with no round-up bump. + let (_, m_120) = classify_inference_error("429 Too Many Requests: Retry-After: 120"); + assert!( + m_120.contains("about 2 minutes"), + "exact 120s must stay as 2 minutes: {m_120}" + ); +} + +#[test] +fn classify_inference_error_rate_limited_parses_quoted_json_retry_after() { + // CodeRabbit on #2371: a serialised provider body like + // {"retry_after": 30} would previously miss every prefix + // because the quote stopped `lower.find("retry_after:")` from + // matching. The parser now strips quotes so the JSON-key shape + // resolves the same as the unquoted header shape. + let (category, message) = classify_inference_error( + r#"openrouter API error (429 Too Many Requests): {"retry_after": 30, "code": "rate_limited"}"#, + ); + assert_eq!(category, "rate_limited"); + assert!( + message.contains("Try again in 30 seconds"), + "quoted JSON retry_after must be parsed: {message}" + ); +} + #[test] fn generic_error_copy_is_sanitized_and_has_discord_report_action() { let message = generic_inference_error_user_message();