diff --git a/src/core/observability.rs b/src/core/observability.rs index 8f685f17d8..d2dd31799b 100644 --- a/src/core/observability.rs +++ b/src/core/observability.rs @@ -88,6 +88,22 @@ pub enum ExpectedErrorKind { /// 4xx body embedded, which would otherwise escape the /// [`is_backend_user_error_message`] 4xx-only matcher. ProviderUserState, + /// A user-configured custom cloud provider (`custom_openai` → DeepSeek + /// / OpenRouter / Moonshot / …) rejected the request because of the + /// user's **model / parameter configuration**: an OpenHuman abstract + /// tier alias leaked to a provider that only speaks its native ids + /// (#2079), an unknown / stale model pin (#2202), or a model-specific + /// temperature constraint (#2076 — Moonshot Kimi K2). The provider + /// HTTP layer (`providers::ops::api_error`) already demotes its own + /// per-attempt event; this catches the *re-report* when the same + /// error is raised again by `agent.run_single` / + /// `web_channel.run_chat_task` under `domain=agent` / `web_channel`. + /// Deterministic user-config state surfaced in the UI — Sentry has no + /// remediation path (OPENHUMAN-TAURI-WJ / -QW / -HB / -NH, ~273 + /// events). See + /// [`crate::openhuman::inference::provider::is_provider_config_rejection_message`] + /// for the polarity contract and exact body shapes. + ProviderConfigRejection, LocalAiCapabilityUnavailable, BudgetExhausted, SessionExpired, @@ -120,6 +136,15 @@ pub fn expected_error_kind(message: &str) -> Option { if is_backend_user_error_message(&lower) { return Some(ExpectedErrorKind::BackendUserError); } + // Provider config-rejection (unknown model / abstract tier leaked to a + // custom provider / model-specific temperature). Body-shape based and + // intrinsically scoped to third-party providers — the OpenHuman + // backend never emits these phrases. See the predicate's polarity + // contract. Drops OPENHUMAN-TAURI-WJ / -QW / -HB / -NH re-reports + // (#2079 / #2076 / #2202). 
+ if crate::openhuman::inference::provider::is_provider_config_rejection_message(message) { + return Some(ExpectedErrorKind::ProviderConfigRejection); + } if is_local_ai_capability_unavailable_message(&lower) { return Some(ExpectedErrorKind::LocalAiCapabilityUnavailable); } @@ -503,6 +528,26 @@ fn report_expected_message(kind: ExpectedErrorKind, message: &str, domain: &str, "[observability] {domain}.{operation} skipped expected provider-user-state error: {message}" ); } + ExpectedErrorKind::ProviderConfigRejection => { + // User-config state: a custom cloud provider rejected the + // request because of the user's model / parameter setup — an + // OpenHuman abstract tier alias leaked to a provider that only + // speaks its native ids (#2079), an unknown / stale model pin + // (#2202), or a model-specific temperature constraint (#2076, + // Moonshot Kimi K2). The provider HTTP layer already demoted + // its own per-attempt event; this is the re-report raised + // again by agent.run_single / web_channel.run_chat_task. The + // UI surfaces an actionable "fix your model/provider settings" + // error — Sentry has no remediation path + // (OPENHUMAN-TAURI-WJ / -QW / -HB / -NH). + tracing::info!( + domain = domain, + operation = operation, + kind = "provider_config_rejection", + error = %message, + "[observability] {domain}.{operation} skipped expected provider config-rejection error: {message}" + ); + } ExpectedErrorKind::LocalAiCapabilityUnavailable => { // User-state condition: the local-AI service refused a // capability (vision summarization, vision asset download) @@ -1497,6 +1542,57 @@ mod tests { ); } + #[test] + fn classifies_provider_config_rejection() { + // #2079 — an OpenHuman abstract tier alias leaked to a custom + // provider; raised again by `agent.run_single` / + // `web_channel.run_chat_task` so it escapes the provider-layer + // demotion and reaches `report_error_or_expected` here. 
+ assert_eq!( + expected_error_kind( + "agent.run_single failed: custom_openai API error (400 Bad Request): \ + The supported API model names are deepseek-v4-pro or deepseek-v4-flash, \ + but you passed reasoning-v1." + ), + Some(ExpectedErrorKind::ProviderConfigRejection) + ); + // #2076 — Moonshot Kimi K2 temperature constraint. + assert_eq!( + expected_error_kind( + "custom_openai API error (400): invalid temperature: only 1 is allowed for this model" + ), + Some(ExpectedErrorKind::ProviderConfigRejection) + ); + // #2202 — unknown / stale model pin (OpenAI-compatible body). + assert_eq!( + expected_error_kind( + "custom_openai API error (400): Model 'claude-opus-4-7' is not available. \ + Use GET /openai/v1/models to list available models." + ), + Some(ExpectedErrorKind::ProviderConfigRejection) + ); + } + + #[test] + fn does_not_classify_unrelated_provider_failures_as_config_rejection() { + // Inverted polarity / scope guard: a 5xx or a generic 4xx with no + // config-rejection body must still reach Sentry as actionable. + // (The OpenHuman backend never emits these phrases, so the + // message-level predicate is intrinsically custom-provider scoped; + // the HTTP-layer twin enforces the non-backend guard explicitly.) + assert_eq!( + expected_error_kind("custom_openai API error (500): internal server error"), + None + ); + assert_eq!( + expected_error_kind( + "custom_openai API error (400 Bad Request): missing required field 'messages'" + ), + None, + "generic 4xx without a config-rejection body must NOT demote" + ); + } + #[test] fn unrelated_missing_required_fields_classifies_as_accepted_false_positive() { // Documents the breadth of the `"missing required fields"` arm — @@ -2138,6 +2234,23 @@ mod tests { "provider_chat", &[("provider", "ollama")], ); + // #2079 / #2076 / #2202 — exercises the expected_error_kind + // ProviderConfigRejection branch AND the report_expected_message + // skip-log arm (the agent/web-channel re-report demotion path). 
+ report_error_or_expected( + "agent.run_single failed: custom_openai API error (400 Bad Request): \ + The supported API model names are deepseek-v4-pro or deepseek-v4-flash, \ + but you passed reasoning-v1.", + "agent", + "native_chat", + &[("provider", "custom_openai")], + ); + report_error_or_expected( + "custom_openai API error (400): invalid temperature: only 1 is allowed for this model", + "web_channel", + "run_chat_task", + &[("provider", "custom_openai")], + ); } fn event_with_message(msg: &str) -> sentry::protocol::Event<'static> { diff --git a/src/openhuman/channels/providers/web.rs b/src/openhuman/channels/providers/web.rs index 0ecdba1cec..b0be66866c 100644 --- a/src/openhuman/channels/providers/web.rs +++ b/src/openhuman/channels/providers/web.rs @@ -287,6 +287,23 @@ fn classify_inference_error(err: &str) -> (&'static str, String) { err, ), ) + } else if crate::openhuman::inference::provider::is_provider_config_rejection_message(err) { + // #2079 / #2076 / #2202: an OpenHuman abstract tier alias leaked to + // a custom provider, a stale model pin, or a model-specific + // temperature constraint. Checked BEFORE the generic + // model-unavailable arm so config-rejection bodies that also + // contain "model"/"does not exist"/"does not have access" get the + // specific "Settings → LLM" remediation instead of the generic + // copy. Shared predicate keeps this in lockstep with the + // Sentry-demotion classifier. + ( + "model_unavailable", + with_provider_detail( + "Your AI provider rejected the request's model or temperature setting. 
\ + Check your model and routing in Settings → LLM.", + err, + ), + ) } else if lower.contains("model") && (lower.contains("not found") || lower.contains("unavailable") diff --git a/src/openhuman/channels/providers/web_tests.rs b/src/openhuman/channels/providers/web_tests.rs index 9373d3f630..d2551eb0ad 100644 --- a/src/openhuman/channels/providers/web_tests.rs +++ b/src/openhuman/channels/providers/web_tests.rs @@ -161,16 +161,52 @@ fn extract_provider_error_detail_returns_none_for_transport_errors() { #[test] fn classify_inference_error_quotes_model_unavailable_detail() { + // A stale model pin (`model_not_found` / "does not exist or you do not + // have access") is the #2202 config-rejection class: it now resolves + // via the provider-config-rejection arm (ordered before the generic + // model-unavailable arm) and gets the actionable Settings remediation, + // while still classifying as `model_unavailable` and quoting the + // upstream detail. let raw = r#"custom_openai API error (404 Not Found): {"error":{"message":"The model `gpt-5.5` does not exist or you do not have access to it.","code":"model_not_found"}}"#; let (category, message) = classify_inference_error(raw); assert_eq!(category, "model_unavailable"); - assert!(message.contains("Check your model settings")); + assert!( + message.contains("Settings → LLM"), + "config-rejection must give the actionable remediation: {message}" + ); assert!( message.contains("gpt-5.5"), "should quote model name: {message}" ); } +#[test] +fn classify_inference_error_surfaces_provider_config_rejection_actionably() { + // #2079 / #2076 / #2202: before this arm these fell through to the + // generic "inference" bucket and the user saw no actionable + // remediation. Each must now classify as `model_unavailable` with the + // "fix your model/routing" copy, and quote the upstream detail. + let cases = [ + // #2079 — abstract tier alias leaked to a custom provider. 
+ r#"custom_openai API error (400 Bad Request): {"error":{"message":"The supported API model names are deepseek-v4-pro or deepseek-v4-flash, but you passed reasoning-v1.","type":"invalid_request_error"}}"#, + // #2076 — Moonshot Kimi K2 only accepts temperature: 1. + r#"custom_openai API error (400): {"error":{"message":"invalid temperature: only 1 is allowed for this model","type":"invalid_request_error"}}"#, + // #2202 — unknown / stale model pin. + r#"custom_openai API error (400): {"error":{"message":"Model 'claude-opus-4-7' is not available. Use GET /openai/v1/models to list available models."}}"#, + ]; + for raw in cases { + let (category, message) = classify_inference_error(raw); + assert_eq!( + category, "model_unavailable", + "config-rejection must classify as model_unavailable, not generic: {raw}" + ); + assert!( + message.contains("Settings → LLM"), + "must give actionable remediation: {message}" + ); + } +} + #[test] fn generic_error_copy_is_sanitized_and_has_discord_report_action() { let message = generic_inference_error_user_message(); diff --git a/src/openhuman/inference/provider/config_rejection.rs b/src/openhuman/inference/provider/config_rejection.rs new file mode 100644 index 0000000000..542ef8723b --- /dev/null +++ b/src/openhuman/inference/provider/config_rejection.rs @@ -0,0 +1,134 @@ +//! Classifier for **provider configuration-rejection** errors. +//! +//! When OpenHuman talks to a user-configured custom cloud endpoint +//! (`custom_openai` → DeepSeek / OpenRouter / Moonshot / …) the upstream +//! API rejects requests whose model id or sampling params it doesn't +//! understand: +//! +//! - `"The supported API model names are deepseek-v4-pro or +//! deepseek-v4-flash, but you passed reasoning-v1."` (#2079 — an +//! OpenHuman abstract tier alias leaked to a provider that only speaks +//! its own native ids) +//! - `"Model 'deepseek-v4-pro' is not available. Use GET +//! /openai/v1/models to list available models."` (#2202) +//! 
- `"invalid temperature: only 1 is allowed for this model"` (#2076 — +//! Moonshot Kimi K2) +//! - ``"The model `gpt-5.5` does not exist or you do not have access to +//! it."`` / `"model_not_found"` (stale model pin) +//! +//! These are **deterministic user-configuration state**, not bugs the +//! maintainers can act on: the user pointed OpenHuman at a custom +//! provider with a model / temperature that provider does not accept. The +//! remediation is "fix the model or routing in Settings", which the UI +//! surfaces. Yet every agent turn produces a fresh Sentry event +//! (OPENHUMAN-TAURI-WJ / -QW / -HB / -NH — 88 + 146 + 39 events). This is +//! the same class as budget-exhaustion ([`super::billing_error`]) and +//! must be demoted from Sentry to an info log the same way. +//! +//! ## Provider-aware polarity (important) +//! +//! The phrases below are emitted by **third-party upstream APIs** +//! (DeepSeek / OpenRouter / Moonshot). The OpenHuman hosted backend +//! resolves tier aliases natively and never emits "supported API model +//! names are deepseek-…" or "invalid temperature: only 1 is allowed" — so +//! the phrase set is intrinsically scoped to custom providers. The +//! HTTP-layer wrapper [`super::ops::is_provider_config_rejection_http`] +//! additionally guards on `provider != openhuman_backend::PROVIDER_LABEL` +//! so a model-rejection from our **own** backend (which would be a real +//! regression — we sent it a bad request) still reaches Sentry. The +//! message-only predicate is consumed by +//! [`crate::core::observability::expected_error_kind`] for the +//! re-reported error that escapes the provider layer and is raised again +//! by `agent.run_single` / `web_channel.run_chat_task`. +//! +//! Keep the list deliberately tight: a false positive demotes a real +//! provider/backend bug to an info log. 
+ +/// Returns true if a provider error body indicates the request was +/// rejected because of the user's model / parameter **configuration** +/// (unknown model id, abstract tier leaked to a custom provider, +/// model-specific temperature constraint), as opposed to a transient +/// failure or a server bug. +/// +/// Case-insensitive substring match. See the module docs for the polarity +/// contract and the OPENHUMAN-TAURI Sentry issues each phrase drops. +pub fn is_provider_config_rejection_message(body: &str) -> bool { + const PHRASES: &[&str] = &[ + // #2079 — an OpenHuman abstract tier alias (`reasoning-v1`, + // `chat-v1`, …) reached a custom provider that lists its own + // native ids back at us. + "supported api model names are", + // #2202 — OpenAI-compatible "unknown model" body. The + // `/openai/v1/models` remediation hint is the stable, unique + // anchor (the quoted model id varies per user). + "/openai/v1/models", + // OpenAI / OpenRouter stale-pin shape (`claude-opus-4-7`, + // `gpt-5.5`, …) — model removed or no access. + "does not exist or you do not have access", + "model_not_found", + // #2076 — Moonshot Kimi K2 only accepts `temperature: 1`. + "invalid temperature", + "only 1 is allowed for this model", + // Our own actionable error once a proper tier→model resolution + // is in place (keeps this classifier stable across that fix). + "is an abstract tier", + ]; + + let lower = body.to_ascii_lowercase(); + PHRASES.iter().any(|phrase| lower.contains(phrase)) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn detects_real_sentry_bodies() { + // The exact upstream bodies from OPENHUMAN-TAURI-WJ / -QW / -HB + // / -NH and the stale-pin family. + for body in [ + "The supported API model names are deepseek-v4-pro or deepseek-v4-flash, but you passed reasoning-v1.", + "Model 'deepseek-v4-pro' is not available. Use GET /openai/v1/models to list available models.", + "Model 'claude-opus-4-7' is not available. 
Use GET /openai/v1/models to list available models.", + "invalid temperature: only 1 is allowed for this model", + "The model `gpt-5.5` does not exist or you do not have access to it.", + r#"{"error":{"message":"model not found","code":"model_not_found"}}"#, + "Model 'reasoning-v1' is an abstract tier — configure a concrete model for your custom provider", + ] { + assert!( + is_provider_config_rejection_message(body), + "{body:?} must classify as a provider config-rejection user-state" + ); + } + } + + #[test] + fn detection_is_case_insensitive() { + assert!(is_provider_config_rejection_message( + "INVALID TEMPERATURE: ONLY 1 IS ALLOWED FOR THIS MODEL" + )); + assert!(is_provider_config_rejection_message( + "The Supported API Model Names Are gpt-4o or gpt-4o-mini" + )); + } + + #[test] + fn ignores_transient_and_server_and_unrelated() { + // Must NOT demote: transient/server failures and generic 4xx + // that carry no config-rejection signal — those stay Sentry + // actionable. (A real backend bug must not be silenced.) 
+ for body in [ + "Internal server error", + "503 Service Unavailable", + "Bad request: missing field", + "rate limit exceeded, retry after 1s", + "insufficient budget — add credits", + "", + ] { + assert!( + !is_provider_config_rejection_message(body), + "{body:?} must NOT classify as a provider config-rejection" + ); + } + } +} diff --git a/src/openhuman/inference/provider/mod.rs b/src/openhuman/inference/provider/mod.rs index e98b51659b..f47f71e2da 100644 --- a/src/openhuman/inference/provider/mod.rs +++ b/src/openhuman/inference/provider/mod.rs @@ -10,6 +10,7 @@ pub mod compatible_dump; pub mod compatible_parse; pub mod compatible_stream; pub mod compatible_types; +pub mod config_rejection; pub mod factory; pub mod openhuman_backend; pub mod ops; @@ -27,5 +28,6 @@ pub use traits::{ }; pub use billing_error::is_budget_exhausted_message; +pub use config_rejection::is_provider_config_rejection_message; pub use factory::{create_chat_provider, provider_for_role}; pub use ops::*; diff --git a/src/openhuman/inference/provider/ops.rs b/src/openhuman/inference/provider/ops.rs index f54bace279..b358cbfb5f 100644 --- a/src/openhuman/inference/provider/ops.rs +++ b/src/openhuman/inference/provider/ops.rs @@ -319,6 +319,47 @@ pub(super) fn log_budget_exhausted_http_400( ); } +/// Whether a provider non-2xx response is a deterministic +/// **configuration-rejection** user-state error (unknown model id, +/// abstract tier leaked to a custom provider, model-specific temperature +/// constraint) that should be demoted from Sentry to an info log. +/// +/// Provider-aware (inverted polarity vs. the 401/403 backend rule): the +/// same body from the OpenHuman **backend** stays Sentry-actionable — +/// that would mean we sent our own backend a bad request (a regression, +/// e.g. #2079). Only client errors from a *custom / third-party* +/// provider are user-config state. 
Restricted to the observed shapes +/// (400 invalid-param / unknown-model, 404 model-does-not-exist, 422 +/// unprocessable); 408/429 are transient and handled separately. +pub(super) fn is_provider_config_rejection_http( + status: reqwest::StatusCode, + provider: &str, + body: &str, +) -> bool { + matches!(status.as_u16(), 400 | 404 | 422) + && provider != openhuman_backend::PROVIDER_LABEL + && super::is_provider_config_rejection_message(body) +} + +pub(super) fn log_provider_config_rejection( + operation: &str, + provider: &str, + model: Option<&str>, + status: reqwest::StatusCode, +) { + tracing::info!( + domain = "llm_provider", + operation = operation, + provider = provider, + model = model.unwrap_or(""), + status = status.as_u16(), + failure = "non_2xx", + kind = "provider_config_rejection", + "[llm_provider] {operation} provider config-rejection ({status}) — \ + user model/param configuration, not reporting to Sentry" + ); +} + /// Build a sanitized provider error from a failed HTTP response. /// /// Reports the failure to Sentry with `provider` and `status` tags so @@ -337,6 +378,12 @@ pub(super) fn log_budget_exhausted_http_400( /// override, halting downstream LLM work. 401/403 from **other** providers /// (OpenAI, Anthropic, …) still go to Sentry — those mean a misconfigured /// API key, which is actionable. +/// - **Provider config-rejection** (4xx unknown-model / abstract-tier / +/// model-specific temperature) from a **non-backend** provider — the +/// user pointed a custom provider at a model/param it doesn't accept. +/// Deterministic user-config state, surfaced in the UI; demoted to an +/// info log (#2079 / #2076 / #2202). See +/// [`is_provider_config_rejection_http`]. 
pub async fn api_error(provider: &str, response: reqwest::Response) -> anyhow::Error { let status = response.status(); let status_str = status.as_u16().to_string(); @@ -350,6 +397,7 @@ pub async fn api_error(provider: &str, response: reqwest::Response) -> anyhow::E let is_auth_failure = matches!(status.as_u16(), 401 | 403); let is_backend = provider == openhuman_backend::PROVIDER_LABEL; let is_budget_exhausted_user_state = is_budget_exhausted_http_400(status, &body); + let is_provider_config_rejection = is_provider_config_rejection_http(status, provider, &body); if is_auth_failure && is_backend { tracing::warn!( @@ -372,6 +420,8 @@ pub async fn api_error(provider: &str, response: reqwest::Response) -> anyhow::E ); } else if is_budget_exhausted_user_state { log_budget_exhausted_http_400("api_error", provider, None, status); + } else if is_provider_config_rejection { + log_provider_config_rejection("api_error", provider, None, status); } else if should_report_provider_http_failure(status) { crate::core::observability::report_error( message.as_str(), @@ -823,6 +873,94 @@ mod tests { } } + // Exercises the real `is_provider_config_rejection_http` decision used + // by `api_error`, including the inverted provider-aware polarity. + mod provider_config_rejection_suppression { + use super::*; + + // The exact #2079 Sentry body shape. + const TIER_LEAK_BODY: &str = + "The supported API model names are deepseek-v4-pro or deepseek-v4-flash, \ + but you passed reasoning-v1."; + // #2076 Moonshot Kimi K2 temperature constraint. 
+ const TEMP_BODY: &str = "invalid temperature: only 1 is allowed for this model"; + + #[test] + fn custom_provider_4xx_config_rejection_is_suppressed() { + assert!(is_provider_config_rejection_http( + reqwest::StatusCode::BAD_REQUEST, + "custom_openai", + TIER_LEAK_BODY, + )); + assert!(is_provider_config_rejection_http( + reqwest::StatusCode::BAD_REQUEST, + "custom_openai", + TEMP_BODY, + )); + // 404 "model does not exist" is the same user-config class. + assert!(is_provider_config_rejection_http( + reqwest::StatusCode::NOT_FOUND, + "custom_openai", + "The model `gpt-5.5` does not exist or you do not have access to it.", + )); + } + + #[test] + fn openhuman_backend_same_body_is_not_suppressed() { + // Inverted polarity: a model-rejection from our OWN backend + // means we sent it a bad request — a real regression that must + // still reach Sentry. (Mirror of the 401/403 backend rule.) + assert!(!is_provider_config_rejection_http( + reqwest::StatusCode::BAD_REQUEST, + openhuman_backend::PROVIDER_LABEL, + TIER_LEAK_BODY, + )); + } + + #[test] + fn server_error_is_not_suppressed() { + // A 5xx is a server bug, not user-config — keep reporting. + assert!(!is_provider_config_rejection_http( + reqwest::StatusCode::INTERNAL_SERVER_ERROR, + "custom_openai", + TIER_LEAK_BODY, + )); + } + + #[test] + fn transient_429_is_not_suppressed_here() { + // 429 is transient; handled by should_report_provider_http_failure, + // not this classifier (must not be swallowed as user-config). 
+ assert!(!is_provider_config_rejection_http( + reqwest::StatusCode::TOO_MANY_REQUESTS, + "custom_openai", + TIER_LEAK_BODY, + )); + } + + #[test] + fn unrelated_4xx_body_is_not_suppressed() { + assert!(!is_provider_config_rejection_http( + reqwest::StatusCode::BAD_REQUEST, + "custom_openai", + "Bad request: missing required field 'messages'", + )); + } + + #[test] + fn log_helper_runs_without_panicking() { + // Covers the demotion log path taken by `api_error` when a + // custom provider rejects the user's model/param config. No + // tracing subscriber in unit tests, so this is a pure smoke. + log_provider_config_rejection( + "api_error", + "custom_openai", + Some("reasoning-v1"), + reqwest::StatusCode::BAD_REQUEST, + ); + } + } + #[test] fn test_sanitize_api_error_utf8() { let input = "🦀".repeat(MAX_API_ERROR_CHARS + 10);