diff --git a/src/core/observability.rs b/src/core/observability.rs index c42e96b5c7..2b285dcd2c 100644 --- a/src/core/observability.rs +++ b/src/core/observability.rs @@ -435,6 +435,25 @@ fn is_provider_user_state_message(lower: &str) -> bool { return true; } + // OPENHUMAN-TAURI-XX: custom_openai upstream rejected the request with + // its own 400. Wire shape matched by + // `inference/provider/ops.rs::is_custom_openai_upstream_bad_request_http_400`: + // + // custom_openai API error (400 Bad Request): {"error":{ + // "message":"Bad request to upstream provider", + // "type":"upstream_error","status":400}} + // + // Requires the `custom_openai api error (400` marker so this can't + // silence unrelated errors that happen to mention both + // "bad request to upstream provider" and "upstream_error" elsewhere + // (e.g. a future provider whose envelope reuses one of those strings). + if lower.contains("custom_openai api error (400") + && lower.contains("bad request to upstream provider") + && lower.contains("upstream_error") + { + return true; + } + // OPENHUMAN-TAURI-97: composio authorize with a blank required field — // SharePoint Subdomain, WhatsApp WABA ID, Tenant Name, etc. // Backend returns 500 with `"Missing required fields: …"` body. @@ -1574,6 +1593,56 @@ mod tests { ); } + #[test] + fn classifies_custom_openai_upstream_bad_request_as_provider_user_state() { + assert_eq!( + expected_error_kind( + "custom_openai API error (400 Bad Request): \ + {\"error\":{\"message\":\"Bad request to upstream provider\",\ + \"type\":\"upstream_error\",\"status\":400}}" + ), + Some(ExpectedErrorKind::ProviderUserState) + ); + + // Wrapped by higher-level callers (`agent.run_single`, + // `rpc.invoke_method`) must still classify. 
+ assert_eq!( + expected_error_kind( + "agent.run_single failed: custom_openai API error (400 Bad Request): \ + {\"error\":{\"message\":\"Bad request to upstream provider\",\ + \"type\":\"upstream_error\",\"status\":400}}" + ), + Some(ExpectedErrorKind::ProviderUserState) + ); + } + + /// Regression for CodeRabbit feedback on PR #2107: the matcher must + /// not demote unrelated errors that happen to contain both + /// "bad request to upstream provider" and "upstream_error" without + /// the `custom_openai API error (400` anchor. + #[test] + fn does_not_silence_unrelated_error_with_only_inner_substrings() { + // No `custom_openai API error (400` marker → must NOT classify + // as ProviderUserState, otherwise we'd silence actionable bugs. + assert_eq!( + expected_error_kind( + "internal panic in router: bad request to upstream provider \ + (state=upstream_error)" + ), + None, + ); + + // A future hypothetical provider envelope reusing both substrings + // also must not classify. + assert_eq!( + expected_error_kind( + "anthropic_api error: upstream_error encountered while \ + forwarding bad request to upstream provider" + ), + None, + ); + } + #[test] fn classifies_missing_required_fields_as_provider_user_state() { // OPENHUMAN-TAURI-97: composio authorize with a blank required diff --git a/src/openhuman/channels/runtime/supervision.rs b/src/openhuman/channels/runtime/supervision.rs index 0667237c0a..25114d556d 100644 --- a/src/openhuman/channels/runtime/supervision.rs +++ b/src/openhuman/channels/runtime/supervision.rs @@ -57,7 +57,13 @@ pub(crate) fn spawn_supervised_listener( backoff = initial_backoff_secs.max(1); } Err(e) => { - tracing::error!("Channel {} error: {e}; restarting", ch.name()); + let message = format!("Channel {} error: {e:#}; restarting", ch.name()); + crate::core::observability::report_error_or_expected( + message.as_str(), + "channels", + "supervised_listener", + &[("channel", ch.name())], + ); publish_global(DomainEvent::ChannelDisconnected { 
channel: ch.name().to_string(), reason: e.to_string(), @@ -118,4 +124,18 @@ mod tests { let result = compute_max_in_flight_messages(usize::MAX); assert!(result <= CHANNEL_MAX_IN_FLIGHT_MESSAGES); } + + #[test] + fn supervision_discord_gateway_reqwest_failure_classifies_as_expected() { + let raw = "error sending request for url (https://discord.com/api/v10/gateway/bot)"; + let wrapped = format!("Channel discord error: {raw}; restarting"); + let kind = crate::core::observability::expected_error_kind(&wrapped); + assert_eq!( + kind, + Some(crate::core::observability::ExpectedErrorKind::NetworkUnreachable), + "supervision wrapper must keep transient transport phrase visible \ + to the classifier so Sentry stays quiet for OPENHUMAN-TAURI-VP \ + (got {kind:?} for message {wrapped:?})" + ); + } } diff --git a/src/openhuman/inference/provider/compatible.rs b/src/openhuman/inference/provider/compatible.rs index cee05215f6..d17bf8dd17 100644 --- a/src/openhuman/inference/provider/compatible.rs +++ b/src/openhuman/inference/provider/compatible.rs @@ -472,6 +472,17 @@ impl OpenAiCompatibleProvider { Some(model), status, ); + } else if super::is_custom_openai_upstream_bad_request_http_400( + self.name.as_str(), + status, + &error, + ) { + super::log_custom_openai_upstream_bad_request_http_400( + "responses_api", + self.name.as_str(), + Some(model), + status, + ); } else if super::is_provider_access_policy_denied_http_403(status, &error) { super::log_provider_access_policy_denied_http_403( "responses_api", @@ -827,6 +838,17 @@ impl OpenAiCompatibleProvider { Some(native_request.model.as_str()), status, ); + } else if super::is_custom_openai_upstream_bad_request_http_400( + self.name.as_str(), + status, + &body, + ) { + super::log_custom_openai_upstream_bad_request_http_400( + "streaming_chat", + self.name.as_str(), + Some(native_request.model.as_str()), + status, + ); } else if super::is_provider_access_policy_denied_http_403(status, &body) { 
super::log_provider_access_policy_denied_http_403( "streaming_chat", @@ -1308,6 +1330,17 @@ impl Provider for OpenAiCompatibleProvider { Some(model), status, ); + } else if super::is_custom_openai_upstream_bad_request_http_400( + self.name.as_str(), + status, + &error, + ) { + super::log_custom_openai_upstream_bad_request_http_400( + "chat_completions", + self.name.as_str(), + Some(model), + status, + ); } else if super::is_provider_access_policy_denied_http_403(status, &error) { super::log_provider_access_policy_denied_http_403( "chat_completions", @@ -1746,6 +1779,17 @@ impl Provider for OpenAiCompatibleProvider { Some(model), status, ); + } else if super::is_custom_openai_upstream_bad_request_http_400( + self.name.as_str(), + status, + &error, + ) { + super::log_custom_openai_upstream_bad_request_http_400( + "native_chat", + self.name.as_str(), + Some(model), + status, + ); } else if super::is_provider_access_policy_denied_http_403(status, &error) { super::log_provider_access_policy_denied_http_403( "native_chat", @@ -1890,6 +1934,17 @@ impl Provider for OpenAiCompatibleProvider { Some(model_owned.as_str()), status, ); + } else if super::is_custom_openai_upstream_bad_request_http_400( + provider_name.as_str(), + status, + &raw_error, + ) { + super::log_custom_openai_upstream_bad_request_http_400( + "stream_chat", + provider_name.as_str(), + Some(model_owned.as_str()), + status, + ); } else if super::is_provider_access_policy_denied_http_403(status, &raw_error) { super::log_provider_access_policy_denied_http_403( "stream_chat", diff --git a/src/openhuman/inference/provider/ops.rs b/src/openhuman/inference/provider/ops.rs index b4696c224d..c6d02b7353 100644 --- a/src/openhuman/inference/provider/ops.rs +++ b/src/openhuman/inference/provider/ops.rs @@ -301,6 +301,25 @@ pub(super) fn is_budget_exhausted_http_400(status: reqwest::StatusCode, body: &s status == reqwest::StatusCode::BAD_REQUEST && super::is_budget_exhausted_message(body) } +/// Whether a custom 
OpenAI-compatible proxy returned the known generic +/// upstream 400 envelope: +/// `{"error":{"message":"Bad request to upstream provider","type":"upstream_error","status":400}}`. +/// +/// This shape is deterministic provider/user-state (endpoint-model mismatch, +/// unsupported schema, provider-side validation) and does not provide +/// actionable signal for OpenHuman Sentry triage. +pub(super) fn is_custom_openai_upstream_bad_request_http_400( + provider: &str, + status: reqwest::StatusCode, + body: &str, +) -> bool { + if provider != "custom_openai" || status != reqwest::StatusCode::BAD_REQUEST { + return false; + } + let lower = body.to_ascii_lowercase(); + lower.contains("bad request to upstream provider") && lower.contains("upstream_error") +} + /// Whether a provider non-2xx response is a deterministic provider-policy /// denial (not a product bug) that should be demoted from Sentry. /// @@ -337,6 +356,25 @@ pub(super) fn log_budget_exhausted_http_400( ); } +pub(super) fn log_custom_openai_upstream_bad_request_http_400( + operation: &str, + provider: &str, + model: Option<&str>, + status: reqwest::StatusCode, +) { + tracing::info!( + domain = "llm_provider", + operation = operation, + provider = provider, + model = model.unwrap_or(""), + status = status.as_u16(), + failure = "non_2xx", + kind = "provider_user_state", + reason = "custom_openai_upstream_bad_request", + "[llm_provider] {operation} custom_openai upstream 400 — not reporting to Sentry" + ); +} + pub(super) fn log_provider_access_policy_denied_http_403( operation: &str, provider: &str, @@ -433,6 +471,8 @@ pub async fn api_error(provider: &str, response: reqwest::Response) -> anyhow::E let is_auth_failure = matches!(status.as_u16(), 401 | 403); let is_backend = provider == openhuman_backend::PROVIDER_LABEL; let is_budget_exhausted_user_state = is_budget_exhausted_http_400(status, &body); + let is_custom_openai_upstream_bad_request = + is_custom_openai_upstream_bad_request_http_400(provider, 
status, &body); let is_provider_access_policy_denied = is_provider_access_policy_denied_http_403(status, &body); let is_provider_config_rejection = is_provider_config_rejection_http(status, provider, &body); @@ -457,6 +497,8 @@ pub async fn api_error(provider: &str, response: reqwest::Response) -> anyhow::E ); } else if is_budget_exhausted_user_state { log_budget_exhausted_http_400("api_error", provider, None, status); + } else if is_custom_openai_upstream_bad_request { + log_custom_openai_upstream_bad_request_http_400("api_error", provider, None, status); } else if is_provider_access_policy_denied { log_provider_access_policy_denied_http_403("api_error", provider, None, status); } else if is_provider_config_rejection {