diff --git a/src/api/rest.rs b/src/api/rest.rs index d85e3b0ad9..ffa9c530c1 100644 --- a/src/api/rest.rs +++ b/src/api/rest.rs @@ -545,6 +545,27 @@ impl BackendOAuthClient { method.as_str(), url.path(), ); + } else if status_code == 401 { + // True OpenHuman backend session-expiry signal. Tag the + // bail string with the `SESSION_EXPIRED:` sentinel so the + // dispatch-site classifier + // (`crate::core::jsonrpc::is_session_expired_error`) only + // clears the local token on this boundary, not on + // downstream provider / integration 401s. Skip Sentry — + // the SessionExpired event subscriber handles the + // teardown, and a signed-out user is an expected state. + // See #2286. + tracing::info!( + domain = "backend_api", + operation = "authed_json", + method = method.as_str(), + path = url.path(), + status = 401, + failure = "session_expired", + "[backend_api] 401 on {} {} — surfacing SESSION_EXPIRED to dispatch classifier", + method.as_str(), + url.path(), + ); } else if is_transient_infra { tracing::warn!( domain = "backend_api", @@ -576,6 +597,13 @@ impl BackendOAuthClient { ], ); } + if status_code == 401 { + anyhow::bail!( + "SESSION_EXPIRED: {} {} failed ({status}): {text}", + method.as_str(), + url.path() + ); + } anyhow::bail!( "{} {} failed ({status}): {text}", method.as_str(), diff --git a/src/api/rest_tests.rs b/src/api/rest_tests.rs index 35b558d8dd..8bbec0566e 100644 --- a/src/api/rest_tests.rs +++ b/src/api/rest_tests.rs @@ -327,6 +327,74 @@ async fn authed_json_surfaces_message_not_found_on_404() { assert_eq!(message_id, "abc"); } +#[tokio::test] +async fn authed_json_tags_401_with_session_expired_sentinel() { + // #2286: real OpenHuman backend 401s must carry the SESSION_EXPIRED: + // sentinel so the dispatch-site classifier in + // `crate::core::jsonrpc::is_session_expired_error` can sign the user + // out — and downstream / integration 401s (which do NOT route through + // this client) stay recoverable. + let app = Router::new().route( + "/auth/profile", + get(|| async { + ( + axum::http::StatusCode::UNAUTHORIZED, + "{\"error\":\"unauthorized\"}", + ) + }), + ); + let listener = TcpListener::bind("127.0.0.1:0").await.unwrap(); + let addr = listener.local_addr().unwrap(); + tokio::spawn(async move { + axum::serve(listener, app).await.unwrap(); + }); + + let base_url = format!("http://{addr}"); + let client = BackendOAuthClient::new(&base_url).unwrap(); + + let err = client + .authed_json("mock-jwt", Method::GET, "/auth/profile", None) + .await + .unwrap_err(); + let msg = err.to_string(); + assert!( + msg.starts_with("SESSION_EXPIRED:"), + "401 from backend must be tagged with SESSION_EXPIRED: sentinel, got: {msg}" + ); + assert!(msg.contains("/auth/profile")); + assert!(msg.contains("401")); +} + +#[tokio::test] +async fn authed_json_non_401_failure_is_not_tagged_session_expired() { + // #2286 negative: a 500 from the backend must NOT carry the + // SESSION_EXPIRED sentinel — that's reserved for the auth boundary + // so the classifier can stay narrow. + let app = Router::new().route( + "/auth/profile", + get(|| async { (axum::http::StatusCode::INTERNAL_SERVER_ERROR, "boom") }), + ); + let listener = TcpListener::bind("127.0.0.1:0").await.unwrap(); + let addr = listener.local_addr().unwrap(); + tokio::spawn(async move { + axum::serve(listener, app).await.unwrap(); + }); + + let base_url = format!("http://{addr}"); + let client = BackendOAuthClient::new(&base_url).unwrap(); + + let err = client + .authed_json("mock-jwt", Method::GET, "/auth/profile", None) + .await + .unwrap_err(); + let msg = err.to_string(); + assert!( + !msg.contains("SESSION_EXPIRED"), + "non-401 failure must not be tagged session-expired, got: {msg}" + ); + assert!(msg.contains("500")); +} + #[tokio::test] async fn authed_json_404_outside_messages_path_still_reports() { // 404 on a non-`/channels//messages/` path should NOT be diff --git a/src/core/jsonrpc.rs b/src/core/jsonrpc.rs index 3621579d46..d938b28cf8 100644 --- a/src/core/jsonrpc.rs +++ b/src/core/jsonrpc.rs @@ -179,9 +179,23 @@ pub async fn invoke_method(state: AppState, method: &str, params: Value) -> Resu // (this one, `llm_provider.api_error`, …) gets the same teardown. if let Err(ref msg) = result { if is_session_expired_error(msg) { + // Method context + a short marker substring on the matched + // error lets us tell at a glance whether the SessionExpired + // came from the backend boundary (`SESSION_EXPIRED:` tag), + // the local missing-profile guard, or the no-JWT guard. #2286. + let matched = if msg.contains("SESSION_EXPIRED") { + "backend-sentinel" + } else if msg.to_lowercase().contains("no backend session token") { + "no-backend-session-token" + } else if msg.to_lowercase().contains("session jwt required") { + "session-jwt-required" + } else { + "unknown" + }; log::warn!( - "[jsonrpc] backend returned 401 for method '{}' — publishing SessionExpired", - method + "[jsonrpc] session-expired signal for method '{}' (match={}) — publishing SessionExpired", + method, + matched ); // Scrub before publishing — subscribers log `reason`, and the // upstream error string could include API keys / tokens from @@ -199,39 +213,38 @@ pub async fn invoke_method(state: AppState, method: &str, params: Value) -> Resu result } -/// Helper to determine if an error message indicates an expired or invalid session. +/// Helper to determine if an error message indicates an expired or invalid +/// OpenHuman user session. +/// +/// **Narrow on purpose** (see #2286). Earlier versions matched any +/// `"401 + unauthorized"` / `"invalid token"` substring, which signed users +/// out for downstream / integration / BYO-key failures that had nothing to do +/// with the OpenHuman session (Discord card-open, BYO-key mis-config, +/// integration 401s, Lark channel 401, MCP server 401, …). Each of those +/// would publish `DomainEvent::SessionExpired`, the credentials subscriber +/// would clear the local token, and the user would bounce back to Welcome / +/// sign-in for a 401 that the OpenHuman backend never emitted. /// -/// Deliberately **looser** than -/// [`crate::core::observability::is_session_expired_message`]: this -/// dispatch-site predicate also matches the generic `"401 + unauthorized"` / -/// `"invalid token"` pair so token cleanup + -/// `DomainEvent::SessionExpired` publish fire on *any* 401, including -/// BYO-key provider failures (which clear the stale local token even if -/// the user mis-configured an OpenAI / Anthropic key). The strict -/// classifier in `observability` is for the agent / web-channel -/// `report_error_or_expected` call sites, where matching too loosely would -/// silence actionable BYO-key configuration errors (OPENHUMAN-TAURI-26 -/// rationale: the agent-layer demote must NOT also swallow generic -/// provider 401s). +/// To fix it we tagged real backend-session 401s at the API boundary in +/// [`crate::api::rest`] with the `SESSION_EXPIRED:` sentinel and dropped the +/// generic `"401 + unauthorized"` match here. The classifier now matches +/// only: /// -/// "No backend session token" is also treated as a session-expired signal: the -/// auth profile is missing entirely (the user was never signed in, or their -/// stored profile was wiped between login and the next RPC). The frontend may -/// still believe it holds a session token from an optimistic post-login patch, -/// so we want the same auto-cleanup + UI-level re-auth path to fire instead of -/// repeatedly reporting this as a hard error to Sentry. See #1465-ish: users -/// stuck on the onboarding `SkillsStep` would spam `composio_list_connections` -/// failures every 5 s without ever being bounced back to the login screen. +/// - `SESSION_EXPIRED` — the authoritative backend-boundary marker +/// (`api/rest.rs` 401 path and `openhuman_backend.rs` no-session path). +/// - `"no backend session token"` — local guard that fires when the auth +/// profile is missing entirely (e.g. users stuck on onboarding's +/// `SkillsStep` would otherwise spam `composio_list_connections` failures +/// every 5 s without ever being bounced back to sign-in — see #1465-ish). +/// - `"session jwt required"` — local guard for the case where a prior 401 +/// already cleared the token and the very next RPC call finds no JWT in +/// the store. Same auth-boundary condition, just surfaced locally. /// -/// "session JWT required" covers the case where a prior 401 already cleared the -/// token and the very next RPC call (e.g. `channels_telegram_login_start`) finds -/// no JWT in the store. This is the same auth-boundary condition, just surfaced -/// as a local guard rather than a backend response. +/// Downstream / integration / BYO-key 401s are *not* matched: they surface +/// as typed recoverable errors via their own provider paths. fn is_session_expired_error(msg: &str) -> bool { let lower = msg.to_lowercase(); - (lower.contains("401") && lower.contains("unauthorized")) - || lower.contains("invalid token") - || lower.contains("no backend session token") + lower.contains("no backend session token") || lower.contains("session jwt required") || msg.contains("SESSION_EXPIRED") } diff --git a/src/core/jsonrpc_tests.rs b/src/core/jsonrpc_tests.rs index 3eed02cf0f..53a84b1a16 100644 --- a/src/core/jsonrpc_tests.rs +++ b/src/core/jsonrpc_tests.rs @@ -564,32 +564,61 @@ fn parse_json_params_reports_error_message() { } #[test] -fn is_session_expired_error_matches_401_unauthorized() { - assert!(is_session_expired_error( +fn is_session_expired_error_does_not_match_generic_401_unauthorized() { + // #2286: bare `"401 + unauthorized"` shapes from downstream / integration + // / BYO-key paths used to match and clear the OpenHuman user session. + // The classifier is narrow now — only the boundary-tagged sentinel and + // the two literal local guards trigger a SessionExpired publish. + assert!(!is_session_expired_error( "backend returned 401 Unauthorized" )); - assert!(is_session_expired_error("401 UNAUTHORIZED")); - assert!(is_session_expired_error("got 401 and unauthorized body")); + assert!(!is_session_expired_error("401 UNAUTHORIZED")); + assert!(!is_session_expired_error("got 401 and unauthorized body")); } #[test] -fn is_session_expired_error_requires_both_401_and_unauthorized() { - // 401 alone is not sufficient — could be HTTP/3.01 nonsense or - // unrelated text. We require the string "unauthorized" too. - assert!(!is_session_expired_error("server returned 401")); - assert!(!is_session_expired_error("unauthorized without code")); +fn is_session_expired_error_does_not_match_byo_key_provider_401() { + // #2286 acceptance: BYO-key provider mis-configuration (the user pasted + // a stale OpenAI / Anthropic key) must NOT sign the user out of + // OpenHuman. The inference provider path emits shapes like these. + assert!(!is_session_expired_error( + "openai: 401 Unauthorized: Incorrect API key provided" + )); + assert!(!is_session_expired_error( + "anthropic api_error: 401 invalid x-api-key" + )); + assert!(!is_session_expired_error( + "POST https://api.openai.com/v1/chat/completions failed (401): Invalid token" + )); } #[test] -fn is_session_expired_error_matches_invalid_token_case_insensitive() { - assert!(is_session_expired_error("Invalid Token")); - assert!(is_session_expired_error("got an invalid token here")); +fn is_session_expired_error_does_not_match_integration_or_channel_401() { + // #2286 acceptance: integration / channel-status fetches returning 401 + // (Discord card-open, Lark channel auth refresh, MCP server token + // refresh, …) must not clear the OpenHuman session. + assert!(!is_session_expired_error( + "lark channel status fetch returned 401 Unauthorized" + )); + assert!(!is_session_expired_error( + "mcp_client: 401 Unauthorized fetching tools/list" + )); + assert!(!is_session_expired_error( + "composio integration GET /connections failed (401): invalid integration token" + )); } #[test] -fn is_session_expired_error_matches_session_expired_sentinel() { - // The SESSION_EXPIRED sentinel is case-sensitive by design. +fn is_session_expired_error_matches_backend_sentinel() { + // `SESSION_EXPIRED:` is the authoritative marker emitted by both the + // OpenHuman backend boundary in `api/rest.rs` (#2286) and the inference + // backend's no-session path in + // `openhuman/inference/provider/openhuman_backend.rs`. assert!(is_session_expired_error("SESSION_EXPIRED: please re-auth")); + assert!(is_session_expired_error( + "SESSION_EXPIRED: POST /api/me failed (401): {\"error\":\"unauthorized\"}" + )); + // Case-sensitive by design. assert!(!is_session_expired_error("session_expired lowercase")); } @@ -598,6 +627,9 @@ fn is_session_expired_error_does_not_match_unrelated_errors() { assert!(!is_session_expired_error("network timeout")); assert!(!is_session_expired_error("500 internal server error")); assert!(!is_session_expired_error("")); + // Plain "invalid token" from a provider must no longer match — that + // was the BYO-key signal pre-#2286. + assert!(!is_session_expired_error("Invalid Token")); } #[test]