From 6f98f1e28a113afd0ea47908d0a397ecfe681560 Mon Sep 17 00:00:00 2001 From: aqilaziz Date: Wed, 20 May 2026 13:46:57 +0700 Subject: [PATCH] fix(jsonrpc): keep scoped 401s from expiring session --- src/core/jsonrpc.rs | 67 +++++++++++++++++++++------------------ src/core/jsonrpc_tests.rs | 43 ++++++++++++++++++------- src/core/observability.rs | 28 ++++++---------- 3 files changed, 78 insertions(+), 60 deletions(-) diff --git a/src/core/jsonrpc.rs b/src/core/jsonrpc.rs index 3621579d46..f78b7b3dcf 100644 --- a/src/core/jsonrpc.rs +++ b/src/core/jsonrpc.rs @@ -160,8 +160,9 @@ pub async fn rpc_handler(State(state): State, Json(req): Json, Json(req): Json Result { let result = invoke_method_inner(state, method, params).await; - // Session auto-cleanup: if the backend says we're unauthorized, publish - // a `SessionExpired` event. The credentials subscriber clears the stored - // token, flips the scheduler-gate signed-out override so background - // workers stand down, and (eventually) pushes a sign-out to the UI. - // Centralising via the event bus means 401 detection from any path - // (this one, `llm_provider.api_error`, …) gets the same teardown. + // Session auto-cleanup: if the OpenHuman auth session is explicitly + // expired, publish a `SessionExpired` event. The credentials subscriber + // clears the stored token, flips the scheduler-gate signed-out override + // so background workers stand down, and (eventually) pushes a sign-out to + // the UI. Generic downstream/provider 401s must stay recoverable errors; + // otherwise a scoped integration failure can log the user out. if let Err(ref msg) = result { + let sanitized_reason = crate::openhuman::inference::provider::ops::sanitize_api_error(msg); if is_session_expired_error(msg) { log::warn!( - "[jsonrpc] backend returned 401 for method '{}' — publishing SessionExpired", - method + "[jsonrpc] confirmed session expiry for method '{}' — publishing SessionExpired: {}", + method, + sanitized_reason ); // Scrub before publishing — subscribers log `reason`, and the // upstream error string could include API keys / tokens from - // pasted-through provider replies. `sanitize_api_error` runs - // `scrub_secret_patterns` and truncates. + // pasted-through provider replies. crate::core::event_bus::publish_global( crate::core::event_bus::DomainEvent::SessionExpired { source: format!("jsonrpc.invoke_method:{method}"), - reason: crate::openhuman::inference::provider::ops::sanitize_api_error(msg), + reason: sanitized_reason, }, ); + } else if is_unconfirmed_unauthorized_error(msg) { + log::warn!( + "[jsonrpc] unauthorized error for method '{}' did not match OpenHuman session expiry — leaving session intact: {}", + method, + sanitized_reason + ); } } @@ -201,18 +209,12 @@ pub async fn invoke_method(state: AppState, method: &str, params: Value) -> Resu /// Helper to determine if an error message indicates an expired or invalid session. /// -/// Deliberately **looser** than -/// [`crate::core::observability::is_session_expired_message`]: this -/// dispatch-site predicate also matches the generic `"401 + unauthorized"` / -/// `"invalid token"` pair so token cleanup + -/// `DomainEvent::SessionExpired` publish fire on *any* 401, including -/// BYO-key provider failures (which clear the stale local token even if -/// the user mis-configured an OpenAI / Anthropic key). The strict -/// classifier in `observability` is for the agent / web-channel -/// `report_error_or_expected` call sites, where matching too loosely would -/// silence actionable BYO-key configuration errors (OPENHUMAN-TAURI-26 -/// rationale: the agent-layer demote must NOT also swallow generic -/// provider 401s). +/// Uses the same strict classifier as observability so only explicit +/// OpenHuman auth-session failures clear the app session. A generic +/// `"401 Unauthorized"` or `"invalid token"` can come from BYO-key +/// providers, Composio, channels, or other scoped downstream calls; those +/// must surface as recoverable errors rather than publishing +/// `DomainEvent::SessionExpired`. /// /// "No backend session token" is also treated as a session-expired signal: the /// auth profile is missing entirely (the user was never signed in, or their @@ -228,12 +230,15 @@ pub async fn invoke_method(state: AppState, method: &str, params: Value) -> Resu /// no JWT in the store. This is the same auth-boundary condition, just surfaced /// as a local guard rather than a backend response. fn is_session_expired_error(msg: &str) -> bool { - let lower = msg.to_lowercase(); - (lower.contains("401") && lower.contains("unauthorized")) - || lower.contains("invalid token") - || lower.contains("no backend session token") - || lower.contains("session jwt required") - || msg.contains("SESSION_EXPIRED") + crate::core::observability::is_session_expired_message(msg) +} + +/// Detect auth-looking failures that are not specific enough to clear the +/// OpenHuman session. This is only for diagnostics; it must not feed the +/// `SessionExpired` publish path. +fn is_unconfirmed_unauthorized_error(msg: &str) -> bool { + let lower = msg.to_ascii_lowercase(); + (lower.contains("401") && lower.contains("unauthorized")) || lower.contains("invalid token") } /// Returns `true` when the error message comes from JSON-RPC params validation diff --git a/src/core/jsonrpc_tests.rs b/src/core/jsonrpc_tests.rs index 3eed02cf0f..f8d1926bd4 100644 --- a/src/core/jsonrpc_tests.rs +++ b/src/core/jsonrpc_tests.rs @@ -7,7 +7,8 @@ use tokio_util::sync::CancellationToken; use super::{ build_http_schema_dump, default_state, escape_html, invoke_method, is_param_validation_error, - is_session_expired_error, params_to_object, parse_json_params, rpc_handler, type_name, + is_session_expired_error, is_unconfirmed_unauthorized_error, params_to_object, + parse_json_params, rpc_handler, type_name, }; struct EnvVarGuard { @@ -564,26 +565,46 @@ fn parse_json_params_reports_error_message() { } #[test] -fn is_session_expired_error_matches_401_unauthorized() { - assert!(is_session_expired_error( +fn is_session_expired_error_does_not_match_generic_401_unauthorized() { + assert!(!is_session_expired_error( "backend returned 401 Unauthorized" )); - assert!(is_session_expired_error("401 UNAUTHORIZED")); - assert!(is_session_expired_error("got 401 and unauthorized body")); + assert!(!is_session_expired_error("401 UNAUTHORIZED")); + assert!(!is_session_expired_error("got 401 and unauthorized body")); +} + +#[test] +fn unconfirmed_unauthorized_error_matches_generic_401_for_diagnostics_only() { + assert!(is_unconfirmed_unauthorized_error( + "backend returned 401 Unauthorized" + )); + assert!(is_unconfirmed_unauthorized_error("401 UNAUTHORIZED")); + assert!(is_unconfirmed_unauthorized_error( + "got 401 and unauthorized body" + )); } #[test] -fn is_session_expired_error_requires_both_401_and_unauthorized() { - // 401 alone is not sufficient — could be HTTP/3.01 nonsense or - // unrelated text. We require the string "unauthorized" too. +fn is_session_expired_error_does_not_match_partial_auth_text() { assert!(!is_session_expired_error("server returned 401")); assert!(!is_session_expired_error("unauthorized without code")); } #[test] -fn is_session_expired_error_matches_invalid_token_case_insensitive() { - assert!(is_session_expired_error("Invalid Token")); - assert!(is_session_expired_error("got an invalid token here")); +fn is_session_expired_error_does_not_match_invalid_token_case_insensitive() { + assert!(!is_session_expired_error("Invalid Token")); + assert!(!is_session_expired_error("got an invalid token here")); + assert!(is_unconfirmed_unauthorized_error("Invalid Token")); + assert!(is_unconfirmed_unauthorized_error( + "got an invalid token here" + )); +} + +#[test] +fn is_session_expired_error_matches_openhuman_session_expired_body() { + assert!(is_session_expired_error( + r#"OpenHuman API error (401 Unauthorized): {"success":false,"error":"Session expired. Please log in again."}"# + )); } #[test] diff --git a/src/core/observability.rs b/src/core/observability.rs index 2b285dcd2c..c96a6b804e 100644 --- a/src/core/observability.rs +++ b/src/core/observability.rs @@ -193,13 +193,10 @@ pub fn expected_error_kind(message: &str) -> Option { /// Detect **app-session-expired** boundary errors that bubble up from any /// backend-touching call site (agent, web channel, cron, integrations). /// -/// Deliberately stricter than the dispatch-site classifier in -/// [`crate::core::jsonrpc`]: the dispatch-site predicate matches a generic -/// "401 + unauthorized" pair to trigger token cleanup on *any* 401 (even an -/// OpenAI / Anthropic BYO-key 401 that means a misconfigured key — see -/// `providers::ops::api_error`). Replicating that loose match here would -/// silence BYO-key configuration errors at the agent layer, where they -/// *are* actionable and should reach Sentry as errors. +/// This is also the JSON-RPC dispatch-site classifier. Keep it stricter than +/// a bare "401 + unauthorized" pair: OpenAI / Anthropic BYO-key failures, +/// Composio scope failures, and channel-provider 401s are actionable scoped +/// errors, not proof that the user's OpenHuman app session expired. /// /// The canonical OpenHuman session-expired wire shapes: /// @@ -217,13 +214,9 @@ pub fn expected_error_kind(message: &str) -> Option { /// stored profile is empty (`#1465`-ish onboarding spam) or has been /// cleared by a previous 401 cycle. Both shapes are OpenHuman-specific. /// -/// At the JSON-RPC dispatch boundary the looser classifier in -/// `crate::core::jsonrpc::is_session_expired_error` keeps its existing -/// generic "401 + unauthorized" match so token cleanup + `DomainEvent::SessionExpired` -/// publish still fires for every 401. Adding the demote here therefore does -/// **not** silence the auto-cleanup teardown — it only stops the duplicate -/// per-attempt error event that escaped via `report_error_or_expected` from -/// the agent / web-channel layers (OPENHUMAN-TAURI-26). +/// At the JSON-RPC dispatch boundary the same strict match controls +/// `DomainEvent::SessionExpired` publication, so downstream/provider 401s stay +/// recoverable and do not clear the stored app session. pub fn is_session_expired_message(msg: &str) -> bool { let lower = msg.to_ascii_lowercase(); lower.contains("session expired") @@ -1995,10 +1988,9 @@ mod tests { // to fix in settings. It must reach Sentry as an error and must // NOT be classified as session-expired at the agent layer — the // strict classifier requires the OpenHuman backend's - // "session expired" body to anchor the match. The dispatch-site - // classifier (`crate::core::jsonrpc::is_session_expired_error`) - // still matches these for the `DomainEvent::SessionExpired` - // auto-cleanup path, which clears stale local state defensively. + // "session expired" body to anchor the match. The JSON-RPC + // dispatch-site classifier uses the same strict rule so these + // scoped provider failures never clear the app session either. for raw in [ "OpenAI API error (401 Unauthorized): invalid_api_key", "Anthropic API error (401 Unauthorized): authentication_error",