diff --git a/app/src-tauri/src/lib.rs b/app/src-tauri/src/lib.rs index 9aeae1caea..8de76f7788 100644 --- a/app/src-tauri/src/lib.rs +++ b/app/src-tauri/src/lib.rs @@ -1314,6 +1314,19 @@ pub fn run() { ); return None; } + // Defense-in-depth: drop max-tool-iterations cap events that + // slipped past the call-site filters in the core (see + // `openhuman_core::core::observability::is_max_iterations_event` + // for the rationale). The shell links the core in-process so + // any captured event for this deterministic agent-state + // outcome is filtered here too (OPENHUMAN-TAURI-99 / -98). + if openhuman_core::core::observability::is_max_iterations_event(&event) { + log::debug!( + "[sentry-max-iter-filter] dropping max-iteration cap noise event: {:?}", + event.message.as_deref().unwrap_or("") + ); + return None; + } if openhuman_core::core::observability::is_transient_backend_api_failure(&event) || openhuman_core::core::observability::is_transient_integrations_failure(&event) { diff --git a/src/core/observability.rs b/src/core/observability.rs index 88d9fe61bf..bb48a93f31 100644 --- a/src/core/observability.rs +++ b/src/core/observability.rs @@ -352,6 +352,34 @@ pub fn is_transient_provider_http_failure(event: &sentry::protocol::Event<'_>) - TRANSIENT_PROVIDER_HTTP_STATUSES.contains(&status_u16) } +/// Returns true when a Sentry event's message/exception text contains the +/// canonical max-tool-iterations cap phrase (see +/// `openhuman::agent::error::MAX_ITERATIONS_ERROR_PREFIX`). +/// +/// Defense-in-depth filter for the Sentry `before_send` hook: the primary +/// suppression lives at the call sites in `agent::harness::session:: +/// runtime::run_single`, `channels::runtime::dispatch`, and +/// `channels::providers::web::run_chat_task`, all of which now skip +/// `report_error` when this variant is detected. This filter catches any +/// future call site that re-emits the message without going through those +/// funnels — e.g. a new wrapper that calls `tracing::error!` directly with +/// the typed error rendering — and keeps OPENHUMAN-TAURI-99 / -98 +/// permanently off Sentry without requiring touch-ups at each new site. +/// +/// Match strategy: scans `event.message` first (the path used by +/// `report_error_message` → `sentry::capture_message`) and falls back to +/// the last exception's `value` (the shape `sentry-tracing` produces when +/// stacktraces are attached). Both fields are checked for the canonical +/// prefix so the filter stays robust to future Sentry plumbing changes. +pub fn is_max_iterations_event(event: &sentry::protocol::Event<'_>) -> bool { + let direct = event.message.as_deref(); + let from_exception = event.exception.last().and_then(|e| e.value.as_deref()); + [direct, from_exception] + .into_iter() + .flatten() + .any(crate::openhuman::agent::error::is_max_iterations_error) +} + pub fn is_transient_http_status(status: &str) -> bool { TRANSIENT_HTTP_STATUSES.contains(&status) } @@ -1000,4 +1028,49 @@ mod tests { &[("provider", "ollama")], ); } + + fn event_with_message(msg: &str) -> sentry::protocol::Event<'static> { + let mut event = sentry::protocol::Event::default(); + event.message = Some(msg.to_string()); + event + } + + fn event_with_exception_value(value: &str) -> sentry::protocol::Event<'static> { + let mut event = sentry::protocol::Event::default(); + event.exception = vec![sentry::protocol::Exception { + value: Some(value.to_string()), + ..Default::default() + }] + .into(); + event + } + + #[test] + fn max_iterations_filter_matches_message_path() { + // `report_error_message` calls `sentry::capture_message`, which + // populates `event.message`. The filter must see the canonical + // phrase on that field path. + let event = event_with_message("Agent exceeded maximum tool iterations (8)"); + assert!(is_max_iterations_event(&event)); + } + + #[test] + fn max_iterations_filter_matches_exception_path() { + // sentry-tracing with attach_stacktrace=true populates the + // exception list instead of (or in addition to) `event.message`. + // Filter must still catch the noise. + let event = event_with_exception_value( + "agent.run_single failed: Agent exceeded maximum tool iterations (10)", + ); + assert!(is_max_iterations_event(&event)); + } + + #[test] + fn max_iterations_filter_keeps_unrelated_events() { + assert!(!is_max_iterations_event(&event_with_message( + "provider returned 503" + ))); + assert!(!is_max_iterations_event(&event_with_message(""))); + assert!(!is_max_iterations_event(&sentry::protocol::Event::default())); + } } diff --git a/src/main.rs b/src/main.rs index 65316a6745..fe68b59d06 100644 --- a/src/main.rs +++ b/src/main.rs @@ -59,6 +59,17 @@ fn main() { if openhuman_core::core::observability::is_transient_provider_http_failure(&event) { return None; } + // Defense-in-depth: drop max-tool-iterations cap events that + // slipped past the call-site filters in + // `agent::harness::session::runtime::run_single`, + // `channels::runtime::dispatch`, and + // `channels::providers::web::run_chat_task`. The cap is a + // deterministic agent-state outcome surfaced to the user via + // the chat-rendered "Error: …" message — Sentry is the wrong + // surface for it (OPENHUMAN-TAURI-99 / -98). + if openhuman_core::core::observability::is_max_iterations_event(&event) { + return None; + } if openhuman_core::core::observability::is_transient_backend_api_failure(&event) || openhuman_core::core::observability::is_transient_integrations_failure(&event) { diff --git a/src/openhuman/agent/error.rs b/src/openhuman/agent/error.rs index d6d769c5c5..5d57747df9 100644 --- a/src/openhuman/agent/error.rs +++ b/src/openhuman/agent/error.rs @@ -76,7 +76,7 @@ impl fmt::Display for AgentError { ) } Self::MaxIterationsExceeded { max } => { - write!(f, "Agent exceeded maximum tool iterations ({max})") + write!(f, "{MAX_ITERATIONS_ERROR_PREFIX} ({max})") } Self::CompactionFailed { message, @@ -121,6 +121,31 @@ impl From for AgentError { } } +/// Canonical user-facing prefix for the max-tool-iterations cap. +/// +/// Single source of truth for: +/// - `AgentError::MaxIterationsExceeded` `Display` (in this file) +/// - Substring detection at Sentry-emit funnels where the typed variant has +/// already been marshalled through `String` (channels dispatch path, +/// web-channel run_chat_task, optional `before_send` defense) +/// +/// Keep the literal **exactly** in sync with the `Display` impl above — UI +/// surfaces and tests grep for this prefix. +pub const MAX_ITERATIONS_ERROR_PREFIX: &str = "Agent exceeded maximum tool iterations"; + +/// Returns true when an error rendering contains the canonical +/// max-tool-iterations cap message. +/// +/// Use this at Sentry-emit sites (`channels::dispatch`, `web_channel:: +/// run_chat_task`, and Sentry `before_send` filters) where the typed +/// [`AgentError::MaxIterationsExceeded`] variant has already been flattened +/// to a `String` by the native bus / handler boundary and cannot be +/// downcast directly. Sites that still hold an `anyhow::Error` should +/// prefer `err.downcast_ref::()` for precision. +pub fn is_max_iterations_error(error_msg: &str) -> bool { + error_msg.contains(MAX_ITERATIONS_ERROR_PREFIX) +} + /// Check if an error message indicates a context/prompt-too-long failure. pub fn is_context_limit_error(error_msg: &str) -> bool { let lower = error_msg.to_lowercase(); @@ -158,6 +183,25 @@ mod tests { assert!(!is_context_limit_error("rate limit exceeded")); } + #[test] + fn max_iterations_detection_matches_display() { + // The substring helper must match the variant's own Display output — + // the channels dispatch / web_channel sites flatten the typed error + // through a `String` boundary, so any drift between the constant + // and `Display` silently re-enables Sentry emission for the cap + // (OPENHUMAN-TAURI-99 / -98). + let rendered = AgentError::MaxIterationsExceeded { max: 8 }.to_string(); + assert!(is_max_iterations_error(&rendered)); + assert!(is_max_iterations_error( + "run_chat_task failed client_id=abc thread_id=t1 \ + error=Agent exceeded maximum tool iterations (10)" + )); + assert!(!is_max_iterations_error("provider returned 503")); + assert!(!is_max_iterations_error( + "Tool execution error [shell]: denied" + )); + } + #[test] fn permission_denied_display() { let err = AgentError::PermissionDenied { diff --git a/src/openhuman/agent/harness/session/runtime.rs b/src/openhuman/agent/harness/session/runtime.rs index 67c106ca27..82bc58ab7f 100644 --- a/src/openhuman/agent/harness/session/runtime.rs +++ b/src/openhuman/agent/harness/session/runtime.rs @@ -502,25 +502,46 @@ impl Agent { } Err(err) => { let sanitized_message = Self::sanitize_event_error_message(&err); - // OPENHUMAN-TAURI-5Z: upstream transient HTTP failures - // (408/429/502/503/504) are already retried + filtered at the - // provider layer. When retries exhaust they bubble up here via - // `Result::Err`, and re-reporting under `domain=agent` escapes - // the `domain=llm_provider` filter — one Sentry event per - // failed turn for a transient infrastructure blip. Route - // through `report_error_or_expected` so the classifier demotes - // those (and other expected user-state errors) to a warn-level - // breadcrumb while preserving the AgentError publish + return. - crate::core::observability::report_error_or_expected( - &err, - "agent", - "run_single", - &[ - ("session_id", self.event_session_id()), - ("channel", self.event_channel()), - ("error_kind", sanitized_message.as_str()), - ], + // The max-tool-iterations cap is a deterministic agent-state + // outcome, not a bug — the UI already surfaces the failure + // to the user via the chat-rendered "Error: Agent exceeded + // maximum tool iterations" message. Skip the Sentry funnel + // for this variant entirely and emit a structured + // `log::info!` (OPENHUMAN-TAURI-99 / -98). + // + // Other agent errors go through `report_error_or_expected` + // so OPENHUMAN-TAURI-5Z and friends — upstream transient + // HTTP that bubbles up under `domain=agent` and escapes + // the `domain=llm_provider` filter — get demoted to a + // warn-level breadcrumb without losing genuine bugs. + // `Err` propagation, the `AgentError` domain event, and + // downstream `recoverable=false` semantics are preserved. + let is_max_iter = matches!( + err.downcast_ref::(), + Some(AgentError::MaxIterationsExceeded { .. }) ); + if is_max_iter { + log::info!( + target: "agent", + "[agent.run_single] suppressed Sentry emission for max-iteration cap \ + session_id={} channel={} error_kind={} message={}", + self.event_session_id(), + self.event_channel(), + sanitized_message.as_str(), + err + ); + } else { + crate::core::observability::report_error_or_expected( + &err, + "agent", + "run_single", + &[ + ("session_id", self.event_session_id()), + ("channel", self.event_channel()), + ("error_kind", sanitized_message.as_str()), + ], + ); + } publish_global(DomainEvent::AgentError { session_id: self.event_session_id().to_string(), message: sanitized_message, diff --git a/src/openhuman/agent/harness/session/runtime_tests.rs b/src/openhuman/agent/harness/session/runtime_tests.rs index ef7f826c71..acf5ec422b 100644 --- a/src/openhuman/agent/harness/session/runtime_tests.rs +++ b/src/openhuman/agent/harness/session/runtime_tests.rs @@ -143,6 +143,56 @@ fn sanitizers_and_tool_call_helpers_cover_fallback_paths() { assert_eq!(Agent::count_iterations(&history), 3); } +#[tokio::test] +async fn run_single_preserves_typed_max_iterations_error_for_sentry_skip() { + // OPENHUMAN-TAURI-99 regression guard: when the agent hits its tool + // iteration cap, `run_single` MUST surface the typed + // `AgentError::MaxIterationsExceeded` variant so the call site can + // downcast and skip `report_error`. If the error reaches the funnel as + // a plain `anyhow::Error::msg(..)` (e.g. someone reverts to + // `anyhow::bail!`), the downcast fails and Sentry re-floods with the + // exact noise this fix removes. + let _ = init_global(64); + + let err_provider: Arc = Arc::new(StaticProvider { + response: Mutex::new(Some(Err(anyhow!(AgentError::MaxIterationsExceeded { + max: 8 + })))), + }); + let mut agent = make_agent(err_provider); + let err = agent + .run_single("hello") + .await + .expect_err("run_single should surface max-iter cap"); + + // The user-visible chat string MUST stay byte-identical — the UI + // (and `runtime_tool_calls.rs` channel test) reads this verbatim. + assert!( + err.to_string() + .contains("Agent exceeded maximum tool iterations"), + "canonical phrase missing: {err}" + ); + + // The downcast is the load-bearing condition for the Sentry skip in + // `Agent::run_single` (matches!(err.downcast_ref::(), + // Some(AgentError::MaxIterationsExceeded { .. }))). If this assertion + // ever fails the suppression silently regresses to error-level + // emission. + let downcast = err.downcast_ref::(); + assert!( + matches!(downcast, Some(AgentError::MaxIterationsExceeded { max: 8 })), + "expected MaxIterationsExceeded {{ max: 8 }}, got {downcast:?}" + ); + + // Sanitized event message round-trips to the stable kind tag so the + // structured `log::info!` we emit instead of `report_error` carries + // the right `error_kind` for log-side filtering. + assert_eq!( + Agent::sanitize_event_error_message(&err), + "max_iterations_exceeded" + ); +} + #[tokio::test] async fn run_single_publishes_completed_and_error_events() { let _ = init_global(64); diff --git a/src/openhuman/agent/harness/session/turn.rs b/src/openhuman/agent/harness/session/turn.rs index 155cd87790..628ae27bf5 100644 --- a/src/openhuman/agent/harness/session/turn.rs +++ b/src/openhuman/agent/harness/session/turn.rs @@ -859,10 +859,18 @@ impl Agent { "[agent_loop] exceeded maximum tool iterations max={}", self.config.max_tool_iterations ); - anyhow::bail!( - "Agent exceeded maximum tool iterations ({})", - self.config.max_tool_iterations - ) + // Return the typed `AgentError::MaxIterationsExceeded` variant + // (boxed through `anyhow::Error`) so `Agent::run_single` can + // downcast at the Sentry funnel and suppress emission — this is + // a deterministic agent-state outcome, not a bug (OPENHUMAN- + // TAURI-99 / -98). The `Display` text is preserved verbatim so + // the user-visible chat-rendered "Error: Agent exceeded + // maximum tool iterations" string is unchanged. + Err(anyhow::Error::new( + crate::openhuman::agent::error::AgentError::MaxIterationsExceeded { + max: self.config.max_tool_iterations, + }, + )) }; // end of `turn_body` async block // Run the turn body inside the parent-execution-context scope so diff --git a/src/openhuman/agent/harness/tool_loop.rs b/src/openhuman/agent/harness/tool_loop.rs index 4da38fe6df..0b5afd8042 100644 --- a/src/openhuman/agent/harness/tool_loop.rs +++ b/src/openhuman/agent/harness/tool_loop.rs @@ -851,7 +851,18 @@ pub(crate) async fn run_tool_call_loop( } } - anyhow::bail!("Agent exceeded maximum tool iterations ({max_iterations})") + // Return the typed `AgentError::MaxIterationsExceeded` variant (boxed + // through `anyhow::Error`) so downstream wrappers — notably + // `Agent::run_single` in `harness/session/runtime.rs` — can downcast and + // suppress Sentry emission for this deterministic agent-state outcome + // (OPENHUMAN-TAURI-99 / -98). The `Display` text is preserved verbatim so + // any caller that already inspects the string (UI chat surface, tests) + // continues to work. + Err(anyhow::Error::new( + crate::openhuman::agent::error::AgentError::MaxIterationsExceeded { + max: max_iterations, + }, + )) } #[cfg(test)] diff --git a/src/openhuman/channels/providers/web.rs b/src/openhuman/channels/providers/web.rs index b2c1fa5eb2..7d30dc37e3 100644 --- a/src/openhuman/channels/providers/web.rs +++ b/src/openhuman/channels/providers/web.rs @@ -430,24 +430,46 @@ pub async fn start_chat( ); let (classified_type, classified_message) = classify_inference_error(&err); let classified_type_string = classified_type.to_string(); - // Route through `report_error_or_expected` so transport-level - // connection failures (DNS/TCP/TLS handshake, ISP blocks — - // see OPENHUMAN-TAURI-32 for a RU user who couldn't reach - // `api.tinyhumans.ai` at all) get logged as warn-level - // breadcrumbs instead of error events. Sentry has no signal - // to act on these — no status, no trace, no payload — and - // every retry exhaustion produces another noisy event. - crate::core::observability::report_error_or_expected( - detailed.as_str(), - "web_channel", - "run_chat_task", - &[ - ("channel", "web"), - ("error_type", classified_type), - ("thread_id", thread_id_task.as_str()), - ("request_id", request_id_task.as_str()), - ], - ); + // Max-tool-iterations cap is a deterministic agent-state + // outcome surfaced to the user via the existing + // `WebChannelEvent::chat_error` event below. Skip the + // Sentry funnel entirely for that variant + // (OPENHUMAN-TAURI-98). Substring match is required here + // because the typed `AgentError` was flattened to a + // `String` at the native-bus boundary. + // + // Other errors flow through `report_error_or_expected` + // so transport-level transient failures (DNS/TCP/TLS + // handshake, ISP blocks — OPENHUMAN-TAURI-32 for the RU + // user who couldn't reach api.tinyhumans.ai at all) get + // logged as warn-level breadcrumbs instead of error + // events. Sentry has no signal to act on those — no + // status, no trace, no payload — and every retry + // exhaustion produces another noisy event. + if crate::openhuman::agent::error::is_max_iterations_error(&detailed) { + log::info!( + target: "web_channel", + "[web_channel.run_chat_task] suppressed Sentry emission for max-iteration \ + cap client_id={} thread_id={} request_id={} error_type={} message={}", + client_id_task, + thread_id_task, + request_id_task, + classified_type, + detailed + ); + } else { + crate::core::observability::report_error_or_expected( + detailed.as_str(), + "web_channel", + "run_chat_task", + &[ + ("channel", "web"), + ("error_type", classified_type), + ("thread_id", thread_id_task.as_str()), + ("request_id", request_id_task.as_str()), + ], + ); + } publish_web_channel_event(WebChannelEvent { event: "chat_error".to_string(), client_id: client_id_task.clone(), diff --git a/src/openhuman/channels/runtime/dispatch.rs b/src/openhuman/channels/runtime/dispatch.rs index 804fcc5f0a..0340eb68e3 100644 --- a/src/openhuman/channels/runtime/dispatch.rs +++ b/src/openhuman/channels/runtime/dispatch.rs @@ -1167,15 +1167,36 @@ pub(crate) async fn process_channel_message( " ❌ LLM error after {}ms: {e}", started_at.elapsed().as_millis() ); - crate::core::observability::report_error( - &e, - "channels", - "dispatch_llm_error", - &[ - ("channel", msg.channel.as_str()), - ("provider", route.provider.as_str()), - ], - ); + // The typed `AgentError` is flattened to a `String` at the + // native-bus boundary (`agent::bus` map_err → `e.to_string()`), + // so the downcast that works in `Agent::run_single` is not an + // option here — fall back to canonical-phrase substring match. + // The max-tool-iterations cap is a deterministic agent-state + // outcome and is already surfaced to the user as the + // chat-rendered "⚠️ Error: …" message just above. Skip the + // Sentry funnel (OPENHUMAN-TAURI-98) and emit `log::info!` + // instead — `Err` propagation through the surrounding match + // arm is unchanged. + if crate::openhuman::agent::error::is_max_iterations_error(&e.to_string()) { + log::info!( + target: "channels", + "[channels.dispatch] suppressed Sentry emission for max-iteration cap \ + channel={} provider={} message={}", + msg.channel.as_str(), + route.provider.as_str(), + e + ); + } else { + crate::core::observability::report_error( + &e, + "channels", + "dispatch_llm_error", + &[ + ("channel", msg.channel.as_str()), + ("provider", route.provider.as_str()), + ], + ); + } if let Some(channel) = target_channel.as_ref() { if let Some(ref draft_id) = draft_message_id { let _ = channel