Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions app/src-tauri/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1314,6 +1314,19 @@ pub fn run() {
);
return None;
}
if openhuman_core::core::observability::is_budget_event(&event) {
// Log only structured tag metadata — `event.message` can carry
// upstream provider error text including tokens / pasted-through
// secrets, and per `CLAUDE.md` "never log secrets or full PII".
// The (domain, status) pair is sufficient diagnostic since
// those are the tags `is_budget_event` gates on.
log::debug!(
"[sentry-budget-filter] dropping budget-exhausted event (domain={:?}, status={:?})",
event.tags.get("domain"),
event.tags.get("status")
);
Comment thread
coderabbitai[bot] marked this conversation as resolved.
return None;
}
// Defense-in-depth: drop max-tool-iterations cap events that
// slipped past the call-site filters in the core (see
// `openhuman_core::core::observability::is_max_iterations_event`
Expand Down
15 changes: 14 additions & 1 deletion src/api/rest.rs
Original file line number Diff line number Diff line change
Expand Up @@ -476,7 +476,20 @@ impl BackendOAuthClient {
// implement retry/disable logic, so skip Sentry to avoid noise.
let is_transient_infra =
crate::core::observability::is_transient_http_status_code(status_code);
if is_transient_infra {
let is_budget_exhausted = status_code == 400
&& crate::openhuman::providers::is_budget_exhausted_message(&text);
if is_budget_exhausted {
tracing::info!(
method = method.as_str(),
path = url.path(),
status = status_code,
failure = "non_2xx",
kind = "budget",
"[backend_api] budget-exhausted 400 on {} {} — not reporting to Sentry",
method.as_str(),
url.path(),
);
} else if is_transient_infra {
tracing::warn!(
domain = "backend_api",
operation = "authed_json",
Expand Down
109 changes: 107 additions & 2 deletions src/core/observability.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
//! Centralised error reporting for the core, plus a Sentry
//! `before_send` filter that drops per-attempt transient-upstream
//! provider failures.
//! `before_send` filters that drop deterministic provider noise:
//! per-attempt transient-upstream failures and budget-exhausted user-state.
//!
//! Wraps `tracing::error!` (which the global subscriber forwards to Sentry via
//! `sentry-tracing`) inside a `sentry::with_scope` so each captured event
Expand Down Expand Up @@ -61,6 +61,7 @@ pub enum ExpectedErrorKind {
LocalAiBinaryMissing,
BackendUserError,
LocalAiCapabilityUnavailable,
BudgetExhausted,
}

pub fn expected_error_kind(message: &str) -> Option<ExpectedErrorKind> {
Expand All @@ -86,6 +87,9 @@ pub fn expected_error_kind(message: &str) -> Option<ExpectedErrorKind> {
if is_local_ai_capability_unavailable_message(&lower) {
return Some(ExpectedErrorKind::LocalAiCapabilityUnavailable);
}
if crate::openhuman::providers::is_budget_exhausted_message(message) {
return Some(ExpectedErrorKind::BudgetExhausted);
}
None
}

Expand Down Expand Up @@ -321,6 +325,22 @@ fn report_expected_message(kind: ExpectedErrorKind, message: &str, domain: &str,
"[observability] {domain}.{operation} skipped expected local-ai capability-unavailable error: {message}"
);
}
ExpectedErrorKind::BudgetExhausted => {
// User-state condition: the backend reports the user is out of
// budget / credits / balance (HTTP 400 from the OpenHuman backend,
// surfaced by `providers::is_budget_exhausted_message`). The UI
// already surfaces this as an actionable toast — Sentry would
// turn each affected turn into noise (OPENHUMAN-TAURI-3M / -12 /
// -13). Demote to info so it still appears in breadcrumbs but
// never spawns a Sentry error event.
tracing::info!(
domain = domain,
operation = operation,
kind = "budget",
error = %message,
"[observability] {domain}.{operation} skipped expected budget-exhausted error: {message}"
);
}
}
}

Expand Down Expand Up @@ -533,6 +553,47 @@ pub fn is_transient_message_failure(msg: &str) -> bool {
|| contains_transient_transport_phrase(&lower)
}

/// Decides whether `before_send` should discard a Sentry event because it is
/// a budget-exhausted HTTP 400.
///
/// Every one of the following must hold for the event to be dropped:
/// - the `failure` tag is exactly `"non_2xx"`
/// - the `status` tag is exactly `"400"`
/// - the event message, or at least one exception value, contains one of the
///   tight budget-exhaustion phrases
///
/// The `domain` tag is deliberately left unchecked. This filter is
/// defense-in-depth behind the emit-site classifier, so a non_2xx/400 event
/// carrying budget-exhausted phrasing is dropped no matter which domain
/// emitted it — including a future re-emitter that uses a different tag.
pub fn is_budget_event(event: &sentry::protocol::Event<'_>) -> bool {
    // A missing tag never equals `Some(expected)`, so absent tags fail closed.
    let tag_equals =
        |key: &str, expected: &str| event.tags.get(key).map(String::as_str) == Some(expected);

    // Cheap tag checks run first; the text scan only happens for tagged 400s.
    tag_equals("failure", "non_2xx")
        && tag_equals("status", "400")
        && event_contains_budget_exhausted_message(event)
}

/// Reports whether any text carried by the event — its top-level message or
/// the value of any attached exception — is classified as budget-exhausted by
/// `providers::is_budget_exhausted_message`.
fn event_contains_budget_exhausted_message(event: &sentry::protocol::Event<'_>) -> bool {
    // Absent text (`None`) never matches.
    let is_budget_text = |text: Option<&str>| {
        text.is_some_and(crate::openhuman::providers::is_budget_exhausted_message)
    };

    // The message is checked first; exception values are scanned only if it
    // does not match.
    is_budget_text(event.message.as_deref())
        || event
            .exception
            .values
            .iter()
            .any(|exception| is_budget_text(exception.value.as_deref()))
}

#[cfg(test)]
mod tests {
use super::*;
Expand Down Expand Up @@ -1153,6 +1214,50 @@ mod tests {
}
}

#[test]
fn budget_filter_drops_budget_message_on_tagged_400() {
    // Tagged as a non-2xx / 400 failure, with a budget phrase in the message.
    let tags = [("failure", "non_2xx"), ("status", "400")];
    let message = r#"OpenHuman API error (400 Bad Request): {"success":false,"error":"Insufficient budget"}"#;
    let event = event_with_tags_and_message(&tags, message);

    assert!(is_budget_event(&event));
}

#[test]
fn budget_filter_drops_budget_exception_on_tagged_400() {
    // The budget phrase lives in an exception value rather than the message.
    let budget_exception = sentry::protocol::Exception {
        value: Some("Budget exceeded — add credits to continue".to_string()),
        ..Default::default()
    };
    let mut event = event_with_tags(&[("failure", "non_2xx"), ("status", "400")]);
    event.exception.values.push(budget_exception);

    assert!(is_budget_event(&event));
}

#[test]
fn budget_filter_keeps_non_budget_400() {
    // Correct tags, but the message carries no budget-exhaustion phrase.
    let tags = [("failure", "non_2xx"), ("status", "400")];
    let event = event_with_tags_and_message(&tags, "Bad request: missing field");

    assert!(!is_budget_event(&event));
}

#[test]
fn budget_filter_requires_non_2xx_failure_and_400_status() {
    // The message always matches; each tag set violates one tag requirement.
    let message = "Budget exceeded — add credits to continue";
    let mismatched_tag_sets = [
        // wrong `failure` value
        vec![("failure", "transport"), ("status", "400")],
        // wrong `status` value
        vec![("failure", "non_2xx"), ("status", "500")],
        // `status` tag missing entirely
        vec![("failure", "non_2xx")],
    ];

    for tags in mismatched_tag_sets {
        let event = event_with_tags_and_message(&tags, message);
        assert!(!is_budget_event(&event));
    }
}

#[test]
fn report_error_or_expected_does_not_panic() {
report_error_or_expected(
Expand Down
7 changes: 7 additions & 0 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,13 @@ fn main() {
if openhuman_core::core::observability::is_transient_provider_http_failure(&event) {
return None;
}
// Defense-in-depth for budget-exhausted 400s. Emit sites demote the
// known backend responses before they hit Sentry; this catches any
// future non_2xx/status=400 event that carries the same tight body
// phrases.
if openhuman_core::core::observability::is_budget_event(&event) {
return None;
}
// Defense-in-depth: drop max-tool-iterations cap events that
// slipped past the call-site filters in
// `agent::harness::session::runtime::run_single`,
Expand Down
9 changes: 5 additions & 4 deletions src/openhuman/agent/harness/session/runtime.rs
Original file line number Diff line number Diff line change
Expand Up @@ -510,10 +510,11 @@ impl Agent {
// `log::info!` (OPENHUMAN-TAURI-99 / -98).
//
// Other agent errors go through `report_error_or_expected`
// so OPENHUMAN-TAURI-5Z and friends — upstream transient
// HTTP that bubbles up under `domain=agent` and escapes
// the `domain=llm_provider` filter — get demoted to a
// warn-level breadcrumb without losing genuine bugs.
// so OPENHUMAN-TAURI-5Z and the budget-noise cluster —
// upstream transient HTTP and backend budget-exhausted 400s
// that bubble up under `domain=agent` and escape the
// `domain=llm_provider` filter — get demoted to a
// warn/info-level breadcrumb without losing genuine bugs.
// `Err` propagation, the `AgentError` domain event, and
// downstream `recoverable=false` semantics are preserved.
let is_max_iter = matches!(
Expand Down
60 changes: 60 additions & 0 deletions src/openhuman/providers/billing_error.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
/// Classifies a 400 response body as a "user is out of budget" condition:
/// insufficient budget or balance, budget exceeded, or a prompt to add
/// credits. These are deterministic user-state errors — already surfaced in
/// the UI as a toast — and must not flow to Sentry as errors.
///
/// Returns `true` when `body` contains any known phrase, compared
/// case-insensitively (ASCII case folding only). The phrase list is kept
/// deliberately tight: a false positive would demote a real backend bug.
pub fn is_budget_exhausted_message(body: &str) -> bool {
    // Every needle below is plain ASCII, so ASCII lowercasing of the haystack
    // is sufficient for a case-insensitive substring match.
    let haystack = body.to_ascii_lowercase();

    for needle in [
        "insufficient budget",
        "budget exceeded",
        "add credits",
        "insufficient balance",
    ] {
        if haystack.contains(needle) {
            return true;
        }
    }
    false
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Each known phrase, in its natural capitalisation, is recognised.
    #[test]
    fn detects_known_budget_exhaustion_phrases() {
        let budget_bodies = [
            "Insufficient budget",
            "Budget exceeded",
            "Insufficient balance",
            "Add credits to continue",
        ];

        for body in budget_bodies {
            assert!(
                is_budget_exhausted_message(body),
                "{body:?} must be classified as budget-exhausted user-state"
            );
        }
    }

    /// Upper-case and mixed-case variants still match.
    #[test]
    fn detection_is_case_insensitive() {
        let all_caps = is_budget_exhausted_message("INSUFFICIENT BUDGET");
        assert!(all_caps);

        let mixed_case = is_budget_exhausted_message("budget EXCEEDED — ADD credits");
        assert!(mixed_case);

        let caps_suffix = is_budget_exhausted_message("Insufficient BALANCE");
        assert!(caps_suffix);
    }

    /// Unrelated 400 bodies, and the empty body, are not matched.
    #[test]
    fn ignores_non_budget_messages() {
        let unrelated_bodies = [
            "Bad request: missing field",
            "Invalid request: model not found",
            "HTTP 400 Bad Request",
            "",
        ];

        for body in unrelated_bodies {
            assert!(
                !is_budget_exhausted_message(body),
                "{body:?} must not be classified as budget-exhausted"
            );
        }
    }
}
45 changes: 40 additions & 5 deletions src/openhuman/providers/compatible.rs
Original file line number Diff line number Diff line change
Expand Up @@ -400,7 +400,14 @@ impl OpenAiCompatibleProvider {
let error = response.text().await?;
let sanitized = super::sanitize_api_error(&error);
let message = format!("{} Responses API error: {sanitized}", self.name);
if super::should_report_provider_http_failure(status) {
if super::is_budget_exhausted_http_400(status, &error) {
super::log_budget_exhausted_http_400(
"responses_api",
self.name.as_str(),
Some(model),
status,
);
} else if super::should_report_provider_http_failure(status) {
crate::core::observability::report_error(
message.as_str(),
"llm_provider",
Expand Down Expand Up @@ -736,7 +743,14 @@ impl OpenAiCompatibleProvider {
"{} streaming API error ({}): {}",
self.name, status, sanitized
);
if super::should_report_provider_http_failure(status) {
if super::is_budget_exhausted_http_400(status, &body) {
super::log_budget_exhausted_http_400(
"streaming_chat",
self.name.as_str(),
Some(native_request.model.as_str()),
status,
);
} else if super::should_report_provider_http_failure(status) {
Comment on lines +746 to +753
Copy link
Copy Markdown
Contributor

@coderabbitai coderabbitai Bot May 13, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major | 🏗️ Heavy lift

Skip the non-streaming fallback for deterministic budget 400s.

This branch now knows the failure is a terminal user-state error, but stream_native_chat still returns a generic Err, so chat() falls through to the non-streaming retry on Line 1485. That adds a second doomed provider call and duplicates the budget log for every streaming budget miss. Please bubble up a typed/sentinel error here so the caller can short-circuit instead of retrying.

🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@src/openhuman/providers/compatible.rs` around lines 738 - 745, When detecting
a deterministic budget-400 in the streaming path (after
is_budget_exhausted_http_400 and log_budget_exhausted_http_400), return a
specific sentinel/typed error from stream_native_chat (e.g., a
ProviderError::BudgetExhausted or similar new variant) instead of the current
generic Err so the caller chat() can detect it and short-circuit the
non-streaming fallback; update stream_native_chat to produce that typed error
and ensure chat() checks for that sentinel and stops retrying/does not invoke
the non-streaming fallback, preventing duplicate logs and duplicate provider
calls.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Acknowledged — this is a valid follow-up but out of scope for the noise-drop PR. The typed sentinel error would require a provider-error taxonomy refactor (ProviderError::BudgetExhausted variant + every chat()-consumer learning to short-circuit). I'd rather ship this Sentry-drop now (which already prevents the second event from reaching Sentry via before_send) and open a follow-up for the streaming-fallback short-circuit. Tracking as TODO.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@oxoxDev, understood — the rationale is sound. The before_send defense-in-depth already prevents the second call's event from reaching Sentry, so the duplicate-log risk is bounded to structured tracing::info! only (not a Sentry noise problem). The full ProviderError::BudgetExhausted refactor is a meaningful taxonomy change that deserves its own PR.

Would you like me to open a follow-up GitHub issue to track the streaming-fallback short-circuit work (ProviderError::BudgetExhausted typed error + chat() short-circuit)?


🧠 Learnings used
Learnt from: sanil-23
Repo: tinyhumansai/openhuman PR: 416
File: src/openhuman/memory/relex.rs:441-464
Timestamp: 2026-04-07T15:49:51.275Z
Learning: When using the `ort` Rust crate v2.x with the `load-dynamic` feature enabled, don’t require individual execution-provider feature flags (e.g., `directml`, `coreml`, `cuda`) alongside `load-dynamic` to get EP registration code. The `ort` crate already compiles EP registration via `#[cfg(any(feature = "load-dynamic", feature = "<ep_name>"))]` guards, and adding per-EP feature flags can pull in static-linking dependencies that conflict with the dynamic loading approach. At runtime, EP availability is determined by what the dynamically loaded ONNX Runtime library (`onnxruntime.dll`/`.so`/`.dylib`) supports; ort docs indicate providers like `directml`/`xnnpack`/`coreml` are available in builds when the platform supports them.

Learnt from: sanil-23
Repo: tinyhumansai/openhuman PR: 416
File: src/openhuman/memory/relex.rs:441-464
Timestamp: 2026-04-07T15:49:51.275Z
Learning: When integrating the `ort` Rust crate v2.x with the `load-dynamic` feature enabled, do NOT also require/enable individual provider EP Cargo features like `directml`, `coreml`, or `cuda`. In `ort` v2.x, EP registration for providers (e.g., DirectML, CoreML, CUDA, etc.) is already compiled in under source-level `#[cfg(any(feature = "load-dynamic", feature = "<provider>"))]` guards, such as in `ep/directml.rs`. Adding provider feature flags alongside `load-dynamic` can pull in static-linking dependencies that conflict with the dynamic-loading approach. Provider availability should be treated as runtime-determined by what the loaded `onnxruntime` library (`onnxruntime.dll`/`libonnxruntime.so`/`libonnxruntime.dylib`) actually supports.

Learnt from: oxoxDev
Repo: tinyhumansai/openhuman PR: 571
File: src/openhuman/local_ai/service/whisper_engine.rs:69-80
Timestamp: 2026-04-14T19:59:04.826Z
Learning: When reviewing Rust code in this repo that uses the upstream `whisper-rs` crate (v0.16.0), do not report `WhisperContextParameters::use_gpu(...)` or `WhisperContextParameters::flash_attn(...)` as missing/invalid APIs. These builder-style methods exist upstream and return `&mut Self`; they are not limited to `WhisperVadContextParams`.

Learnt from: graycyrus
Repo: tinyhumansai/openhuman PR: 1078
File: src/openhuman/agent/agents/welcome/prompt.rs:24-24
Timestamp: 2026-05-01T13:41:00.958Z
Learning: For Rust code under `src/openhuman/**/*.rs`, use `snake_case` for local variables (not `camelCase`). If a local variable name is written in `camelCase`, treat it as a style/lint issue because it will trigger Rust’s `non_snake_case` warning (and related clippy linting, if enabled). Avoid suggesting `camelCase` for any Rust local variable names in this repository.

Learnt from: senamakel
Repo: tinyhumansai/openhuman PR: 1173
File: tests/agent_memory_loader_public.rs:88-88
Timestamp: 2026-05-04T06:50:47.877Z
Learning: In this repository, the general camelCase naming guideline should not be applied to Rust source files. For all .rs files, Rust function (and related) names should use snake_case, and snake_case Rust function names should not be flagged—even for async test functions annotated with attributes like #[tokio::test]. This is consistent with Rust’s non_snake_case lint behavior.

crate::core::observability::report_error(
message.as_str(),
"llm_provider",
Expand Down Expand Up @@ -1190,7 +1204,14 @@ impl Provider for OpenAiCompatibleProvider {

let status_str = status.as_u16().to_string();
let message = format!("{} API error ({status}): {sanitized}", self.name);
if super::should_report_provider_http_failure(status) {
if super::is_budget_exhausted_http_400(status, &error) {
super::log_budget_exhausted_http_400(
"chat_completions",
self.name.as_str(),
Some(model),
status,
);
} else if super::should_report_provider_http_failure(status) {
crate::core::observability::report_error(
message.as_str(),
"llm_provider",
Expand Down Expand Up @@ -1574,7 +1595,14 @@ impl Provider for OpenAiCompatibleProvider {

let status_str = status.as_u16().to_string();
let message = format!("{} API error ({status}): {sanitized}", self.name);
if super::should_report_provider_http_failure(status) {
if super::is_budget_exhausted_http_400(status, &error) {
super::log_budget_exhausted_http_400(
"native_chat",
self.name.as_str(),
Some(model),
status,
);
} else if super::should_report_provider_http_failure(status) {
crate::core::observability::report_error(
message.as_str(),
"llm_provider",
Expand Down Expand Up @@ -1701,7 +1729,14 @@ impl Provider for OpenAiCompatibleProvider {
};
let sanitized_error = super::sanitize_api_error(&raw_error);
let message = format!("{}: {}", status, sanitized_error);
if super::should_report_provider_http_failure(status) {
if super::is_budget_exhausted_http_400(status, &raw_error) {
super::log_budget_exhausted_http_400(
"stream_chat",
provider_name.as_str(),
Some(model_owned.as_str()),
status,
);
} else if super::should_report_provider_http_failure(status) {
crate::core::observability::report_error(
message.as_str(),
"llm_provider",
Expand Down
2 changes: 2 additions & 0 deletions src/openhuman/providers/mod.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
pub mod billing_error;
pub mod compatible;
pub mod openhuman_backend;
pub mod ops;
Expand All @@ -12,4 +13,5 @@ pub use traits::{
ProviderDelta, ToolCall, ToolResultMessage, UsageInfo,
};

pub use billing_error::is_budget_exhausted_message;
pub use ops::*;
Loading
Loading