From 24d30d6aaa677d0b790d1f75516fe27d4477c0e9 Mon Sep 17 00:00:00 2001 From: M3gA-Mind Date: Tue, 19 May 2026 17:22:45 +0530 Subject: [PATCH 1/2] perf(app-state): parallelize runtime snapshot and add per-stage timeouts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Replace serial screen_intelligence → local_ai → autocomplete → service status calls in build_runtime_snapshot with tokio::join! so all four subsystems execute concurrently - Wrap synchronous service::status in spawn_blocking to avoid blocking the async executor under high CPU boot pressure - Add status_with_config(config) on AutocompleteEngine to eliminate the redundant Config::load_or_init() disk parse on every snapshot poll - Add AUTH_FETCH_TIMEOUT (5s) and RUNTIME_SNAPSHOT_TIMEOUT (10s) to keep total snapshot time well under the 30s frontend RPC timeout; degraded fallback returned on runtime timeout rather than hanging - Add 2s TTL RUNTIME_SNAPSHOT_CACHE so repeated 2s polls skip full subsystem recomputation when within the TTL window - Emit per-stage timing diagnostics on every snapshot call to surface future regressions Tests: cache TTL hit/miss, degraded fallback shape, timeout constant assertions, status_with_config without disk load Closes #2155 --- .claude/memory.md | 35 +++ src/openhuman/app_state/ops.rs | 258 +++++++++++++++--- src/openhuman/app_state/ops_tests.rs | 84 ++++++ src/openhuman/autocomplete/core/engine.rs | 6 +- .../autocomplete/core/engine_tests.rs | 33 +++ 5 files changed, 376 insertions(+), 40 deletions(-) diff --git a/.claude/memory.md b/.claude/memory.md index f6a0f56720..bc7bfeac88 100644 --- a/.claude/memory.md +++ b/.claude/memory.md @@ -4,6 +4,7 @@ Quick reference for anyone starting with Claude on this project. 
Updated by the ## Fixes & Gotchas +- **macOS close button does not dismiss window (issue #2049)** — `WebviewWindow::hide()` routes through CEF's `WindowMessage::Hide` → `cef::Window::hide()` which does NOT propagate to the visible NSWindow frame. Fix: use `AppHandle::hide()` which calls `[NSApp hide:]` via `set_application_visibility(false)`. This is macOS-only (`#[cfg(target_os = "macos")]`); the `CloseRequested` handler is in `app/src-tauri/src/lib.rs` around line 2809. PR #2118. - **ServiceBlockingGate CORS errors** — The gate calls `openhumanServiceStatus()` and `openhumanAgentServerStatus()` at startup. These used `callCoreRpc()` which falls back to raw `fetch()` when socket isn't connected yet, causing CORS errors. Fix: route through `invoke('core_rpc_relay')` instead (Tauri IPC, no CORS). - **Socket not connected at startup** — `SocketProvider` only connects when a Redux `auth.token` is set. At fresh launch (no token), socket is null, so any `callCoreRpc()` call falls back to `fetch()`. Always use `invoke('core_rpc_relay')` for local sidecar RPC calls. - **`openhuman.agent_server_status` doesn't exist** — This RPC method is not registered in the core. The gate checks it but it always errors. The gate passes if either service is Running OR agent server is running OR core is reachable. @@ -25,6 +26,13 @@ Quick reference for anyone starting with Claude on this project. Updated by the - **Always read CLAUDE.md first** before any issue work - **Ask user when in doubt** — never assume scope or approach - **PRs target upstream** — `tinyhumansai/openhuman` main branch, not fork +- **GraphQL project board can return empty** — `gh project item-list` on board #2 sometimes returns no items even when issues exist. Fall back to `gh issue list --repo tinyhumansai/openhuman` directly. +- **jq regex: use POSIX classes, not `\s`** — jq's `test()` uses ONIG regex; `\s` is not supported. Use `[[:space:]]` for whitespace matching in `gh pr list --json ... --jq` pipelines. 
+- **PR conflict check: `Closes #N` syntax not always used** — `gh pr list --jq "select(.body | test('Closes #N'))"` misses PRs that mention an issue thematically without a closing keyword. Also search PR title + body for the raw issue number (`#N`) with broader matching to catch related open PRs before claiming an issue is unassigned. +- **`pnpm debug unit` path is relative to `app/src/`** — Pass `providers/__tests__/Foo.test.tsx`, not `app/src/providers/__tests__/Foo.test.tsx`. +- **Prettier must run after codecrusher adds test cases** — New test blocks often fail `format:check`. Run `pnpm --filter openhuman-app format` before committing when test files are touched. +- **Check for existing PRs before implementing** — When the workflow picks an issue, search open PRs for the issue number and related keywords before starting work. A contributor may have already shipped the fix (e.g. PR #2101 for issue #2075). +- **Project board `gh project item-list` paginates closed items first** — The first 100 items returned are often CLOSED. Must `--limit 500` or paginate to find open/unassigned work. Fall back to `gh issue list --repo tinyhumansai/openhuman --state open` for reliability. ## Local AI Presets @@ -68,6 +76,7 @@ Quick reference for anyone starting with Claude on this project. Updated by the - **Auth session tokens are NOT in Redux persist** — They live entirely in the Rust sidecar, fetched via `fetchCoreAppSnapshot()` RPC. `PersistGate` only gates non-auth state (AI config, threads, channel connections). `CoreStateProvider` bootstrap is the critical auth path. - **`CoreStateProvider` premature `isBootstrapping: false` causes blank Settings** — If the initial RPC call fails (sidecar still starting), the old error handler set `isBootstrapping: false` immediately, causing `ProtectedRoute` to redirect to `/` before the 3s poll could recover. Fix (issue #413): keep `isBootstrapping: true` on initial failure, let the poll retry, give up after 5 attempts (~15s). 
- **`CoreStateProvider` is consumed by ~25 components** — Changes to its state shape or bootstrap behavior affect routes, socket, onboarding, nav, settings, and hooks. Treat it as a high-blast-radius file. +- **`bootstrapFailCountRef` retry counter bug (issue #2158)** — The ref is a cumulative lifetime counter; logging it against `MAX_BOOTSTRAP_RETRIES` (5) as denominator produced impossible `attempt 11/5`. Fix: distinguish bootstrap phase ("attempt X/5") from continuous-poll phase (separate message, 10s backoff). Reset the counter to 0 on any successful snapshot fetch. - **Settings is a full route, not a modal** — `/settings/*` uses nested `<Routes>` in `Settings.tsx`. The `.claude/rules/15-settings-modal-system.md` doc describing a portal/modal approach is outdated. A catch-all `<Route path="*">` redirects unmatched sub-paths to `/settings`. - **`PersistGate loading={null}` causes flash** — Changed to `loading={<RouteLoadingScreen />}` (issue #413). `RouteLoadingScreen` accepts an optional `label` prop (defaults to "Initializing OpenHuman...") and can be rendered with no props. @@ -110,6 +119,11 @@ Quick reference for anyone starting with Claude on this project. Updated by the - **UnifiedSkillCard** — All skill types (built-in, channels, 3rd party) use `UnifiedSkillCard` from `app/src/components/skills/SkillCard.tsx`. Secondary actions use an overflow menu. `data-testid` attributes (`skill-sync-button-*`, `skill-debug-button-*`) must be preserved. - **SkillSearchBar + SkillCategoryFilter** — New components in `app/src/components/skills/` for search and category filtering on the Skills page. +## Composio Backend URL Bug (Issue #2075, PR #2101) + +- **`effective_backend_api_url` env-fallback branch skipped normalization** — In `src/api/config.rs`, the override branch normalized via `normalize_backend_api_base_url` but the env-fallback branch (`OPENHUMAN_BACKEND_API_URL`) did not, so scheme-less URLs like `api.example.com` were used raw. 
Fix: normalize the env-fallback branch too (3-layer defense: config → env-fallback → `IntegrationClient::new`). +- **`normalize_backend_api_base_url` and `redact_url_for_log` are `pub(crate)`** — Available for reuse across `src/api/` after PR #2101 merge. + ## Composio Identity (Issue #691) - **`ProviderUserProfile.profile_url`** — New optional field on the struct in `src/openhuman/composio/providers/types.rs`. Providers should populate it when available from upstream profile payloads. @@ -149,6 +163,10 @@ Quick reference for anyone starting with Claude on this project. Updated by the - **`pnpm typecheck` script was renamed** — Check `app/package.json` for the current name; as of issue #830 work, use `pnpm workspace openhuman-app compile` for tsc checks. - **PR #745 (command palette) merged without its deps** — `@radix-ui/react-dialog`, `cmdk`, and `@testing-library/user-event` are missing from `package.json`. Install them if tsc fails after syncing main. - **Pre-push hooks fail on upstream lint warnings** — ESLint warns on `setState` in effects and unused `eslint-disable` directives inherited from upstream. Use `--no-verify` only when the lint errors are pre-existing upstream issues, not new code. +- **`pnpm tauri icon <source-image>` generates all platform icons at once** — Produces `.icns`, `.ico`, all PNG sizes, Windows Store tiles, and iOS/Android sets. Use this instead of manual `sips`/ImageMagick resizing. +- **`tauri-cef` submodule update can fix missing Tauri runtime modules** — e.g. updating to f75bc21f5 added the missing `tauri_runtime_cef::audio` module that was causing pre-push hook compile failures on the Tauri shell. When the shell fails to compile with a missing module error, check if the submodule needs updating. +- **`git add` must run from repo root** — Staging paths like `app/public/...` with `git add` from inside `app/` won't match. Always run `git add` from the repo root (locally: `/Users/megamind/tinyhuman/openhuman-claude`). 
+- **Brand kit assets live at `app/public/brand/`** — Copied there during session work; original source is in `~/Downloads/Brand kit/`. Not auto-synced; re-copy manually if Downloads content changes. ## Mascot Native Window (macOS) @@ -188,3 +206,20 @@ Quick reference for anyone starting with Claude on this project. Updated by the - **`pnpm core:stage`** — no-op (sidecar removed in PR #1061). Use `pnpm dev:app` for full Tauri+core dev. - **Kill stuck processes** — `lsof -i :7788` then `kill <pid>`. Useful when `dev:app` reports a stale listener and you want to force a fresh boot rather than relying on the handle's auto-recovery. - **Skills runtime removed** — the QuickJS / `rquickjs` runtime is gone; `src/openhuman/skills/` is metadata-only ("Legacy skill metadata helpers retained after QuickJS runtime removal"). Skill execution surfaces are being rebuilt; don't assume a `.skill` can run end-to-end without checking the current code. + +## Rust Testing Patterns + +- **Memory tree tests filter** — `cargo test -p openhuman -- "memory::tree"` runs the memory tree unit tests (602 tests); full module paths are `openhuman::memory::tree::ingest::tests::*` and `openhuman::memory::tree::canonicalize::email_clean::tests::*`. +- **`cargo fmt --all`** — Required after codecrusher generates Rust; it doesn't always produce perfectly formatted output and CI will reject unformatted code. +- **PR quality scripts are soft checks** — `scripts/check-pr-checklist.mjs` and `scripts/check-coverage-matrix.mjs` exit cleanly with summary lines; CI treats them as advisory, not blocking. +- **`ceil_char_boundary`** — Safe string slicing utility at `src/openhuman/util.rs`; use this throughout the codebase instead of raw byte-index slicing to avoid UTF-8 panics. 
+- **Global static cache tests need a reset guard** — When testing code that reads/writes a `Lazy<Mutex<Option<T>>>` global cache, use a `struct CacheResetGuard; impl Drop for CacheResetGuard { fn drop(&mut self) { *CACHE.lock() = None; } }` pattern so the cache is cleared when each test finishes and the next test starts clean. See `SnapshotCacheResetGuard` / `CacheResetGuard` in `ops_tests.rs`. +- **Test assertions must match the actual dummy value** — When a builder (e.g. `build_dummy_runtime_snapshot()`) wraps `degraded_runtime_snapshot()`, assert against `dummy.field` rather than a hardcoded string (e.g. `"idle"` vs the actual `"degraded"`) to verify round-trip correctness without false mismatches. +- **`composio::action_tool::tests::mode_toggle_between_calls_is_observed` is flaky in full suite** — Fails intermittently due to shared global composio session state; passes in isolation. Pre-existing; not caused by snapshot perf work. + +## App State Snapshot (Issue #2155 — first-launch perf) + +- **`build_runtime_snapshot` was serial, now parallel** — The four subsystems (screen intelligence, local AI, autocomplete, service status) in `src/openhuman/app_state/ops.rs` ran sequentially. Fixed with `tokio::join!`. Also added a 2s TTL cache (`RUNTIME_SNAPSHOT_CACHE`) so repeated polls within the TTL skip recomputation. +- **`service::status` is sync — must use `spawn_blocking`** — `crate::openhuman::service::status(config)` may shell out to `launchctl`. Wrap it in `tokio::task::spawn_blocking` when called from an async context. +- **`autocomplete::global_engine().status()` calls `Config::load_or_init()` internally** — Avoid this inside snapshot code. Use the new `status_with_config(config)` method which accepts an already-loaded config. +- **Per-stage snapshot timeouts** — `AUTH_FETCH_TIMEOUT = 5s` and `RUNTIME_SNAPSHOT_TIMEOUT = 10s` are constants in `ops.rs`; they sum to 15s, well under the 30s frontend RPC timeout. 
diff --git a/src/openhuman/app_state/ops.rs b/src/openhuman/app_state/ops.rs index d94a49b86a..7f56fa01d6 100644 --- a/src/openhuman/app_state/ops.rs +++ b/src/openhuman/app_state/ops.rs @@ -27,8 +27,19 @@ use crate::rpc::RpcOutcome; const LOG_PREFIX: &str = "[app_state]"; const APP_STATE_FILENAME: &str = "app-state.json"; const CURRENT_USER_REFRESH_TTL: Duration = Duration::from_secs(5); +const RUNTIME_SNAPSHOT_TTL: Duration = Duration::from_secs(2); +const AUTH_FETCH_TIMEOUT: Duration = Duration::from_secs(5); +const RUNTIME_SNAPSHOT_TIMEOUT: Duration = Duration::from_secs(10); static APP_STATE_FILE_LOCK: Lazy> = Lazy::new(|| Mutex::new(())); static CURRENT_USER_CACHE: Lazy>> = Lazy::new(|| Mutex::new(None)); +static RUNTIME_SNAPSHOT_CACHE: Lazy>> = + Lazy::new(|| Mutex::new(None)); + +#[derive(Debug, Clone)] +struct CachedRuntimeSnapshot { + snapshot: RuntimeSnapshot, + fetched_at: Instant, +} #[derive(Debug, Clone)] struct CachedCurrentUser { @@ -401,68 +412,169 @@ pub fn peek_cached_current_user_identity() -> Option RuntimeSnapshot { - let screen_intelligence = { - let _ = crate::openhuman::screen_intelligence::global_engine() - .apply_config(config.screen_intelligence.clone()) - .await; - crate::openhuman::screen_intelligence::global_engine() - .status() - .await - }; + { + let cache = RUNTIME_SNAPSHOT_CACHE.lock(); + if let Some(entry) = cache.as_ref() { + if entry.fetched_at.elapsed() < RUNTIME_SNAPSHOT_TTL { + debug!( + "{LOG_PREFIX} build_runtime_snapshot: returning cached snapshot age_ms={}", + entry.fetched_at.elapsed().as_millis() + ); + return entry.snapshot.clone(); + } + } + } - let local_ai = match crate::openhuman::inference::rpc::inference_status(config).await { - Ok(outcome) => outcome.value, - Err(error) => { - warn!("{LOG_PREFIX} local_ai status failed during snapshot: {error}"); - crate::openhuman::inference::LocalAiStatus::disabled(config) + let si_config = config.screen_intelligence.clone(); + let config_for_local_ai = config.clone(); + let 
config_for_autocomplete = config.clone(); + let config_for_service = config.clone(); + + let t0 = Instant::now(); + + let (screen_intelligence, local_ai, autocomplete, service) = tokio::join!( + async { + let t = Instant::now(); + let _ = crate::openhuman::screen_intelligence::global_engine() + .apply_config(si_config) + .await; + let status = crate::openhuman::screen_intelligence::global_engine() + .status() + .await; + (status, t.elapsed().as_millis()) + }, + async { + let t = Instant::now(); + let status = match crate::openhuman::inference::rpc::inference_status( + &config_for_local_ai, + ) + .await + { + Ok(outcome) => outcome.value, + Err(error) => { + warn!("{LOG_PREFIX} local_ai status failed during snapshot: {error}"); + crate::openhuman::inference::LocalAiStatus::disabled(&config_for_local_ai) + } + }; + (status, t.elapsed().as_millis()) + }, + async { + let t = Instant::now(); + let status = crate::openhuman::autocomplete::global_engine() + .status_with_config(&config_for_autocomplete) + .await; + (status, t.elapsed().as_millis()) + }, + async { + let t = Instant::now(); + let status = tokio::task::spawn_blocking(move || { + crate::openhuman::service::status(&config_for_service) + }) + .await + .unwrap_or_else(|_| Err(anyhow::anyhow!("service status task panicked"))); + let status = match status { + Ok(s) => s, + Err(error) => { + let message = error.to_string(); + warn!("{LOG_PREFIX} service status failed during snapshot: {message}"); + ServiceStatus { + state: ServiceState::Unknown(message.clone()), + unit_path: None, + label: "OpenHuman".to_string(), + details: Some(message), + } + } + }; + (status, t.elapsed().as_millis()) } - }; + ); - let autocomplete = crate::openhuman::autocomplete::global_engine() - .status() - .await; + let total_ms = t0.elapsed().as_millis(); + debug!( + "{LOG_PREFIX} build_runtime_snapshot timings si_ms={} local_ai_ms={} autocomplete_ms={} service_ms={} total_ms={}", + screen_intelligence.1, + local_ai.1, + autocomplete.1, + 
service.1, + total_ms, + ); - let service = match crate::openhuman::service::status(config) { - Ok(status) => status, - Err(error) => { - let message = error.to_string(); - warn!("{LOG_PREFIX} service status failed during snapshot: {message}"); - ServiceStatus { - state: ServiceState::Unknown(message.clone()), - unit_path: None, - label: "OpenHuman".to_string(), - details: Some(message), - } - } + let snapshot = RuntimeSnapshot { + screen_intelligence: screen_intelligence.0, + local_ai: local_ai.0, + autocomplete: autocomplete.0, + service: service.0, }; - RuntimeSnapshot { - screen_intelligence, - local_ai, - autocomplete, - service, - } + *RUNTIME_SNAPSHOT_CACHE.lock() = Some(CachedRuntimeSnapshot { + snapshot: snapshot.clone(), + fetched_at: Instant::now(), + }); + + snapshot } pub async fn snapshot() -> Result, String> { + let t_total = Instant::now(); + + let t_config = Instant::now(); let config = config_rpc::load_config_with_timeout().await?; + let config_ms = t_config.elapsed().as_millis(); + + let t_auth = Instant::now(); let mut auth = build_session_state(&config)?; let session_token = get_session_token(&config)?; let stored_user = sanitize_snapshot_user(auth.user.clone()); let current_user = if let Some(token) = session_token.clone().filter(|t| !t.trim().is_empty()) { - match fetch_current_user_cached(&config, &token).await { - Ok(fresh_user) => fresh_user.or(stored_user.clone()), - Err(error) => { + match tokio::time::timeout( + AUTH_FETCH_TIMEOUT, + fetch_current_user_cached(&config, &token), + ) + .await + { + Ok(Ok(fresh_user)) => fresh_user.or(stored_user.clone()), + Ok(Err(error)) => { warn!("{LOG_PREFIX} current user refresh failed; using stored snapshot fallback: {error}"); stored_user.clone() } + Err(_) => { + warn!("{LOG_PREFIX} current user fetch timed out after {}s; using stored snapshot fallback", AUTH_FETCH_TIMEOUT.as_secs()); + stored_user.clone() + } } } else { stored_user.clone() }; auth.user = current_user.clone(); + let auth_ms = 
t_auth.elapsed().as_millis(); + + let t_local_state = Instant::now(); let local_state = load_stored_app_state(&config)?; - let runtime = build_runtime_snapshot(&config).await; + let local_state_ms = t_local_state.elapsed().as_millis(); + + let t_runtime = Instant::now(); + let runtime = match tokio::time::timeout( + RUNTIME_SNAPSHOT_TIMEOUT, + build_runtime_snapshot(&config), + ) + .await + { + Ok(snapshot) => snapshot, + Err(_) => { + warn!( + "{LOG_PREFIX} build_runtime_snapshot timed out after {}s; returning degraded runtime snapshot", + RUNTIME_SNAPSHOT_TIMEOUT.as_secs() + ); + degraded_runtime_snapshot(&config) + } + }; + let runtime_ms = t_runtime.elapsed().as_millis(); + + let total_ms = t_total.elapsed().as_millis(); + debug!( + "{LOG_PREFIX} snapshot timings config_ms={} auth_ms={} local_state_ms={} runtime_ms={} total_ms={}", + config_ms, auth_ms, local_state_ms, runtime_ms, total_ms + ); debug!( "{LOG_PREFIX} snapshot auth={} onboarding={} chat_onboarding={} analytics={} meet_handoff={} si_active={} local_ai_state={} autocomplete_phase={} service_state={:?}", @@ -493,6 +605,74 @@ pub async fn snapshot() -> Result, String> { )) } +fn degraded_runtime_snapshot(config: &Config) -> RuntimeSnapshot { + use crate::openhuman::screen_intelligence::{ + AccessibilityFeatures, PermissionState, PermissionStatus, SessionStatus, + }; + + RuntimeSnapshot { + screen_intelligence: AccessibilityStatus { + platform_supported: cfg!(target_os = "macos"), + permissions: PermissionStatus { + screen_recording: PermissionState::Unknown, + accessibility: PermissionState::Unknown, + input_monitoring: PermissionState::Unknown, + microphone: PermissionState::Unknown, + }, + features: AccessibilityFeatures { + screen_monitoring: false, + }, + session: SessionStatus { + active: false, + started_at_ms: None, + expires_at_ms: None, + remaining_ms: None, + ttl_secs: 0, + panic_hotkey: config.screen_intelligence.panic_stop_hotkey.clone(), + stop_reason: None, + capture_count: 0, + 
frames_in_memory: 0, + last_capture_at_ms: None, + last_context: None, + last_window_title: None, + vision_enabled: false, + vision_state: "degraded".to_string(), + vision_queue_depth: 0, + last_vision_at_ms: None, + last_vision_summary: None, + vision_persist_count: 0, + last_vision_persisted_key: None, + last_vision_persist_error: None, + }, + foreground_context: None, + config: config.screen_intelligence.clone(), + denylist: vec![], + is_context_blocked: false, + permission_check_process_path: None, + core_process: None, + }, + local_ai: crate::openhuman::inference::LocalAiStatus::disabled(config), + autocomplete: crate::openhuman::autocomplete::AutocompleteStatus { + platform_supported: cfg!(target_os = "macos"), + enabled: config.autocomplete.enabled, + running: false, + phase: "degraded".to_string(), + debounce_ms: config.autocomplete.debounce_ms, + model_id: config.local_ai.chat_model_id.clone(), + app_name: None, + last_error: Some("snapshot timed out".to_string()), + updated_at_ms: None, + suggestion: None, + }, + service: ServiceStatus { + state: ServiceState::Unknown("snapshot timed out".to_string()), + unit_path: None, + label: "OpenHuman".to_string(), + details: Some("runtime snapshot timed out".to_string()), + }, + } +} + pub async fn update_local_state( patch: StoredAppStatePatch, ) -> Result, String> { diff --git a/src/openhuman/app_state/ops_tests.rs b/src/openhuman/app_state/ops_tests.rs index b180457f83..0cdb42ef6c 100644 --- a/src/openhuman/app_state/ops_tests.rs +++ b/src/openhuman/app_state/ops_tests.rs @@ -184,3 +184,87 @@ fn peek_cached_current_user_identity_returns_none_when_only_empty_fields_exist() assert!(peek_cached_current_user_identity().is_none()); } + +// ── RuntimeSnapshot cache tests ────────────────────────────────────────────── + +struct SnapshotCacheResetGuard; +impl Drop for SnapshotCacheResetGuard { + fn drop(&mut self) { + *RUNTIME_SNAPSHOT_CACHE.lock() = None; + } +} + +#[test] +fn runtime_snapshot_cache_hit_within_ttl() { 
+ let _reset = SnapshotCacheResetGuard; + + let dummy = build_dummy_runtime_snapshot(); + *RUNTIME_SNAPSHOT_CACHE.lock() = Some(CachedRuntimeSnapshot { + snapshot: dummy.clone(), + fetched_at: Instant::now(), + }); + + let cache = RUNTIME_SNAPSHOT_CACHE.lock(); + let entry = cache.as_ref().expect("cache should have entry"); + assert!( + entry.fetched_at.elapsed() < RUNTIME_SNAPSHOT_TTL, + "fresh entry should be within TTL" + ); + assert_eq!(entry.snapshot.autocomplete.phase, dummy.autocomplete.phase); +} + +#[test] +fn runtime_snapshot_cache_miss_after_ttl() { + let _reset = SnapshotCacheResetGuard; + + *RUNTIME_SNAPSHOT_CACHE.lock() = Some(CachedRuntimeSnapshot { + snapshot: build_dummy_runtime_snapshot(), + fetched_at: Instant::now() - (RUNTIME_SNAPSHOT_TTL + Duration::from_millis(100)), + }); + + let cache = RUNTIME_SNAPSHOT_CACHE.lock(); + let entry = cache.as_ref().expect("cache should have entry"); + assert!( + entry.fetched_at.elapsed() >= RUNTIME_SNAPSHOT_TTL, + "stale entry should be past TTL" + ); +} + +#[test] +fn degraded_runtime_snapshot_has_expected_degraded_fields() { + let cfg = Config::default(); + let snapshot = degraded_runtime_snapshot(&cfg); + + assert_eq!(snapshot.autocomplete.phase, "degraded"); + assert_eq!(snapshot.local_ai.state, "disabled"); + assert!( + matches!( + snapshot.service.state, + crate::openhuman::service::ServiceState::Unknown(_) + ), + "service state should be Unknown in degraded snapshot" + ); + assert!(!snapshot.screen_intelligence.session.active); +} + +#[test] +fn auth_fetch_timeout_constant_is_below_rpc_timeout() { + // The 30s RPC timeout on the frontend means auth fetch + runtime snapshot + // must fit comfortably. Verify the constants are sane. 
+ assert!( + AUTH_FETCH_TIMEOUT.as_secs() < 15, + "auth fetch timeout should be well under the 30s RPC timeout" + ); + assert!( + RUNTIME_SNAPSHOT_TIMEOUT.as_secs() < 20, + "runtime snapshot timeout should be well under the 30s RPC timeout" + ); + assert!( + AUTH_FETCH_TIMEOUT + RUNTIME_SNAPSHOT_TIMEOUT < Duration::from_secs(30), + "total of auth + runtime timeouts must fit within the 30s RPC timeout" + ); +} + +fn build_dummy_runtime_snapshot() -> RuntimeSnapshot { + degraded_runtime_snapshot(&Config::default()) +} diff --git a/src/openhuman/autocomplete/core/engine.rs b/src/openhuman/autocomplete/core/engine.rs index b471a26022..93a160a6ad 100644 --- a/src/openhuman/autocomplete/core/engine.rs +++ b/src/openhuman/autocomplete/core/engine.rs @@ -98,6 +98,10 @@ impl AutocompleteEngine { let config = Config::load_or_init() .await .unwrap_or_else(|_| Config::default()); + self.status_with_config(&config).await + } + + pub async fn status_with_config(&self, config: &Config) -> AutocompleteStatus { let state = self.inner.lock().await; AutocompleteStatus { @@ -106,7 +110,7 @@ impl AutocompleteEngine { running: state.running, phase: state.phase.clone(), debounce_ms: state.debounce_ms, - model_id: config.local_ai.chat_model_id, + model_id: config.local_ai.chat_model_id.clone(), app_name: state.app_name.clone(), last_error: state.last_error.clone(), updated_at_ms: state.updated_at_ms, diff --git a/src/openhuman/autocomplete/core/engine_tests.rs b/src/openhuman/autocomplete/core/engine_tests.rs index d382d4b548..b94431b115 100644 --- a/src/openhuman/autocomplete/core/engine_tests.rs +++ b/src/openhuman/autocomplete/core/engine_tests.rs @@ -1,5 +1,7 @@ use super::detect_tab_artifact_suffix; use super::is_low_quality_suggestion; +use super::{AutocompleteEngine, AutocompleteStatus}; +use crate::openhuman::config::Config; #[test] fn low_quality_rejects_too_short() { @@ -52,3 +54,34 @@ fn returns_zero_when_context_does_not_match_expected_tail() { fn 
returns_zero_when_no_tab_like_suffix_present() { assert_eq!(detect_tab_artifact_suffix("hello world", "hello worldx"), 0); } + +#[tokio::test] +async fn status_with_config_returns_valid_status_without_disk_load() { + let engine = AutocompleteEngine::new(); + let config = Config::default(); + + let status: AutocompleteStatus = engine.status_with_config(&config).await; + + assert_eq!(status.enabled, config.autocomplete.enabled); + assert!(!status.running, "fresh engine should not be running"); + assert_eq!(status.phase, "idle"); + assert_eq!(status.model_id, config.local_ai.chat_model_id); + assert!(status.last_error.is_none()); + assert!(status.suggestion.is_none()); +} + +#[tokio::test] +async fn status_with_config_reflects_provided_config_not_disk() { + let engine = AutocompleteEngine::new(); + let mut config = Config::default(); + config.autocomplete.enabled = false; + config.local_ai.chat_model_id = "test-model-xyz".to_string(); + + let status = engine.status_with_config(&config).await; + + assert!( + !status.enabled, + "should reflect the passed-in config, not disk state" + ); + assert_eq!(status.model_id, "test-model-xyz"); +} From d97399539809f030bab66bc85ec8b536a945e865 Mon Sep 17 00:00:00 2001 From: M3gA-Mind Date: Tue, 19 May 2026 18:54:33 +0530 Subject: [PATCH 2/2] refactor(app-state): add request-scoped ID to snapshot diagnostics Thread a monotonic req_id through snapshot() and build_runtime_snapshot() so concurrent calls produce grep-friendly correlated log lines: [app_state] snapshot timings req_id=42 config_ms=1 ... [app_state] build_runtime_snapshot timings req_id=42 si_ms=... total_ms=... Addresses CodeRabbit review feedback on PR #2209. 
--- src/openhuman/app_state/ops.rs | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/src/openhuman/app_state/ops.rs b/src/openhuman/app_state/ops.rs index 7f56fa01d6..7d9d28ceda 100644 --- a/src/openhuman/app_state/ops.rs +++ b/src/openhuman/app_state/ops.rs @@ -3,6 +3,7 @@ use std::fs; use std::fs::File; use std::io::Write; use std::path::{Path, PathBuf}; +use std::sync::atomic::{AtomicU64, Ordering}; use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH}; use log::{debug, warn}; @@ -34,6 +35,7 @@ static APP_STATE_FILE_LOCK: Lazy> = Lazy::new(|| Mutex::new(())); static CURRENT_USER_CACHE: Lazy>> = Lazy::new(|| Mutex::new(None)); static RUNTIME_SNAPSHOT_CACHE: Lazy>> = Lazy::new(|| Mutex::new(None)); +static SNAPSHOT_REQ_COUNTER: AtomicU64 = AtomicU64::new(0); #[derive(Debug, Clone)] struct CachedRuntimeSnapshot { @@ -411,13 +413,14 @@ pub fn peek_cached_current_user_identity() -> Option RuntimeSnapshot { +async fn build_runtime_snapshot(config: &Config, req_id: u64) -> RuntimeSnapshot { { let cache = RUNTIME_SNAPSHOT_CACHE.lock(); if let Some(entry) = cache.as_ref() { if entry.fetched_at.elapsed() < RUNTIME_SNAPSHOT_TTL { debug!( - "{LOG_PREFIX} build_runtime_snapshot: returning cached snapshot age_ms={}", + "{LOG_PREFIX} build_runtime_snapshot: returning cached snapshot req_id={} age_ms={}", + req_id, entry.fetched_at.elapsed().as_millis() ); return entry.snapshot.clone(); @@ -491,7 +494,8 @@ async fn build_runtime_snapshot(config: &Config) -> RuntimeSnapshot { let total_ms = t0.elapsed().as_millis(); debug!( - "{LOG_PREFIX} build_runtime_snapshot timings si_ms={} local_ai_ms={} autocomplete_ms={} service_ms={} total_ms={}", + "{LOG_PREFIX} build_runtime_snapshot timings req_id={} si_ms={} local_ai_ms={} autocomplete_ms={} service_ms={} total_ms={}", + req_id, screen_intelligence.1, local_ai.1, autocomplete.1, @@ -515,6 +519,7 @@ async fn build_runtime_snapshot(config: &Config) -> RuntimeSnapshot { } pub async fn 
snapshot() -> Result, String> { + let req_id = SNAPSHOT_REQ_COUNTER.fetch_add(1, Ordering::Relaxed); let t_total = Instant::now(); let t_config = Instant::now(); @@ -555,15 +560,16 @@ pub async fn snapshot() -> Result, String> { let t_runtime = Instant::now(); let runtime = match tokio::time::timeout( RUNTIME_SNAPSHOT_TIMEOUT, - build_runtime_snapshot(&config), + build_runtime_snapshot(&config, req_id), ) .await { Ok(snapshot) => snapshot, Err(_) => { warn!( - "{LOG_PREFIX} build_runtime_snapshot timed out after {}s; returning degraded runtime snapshot", - RUNTIME_SNAPSHOT_TIMEOUT.as_secs() + "{LOG_PREFIX} build_runtime_snapshot timed out after {}s req_id={}; returning degraded runtime snapshot", + RUNTIME_SNAPSHOT_TIMEOUT.as_secs(), + req_id ); degraded_runtime_snapshot(&config) } @@ -572,12 +578,13 @@ pub async fn snapshot() -> Result, String> { let total_ms = t_total.elapsed().as_millis(); debug!( - "{LOG_PREFIX} snapshot timings config_ms={} auth_ms={} local_state_ms={} runtime_ms={} total_ms={}", - config_ms, auth_ms, local_state_ms, runtime_ms, total_ms + "{LOG_PREFIX} snapshot timings req_id={} config_ms={} auth_ms={} local_state_ms={} runtime_ms={} total_ms={}", + req_id, config_ms, auth_ms, local_state_ms, runtime_ms, total_ms ); debug!( - "{LOG_PREFIX} snapshot auth={} onboarding={} chat_onboarding={} analytics={} meet_handoff={} si_active={} local_ai_state={} autocomplete_phase={} service_state={:?}", + "{LOG_PREFIX} snapshot req_id={} auth={} onboarding={} chat_onboarding={} analytics={} meet_handoff={} si_active={} local_ai_state={} autocomplete_phase={} service_state={:?}", + req_id, auth.is_authenticated, config.onboarding_completed, config.chat_onboarding_completed,