From 69160ce1f8da9a9dd4c99df496cf3f6e34175868 Mon Sep 17 00:00:00 2001 From: gordonlu Date: Tue, 2 Jun 2026 17:57:46 +0800 Subject: [PATCH 1/6] fix: detect engine task death mid-turn and recover UI immediately When the engine task panics (caught by spawn_supervised's catch_unwind) between TurnStarted and TurnComplete, the event channel's sender is dropped and try_recv returns Disconnected. The UI's event loop (while let Ok(event) = rx.try_recv()) exits silently on Err, leaving the turn stuck with is_loading=true until the 300-second TURN_STALL_WATCHDOG_TIMEOUT. Add a post-loop check for TryRecvError::Disconnected after the event processing loop. If the channel is disconnected while is_loading is true, immediately finalize streaming state and reset the UI, reducing recovery from 300 seconds to ~1 frame. --- crates/tui/src/tui/ui.rs | 29 +++++++++++++++++++ crates/tui/src/tui/ui/tests.rs | 53 ++++++++++++++++++++++++++++++++++ 2 files changed, 82 insertions(+) diff --git a/crates/tui/src/tui/ui.rs b/crates/tui/src/tui/ui.rs index 9aa56b05a..e8b73b953 100644 --- a/crates/tui/src/tui/ui.rs +++ b/crates/tui/src/tui/ui.rs @@ -89,6 +89,7 @@ use crate::tui::pager::PagerView; use crate::tui::persistence_actor::{self, PersistRequest}; use crate::tui::plan_prompt::PlanPromptView; use crate::tui::scrolling::TranscriptScroll; +use tokio::sync::mpsc::error::TryRecvError; // SelectionAutoscroll unused use crate::tui::session_picker::SessionPickerView; use crate::tui::shell_job_routing::{ @@ -2234,6 +2235,34 @@ async fn run_event_loop( } } } + // Detect engine task death mid-turn. When the engine task + // panics (caught by spawn_supervised's catch_unwind) or exits + // unexpectedly between TurnStarted and TurnComplete, the event + // channel's sender is dropped and try_recv returns Disconnected. + // The while-let loop above exits silently on Err, so we must + // check post-loop and recover the UI state immediately instead + // of waiting for the 300-second TURN_STALL_WATCHDOG_TIMEOUT. + if app.is_loading && matches!(rx.try_recv(), Err(TryRecvError::Disconnected)) { + streaming_thinking::finalize_current(app); + app.finalize_streaming_assistant_as_interrupted(); + app.finalize_active_cell_as_interrupted(); + app.streaming_state.reset(); + app.streaming_message_index = None; + app.streaming_thinking_active_entry = None; + + app.is_loading = false; + app.turn_started_at = None; + app.turn_last_activity_at = None; + app.runtime_turn_status = None; + app.runtime_turn_id = None; + app.dispatch_started_at = None; + app.user_scrolled_during_stream = false; + app.push_status_toast( + "Engine process has terminated unexpectedly.", + StatusToastLevel::Error, + None, + ); + } } if let Some(index) = app.streaming_message_index { let committed = app.streaming_state.commit_text(0); diff --git a/crates/tui/src/tui/ui/tests.rs b/crates/tui/src/tui/ui/tests.rs index 04d1dc4cc..cee991889 100644 --- a/crates/tui/src/tui/ui/tests.rs +++ b/crates/tui/src/tui/ui/tests.rs @@ -2607,6 +2607,59 @@ fn turn_liveness_recovers_stalled_in_progress_turn() { assert!(toast.text.contains("Turn stalled")); } +#[test] +fn engine_event_channel_disconnect_recovers_mid_turn_ui_state() { + // Simulate the event-loop post-check that detects engine task death. + // Creates a real mpsc channel, sets up app as if a turn is in progress, + // drops the sender (mirroring engine task exit in spawn_supervised), + // and verifies the post-loop recovery logic cleans up the UI state. + let (tx_event, mut rx) = tokio::sync::mpsc::channel::(256); + drop(tx_event); + + // Confirm the channel is disconnected + assert!(matches!(rx.try_recv(), Err(TryRecvError::Disconnected))); + + let mut app = create_test_app(); + app.is_loading = true; + app.runtime_turn_status = Some("in_progress".to_string()); + app.runtime_turn_id = Some("turn-42".to_string()); + app.streaming_message_index = Some(0); + app.user_scrolled_during_stream = true; + + // Apply the same post-loop logic from ui.rs + if app.is_loading && matches!(rx.try_recv(), Err(TryRecvError::Disconnected)) { + streaming_thinking::finalize_current(&mut app); + app.finalize_streaming_assistant_as_interrupted(); + app.finalize_active_cell_as_interrupted(); + app.streaming_state.reset(); + app.streaming_message_index = None; + app.streaming_thinking_active_entry = None; + + app.is_loading = false; + app.turn_started_at = None; + app.turn_last_activity_at = None; + app.runtime_turn_status = None; + app.runtime_turn_id = None; + app.dispatch_started_at = None; + app.user_scrolled_during_stream = false; + app.push_status_toast( + "Engine process has terminated unexpectedly.", + StatusToastLevel::Error, + None, + ); + } + + // Verify the fix: UI state is fully recovered + assert!(!app.is_loading, "loading must be cleared"); + assert!(app.runtime_turn_status.is_none(), "turn status cleared"); + assert!(app.runtime_turn_id.is_none(), "turn id cleared"); + assert!(app.streaming_message_index.is_none()); + assert!(!app.user_scrolled_during_stream); + let toast = app.status_toasts.back().expect("error toast pushed"); + assert_eq!(toast.level, StatusToastLevel::Error); + assert!(toast.text.contains("Engine process has terminated")); +} + #[test] fn fixed_model_auto_thinking_skips_auto_model_router() { let mut app = create_test_app(); From 5fc674e14db4102ab9c541738309bbb50debb1c4 Mon Sep 17 00:00:00 2001 From: Gordon Lu Date: Tue, 2 Jun 2026 21:10:56 +0800 Subject: [PATCH 2/6] fix: address PR review - add needs_redraw, fix import order, expand disconnect detection to compaction/purge --- crates/tui/src/tui/ui.rs | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/crates/tui/src/tui/ui.rs b/crates/tui/src/tui/ui.rs index e8b73b953..622374301 100644 --- a/crates/tui/src/tui/ui.rs +++ b/crates/tui/src/tui/ui.rs @@ -33,6 +33,8 @@ use tracing; #[cfg(target_os = "windows")] use windows::Win32::System::Console::{GetConsoleMode, GetStdHandle, SetConsoleMode}; +use tokio::sync::mpsc::error::TryRecvError; + use crate::audit::log_sensitive_event; use crate::automation_manager::{AutomationManager, AutomationSchedulerConfig, spawn_scheduler}; use crate::client::{ @@ -89,7 +91,6 @@ use crate::tui::pager::PagerView; use crate::tui::persistence_actor::{self, PersistRequest}; use crate::tui::plan_prompt::PlanPromptView; use crate::tui::scrolling::TranscriptScroll; -use tokio::sync::mpsc::error::TryRecvError; // SelectionAutoscroll unused use crate::tui::session_picker::SessionPickerView; use crate::tui::shell_job_routing::{ @@ -2242,7 +2243,9 @@ async fn run_event_loop( // The while-let loop above exits silently on Err, so we must // check post-loop and recover the UI state immediately instead // of waiting for the 300-second TURN_STALL_WATCHDOG_TIMEOUT. - if app.is_loading && matches!(rx.try_recv(), Err(TryRecvError::Disconnected)) { + if (app.is_loading || app.is_compacting || app.is_purging) + && matches!(rx.try_recv(), Err(TryRecvError::Disconnected)) + { streaming_thinking::finalize_current(app); app.finalize_streaming_assistant_as_interrupted(); app.finalize_active_cell_as_interrupted(); @@ -2251,6 +2254,11 @@ async fn run_event_loop( app.streaming_thinking_active_entry = None; app.is_loading = false; + app.is_compacting = false; + app.is_purging = false; + app.active_allowed_tools = None; + app.agent_progress.clear(); + app.agent_activity_started_at = None; app.turn_started_at = None; app.turn_last_activity_at = None; app.runtime_turn_status = None; @@ -2262,6 +2270,7 @@ async fn run_event_loop( StatusToastLevel::Error, None, ); + app.needs_redraw = true; } } if let Some(index) = app.streaming_message_index { From 17ab0d243aa522c5538de5b4348f0c89240fdef0 Mon Sep 17 00:00:00 2001 From: Gordon Lu Date: Tue, 2 Jun 2026 21:21:55 +0800 Subject: [PATCH 3/6] fix: use rx.is_closed() instead of try_recv() probe to avoid race condition --- crates/tui/src/tui/ui.rs | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/crates/tui/src/tui/ui.rs b/crates/tui/src/tui/ui.rs index 622374301..66a0fcaab 100644 --- a/crates/tui/src/tui/ui.rs +++ b/crates/tui/src/tui/ui.rs @@ -33,8 +33,6 @@ use tracing; #[cfg(target_os = "windows")] use windows::Win32::System::Console::{GetConsoleMode, GetStdHandle, SetConsoleMode}; -use tokio::sync::mpsc::error::TryRecvError; - use crate::audit::log_sensitive_event; use crate::automation_manager::{AutomationManager, AutomationSchedulerConfig, spawn_scheduler}; use crate::client::{ @@ -2239,12 +2237,13 @@ async fn run_event_loop( // Detect engine task death mid-turn. When the engine task // panics (caught by spawn_supervised's catch_unwind) or exits // unexpectedly between TurnStarted and TurnComplete, the event - // channel's sender is dropped and try_recv returns Disconnected. - // The while-let loop above exits silently on Err, so we must - // check post-loop and recover the UI state immediately instead - // of waiting for the 300-second TURN_STALL_WATCHDOG_TIMEOUT. + // channel's sender is dropped. The while-let loop above exits + // silently on Err, so we must check post-loop and recover the + // UI state immediately instead of waiting for the 300-second + // TURN_STALL_WATCHDOG_TIMEOUT. Use is_closed() rather than + // try_recv() so the probe never consumes a valid event. if (app.is_loading || app.is_compacting || app.is_purging) - && matches!(rx.try_recv(), Err(TryRecvError::Disconnected)) + && rx.is_closed() { streaming_thinking::finalize_current(app); app.finalize_streaming_assistant_as_interrupted(); From 49e5239c16def4e881d13034edbe1a73f14d6fdb Mon Sep 17 00:00:00 2001 From: gordonlu Date: Tue, 2 Jun 2026 21:36:13 +0800 Subject: [PATCH 4/6] fix: align test with production logic, fix fmt --- crates/tui/src/tui/ui.rs | 4 +--- crates/tui/src/tui/ui/tests.rs | 20 +++++++++++++++++--- 2 files changed, 18 insertions(+), 6 deletions(-) diff --git a/crates/tui/src/tui/ui.rs b/crates/tui/src/tui/ui.rs index 66a0fcaab..c26bb92c0 100644 --- a/crates/tui/src/tui/ui.rs +++ b/crates/tui/src/tui/ui.rs @@ -2242,9 +2242,7 @@ async fn run_event_loop( // UI state immediately instead of waiting for the 300-second // TURN_STALL_WATCHDOG_TIMEOUT. Use is_closed() rather than // try_recv() so the probe never consumes a valid event. - if (app.is_loading || app.is_compacting || app.is_purging) - && rx.is_closed() - { + if (app.is_loading || app.is_compacting || app.is_purging) && rx.is_closed() { streaming_thinking::finalize_current(app); app.finalize_streaming_assistant_as_interrupted(); app.finalize_active_cell_as_interrupted(); diff --git a/crates/tui/src/tui/ui/tests.rs b/crates/tui/src/tui/ui/tests.rs index cee991889..a82b06c40 100644 --- a/crates/tui/src/tui/ui/tests.rs +++ b/crates/tui/src/tui/ui/tests.rs @@ -2616,18 +2616,20 @@ fn engine_event_channel_disconnect_recovers_mid_turn_ui_state() { let (tx_event, mut rx) = tokio::sync::mpsc::channel::(256); drop(tx_event); - // Confirm the channel is disconnected - assert!(matches!(rx.try_recv(), Err(TryRecvError::Disconnected))); + // Confirm the channel is closed + assert!(rx.is_closed()); let mut app = create_test_app(); app.is_loading = true; + app.is_compacting = false; + app.is_purging = false; app.runtime_turn_status = Some("in_progress".to_string()); app.runtime_turn_id = Some("turn-42".to_string()); app.streaming_message_index = Some(0); app.user_scrolled_during_stream = true; // Apply the same post-loop logic from ui.rs - if app.is_loading && matches!(rx.try_recv(), Err(TryRecvError::Disconnected)) { + if (app.is_loading || app.is_compacting || app.is_purging) && rx.is_closed() { streaming_thinking::finalize_current(&mut app); app.finalize_streaming_assistant_as_interrupted(); app.finalize_active_cell_as_interrupted(); @@ -2636,6 +2638,11 @@ fn engine_event_channel_disconnect_recovers_mid_turn_ui_state() { app.streaming_thinking_active_entry = None; app.is_loading = false; + app.is_compacting = false; + app.is_purging = false; + app.active_allowed_tools = None; + app.agent_progress.clear(); + app.agent_activity_started_at = None; app.turn_started_at = None; app.turn_last_activity_at = None; app.runtime_turn_status = None; @@ -2647,14 +2654,21 @@ fn engine_event_channel_disconnect_recovers_mid_turn_ui_state() { StatusToastLevel::Error, None, ); + app.needs_redraw = true; } // Verify the fix: UI state is fully recovered assert!(!app.is_loading, "loading must be cleared"); + assert!(!app.is_compacting, "compacting must be cleared"); + assert!(!app.is_purging, "purging must be cleared"); + assert!(app.active_allowed_tools.is_none()); + assert!(app.agent_progress.is_empty()); + assert!(app.agent_activity_started_at.is_none()); assert!(app.runtime_turn_status.is_none(), "turn status cleared"); assert!(app.runtime_turn_id.is_none(), "turn id cleared"); assert!(app.streaming_message_index.is_none()); assert!(!app.user_scrolled_during_stream); + assert!(app.needs_redraw, "needs_redraw must be set"); let toast = app.status_toasts.back().expect("error toast pushed"); assert_eq!(toast.level, StatusToastLevel::Error); assert!(toast.text.contains("Engine process has terminated")); From 50c452d11b73443bce2f96ea195d9df718178d0a Mon Sep 17 00:00:00 2001 From: gordonlu Date: Tue, 2 Jun 2026 21:50:28 +0800 Subject: [PATCH 5/6] fix: remove unused mut on rx in test --- crates/tui/src/tui/ui/tests.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/tui/src/tui/ui/tests.rs b/crates/tui/src/tui/ui/tests.rs index a82b06c40..a29badd3d 100644 --- a/crates/tui/src/tui/ui/tests.rs +++ b/crates/tui/src/tui/ui/tests.rs @@ -2613,7 +2613,7 @@ fn engine_event_channel_disconnect_recovers_mid_turn_ui_state() { // Creates a real mpsc channel, sets up app as if a turn is in progress, // drops the sender (mirroring engine task exit in spawn_supervised), // and verifies the post-loop recovery logic cleans up the UI state. - let (tx_event, mut rx) = tokio::sync::mpsc::channel::(256); + let (tx_event, rx) = tokio::sync::mpsc::channel::(256); drop(tx_event); // Confirm the channel is closed From 649d7bc9ab12f10a53edecd866f670bb180ff7c0 Mon Sep 17 00:00:00 2001 From: gordonlu Date: Wed, 3 Jun 2026 09:12:03 +0800 Subject: [PATCH 6/6] fix: preserve pending steers on engine disconnect recovery Matching TurnComplete::Failed hard-fail recovery: drain pending steers into the visible queue so Esc-held messages are not silently lost when the engine dies mid-turn. --- crates/tui/src/tui/ui.rs | 8 ++++++++ crates/tui/src/tui/ui/tests.rs | 19 +++++++++++++++++++ 2 files changed, 27 insertions(+) diff --git a/crates/tui/src/tui/ui.rs b/crates/tui/src/tui/ui.rs index c26bb92c0..ef3e05d37 100644 --- a/crates/tui/src/tui/ui.rs +++ b/crates/tui/src/tui/ui.rs @@ -2262,6 +2262,14 @@ async fn run_event_loop( app.runtime_turn_id = None; app.dispatch_started_at = None; app.user_scrolled_during_stream = false; + + // Preserve pending steers: same hard-fail recovery as + // TurnComplete::Failed — surface them in the visible queue + // so they are not silently lost. + for msg in app.drain_pending_steers() { + app.queue_message(msg); + } + app.push_status_toast( "Engine process has terminated unexpectedly.", StatusToastLevel::Error, diff --git a/crates/tui/src/tui/ui/tests.rs b/crates/tui/src/tui/ui/tests.rs index a29badd3d..78681d1e4 100644 --- a/crates/tui/src/tui/ui/tests.rs +++ b/crates/tui/src/tui/ui/tests.rs @@ -2628,6 +2628,14 @@ fn engine_event_channel_disconnect_recovers_mid_turn_ui_state() { app.streaming_message_index = Some(0); app.user_scrolled_during_stream = true; + // Verify pending steers are surfaced to the visible queue on reconnect + app.pending_steers.push_back(QueuedMessage { + display: "steer content".to_string(), + skill_instruction: None, + }); + assert_eq!(app.pending_steers.len(), 1); + assert_eq!(app.queued_message_count(), 0); + // Apply the same post-loop logic from ui.rs if (app.is_loading || app.is_compacting || app.is_purging) && rx.is_closed() { streaming_thinking::finalize_current(&mut app); @@ -2649,6 +2657,11 @@ fn engine_event_channel_disconnect_recovers_mid_turn_ui_state() { app.runtime_turn_id = None; app.dispatch_started_at = None; app.user_scrolled_during_stream = false; + + for msg in app.drain_pending_steers() { + app.queue_message(msg); + } + app.push_status_toast( "Engine process has terminated unexpectedly.", StatusToastLevel::Error, @@ -2672,6 +2685,12 @@ fn engine_event_channel_disconnect_recovers_mid_turn_ui_state() { let toast = app.status_toasts.back().expect("error toast pushed"); assert_eq!(toast.level, StatusToastLevel::Error); assert!(toast.text.contains("Engine process has terminated")); + + // Verify pending steers were preserved in the visible queue + assert!(app.pending_steers.is_empty(), "pending_steers drained"); + assert_eq!(app.queued_message_count(), 1, "steer requeued"); + let queued = app.pop_queued_message().expect("queued steer available"); + assert_eq!(queued.display, "steer content"); } #[test]