Skip to content
47 changes: 30 additions & 17 deletions RidestrSDK/Sources/RidestrSDK/Nostr/RelayManager.swift
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@ public actor RelayManager: RelayManagerProtocol {
private var connectedRelayURLs: [URL] = []
private var notificationHandler: NotificationRouter?
private var notificationTask: Task<Void, Never>?
private var _handlerAlive = false // Explicit liveness flag — Task.isCancelled doesn't detect normal completion

public init(keypair: NostrKeypair) {
self.keypair = keypair
Expand All @@ -31,21 +30,42 @@ public actor RelayManager: RelayManagerProtocol {
await teardownConnection(clearRelayURLs: true)
}

/// At least one relay is currently connected. Queries the underlying
/// rust-nostr `Client` for the live per-relay status — the previous
/// impl checked only Swift-side cached state (the client pointer, a
/// configured-URLs list, and a handler-liveness flag), all of which
/// stay true through airplane-mode toggles and other transient network
/// drops, so it lied about the actual WebSocket state.
public var isConnected: Bool {
client != nil && !connectedRelayURLs.isEmpty && _handlerAlive
}

private func markHandlerDead() {
_handlerAlive = false
get async {
guard let client else { return false }
let relays = await client.relays()
for relay in relays.values where relay.isConnected() {
return true
}
return false
}
}

/// Reconnect to relays if disconnected. Call from app foreground handler.
/// Does NOT restart subscriptions — callers must re-subscribe after this returns.
/// Force-rebuild the relay client. Call from app foreground handler.
///
/// Always replaces the client (when relay URLs are configured) rather
/// than short-circuiting on cached state because the cached state
/// lies on iOS background→foreground: the OS suspends WebSockets on
/// background, and rust-nostr's per-relay status doesn't update until
/// the next read/write attempt fails (which can take minutes on a
/// silently-killed socket). Tearing down and rebuilding is the only
/// way to get truthful state — the alternative is letting the user
/// sit in "everything looks fine" while their relays are dead.
///
/// Does NOT restart subscriptions — callers must re-subscribe after
/// this returns. Rebuilding is cheap when relays are reachable
/// (~1s handshake); that per-call cost buys truthful connection
/// state on every foreground.
/// No-op if `connect(to:)` has not been called yet (no URLs to reach).
public func reconnectIfNeeded() async {
guard !connectedRelayURLs.isEmpty else { return }
guard client == nil || !_handlerAlive else { return }

RidestrLogger.error("[RelayManager] Reconnecting relay client")
RidestrLogger.info("[RelayManager] Force-rebuilding relay client (foreground / explicit reconnect)")
do {
try await replaceClient(with: connectedRelayURLs)
} catch {
Expand Down Expand Up @@ -80,7 +100,6 @@ public actor RelayManager: RelayManagerProtocol {
private func startNotificationHandler(for client: Client) {
let router = NotificationRouter()
self.notificationHandler = router
self._handlerAlive = true
let clientRef = client
notificationTask = Task.detached { [router] in
do {
Expand All @@ -93,15 +112,9 @@ public actor RelayManager: RelayManagerProtocol {
router.removeAll()
RidestrLogger.error("[RelayManager] All subscription streams terminated due to disconnect")
}
// Monitor handler liveness from a separate task
Task { [weak self] in
await self?.notificationTask?.value // Suspends until task completes
await self?.markHandlerDead()
}
}

private func teardownConnection(clearRelayURLs: Bool) async {
_handlerAlive = false
notificationHandler?.removeAll()
activeStreams.removeAll()
subscriptionGenerations.removeAll()
Expand Down
89 changes: 83 additions & 6 deletions RoadFlare/RoadFlareCore/ViewModels/ConnectionCoordinator.swift
Original file line number Diff line number Diff line change
@@ -1,16 +1,45 @@
import Foundation
import Network

/// Owns the periodic relay connection watchdog task.
/// Owns the periodic relay connection watchdog task and a path-change
/// reactive signal.
///
/// Checks connectivity on a fixed interval and triggers reconnection
/// when the relay drops. All behavior is injected via closures so the
/// coordinator has no direct dependencies on AppState or SDK services.
/// Two complementary mechanisms drive reconnects:
///
/// 1. **Periodic watchdog** — polls the live `isConnected` query (which,
/// after PR #101, asks rust-nostr for per-relay status rather than
/// reading a cached flag) on a fixed interval and triggers reconnect
/// when it returns false. This bounds the detection latency for any
/// drop that rust-nostr's per-relay status can observe.
///
/// 2. **`NWPathMonitor` reactive signal** — fires whenever iOS observes a
/// network path change (Wi-Fi/cellular swap, airplane mode toggle,
/// captive portal). Forces a reconnect immediately, bypassing the
/// `isConnected` gate, because the previous connection's cached state
/// is by definition suspect after a path change. rust-nostr's
/// per-relay status doesn't update until its next read/write fails,
/// which can take minutes on a silently-killed socket — the OS-level
/// signal is much faster.
///
/// All behavior is injected via closures so the coordinator has no
/// direct dependencies on AppState or SDK services.
@MainActor
final class ConnectionCoordinator {
private var watchdogTask: Task<Void, Never>?
private var pathMonitor: NWPathMonitor?
private var pathMonitorQueue: DispatchQueue?
/// Most recently spawned path-event reconnect Task. `isReconnecting`
/// already serializes path-spawned reconnects (a second event during an
/// in-flight reconnect short-circuits via the guard), so at most one
/// such Task is doing real work at any moment — tracking the latest
/// is sufficient to cancel an in-flight reconnect on `stop()` (e.g.
/// logout / identity replacement). Mirrors the tracked-Task pattern
/// from PR #95's onboarding-publish watchdog.
private var pathReconnectTask: Task<Void, Never>?
private var hasReceivedFirstPath = false
private var isReconnecting = false

/// Start the periodic watchdog.
/// Start the periodic watchdog and the path-change reactive signal.
///
/// - Parameters:
/// - interval: Time between connectivity checks.
Expand All @@ -29,17 +58,65 @@ final class ConnectionCoordinator {
try? await Task.sleep(for: interval)
guard let self, !self.isReconnecting, shouldReconnect() else { continue }
guard !(await isConnected()) else { continue }
// Re-check after the connectivity await: a path-monitor
// event landing during the suspension can have set
// `isReconnecting = true` and started its own reconnect.
// Without this guard we'd kick off a second concurrent
// reconnect, tearing down the in-flight rebuild's client.
guard !self.isReconnecting else { continue }
self.isReconnecting = true
defer { self.isReconnecting = false }
await reconnect()
}
}

let monitor = NWPathMonitor()
let queue = DispatchQueue(label: "com.roadflare.connection-coordinator.path-monitor")
monitor.pathUpdateHandler = { [weak self] _ in
// The very first path update fires on monitor start with the
// current path — not a real change in production paths. Skip
// it to avoid a spurious rebuild on app launch right after
// `setupServices` has established the initial connection.
// (Apple's `NWPathMonitor` documentation does not strictly
// contract this, but it is the consistently observed behavior
// for `start(queue:)` and is widely relied on across iOS apps.)
Task { @MainActor [weak self] in
guard let self else { return }
if !self.hasReceivedFirstPath {
self.hasReceivedFirstPath = true
return
}
guard !self.isReconnecting, shouldReconnect() else { return }
// Set the busy flag synchronously, in the same critical
// section as the guard check, so a second path event
// landing between this Task and the inner reconnect Task
// observes the busy state and short-circuits. (Setting it
// inside the inner Task left a race window where two
// rapid path events both passed the guard and spawned
// overlapping reconnects.)
self.isReconnecting = true
self.pathReconnectTask = Task { @MainActor [weak self] in
defer { self?.isReconnecting = false }
guard !Task.isCancelled else { return }
await reconnect()
}
}
}
monitor.start(queue: queue)
self.pathMonitor = monitor
self.pathMonitorQueue = queue
}

/// Stop the watchdog and cancel any in-flight reconnection.
/// Stop the watchdog, cancel the path monitor, and any in-flight reconnection.
func stop() {
    // Periodic watchdog: stop polling connectivity.
    watchdogTask?.cancel()
    watchdogTask = nil

    // Reactive path signal: tear down the NWPathMonitor and drop its queue.
    pathMonitor?.cancel()
    pathMonitorQueue = nil
    pathMonitor = nil

    // Abandon any reconnect a path event spawned (e.g. logout / identity swap).
    pathReconnectTask?.cancel()
    pathReconnectTask = nil

    // Reset serialization / one-shot state so a later start() begins fresh.
    isReconnecting = false
    hasReceivedFirstPath = false
}
}