From 4c1dafe35a1cfa8d9f825dddac82267bbefba1b8 Mon Sep 17 00:00:00 2001 From: "SYM.BOT" Date: Wed, 29 Apr 2026 14:44:53 +0100 Subject: [PATCH] fix: reject same-direction duplicate sessions in addPeer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit v0.3.79 introduced dual-dial dedup but applied the tie-break unconditionally whenever a prior bonjour transport existed. The tie-break assumes the prior is in the OPPOSITE direction of the new session — which is true for genuine simultaneous-dial collisions but NOT for same-direction duplicates. When two inbound sessions arrive in rapid succession (TCP retry, multipath, or repeated newConnectionHandler firing), the tie-break was applied as if the prior were an outbound. For higher-nodeId peers this returns preferNew=true, replacing the established healthy inbound with the duplicate. The replaced session was force-disconnected; its remote wire pair saw EOF; removeTransport fired on the remote side; peer-left storm. Observed in the field on iPhone↔Mac Catalyst v0.3.79: iPhone log shows two consecutive "replacing prior with new inbound" lines back-to-back — the second replace tearing down the survivor of the first. Fix: in addPeer, only apply the dual-dial tie-break when prior and new differ in direction. Same-direction prior → keep established session, reject duplicate (delegate detached + disconnect). Test: testSameDirectionDuplicatesAreRejected — truth-table coverage of both same-direction cases (both inbound / both outbound) for both nodeId orderings, plus regression guard that dual-dial tie-break still applies correctly through the combined decision. 71/71 tests pass. Co-Authored-By: Claude Opus 4.7 (1M context) --- Sources/SYM/SymNode.swift | 55 ++++++++++++++++++++------------ Tests/SYMTests/SYMTests.swift | 60 +++++++++++++++++++++++++++++++++++ 2 files changed, 94 insertions(+), 21 deletions(-) diff --git a/Sources/SYM/SymNode.swift b/Sources/SYM/SymNode.swift index 8876e78..e64e076 100644 --- a/Sources/SYM/SymNode.swift +++ b/Sources/SYM/SymNode.swift @@ -978,28 +978,41 @@ public final class SymNode { let outcome: AddPeerOutcome = peerQueue.sync { if var existing = self.peers[nodeId] { if let prev = existing.transports["bonjour"] ?? nil { - // Simultaneous-dial collision: both peers Bonjour-discovered - // each other within ~50ms and both initiated outbound TCP. Each - // side now holds two handshaked sessions for the same nodeId - // (one outbound, one inbound). Without explicit dedup the new - // session silently overwrites the prior in the transports dict - // — the orphan keeps a live NWConnection + read loop, eventually - // fires didDisconnectWith, and `removeTransport` strips the - // surviving winner from the dict. Net effect: every connection - // dies within ~1–2s of handshake, application payloads never - // cross the wire. + // Two cases land here: // - // Tie-break deterministically by nodeId so both peers agree on - // the same physical connection without exchanging frames: the - // lower nodeId acts as client and keeps its outbound; the - // higher node keeps the matching inbound. The loser's delegate - // is detached before disconnect (see fall-through below) so its - // teardown can't ripple back through removeTransport. - let preferNew = SymNode.preferNewSessionInDualDial( - localNodeId: self.identity.nodeId, - remoteNodeId: nodeId, - newIsOutbound: isOutbound - ) + // (1) Dual-dial collision (different directions): both peers + // Bonjour-discovered each other and both initiated outbound + // TCP. Each side holds one outbound + one inbound for the + // same nodeId. Tie-break deterministically by nodeId — the + // lower nodeId keeps its outbound, the higher its inbound — + // so both peers select the same physical socket without + // exchanging coordination frames. + // + // (2) Same-direction duplicate: a second connection in the same + // direction as the established one. Happens when the OS + // newConnectionHandler fires twice for the same advertised + // service (multipath / TCP-retry / Bonjour-republish), or + // when discoveryDidFindPeer fires repeatedly and the dedup + // guard's window let one through. The first is healthy and + // in active use — replacing it with the duplicate would + // disconnect the wire pair on the remote side and trigger + // a peer-left storm. Always reject the duplicate. + // + // The losing session has its delegate detached before disconnect + // (see fall-through below) so its teardown can't ripple through + // removeTransport and clobber the surviving registered session. + let isDualDial = prev.isOutbound != session.isOutbound + let preferNew: Bool + if isDualDial { + preferNew = SymNode.preferNewSessionInDualDial( + localNodeId: self.identity.nodeId, + remoteNodeId: nodeId, + newIsOutbound: isOutbound + ) + } else { + // Same direction → keep the established prior, reject duplicate. + preferNew = false + } if preferNew { existing.transports["bonjour"] = session existing.lastSeen = Date() diff --git a/Tests/SYMTests/SYMTests.swift b/Tests/SYMTests/SYMTests.swift index 724f139..41f7039 100644 --- a/Tests/SYMTests/SYMTests.swift +++ b/Tests/SYMTests/SYMTests.swift @@ -533,4 +533,64 @@ final class SimultaneousDialDedupTests: XCTestCase { XCTAssertTrue(SymNode.preferNewSessionInDualDial(localNodeId: b, remoteNodeId: a, newIsOutbound: false), "higher nodeId should keep its inbound") } + + /// Documents the dedup decision matrix `addPeer` applies when a prior + /// session already exists for `nodeId`. The dual-dial tie-break is only + /// safe to invoke when prior and new differ in direction. When they + /// match (e.g. two inbound sessions in rapid succession from a TCP + /// retry, multipath, or repeated newConnectionHandler firing), the + /// established prior must be kept and the duplicate rejected. + /// + /// Without this rule a second inbound replaces a known-good prior + /// inbound, the prior is force-disconnected, the wire pair on the + /// remote side sees EOF, removeTransport fires there, and peer-left + /// storms ensue. (Observed in iPhone↔Mac Catalyst on v0.3.79 before + /// this rule was added.) + func testSameDirectionDuplicatesAreRejected() { + // The decision in addPeer is: + // isDualDial = (prev.isOutbound != session.isOutbound) + // preferNew = isDualDial ? tieBreak(...) : false + // + // Restate as a pure function so the test reads as a truth table: + func preferNew(localNodeId: String, remoteNodeId: String, + priorIsOutbound: Bool, newIsOutbound: Bool) -> Bool { + let isDualDial = priorIsOutbound != newIsOutbound + guard isDualDial else { return false } + return SymNode.preferNewSessionInDualDial( + localNodeId: localNodeId, + remoteNodeId: remoteNodeId, + newIsOutbound: newIsOutbound + ) + } + + let a = "00000000-0000-7000-8000-000000000001" + let b = "00000000-0000-7000-8000-000000000002" + + // Same direction (both inbound) — reject duplicate regardless of + // nodeId ordering. This is the bug-fix case. + XCTAssertFalse(preferNew(localNodeId: a, remoteNodeId: b, + priorIsOutbound: false, newIsOutbound: false), + "duplicate inbound on lower nodeId must be rejected") + XCTAssertFalse(preferNew(localNodeId: b, remoteNodeId: a, + priorIsOutbound: false, newIsOutbound: false), + "duplicate inbound on higher nodeId must be rejected") + + // Same direction (both outbound) — also reject duplicate. + XCTAssertFalse(preferNew(localNodeId: a, remoteNodeId: b, + priorIsOutbound: true, newIsOutbound: true), + "duplicate outbound must be rejected") + XCTAssertFalse(preferNew(localNodeId: b, remoteNodeId: a, + priorIsOutbound: true, newIsOutbound: true), + "duplicate outbound must be rejected") + + // Sanity: dual-dial cases still apply tie-break correctly through + // this combined function (regression guard so a future refactor + // doesn't accidentally change the dual-dial behavior here). + XCTAssertFalse(preferNew(localNodeId: a, remoteNodeId: b, + priorIsOutbound: true, newIsOutbound: false), + "lower nodeId keeps its prior outbound, rejects new inbound under dual-dial") + XCTAssertTrue(preferNew(localNodeId: b, remoteNodeId: a, + priorIsOutbound: true, newIsOutbound: false), + "higher nodeId replaces prior outbound with new inbound under dual-dial") + } }