diff --git a/packages/libp2p/src/connection-monitor.ts b/packages/libp2p/src/connection-monitor.ts index da24eaa04c..51e30a1fa8 100644 --- a/packages/libp2p/src/connection-monitor.ts +++ b/packages/libp2p/src/connection-monitor.ts @@ -11,7 +11,14 @@ const PROTOCOL_VERSION = '1.0.0' const PROTOCOL_NAME = 'ping' const PROTOCOL_PREFIX = 'ipfs' const PING_LENGTH = 32 -const DEFAULT_ABORT_CONNECTION_ON_PING_FAILURE = true +// Probe failures are not reliable evidence a connection is dead under real +// WAN conditions — trans-continental RTT, event-loop contention, and transient +// network jitter routinely cause a single 10s ping exchange to fail. Yamux +// keepalive detects genuinely dead connections at the muxer layer without +// tearing down the TCP socket, so aborting on a single probe failure is +// measurable self-harm at non-trivial scale. Applications that want the +// aggressive behaviour can still opt in by setting this to true. +const DEFAULT_ABORT_CONNECTION_ON_PING_FAILURE = false export interface ConnectionMonitorInit { /** @@ -38,9 +45,12 @@ export interface ConnectionMonitorInit { pingTimeout?: Omit /** - * If true, any connection that fails the ping will be aborted + * If true, any connection that fails the ping will be aborted. Defaults to + * false because a single 10s probe failure is not reliable evidence that + * the connection is dead under real WAN conditions. Set to true to restore + * the previous aggressive behaviour. * - * @default true + * @default false */ abortConnectionOnPingFailure?: boolean diff --git a/packages/libp2p/test/connection-monitor/index.spec.ts b/packages/libp2p/test/connection-monitor/index.spec.ts index 7fb906a77c..2afdfbac90 100644 --- a/packages/libp2p/test/connection-monitor/index.spec.ts +++ b/packages/libp2p/test/connection-monitor/index.spec.ts @@ -112,7 +112,8 @@ describe('connection monitor', () => { pingInterval: 50, pingTimeout: { maxTimeout: 50 - } + }, + abortConnectionOnPingFailure: true }) await start(monitor) @@ -133,7 +134,8 @@ describe('connection monitor', () => { it('should abort a connection that fails', async () => { monitor = new ConnectionMonitor(components, { - pingInterval: 10 + pingInterval: 10, + abortConnectionOnPingFailure: true }) await start(monitor) @@ -150,6 +152,25 @@ describe('connection monitor', () => { expect(connection.abort).to.have.property('called', true) }) + it('should not abort a connection that fails by default', async () => { + monitor = new ConnectionMonitor(components, { + pingInterval: 10 + }) + + await start(monitor) + + const connection = stubInterface() + connection.newStream.withArgs('/ipfs/ping/1.0.0').callsFake(async (protocols, opts) => { + throw new ConnectionClosedError('Connection closed') + }) + + components.connectionManager.getConnections.returns([connection]) + + await delay(500) + + expect(connection.abort).to.have.property('called', false) + }) + it('should not abort a connection that fails when abortConnectionOnPingFailure is false', async () => { monitor = new ConnectionMonitor(components, { pingInterval: 10,