From 30aff0902e0b704ea838701fbbd5a211df418cd9 Mon Sep 17 00:00:00 2001 From: Josh Gratton Date: Tue, 30 Jun 2026 15:54:43 -0400 Subject: [PATCH 1/7] Fix WSL backend getting permanently stuck "connecting" (#3611) Three independently-mergeable changes addressing two distinct root causes of the same code=0/no-port-bound symptom, plus the missing recovery path that turns either into a permanent hang: 1. Hold the WSL bootstrap stdin stream open (Stream.concat(Stream.never)) instead of closing it the instant the single JSON chunk drains. The prior behavior raced stdin EOF against the child's readline 'line' vs 'close' listeners across the extra wsl.exe relay hop, sometimes losing the bootstrap envelope entirely. Adds a regression test capturing command.options.stdin and asserting it never completes for the WSL path, plus a companion assertion that the Windows-native fd3 path is unaffected. 2. Validate the resolved WSL Node's version against nodeEngineRange during ensureNodePty preflight. Previously, once any node resolved the version was never checked, so a too-old Node (e.g. nvm default 18) would load and run the server bundle, but import.meta.main is undefined pre-20.11/22.x, so the launch gate never fires and the process exits 0 with no output -- identical symptoms to (1), entirely independent cause. 3. Cap post-preflight, pre-readiness restart loops the same way the pre-spawn fatal-preflight path already is. Previously any clean exit before readiness (including both causes above) looped forever with no escalation, which is why a persisted wslOnly:true could trap a user across restarts/reinstalls. Gated to bootstrapDelivery:"stdin" so a Windows-native primary crash-looping for an unrelated reason never shows the WSL-specific "falling back to Windows" dialog. --- .../src/backend/DesktopBackendManager.test.ts | 201 ++++++++++++++++++ .../src/backend/DesktopBackendManager.ts | 58 ++++- .../src/wsl/DesktopWslEnvironment.test.ts | 122 +++++++++++ apps/desktop/src/wsl/DesktopWslEnvironment.ts | 46 +++- 4 files changed, 424 insertions(+), 3 deletions(-) diff --git a/apps/desktop/src/backend/DesktopBackendManager.test.ts b/apps/desktop/src/backend/DesktopBackendManager.test.ts index 858c9b0d560..9c428d924d0 100644 --- a/apps/desktop/src/backend/DesktopBackendManager.test.ts +++ b/apps/desktop/src/backend/DesktopBackendManager.test.ts @@ -6,6 +6,7 @@ import { assert, describe, it } from "@effect/vitest"; import * as Deferred from "effect/Deferred"; import * as Duration from "effect/Duration"; import * as Effect from "effect/Effect"; +import * as Fiber from "effect/Fiber"; import * as FileSystem from "effect/FileSystem"; import * as Layer from "effect/Layer"; import * as Option from "effect/Option"; @@ -220,6 +221,101 @@ describe("DesktopBackendManager", () => { ), ); + it.effect( + "holds stdin open forever for bootstrapDelivery:'stdin' (#3611 fix 1) while fd3 still terminates normally", + () => + Effect.gen(function* () { + // --- stdin-delivered (WSL) path: the bootstrap stream must never + // complete in normal operation, eliminating the readline 'line' vs + // 'close' race in apps/server/src/bootstrap.ts. --- + let stdinStream: ChildProcess.CommandInput | undefined; + const stdinSpawned = yield* Deferred.make(); + const stdinSpawnerLayer = Layer.succeed( + ChildProcessSpawner.ChildProcessSpawner, + ChildProcessSpawner.make((command) => + Effect.gen(function* () { + if (command._tag === "StandardCommand") { + stdinStream = command.options.stdin; + } + yield* Deferred.succeed(stdinSpawned, void 0); + return makeProcess({ exitCode: Effect.never }); + }), + ), + ); + + yield* Effect.scoped( + Effect.gen(function* () { + const instance = yield* makeTestInstance({ + spawnerLayer: stdinSpawnerLayer, + config: { + ...baseConfig, + bootstrapDelivery: "stdin", + args: ["/server/bin.mjs", "--bootstrap-fd", "0"], + }, + httpClientLayer: httpClientLayer(() => Effect.never), + }); + + yield* instance.start; + yield* Deferred.await(stdinSpawned); + assert.isDefined(stdinStream); + if (typeof stdinStream === "string") { + throw new Error("Expected stdin to be a Stream, not a CommandInput string."); + } + + const drained = yield* Stream.runDrain(stdinStream).pipe( + Effect.timeoutOption(Duration.seconds(30)), + Effect.forkScoped, + ); + yield* TestClock.adjust(Duration.seconds(30)); + const result = yield* Fiber.join(drained); + assert.isTrue(Option.isNone(result), "stdin stream completed but must never end"); + }), + ); + + // --- fd3-delivered (Windows-native) path: unaffected regression + // check -- the bootstrap stream must still terminate normally. --- + let fd3Stream: Stream.Stream | undefined; + const fd3Spawned = yield* Deferred.make(); + const fd3SpawnerLayer = Layer.succeed( + ChildProcessSpawner.ChildProcessSpawner, + ChildProcessSpawner.make((command) => + Effect.gen(function* () { + if (command._tag === "StandardCommand") { + const fd3 = command.options.additionalFds?.fd3; + if (fd3?.type === "input") fd3Stream = fd3.stream; + } + yield* Deferred.succeed(fd3Spawned, void 0); + return makeProcess({ exitCode: Effect.never }); + }), + ), + ); + + yield* Effect.scoped( + Effect.gen(function* () { + const instance = yield* makeTestInstance({ + spawnerLayer: fd3SpawnerLayer, + httpClientLayer: httpClientLayer(() => Effect.never), + }); + + yield* instance.start; + yield* Deferred.await(fd3Spawned); + assert.isDefined(fd3Stream); + + const drained = yield* Stream.runDrain(fd3Stream!).pipe( + Effect.timeoutOption(Duration.seconds(30)), + Effect.forkScoped, + ); + yield* TestClock.adjust(Duration.seconds(30)); + const result = yield* Fiber.join(drained); + assert.isTrue( + Option.isSome(result), + "fd3 stream should still terminate normally (no regression)", + ); + }), + ); + }).pipe(Effect.provide(TestClock.layer())), + ); + it.effect("retries HTTP readiness before reporting the backend ready", () => Effect.scoped( Effect.gen(function* () { @@ -616,6 +712,111 @@ describe("DesktopBackendManager", () => { ), ); + it.effect( + "surfaces and falls back after 5 consecutive never-ready exits for bootstrapDelivery:'stdin' (#3611 fix 3)", + () => + Effect.scoped( + Effect.gen(function* () { + const starts = yield* Queue.unbounded(); + let startCount = 0; + const failures: Array<{ reason: string; fatal: boolean }> = []; + + const spawnerLayer = Layer.succeed( + ChildProcessSpawner.ChildProcessSpawner, + ChildProcessSpawner.make(() => + Effect.sync(() => { + startCount += 1; + return makeProcess({ + // Clean exit, code=0, before readiness -- matches the + // reporter's server-child.log (no stdout/stderr, nothing + // ever binds the port). + exitCode: Queue.offer(starts, startCount).pipe( + Effect.as(ChildProcessSpawner.ExitCode(0)), + ), + }); + }), + ), + ); + + const instance = yield* makeTestInstance({ + spawnerLayer, + config: { ...baseConfig, bootstrapDelivery: "stdin" }, + httpClientLayer: httpClientLayer(() => Effect.never), + onPreflightFailed: (failure) => + Effect.sync(() => { + failures.push(failure); + }).pipe(Effect.as(false)), + }); + + yield* instance.start; + assert.equal(yield* Queue.take(starts), 1); + + // Drive well past 5 consecutive never-ready exits. + for (let i = 0; i < 10; i++) { + yield* TestClock.adjust(Duration.seconds(10)); + } + + assert.equal(failures.length, 1); + assert.equal(failures[0]?.fatal, true); + assert.isTrue(startCount >= 5 && startCount <= 7); + + const snapshot = yield* instance.snapshot; + assert.equal(snapshot.desiredRunning, false); + }).pipe(Effect.provide(TestClock.layer())), + ), + ); + + it.effect( + "does not surface a never-ready exit loop for bootstrapDelivery:'fd3' (Windows-native primary stays gated out, #3611 fix 3)", + () => + Effect.scoped( + Effect.gen(function* () { + const starts = yield* Queue.unbounded(); + let startCount = 0; + const failures: Array<{ reason: string; fatal: boolean }> = []; + + const spawnerLayer = Layer.succeed( + ChildProcessSpawner.ChildProcessSpawner, + ChildProcessSpawner.make(() => + Effect.sync(() => { + startCount += 1; + return makeProcess({ + exitCode: Queue.offer(starts, startCount).pipe( + Effect.as(ChildProcessSpawner.ExitCode(0)), + ), + }); + }), + ), + ); + + const instance = yield* makeTestInstance({ + spawnerLayer, + config: { ...baseConfig, bootstrapDelivery: "fd3" }, + httpClientLayer: httpClientLayer(() => Effect.never), + onPreflightFailed: (failure) => + Effect.sync(() => { + failures.push(failure); + }).pipe(Effect.as(false)), + }); + + yield* instance.start; + assert.equal(yield* Queue.take(starts), 1); + + for (let i = 0; i < 10; i++) { + yield* TestClock.adjust(Duration.seconds(10)); + } + + // The Windows-native primary must never trip the WSL-specific + // "falling back to Windows" dialog for its own crash loop. + assert.deepEqual(failures, []); + assert.isTrue(startCount > 7, "fd3 path should keep restarting uncapped, unlike stdin"); + + const snapshot = yield* instance.snapshot; + assert.equal(snapshot.desiredRunning, true); + }).pipe(Effect.provide(TestClock.layer())), + ), + ); + it.effect("cancels a scheduled restart when start is requested manually", () => Effect.scoped( Effect.gen(function* () { diff --git a/apps/desktop/src/backend/DesktopBackendManager.ts b/apps/desktop/src/backend/DesktopBackendManager.ts index e3a4de661ac..dbc91db54a6 100644 --- a/apps/desktop/src/backend/DesktopBackendManager.ts +++ b/apps/desktop/src/backend/DesktopBackendManager.ts @@ -335,7 +335,24 @@ const runBackendProcess = Effect.fn("runBackendProcess")(function* ( ), ); const onOutput = options.onOutput ?? (() => Effect.void); - const bootstrapStream = Stream.encodeText(Stream.make(`${bootstrapJson}\n`)); + // The WSL path delivers the bootstrap envelope over stdin (see the + // additionalFds comment below). A plain `Stream.make(...)` completes the + // instant the single chunk is queued, and the platform spawner's stdin + // sink defaults to closing the write end as soon as the stream completes + // (`endOnDone: true`) -- i.e. stdin EOF can land immediately after the + // JSON line is flushed, racing the extra wsl.exe relay hop against the + // child's readline `'line'` vs `'close'` listeners + // (apps/server/src/bootstrap.ts:readBootstrapEnvelope) and sometimes + // losing the bootstrap envelope entirely. Appending `Stream.never` keeps + // stdin open forever for the WSL path so the write end is never closed in + // normal operation; this is intentional, not a leak -- the stream is + // forked into this run's own Scope, which is interrupted (closing stdin) + // on every teardown path (process exit, restart, stop, SIGTERM). Do not + // "fix" this by removing the `Stream.never` tail. + const bootstrapStream = Stream.make(`${bootstrapJson}\n`).pipe( + options.bootstrapDelivery === "stdin" ? Stream.concat(Stream.never) : (s) => s, + Stream.encodeText, + ); const command = ChildProcess.make(options.executablePath, options.args, { cwd: options.cwd, env: options.env, @@ -622,7 +639,44 @@ export const makeBackendInstance = Effect.fn("makeBackendInstance")(function* ( } if (isCurrentRun && nextState.desiredRunning) { - yield* scheduleRestart(reason); + // A process that exits cleanly (or otherwise) before ever + // reaching readiness, repeatedly, would otherwise loop here + // forever with no escalation -- this is exactly how a WSL + // backend that starts and exits with code 0 every time + // (e.g. the stdin-EOF race or a Node engine mismatch) gets + // permanently stuck "connecting" with no actionable error. + // Cap it the same way the pre-spawn fatal-preflight path + // already is, but only for the stdin-delivered (WSL) path: + // the Windows-native fd3 primary's onPreflightFailed is + // wired unconditionally to a WSL-specific "falling back to + // Windows" dialog, which would be nonsensical for a Windows + // backend crash-looping for an unrelated reason. + if ( + config.value.bootstrapDelivery === "stdin" && + nextState.restartAttempt >= MAX_PREFLIGHT_FAILURE_ATTEMPTS + ) { + yield* logInstanceError( + "backend exited repeatedly before becoming ready; surfacing and falling back", + { reason, attempt: nextState.restartAttempt }, + ); + const shouldRestart = yield* ( + spec.onPreflightFailed?.({ + reason: `backend process exited ${nextState.restartAttempt} times in a row without becoming ready (last: ${reason})`, + fatal: true, + }) ?? Effect.succeed(false) + ); + if (shouldRestart) { + yield* scheduleRestart(reason); + } else { + yield* Ref.update(state, (latest) => ({ + ...latest, + desiredRunning: false, + ready: false, + })); + } + } else { + yield* scheduleRestart(reason); + } } }), ); diff --git a/apps/desktop/src/wsl/DesktopWslEnvironment.test.ts b/apps/desktop/src/wsl/DesktopWslEnvironment.test.ts index cdb3fc78286..a65c37fa207 100644 --- a/apps/desktop/src/wsl/DesktopWslEnvironment.test.ts +++ b/apps/desktop/src/wsl/DesktopWslEnvironment.test.ts @@ -4,6 +4,7 @@ import * as Duration from "effect/Duration"; import * as Effect from "effect/Effect"; import * as Fiber from "effect/Fiber"; import * as Layer from "effect/Layer"; +import * as Option from "effect/Option"; import * as Sink from "effect/Sink"; import * as Stream from "effect/Stream"; import * as TestClock from "effect/testing/TestClock"; @@ -12,10 +13,12 @@ import { ChildProcessSpawner } from "effect/unstable/process"; import { buildWslNodeEnvPreamble, DesktopWslDistroListError, + ensureNodePtyImpl, formatMissingToolsReason, formatNodePtyProbeFailureReason, formatWslShellTransportFailureReason, parseNodePath, + parseNodeVersion, parseResolvedPath, parseToolchainReport, probeWslDistros, @@ -193,6 +196,25 @@ describe("parseResolvedPath", () => { }); }); +describe("parseNodeVersion", () => { + it("extracts the version from a nodeVersion: line", () => { + const stdout = "nodePath:/usr/bin/node\nresolvedPath:/usr/bin\nnodeVersion:18.20.8\n"; + expect(parseNodeVersion(stdout)).toBe("18.20.8"); + }); + + it("returns null when there is no nodeVersion line at all", () => { + expect(parseNodeVersion("nodePath:/usr/bin/node\nresolvedPath:/usr/bin\n")).toBeNull(); + }); + + it("returns null when the value after the prefix is empty", () => { + expect(parseNodeVersion("nodeVersion:\n")).toBeNull(); + }); + + it("trims surrounding whitespace and a trailing carriage return", () => { + expect(parseNodeVersion(" nodeVersion:22.22.1\r\n")).toBe("22.22.1"); + }); +}); + describe("formatMissingToolsReason", () => { it("returns null when everything is present and node is in range", () => { expect( @@ -235,3 +257,103 @@ describe("formatMissingToolsReason", () => { expect(reason).not.toContain("nvm"); }); }); + +describe("ensureNodePtyImpl: WSL Node engine-range preflight (#3611)", () => { + // Always resolves -- isolates the engine-range branch from the separate + // `wslpath` round trip that production windowsToWslPath performs. + const windowsToWslPath = () => Effect.succeed(Option.some("/home/josh/repo")); + + const makeProbeSpawner = (probeStdout: string) => + ChildProcessSpawner.make(() => + Effect.succeed( + ChildProcessSpawner.makeHandle({ + pid: ChildProcessSpawner.ProcessId(1), + exitCode: Effect.succeed(ChildProcessSpawner.ExitCode(0)), + isRunning: Effect.succeed(false), + kill: () => Effect.void, + unref: Effect.succeed(Effect.void), + stdin: Sink.drain, + stdout: Stream.make(encoder.encode(probeStdout)), + stderr: Stream.empty, + all: Stream.empty, + getInputFd: () => Sink.drain, + getOutputFd: () => Stream.empty, + }), + ), + ); + + it("rejects a resolved Node that does not satisfy nodeEngineRange (the reporter's exact case)", () => + Effect.gen(function* () { + const result = yield* ensureNodePtyImpl( + "Ubuntu", + "C:\\repo", + windowsToWslPath, + { nodeEngineRange: "^22.16 || ^23.11 || >=24.10" }, + ); + expect(result.ok).toBe(false); + if (!result.ok) { + expect(result.fatal).toBe(true); + expect(result.reason).toContain("18.20.8"); + expect(result.reason).toContain("^22.16 || ^23.11 || >=24.10"); + expect(result.reason).toContain("/home/josh/.nvm/versions/node/v18.20.8/bin/node"); + } + }).pipe( + Effect.provideService( + ChildProcessSpawner.ChildProcessSpawner, + makeProbeSpawner( + [ + "nodePath:/home/josh/.nvm/versions/node/v18.20.8/bin/node", + "resolvedPath:/home/josh/.nvm/versions/node/v18.20.8/bin:/usr/bin", + "nodeVersion:18.20.8", + ].join("\n"), + ), + ), + )); + + it("does not false-positive when the resolved Node satisfies nodeEngineRange (happy path unchanged)", () => + Effect.gen(function* () { + const result = yield* ensureNodePtyImpl( + "Ubuntu", + "C:\\repo", + windowsToWslPath, + { nodeEngineRange: "^22.16 || ^23.11 || >=24.10" }, + ); + expect(result).toEqual({ + ok: true, + nodePath: "/home/josh/.nvm/versions/node/v22.22.1/bin/node", + resolvedPath: "/home/josh/.nvm/versions/node/v22.22.1/bin:/usr/bin", + }); + }).pipe( + Effect.provideService( + ChildProcessSpawner.ChildProcessSpawner, + makeProbeSpawner( + [ + "nodePath:/home/josh/.nvm/versions/node/v22.22.1/bin/node", + "resolvedPath:/home/josh/.nvm/versions/node/v22.22.1/bin:/usr/bin", + "nodeVersion:22.22.1", + ].join("\n"), + ), + ), + )); + + it("does not check the engine range when nodeEngineRange is not configured", () => + Effect.gen(function* () { + const result = yield* ensureNodePtyImpl("Ubuntu", "C:\\repo", windowsToWslPath, {}); + expect(result).toEqual({ + ok: true, + nodePath: "/home/josh/.nvm/versions/node/v18.20.8/bin/node", + resolvedPath: "/home/josh/.nvm/versions/node/v18.20.8/bin:/usr/bin", + }); + }).pipe( + Effect.provideService( + ChildProcessSpawner.ChildProcessSpawner, + makeProbeSpawner( + [ + "nodePath:/home/josh/.nvm/versions/node/v18.20.8/bin/node", + "resolvedPath:/home/josh/.nvm/versions/node/v18.20.8/bin:/usr/bin", + "nodeVersion:18.20.8", + ].join("\n"), + ), + ), + )); +}); diff --git a/apps/desktop/src/wsl/DesktopWslEnvironment.ts b/apps/desktop/src/wsl/DesktopWslEnvironment.ts index b38675f0e1f..0cb1ffe2c3a 100644 --- a/apps/desktop/src/wsl/DesktopWslEnvironment.ts +++ b/apps/desktop/src/wsl/DesktopWslEnvironment.ts @@ -223,6 +223,7 @@ const NODE_PTY_PROBE_SCRIPT = ( linuxServerDir: string, ) => `printf 'nodePath:%s\\n' "$(command -v node 2>/dev/null)" printf 'resolvedPath:%s\\n' "$PATH" +printf 'nodeVersion:%s\\n' "$(node -p 'process.versions.node' 2>/dev/null)" cd ${shellQuote(linuxServerDir)} && node <<'NODE' >/dev/null 2>&1 // The server bundle externalizes its deps to node_modules, and the WSL Node // can't read inside app.asar, so confirm those deps are unpacked on the real @@ -329,6 +330,23 @@ export const parseResolvedPath = (stdout: string): string | null => { return resolvedPath.length > 0 ? resolvedPath : null; }; +// Captures the resolved node's own reported version (`process.versions.node`) +// from NODE_PTY_PROBE_SCRIPT, so a node that resolves but doesn't satisfy +// `engines.node` can be caught at preflight instead of silently no-op'ing +// (Node < the version where `import.meta.main` exists makes the server's +// `if (import.meta.main)` launch gate never run, producing a clean exit +// code=0 with no stdout/stderr and nothing ever listening on the port). +export const parseNodeVersion = (stdout: string): string | null => { + const prefix = "nodeVersion:"; + const line = stdout + .split("\n") + .map((candidate) => candidate.trim()) + .find((candidate) => candidate.startsWith(prefix)); + if (line === undefined) return null; + const version = line.slice(prefix.length).trim(); + return version.length > 0 ? version : null; +}; + export const formatMissingToolsReason = ( report: ToolchainReport, requiredRange: string | null, @@ -370,7 +388,10 @@ export const formatMissingToolsReason = ( return `WSL distro is missing required tools: ${issues.join(", ")}. Install ${remediations.join(" and ")}, then retry.`; }; -const ensureNodePtyImpl = ( +// Exported for unit testing the preflight branches directly against a fake +// ChildProcessSpawner without needing to also fake the separate `wslpath` +// round trip that `windowsToWslPath` performs. +export const ensureNodePtyImpl = ( distro: string | null, windowsRepoRoot: string, windowsToWslPath: ( @@ -443,6 +464,29 @@ const ensureNodePtyImpl = ( } as const; } + // Some node resolved -- but resolving isn't enough, it also has to + // satisfy engines.node. A too-old Node (e.g. nvm's default alias still + // pointing at 18) loads and runs the server bundle without error, but + // `import.meta.main` is undefined pre-20.11/22.x, so the server's + // `if (import.meta.main)` launch gate never fires: the process exits + // 0 with no stdout/stderr and nothing ever binds the port -- identical + // symptoms to the stdin-EOF race below, but a structurally different + // cause that the probe previously never checked for once *any* node + // resolved. + const nodeVersion = parseNodeVersion(probe.stdout); + const requiredRange = options.nodeEngineRange?.trim() || null; + if ( + requiredRange !== null && + nodeVersion !== null && + !satisfiesSemverRange(nodeVersion, requiredRange) + ) { + return { + ok: false, + reason: `Found Node.js v${nodeVersion} at ${nodePath}, which does not satisfy the required range ${requiredRange}. Activate a supported version (e.g. nvm alias default 22 && nvm use 22) and restart T3 Code.`, + fatal: true, + } as const; + } + // Server dependencies (e.g. "effect") couldn't be resolved on the WSL // filesystem — a packaging regression, since the server bundle needs its // node_modules unpacked from the asar. Fatal so wsl-only mode falls back to From 63c44093837e215b33e13952cc1fc819769401ad Mon Sep 17 00:00:00 2001 From: Josh Gratton Date: Tue, 30 Jun 2026 17:25:52 -0400 Subject: [PATCH 2/7] Detach the bootstrap reader once the envelope is parsed readBootstrapEnvelope's cleanup() (closes the readline interface, destroys its backing stream) was only ever wired as the Effect async registration's interrupt finalizer, so it never ran on a normal successful parse, decode failure, fd-unavailable, or stream-close outcome. The readline interface and its stream stayed attached to the bootstrap fd for the rest of the process lifetime. That was harmless while the desktop side closed stdin immediately after writing the bootstrap line. It stopped being harmless once the WSL bootstrap path (this branch's earlier stdin-EOF-race fix) started holding stdin open indefinitely: a readline interface left idling on a never-closing, wsl.exe-relayed stdin pipe surfaced a spurious EAGAIN 'error' event well after the envelope was already read. Node's readline.Interface registers its own internal 'error' listener on the input stream and re-emits it onto itself; since nothing here listened for 'error' on the interface itself, the unhandled re-emit crashed the process before it ever logged "Listening on" -- the WSL backend crash-looping observed during live validation. Call cleanup() at the start of every terminal branch (handleError, handleLine, handleClose) so the reader stops touching the fd as soon as it resolves one way or another, instead of only on interruption. Updated bootstrap.test.ts: now that cleanup() always destroys the underlying autoClose stream, two tests that separately closed the same fd in an acquireRelease finalizer raced that destroy and hit EBADF; switched them to the no-acquireRelease pattern already used elsewhere in this file for the same reason. Added a regression test asserting the fd is actually closed after a successful parse. --- apps/server/src/bootstrap.test.ts | 71 +++++++++++++++++++++++++++---- apps/server/src/bootstrap.ts | 21 +++++++++ 2 files changed, 84 insertions(+), 8 deletions(-) diff --git a/apps/server/src/bootstrap.test.ts b/apps/server/src/bootstrap.test.ts index 05155f32ec4..ed0b54a206e 100644 --- a/apps/server/src/bootstrap.test.ts +++ b/apps/server/src/bootstrap.test.ts @@ -69,10 +69,14 @@ it.layer(NodeServices.layer)("readBootstrapEnvelope", (it) => { `${yield* encodeTestEnvelopeSchema({ mode: "desktop" })}\n`, ); - const fd = yield* Effect.acquireRelease( - Effect.sync(() => NodeFS.openSync(filePath, "r")), - (fd) => Effect.sync(() => NodeFS.closeSync(fd)), - ); + // Open without acquireRelease: readBootstrapEnvelope now detaches its + // reader (closing the readline interface and destroying its backing + // stream, which is opened with autoClose: true) as soon as it resolves, + // on every path -- not just the duplication-fallback path below. The + // stream therefore owns fd's lifecycle here; also closing it + // synchronously in a finalizer would race the stream's async close and + // produce an uncaught EBADF. + const fd = NodeFS.openSync(filePath, "r"); const payload = yield* readBootstrapEnvelope(TestEnvelopeSchema, fd, { timeoutMs: 100 }); assertSome(payload, { @@ -81,6 +85,57 @@ it.layer(NodeServices.layer)("readBootstrapEnvelope", (it) => { }), ); + it.effect( + "stops reading from the fd once the envelope is parsed instead of leaving the reader attached", + () => + Effect.gen(function* () { + const fs = yield* FileSystem.FileSystem; + const filePath = yield* fs.makeTempFileScoped({ + prefix: "t3-bootstrap-", + suffix: ".ndjson", + }); + + yield* fs.writeFileString( + filePath, + `${yield* encodeTestEnvelopeSchema({ mode: "desktop" })}\n`, + ); + + // Force the win32 branch (resolveFdPath returns undefined there) so + // this test is deterministic on every OS it runs on: it always + // exercises makeDirectBootstrapStream's autoClose: true stream over + // the original fd, regardless of the host platform actually running + // the suite. + const fd = NodeFS.openSync(filePath, "r"); + + const payload = yield* readBootstrapEnvelope(TestEnvelopeSchema, fd, { + timeoutMs: 100, + }).pipe(Effect.provideService(HostProcessPlatform, "win32")); + assertSome(payload, { mode: "desktop" }); + + // Regression guard for the bug fixed alongside the WSL stdin-held-open + // change: previously, the cleanup() that closes the readline + // interface and destroys its backing stream was wired only as the + // async-interrupt finalizer, so it never ran on a successful parse -- + // the reader (and the fd's stream) stayed attached and "live" for the + // rest of the process. If that regresses, this fd will still be open + // here and fstatSync will succeed instead of throwing EBADF. + const isFdOpen = Effect.sync(() => { + try { + NodeFS.fstatSync(fd); + return true; + } catch { + return false; + } + }); + let fdStillOpen = yield* isFdOpen; + for (let attempt = 0; fdStillOpen && attempt < 20; attempt++) { + yield* Effect.sleep(Duration.millis(25)); + fdStillOpen = yield* isFdOpen; + } + assert.isFalse(fdStillOpen, "expected the bootstrap reader to have closed its fd"); + }), + ); + it.effect("falls back to reading the inherited fd when path duplication fails", () => Effect.gen(function* () { const fs = yield* FileSystem.FileSystem; @@ -183,10 +238,10 @@ it.layer(NodeServices.layer)("readBootstrapEnvelope", (it) => { const filePath = yield* fs.makeTempFileScoped({ prefix: "t3-bootstrap-", suffix: ".ndjson" }); yield* fs.writeFileString(filePath, '{"mode":42}\n'); - const fd = yield* Effect.acquireRelease( - Effect.sync(() => NodeFS.openSync(filePath, "r")), - (fd) => Effect.sync(() => NodeFS.closeSync(fd)), - ); + // No acquireRelease here either -- a decode failure is still a + // terminal, detaching outcome (see the comment on the first test in + // this suite), so the stream's autoClose owns fd the same way. + const fd = NodeFS.openSync(filePath, "r"); const error = yield* readBootstrapEnvelope(TestEnvelopeSchema, fd, { timeoutMs: 100, }).pipe(Effect.flip); diff --git a/apps/server/src/bootstrap.ts b/apps/server/src/bootstrap.ts index 04dcf277e21..bb9bc181dc3 100644 --- a/apps/server/src/bootstrap.ts +++ b/apps/server/src/bootstrap.ts @@ -102,7 +102,26 @@ export const readBootstrapEnvelope = Effect.fn("readBootstrapEnvelope")(function stream.destroy(); }; + // Every branch below is terminal -- it resolves the envelope read one way + // or another -- so each one stops reading from `fd` before resuming. + // Without this, the readline interface and its backing stream stay + // attached and keep reading from `fd` for the rest of the process + // lifetime. That was harmless while the desktop side closed stdin right + // after writing the bootstrap line (the stream would hit EOF almost + // immediately either way), but it stopped being harmless once the WSL + // bootstrap path started holding stdin open indefinitely (see + // DesktopBackendManager.ts's runBackendProcess): a readline interface + // left idling on a never-closing, wsl.exe-relayed stdin pipe can surface + // a spurious EAGAIN 'error' event on that stream well after the envelope + // was already read. Node's readline.Interface registers its own internal + // 'error' listener on the input stream and re-emits onto itself + // (node:internal/readline/interface.js); since nothing here ever + // listened for 'error' on the *interface*, an unhandled 'error' event on + // an EventEmitter is fatal and crashes the process. Calling cleanup() + // synchronously closes the readline interface and destroys the stream + // before that can happen. const handleError = (error: Error) => { + cleanup(); if (isUnavailableBootstrapFdError(error)) { resume(Effect.succeedNone); return; @@ -118,6 +137,7 @@ export const readBootstrapEnvelope = Effect.fn("readBootstrapEnvelope")(function }; const handleLine = (line: string) => { + cleanup(); const parsed = decodeJsonResult(schema)(line); if (Result.isSuccess(parsed)) { resume(Effect.succeedSome(parsed.success)); @@ -134,6 +154,7 @@ export const readBootstrapEnvelope = Effect.fn("readBootstrapEnvelope")(function }; const handleClose = () => { + cleanup(); resume(Effect.succeedNone); }; From 8c8e132725aaac153022b56baebc8e743873466a Mon Sep 17 00:00:00 2001 From: Josh Gratton Date: Tue, 30 Jun 2026 18:13:29 -0400 Subject: [PATCH 3/7] Gate WSL Node engine-range check on a successful probe Check the resolved Node version against nodeEngineRange only once the rest of the preflight probe has succeeded (exitCode === 0), instead of unconditionally as soon as a node path resolves. This avoids masking a more specific, unrelated probe failure (e.g. the exitCode === 3 unpacked-deps case) behind a Node-version error in the rare case both conditions are true at once. --- apps/desktop/src/wsl/DesktopWslEnvironment.ts | 52 ++++++++++--------- 1 file changed, 28 insertions(+), 24 deletions(-) diff --git a/apps/desktop/src/wsl/DesktopWslEnvironment.ts b/apps/desktop/src/wsl/DesktopWslEnvironment.ts index 0cb1ffe2c3a..947a5cd84b6 100644 --- a/apps/desktop/src/wsl/DesktopWslEnvironment.ts +++ b/apps/desktop/src/wsl/DesktopWslEnvironment.ts @@ -464,29 +464,6 @@ export const ensureNodePtyImpl = ( } as const; } - // Some node resolved -- but resolving isn't enough, it also has to - // satisfy engines.node. A too-old Node (e.g. nvm's default alias still - // pointing at 18) loads and runs the server bundle without error, but - // `import.meta.main` is undefined pre-20.11/22.x, so the server's - // `if (import.meta.main)` launch gate never fires: the process exits - // 0 with no stdout/stderr and nothing ever binds the port -- identical - // symptoms to the stdin-EOF race below, but a structurally different - // cause that the probe previously never checked for once *any* node - // resolved. - const nodeVersion = parseNodeVersion(probe.stdout); - const requiredRange = options.nodeEngineRange?.trim() || null; - if ( - requiredRange !== null && - nodeVersion !== null && - !satisfiesSemverRange(nodeVersion, requiredRange) - ) { - return { - ok: false, - reason: `Found Node.js v${nodeVersion} at ${nodePath}, which does not satisfy the required range ${requiredRange}. Activate a supported version (e.g. nvm alias default 22 && nvm use 22) and restart T3 Code.`, - fatal: true, - } as const; - } - // Server dependencies (e.g. "effect") couldn't be resolved on the WSL // filesystem — a packaging regression, since the server bundle needs its // node_modules unpacked from the asar. Fatal so wsl-only mode falls back to @@ -501,7 +478,34 @@ export const ensureNodePtyImpl = ( } as const; } - if (probe.exitCode === 0) return { ok: true, nodePath, resolvedPath } as const; + if (probe.exitCode === 0) { + // The rest of the probe succeeded -- but that isn't enough on its own, + // it also has to satisfy engines.node. A too-old Node (e.g. nvm's + // default alias still pointing at 18) loads and runs the server bundle + // without error, but `import.meta.main` is undefined pre-20.11/22.x, so + // the server's `if (import.meta.main)` launch gate never fires: the + // process exits 0 with no stdout/stderr and nothing ever binds the port + // -- identical symptoms to the stdin-EOF race below, but a structurally + // different cause that the probe previously never checked for once + // *any* node resolved. Gated on exitCode === 0 (rather than checked + // unconditionally as soon as nodePath resolves) so this doesn't mask a + // more specific, unrelated probe failure (e.g. the exitCode === 3 case + // above) behind a Node-version error in the rare case both are true. + const nodeVersion = parseNodeVersion(probe.stdout); + const requiredRange = options.nodeEngineRange?.trim() || null; + if ( + requiredRange !== null && + nodeVersion !== null && + !satisfiesSemverRange(nodeVersion, requiredRange) + ) { + return { + ok: false, + reason: `Found Node.js v${nodeVersion} at ${nodePath}, which does not satisfy the required range ${requiredRange}. Activate a supported version (e.g. nvm alias default 22 && nvm use 22) and restart T3 Code.`, + fatal: true, + } as const; + } + return { ok: true, nodePath, resolvedPath } as const; + } if (options.allowBuild !== true) { const packagedProbeFailure = formatNodePtyProbeFailureReason(probe.exitCode); From 4d3e49bb4e606c8c2adc21f12bf3784568f897c0 Mon Sep 17 00:00:00 2001 From: Josh Gratton Date: Tue, 30 Jun 2026 18:28:52 -0400 Subject: [PATCH 4/7] Track never-ready exits with a dedicated counter, fail closed on missing Node version Three independent automated reviewers (Macroscope, Cursor Bugbot, Codex) flagged that the never-ready cap reused restartAttempt, which is also incremented by ordinary pre-spawn preflight retries (e.g. WSL cold-start) and never reset on a fresh start. This let the cap trip on far fewer than 5 actual post-spawn never-ready exits, and gave no fresh retry budget after it fired once. Add neverReadyAttempt, a counter scoped strictly to post-spawn exits, reset on a successful onReady or a clean start from a stopped state (mirroring the existing preflightFailureAttempt pattern). Also fail closed in the WSL Node engine-range check: if a range is required but the probe didn't report a version, reject rather than silently letting an unchecked Node through (Cursor Bugbot). --- .../src/backend/DesktopBackendManager.test.ts | 133 ++++++++++++++++++ .../src/backend/DesktopBackendManager.ts | 18 ++- .../src/wsl/DesktopWslEnvironment.test.ts | 27 ++++ apps/desktop/src/wsl/DesktopWslEnvironment.ts | 30 ++-- 4 files changed, 195 insertions(+), 13 deletions(-) diff --git a/apps/desktop/src/backend/DesktopBackendManager.test.ts b/apps/desktop/src/backend/DesktopBackendManager.test.ts index 9c428d924d0..553476ff4d2 100644 --- a/apps/desktop/src/backend/DesktopBackendManager.test.ts +++ b/apps/desktop/src/backend/DesktopBackendManager.test.ts @@ -817,6 +817,139 @@ describe("DesktopBackendManager", () => { ), ); + it.effect( + "pre-spawn transient preflight retries do not count toward the never-ready cap (bot-flagged fix)", + () => + Effect.scoped( + Effect.gen(function* () { + const starts = yield* Queue.unbounded(); + let startCount = 0; + const failures: Array<{ reason: string; fatal: boolean }> = []; + const stillColdStarting = yield* Ref.make(true); + + const spawnerLayer = Layer.succeed( + ChildProcessSpawner.ChildProcessSpawner, + ChildProcessSpawner.make(() => + Effect.sync(() => { + startCount += 1; + return makeProcess({ + exitCode: Queue.offer(starts, startCount).pipe( + Effect.as(ChildProcessSpawner.ExitCode(0)), + ), + }); + }), + ), + ); + + const instance = yield* makeTestInstance({ + spawnerLayer, + configResolve: Ref.get(stillColdStarting).pipe( + Effect.map((coldStarting) => + coldStarting + ? { + ...baseConfig, + bootstrapDelivery: "stdin", + preflightFailure: Option.some({ + reason: "WSL is still cold-starting", + fatal: false, + }), + } + : { ...baseConfig, bootstrapDelivery: "stdin" }, + ), + ), + httpClientLayer: httpClientLayer(() => Effect.never), + onPreflightFailed: (failure) => + Effect.sync(() => { + failures.push(failure); + }).pipe(Effect.as(false)), + }); + + yield* instance.start; + + // Several transient (unbounded, pre-spawn) preflight retries -- the + // process never spawns during this window, so the general restart + // counter climbs well past MAX_PREFLIGHT_FAILURE_ATTEMPTS, but the + // never-ready cap must not be affected by any of this. + for (let i = 0; i < 8; i++) { + yield* TestClock.adjust(Duration.seconds(10)); + } + assert.equal(yield* Queue.size(starts), 0); + assert.deepEqual(failures, []); + + // Cold start finishes: preflight goes clean and the process + // actually spawns, but exits before ever becoming ready, repeatedly. + yield* Ref.set(stillColdStarting, false); + for (let i = 0; i < 10; i++) { + yield* TestClock.adjust(Duration.seconds(10)); + } + + // Must take its own fresh 5 consecutive post-spawn never-ready + // exits -- not fire on the first one because the pre-spawn retries + // had already elevated an unrelated counter. + assert.equal(failures.length, 1); + assert.equal(failures[0]?.fatal, true); + assert.isTrue(startCount >= 5 && startCount <= 7); + }).pipe(Effect.provide(TestClock.layer())), + ), + ); + + it.effect( + "a fresh start after the never-ready cap fires gets a new retry budget (bot-flagged fix)", + () => + Effect.scoped( + Effect.gen(function* () { + const starts = yield* Queue.unbounded(); + let startCount = 0; + const failures: Array<{ reason: string; fatal: boolean }> = []; + + const spawnerLayer = Layer.succeed( + ChildProcessSpawner.ChildProcessSpawner, + ChildProcessSpawner.make(() => + Effect.sync(() => { + startCount += 1; + return makeProcess({ + exitCode: Queue.offer(starts, startCount).pipe( + Effect.as(ChildProcessSpawner.ExitCode(0)), + ), + }); + }), + ), + ); + + const instance = yield* makeTestInstance({ + spawnerLayer, + config: { ...baseConfig, bootstrapDelivery: "stdin" }, + httpClientLayer: httpClientLayer(() => Effect.never), + onPreflightFailed: (failure) => + Effect.sync(() => { + failures.push(failure); + }).pipe(Effect.as(false)), + }); + + yield* instance.start; + for (let i = 0; i < 10; i++) { + yield* TestClock.adjust(Duration.seconds(10)); + } + assert.equal(failures.length, 1); + assert.equal((yield* instance.snapshot).desiredRunning, false); + + const startCountAtTrip = startCount; + + // Restarting after the cap fired (without ever calling stop()) + // must not immediately re-trip on the very next failure because a + // stale counter survived from before. + yield* instance.start; + for (let i = 0; i < 10; i++) { + yield* TestClock.adjust(Duration.seconds(10)); + } + + assert.equal(failures.length, 2); + const spawnsSinceRestart = startCount - startCountAtTrip; + assert.isTrue(spawnsSinceRestart >= 5 && spawnsSinceRestart <= 7); + }).pipe(Effect.provide(TestClock.layer())), + ), + ); + it.effect("cancels a scheduled restart when start is requested manually", () => Effect.scoped( Effect.gen(function* () { diff --git a/apps/desktop/src/backend/DesktopBackendManager.ts b/apps/desktop/src/backend/DesktopBackendManager.ts index dbc91db54a6..0071afae94a 100644 --- a/apps/desktop/src/backend/DesktopBackendManager.ts +++ b/apps/desktop/src/backend/DesktopBackendManager.ts @@ -236,6 +236,13 @@ interface BackendManagerState { // Consecutive bounded/fatal preflight failures, reset on a clean or // unbounded-transient preflight. restartAttempt counts all restarts. readonly preflightFailureAttempt: number; + // Consecutive post-spawn exits that never reached readiness for this + // instance, reset on a successful onReady or a fresh start from a + // stopped state. Deliberately separate from restartAttempt, which also + // counts pre-spawn preflight retries (e.g. WSL cold-start) -- using + // restartAttempt for the never-ready cap would let ordinary transient + // preflight retries trip it well before 5 actual post-spawn failures. + readonly neverReadyAttempt: number; readonly restartFiber: Option.Option>; readonly nextRunId: number; } @@ -247,6 +254,7 @@ const initialState: BackendManagerState = { active: Option.none(), restartAttempt: 0, preflightFailureAttempt: 0, + neverReadyAttempt: 0, restartFiber: Option.none(), nextRunId: 1, }; @@ -487,6 +495,7 @@ export const makeBackendInstance = Effect.fn("makeBackendInstance")(function* ( const resetFatalPreflightCounter = !current.desiredRunning && current.preflightFailureAttempt > 0; + const resetNeverReadyCounter = !current.desiredRunning && current.neverReadyAttempt > 0; yield* cancelRestart; yield* Ref.update(state, (latest) => ({ ...latest, @@ -494,6 +503,7 @@ export const makeBackendInstance = Effect.fn("makeBackendInstance")(function* ( ready: false, config: Option.some(config.value), preflightFailureAttempt: resetFatalPreflightCounter ? 0 : latest.preflightFailureAttempt, + neverReadyAttempt: resetNeverReadyCounter ? 0 : latest.neverReadyAttempt, })); const preflightFailure = config.value.preflightFailure; @@ -616,6 +626,7 @@ export const makeBackendInstance = Effect.fn("makeBackendInstance")(function* ( ...latest, active: Option.none(), ready: false, + neverReadyAttempt: latest.neverReadyAttempt + 1, }; return [ { @@ -653,15 +664,15 @@ export const makeBackendInstance = Effect.fn("makeBackendInstance")(function* ( // backend crash-looping for an unrelated reason. if ( config.value.bootstrapDelivery === "stdin" && - nextState.restartAttempt >= MAX_PREFLIGHT_FAILURE_ATTEMPTS + nextState.neverReadyAttempt >= MAX_PREFLIGHT_FAILURE_ATTEMPTS ) { yield* logInstanceError( "backend exited repeatedly before becoming ready; surfacing and falling back", - { reason, attempt: nextState.restartAttempt }, + { reason, attempt: nextState.neverReadyAttempt }, ); const shouldRestart = yield* ( spec.onPreflightFailed?.({ - reason: `backend process exited ${nextState.restartAttempt} times in a row without becoming ready (last: ${reason})`, + reason: `backend process exited ${nextState.neverReadyAttempt} times in a row without becoming ready (last: ${reason})`, fatal: true, }) ?? Effect.succeed(false) ); @@ -706,6 +717,7 @@ export const makeBackendInstance = Effect.fn("makeBackendInstance")(function* ( { ...latest, restartAttempt: 0, + neverReadyAttempt: 0, ready: true, }, ] as const; diff --git a/apps/desktop/src/wsl/DesktopWslEnvironment.test.ts b/apps/desktop/src/wsl/DesktopWslEnvironment.test.ts index a65c37fa207..82c6856a535 100644 --- a/apps/desktop/src/wsl/DesktopWslEnvironment.test.ts +++ b/apps/desktop/src/wsl/DesktopWslEnvironment.test.ts @@ -310,6 +310,33 @@ describe("ensureNodePtyImpl: WSL Node engine-range preflight (#3611)", () => { ), )); + it("rejects (fail closed) when a range is required but the probe didn't report a version (bot-flagged fix)", () => + Effect.gen(function* () { + const result = yield* ensureNodePtyImpl( + "Ubuntu", + "C:\\repo", + windowsToWslPath, + { nodeEngineRange: "^22.16 || ^23.11 || >=24.10" }, + ); + expect(result.ok).toBe(false); + if (!result.ok) { + expect(result.fatal).toBe(true); + expect(result.reason).toContain("^22.16 || ^23.11 || >=24.10"); + } + }).pipe( + Effect.provideService( + ChildProcessSpawner.ChildProcessSpawner, + makeProbeSpawner( + // No nodeVersion: line at all -- the probe omitted/failed to + // report it even though node itself resolved fine. + [ + "nodePath:/home/josh/.nvm/versions/node/v18.20.8/bin/node", + "resolvedPath:/home/josh/.nvm/versions/node/v18.20.8/bin:/usr/bin", + ].join("\n"), + ), + ), + )); + it("does not false-positive when the resolved Node satisfies nodeEngineRange (happy path unchanged)", () => Effect.gen(function* () { const result = yield* ensureNodePtyImpl( diff --git a/apps/desktop/src/wsl/DesktopWslEnvironment.ts b/apps/desktop/src/wsl/DesktopWslEnvironment.ts index 947a5cd84b6..178dba050cb 100644 --- a/apps/desktop/src/wsl/DesktopWslEnvironment.ts +++ b/apps/desktop/src/wsl/DesktopWslEnvironment.ts @@ -493,16 +493,26 @@ export const ensureNodePtyImpl = ( // above) behind a Node-version error in the rare case both are true. const nodeVersion = parseNodeVersion(probe.stdout); const requiredRange = options.nodeEngineRange?.trim() || null; - if ( - requiredRange !== null && - nodeVersion !== null && - !satisfiesSemverRange(nodeVersion, requiredRange) - ) { - return { - ok: false, - reason: `Found Node.js v${nodeVersion} at ${nodePath}, which does not satisfy the required range ${requiredRange}. Activate a supported version (e.g. nvm alias default 22 && nvm use 22) and restart T3 Code.`, - fatal: true, - } as const; + if (requiredRange !== null) { + if (nodeVersion === null) { + // A range is required but the probe didn't report a version -- + // fail closed rather than silently letting an unchecked Node + // through (the exact failure class this check exists to catch). + // Fatal/bounded like the mismatch case below, so a one-off probe + // hiccup still gets a few retries before falling back. + return { + ok: false, + reason: `Could not determine the WSL Node.js version, which is required to satisfy ${requiredRange}. The version probe did not report a version; please report this if it persists.`, + fatal: true, + } as const; + } + if (!satisfiesSemverRange(nodeVersion, requiredRange)) { + return { + ok: false, + reason: `Found Node.js v${nodeVersion} at ${nodePath}, which does not satisfy the required range ${requiredRange}. Activate a supported version (e.g. nvm alias default 22 && nvm use 22) and restart T3 Code.`, + fatal: true, + } as const; + } } return { ok: true, nodePath, resolvedPath } as const; } From 000e9de37faa8702ba7fc4d59dd5a5e2132931b8 Mon Sep 17 00:00:00 2001 From: Josh Gratton Date: Tue, 30 Jun 2026 18:39:50 -0400 Subject: [PATCH 5/7] Listen for bootstrap reader errors on the readline interface, not the raw stream An adversarial review correctly found that registering the error handler on the raw stream (rather than the readline Interface) left a gap: an error on the stream before any line ever arrives is an uncaught exception regardless of cleanup(), because readline's own internal error listener is attached to the stream first and re-emits onto the interface with no listener, which throws before our handler ever runs. Verified directly against the production WSL Node version (v24.14.0): an error emitted on the stream before any line/close event crashes the process when only a stream-level listener is registered, and is handled cleanly once the listener moves to the interface. The previous fix's cleanup()-on-every-terminal-path logic still matters for errors *after* a line has been read or the reader has otherwise started winding down; this closes the remaining pre-first-line gap. --- apps/server/src/bootstrap.ts | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/apps/server/src/bootstrap.ts b/apps/server/src/bootstrap.ts index bb9bc181dc3..8c4f3c5964d 100644 --- a/apps/server/src/bootstrap.ts +++ b/apps/server/src/bootstrap.ts @@ -95,7 +95,7 @@ export const readBootstrapEnvelope = Effect.fn("readBootstrapEnvelope")(function }); const cleanup = () => { - stream.removeListener("error", handleError); + input.removeListener("error", handleError); input.removeListener("line", handleLine); input.removeListener("close", handleClose); input.close(); @@ -113,13 +113,22 @@ export const readBootstrapEnvelope = Effect.fn("readBootstrapEnvelope")(function // DesktopBackendManager.ts's runBackendProcess): a readline interface // left idling on a never-closing, wsl.exe-relayed stdin pipe can surface // a spurious EAGAIN 'error' event on that stream well after the envelope - // was already read. Node's readline.Interface registers its own internal - // 'error' listener on the input stream and re-emits onto itself - // (node:internal/readline/interface.js); since nothing here ever - // listened for 'error' on the *interface*, an unhandled 'error' event on - // an EventEmitter is fatal and crashes the process. Calling cleanup() - // synchronously closes the readline interface and destroys the stream - // before that can happen. + // was already read. + // + // The error listener is registered on the readline *interface* (`input`), + // not the raw `stream`, and deliberately not on both. Node's + // readline.Interface attaches its own internal 'error' listener to the + // input stream at construction time and, on error, re-emits onto itself + // (node:internal/readline/interface.js); that internal listener runs + // before any listener registered on `stream` afterward, and a thrown + // exception from one stream listener prevents later ones on the same + // stream from running at all -- so a `stream.once("error", ...)` + // registered here would never fire for an error that happens before the + // first line ever arrives (verified directly: an error emitted on + // `stream` before any "line"/"close" event is an uncaught exception with + // only a stream-level listener, since readline's re-emit-with-no-listener + // on the interface throws first). Listening on `input` instead receives + // the same re-emitted error reliably, exactly once, in every case. const handleError = (error: Error) => { cleanup(); if (isUnavailableBootstrapFdError(error)) { @@ -158,7 +167,7 @@ export const readBootstrapEnvelope = Effect.fn("readBootstrapEnvelope")(function resume(Effect.succeedNone); }; - stream.once("error", handleError); + input.once("error", handleError); input.once("line", handleLine); input.once("close", handleClose); From e7652e96baf6ae2522af1896984a1bd734ff5053 Mon Sep 17 00:00:00 2001 From: Josh Gratton Date: Thu, 2 Jul 2026 08:36:33 -0400 Subject: [PATCH 6/7] Share the fatal-failure surfacing path and park when the fallback doesn't take Extract surfaceFatalFailure as the common tail of the fatal-preflight cap and the never-ready cap, and give the never-ready path the same already-surfaced guard the preflight path has: if the cap fired and the config still resolves a stdin backend that exits before ready, park the instance instead of re-showing the fallback dialog on every exit. Also fix two type errors in the new tests (stdin captures the full CommandInput | StdinConfig union, fd3 keeps its PlatformError channel). --- .../src/backend/DesktopBackendManager.test.ts | 66 ++++++++++++++++- .../src/backend/DesktopBackendManager.ts | 73 ++++++++++++------- 2 files changed, 110 insertions(+), 29 deletions(-) diff --git a/apps/desktop/src/backend/DesktopBackendManager.test.ts b/apps/desktop/src/backend/DesktopBackendManager.test.ts index 553476ff4d2..0bd5d88b939 100644 --- a/apps/desktop/src/backend/DesktopBackendManager.test.ts +++ b/apps/desktop/src/backend/DesktopBackendManager.test.ts @@ -10,6 +10,7 @@ import * as Fiber from "effect/Fiber"; import * as FileSystem from "effect/FileSystem"; import * as Layer from "effect/Layer"; import * as Option from "effect/Option"; +import * as PlatformError from "effect/PlatformError"; import * as Queue from "effect/Queue"; import * as Ref from "effect/Ref"; import * as Schema from "effect/Schema"; @@ -228,7 +229,7 @@ describe("DesktopBackendManager", () => { // --- stdin-delivered (WSL) path: the bootstrap stream must never // complete in normal operation, eliminating the readline 'line' vs // 'close' race in apps/server/src/bootstrap.ts. --- - let stdinStream: ChildProcess.CommandInput | undefined; + let stdinStream: ChildProcess.CommandInput | ChildProcess.StdinConfig | undefined; const stdinSpawned = yield* Deferred.make(); const stdinSpawnerLayer = Layer.succeed( ChildProcessSpawner.ChildProcessSpawner, @@ -258,8 +259,8 @@ describe("DesktopBackendManager", () => { yield* instance.start; yield* Deferred.await(stdinSpawned); assert.isDefined(stdinStream); - if (typeof stdinStream === "string") { - throw new Error("Expected stdin to be a Stream, not a CommandInput string."); + if (!Stream.isStream(stdinStream)) { + throw new Error("Expected stdin to be a raw Stream, not a string stdio config."); } const drained = yield* Stream.runDrain(stdinStream).pipe( @@ -274,7 +275,7 @@ describe("DesktopBackendManager", () => { // --- fd3-delivered (Windows-native) path: unaffected regression // check -- the bootstrap stream must still terminate normally. --- - let fd3Stream: Stream.Stream | undefined; + let fd3Stream: Stream.Stream | undefined; const fd3Spawned = yield* Deferred.make(); const fd3SpawnerLayer = Layer.succeed( ChildProcessSpawner.ChildProcessSpawner, @@ -950,6 +951,63 @@ describe("DesktopBackendManager", () => { ), ); + it.effect( + "stops without re-surfacing when the never-ready fallback didn't take (config still resolves stdin)", + () => + Effect.scoped( + Effect.gen(function* () { + const starts = yield* Queue.unbounded(); + let startCount = 0; + const failures: Array<{ reason: string; fatal: boolean }> = []; + + const spawnerLayer = Layer.succeed( + ChildProcessSpawner.ChildProcessSpawner, + ChildProcessSpawner.make(() => + Effect.sync(() => { + startCount += 1; + return makeProcess({ + exitCode: Queue.offer(starts, startCount).pipe( + Effect.as(ChildProcessSpawner.ExitCode(0)), + ), + }); + }), + ), + ); + + const instance = yield* makeTestInstance({ + spawnerLayer, + config: { ...baseConfig, bootstrapDelivery: "stdin" }, + httpClientLayer: httpClientLayer(() => Effect.never), + // Asks for the post-fallback restart (like the wsl-only primary + // does after persisting the Windows fallback), but the config + // above keeps resolving the same stdin backend -- the fallback + // "didn't take". + onPreflightFailed: (failure) => + Effect.sync(() => { + failures.push(failure); + }).pipe(Effect.as(true)), + }); + + yield* instance.start; + for (let i = 0; i < 12; i++) { + yield* TestClock.adjust(Duration.seconds(10)); + } + + // Surfaced exactly once; the guard parks the instance on the next + // never-ready exit instead of re-showing the dialog on every exit. + assert.equal(failures.length, 1); + assert.equal(failures[0]?.fatal, true); + assert.isTrue( + startCount >= 6 && startCount <= 8, + "one extra spawn after the cap fired, then stop", + ); + + const snapshot = yield* instance.snapshot; + assert.equal(snapshot.desiredRunning, false); + }).pipe(Effect.provide(TestClock.layer())), + ), + ); + it.effect("cancels a scheduled restart when start is requested manually", () => Effect.scoped( Effect.gen(function* () { diff --git a/apps/desktop/src/backend/DesktopBackendManager.ts b/apps/desktop/src/backend/DesktopBackendManager.ts index 0071afae94a..ff4d1effd3f 100644 --- a/apps/desktop/src/backend/DesktopBackendManager.ts +++ b/apps/desktop/src/backend/DesktopBackendManager.ts @@ -464,6 +464,29 @@ export const makeBackendInstance = Effect.fn("makeBackendInstance")(function* ( }); }); + // Shared tail of both bounded-failure escalations (fatal preflight + // failures and the post-spawn never-ready cap): surface the failure via + // spec.onPreflightFailed, then either schedule the restart the handler + // asked for (it changed something -- e.g. persisted the Windows fallback + // -- and wants the next config resolve to pick it up) or park the + // instance. Referenced before scheduleRestart is declared, which is safe + // because the generator body only runs once the instance is started. + const surfaceFatalFailure = Effect.fn("desktop.backendInstance.surfaceFatalFailure")(function* ( + failure: PreflightFailure, + restartReason: string, + ) { + const shouldRestart = yield* spec.onPreflightFailed?.(failure) ?? Effect.succeed(false); + if (shouldRestart) { + yield* scheduleRestart(restartReason); + } else { + yield* Ref.update(state, (latest) => ({ + ...latest, + desiredRunning: false, + ready: false, + })); + } + }); + const start: Effect.Effect = Effect.suspend(() => mutex.withPermits(1)( Effect.gen(function* () { @@ -550,18 +573,7 @@ export const makeBackendInstance = Effect.fn("makeBackendInstance")(function* ( "backend preflight failed repeatedly; surfacing and falling back", { reason, attempt }, ); - const shouldRestart = yield* ( - spec.onPreflightFailed?.(preflightFailure.value) ?? Effect.succeed(false) - ); - if (shouldRestart) { - yield* scheduleRestart(reason); - } else { - yield* Ref.update(state, (latest) => ({ - ...latest, - desiredRunning: false, - ready: false, - })); - } + yield* surfaceFatalFailure(preflightFailure.value, reason); return; } yield* scheduleRestart(reason); @@ -666,24 +678,35 @@ export const makeBackendInstance = Effect.fn("makeBackendInstance")(function* ( config.value.bootstrapDelivery === "stdin" && nextState.neverReadyAttempt >= MAX_PREFLIGHT_FAILURE_ATTEMPTS ) { - yield* logInstanceError( - "backend exited repeatedly before becoming ready; surfacing and falling back", - { reason, attempt: nextState.neverReadyAttempt }, - ); - const shouldRestart = yield* ( - spec.onPreflightFailed?.({ - reason: `backend process exited ${nextState.neverReadyAttempt} times in a row without becoming ready (last: ${reason})`, - fatal: true, - }) ?? Effect.succeed(false) - ); - if (shouldRestart) { - yield* scheduleRestart(reason); - } else { + if (nextState.neverReadyAttempt > MAX_PREFLIGHT_FAILURE_ATTEMPTS) { + // Mirrors the preflight path's attempt > attemptLimit + // guard above: we already surfaced and asked for the + // fallback, yet the config still resolves a + // stdin-delivered backend that exits before ready -- the + // fallback didn't take (e.g. the settings write failed). + // Stop rather than re-surface the dialog on every + // subsequent exit. + yield* logInstanceError("backend still never-ready after fallback; stopping", { + reason, + attempt: nextState.neverReadyAttempt, + }); yield* Ref.update(state, (latest) => ({ ...latest, desiredRunning: false, ready: false, })); + } else { + yield* logInstanceError( + "backend exited repeatedly before becoming ready; surfacing and falling back", + { reason, attempt: nextState.neverReadyAttempt }, + ); + yield* surfaceFatalFailure( + { + reason: `backend process exited ${nextState.neverReadyAttempt} times in a row without becoming ready (last: ${reason})`, + fatal: true, + }, + reason, + ); } } else { yield* scheduleRestart(reason); From 80f8711e13459bd6671d82c923de7c21196a58d0 Mon Sep 17 00:00:00 2001 From: Josh Gratton Date: Thu, 2 Jul 2026 08:36:48 -0400 Subject: [PATCH 7/7] Reuse the toolchain-report parser and wording for the engine-range check Drop parseNodeVersion in favor of parseToolchainReport().nodeVersion, and source the out-of-range message from formatMissingToolsReason so the two paths can't give conflicting nvm advice (the resolved node path is still appended to pin down which install needs switching). A version mismatch is deterministic between back-to-back preflights, so return retryLimit: 1 and surface the dialog on the first attempt instead of burning the default five rounds; the fail-closed missing-version case keeps the default allowance for one-off probe hiccups. The probe also no longer pays a second Node cold start per preflight: the existing heredoc invocation now reports nodeVersion over fd 3. --- .../src/wsl/DesktopWslEnvironment.test.ts | 53 ++++++------------- apps/desktop/src/wsl/DesktopWslEnvironment.ts | 48 +++++++++-------- 2 files changed, 40 insertions(+), 61 deletions(-) diff --git a/apps/desktop/src/wsl/DesktopWslEnvironment.test.ts b/apps/desktop/src/wsl/DesktopWslEnvironment.test.ts index 82c6856a535..e18fc431c9c 100644 --- a/apps/desktop/src/wsl/DesktopWslEnvironment.test.ts +++ b/apps/desktop/src/wsl/DesktopWslEnvironment.test.ts @@ -18,7 +18,6 @@ import { formatNodePtyProbeFailureReason, formatWslShellTransportFailureReason, parseNodePath, - parseNodeVersion, parseResolvedPath, parseToolchainReport, probeWslDistros, @@ -196,25 +195,6 @@ describe("parseResolvedPath", () => { }); }); -describe("parseNodeVersion", () => { - it("extracts the version from a nodeVersion: line", () => { - const stdout = "nodePath:/usr/bin/node\nresolvedPath:/usr/bin\nnodeVersion:18.20.8\n"; - expect(parseNodeVersion(stdout)).toBe("18.20.8"); - }); - - it("returns null when there is no nodeVersion line at all", () => { - expect(parseNodeVersion("nodePath:/usr/bin/node\nresolvedPath:/usr/bin\n")).toBeNull(); - }); - - it("returns null when the value after the prefix is empty", () => { - expect(parseNodeVersion("nodeVersion:\n")).toBeNull(); - }); - - it("trims surrounding whitespace and a trailing carriage return", () => { - expect(parseNodeVersion(" nodeVersion:22.22.1\r\n")).toBe("22.22.1"); - }); -}); - describe("formatMissingToolsReason", () => { it("returns null when everything is present and node is in range", () => { expect( @@ -284,18 +264,18 @@ describe("ensureNodePtyImpl: WSL Node engine-range preflight (#3611)", () => { it("rejects a resolved Node that does not satisfy nodeEngineRange (the reporter's exact case)", () => Effect.gen(function* () { - const result = yield* ensureNodePtyImpl( - "Ubuntu", - "C:\\repo", - windowsToWslPath, - { nodeEngineRange: "^22.16 || ^23.11 || >=24.10" }, - ); + const result = yield* ensureNodePtyImpl("Ubuntu", "C:\\repo", windowsToWslPath, { + nodeEngineRange: "^22.16 || ^23.11 || >=24.10", + }); expect(result.ok).toBe(false); if (!result.ok) { expect(result.fatal).toBe(true); expect(result.reason).toContain("18.20.8"); expect(result.reason).toContain("^22.16 || ^23.11 || >=24.10"); expect(result.reason).toContain("/home/josh/.nvm/versions/node/v18.20.8/bin/node"); + // Deterministic mismatch: surfaced on the first attempt instead of + // burning the default preflight retry allowance. + expect(result.retryLimit).toBe(1); } }).pipe( Effect.provideService( @@ -312,16 +292,16 @@ describe("ensureNodePtyImpl: WSL Node engine-range preflight (#3611)", () => { it("rejects (fail closed) when a range is required but the probe didn't report a version (bot-flagged fix)", () => Effect.gen(function* () { - const result = yield* ensureNodePtyImpl( - "Ubuntu", - "C:\\repo", - windowsToWslPath, - { nodeEngineRange: "^22.16 || ^23.11 || >=24.10" }, - ); + const result = yield* ensureNodePtyImpl("Ubuntu", "C:\\repo", windowsToWslPath, { + nodeEngineRange: "^22.16 || ^23.11 || >=24.10", + }); expect(result.ok).toBe(false); if (!result.ok) { expect(result.fatal).toBe(true); expect(result.reason).toContain("^22.16 || ^23.11 || >=24.10"); + // Possibly a one-off probe hiccup: keeps the default retry allowance, + // unlike the deterministic version-mismatch case. + expect(result.retryLimit).toBeUndefined(); } }).pipe( Effect.provideService( @@ -339,12 +319,9 @@ describe("ensureNodePtyImpl: WSL Node engine-range preflight (#3611)", () => { it("does not false-positive when the resolved Node satisfies nodeEngineRange (happy path unchanged)", () => Effect.gen(function* () { - const result = yield* ensureNodePtyImpl( - "Ubuntu", - "C:\\repo", - windowsToWslPath, - { nodeEngineRange: "^22.16 || ^23.11 || >=24.10" }, - ); + const result = yield* ensureNodePtyImpl("Ubuntu", "C:\\repo", windowsToWslPath, { + nodeEngineRange: "^22.16 || ^23.11 || >=24.10", + }); expect(result).toEqual({ ok: true, nodePath: "/home/josh/.nvm/versions/node/v22.22.1/bin/node", diff --git a/apps/desktop/src/wsl/DesktopWslEnvironment.ts b/apps/desktop/src/wsl/DesktopWslEnvironment.ts index 178dba050cb..33e09845034 100644 --- a/apps/desktop/src/wsl/DesktopWslEnvironment.ts +++ b/apps/desktop/src/wsl/DesktopWslEnvironment.ts @@ -223,8 +223,13 @@ const NODE_PTY_PROBE_SCRIPT = ( linuxServerDir: string, ) => `printf 'nodePath:%s\\n' "$(command -v node 2>/dev/null)" printf 'resolvedPath:%s\\n' "$PATH" -printf 'nodeVersion:%s\\n' "$(node -p 'process.versions.node' 2>/dev/null)" -cd ${shellQuote(linuxServerDir)} && node <<'NODE' >/dev/null 2>&1 +cd ${shellQuote(linuxServerDir)} && node <<'NODE' 3>&1 >/dev/null 2>&1 +// Emit the version over fd 3 -- duplicated onto the probe's real stdout by +// the 3>&1 above, before stdout/stderr are silenced -- so the engine-range +// check reuses this node invocation instead of paying a second Node cold +// start on every preflight. First statement on purpose: the version must be +// reported even when the dependency checks below exit early. +require("node:fs").writeSync(3, "nodeVersion:" + process.versions.node + "\\n"); // The server bundle externalizes its deps to node_modules, and the WSL Node // can't read inside app.asar, so confirm those deps are unpacked on the real // filesystem before reporting the backend healthy. "effect" is the framework @@ -330,23 +335,6 @@ export const parseResolvedPath = (stdout: string): string | null => { return resolvedPath.length > 0 ? resolvedPath : null; }; -// Captures the resolved node's own reported version (`process.versions.node`) -// from NODE_PTY_PROBE_SCRIPT, so a node that resolves but doesn't satisfy -// `engines.node` can be caught at preflight instead of silently no-op'ing -// (Node < the version where `import.meta.main` exists makes the server's -// `if (import.meta.main)` launch gate never run, producing a clean exit -// code=0 with no stdout/stderr and nothing ever listening on the port). -export const parseNodeVersion = (stdout: string): string | null => { - const prefix = "nodeVersion:"; - const line = stdout - .split("\n") - .map((candidate) => candidate.trim()) - .find((candidate) => candidate.startsWith(prefix)); - if (line === undefined) return null; - const version = line.slice(prefix.length).trim(); - return version.length > 0 ? version : null; -}; - export const formatMissingToolsReason = ( report: ToolchainReport, requiredRange: string | null, @@ -491,15 +479,15 @@ export const ensureNodePtyImpl = ( // unconditionally as soon as nodePath resolves) so this doesn't mask a // more specific, unrelated probe failure (e.g. the exitCode === 3 case // above) behind a Node-version error in the rare case both are true. - const nodeVersion = parseNodeVersion(probe.stdout); + const nodeVersion = parseToolchainReport(probe.stdout).nodeVersion; const requiredRange = options.nodeEngineRange?.trim() || null; if (requiredRange !== null) { if (nodeVersion === null) { // A range is required but the probe didn't report a version -- // fail closed rather than silently letting an unchecked Node // through (the exact failure class this check exists to catch). - // Fatal/bounded like the mismatch case below, so a one-off probe - // hiccup still gets a few retries before falling back. + // Fatal/bounded with the default retry allowance, so a one-off + // probe hiccup still gets a few retries before falling back. return { ok: false, reason: `Could not determine the WSL Node.js version, which is required to satisfy ${requiredRange}. The version probe did not report a version; please report this if it persists.`, @@ -507,10 +495,24 @@ export const ensureNodePtyImpl = ( } as const; } if (!satisfiesSemverRange(nodeVersion, requiredRange)) { + // Single source for the out-of-range wording and remediation -- + // formatMissingToolsReason's nodeOutOfRange branch -- so this + // message can't drift from the toolchain-report path's. The + // resolved path is appended because it pins down WHICH node needs + // switching (e.g. an nvm default alias). + const mismatch = + formatMissingToolsReason({ missingTools: [], nodeVersion }, requiredRange) ?? + `WSL Node.js v${nodeVersion} does not satisfy the required range ${requiredRange}.`; return { ok: false, - reason: `Found Node.js v${nodeVersion} at ${nodePath}, which does not satisfy the required range ${requiredRange}. Activate a supported version (e.g. nvm alias default 22 && nvm use 22) and restart T3 Code.`, + reason: `${mismatch} (Resolved node: ${nodePath}.)`, fatal: true, + // Unlike the missing-version case above, the resolved version + // cannot change between the manager's back-to-back preflight + // retries, so surface the actionable error immediately instead + // of burning the default retry allowance on a deterministic + // failure. + retryLimit: 1, } as const; } }