diff --git a/apps/desktop/src/backend/DesktopBackendManager.test.ts b/apps/desktop/src/backend/DesktopBackendManager.test.ts index 858c9b0d560..0bd5d88b939 100644 --- a/apps/desktop/src/backend/DesktopBackendManager.test.ts +++ b/apps/desktop/src/backend/DesktopBackendManager.test.ts @@ -6,9 +6,11 @@ import { assert, describe, it } from "@effect/vitest"; import * as Deferred from "effect/Deferred"; import * as Duration from "effect/Duration"; import * as Effect from "effect/Effect"; +import * as Fiber from "effect/Fiber"; import * as FileSystem from "effect/FileSystem"; import * as Layer from "effect/Layer"; import * as Option from "effect/Option"; +import * as PlatformError from "effect/PlatformError"; import * as Queue from "effect/Queue"; import * as Ref from "effect/Ref"; import * as Schema from "effect/Schema"; @@ -220,6 +222,101 @@ describe("DesktopBackendManager", () => { ), ); + it.effect( + "holds stdin open forever for bootstrapDelivery:'stdin' (#3611 fix 1) while fd3 still terminates normally", + () => + Effect.gen(function* () { + // --- stdin-delivered (WSL) path: the bootstrap stream must never + // complete in normal operation, eliminating the readline 'line' vs + // 'close' race in apps/server/src/bootstrap.ts. --- + let stdinStream: ChildProcess.CommandInput | ChildProcess.StdinConfig | undefined; + const stdinSpawned = yield* Deferred.make(); + const stdinSpawnerLayer = Layer.succeed( + ChildProcessSpawner.ChildProcessSpawner, + ChildProcessSpawner.make((command) => + Effect.gen(function* () { + if (command._tag === "StandardCommand") { + stdinStream = command.options.stdin; + } + yield* Deferred.succeed(stdinSpawned, void 0); + return makeProcess({ exitCode: Effect.never }); + }), + ), + ); + + yield* Effect.scoped( + Effect.gen(function* () { + const instance = yield* makeTestInstance({ + spawnerLayer: stdinSpawnerLayer, + config: { + ...baseConfig, + bootstrapDelivery: "stdin", + args: ["/server/bin.mjs", "--bootstrap-fd", "0"], + }, + httpClientLayer: httpClientLayer(() => Effect.never), + }); + + yield* instance.start; + yield* Deferred.await(stdinSpawned); + assert.isDefined(stdinStream); + if (!Stream.isStream(stdinStream)) { + throw new Error("Expected stdin to be a raw Stream, not a string stdio config."); + } + + const drained = yield* Stream.runDrain(stdinStream).pipe( + Effect.timeoutOption(Duration.seconds(30)), + Effect.forkScoped, + ); + yield* TestClock.adjust(Duration.seconds(30)); + const result = yield* Fiber.join(drained); + assert.isTrue(Option.isNone(result), "stdin stream completed but must never end"); + }), + ); + + // --- fd3-delivered (Windows-native) path: unaffected regression + // check -- the bootstrap stream must still terminate normally. --- + let fd3Stream: Stream.Stream | undefined; + const fd3Spawned = yield* Deferred.make(); + const fd3SpawnerLayer = Layer.succeed( + ChildProcessSpawner.ChildProcessSpawner, + ChildProcessSpawner.make((command) => + Effect.gen(function* () { + if (command._tag === "StandardCommand") { + const fd3 = command.options.additionalFds?.fd3; + if (fd3?.type === "input") fd3Stream = fd3.stream; + } + yield* Deferred.succeed(fd3Spawned, void 0); + return makeProcess({ exitCode: Effect.never }); + }), + ), + ); + + yield* Effect.scoped( + Effect.gen(function* () { + const instance = yield* makeTestInstance({ + spawnerLayer: fd3SpawnerLayer, + httpClientLayer: httpClientLayer(() => Effect.never), + }); + + yield* instance.start; + yield* Deferred.await(fd3Spawned); + assert.isDefined(fd3Stream); + + const drained = yield* Stream.runDrain(fd3Stream!).pipe( + Effect.timeoutOption(Duration.seconds(30)), + Effect.forkScoped, + ); + yield* TestClock.adjust(Duration.seconds(30)); + const result = yield* Fiber.join(drained); + assert.isTrue( + Option.isSome(result), + "fd3 stream should still terminate normally (no regression)", + ); + }), + ); + }).pipe(Effect.provide(TestClock.layer())), + ); + it.effect("retries HTTP readiness before reporting the backend ready", () => Effect.scoped( Effect.gen(function* () { @@ -616,6 +713,301 @@ describe("DesktopBackendManager", () => { ), ); + it.effect( + "surfaces and falls back after 5 consecutive never-ready exits for bootstrapDelivery:'stdin' (#3611 fix 3)", + () => + Effect.scoped( + Effect.gen(function* () { + const starts = yield* Queue.unbounded(); + let startCount = 0; + const failures: Array<{ reason: string; fatal: boolean }> = []; + + const spawnerLayer = Layer.succeed( + ChildProcessSpawner.ChildProcessSpawner, + ChildProcessSpawner.make(() => + Effect.sync(() => { + startCount += 1; + return makeProcess({ + // Clean exit, code=0, before readiness -- matches the + // reporter's server-child.log (no stdout/stderr, nothing + // ever binds the port). + exitCode: Queue.offer(starts, startCount).pipe( + Effect.as(ChildProcessSpawner.ExitCode(0)), + ), + }); + }), + ), + ); + + const instance = yield* makeTestInstance({ + spawnerLayer, + config: { ...baseConfig, bootstrapDelivery: "stdin" }, + httpClientLayer: httpClientLayer(() => Effect.never), + onPreflightFailed: (failure) => + Effect.sync(() => { + failures.push(failure); + }).pipe(Effect.as(false)), + }); + + yield* instance.start; + assert.equal(yield* Queue.take(starts), 1); + + // Drive well past 5 consecutive never-ready exits. + for (let i = 0; i < 10; i++) { + yield* TestClock.adjust(Duration.seconds(10)); + } + + assert.equal(failures.length, 1); + assert.equal(failures[0]?.fatal, true); + assert.isTrue(startCount >= 5 && startCount <= 7); + + const snapshot = yield* instance.snapshot; + assert.equal(snapshot.desiredRunning, false); + }).pipe(Effect.provide(TestClock.layer())), + ), + ); + + it.effect( + "does not surface a never-ready exit loop for bootstrapDelivery:'fd3' (Windows-native primary stays gated out, #3611 fix 3)", + () => + Effect.scoped( + Effect.gen(function* () { + const starts = yield* Queue.unbounded(); + let startCount = 0; + const failures: Array<{ reason: string; fatal: boolean }> = []; + + const spawnerLayer = Layer.succeed( + ChildProcessSpawner.ChildProcessSpawner, + ChildProcessSpawner.make(() => + Effect.sync(() => { + startCount += 1; + return makeProcess({ + exitCode: Queue.offer(starts, startCount).pipe( + Effect.as(ChildProcessSpawner.ExitCode(0)), + ), + }); + }), + ), + ); + + const instance = yield* makeTestInstance({ + spawnerLayer, + config: { ...baseConfig, bootstrapDelivery: "fd3" }, + httpClientLayer: httpClientLayer(() => Effect.never), + onPreflightFailed: (failure) => + Effect.sync(() => { + failures.push(failure); + }).pipe(Effect.as(false)), + }); + + yield* instance.start; + assert.equal(yield* Queue.take(starts), 1); + + for (let i = 0; i < 10; i++) { + yield* TestClock.adjust(Duration.seconds(10)); + } + + // The Windows-native primary must never trip the WSL-specific + // "falling back to Windows" dialog for its own crash loop. + assert.deepEqual(failures, []); + assert.isTrue(startCount > 7, "fd3 path should keep restarting uncapped, unlike stdin"); + + const snapshot = yield* instance.snapshot; + assert.equal(snapshot.desiredRunning, true); + }).pipe(Effect.provide(TestClock.layer())), + ), + ); + + it.effect( + "pre-spawn transient preflight retries do not count toward the never-ready cap (bot-flagged fix)", + () => + Effect.scoped( + Effect.gen(function* () { + const starts = yield* Queue.unbounded(); + let startCount = 0; + const failures: Array<{ reason: string; fatal: boolean }> = []; + const stillColdStarting = yield* Ref.make(true); + + const spawnerLayer = Layer.succeed( + ChildProcessSpawner.ChildProcessSpawner, + ChildProcessSpawner.make(() => + Effect.sync(() => { + startCount += 1; + return makeProcess({ + exitCode: Queue.offer(starts, startCount).pipe( + Effect.as(ChildProcessSpawner.ExitCode(0)), + ), + }); + }), + ), + ); + + const instance = yield* makeTestInstance({ + spawnerLayer, + configResolve: Ref.get(stillColdStarting).pipe( + Effect.map((coldStarting) => + coldStarting + ? { + ...baseConfig, + bootstrapDelivery: "stdin", + preflightFailure: Option.some({ + reason: "WSL is still cold-starting", + fatal: false, + }), + } + : { ...baseConfig, bootstrapDelivery: "stdin" }, + ), + ), + httpClientLayer: httpClientLayer(() => Effect.never), + onPreflightFailed: (failure) => + Effect.sync(() => { + failures.push(failure); + }).pipe(Effect.as(false)), + }); + + yield* instance.start; + + // Several transient (unbounded, pre-spawn) preflight retries -- the + // process never spawns during this window, so the general restart + // counter climbs well past MAX_PREFLIGHT_FAILURE_ATTEMPTS, but the + // never-ready cap must not be affected by any of this. + for (let i = 0; i < 8; i++) { + yield* TestClock.adjust(Duration.seconds(10)); + } + assert.equal(yield* Queue.size(starts), 0); + assert.deepEqual(failures, []); + + // Cold start finishes: preflight goes clean and the process + // actually spawns, but exits before ever becoming ready, repeatedly. + yield* Ref.set(stillColdStarting, false); + for (let i = 0; i < 10; i++) { + yield* TestClock.adjust(Duration.seconds(10)); + } + + // Must take its own fresh 5 consecutive post-spawn never-ready + // exits -- not fire on the first one because the pre-spawn retries + // had already elevated an unrelated counter. + assert.equal(failures.length, 1); + assert.equal(failures[0]?.fatal, true); + assert.isTrue(startCount >= 5 && startCount <= 7); + }).pipe(Effect.provide(TestClock.layer())), + ), + ); + + it.effect( + "a fresh start after the never-ready cap fires gets a new retry budget (bot-flagged fix)", + () => + Effect.scoped( + Effect.gen(function* () { + const starts = yield* Queue.unbounded(); + let startCount = 0; + const failures: Array<{ reason: string; fatal: boolean }> = []; + + const spawnerLayer = Layer.succeed( + ChildProcessSpawner.ChildProcessSpawner, + ChildProcessSpawner.make(() => + Effect.sync(() => { + startCount += 1; + return makeProcess({ + exitCode: Queue.offer(starts, startCount).pipe( + Effect.as(ChildProcessSpawner.ExitCode(0)), + ), + }); + }), + ), + ); + + const instance = yield* makeTestInstance({ + spawnerLayer, + config: { ...baseConfig, bootstrapDelivery: "stdin" }, + httpClientLayer: httpClientLayer(() => Effect.never), + onPreflightFailed: (failure) => + Effect.sync(() => { + failures.push(failure); + }).pipe(Effect.as(false)), + }); + + yield* instance.start; + for (let i = 0; i < 10; i++) { + yield* TestClock.adjust(Duration.seconds(10)); + } + assert.equal(failures.length, 1); + assert.equal((yield* instance.snapshot).desiredRunning, false); + + const startCountAtTrip = startCount; + + // Restarting after the cap fired (without ever calling stop()) + // must not immediately re-trip on the very next failure because a + // stale counter survived from before. + yield* instance.start; + for (let i = 0; i < 10; i++) { + yield* TestClock.adjust(Duration.seconds(10)); + } + + assert.equal(failures.length, 2); + const spawnsSinceRestart = startCount - startCountAtTrip; + assert.isTrue(spawnsSinceRestart >= 5 && spawnsSinceRestart <= 7); + }).pipe(Effect.provide(TestClock.layer())), + ), + ); + + it.effect( + "stops without re-surfacing when the never-ready fallback didn't take (config still resolves stdin)", + () => + Effect.scoped( + Effect.gen(function* () { + const starts = yield* Queue.unbounded(); + let startCount = 0; + const failures: Array<{ reason: string; fatal: boolean }> = []; + + const spawnerLayer = Layer.succeed( + ChildProcessSpawner.ChildProcessSpawner, + ChildProcessSpawner.make(() => + Effect.sync(() => { + startCount += 1; + return makeProcess({ + exitCode: Queue.offer(starts, startCount).pipe( + Effect.as(ChildProcessSpawner.ExitCode(0)), + ), + }); + }), + ), + ); + + const instance = yield* makeTestInstance({ + spawnerLayer, + config: { ...baseConfig, bootstrapDelivery: "stdin" }, + httpClientLayer: httpClientLayer(() => Effect.never), + // Asks for the post-fallback restart (like the wsl-only primary + // does after persisting the Windows fallback), but the config + // above keeps resolving the same stdin backend -- the fallback + // "didn't take". + onPreflightFailed: (failure) => + Effect.sync(() => { + failures.push(failure); + }).pipe(Effect.as(true)), + }); + + yield* instance.start; + for (let i = 0; i < 12; i++) { + yield* TestClock.adjust(Duration.seconds(10)); + } + + // Surfaced exactly once; the guard parks the instance on the next + // never-ready exit instead of re-showing the dialog on every exit. + assert.equal(failures.length, 1); + assert.equal(failures[0]?.fatal, true); + assert.isTrue( + startCount >= 6 && startCount <= 8, + "one extra spawn after the cap fired, then stop", + ); + + const snapshot = yield* instance.snapshot; + assert.equal(snapshot.desiredRunning, false); + }).pipe(Effect.provide(TestClock.layer())), + ), + ); + it.effect("cancels a scheduled restart when start is requested manually", () => Effect.scoped( Effect.gen(function* () { diff --git a/apps/desktop/src/backend/DesktopBackendManager.ts b/apps/desktop/src/backend/DesktopBackendManager.ts index e3a4de661ac..ff4d1effd3f 100644 --- a/apps/desktop/src/backend/DesktopBackendManager.ts +++ b/apps/desktop/src/backend/DesktopBackendManager.ts @@ -236,6 +236,13 @@ interface BackendManagerState { // Consecutive bounded/fatal preflight failures, reset on a clean or // unbounded-transient preflight. restartAttempt counts all restarts. readonly preflightFailureAttempt: number; + // Consecutive post-spawn exits that never reached readiness for this + // instance, reset on a successful onReady or a fresh start from a + // stopped state. Deliberately separate from restartAttempt, which also + // counts pre-spawn preflight retries (e.g. WSL cold-start) -- using + // restartAttempt for the never-ready cap would let ordinary transient + // preflight retries trip it well before 5 actual post-spawn failures. + readonly neverReadyAttempt: number; readonly restartFiber: Option.Option>; readonly nextRunId: number; } @@ -247,6 +254,7 @@ const initialState: BackendManagerState = { active: Option.none(), restartAttempt: 0, preflightFailureAttempt: 0, + neverReadyAttempt: 0, restartFiber: Option.none(), nextRunId: 1, }; @@ -335,7 +343,24 @@ const runBackendProcess = Effect.fn("runBackendProcess")(function* ( ), ); const onOutput = options.onOutput ?? (() => Effect.void); - const bootstrapStream = Stream.encodeText(Stream.make(`${bootstrapJson}\n`)); + // The WSL path delivers the bootstrap envelope over stdin (see the + // additionalFds comment below). A plain `Stream.make(...)` completes the + // instant the single chunk is queued, and the platform spawner's stdin + // sink defaults to closing the write end as soon as the stream completes + // (`endOnDone: true`) -- i.e. stdin EOF can land immediately after the + // JSON line is flushed, racing the extra wsl.exe relay hop against the + // child's readline `'line'` vs `'close'` listeners + // (apps/server/src/bootstrap.ts:readBootstrapEnvelope) and sometimes + // losing the bootstrap envelope entirely. Appending `Stream.never` keeps + // stdin open forever for the WSL path so the write end is never closed in + // normal operation; this is intentional, not a leak -- the stream is + // forked into this run's own Scope, which is interrupted (closing stdin) + // on every teardown path (process exit, restart, stop, SIGTERM). Do not + // "fix" this by removing the `Stream.never` tail. + const bootstrapStream = Stream.make(`${bootstrapJson}\n`).pipe( + options.bootstrapDelivery === "stdin" ? Stream.concat(Stream.never) : (s) => s, + Stream.encodeText, + ); const command = ChildProcess.make(options.executablePath, options.args, { cwd: options.cwd, env: options.env, @@ -439,6 +464,29 @@ export const makeBackendInstance = Effect.fn("makeBackendInstance")(function* ( }); }); + // Shared tail of both bounded-failure escalations (fatal preflight + // failures and the post-spawn never-ready cap): surface the failure via + // spec.onPreflightFailed, then either schedule the restart the handler + // asked for (it changed something -- e.g. persisted the Windows fallback + // -- and wants the next config resolve to pick it up) or park the + // instance. Referenced before scheduleRestart is declared, which is safe + // because the generator body only runs once the instance is started. + const surfaceFatalFailure = Effect.fn("desktop.backendInstance.surfaceFatalFailure")(function* ( + failure: PreflightFailure, + restartReason: string, + ) { + const shouldRestart = yield* spec.onPreflightFailed?.(failure) ?? Effect.succeed(false); + if (shouldRestart) { + yield* scheduleRestart(restartReason); + } else { + yield* Ref.update(state, (latest) => ({ + ...latest, + desiredRunning: false, + ready: false, + })); + } + }); + const start: Effect.Effect = Effect.suspend(() => mutex.withPermits(1)( Effect.gen(function* () { @@ -470,6 +518,7 @@ export const makeBackendInstance = Effect.fn("makeBackendInstance")(function* ( const resetFatalPreflightCounter = !current.desiredRunning && current.preflightFailureAttempt > 0; + const resetNeverReadyCounter = !current.desiredRunning && current.neverReadyAttempt > 0; yield* cancelRestart; yield* Ref.update(state, (latest) => ({ ...latest, @@ -477,6 +526,7 @@ export const makeBackendInstance = Effect.fn("makeBackendInstance")(function* ( ready: false, config: Option.some(config.value), preflightFailureAttempt: resetFatalPreflightCounter ? 0 : latest.preflightFailureAttempt, + neverReadyAttempt: resetNeverReadyCounter ? 0 : latest.neverReadyAttempt, })); const preflightFailure = config.value.preflightFailure; @@ -523,18 +573,7 @@ export const makeBackendInstance = Effect.fn("makeBackendInstance")(function* ( "backend preflight failed repeatedly; surfacing and falling back", { reason, attempt }, ); - const shouldRestart = yield* ( - spec.onPreflightFailed?.(preflightFailure.value) ?? Effect.succeed(false) - ); - if (shouldRestart) { - yield* scheduleRestart(reason); - } else { - yield* Ref.update(state, (latest) => ({ - ...latest, - desiredRunning: false, - ready: false, - })); - } + yield* surfaceFatalFailure(preflightFailure.value, reason); return; } yield* scheduleRestart(reason); @@ -599,6 +638,7 @@ export const makeBackendInstance = Effect.fn("makeBackendInstance")(function* ( ...latest, active: Option.none(), ready: false, + neverReadyAttempt: latest.neverReadyAttempt + 1, }; return [ { @@ -622,7 +662,55 @@ export const makeBackendInstance = Effect.fn("makeBackendInstance")(function* ( } if (isCurrentRun && nextState.desiredRunning) { - yield* scheduleRestart(reason); + // A process that exits cleanly (or otherwise) before ever + // reaching readiness, repeatedly, would otherwise loop here + // forever with no escalation -- this is exactly how a WSL + // backend that starts and exits with code 0 every time + // (e.g. the stdin-EOF race or a Node engine mismatch) gets + // permanently stuck "connecting" with no actionable error. + // Cap it the same way the pre-spawn fatal-preflight path + // already is, but only for the stdin-delivered (WSL) path: + // the Windows-native fd3 primary's onPreflightFailed is + // wired unconditionally to a WSL-specific "falling back to + // Windows" dialog, which would be nonsensical for a Windows + // backend crash-looping for an unrelated reason. + if ( + config.value.bootstrapDelivery === "stdin" && + nextState.neverReadyAttempt >= MAX_PREFLIGHT_FAILURE_ATTEMPTS + ) { + if (nextState.neverReadyAttempt > MAX_PREFLIGHT_FAILURE_ATTEMPTS) { + // Mirrors the preflight path's attempt > attemptLimit + // guard above: we already surfaced and asked for the + // fallback, yet the config still resolves a + // stdin-delivered backend that exits before ready -- the + // fallback didn't take (e.g. the settings write failed). + // Stop rather than re-surface the dialog on every + // subsequent exit. + yield* logInstanceError("backend still never-ready after fallback; stopping", { + reason, + attempt: nextState.neverReadyAttempt, + }); + yield* Ref.update(state, (latest) => ({ + ...latest, + desiredRunning: false, + ready: false, + })); + } else { + yield* logInstanceError( + "backend exited repeatedly before becoming ready; surfacing and falling back", + { reason, attempt: nextState.neverReadyAttempt }, + ); + yield* surfaceFatalFailure( + { + reason: `backend process exited ${nextState.neverReadyAttempt} times in a row without becoming ready (last: ${reason})`, + fatal: true, + }, + reason, + ); + } + } else { + yield* scheduleRestart(reason); + } } }), ); @@ -652,6 +740,7 @@ export const makeBackendInstance = Effect.fn("makeBackendInstance")(function* ( { ...latest, restartAttempt: 0, + neverReadyAttempt: 0, ready: true, }, ] as const; diff --git a/apps/desktop/src/wsl/DesktopWslEnvironment.test.ts b/apps/desktop/src/wsl/DesktopWslEnvironment.test.ts index cdb3fc78286..e18fc431c9c 100644 --- a/apps/desktop/src/wsl/DesktopWslEnvironment.test.ts +++ b/apps/desktop/src/wsl/DesktopWslEnvironment.test.ts @@ -4,6 +4,7 @@ import * as Duration from "effect/Duration"; import * as Effect from "effect/Effect"; import * as Fiber from "effect/Fiber"; import * as Layer from "effect/Layer"; +import * as Option from "effect/Option"; import * as Sink from "effect/Sink"; import * as Stream from "effect/Stream"; import * as TestClock from "effect/testing/TestClock"; @@ -12,6 +13,7 @@ import { ChildProcessSpawner } from "effect/unstable/process"; import { buildWslNodeEnvPreamble, DesktopWslDistroListError, + ensureNodePtyImpl, formatMissingToolsReason, formatNodePtyProbeFailureReason, formatWslShellTransportFailureReason, @@ -235,3 +237,127 @@ describe("formatMissingToolsReason", () => { expect(reason).not.toContain("nvm"); }); }); + +describe("ensureNodePtyImpl: WSL Node engine-range preflight (#3611)", () => { + // Always resolves -- isolates the engine-range branch from the separate + // `wslpath` round trip that production windowsToWslPath performs. + const windowsToWslPath = () => Effect.succeed(Option.some("/home/josh/repo")); + + const makeProbeSpawner = (probeStdout: string) => + ChildProcessSpawner.make(() => + Effect.succeed( + ChildProcessSpawner.makeHandle({ + pid: ChildProcessSpawner.ProcessId(1), + exitCode: Effect.succeed(ChildProcessSpawner.ExitCode(0)), + isRunning: Effect.succeed(false), + kill: () => Effect.void, + unref: Effect.succeed(Effect.void), + stdin: Sink.drain, + stdout: Stream.make(encoder.encode(probeStdout)), + stderr: Stream.empty, + all: Stream.empty, + getInputFd: () => Sink.drain, + getOutputFd: () => Stream.empty, + }), + ), + ); + + it("rejects a resolved Node that does not satisfy nodeEngineRange (the reporter's exact case)", () => + Effect.gen(function* () { + const result = yield* ensureNodePtyImpl("Ubuntu", "C:\\repo", windowsToWslPath, { + nodeEngineRange: "^22.16 || ^23.11 || >=24.10", + }); + expect(result.ok).toBe(false); + if (!result.ok) { + expect(result.fatal).toBe(true); + expect(result.reason).toContain("18.20.8"); + expect(result.reason).toContain("^22.16 || ^23.11 || >=24.10"); + expect(result.reason).toContain("/home/josh/.nvm/versions/node/v18.20.8/bin/node"); + // Deterministic mismatch: surfaced on the first attempt instead of + // burning the default preflight retry allowance. + expect(result.retryLimit).toBe(1); + } + }).pipe( + Effect.provideService( + ChildProcessSpawner.ChildProcessSpawner, + makeProbeSpawner( + [ + "nodePath:/home/josh/.nvm/versions/node/v18.20.8/bin/node", + "resolvedPath:/home/josh/.nvm/versions/node/v18.20.8/bin:/usr/bin", + "nodeVersion:18.20.8", + ].join("\n"), + ), + ), + )); + + it("rejects (fail closed) when a range is required but the probe didn't report a version (bot-flagged fix)", () => + Effect.gen(function* () { + const result = yield* ensureNodePtyImpl("Ubuntu", "C:\\repo", windowsToWslPath, { + nodeEngineRange: "^22.16 || ^23.11 || >=24.10", + }); + expect(result.ok).toBe(false); + if (!result.ok) { + expect(result.fatal).toBe(true); + expect(result.reason).toContain("^22.16 || ^23.11 || >=24.10"); + // Possibly a one-off probe hiccup: keeps the default retry allowance, + // unlike the deterministic version-mismatch case. + expect(result.retryLimit).toBeUndefined(); + } + }).pipe( + Effect.provideService( + ChildProcessSpawner.ChildProcessSpawner, + makeProbeSpawner( + // No nodeVersion: line at all -- the probe omitted/failed to + // report it even though node itself resolved fine. + [ + "nodePath:/home/josh/.nvm/versions/node/v18.20.8/bin/node", + "resolvedPath:/home/josh/.nvm/versions/node/v18.20.8/bin:/usr/bin", + ].join("\n"), + ), + ), + )); + + it("does not false-positive when the resolved Node satisfies nodeEngineRange (happy path unchanged)", () => + Effect.gen(function* () { + const result = yield* ensureNodePtyImpl("Ubuntu", "C:\\repo", windowsToWslPath, { + nodeEngineRange: "^22.16 || ^23.11 || >=24.10", + }); + expect(result).toEqual({ + ok: true, + nodePath: "/home/josh/.nvm/versions/node/v22.22.1/bin/node", + resolvedPath: "/home/josh/.nvm/versions/node/v22.22.1/bin:/usr/bin", + }); + }).pipe( + Effect.provideService( + ChildProcessSpawner.ChildProcessSpawner, + makeProbeSpawner( + [ + "nodePath:/home/josh/.nvm/versions/node/v22.22.1/bin/node", + "resolvedPath:/home/josh/.nvm/versions/node/v22.22.1/bin:/usr/bin", + "nodeVersion:22.22.1", + ].join("\n"), + ), + ), + )); + + it("does not check the engine range when nodeEngineRange is not configured", () => + Effect.gen(function* () { + const result = yield* ensureNodePtyImpl("Ubuntu", "C:\\repo", windowsToWslPath, {}); + expect(result).toEqual({ + ok: true, + nodePath: "/home/josh/.nvm/versions/node/v18.20.8/bin/node", + resolvedPath: "/home/josh/.nvm/versions/node/v18.20.8/bin:/usr/bin", + }); + }).pipe( + Effect.provideService( + ChildProcessSpawner.ChildProcessSpawner, + makeProbeSpawner( + [ + "nodePath:/home/josh/.nvm/versions/node/v18.20.8/bin/node", + "resolvedPath:/home/josh/.nvm/versions/node/v18.20.8/bin:/usr/bin", + "nodeVersion:18.20.8", + ].join("\n"), + ), + ), + )); +}); diff --git a/apps/desktop/src/wsl/DesktopWslEnvironment.ts b/apps/desktop/src/wsl/DesktopWslEnvironment.ts index b38675f0e1f..33e09845034 100644 --- a/apps/desktop/src/wsl/DesktopWslEnvironment.ts +++ b/apps/desktop/src/wsl/DesktopWslEnvironment.ts @@ -223,7 +223,13 @@ const NODE_PTY_PROBE_SCRIPT = ( linuxServerDir: string, ) => `printf 'nodePath:%s\\n' "$(command -v node 2>/dev/null)" printf 'resolvedPath:%s\\n' "$PATH" -cd ${shellQuote(linuxServerDir)} && node <<'NODE' >/dev/null 2>&1 +cd ${shellQuote(linuxServerDir)} && node <<'NODE' 3>&1 >/dev/null 2>&1 +// Emit the version over fd 3 -- duplicated onto the probe's real stdout by +// the 3>&1 above, before stdout/stderr are silenced -- so the engine-range +// check reuses this node invocation instead of paying a second Node cold +// start on every preflight. First statement on purpose: the version must be +// reported even when the dependency checks below exit early. +require("node:fs").writeSync(3, "nodeVersion:" + process.versions.node + "\\n"); // The server bundle externalizes its deps to node_modules, and the WSL Node // can't read inside app.asar, so confirm those deps are unpacked on the real // filesystem before reporting the backend healthy. "effect" is the framework @@ -370,7 +376,10 @@ export const formatMissingToolsReason = ( return `WSL distro is missing required tools: ${issues.join(", ")}. Install ${remediations.join(" and ")}, then retry.`; }; -const ensureNodePtyImpl = ( +// Exported for unit testing the preflight branches directly against a fake +// ChildProcessSpawner without needing to also fake the separate `wslpath` +// round trip that `windowsToWslPath` performs. +export const ensureNodePtyImpl = ( distro: string | null, windowsRepoRoot: string, windowsToWslPath: ( @@ -457,7 +466,58 @@ const ensureNodePtyImpl = ( } as const; } - if (probe.exitCode === 0) return { ok: true, nodePath, resolvedPath } as const; + if (probe.exitCode === 0) { + // The rest of the probe succeeded -- but that isn't enough on its own, + // it also has to satisfy engines.node. A too-old Node (e.g. nvm's + // default alias still pointing at 18) loads and runs the server bundle + // without error, but `import.meta.main` is undefined pre-20.11/22.x, so + // the server's `if (import.meta.main)` launch gate never fires: the + // process exits 0 with no stdout/stderr and nothing ever binds the port + // -- identical symptoms to the stdin-EOF race below, but a structurally + // different cause that the probe previously never checked for once + // *any* node resolved. Gated on exitCode === 0 (rather than checked + // unconditionally as soon as nodePath resolves) so this doesn't mask a + // more specific, unrelated probe failure (e.g. the exitCode === 3 case + // above) behind a Node-version error in the rare case both are true. + const nodeVersion = parseToolchainReport(probe.stdout).nodeVersion; + const requiredRange = options.nodeEngineRange?.trim() || null; + if (requiredRange !== null) { + if (nodeVersion === null) { + // A range is required but the probe didn't report a version -- + // fail closed rather than silently letting an unchecked Node + // through (the exact failure class this check exists to catch). + // Fatal/bounded with the default retry allowance, so a one-off + // probe hiccup still gets a few retries before falling back. + return { + ok: false, + reason: `Could not determine the WSL Node.js version, which is required to satisfy ${requiredRange}. The version probe did not report a version; please report this if it persists.`, + fatal: true, + } as const; + } + if (!satisfiesSemverRange(nodeVersion, requiredRange)) { + // Single source for the out-of-range wording and remediation -- + // formatMissingToolsReason's nodeOutOfRange branch -- so this + // message can't drift from the toolchain-report path's. The + // resolved path is appended because it pins down WHICH node needs + // switching (e.g. an nvm default alias). + const mismatch = + formatMissingToolsReason({ missingTools: [], nodeVersion }, requiredRange) ?? + `WSL Node.js v${nodeVersion} does not satisfy the required range ${requiredRange}.`; + return { + ok: false, + reason: `${mismatch} (Resolved node: ${nodePath}.)`, + fatal: true, + // Unlike the missing-version case above, the resolved version + // cannot change between the manager's back-to-back preflight + // retries, so surface the actionable error immediately instead + // of burning the default retry allowance on a deterministic + // failure. + retryLimit: 1, + } as const; + } + } + return { ok: true, nodePath, resolvedPath } as const; + } if (options.allowBuild !== true) { const packagedProbeFailure = formatNodePtyProbeFailureReason(probe.exitCode); diff --git a/apps/server/src/bootstrap.test.ts b/apps/server/src/bootstrap.test.ts index 05155f32ec4..ed0b54a206e 100644 --- a/apps/server/src/bootstrap.test.ts +++ b/apps/server/src/bootstrap.test.ts @@ -69,10 +69,14 @@ it.layer(NodeServices.layer)("readBootstrapEnvelope", (it) => { `${yield* encodeTestEnvelopeSchema({ mode: "desktop" })}\n`, ); - const fd = yield* Effect.acquireRelease( - Effect.sync(() => NodeFS.openSync(filePath, "r")), - (fd) => Effect.sync(() => NodeFS.closeSync(fd)), - ); + // Open without acquireRelease: readBootstrapEnvelope now detaches its + // reader (closing the readline interface and destroying its backing + // stream, which is opened with autoClose: true) as soon as it resolves, + // on every path -- not just the duplication-fallback path below. The + // stream therefore owns fd's lifecycle here; also closing it + // synchronously in a finalizer would race the stream's async close and + // produce an uncaught EBADF. + const fd = NodeFS.openSync(filePath, "r"); const payload = yield* readBootstrapEnvelope(TestEnvelopeSchema, fd, { timeoutMs: 100 }); assertSome(payload, { @@ -81,6 +85,57 @@ it.layer(NodeServices.layer)("readBootstrapEnvelope", (it) => { }), ); + it.effect( + "stops reading from the fd once the envelope is parsed instead of leaving the reader attached", + () => + Effect.gen(function* () { + const fs = yield* FileSystem.FileSystem; + const filePath = yield* fs.makeTempFileScoped({ + prefix: "t3-bootstrap-", + suffix: ".ndjson", + }); + + yield* fs.writeFileString( + filePath, + `${yield* encodeTestEnvelopeSchema({ mode: "desktop" })}\n`, + ); + + // Force the win32 branch (resolveFdPath returns undefined there) so + // this test is deterministic on every OS it runs on: it always + // exercises makeDirectBootstrapStream's autoClose: true stream over + // the original fd, regardless of the host platform actually running + // the suite. + const fd = NodeFS.openSync(filePath, "r"); + + const payload = yield* readBootstrapEnvelope(TestEnvelopeSchema, fd, { + timeoutMs: 100, + }).pipe(Effect.provideService(HostProcessPlatform, "win32")); + assertSome(payload, { mode: "desktop" }); + + // Regression guard for the bug fixed alongside the WSL stdin-held-open + // change: previously, the cleanup() that closes the readline + // interface and destroys its backing stream was wired only as the + // async-interrupt finalizer, so it never ran on a successful parse -- + // the reader (and the fd's stream) stayed attached and "live" for the + // rest of the process. If that regresses, this fd will still be open + // here and fstatSync will succeed instead of throwing EBADF. + const isFdOpen = Effect.sync(() => { + try { + NodeFS.fstatSync(fd); + return true; + } catch { + return false; + } + }); + let fdStillOpen = yield* isFdOpen; + for (let attempt = 0; fdStillOpen && attempt < 20; attempt++) { + yield* Effect.sleep(Duration.millis(25)); + fdStillOpen = yield* isFdOpen; + } + assert.isFalse(fdStillOpen, "expected the bootstrap reader to have closed its fd"); + }), + ); + it.effect("falls back to reading the inherited fd when path duplication fails", () => Effect.gen(function* () { const fs = yield* FileSystem.FileSystem; @@ -183,10 +238,10 @@ it.layer(NodeServices.layer)("readBootstrapEnvelope", (it) => { const filePath = yield* fs.makeTempFileScoped({ prefix: "t3-bootstrap-", suffix: ".ndjson" }); yield* fs.writeFileString(filePath, '{"mode":42}\n'); - const fd = yield* Effect.acquireRelease( - Effect.sync(() => NodeFS.openSync(filePath, "r")), - (fd) => Effect.sync(() => NodeFS.closeSync(fd)), - ); + // No acquireRelease here either -- a decode failure is still a + // terminal, detaching outcome (see the comment on the first test in + // this suite), so the stream's autoClose owns fd the same way. + const fd = NodeFS.openSync(filePath, "r"); const error = yield* readBootstrapEnvelope(TestEnvelopeSchema, fd, { timeoutMs: 100, }).pipe(Effect.flip); diff --git a/apps/server/src/bootstrap.ts b/apps/server/src/bootstrap.ts index 04dcf277e21..8c4f3c5964d 100644 --- a/apps/server/src/bootstrap.ts +++ b/apps/server/src/bootstrap.ts @@ -95,14 +95,42 @@ export const readBootstrapEnvelope = Effect.fn("readBootstrapEnvelope")(function }); const cleanup = () => { - stream.removeListener("error", handleError); + input.removeListener("error", handleError); input.removeListener("line", handleLine); input.removeListener("close", handleClose); input.close(); stream.destroy(); }; + // Every branch below is terminal -- it resolves the envelope read one way + // or another -- so each one stops reading from `fd` before resuming. + // Without this, the readline interface and its backing stream stay + // attached and keep reading from `fd` for the rest of the process + // lifetime. That was harmless while the desktop side closed stdin right + // after writing the bootstrap line (the stream would hit EOF almost + // immediately either way), but it stopped being harmless once the WSL + // bootstrap path started holding stdin open indefinitely (see + // DesktopBackendManager.ts's runBackendProcess): a readline interface + // left idling on a never-closing, wsl.exe-relayed stdin pipe can surface + // a spurious EAGAIN 'error' event on that stream well after the envelope + // was already read. + // + // The error listener is registered on the readline *interface* (`input`), + // not the raw `stream`, and deliberately not on both. Node's + // readline.Interface attaches its own internal 'error' listener to the + // input stream at construction time and, on error, re-emits onto itself + // (node:internal/readline/interface.js); that internal listener runs + // before any listener registered on `stream` afterward, and a thrown + // exception from one stream listener prevents later ones on the same + // stream from running at all -- so a `stream.once("error", ...)` + // registered here would never fire for an error that happens before the + // first line ever arrives (verified directly: an error emitted on + // `stream` before any "line"/"close" event is an uncaught exception with + // only a stream-level listener, since readline's re-emit-with-no-listener + // on the interface throws first). Listening on `input` instead receives + // the same re-emitted error reliably, exactly once, in every case. const handleError = (error: Error) => { + cleanup(); if (isUnavailableBootstrapFdError(error)) { resume(Effect.succeedNone); return; @@ -118,6 +146,7 @@ export const readBootstrapEnvelope = Effect.fn("readBootstrapEnvelope")(function }; const handleLine = (line: string) => { + cleanup(); const parsed = decodeJsonResult(schema)(line); if (Result.isSuccess(parsed)) { resume(Effect.succeedSome(parsed.success)); @@ -134,10 +163,11 @@ export const readBootstrapEnvelope = Effect.fn("readBootstrapEnvelope")(function }; const handleClose = () => { + cleanup(); resume(Effect.succeedNone); }; - stream.once("error", handleError); + input.once("error", handleError); input.once("line", handleLine); input.once("close", handleClose);