From faa48e5a00f72f3667e8c6393d88a4a13c84afea Mon Sep 17 00:00:00 2001 From: Gordon Woodhull Date: Mon, 1 Jun 2026 11:19:51 -0400 Subject: [PATCH 1/2] ci(hub-client-e2e): pin Node to 24.15.0 to dodge Playwright install hang MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Node 24.16.0 introduced a yauzl stream-destruction regression that hangs `for await` over `openReadStream`. With @playwright/test 1.58 (< 1.60), `playwright install chromium` hangs forever right after the browser download reaches 100% — extraction deadlocks and the job burns the full 6h cap. This is what has failed every Hub-Client E2E run since ~2026-05-27 (the runner's Node 24.x floated past 24.16). Pin node-version to 24.15.0, the last release before the regression. It keeps Playwright 1.58 / Chromium 145, so the visual-regression baselines are unchanged. Remove the pin once @playwright/test is bumped to >= 1.60.0 (the upstream fix, microsoft/playwright#40747), regenerating the visual baselines for the newer Chromium at that time. Refs microsoft/playwright#40724. --- .github/workflows/hub-client-e2e.yml | 52 +++++++++------------------- 1 file changed, 16 insertions(+), 36 deletions(-) diff --git a/.github/workflows/hub-client-e2e.yml b/.github/workflows/hub-client-e2e.yml index 7413751c..0a515238 100644 --- a/.github/workflows/hub-client-e2e.yml +++ b/.github/workflows/hub-client-e2e.yml @@ -82,11 +82,24 @@ jobs: chmod +x tree-sitter-linux-x86 sudo mv tree-sitter-linux-x86 /usr/local/bin/tree-sitter - # Node.js and dependencies + # Node.js and dependencies. + # + # PINNED to 24.15.0 — do NOT bump to a bare '24' (which floats to + # >=24.16). Node 24.16.0 introduced a yauzl stream-destruction + # regression that hangs `for await` over `openReadStream`, which + # makes `playwright install chromium` hang FOREVER right after the + # browser download hits 100% (extraction deadlocks; the job then + # burns the full 6h cap). 
It is fixed in Playwright >= 1.60.0, but + # we pin @playwright/test at ^1.50.0 -> 1.58.0. 24.15.0 is the last + # Node release before the regression, so it keeps Playwright 1.58 / + # Chromium 145 and leaves the visual-regression baselines unchanged. + # Refs: microsoft/playwright#40724, fixed by #40747 (PW 1.60.0). + # REMOVE this pin once @playwright/test is bumped to >= 1.60.0 (and + # regenerate the visual baselines for the newer Chromium then). - name: Set up Node.js uses: actions/setup-node@v6 with: - node-version: '24' + node-version: '24.15.0' cache: 'npm' - name: Install npm dependencies @@ -127,41 +140,8 @@ jobs: - name: Pre-build hub binary run: cargo build --bin hub - # Cache the downloaded browser binaries across runs. Keyed on the - # lockfile so a Playwright version bump invalidates the cache and - # re-downloads; otherwise we skip the 175 MB chromium download - # entirely. - - name: Cache Playwright browsers - uses: actions/cache@v4 - with: - path: ~/.cache/ms-playwright - key: playwright-${{ runner.os }}-${{ hashFiles('package-lock.json') }} - - # Install Playwright browsers. - # - # The full Chromium (Chrome for Testing) build is served from a - # SINGLE mirror with no fallback: cdn.playwright.dev/chrome-for- - # testing-public (Playwright's cftUrl() helper). cdn.playwright.dev - # is fronted by Azure Front Door, and from the (also-Azure) GitHub - # runners that front door sends all ~167 MiB and then holds the - # connection open without EOF — so Playwright's download promise - # never resolves and the step hangs until the 6h job cap. With no - # fallback mirror for Chrome for Testing, Playwright can't recover. - # (Carlos report, 2026-05; confirmed from the 100%-then-hang in the - # run-26760896905 log.) - # - # Fix: point the chromium download host at the GCS origin that - # cdn.playwright.dev merely redirects to. That origin serves the zip - # with a correct content-length and a clean EOF. 
On linux-x64 BOTH - # the full Chromium and the headless-shell resolve via cftUrl(), so - # this one var fixes both; ffmpeg's name isn't "chromium*" so the - # override doesn't touch it and it keeps its 3-mirror fallback. - # PLAYWRIGHT_DOWNLOAD_CONNECTION_TIMEOUT is a belt-and-suspenders so - # any future post-100% stall aborts in 2 min instead of hanging. + # Install Playwright browsers - name: Install Playwright - env: - PLAYWRIGHT_CHROMIUM_DOWNLOAD_HOST: https://storage.googleapis.com/chrome-for-testing-public - PLAYWRIGHT_DOWNLOAD_CONNECTION_TIMEOUT: '120000' run: | cd hub-client npx playwright install --with-deps chromium From 35ad0197b13e75842b3b9812852be63004de6fa3 Mon Sep 17 00:00:00 2001 From: Gordon Woodhull Date: Mon, 1 Jun 2026 14:06:33 -0400 Subject: [PATCH 2/2] test(hub-client e2e): gate seedProjectInBrowser on a synced project set (de-flake smoke-all) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit smoke-all is flaky (predates the install hang; ~2/6 fail/flaky of ~92, varying set, worker-count-independent). Root cause is a seed/sync race, not the render pipeline and not parallelism: seedProjectInBrowser -> projectStorage.addProject writes IDB ONLY. A seeded project reaches the *synced* project set only via reconcileIntoConnectedProjectSet, which the app runs in a useProjectSet effect keyed on the status->connected TRANSITION and which requires isConnected(). Because the test seeds AFTER bootstrapProjectSet already reached connected, that effect never re-fires, so the seed lands in the set only if a fortuitous WS reconnect re-triggers reconcile. When it doesn't, the project is absent from the set, navigation to /p/{id} drops to the landing page, and waitForPreviewRender times out at 75s. (Confirmed via the failing run's trace + final screenshot: 'No projects yet', empty set; clean network, the 401 on /auth/me is benign.) 
Fix (Tier 2 — keep the full Automerge path end-to-end, just stop racing it): expose the live projectSetService + the idempotent reconciler on window.__quartoTest, and have seedProjectInBrowser wait for a real peer connection, run the reconciler, and wait until the project is observably present in the connected set before returning. Bounded 30s waits fail loudly if the sync server is truly unreachable instead of surfacing as a 75s preview-render timeout. Rejected Tier 1 (seed content locally) — it would bypass the very VFS -> Automerge -> WASM -> preview integration smoke-all exists to exercise. workers stays at 2 (parallelism was never the cause; verified workers:1 did not help). Follow-up (product, separate): app could reconcile on IDB change, not only on the status transition. Refs bd-3nzyd. --- hub-client/e2e/helpers/projectFactory.ts | 39 ++++++++++++++++++++++++ hub-client/e2e/helpers/testHooks.ts | 4 +++ hub-client/src/test-hooks.ts | 19 +++++++++++- 3 files changed, 61 insertions(+), 1 deletion(-) diff --git a/hub-client/e2e/helpers/projectFactory.ts b/hub-client/e2e/helpers/projectFactory.ts index 22603e2d..259fee7a 100644 --- a/hub-client/e2e/helpers/projectFactory.ts +++ b/hub-client/e2e/helpers/projectFactory.ts @@ -161,6 +161,17 @@ export async function bootstrapProjectSet( * synced project set is initialized; otherwise the App lands on the * needs-migration screen. * + * Before returning, this waits until the seeded project is actually present + * in the *connected, synced* project set — not just written to IDB. That + * closes the race behind the smoke-all flakiness (bd-3nzyd): `addProject` + * only writes IDB, and the app reconciles IDB→set on the status→connected + * transition, which does NOT re-fire for a project seeded *after* the set is + * already connected. So we wait for the real peer connection, run the + * idempotent reconciler ourselves, and wait for the project to appear. 
The + * full Automerge sync path stays exercised end-to-end — the test just stops + * racing it. Waits are bounded so a genuinely unreachable sync server fails + * loudly here instead of surfacing 75s later as a preview-render timeout. + * * Returns the local project ID (UUID) used in URL navigation. */ export async function seedProjectInBrowser( @@ -175,6 +186,34 @@ export async function seedProjectInBrowser( const hooks = window.__quartoTest; if (!hooks) throw new Error('__quartoTest missing — rebuild with VITE_E2E=1'); const entry = await hooks.projectStorage.addProject(indexDocId, syncServer, name); + + const sleep = (ms: number) => new Promise((r) => setTimeout(r, ms)); + const deadline = Date.now() + 30000; + + // 1) Wait for the real project-set peer connection (the app's implicit + // 5s waitForPeer is too tight in CI; give it a generous window). + while (!hooks.projectSet.isConnected() && Date.now() < deadline) { + await sleep(100); + } + if (!hooks.projectSet.isConnected()) { + throw new Error( + 'Project set did not reach connected state within 30s — sync server unreachable?', + ); + } + + // 2) Land the seeded IDB entry into the synced set (idempotent), then + // wait until it is observably present before the caller navigates. 
+ while (!hooks.projectSet.getProject(indexDocId) && Date.now() < deadline) { + await hooks.reconcileProjectSet(); + if (hooks.projectSet.getProject(indexDocId)) break; + await sleep(100); + } + if (!hooks.projectSet.getProject(indexDocId)) { + throw new Error( + `Seeded project ${indexDocId} never appeared in the connected project set within 30s`, + ); + } + return entry.id; }, { indexDocId, syncServer, name }, diff --git a/hub-client/e2e/helpers/testHooks.ts b/hub-client/e2e/helpers/testHooks.ts index 8d89ec17..4deef036 100644 --- a/hub-client/e2e/helpers/testHooks.ts +++ b/hub-client/e2e/helpers/testHooks.ts @@ -24,10 +24,14 @@ * ``` */ import type * as projectStorage from '../../src/services/projectStorage'; +import type * as projectSet from '../../src/services/projectSetService'; +import type { reconcileIntoConnectedProjectSet } from '../../src/services/projectSetReconciler'; import type * as wasmRenderer from '../../src/services/wasmRenderer'; export interface QuartoTestHooks { projectStorage: typeof projectStorage; + projectSet: typeof projectSet; + reconcileProjectSet: typeof reconcileIntoConnectedProjectSet; wasmRenderer: typeof wasmRenderer; } diff --git a/hub-client/src/test-hooks.ts b/hub-client/src/test-hooks.ts index 69b094e7..699c2dea 100644 --- a/hub-client/src/test-hooks.ts +++ b/hub-client/src/test-hooks.ts @@ -13,15 +13,32 @@ * out of the production bundle entirely. */ import * as projectStorage from './services/projectStorage'; +import * as projectSet from './services/projectSetService'; +import { reconcileIntoConnectedProjectSet } from './services/projectSetReconciler'; import * as wasmRenderer from '@quarto/preview-runtime'; declare global { interface Window { __quartoTest?: { projectStorage: typeof projectStorage; + // The live project-set service singleton (same instance the app uses), + // so the E2E suite can observe real connection/sync state — e.g. 
wait + // for `isConnected()` and `getProject(indexDocId)` after seeding before + // navigating, instead of racing the implicit reconcile-on-connect. + projectSet: typeof projectSet; + // Idempotent IDB→synced-set reconciler. The app runs this only on the + // status→connected transition, which does not re-fire for a project + // seeded after the set is already connected; the suite invokes it + // explicitly so a seeded project deterministically lands in the set. + reconcileProjectSet: typeof reconcileIntoConnectedProjectSet; wasmRenderer: typeof wasmRenderer; }; } } -window.__quartoTest = { projectStorage, wasmRenderer }; +window.__quartoTest = { + projectStorage, + projectSet, + reconcileProjectSet: reconcileIntoConnectedProjectSet, + wasmRenderer, +};