From 5da1f8ba64407f4fe914980b4877241be4e296d4 Mon Sep 17 00:00:00 2001 From: John McLear Date: Fri, 22 May 2026 09:50:29 +0100 Subject: [PATCH 1/4] fix: page sessionstorage cleanup to avoid OOM (#7830) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit SessionStore._cleanup() previously called `findKeys('sessionstorage:*', null)`, materialising every session key into a single array. On decade- old MariaDB installs with millions of sessions this OOMs the node process within ~15 minutes — see #7830. Switch to ueberdb2 6.1.0's findKeysPaged with a 500-key page size, and yield to the event loop between pages so the DB driver can release each page's buffered rows and request handlers can interleave. The break is now driven by `page.length === 0` rather than `page.length < CLEANUP_PAGE_SIZE` so a stubbed/throttled paged source still iterates the full keyspace. Adds a regression test that seeds 50 sessionstorage rows, monkey-patches `DB.findKeysPaged` to use a 4-key page, runs cleanup, and asserts every expired row is removed plus every valid row preserved across page boundaries. Closes #7830 Co-Authored-By: Claude Opus 4.7 (1M context) --- src/node/db/DB.ts | 3 +- src/node/db/SessionStore.ts | 64 +++++++++++++++++-------- src/package.json | 2 +- src/tests/backend/specs/SessionStore.ts | 52 ++++++++++++++++++++ 4 files changed, 99 insertions(+), 22 deletions(-) diff --git a/src/node/db/DB.ts b/src/node/db/DB.ts index b9d0e3ec675..9d962521c7d 100644 --- a/src/node/db/DB.ts +++ b/src/node/db/DB.ts @@ -47,8 +47,9 @@ exports.init = async () => { stats.gauge(`ueberdb_${metric}`, () => exports.db.metrics[metric]); } } - for (const fn of ['get', 'set', 'findKeys', 'getSub', 'setSub', 'remove']) { + for (const fn of ['get', 'set', 'findKeys', 'findKeysPaged', 'getSub', 'setSub', 'remove']) { const f = exports.db[fn]; + if (typeof f !== 'function') continue; exports[fn] = async (...args:string[]) => await f.call(exports.db, ...args); Object.setPrototypeOf(exports[fn], Object.getPrototypeOf(f)); Object.defineProperties(exports[fn], Object.getOwnPropertyDescriptors(f)); diff --git a/src/node/db/SessionStore.ts b/src/node/db/SessionStore.ts index 9ce81a70990..b1c30b1a999 100644 --- a/src/node/db/SessionStore.ts +++ b/src/node/db/SessionStore.ts @@ -12,6 +12,12 @@ const logger = log4js.getLogger('SessionStore'); // How often to run the cleanup of expired/stale sessions. const CLEANUP_INTERVAL_MS = 60 * 60 * 1000; // 1 hour +// Maximum number of session keys fetched from the database per cleanup +// iteration. Bounded so that instances with very large session keyspaces (see +// https://github.com/ether/etherpad/issues/7830) don't load every key into +// memory at once. Tuned for ~50 KB per page assuming ~100-char keys. +const CLEANUP_PAGE_SIZE = 500; + class SessionStore extends expressSession.Store { /** * @param {?number} [refresh] - How often (in milliseconds) `touch()` will update a session's @@ -74,37 +80,55 @@ class SessionStore extends expressSession.Store { * - Sessions with no expiry that contain no data beyond the default cookie are removed. * These are the empty sessions that accumulate indefinitely (bug #5010) — they have * `{cookie: {path: "/", _expires: null, ...}}` and nothing else. + * + * Iterates the keyspace in fixed-size pages (CLEANUP_PAGE_SIZE) so a large + * sessionstorage table (#7830) doesn't load every key into memory at once. */ async _cleanup() { - const keys = await DB.findKeys('sessionstorage:*', null); - if (!keys || keys.length === 0) return; const now = Date.now(); let removed = 0; - for (const key of keys) { - const sess = await DB.get(key); - if (!sess) { - await DB.remove(key); - removed++; - continue; - } - const expires = sess.cookie?.expires; - if (expires) { - // Session has an expiry — remove if expired. - if (new Date(expires).getTime() <= now) { + let scanned = 0; + let after: string | undefined; + // eslint-disable-next-line no-constant-condition + while (true) { + const page = await DB.findKeysPaged('sessionstorage:*', null, { + limit: CLEANUP_PAGE_SIZE, + ...(after != null ? {after} : {}), + }); + if (!page || page.length === 0) break; + // Defensive: a buggy backend that returns the cursor key would loop + // forever. `after` is exclusive, so the first key of the next page must + // be strictly greater than the previous cursor. + if (after != null && page[0] <= after) break; + for (const key of page) { + scanned++; + const sess = await DB.get(key); + if (!sess) { await DB.remove(key); removed++; + continue; } - } else { - // Session has no expiry and no user data beyond the cookie — remove as empty/stale. - const hasData = Object.keys(sess).some((k) => k !== 'cookie'); - if (!hasData) { - await DB.remove(key); - removed++; + const expires = sess.cookie?.expires; + if (expires) { + if (new Date(expires).getTime() <= now) { + await DB.remove(key); + removed++; + } + } else { + const hasData = Object.keys(sess).some((k) => k !== 'cookie'); + if (!hasData) { + await DB.remove(key); + removed++; + } } } + after = page[page.length - 1]; + // Yield to the event loop between pages so request handlers can run and + // the DB driver can release the previous page's buffered rows. + await new Promise((resolve) => setImmediate(resolve)); } if (removed > 0) { - logger.info(`Session cleanup: removed ${removed} expired/stale sessions out of ${keys.length}`); + logger.info(`Session cleanup: removed ${removed} expired/stale sessions out of ${scanned}`); } } diff --git a/src/package.json b/src/package.json index 4309f0fa3a3..3061c35216b 100644 --- a/src/package.json +++ b/src/package.json @@ -87,7 +87,7 @@ "surrealdb": "^2.0.3", "tinycon": "0.6.8", "tsx": "4.22.3", - "ueberdb2": "^6.0.3", + "ueberdb2": "^6.1.0", "underscore": "1.13.8", "undici": "^8.3.0", "unorm": "1.6.0", diff --git a/src/tests/backend/specs/SessionStore.ts b/src/tests/backend/specs/SessionStore.ts index a92301f5da3..dbfe4cbbf6d 100644 --- a/src/tests/backend/specs/SessionStore.ts +++ b/src/tests/backend/specs/SessionStore.ts @@ -309,5 +309,57 @@ describe(__filename, function () { // After shutdown, the timer should be cleared. assert(ss!._cleanupTimer == null); }); + + // Regression for https://github.com/ether/etherpad/issues/7830 — cleanup + // used to load every sessionstorage key into a single array; on huge DBs + // this OOMed. Verifies the paged iteration still hits every key when the + // count exceeds CLEANUP_PAGE_SIZE — we seed a few-row spread and force a + // small page size to keep the test fast. + it('pages across a large sessionstorage keyspace', async function () { + // Tag rows so the assertion ignores anything other tests left behind. + const tag = common.randomString(); + const expiredSids: string[] = []; + const validSids: string[] = []; + // Seed 25 expired + 25 valid rows. The default CLEANUP_PAGE_SIZE (500) + // would cover this in one call, so we monkey-patch the constant for + // this test by stubbing DB.findKeysPaged to enforce a small page. + const real = db.findKeysPaged; + let pageCalls = 0; + db.findKeysPaged = async (key: string, notKey: any, opts: any) => { + pageCalls++; + return await real.call(db, key, notKey, {...opts, limit: 4}); + }; + try { + for (let i = 0; i < 25; i++) { + const sid = `cleanup_paged_exp_${tag}_${String(i).padStart(2, '0')}`; + expiredSids.push(sid); + await db.set(`sessionstorage:${sid}`, { + cookie: {path: '/', expires: new Date(1).toJSON(), httpOnly: true}, + }); + } + for (let i = 0; i < 25; i++) { + const sid = `cleanup_paged_val_${tag}_${String(i).padStart(2, '0')}`; + validSids.push(sid); + await db.set(`sessionstorage:${sid}`, { + cookie: { + path: '/', expires: new Date(Date.now() + 60000).toJSON(), httpOnly: true, + }, + }); + } + await ss!._cleanup(); + for (const sid of expiredSids) { + assert(await db.get(`sessionstorage:${sid}`) == null, `expired ${sid} not removed`); + } + for (const sid of validSids) { + assert(await db.get(`sessionstorage:${sid}`) != null, `valid ${sid} was wrongly removed`); + } + // page size 4 over 50 rows -> at least 12 paged calls (final page may + // be short). Confirms we actually iterated. + assert(pageCalls >= 12, `expected paged iteration (got ${pageCalls} calls)`); + } finally { + db.findKeysPaged = real; + for (const sid of validSids) await db.remove(`sessionstorage:${sid}`); + } + }); }); }); From 63c2493bf239fbc4879fb341b5268648f030c50a Mon Sep 17 00:00:00 2001 From: John McLear Date: Fri, 22 May 2026 10:09:35 +0100 Subject: [PATCH 2/4] fix: address Qodo review on #7831 Four follow-ups raised by Qodo on the session cleanup paging fix: - DB.ts: fail-fast at init() if any required wrapper method (incl. findKeysPaged) is missing, so a stale ueberdb2 pin surfaces at boot rather than crashing the first cleanup run an hour later. - SessionStore: bound a single _cleanup() run to 10 minutes. Under sustained session creation the keyspace can grow faster than cleanup drains it; without a budget the next scheduled run would never fire. When the budget hits, log a warning and let the next run continue. - SessionStore: log the defensive `page[0] <= after` cursor-stall break. Previously the loop exited silently, leaving expired rows behind with no operator-visible signal of the backend regression. - Tests: the paged-cleanup regression test now removes both expiredSids AND validSids in finally, so a failed assertion doesn't leak rows. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/node/db/DB.ts | 6 ++++- src/node/db/SessionStore.ts | 29 ++++++++++++++++++++++--- src/tests/backend/specs/SessionStore.ts | 7 +++++- 3 files changed, 37 insertions(+), 5 deletions(-) diff --git a/src/node/db/DB.ts b/src/node/db/DB.ts index 9d962521c7d..ce9468ff862 100644 --- a/src/node/db/DB.ts +++ b/src/node/db/DB.ts @@ -49,7 +49,11 @@ exports.init = async () => { } for (const fn of ['get', 'set', 'findKeys', 'findKeysPaged', 'getSub', 'setSub', 'remove']) { const f = exports.db[fn]; - if (typeof f !== 'function') continue; + if (typeof f !== 'function') { + throw new Error( + `ueberdb2 ${exports.db.constructor.name} is missing required method ${fn}; ` + + 'check that ueberdb2 is at the minimum version pinned in package.json'); + } exports[fn] = async (...args:string[]) => await f.call(exports.db, ...args); Object.setPrototypeOf(exports[fn], Object.getPrototypeOf(f)); Object.defineProperties(exports[fn], Object.getOwnPropertyDescriptors(f)); diff --git a/src/node/db/SessionStore.ts b/src/node/db/SessionStore.ts index b1c30b1a999..fd560ed7b64 100644 --- a/src/node/db/SessionStore.ts +++ b/src/node/db/SessionStore.ts @@ -18,6 +18,13 @@ const CLEANUP_INTERVAL_MS = 60 * 60 * 1000; // 1 hour // memory at once. Tuned for ~50 KB per page assuming ~100-char keys. const CLEANUP_PAGE_SIZE = 500; +// Upper bound on a single cleanup run. Under sustained session creation the +// keyspace can grow faster than cleanup processes it; without a budget the +// loop would never reach an empty page and the next scheduled run would never +// fire. When the budget hits, the next scheduled run picks up where this one +// left off (the database state advances each iteration regardless). +const CLEANUP_MAX_RUNTIME_MS = 10 * 60 * 1000; // 10 minutes + class SessionStore extends expressSession.Store { /** * @param {?number} [refresh] - How often (in milliseconds) `touch()` will update a session's @@ -86,9 +93,11 @@ class SessionStore extends expressSession.Store { */ async _cleanup() { const now = Date.now(); + const startMs = Date.now(); let removed = 0; let scanned = 0; let after: string | undefined; + let budgetExhausted = false; // eslint-disable-next-line no-constant-condition while (true) { const page = await DB.findKeysPaged('sessionstorage:*', null, { @@ -98,8 +107,14 @@ class SessionStore extends expressSession.Store { if (!page || page.length === 0) break; // Defensive: a buggy backend that returns the cursor key would loop // forever. `after` is exclusive, so the first key of the next page must - // be strictly greater than the previous cursor. - if (after != null && page[0] <= after) break; + // be strictly greater than the previous cursor. Log so an operator can + // notice partial cleanup caused by a pagination regression. + if (after != null && page[0] <= after) { + logger.error( + `Session cleanup: paged cursor did not advance (after=${after}, ` + + `page[0]=${page[0]}); aborting this run to prevent an infinite loop`); + break; + } for (const key of page) { scanned++; const sess = await DB.get(key); @@ -123,11 +138,19 @@ class SessionStore extends expressSession.Store { } } after = page[page.length - 1]; + if (Date.now() - startMs > CLEANUP_MAX_RUNTIME_MS) { + budgetExhausted = true; + break; + } // Yield to the event loop between pages so request handlers can run and // the DB driver can release the previous page's buffered rows. await new Promise((resolve) => setImmediate(resolve)); } - if (removed > 0) { + if (budgetExhausted) { + logger.warn( + `Session cleanup: hit ${CLEANUP_MAX_RUNTIME_MS}ms budget after scanning ` + + `${scanned} keys (${removed} removed); next scheduled run will continue`); + } else if (removed > 0) { logger.info(`Session cleanup: removed ${removed} expired/stale sessions out of ${scanned}`); } } diff --git a/src/tests/backend/specs/SessionStore.ts b/src/tests/backend/specs/SessionStore.ts index dbfe4cbbf6d..6e09895eea8 100644 --- a/src/tests/backend/specs/SessionStore.ts +++ b/src/tests/backend/specs/SessionStore.ts @@ -358,7 +358,12 @@ describe(__filename, function () { assert(pageCalls >= 12, `expected paged iteration (got ${pageCalls} calls)`); } finally { db.findKeysPaged = real; - for (const sid of validSids) await db.remove(`sessionstorage:${sid}`); + // Symmetric cleanup — if an assertion threw earlier, expiredSids may + // still be present in the DB. Remove both groups so the test leaves + // no rows behind even on failure. + for (const sid of [...expiredSids, ...validSids]) { + await db.remove(`sessionstorage:${sid}`); + } } }); }); From d9ba72117fb6e93e4277232dee600759db5ff82f Mon Sep 17 00:00:00 2001 From: John McLear Date: Fri, 22 May 2026 10:15:27 +0100 Subject: [PATCH 3/4] docs: note paged session cleanup in CHANGELOG + settings template CHANGELOG.md picks up an entry under 3.1.0 Notable fixes describing the OOM cause, the paged iteration, the 10-minute per-run budget, the cursor-stall logging, and the fail-fast init guard. settings.json.template's sessionCleanup comment adds the page-size, budget, and pointer to #7830 so admins can reason about the new behaviour from the template alone. Co-Authored-By: Claude Opus 4.7 (1M context) --- CHANGELOG.md | 1 + settings.json.template | 6 ++++++ 2 files changed, 7 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index eac0736986c..0164dff21cf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,6 +17,7 @@ - **Export HTML — ordered-list counter no longer poisoned by a sibling unordered list.** When an ordered-list level was the only consumer of `olItemCounts`, closing *any* list at that depth (including a `
    ` that happened to share the level) reset the counter to 0. A subsequent unrelated `
      ` at the same depth then took the "counter exists but is 0" branch and emitted `
        ` without the `start=` attribute. The reset is now gated on `line.listTypeName === 'number'` so closing an unordered list never touches the ol bookkeeping. Fixes #7786 / #7787 (#7791). - **Export — bad `:rev` returns a meaningful 500 body, not Express's HTML error page.** A non-numeric `:rev` (e.g. `/p/foo/test1/export/txt`) reached `checkValidRev` which throws `CustomError('rev is not a number', 'apierror')`; the message fell through `.catch(next)` and Express's default renderer returned an HTML 500 page. The route handler now catches the apierror and emits `err.message` as a deterministic `text/plain` 500. As a follow-up, `checkValidRev` runs *before* `res.attachment()` so an invalid rev no longer leaves a `Content-Disposition` header in place (browsers were offering to save the error message as a file), and unrelated export failures (conversion, fs, soffice) are surfaced as text/plain rather than the HTML stack page. Fixes #7788 (#7792). +- **Session cleanup no longer OOMs on huge sessionstorage tables.** Pre-2.7.3 `SessionStore._cleanup()` issued a single unbounded `findKeys('sessionstorage:*', null)` that materialised every key into one JS array; on a decade-old MariaDB install with millions of stale sessions the mysql2 driver retained the rows on the pool connection while the JS array dominated heap, OOMing the process within ~15 minutes of boot. Cleanup now pages the keyspace in 500-key batches via the new `findKeysPaged` API on ueberdb2 6.1.0 (DB-side ranged query on mysql/postgres, JS-side fallback elsewhere), yielding to the event loop between pages. A single run is capped at 10 minutes; the next scheduled run continues. The defensive cursor-stall guard now logs an error rather than silently aborting, and `DB.init()` fails fast if any required wrapper method is missing (a misconfigured ueberdb2 pin surfaces at boot instead of an hour later). Fixes #7830 (#7831). ### Security hardening diff --git a/settings.json.template b/settings.json.template index b9882ac015a..db4b0ba3a62 100644 --- a/settings.json.template +++ b/settings.json.template @@ -634,6 +634,12 @@ /* * Whether to periodically clean up expired and stale sessions from the * database. Set to false to disable. Default: true. + * + * Cleanup runs hourly and walks the sessionstorage:* keyspace in 500-key + * pages so very large session tables don't pin the keys in memory all at + * once (see https://github.com/ether/etherpad/issues/7830). A single run + * is capped at 10 minutes; if your DB is so large the budget is reached, + * a warning is logged and the next scheduled run continues. */ "sessionCleanup": true }, From e369346bc0a218d97b9bb9845333e2d6cae21a56 Mon Sep 17 00:00:00 2001 From: John McLear Date: Fri, 22 May 2026 10:49:20 +0100 Subject: [PATCH 4/4] chore: regenerate lockfile against ueberdb2 6.1.2 Now that ether/ueberDB#983 unblocked the publish workflow (OIDC trusted publishing), ueberdb2 6.1.2 is live on npm and the `^6.1.0` pin in src/package.json resolves cleanly. Resolves the ERR_PNPM_OUTDATED_LOCKFILE that was blocking CI on this PR. 29 SessionStore backend tests still green against the published tarball. Co-Authored-By: Claude Opus 4.7 (1M context) --- pnpm-lock.yaml | 68 ++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 66 insertions(+), 2 deletions(-) diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 2353ee32d21..7eac1a47fbe 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -356,8 +356,8 @@ importers: specifier: 4.22.3 version: 4.22.3 ueberdb2: - specifier: ^6.0.3 - version: 6.0.3(@elastic/elasticsearch@9.4.0)(async@3.2.6)(cassandra-driver@4.8.0)(dirty-ts@1.1.8)(mongodb@7.2.0)(mssql@12.5.3(@azure/core-client@1.10.1))(mysql2@3.22.3(@types/node@25.9.1))(nano@11.0.5)(pg@8.21.0)(redis@5.12.1(@opentelemetry/api@1.9.1))(rethinkdb@2.4.2)(rusty-store-kv@1.3.1)(surrealdb@2.0.3(tslib@2.8.1)(typescript@6.0.3)) + specifier: ^6.1.0 + version: 6.1.2(@elastic/elasticsearch@9.4.0)(async@3.2.6)(cassandra-driver@4.8.0)(dirty-ts@1.1.8)(mongodb@7.2.0)(mssql@12.5.3(@azure/core-client@1.10.1))(mysql2@3.22.3(@types/node@25.9.1))(nano@11.0.5)(pg@8.21.0)(redis@5.12.1(@opentelemetry/api@1.9.1))(rethinkdb@2.4.2)(rusty-store-kv@1.3.1)(surrealdb@2.0.3(tslib@2.8.1)(typescript@6.0.3)) underscore: specifier: 1.13.8 version: 1.13.8 @@ -5560,6 +5560,54 @@ packages: surrealdb: optional: true + ueberdb2@6.1.2: + resolution: {integrity: sha512-GjrMEbmOk0z9H3HoaTTdb+ZoA6ggnpj99Lx61u52LSgMxUI5KO+4AkuZoa7Vi6im6LfTI0aKQacUBr8V080vhg==} + engines: {node: '>=24.0.0'} + peerDependencies: + '@elastic/elasticsearch': ^9.0.0 + async: ^3.0.0 + cassandra-driver: ^4.0.0 + dirty-ts: ^1.0.0 + mongodb: ^7.0.0 + mssql: ^12.0.0 + mysql2: ^3.0.0 + nano: ^11.0.0 + pg: ^8.0.0 + redis: ^5.0.0 + rethinkdb: ^2.0.0 + rusty-store-kv: ^1.0.0 + simple-git: ^3.0.0 + surrealdb: ^2.0.0 + peerDependenciesMeta: + '@elastic/elasticsearch': + optional: true + async: + optional: true + cassandra-driver: + optional: true + dirty-ts: + optional: true + mongodb: + optional: true + mssql: + optional: true + mysql2: + optional: true + nano: + optional: true + pg: + optional: true + redis: + optional: true + rethinkdb: + optional: true + rusty-store-kv: + optional: true + simple-git: + optional: true + surrealdb: + optional: true + uid-safe@2.1.5: resolution: {integrity: sha512-KPHm4VL5dDXKz01UuEd88Df+KzynaohSL9fBh096KWAxSKZQDI2uBrVqtvRM4rwrIrRRKsdLNML/lnaaVSRioA==} engines: {node: '>= 0.8'} @@ -11360,6 +11408,22 @@ snapshots: rusty-store-kv: 1.3.1 surrealdb: 2.0.3(tslib@2.8.1)(typescript@6.0.3) + ueberdb2@6.1.2(@elastic/elasticsearch@9.4.0)(async@3.2.6)(cassandra-driver@4.8.0)(dirty-ts@1.1.8)(mongodb@7.2.0)(mssql@12.5.3(@azure/core-client@1.10.1))(mysql2@3.22.3(@types/node@25.9.1))(nano@11.0.5)(pg@8.21.0)(redis@5.12.1(@opentelemetry/api@1.9.1))(rethinkdb@2.4.2)(rusty-store-kv@1.3.1)(surrealdb@2.0.3(tslib@2.8.1)(typescript@6.0.3)): + optionalDependencies: + '@elastic/elasticsearch': 9.4.0 + async: 3.2.6 + cassandra-driver: 4.8.0 + dirty-ts: 1.1.8 + mongodb: 7.2.0 + mssql: 12.5.3(@azure/core-client@1.10.1) + mysql2: 3.22.3(@types/node@25.9.1) + nano: 11.0.5 + pg: 8.21.0 + redis: 5.12.1(@opentelemetry/api@1.9.1) + rethinkdb: 2.4.2 + rusty-store-kv: 1.3.1 + surrealdb: 2.0.3(tslib@2.8.1)(typescript@6.0.3) + uid-safe@2.1.5: dependencies: random-bytes: 1.0.0