From 5e2ea1ea048ce11c5f69e53ae452e3f3d627f857 Mon Sep 17 00:00:00 2001 From: greatjourney589 Date: Tue, 26 May 2026 11:40:02 -0400 Subject: [PATCH 1/2] fix: track body truncation explicitly instead of inferring from cap length (#165) --- .../issue/[owner]/[name]/[number]/route.ts | 17 +++++-- .../api/pull/[owner]/[name]/[number]/route.ts | 14 ++++-- .../[owner]/[name]/[number]/route.ts | 14 ++++-- src/lib/body-cap.ts | 33 +++++++++++++ src/lib/db.ts | 24 ++++++++++ src/lib/refresh.ts | 47 +++++++++++++++---- 6 files changed, 128 insertions(+), 21 deletions(-) create mode 100644 src/lib/body-cap.ts diff --git a/src/app/api/issue/[owner]/[name]/[number]/route.ts b/src/app/api/issue/[owner]/[name]/[number]/route.ts index 849f8ae..375b96d 100644 --- a/src/app/api/issue/[owner]/[name]/[number]/route.ts +++ b/src/app/api/issue/[owner]/[name]/[number]/route.ts @@ -72,14 +72,19 @@ export async function GET( const db = getDb(); const cached = db .prepare( - `SELECT id, repo_full_name, number, title, body, state, state_reason, + `SELECT id, repo_full_name, number, title, body, body_truncated, state, state_reason, author_login, author_association, labels, comments, created_at, updated_at, closed_at, html_url, fetched_at, first_seen_at FROM issues WHERE repo_full_name = ? AND number = ?` ) .get(repoFullName, num) as IssueRow | undefined; - if (cached && cached.body !== null && cached.body.length !== 8000) { + // Serve from cache only when the stored body is complete. Truncation is read + // from the explicit flag the poller sets, not inferred from the body length + // (issue #165) — so a body that happens to equal the cap is no longer + // re-fetched forever, and a full body stored by an earlier detail open keeps + // being served instead of triggering a fresh GitHub call every poll cycle. + if (cached && cached.body !== null && !cached.body_truncated) { return NextResponse.json( issueRowPayload(cached, 'cache', mergedPullCountForIssue(db, repoFullName, num)), { headers: NO_STORE_HEADERS }, @@ -106,13 +111,16 @@ export async function GET( db.prepare( `INSERT INTO issues - (repo_full_name, number, title, body, state, state_reason, author_login, author_association, + (repo_full_name, number, title, body, body_truncated, state, state_reason, author_login, author_association, labels, comments, created_at, updated_at, closed_at, html_url, raw_json, fetched_at, first_seen_at) - VALUES (@repo_full_name, @number, @title, @body, @state, @state_reason, @author_login, @author_association, + VALUES (@repo_full_name, @number, @title, @body, 0, @state, @state_reason, @author_login, @author_association, @labels, @comments, @created_at, @updated_at, @closed_at, @html_url, NULL, @fetched_at, @first_seen_at) ON CONFLICT(repo_full_name, number) DO UPDATE SET title = excluded.title, + -- Detail fetch returns the full, uncapped body — store it verbatim and + -- clear the truncated flag so the poller won't clobber it (issue #165). body = excluded.body, + body_truncated = 0, state = excluded.state, state_reason = excluded.state_reason, author_login = excluded.author_login, @@ -149,6 +157,7 @@ export async function GET( number: data.number, title: data.title, body: data.body ?? null, + body_truncated: 0, state: data.state, state_reason: data.state_reason ?? null, author_login: data.user?.login ?? null, diff --git a/src/app/api/pull/[owner]/[name]/[number]/route.ts b/src/app/api/pull/[owner]/[name]/[number]/route.ts index 3010f87..eb21b42 100644 --- a/src/app/api/pull/[owner]/[name]/[number]/route.ts +++ b/src/app/api/pull/[owner]/[name]/[number]/route.ts @@ -35,14 +35,16 @@ export async function GET( const db = getDb(); const cached = db .prepare( - `SELECT id, repo_full_name, number, title, body, state, draft, merged, + `SELECT id, repo_full_name, number, title, body, body_truncated, state, draft, merged, author_login, author_association, created_at, updated_at, closed_at, merged_at, html_url, fetched_at, first_seen_at FROM pulls WHERE repo_full_name = ? AND number = ?` ) .get(repoFullName, num) as PullRow | undefined; - if (cached && cached.body !== null && cached.body.length !== 4000) { + // Serve from cache only when the stored body is complete, read from the + // explicit poller-set flag rather than inferred from length (issue #165). + if (cached && cached.body !== null && !cached.body_truncated) { return NextResponse.json({ ...cached, source: 'cache' }, { headers: NO_STORE_HEADERS }); } @@ -60,13 +62,16 @@ export async function GET( db.prepare( `INSERT INTO pulls - (repo_full_name, number, title, body, state, draft, merged, author_login, author_association, + (repo_full_name, number, title, body, body_truncated, state, draft, merged, author_login, author_association, created_at, updated_at, closed_at, merged_at, html_url, raw_json, fetched_at, first_seen_at) - VALUES (@repo_full_name, @number, @title, @body, @state, @draft, @merged, @author_login, @author_association, + VALUES (@repo_full_name, @number, @title, @body, 0, @state, @draft, @merged, @author_login, @author_association, @created_at, @updated_at, @closed_at, @merged_at, @html_url, NULL, @fetched_at, @first_seen_at) ON CONFLICT(repo_full_name, number) DO UPDATE SET title = excluded.title, + -- Detail fetch returns the full, uncapped body — store it verbatim and + -- clear the truncated flag so the poller won't clobber it (issue #165). body = excluded.body, + body_truncated = 0, state = excluded.state, draft = excluded.draft, merged = excluded.merged, @@ -103,6 +108,7 @@ export async function GET( number: data.number, title: data.title, body: data.body ?? null, + body_truncated: 0, state: data.state, draft: data.draft ? 1 : 0, merged: data.merged ? 1 : 0, diff --git a/src/app/api/related-issues/[owner]/[name]/[number]/route.ts b/src/app/api/related-issues/[owner]/[name]/[number]/route.ts index 82a3d5e..ad8d131 100644 --- a/src/app/api/related-issues/[owner]/[name]/[number]/route.ts +++ b/src/app/api/related-issues/[owner]/[name]/[number]/route.ts @@ -53,7 +53,7 @@ function selectPull(repoFullName: string, prNumber: number): PullRow | null { return ( (getDb() .prepare( - `SELECT id, repo_full_name, number, title, body, state, draft, merged, + `SELECT id, repo_full_name, number, title, body, body_truncated, state, draft, merged, author_login, author_association, created_at, updated_at, closed_at, merged_at, html_url, fetched_at, first_seen_at FROM pulls WHERE repo_full_name = ? AND number = ?` @@ -80,6 +80,7 @@ async function fetchPull(owner: string, repo: string, prNumber: number): Promise number: data.number, title: data.title, body: data.body ?? null, + body_truncated: 0, state: data.state, draft: data.draft ? 1 : 0, merged: data.merged ? 1 : 0, @@ -122,13 +123,16 @@ async function fetchAndCacheIssue(owner: string, repo: string, issueNumber: numb const now = new Date().toISOString(); db.prepare( `INSERT INTO issues - (repo_full_name, number, title, body, state, state_reason, author_login, author_association, + (repo_full_name, number, title, body, body_truncated, state, state_reason, author_login, author_association, labels, comments, created_at, updated_at, closed_at, html_url, raw_json, fetched_at, first_seen_at) - VALUES (@repo_full_name, @number, @title, @body, @state, @state_reason, @author_login, @author_association, + VALUES (@repo_full_name, @number, @title, @body, 0, @state, @state_reason, @author_login, @author_association, @labels, @comments, @created_at, @updated_at, @closed_at, @html_url, NULL, @fetched_at, @first_seen_at) ON CONFLICT(repo_full_name, number) DO UPDATE SET title = excluded.title, + -- Full, uncapped body from issues.get — store it and clear the truncated + -- flag so the poller won't re-clip it (issue #165). body = excluded.body, + body_truncated = 0, state = excluded.state, state_reason = excluded.state_reason, author_login = excluded.author_login, @@ -162,7 +166,7 @@ async function fetchAndCacheIssue(owner: string, repo: string, issueNumber: numb function selectLinkedIssues(repoFullName: string, prNumber: number): IssueRow[] { return getDb() .prepare( - `SELECT i.id, i.repo_full_name, i.number, i.title, i.body, i.state, i.state_reason, + `SELECT i.id, i.repo_full_name, i.number, i.title, i.body, i.body_truncated, i.state, i.state_reason, i.author_login, i.author_association, i.labels, i.comments, i.created_at, i.updated_at, i.closed_at, i.html_url, i.fetched_at, i.first_seen_at FROM pr_issue_links l @@ -176,7 +180,7 @@ function selectLinkedIssues(repoFullName: string, prNumber: number): IssueRow[] function selectRelatedPulls(repoFullName: string, issueNumber: number): PullRow[] { return getDb() .prepare( - `SELECT p.id, p.repo_full_name, p.number, p.title, p.body, p.state, p.draft, p.merged, + `SELECT p.id, p.repo_full_name, p.number, p.title, p.body, p.body_truncated, p.state, p.draft, p.merged, p.author_login, p.author_association, p.created_at, p.updated_at, p.closed_at, p.merged_at, p.html_url, p.fetched_at, p.first_seen_at FROM pr_issue_links l diff --git a/src/lib/body-cap.ts b/src/lib/body-cap.ts new file mode 100644 index 0000000..701240d --- /dev/null +++ b/src/lib/body-cap.ts @@ -0,0 +1,33 @@ +// Single source of truth for how long issue/PR bodies are stored in the list +// cache. The poller (refresh.ts) caps bodies to these lengths to bound DB size +// across the thousands of rows it sweeps; detail opens fetch the full, +// uncapped body and persist it. +// +// Whether a stored body was actually shortened is tracked explicitly via the +// `body_truncated` flag on the `issues`/`pulls` rows — never inferred by +// comparing the stored length to one of these constants. That inference was +// fragile (issue #165): a body whose real length happened to equal the cap was +// misclassified as truncated forever, and the two sides could silently drift if +// only one cap changed. Keeping the caps here, and the truncation decision in +// `capBody`, means the poller and the detail routes can never disagree. +export const ISSUE_BODY_CAP = 8000; +export const PULL_BODY_CAP = 4000; + +export interface CappedBody { + /** The body clamped to at most `cap` characters. Never null. */ + body: string; + /** 1 if the original body was longer than `cap` and got shortened, else 0. */ + truncated: 0 | 1; +} + +/** + * Cap a body to `cap` characters, reporting whether it was actually shortened. + * + * A body whose length is exactly `cap` is NOT truncated (`slice(0, cap)` of it + * is the whole string) — the strict `>` is what fixes the exact-length + * misclassification from issue #165. + */ +export function capBody(body: string | null | undefined, cap: number): CappedBody { + const full = body ?? ''; + return { body: full.slice(0, cap), truncated: full.length > cap ? 1 : 0 }; +} diff --git a/src/lib/db.ts b/src/lib/db.ts index 685a909..51a1832 100644 --- a/src/lib/db.ts +++ b/src/lib/db.ts @@ -1,6 +1,7 @@ import path from 'path'; import fs from 'fs'; import Database from 'better-sqlite3'; +import { ISSUE_BODY_CAP, PULL_BODY_CAP } from './body-cap'; const DATA_DIR = path.resolve(process.cwd(), 'data'); if (!fs.existsSync(DATA_DIR)) fs.mkdirSync(DATA_DIR, { recursive: true }); @@ -113,6 +114,7 @@ export function getDb(): Database.Database { number INTEGER NOT NULL, title TEXT NOT NULL, body TEXT, + body_truncated INTEGER NOT NULL DEFAULT 0, state TEXT NOT NULL, state_reason TEXT, author_login TEXT, @@ -145,6 +147,7 @@ export function getDb(): Database.Database { number INTEGER NOT NULL, title TEXT NOT NULL, body TEXT, + body_truncated INTEGER NOT NULL DEFAULT 0, state TEXT NOT NULL, draft INTEGER DEFAULT 0, merged INTEGER DEFAULT 0, @@ -308,6 +311,25 @@ export function getDb(): Database.Database { db.exec('ALTER TABLE pulls ADD COLUMN author_association TEXT'); } + // `body_truncated` (issue #165) records whether a stored body was capped by + // the poller, replacing the old "stored length == cap" inference in the + // detail routes. When the column is first added we backfill it: a poller- + // capped body lands at exactly the cap length, so flag those rows truncated. + // Erring toward `1` is the safe direction — a false truncated flag costs one + // detail re-fetch that then re-stores the full body and self-heals the flag, + // whereas a false complete flag would serve a clipped body as if whole. + const issuesCols = db.prepare("PRAGMA table_info(issues)").all() as Array<{ name: string }>; + if (!issuesCols.some((c) => c.name === 'body_truncated')) { + db.exec('ALTER TABLE issues ADD COLUMN body_truncated INTEGER NOT NULL DEFAULT 0'); + db.prepare('UPDATE issues SET body_truncated = 1 WHERE body IS NOT NULL AND length(body) = ?') + .run(ISSUE_BODY_CAP); + } + if (!pullsCols.some((c) => c.name === 'body_truncated')) { + db.exec('ALTER TABLE pulls ADD COLUMN body_truncated INTEGER NOT NULL DEFAULT 0'); + db.prepare('UPDATE pulls SET body_truncated = 1 WHERE body IS NOT NULL AND length(body) = ?') + .run(PULL_BODY_CAP); + } + // One-shot purge of false-positive pr_issue_links (issue #137). Guarded by // PRAGMA user_version so it runs exactly once per database file. const schemaVersion = (db.prepare('PRAGMA user_version').get() as { user_version: number }).user_version; @@ -329,6 +351,7 @@ export interface IssueRow { number: number; title: string; body: string | null; + body_truncated: number; state: string; state_reason: string | null; author_login: string | null; @@ -349,6 +372,7 @@ export interface PullRow { number: number; title: string; body: string | null; + body_truncated: number; state: string; draft: number; merged: number; diff --git a/src/lib/refresh.ts b/src/lib/refresh.ts index b4bbfa6..8bc5e10 100644 --- a/src/lib/refresh.ts +++ b/src/lib/refresh.ts @@ -11,6 +11,7 @@ import { withRotation, } from './github'; import { extractLinkedIssues } from './pr-linking'; +import { ISSUE_BODY_CAP, PULL_BODY_CAP, capBody } from './body-cap'; /** * Add this PR's `pr_issue_links` rows from the body+title regex. Only @@ -357,15 +358,32 @@ function upsertIssue(repoFullName: string, issue: GhIssue): void { const firstSeen = existing?.first_seen_at ?? nowIso(); + const { body: cappedBody, truncated } = capBody(issue.body, ISSUE_BODY_CAP); db.prepare( `INSERT INTO issues - (repo_full_name, number, title, body, state, state_reason, author_login, author_association, + (repo_full_name, number, title, body, body_truncated, state, state_reason, author_login, author_association, labels, comments, created_at, updated_at, closed_at, html_url, raw_json, fetched_at, first_seen_at) - VALUES (@repo_full_name, @number, @title, @body, @state, @state_reason, @author_login, @author_association, + VALUES (@repo_full_name, @number, @title, @body, @body_truncated, @state, @state_reason, @author_login, @author_association, @labels, @comments, @created_at, @updated_at, @closed_at, @html_url, NULL, @fetched_at, @first_seen_at) ON CONFLICT(repo_full_name, number) DO UPDATE SET title = excluded.title, - body = excluded.body, + -- Don't let a poller sweep downgrade a complete body (e.g. one a detail + -- open fetched in full) back to a capped slice (issue #165). Overwrite + -- only when the incoming body is itself complete, the stored body is + -- already truncated, or the incoming text is at least as long as what's + -- stored (so a genuinely grown body still wins). A rarely-edited long + -- body may go briefly stale here; that's the accepted trade for keeping + -- the detail cache warm. + body = CASE + WHEN excluded.body_truncated = 0 + OR issues.body_truncated = 1 + OR length(excluded.body) >= length(IFNULL(issues.body, '')) + THEN excluded.body ELSE issues.body END, + body_truncated = CASE + WHEN excluded.body_truncated = 0 + OR issues.body_truncated = 1 + OR length(excluded.body) >= length(IFNULL(issues.body, '')) + THEN excluded.body_truncated ELSE issues.body_truncated END, state = excluded.state, state_reason = excluded.state_reason, author_login = excluded.author_login, @@ -381,7 +399,8 @@ function upsertIssue(repoFullName: string, issue: GhIssue): void { repo_full_name: repoFullName, number: issue.number, title: issue.title, - body: (issue.body ?? '').slice(0, 8000), + body: cappedBody, + body_truncated: truncated, state: issue.state, state_reason: issue.state_reason, author_login: issue.user?.login ?? null, @@ -406,16 +425,27 @@ function upsertPull(repoFullName: string, pull: GhPull): void { const firstSeen = existing?.first_seen_at ?? nowIso(); const merged = pull.merged_at ? 1 : 0; - const truncatedBody = (pull.body ?? '').slice(0, 4000); + const { body: truncatedBody, truncated } = capBody(pull.body, PULL_BODY_CAP); db.prepare( `INSERT INTO pulls - (repo_full_name, number, title, body, state, draft, merged, author_login, author_association, + (repo_full_name, number, title, body, body_truncated, state, draft, merged, author_login, author_association, created_at, updated_at, closed_at, merged_at, html_url, raw_json, fetched_at, first_seen_at) - VALUES (@repo_full_name, @number, @title, @body, @state, @draft, @merged, @author_login, @author_association, + VALUES (@repo_full_name, @number, @title, @body, @body_truncated, @state, @draft, @merged, @author_login, @author_association, @created_at, @updated_at, @closed_at, @merged_at, @html_url, NULL, @fetched_at, @first_seen_at) ON CONFLICT(repo_full_name, number) DO UPDATE SET title = excluded.title, - body = excluded.body, + -- See upsertIssue: never downgrade a complete body to a capped slice on + -- a poller sweep (issue #165). + body = CASE + WHEN excluded.body_truncated = 0 + OR pulls.body_truncated = 1 + OR length(excluded.body) >= length(IFNULL(pulls.body, '')) + THEN excluded.body ELSE pulls.body END, + body_truncated = CASE + WHEN excluded.body_truncated = 0 + OR pulls.body_truncated = 1 + OR length(excluded.body) >= length(IFNULL(pulls.body, '')) + THEN excluded.body_truncated ELSE pulls.body_truncated END, state = excluded.state, draft = excluded.draft, merged = excluded.merged, @@ -430,6 +460,7 @@ function upsertPull(repoFullName: string, pull: GhPull): void { number: pull.number, title: pull.title, body: truncatedBody, + body_truncated: truncated, state: pull.state, draft: pull.draft ? 1 : 0, merged, From f8521adc18d9e79818ed8180cedbb3faa0fce91f Mon Sep 17 00:00:00 2001 From: greatjourney589 Date: Tue, 26 May 2026 13:03:38 -0400 Subject: [PATCH 2/2] fix: wrap body_truncated migration in a transaction to prevent partial-migration data loss (#165) --- src/lib/db.ts | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/src/lib/db.ts b/src/lib/db.ts index 51a1832..3e9d8ef 100644 --- a/src/lib/db.ts +++ b/src/lib/db.ts @@ -318,16 +318,24 @@ export function getDb(): Database.Database { // Erring toward `1` is the safe direction — a false truncated flag costs one // detail re-fetch that then re-stores the full body and self-heals the flag, // whereas a false complete flag would serve a clipped body as if whole. + // The ADD COLUMN and its backfill must commit together: if a crash landed + // between them, the next boot would see the column present, skip this block, + // and leave capped rows defaulted to 0 (complete) — the unsafe direction. + // Wrapping in a transaction rolls the ALTER back too, so the migration re-runs. const issuesCols = db.prepare("PRAGMA table_info(issues)").all() as Array<{ name: string }>; if (!issuesCols.some((c) => c.name === 'body_truncated')) { - db.exec('ALTER TABLE issues ADD COLUMN body_truncated INTEGER NOT NULL DEFAULT 0'); - db.prepare('UPDATE issues SET body_truncated = 1 WHERE body IS NOT NULL AND length(body) = ?') - .run(ISSUE_BODY_CAP); + db.transaction(() => { + db.exec('ALTER TABLE issues ADD COLUMN body_truncated INTEGER NOT NULL DEFAULT 0'); + db.prepare('UPDATE issues SET body_truncated = 1 WHERE body IS NOT NULL AND length(body) = ?') + .run(ISSUE_BODY_CAP); + })(); } if (!pullsCols.some((c) => c.name === 'body_truncated')) { - db.exec('ALTER TABLE pulls ADD COLUMN body_truncated INTEGER NOT NULL DEFAULT 0'); - db.prepare('UPDATE pulls SET body_truncated = 1 WHERE body IS NOT NULL AND length(body) = ?') - .run(PULL_BODY_CAP); + db.transaction(() => { + db.exec('ALTER TABLE pulls ADD COLUMN body_truncated INTEGER NOT NULL DEFAULT 0'); + db.prepare('UPDATE pulls SET body_truncated = 1 WHERE body IS NOT NULL AND length(body) = ?') + .run(PULL_BODY_CAP); + })(); } // One-shot purge of false-positive pr_issue_links (issue #137). Guarded by