Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 13 additions & 4 deletions src/app/api/issue/[owner]/[name]/[number]/route.ts
Original file line number Diff line number Diff line change
Expand Up @@ -72,14 +72,19 @@ export async function GET(
const db = getDb();
const cached = db
.prepare(
`SELECT id, repo_full_name, number, title, body, state, state_reason,
`SELECT id, repo_full_name, number, title, body, body_truncated, state, state_reason,
author_login, author_association, labels, comments,
created_at, updated_at, closed_at, html_url, fetched_at, first_seen_at
FROM issues WHERE repo_full_name = ? AND number = ?`
)
.get(repoFullName, num) as IssueRow | undefined;

if (cached && cached.body !== null && cached.body.length !== 8000) {
// Serve from cache only when the stored body is complete. Truncation is read
// from the explicit flag the poller sets, not inferred from the body length
// (issue #165) — so a body that happens to equal the cap is no longer
// re-fetched forever, and a full body stored by an earlier detail open keeps
// being served instead of triggering a fresh GitHub call every poll cycle.
if (cached && cached.body !== null && !cached.body_truncated) {
return NextResponse.json(
issueRowPayload(cached, 'cache', mergedPullCountForIssue(db, repoFullName, num)),
{ headers: NO_STORE_HEADERS },
Expand All @@ -106,13 +111,16 @@ export async function GET(

db.prepare(
`INSERT INTO issues
(repo_full_name, number, title, body, state, state_reason, author_login, author_association,
(repo_full_name, number, title, body, body_truncated, state, state_reason, author_login, author_association,
labels, comments, created_at, updated_at, closed_at, html_url, raw_json, fetched_at, first_seen_at)
VALUES (@repo_full_name, @number, @title, @body, @state, @state_reason, @author_login, @author_association,
VALUES (@repo_full_name, @number, @title, @body, 0, @state, @state_reason, @author_login, @author_association,
@labels, @comments, @created_at, @updated_at, @closed_at, @html_url, NULL, @fetched_at, @first_seen_at)
ON CONFLICT(repo_full_name, number) DO UPDATE SET
title = excluded.title,
-- Detail fetch returns the full, uncapped body — store it verbatim and
-- clear the truncated flag so the poller won't clobber it (issue #165).
body = excluded.body,
body_truncated = 0,
state = excluded.state,
state_reason = excluded.state_reason,
author_login = excluded.author_login,
Expand Down Expand Up @@ -149,6 +157,7 @@ export async function GET(
number: data.number,
title: data.title,
body: data.body ?? null,
body_truncated: 0,
state: data.state,
state_reason: data.state_reason ?? null,
author_login: data.user?.login ?? null,
Expand Down
14 changes: 10 additions & 4 deletions src/app/api/pull/[owner]/[name]/[number]/route.ts
Original file line number Diff line number Diff line change
Expand Up @@ -35,14 +35,16 @@ export async function GET(
const db = getDb();
const cached = db
.prepare(
`SELECT id, repo_full_name, number, title, body, state, draft, merged,
`SELECT id, repo_full_name, number, title, body, body_truncated, state, draft, merged,
author_login, author_association, created_at, updated_at, closed_at, merged_at,
html_url, fetched_at, first_seen_at
FROM pulls WHERE repo_full_name = ? AND number = ?`
)
.get(repoFullName, num) as PullRow | undefined;

if (cached && cached.body !== null && cached.body.length !== 4000) {
// Serve from cache only when the stored body is complete, read from the
// explicit poller-set flag rather than inferred from length (issue #165).
if (cached && cached.body !== null && !cached.body_truncated) {
return NextResponse.json({ ...cached, source: 'cache' }, { headers: NO_STORE_HEADERS });
}

Expand All @@ -60,13 +62,16 @@ export async function GET(

db.prepare(
`INSERT INTO pulls
(repo_full_name, number, title, body, state, draft, merged, author_login, author_association,
(repo_full_name, number, title, body, body_truncated, state, draft, merged, author_login, author_association,
created_at, updated_at, closed_at, merged_at, html_url, raw_json, fetched_at, first_seen_at)
VALUES (@repo_full_name, @number, @title, @body, @state, @draft, @merged, @author_login, @author_association,
VALUES (@repo_full_name, @number, @title, @body, 0, @state, @draft, @merged, @author_login, @author_association,
@created_at, @updated_at, @closed_at, @merged_at, @html_url, NULL, @fetched_at, @first_seen_at)
ON CONFLICT(repo_full_name, number) DO UPDATE SET
title = excluded.title,
-- Detail fetch returns the full, uncapped body — store it verbatim and
-- clear the truncated flag so the poller won't clobber it (issue #165).
body = excluded.body,
body_truncated = 0,
state = excluded.state,
draft = excluded.draft,
merged = excluded.merged,
Expand Down Expand Up @@ -103,6 +108,7 @@ export async function GET(
number: data.number,
title: data.title,
body: data.body ?? null,
body_truncated: 0,
state: data.state,
draft: data.draft ? 1 : 0,
merged: data.merged ? 1 : 0,
Expand Down
14 changes: 9 additions & 5 deletions src/app/api/related-issues/[owner]/[name]/[number]/route.ts
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ function selectPull(repoFullName: string, prNumber: number): PullRow | null {
return (
(getDb()
.prepare(
`SELECT id, repo_full_name, number, title, body, state, draft, merged,
`SELECT id, repo_full_name, number, title, body, body_truncated, state, draft, merged,
author_login, author_association, created_at, updated_at, closed_at, merged_at,
html_url, fetched_at, first_seen_at
FROM pulls WHERE repo_full_name = ? AND number = ?`
Expand All @@ -80,6 +80,7 @@ async function fetchPull(owner: string, repo: string, prNumber: number): Promise
number: data.number,
title: data.title,
body: data.body ?? null,
body_truncated: 0,
state: data.state,
draft: data.draft ? 1 : 0,
merged: data.merged ? 1 : 0,
Expand Down Expand Up @@ -122,13 +123,16 @@ async function fetchAndCacheIssue(owner: string, repo: string, issueNumber: numb
const now = new Date().toISOString();
db.prepare(
`INSERT INTO issues
(repo_full_name, number, title, body, state, state_reason, author_login, author_association,
(repo_full_name, number, title, body, body_truncated, state, state_reason, author_login, author_association,
labels, comments, created_at, updated_at, closed_at, html_url, raw_json, fetched_at, first_seen_at)
VALUES (@repo_full_name, @number, @title, @body, @state, @state_reason, @author_login, @author_association,
VALUES (@repo_full_name, @number, @title, @body, 0, @state, @state_reason, @author_login, @author_association,
@labels, @comments, @created_at, @updated_at, @closed_at, @html_url, NULL, @fetched_at, @first_seen_at)
ON CONFLICT(repo_full_name, number) DO UPDATE SET
title = excluded.title,
-- Full, uncapped body from issues.get — store it and clear the truncated
-- flag so the poller won't re-clip it (issue #165).
body = excluded.body,
body_truncated = 0,
state = excluded.state,
state_reason = excluded.state_reason,
author_login = excluded.author_login,
Expand Down Expand Up @@ -162,7 +166,7 @@ async function fetchAndCacheIssue(owner: string, repo: string, issueNumber: numb
function selectLinkedIssues(repoFullName: string, prNumber: number): IssueRow[] {
return getDb()
.prepare(
`SELECT i.id, i.repo_full_name, i.number, i.title, i.body, i.state, i.state_reason,
`SELECT i.id, i.repo_full_name, i.number, i.title, i.body, i.body_truncated, i.state, i.state_reason,
i.author_login, i.author_association, i.labels, i.comments,
i.created_at, i.updated_at, i.closed_at, i.html_url, i.fetched_at, i.first_seen_at
FROM pr_issue_links l
Expand All @@ -176,7 +180,7 @@ function selectLinkedIssues(repoFullName: string, prNumber: number): IssueRow[]
function selectRelatedPulls(repoFullName: string, issueNumber: number): PullRow[] {
return getDb()
.prepare(
`SELECT p.id, p.repo_full_name, p.number, p.title, p.body, p.state, p.draft, p.merged,
`SELECT p.id, p.repo_full_name, p.number, p.title, p.body, p.body_truncated, p.state, p.draft, p.merged,
p.author_login, p.author_association, p.created_at, p.updated_at, p.closed_at, p.merged_at,
p.html_url, p.fetched_at, p.first_seen_at
FROM pr_issue_links l
Expand Down
33 changes: 33 additions & 0 deletions src/lib/body-cap.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
// Single source of truth for how long issue/PR bodies are stored in the list
// cache. The poller (refresh.ts) caps bodies to these lengths to bound DB size
// across the thousands of rows it sweeps; detail opens fetch the full,
// uncapped body and persist it.
//
// Whether a stored body was actually shortened is tracked explicitly via the
// `body_truncated` flag on the `issues`/`pulls` rows — never inferred by
// comparing the stored length to one of these constants. That inference was
// fragile (issue #165): a body whose real length happened to equal the cap was
// misclassified as truncated forever, and the two sides could silently drift if
// only one cap changed. Keeping the caps here, and the truncation decision in
// `capBody`, means the poller and the detail routes can never disagree.
export const ISSUE_BODY_CAP = 8000;
export const PULL_BODY_CAP = 4000;

export interface CappedBody {
/** The body clamped to at most `cap` characters. Never null. */
body: string;
/** 1 if the original body was longer than `cap` and got shortened, else 0. */
truncated: 0 | 1;
}

/**
* Cap a body to `cap` characters, reporting whether it was actually shortened.
*
* A body whose length is exactly `cap` is NOT truncated (`slice(0, cap)` of it
* is the whole string) — the strict `>` is what fixes the exact-length
* misclassification from issue #165.
*/
export function capBody(body: string | null | undefined, cap: number): CappedBody {
const full = body ?? '';
return { body: full.slice(0, cap), truncated: full.length > cap ? 1 : 0 };
}
32 changes: 32 additions & 0 deletions src/lib/db.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import path from 'path';
import fs from 'fs';
import Database from 'better-sqlite3';
import { ISSUE_BODY_CAP, PULL_BODY_CAP } from './body-cap';

const DATA_DIR = path.resolve(process.cwd(), 'data');
if (!fs.existsSync(DATA_DIR)) fs.mkdirSync(DATA_DIR, { recursive: true });
Expand Down Expand Up @@ -113,6 +114,7 @@ export function getDb(): Database.Database {
number INTEGER NOT NULL,
title TEXT NOT NULL,
body TEXT,
body_truncated INTEGER NOT NULL DEFAULT 0,
state TEXT NOT NULL,
state_reason TEXT,
author_login TEXT,
Expand Down Expand Up @@ -145,6 +147,7 @@ export function getDb(): Database.Database {
number INTEGER NOT NULL,
title TEXT NOT NULL,
body TEXT,
body_truncated INTEGER NOT NULL DEFAULT 0,
state TEXT NOT NULL,
draft INTEGER DEFAULT 0,
merged INTEGER DEFAULT 0,
Expand Down Expand Up @@ -311,6 +314,33 @@ export function getDb(): Database.Database {
db.exec('ALTER TABLE pulls ADD COLUMN author_association TEXT');
}

// `body_truncated` (issue #165) records whether a stored body was capped by
// the poller, replacing the old "stored length == cap" inference in the
// detail routes. When the column is first added we backfill it: a poller-
// capped body lands at exactly the cap length, so flag those rows truncated.
// Erring toward `1` is the safe direction — a false truncated flag costs one
// detail re-fetch that then re-stores the full body and self-heals the flag,
// whereas a false complete flag would serve a clipped body as if whole.
// The ADD COLUMN and its backfill must commit together: if a crash landed
// between them, the next boot would see the column present, skip this block,
// and leave capped rows defaulted to 0 (complete) — the unsafe direction.
// Wrapping in a transaction rolls the ALTER back too, so the migration re-runs.
const issuesCols = db.prepare("PRAGMA table_info(issues)").all() as Array<{ name: string }>;
if (!issuesCols.some((c) => c.name === 'body_truncated')) {
db.transaction(() => {
db.exec('ALTER TABLE issues ADD COLUMN body_truncated INTEGER NOT NULL DEFAULT 0');
db.prepare('UPDATE issues SET body_truncated = 1 WHERE body IS NOT NULL AND length(body) = ?')
.run(ISSUE_BODY_CAP);
})();
}
if (!pullsCols.some((c) => c.name === 'body_truncated')) {
db.transaction(() => {
db.exec('ALTER TABLE pulls ADD COLUMN body_truncated INTEGER NOT NULL DEFAULT 0');
db.prepare('UPDATE pulls SET body_truncated = 1 WHERE body IS NOT NULL AND length(body) = ?')
.run(PULL_BODY_CAP);
})();
}

// One-shot purge of false-positive pr_issue_links (issue #137). Guarded by
// PRAGMA user_version so it runs exactly once per database file.
const schemaVersion = (db.prepare('PRAGMA user_version').get() as { user_version: number }).user_version;
Expand All @@ -332,6 +362,7 @@ export interface IssueRow {
number: number;
title: string;
body: string | null;
body_truncated: number;
state: string;
state_reason: string | null;
author_login: string | null;
Expand All @@ -352,6 +383,7 @@ export interface PullRow {
number: number;
title: string;
body: string | null;
body_truncated: number;
state: string;
draft: number;
merged: number;
Expand Down
47 changes: 39 additions & 8 deletions src/lib/refresh.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import {
withRotation,
} from './github';
import { extractLinkedIssues } from './pr-linking';
import { ISSUE_BODY_CAP, PULL_BODY_CAP, capBody } from './body-cap';

/**
* Add this PR's `pr_issue_links` rows from the body+title regex. Only
Expand Down Expand Up @@ -458,15 +459,32 @@ function upsertIssue(repoFullName: string, issue: GhIssue): void {

const firstSeen = existing?.first_seen_at ?? nowIso();

const { body: cappedBody, truncated } = capBody(issue.body, ISSUE_BODY_CAP);
db.prepare(
`INSERT INTO issues
(repo_full_name, number, title, body, state, state_reason, author_login, author_association,
(repo_full_name, number, title, body, body_truncated, state, state_reason, author_login, author_association,
labels, comments, created_at, updated_at, closed_at, html_url, raw_json, fetched_at, first_seen_at)
VALUES (@repo_full_name, @number, @title, @body, @state, @state_reason, @author_login, @author_association,
VALUES (@repo_full_name, @number, @title, @body, @body_truncated, @state, @state_reason, @author_login, @author_association,
@labels, @comments, @created_at, @updated_at, @closed_at, @html_url, NULL, @fetched_at, @first_seen_at)
ON CONFLICT(repo_full_name, number) DO UPDATE SET
title = excluded.title,
body = excluded.body,
-- Don't let a poller sweep downgrade a complete body (e.g. one a detail
-- open fetched in full) back to a capped slice (issue #165). Overwrite
-- only when the incoming body is itself complete, the stored body is
-- already truncated, or the incoming text is at least as long as what's
-- stored (so a genuinely grown body still wins). A rarely-edited long
-- body may go briefly stale here; that's the accepted trade for keeping
-- the detail cache warm.
body = CASE
WHEN excluded.body_truncated = 0
OR issues.body_truncated = 1
OR length(excluded.body) >= length(IFNULL(issues.body, ''))
THEN excluded.body ELSE issues.body END,
body_truncated = CASE
WHEN excluded.body_truncated = 0
OR issues.body_truncated = 1
OR length(excluded.body) >= length(IFNULL(issues.body, ''))
THEN excluded.body_truncated ELSE issues.body_truncated END,
state = excluded.state,
state_reason = excluded.state_reason,
author_login = excluded.author_login,
Expand All @@ -482,7 +500,8 @@ function upsertIssue(repoFullName: string, issue: GhIssue): void {
repo_full_name: repoFullName,
number: issue.number,
title: issue.title,
body: (issue.body ?? '').slice(0, 8000),
body: cappedBody,
body_truncated: truncated,
state: issue.state,
state_reason: issue.state_reason,
author_login: issue.user?.login ?? null,
Expand All @@ -507,16 +526,27 @@ function upsertPull(repoFullName: string, pull: GhPull): void {
const firstSeen = existing?.first_seen_at ?? nowIso();
const merged = pull.merged_at ? 1 : 0;

const truncatedBody = (pull.body ?? '').slice(0, 4000);
const { body: truncatedBody, truncated } = capBody(pull.body, PULL_BODY_CAP);
db.prepare(
`INSERT INTO pulls
(repo_full_name, number, title, body, state, draft, merged, author_login, author_association,
(repo_full_name, number, title, body, body_truncated, state, draft, merged, author_login, author_association,
created_at, updated_at, closed_at, merged_at, html_url, raw_json, fetched_at, first_seen_at)
VALUES (@repo_full_name, @number, @title, @body, @state, @draft, @merged, @author_login, @author_association,
VALUES (@repo_full_name, @number, @title, @body, @body_truncated, @state, @draft, @merged, @author_login, @author_association,
@created_at, @updated_at, @closed_at, @merged_at, @html_url, NULL, @fetched_at, @first_seen_at)
ON CONFLICT(repo_full_name, number) DO UPDATE SET
title = excluded.title,
body = excluded.body,
-- See upsertIssue: never downgrade a complete body to a capped slice on
-- a poller sweep (issue #165).
body = CASE
WHEN excluded.body_truncated = 0
OR pulls.body_truncated = 1
OR length(excluded.body) >= length(IFNULL(pulls.body, ''))
THEN excluded.body ELSE pulls.body END,
body_truncated = CASE
WHEN excluded.body_truncated = 0
OR pulls.body_truncated = 1
OR length(excluded.body) >= length(IFNULL(pulls.body, ''))
THEN excluded.body_truncated ELSE pulls.body_truncated END,
state = excluded.state,
draft = excluded.draft,
merged = excluded.merged,
Expand All @@ -531,6 +561,7 @@ function upsertPull(repoFullName: string, pull: GhPull): void {
number: pull.number,
title: pull.title,
body: truncatedBody,
body_truncated: truncated,
state: pull.state,
draft: pull.draft ? 1 : 0,
merged,
Expand Down