diff --git a/frontend/components/dataset/DatasetCard.tsx b/frontend/components/dataset/DatasetCard.tsx index 2d362ae..5b4c700 100644 --- a/frontend/components/dataset/DatasetCard.tsx +++ b/frontend/components/dataset/DatasetCard.tsx @@ -9,7 +9,12 @@ export interface DatasetCardData { status: DatasetStatus; cadence: string; columns: { name: string; type: string }[]; + // Preview is capped at 5 rows for the mini-table. The total row count + // is on `rowCount` (denormalized counter maintained by the row write + // mutations in convex/datasetRows.ts) so the footer stays reactive + // past the first 5 inserts. previewRows: Record[]; + rowCount: number; visibility?: "public" | "private"; } @@ -49,7 +54,7 @@ export function DatasetCard({ dataset }: { dataset: DatasetCardData }) { {dataset.cadence} - {dataset.previewRows?.length ?? 0} rows + {dataset.rowCount ?? 0} rows diff --git a/frontend/convex/datasetRows.ts b/frontend/convex/datasetRows.ts index 99eadb0..fd7f16d 100644 --- a/frontend/convex/datasetRows.ts +++ b/frontend/convex/datasetRows.ts @@ -1,8 +1,28 @@ import { query, internalMutation, internalQuery } from "./_generated/server.js"; +import type { MutationCtx } from "./_generated/server.js"; import { v } from "convex/values"; +import type { Id } from "./_generated/dataModel.js"; import { assertRowInDataset, loadReadableDataset } from "./lib/authz.js"; import { consumeQuotaForDataset } from "./lib/quota.js"; +/** + * Authoritative row count for a dataset. O(N), so use only on the slow + * paths: self-heal in `insert` / `remove` when the dataset doc predates + * the `rowCount` field, or the explicit `datasets.backfillRowCounts` + * migration. Steady-state writes hit the cached counter and never call + * this. 
+ */ +async function actualRowCount( + ctx: MutationCtx, + datasetId: Id<"datasets">, +): Promise { + const rows = await ctx.db + .query("datasetRows") + .withIndex("by_dataset", (q) => q.eq("datasetId", datasetId)) + .collect(); + return rows.length; +} + /** * Read all rows of a dataset. * @@ -46,8 +66,26 @@ export const insert = internalMutation({ sources: v.optional(v.array(v.string())), }, handler: async (ctx, args) => { - await consumeQuotaForDataset(ctx, args.datasetId, 1); - return await ctx.db.insert("datasetRows", args); + // `consumeQuotaForDataset` returns the dataset doc so we don't + // double-read it. + const dataset = await consumeQuotaForDataset(ctx, args.datasetId, 1); + + // Pre-insert count is either the cached counter (fast path) or — for + // datasets whose docs predate the rowCount field — recomputed once + // here. Subsequent inserts on the same dataset hit the fast path. + const previousCount = + typeof dataset.rowCount === "number" + ? dataset.rowCount + : await actualRowCount(ctx, args.datasetId); + + const rowId = await ctx.db.insert("datasetRows", args); + + // Maintain the denormalized counter the dashboard reads from. Same + // transaction as the row insert → atomic; quota-rejected inserts + // never bump the counter. + await ctx.db.patch(args.datasetId, { rowCount: previousCount + 1 }); + + return rowId; }, }); @@ -114,6 +152,9 @@ export const clearByDataset = internalMutation({ for (const row of rows) { await ctx.db.delete(row._id); } + // Reset the cached counter. We know the post-state exactly, so this + // doesn't need the read-then-add dance that `insert` / `remove` use. + await ctx.db.patch(args.datasetId, { rowCount: 0 }); return rows.length; }, }); @@ -163,6 +204,24 @@ export const remove = internalMutation({ }, handler: async (ctx, args) => { await assertRowInDataset(ctx, args.id, args.expectedDatasetId); + + // Decrement the cached counter, self-healing if the dataset doc + // predates the rowCount field. 
`dataset` is guaranteed to exist — + // but only if assertRowInDataset above rejects a missing dataset (TODO confirm), hence the defensive null check. Any self-heal recount runs before the delete, so it still includes this row. + const dataset = await ctx.db.get(args.expectedDatasetId); + if (dataset) { + const previousCount = + typeof dataset.rowCount === "number" + ? dataset.rowCount + : await actualRowCount(ctx, args.expectedDatasetId); + await ctx.db.patch(args.expectedDatasetId, { + // clamp at 0 as a paranoid guard — counter should never go + // negative because we just confirmed the row exists, but a bug + // that drove it negative would manifest as an even weirder UI. + rowCount: Math.max(0, previousCount - 1), + }); + } + await ctx.db.delete(args.id); }, }); diff --git a/frontend/convex/datasets.ts b/frontend/convex/datasets.ts index e439576..281420b 100644 --- a/frontend/convex/datasets.ts +++ b/frontend/convex/datasets.ts @@ -30,13 +30,25 @@ const columnValidator = v.object({ const PREVIEW_ROW_COUNT = 5; async function attachPreview(ctx: QueryCtx, dataset: Doc<"datasets">) { - const rows = await ctx.db + // Mini-table preview: just the first N rows. `.take` keeps the + // subscription's read set small — the dashboard's reactivity for the + // row count does NOT depend on this query. It depends on the + // denormalized `rowCount` field on the dataset doc itself, maintained + // by datasetRows.{insert,remove,clearByDataset}. That field is part of + // `dataset`, which is part of the query's read set, so patches to it + // invalidate the subscription and the card re-renders with the new + // count even after the first PREVIEW_ROW_COUNT rows. + const previewRows = await ctx.db .query("datasetRows") .withIndex("by_dataset", (q) => q.eq("datasetId", dataset._id)) .take(PREVIEW_ROW_COUNT); return { ...dataset, - previewRows: rows.map((r) => r.data), + previewRows: previewRows.map((r) => r.data), + // Fallback to the preview length only when the dataset doc predates + // the `rowCount` field. 
Write paths self-heal on the next insert / + // remove; `datasets.backfillRowCounts` migrates every doc at once. + rowCount: dataset.rowCount ?? previewRows.length, }; } @@ -160,6 +172,7 @@ export const create = mutation({ ownerId: identity.subject, status: "building", visibility: "private", + rowCount: 0, }); }, }); @@ -194,3 +207,38 @@ export const remove = mutation({ await ctx.db.delete(dataset._id); }, }); + +/** + * One-shot migration: scan every dataset, count its rows, and patch + * `rowCount` to the true value. Idempotent and safe to re-run. + * + * Needed once after deploying the `rowCount` field — write paths + * self-heal on first hit, but datasets that haven't been written to + * since the field landed keep showing the preview-length fallback + * (capped at PREVIEW_ROW_COUNT). Running this promotes every doc to + * the fast path in one shot. + * + * Cost is O(total rows): every dataset and all of its rows are read with `.collect()` inside this one mutation (NOTE(review): confirm that fits Convex transaction limits on large tables). Returns `{ patched, alreadyCorrect, total }`. Run from the convex CLI: + * npx convex run datasets:backfillRowCounts + */ +export const backfillRowCounts = internalMutation({ + args: {}, + handler: async (ctx) => { + const datasets = await ctx.db.query("datasets").collect(); + let patched = 0; + let alreadyCorrect = 0; + for (const ds of datasets) { + const rows = await ctx.db + .query("datasetRows") + .withIndex("by_dataset", (q) => q.eq("datasetId", ds._id)) + .collect(); + if (ds.rowCount === rows.length) { + alreadyCorrect++; + continue; + } + await ctx.db.patch(ds._id, { rowCount: rows.length }); + patched++; + } + return { patched, alreadyCorrect, total: datasets.length }; + }, +}); diff --git a/frontend/convex/publicSeed.ts b/frontend/convex/publicSeed.ts index 304d026..68c49d7 100644 --- a/frontend/convex/publicSeed.ts +++ b/frontend/convex/publicSeed.ts @@ -376,7 +376,10 @@ export const seedPublicDatasets = internalMutation({ continue; } - // Force-update: patch metadata + replace rows. + // Force-update: patch metadata + replace rows. 
Reset rowCount + // alongside the row replacement so the dashboard reflects the + // curated content immediately. We know the exact post-state + // (`ds.rows.length`), so no recount needed. await ctx.db.patch(tracked._id, { name: ds.name, description: ds.description, @@ -384,6 +387,7 @@ export const seedPublicDatasets = internalMutation({ columns: ds.columns, status: "live", visibility: "public", + rowCount: ds.rows.length, }); const oldRows = await ctx.db @@ -424,6 +428,7 @@ export const seedPublicDatasets = internalMutation({ cadence: ds.cadence, visibility: "public", columns: ds.columns, + rowCount: ds.rows.length, }); for (const row of ds.rows) { diff --git a/frontend/convex/schema.ts b/frontend/convex/schema.ts index 68cd55f..c1346a3 100644 --- a/frontend/convex/schema.ts +++ b/frontend/convex/schema.ts @@ -21,6 +21,15 @@ export default defineSchema({ // time doesn't rely on `name` (which marketing changes). User-created // datasets do not set this. See convex/publicSeed.ts. seedKey: v.optional(v.string()), + // Denormalized row count maintained by `datasetRows.insert / remove / + // clearByDataset` and by the seed/create paths. Read by the dashboard + // card's "X rows" footer via `datasets.attachPreview` so the count + // stays reactive past the first PREVIEW_ROW_COUNT inserts (a query + // over `.take(5)` only invalidates when one of the first 5 rows + // changes, freezing the dashboard at 5). Optional for backward compat + // with dataset docs created before this field existed — write paths self-heal + // on first hit, and `datasets.backfillRowCounts` migrates all at once. + rowCount: v.optional(v.number()), columns: v.array( v.object({ name: v.string(),