Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion frontend/components/dataset/DatasetCard.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,12 @@ export interface DatasetCardData {
status: DatasetStatus;
cadence: string;
columns: { name: string; type: string }[];
// Preview is capped at 5 rows for the mini-table. The total row count
// is on `rowCount` (denormalized counter maintained by the row write
// mutations in convex/datasetRows.ts) so the footer stays reactive
// past the first 5 inserts.
previewRows: Record<string, unknown>[];
rowCount: number;
visibility?: "public" | "private";
}

Expand Down Expand Up @@ -49,7 +54,7 @@ export function DatasetCard({ dataset }: { dataset: DatasetCardData }) {
<span className="text-[11px] text-muted">{dataset.cadence}</span>
</div>
<span className="text-[11px] text-muted">
{dataset.previewRows?.length ?? 0} rows
{dataset.rowCount ?? 0} rows
</span>
</div>
</div>
Expand Down
63 changes: 61 additions & 2 deletions frontend/convex/datasetRows.ts
Original file line number Diff line number Diff line change
@@ -1,8 +1,28 @@
import { query, internalMutation, internalQuery } from "./_generated/server.js";
import type { MutationCtx } from "./_generated/server.js";
import { v } from "convex/values";
import type { Id } from "./_generated/dataModel.js";
import { assertRowInDataset, loadReadableDataset } from "./lib/authz.js";
import { consumeQuotaForDataset } from "./lib/quota.js";

/**
 * Recount a dataset's rows straight from the `datasetRows` table.
 *
 * Every matching row document is loaded just to measure the array, so
 * the cost grows linearly with the dataset's size. Intended for the
 * slow paths only: the self-heal branch in `insert` / `remove` when a
 * dataset doc has no numeric `rowCount` yet, and the explicit
 * `datasets.backfillRowCounts` migration. Regular writes should rely on
 * the cached counter instead of calling this.
 *
 * @param ctx - Mutation context whose `db` handle is queried.
 * @param datasetId - Dataset whose rows are counted.
 * @returns Number of `datasetRows` documents indexed under `datasetId`.
 */
async function actualRowCount(
  ctx: MutationCtx,
  datasetId: Id<"datasets">,
): Promise<number> {
  // Build the index-scoped query first, then materialize it in one go.
  const rowsOfDataset = ctx.db
    .query("datasetRows")
    .withIndex("by_dataset", (q) => q.eq("datasetId", datasetId));
  const { length: total } = await rowsOfDataset.collect();
  return total;
}

/**
* Read all rows of a dataset.
*
Expand Down Expand Up @@ -46,8 +66,26 @@ export const insert = internalMutation({
sources: v.optional(v.array(v.string())),
},
handler: async (ctx, args) => {
await consumeQuotaForDataset(ctx, args.datasetId, 1);
return await ctx.db.insert("datasetRows", args);
// `consumeQuotaForDataset` returns the dataset doc so we don't
// double-read it.
const dataset = await consumeQuotaForDataset(ctx, args.datasetId, 1);

// Pre-insert count is either the cached counter (fast path) or — for
// datasets whose docs predate the rowCount field — recomputed once
// here. Subsequent inserts on the same dataset hit the fast path.
const previousCount =
typeof dataset.rowCount === "number"
? dataset.rowCount
: await actualRowCount(ctx, args.datasetId);

const rowId = await ctx.db.insert("datasetRows", args);

// Maintain the denormalized counter the dashboard reads from. Same
// transaction as the row insert → atomic; quota-rejected inserts
// never bump the counter.
await ctx.db.patch(args.datasetId, { rowCount: previousCount + 1 });

return rowId;
},
});

Expand Down Expand Up @@ -114,6 +152,9 @@ export const clearByDataset = internalMutation({
for (const row of rows) {
await ctx.db.delete(row._id);
}
// Reset the cached counter. We know the post-state exactly, so this
// doesn't need the read-then-add dance that `insert` / `remove` use.
await ctx.db.patch(args.datasetId, { rowCount: 0 });
return rows.length;
},
});
Expand Down Expand Up @@ -163,6 +204,24 @@ export const remove = internalMutation({
},
handler: async (ctx, args) => {
await assertRowInDataset(ctx, args.id, args.expectedDatasetId);

// Decrement the cached counter, self-healing if the dataset doc
// predates the rowCount field. `dataset` is guaranteed to exist —
// assertRowInDataset above verified the row belongs to it.
const dataset = await ctx.db.get(args.expectedDatasetId);
if (dataset) {
const previousCount =
typeof dataset.rowCount === "number"
? dataset.rowCount
: await actualRowCount(ctx, args.expectedDatasetId);
await ctx.db.patch(args.expectedDatasetId, {
// clamp at 0 as a paranoid guard — counter should never go
// negative because we just confirmed the row exists, but a bug
// that drove it negative would manifest as an even weirder UI.
rowCount: Math.max(0, previousCount - 1),
});
}

await ctx.db.delete(args.id);
},
});
Expand Down
52 changes: 50 additions & 2 deletions frontend/convex/datasets.ts
Original file line number Diff line number Diff line change
Expand Up @@ -30,13 +30,25 @@ const columnValidator = v.object({
const PREVIEW_ROW_COUNT = 5;

async function attachPreview(ctx: QueryCtx, dataset: Doc<"datasets">) {
const rows = await ctx.db
// Mini-table preview: just the first N rows. `.take` keeps the
// subscription's read set small — the dashboard's reactivity for the
// row count does NOT depend on this query. It depends on the
// denormalized `rowCount` field on the dataset doc itself, maintained
// by datasetRows.{insert,remove,clearByDataset}. That field is part of
// `dataset`, which is part of the query's read set, so patches to it
// invalidate the subscription and the card re-renders with the new
// count even after the first PREVIEW_ROW_COUNT rows.
const previewRows = await ctx.db
.query("datasetRows")
.withIndex("by_dataset", (q) => q.eq("datasetId", dataset._id))
.take(PREVIEW_ROW_COUNT);
return {
...dataset,
previewRows: rows.map((r) => r.data),
previewRows: previewRows.map((r) => r.data),
// Fall back to the preview length only when the dataset doc predates
// the `rowCount` field. Write paths self-heal on the next insert /
// remove; `datasets.backfillRowCounts` migrates every doc at once.
rowCount: dataset.rowCount ?? previewRows.length,
};
}

Expand Down Expand Up @@ -160,6 +172,7 @@ export const create = mutation({
ownerId: identity.subject,
status: "building",
visibility: "private",
rowCount: 0,
});
},
});
Expand Down Expand Up @@ -194,3 +207,38 @@ export const remove = mutation({
await ctx.db.delete(dataset._id);
},
});

/**
 * One-shot migration that recomputes `rowCount` for every dataset.
 *
 * For each dataset document the rows indexed under it are collected and
 * counted; the document is patched only when its stored `rowCount`
 * differs from that count (a missing `rowCount` always differs). A
 * second run therefore patches nothing, which makes the migration safe
 * to repeat.
 *
 * Why it exists: the write paths repair a missing counter the first
 * time they touch a dataset, but datasets that receive no writes after
 * the field was introduced would keep reporting the preview-length
 * fallback (at most PREVIEW_ROW_COUNT). This promotes all of them to
 * the cached-counter path at once.
 *
 * Cost is proportional to the total number of rows across all datasets,
 * and everything is read inside this single mutation.
 * NOTE(review): for large deployments, confirm the scan fits within the
 * platform's per-transaction read limits before running.
 *
 * Run from the convex CLI:
 *   npx convex run datasets:backfillRowCounts
 *
 * @returns `patched` (docs rewritten), `alreadyCorrect` (docs left
 *   untouched) and `total` (datasets scanned).
 */
export const backfillRowCounts = internalMutation({
  args: {},
  handler: async (ctx) => {
    const allDatasets = await ctx.db.query("datasets").collect();
    const tally = { patched: 0, alreadyCorrect: 0 };

    for (const doc of allDatasets) {
      const matchingRows = await ctx.db
        .query("datasetRows")
        .withIndex("by_dataset", (q) => q.eq("datasetId", doc._id))
        .collect();
      const trueCount = matchingRows.length;

      if (doc.rowCount !== trueCount) {
        // Stored counter is missing or stale — overwrite with the real count.
        await ctx.db.patch(doc._id, { rowCount: trueCount });
        tally.patched += 1;
      } else {
        tally.alreadyCorrect += 1;
      }
    }

    return {
      patched: tally.patched,
      alreadyCorrect: tally.alreadyCorrect,
      total: allDatasets.length,
    };
  },
});
7 changes: 6 additions & 1 deletion frontend/convex/publicSeed.ts
Original file line number Diff line number Diff line change
Expand Up @@ -376,14 +376,18 @@ export const seedPublicDatasets = internalMutation({
continue;
}

// Force-update: patch metadata + replace rows.
// Force-update: patch metadata + replace rows. Reset rowCount
// alongside the row replacement so the dashboard reflects the
// curated content immediately. We know the exact post-state
// (`ds.rows.length`), so no recount needed.
await ctx.db.patch(tracked._id, {
name: ds.name,
description: ds.description,
cadence: ds.cadence,
columns: ds.columns,
status: "live",
visibility: "public",
rowCount: ds.rows.length,
});

const oldRows = await ctx.db
Expand Down Expand Up @@ -424,6 +428,7 @@ export const seedPublicDatasets = internalMutation({
cadence: ds.cadence,
visibility: "public",
columns: ds.columns,
rowCount: ds.rows.length,
});

for (const row of ds.rows) {
Expand Down
9 changes: 9 additions & 0 deletions frontend/convex/schema.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,15 @@ export default defineSchema({
// time doesn't rely on `name` (which marketing changes). User-created
// datasets do not set this. See convex/publicSeed.ts.
seedKey: v.optional(v.string()),
// Denormalized row count maintained by `datasetRows.insert / remove /
// clearByDataset` and by the seed/create paths. Read by the dashboard
// card's "X rows" footer via `datasets.attachPreview` so the count
// stays reactive past the first PREVIEW_ROW_COUNT inserts (a query
// over `.take(5)` only invalidates when one of the first 5 rows
// changes, freezing the dashboard at 5). Optional for backward compat
// with dataset docs created before this field existed — write paths self-heal
// on first hit, and `datasets.backfillRowCounts` migrates all at once.
rowCount: v.optional(v.number()),
columns: v.array(
v.object({
name: v.string(),
Expand Down