diff --git a/README.md b/README.md
index 2e167b9..ddee776 100644
--- a/README.md
+++ b/README.md
@@ -56,6 +56,8 @@ cp frontend/.env.example frontend/.env.local
# Fill in all three Clerk keys (publishable, secret, and JWT issuer domain)
```
+> **Required for the create-dataset wizard:** set `OPENROUTER_API_KEY` (used by the schema-inference pipeline). Get one at [openrouter.ai](https://openrouter.ai). Without it the wizard's "Generate Schema" step will fail.
+
> **Optional:** to enable [PostHog](https://posthog.com) product analytics + session replay + error tracking, set `NEXT_PUBLIC_POSTHOG_KEY` and `NEXT_PUBLIC_POSTHOG_HOST`. Leave blank to disable cleanly (the app no-ops every event).
### 3. Start everything
@@ -64,7 +66,11 @@ cp frontend/.env.example frontend/.env.local
make dev
```
-This starts all Docker services, waits for Convex to be healthy, and deploys Convex functions automatically.
+This starts all Docker services, waits for Convex to be healthy, and deploys Convex functions automatically. Once it's up:
+
+- App: http://localhost:3500
+- Convex dashboard: http://localhost:6791
+- [Mastra Studio](https://mastra.ai) (workflow inspector): http://localhost:4111
### 4. Generate Convex admin key (first time only)
@@ -93,6 +99,8 @@ Open [localhost:3500](http://localhost:3500) and click **Get started** to sign i
> **Note:** Backend env needs no setup — `backend/.env.example` has correct defaults. If you edit Convex functions in `frontend/convex/`, run `make convex-push` to deploy the changes.
+> **Free tier:** each signed-in account gets **2,500 row operations per calendar month** (resets on the 1st, UTC). The header shows a live usage badge; system-owned curated datasets bypass the quota.
+
---
## π Tech Stack
@@ -104,7 +112,9 @@ Open [localhost:3500](http://localhost:3500) and click **Get started** to sign i
| Auth | [Clerk](https://clerk.com) |
| Database | [Convex](https://convex.dev) (self-hosted) |
| Data Collection | [TinyFish](https://tinyfish.ai) APIs (Search, Fetch, Browser) |
+| Schema inference | [Mastra](https://mastra.ai) workflows + [Vercel AI SDK](https://sdk.vercel.ai) + [OpenRouter](https://openrouter.ai) → Claude Sonnet |
| Table view | [TanStack Table](https://tanstack.com/table) + [react-window](https://github.com/bvaughn/react-window) virtualization |
+| Exports | CSV (built-in) + XLSX ([SheetJS](https://sheetjs.com), dynamic-imported) |
| Analytics | [PostHog](https://posthog.com) — events, session replay, error tracking (optional) |
## π Project Structure
@@ -112,9 +122,12 @@ Open [localhost:3500](http://localhost:3500) and click **Get started** to sign i
```text
bigset/
├── frontend/ Next.js 16 — UI + Convex schema & functions
-│   ├── convex/ Convex functions, schema, and auth config
+│   ├── convex/ Convex functions, schema, authz + quota helpers
│   └── .env.local Clerk + Convex keys (not committed)
-├── backend/ Fastify — agent runner, writes to Convex via HTTP
+├── backend/ Fastify + Mastra — schema inference + (future) agents
+│   ├── src/pipeline/ Pure schema-inference fn (called by Fastify + Mastra)
+│   └── src/mastra/ Mastra workflows (Studio at :4111 in dev)
+├── scripts/ One-off scripts (e.g. verify-authz.sh)
├── .env Clerk keys for docker-compose (not committed)
├── docker-compose.dev.yml
└── Makefile
diff --git a/frontend/app/dashboard/page.tsx b/frontend/app/dashboard/page.tsx
index b7b0149..cc94b78 100644
--- a/frontend/app/dashboard/page.tsx
+++ b/frontend/app/dashboard/page.tsx
@@ -2,7 +2,7 @@
import { useEffect, useMemo, useRef, useState } from "react";
import Link from "next/link";
-import { useQuery, useMutation, useConvexAuth } from "convex/react";
+import { useQuery, useConvexAuth } from "convex/react";
import { useUser, useClerk } from "@clerk/nextjs";
import { api } from "@/convex/_generated/api";
import {
@@ -10,6 +10,7 @@ import {
type DatasetCardData,
} from "@/components/dataset/DatasetCard";
import { ThemeToggle } from "@/components/ThemeToggle";
+import { QuotaBadge } from "@/components/QuotaBadge";
import { EVENTS, track } from "@/lib/analytics";
export default function DashboardPage() {
@@ -25,17 +26,13 @@ export default function DashboardPage() {
// Public datasets are open to anonymous users too, so no `skip` gate.
const curated = useQuery(api.datasets.listPublic, {});
- const seedData = useMutation(api.seed.seed);
- const hasSeeded = useRef(false);
-
- useEffect(() => {
- if (mine && mine.length === 0 && isAuthenticated && !hasSeeded.current) {
- hasSeeded.current = true;
- void seedData({}).catch(() => {
- hasSeeded.current = false;
- });
- }
- }, [mine, isAuthenticated, seedData]);
+ // Quota state drives the "+ New Dataset" button — disabled when the
+ // user is at their free-tier limit. `undefined` while loading.
+ const usage = useQuery(
+ api.quota.getMy,
+ isAuthenticated ? {} : "skip",
+ );
+ const atLimit = usage !== undefined && usage.remaining === 0;
// Fire dashboard_viewed once per mount when both queries have resolved,
// so we attach accurate counts. `dashboardFired` prevents the effect
@@ -86,6 +83,8 @@ export default function DashboardPage() {
+
+
{/* PII: mask the email in session replays */}
@@ -148,12 +147,40 @@ export default function DashboardPage() {
className="w-full rounded-lg border border-border bg-surface py-2.5 pl-10 pr-3 text-sm outline-none placeholder:text-muted/60 focus:border-foreground/30 transition-[border-color] duration-150"
/>
-
- + New Dataset
-
+ {atLimit ? (
+
+
+ + New Dataset
+
+ {/*
+ Custom popover beside the disabled button. Replaces the
+ native `title=""` tooltip so we can style consistently
+ with the rest of the UI and use the exact wording requested.
+ Shown on hover via Tailwind's `group-hover`.
+ */}
+
+
+ Free-tier limit reached (2,500 row modifications). Please upgrade.
+
+
+ ) : (
+
+ + New Dataset
+
+ )}
diff --git a/frontend/app/dataset/[id]/page.tsx b/frontend/app/dataset/[id]/page.tsx
index 84e4234..3a158ae 100644
--- a/frontend/app/dataset/[id]/page.tsx
+++ b/frontend/app/dataset/[id]/page.tsx
@@ -2,12 +2,13 @@
import { useParams } from "next/navigation";
import Link from "next/link";
-import { useEffect, useRef, useState } from "react";
+import { useEffect, useMemo, useRef, useState } from "react";
import { useQuery, useConvexAuth } from "convex/react";
import { useAuth } from "@clerk/nextjs";
import { api } from "@/convex/_generated/api";
import type { Id } from "@/convex/_generated/dataModel";
import { DatasetTable } from "@/components/table";
+import { useSelection } from "@/components/table/use-selection";
import { ThemeToggle } from "@/components/ThemeToggle";
import { StatusBadge } from "@/components/dataset/StatusBadge";
import { downloadCSV, downloadXLSX } from "@/lib/export";
@@ -16,16 +17,24 @@ import { EVENTS, captureException, track } from "@/lib/analytics";
export default function DatasetPage() {
const params = useParams();
- const { isLoading } = useConvexAuth();
+ const { isLoading: authLoading } = useConvexAuth();
const { userId, getToken } = useAuth();
const [exporting, setExporting] = useState<"csv" | "xlsx" | null>(null);
const [populating, setPopulating] = useState(false);
const datasetId = params.id as Id<"datasets">;
- const dataset = useQuery(api.datasets.get, isLoading ? "skip" : { id: datasetId });
- const rows = useQuery(api.datasetRows.listByDataset, isLoading ? "skip" : {
- datasetId,
- });
+ const dataset = useQuery(
+ api.datasets.get,
+ authLoading ? "skip" : { id: datasetId },
+ );
+ const rows = useQuery(
+ api.datasetRows.listByDataset,
+ authLoading ? "skip" : { datasetId },
+ );
+
+ const rowIds = useMemo(() => (rows ?? []).map((r) => r._id), [rows]);
+ const selection = useSelection(rowIds);
+ const selectedCount = selection.selected.size;
// Fire dataset_opened once per dataset visit, after the dataset has
// resolved. The ref keeps it idempotent across re-renders.
@@ -44,16 +53,28 @@ export default function DatasetPage() {
async function handleExport(format: "csv" | "xlsx") {
if (!dataset || !rows || exporting) return;
+
+ // If the user has rows selected, export ONLY those; otherwise export
+ // the entire dataset. Preserves column ordering (handled by the export
+ // util — it iterates `dataset.columns` in order).
+ const exportRows =
+ selectedCount > 0
+ ? rows.filter((r) => selection.selected.has(r._id))
+ : rows;
+ if (exportRows.length === 0) return;
+
setExporting(format);
try {
if (format === "csv") {
- downloadCSV(dataset.name, dataset.columns, rows);
+ downloadCSV(dataset.name, dataset.columns, exportRows);
} else {
- await downloadXLSX(dataset.name, dataset.columns, rows);
+ await downloadXLSX(dataset.name, dataset.columns, exportRows);
}
track(EVENTS.DATASET_EXPORTED, {
format,
- row_count: rows.length,
+ row_count: exportRows.length,
+ total_rows: rows.length,
+ selected_only: selectedCount > 0,
seedKey: dataset.seedKey,
});
} catch (err) {
@@ -62,7 +83,8 @@ export default function DatasetPage() {
operation: "dataset_export",
format,
datasetId: dataset._id,
- row_count: rows.length,
+ row_count: exportRows.length,
+ selected_only: selectedCount > 0,
});
} finally {
setExporting(null);
@@ -98,7 +120,7 @@ export default function DatasetPage() {
}
}
- if (isLoading || dataset === undefined || rows === undefined) {
+ if (authLoading || dataset === undefined || rows === undefined) {
return (
Loading...
@@ -110,6 +132,20 @@ export default function DatasetPage() {
// thrown instead β caught by /dataset/[id]/error.tsx, which renders
// the "Dataset not found" UI.
+ const exportDisabled = exporting !== null || rows.length === 0;
+ const csvLabel =
+ exporting === "csv"
+ ? "Exportingβ¦"
+ : selectedCount > 0
+ ? `Export CSV (${selectedCount})`
+ : "Export CSV";
+ const xlsxLabel =
+ exporting === "xlsx"
+ ? "Exportingβ¦"
+ : selectedCount > 0
+ ? `Export XLSX (${selectedCount})`
+ : "Export XLSX";
+
return (
+ {error && (
+
+ {error}
+
+ )}
+
- Create Dataset
+ {isCreating ? "Creatingβ¦" : "Create Dataset"}
setStep("describe")}
- className="rounded-lg border border-border px-4 py-2.5 text-sm font-medium text-foreground hover:bg-foreground/[0.03] transition-colors"
+ disabled={isCreating}
+ className="rounded-lg border border-border px-4 py-2.5 text-sm font-medium text-foreground hover:bg-foreground/[0.03] transition-colors disabled:opacity-50 disabled:cursor-not-allowed"
>
Back
diff --git a/frontend/components/QuotaBadge.tsx b/frontend/components/QuotaBadge.tsx
new file mode 100644
index 0000000..3317ac7
--- /dev/null
+++ b/frontend/components/QuotaBadge.tsx
@@ -0,0 +1,81 @@
+"use client";
+
+import { useQuery, useConvexAuth } from "convex/react";
+import { api } from "@/convex/_generated/api";
+
+/**
+ * Compact usage indicator in the dashboard header.
+ *
+ * Three visual states, all derived from server-returned usage so paid
+ * plans / per-user limits "just work" with no UI change:
+ *
+ * <80% muted text, neutral border
+ * 80–99% amber border + amber text
+ * 100% red border + red text
+ *
+ * Hidden for anonymous viewers — the badge is account-scoped.
+ */
+export function QuotaBadge() {
+ const { isAuthenticated } = useConvexAuth();
+ const usage = useQuery(
+ api.quota.getMy,
+ isAuthenticated ? {} : "skip",
+ );
+
+ if (!isAuthenticated || !usage) return null;
+
+ const exhausted = usage.remaining === 0;
+ const warning = !exhausted && usage.fractionUsed >= 0.8;
+ const resetLabel = formatResetDate(usage.periodEndsAt);
+
+ const borderClass = exhausted
+ ? "border-red-500/40"
+ : warning
+ ? "border-amber-500/40"
+ : "border-border";
+
+ const textClass = exhausted
+ ? "text-red-600 dark:text-red-400"
+ : warning
+ ? "text-amber-700 dark:text-amber-400"
+ : "text-muted";
+
+ const tooltip = exhausted
+ ? `Monthly free-tier limit reached. Resets on ${resetLabel}.`
+ : `${usage.remaining.toLocaleString()} of ${usage.limit.toLocaleString()} row operations remaining this month. Resets on ${resetLabel}.`;
+
+ return (
+
+
+ {usage.consumed.toLocaleString()} / {usage.limit.toLocaleString()}
+
+
+
+
+
+ );
+}
+
+/**
+ * Renders the period-end timestamp as a short "Dec 1"-style label using
+ * the browser's locale and local time zone. The period itself ends at
+ * 00:00 UTC on the 1st of the next month, so viewers in time zones behind
+ * UTC see the last day of the current month instead — accepted as close
+ * enough for a tooltip.
+ */
+function formatResetDate(periodEndsAt: number): string {
+  const labelFormat: Intl.DateTimeFormatOptions = {
+    month: "short",
+    day: "numeric",
+  };
+  const periodEnd = new Date(periodEndsAt);
+  return periodEnd.toLocaleDateString(undefined, labelFormat);
+}
diff --git a/frontend/components/table/DatasetTable.tsx b/frontend/components/table/DatasetTable.tsx
index 1c702d2..692226e 100644
--- a/frontend/components/table/DatasetTable.tsx
+++ b/frontend/components/table/DatasetTable.tsx
@@ -9,11 +9,13 @@ import {
} from "@tanstack/react-table";
import { FixedSizeList } from "react-window";
import type { DatasetMeta, DatasetRow, DatasetColumn } from "./types";
-import { useSelection } from "./use-selection";
+import type { useSelection } from "./use-selection";
import { usePersistedColumnWidths } from "./use-persisted-widths";
import { TableHeader } from "./TableHeader";
import { DataRow, type DataRowData } from "./DataRow";
+/** Shape of the value returned by the `useSelection` hook (owned by the parent page). */
+type Selection = ReturnType<typeof useSelection>;
+
const CHECKBOX_COL_WIDTH = 40;
const DEFAULT_COL_WIDTH = 180;
const MIN_COL_WIDTH = 80;
@@ -45,14 +47,24 @@ function buildColumns(
return [selectCol, ...dataCols];
}
+/**
+ * Renders the dataset's rows in a TanStack-Table + react-window grid.
+ *
+ * `selection` is owned by the parent page (so the page can export only
+ * selected rows). Without it, this component is a pure view of rows +
+ * columns; with it, header/row checkboxes drive selection through the
+ * parent.
+ */
export function DatasetTable({
dataset,
rows,
datasetId,
+ selection,
}: {
dataset: DatasetMeta;
rows: DatasetRow[];
datasetId: string;
+ selection: Selection;
}) {
const tableContainerRef = useRef(null);
const [containerHeight, setContainerHeight] = useState(600);
@@ -69,8 +81,6 @@ export function DatasetTable({
return () => observer.disconnect();
}, []);
- const rowIds = useMemo(() => rows.map((r) => r._id), [rows]);
- const selection = useSelection(rowIds);
const [storedWidths, setStoredWidths] = usePersistedColumnWidths(datasetId);
const columns = useMemo(
diff --git a/frontend/convex/datasetRows.ts b/frontend/convex/datasetRows.ts
index dc3f318..567f85e 100644
--- a/frontend/convex/datasetRows.ts
+++ b/frontend/convex/datasetRows.ts
@@ -1,6 +1,10 @@
import { query, internalMutation, internalQuery } from "./_generated/server.js";
import { v } from "convex/values";
import { loadReadableDataset } from "./lib/authz.js";
+import {
+ consumeQuotaForDataset,
+ consumeQuotaForRow,
+} from "./lib/quota.js";
/**
* Read all rows of a dataset.
@@ -28,9 +32,15 @@ export const listByDataset = query({
* functions or from a trusted backend authenticated with the Convex
* admin key.
*
+ * Quota: every row write charges the dataset's owner exactly once (see
+ * convex/lib/quota.ts). System-owned datasets bypass quota. The charge
+ * happens BEFORE the write in the same transaction, so failed writes
+ * never consume quota.
+ *
* If user-facing row editing is ever introduced, add a separate purpose-
* built public mutation (e.g. `userEditCell`) that performs ownership
- * checks via `loadOwnedDataset` first. Do not relax these to public.
+ * checks via `loadOwnedDataset` first AND calls `consumeQuotaForRow`.
+ * Do not relax these to public.
*/
export const insert = internalMutation({
args: {
@@ -39,6 +49,7 @@ export const insert = internalMutation({
sources: v.optional(v.array(v.string())),
},
handler: async (ctx, args) => {
+ await consumeQuotaForDataset(ctx, args.datasetId, 1);
return await ctx.db.insert("datasetRows", args);
},
});
@@ -49,8 +60,8 @@ export const update = internalMutation({
data: v.record(v.string(), v.any()),
},
handler: async (ctx, args) => {
- const existing = await ctx.db.get(args.id);
- if (!existing) throw new Error("Row not found");
+ // Resolves row → dataset → consumes 1 unit of owner's quota.
+ const existing = await consumeQuotaForRow(ctx, args.id, 1);
const oldData = existing.data as Record;
const newData = args.data;
@@ -100,12 +111,24 @@ export const remove = internalMutation({
},
});
+/**
+ * Insert N rows in one transaction.
+ *
+ * All-or-nothing semantics by design:
+ * - The quota layer's only job is hard enforcement (yes/no, atomic).
+ * - The agent runner's job is batch sizing — call `quota:getMy` to
+ * see `remaining`, then call insertBatch with at most that many.
+ * - Partial accept would push policy decisions ("which rows survived?")
+ * into the quota layer, which has no business making them.
+ */
export const insertBatch = internalMutation({
args: {
datasetId: v.id("datasets"),
rows: v.array(v.record(v.string(), v.any())),
},
handler: async (ctx, args) => {
+ await consumeQuotaForDataset(ctx, args.datasetId, args.rows.length);
+
for (const data of args.rows) {
await ctx.db.insert("datasetRows", {
datasetId: args.datasetId,
diff --git a/frontend/convex/datasets.ts b/frontend/convex/datasets.ts
index 95050e7..f0fb8cc 100644
--- a/frontend/convex/datasets.ts
+++ b/frontend/convex/datasets.ts
@@ -8,6 +8,7 @@ import {
loadReadableDataset,
requireIdentity,
} from "./lib/authz.js";
+import { requireQuotaRemaining } from "./lib/quota.js";
const columnValidator = v.object({
name: v.string(),
@@ -92,6 +93,10 @@ export const create = mutation({
handler: async (ctx, args) => {
const identity = await requireIdentity(ctx);
assertNotReservedOwner(identity.subject);
+ // Block dataset creation at full exhaustion β a dataset you can't
+ // populate is just clutter. Row generation later will re-check, so
+ // this is a UX safeguard, not the only line of defense.
+ await requireQuotaRemaining(ctx, identity.subject, 1);
return await ctx.db.insert("datasets", {
...args,
diff --git a/frontend/convex/lib/quota.ts b/frontend/convex/lib/quota.ts
new file mode 100644
index 0000000..ee2d416
--- /dev/null
+++ b/frontend/convex/lib/quota.ts
@@ -0,0 +1,223 @@
+import type { GenericMutationCtx, GenericQueryCtx } from "convex/server";
+import type { DataModel, Doc, Id } from "../_generated/dataModel.js";
+import { isReservedOwnerId } from "./authz.js";
+
+/**
+ * Per-principal quota enforcement for row modifications.
+ *
+ * One counter per principal (the `usage` table). Free tier is currently
+ * 2,500 row operations PER MONTH (calendar month, UTC). The counter resets
+ * lazily — reads after the 1st of a new month report zero usage, and the
+ * next write persists the reset. No background job needed.
+ *
+ * Charging model:
+ * - 1 row inserted, updated, or replaced = 1 unit consumed this period
+ * - System-owned datasets (ownerId === "system") bypass quota entirely
+ * - Deletes do NOT refund; the counter tracks WORK in the period, not
+ * current row count. Deletion is just cleanup.
+ * - Period rolls over on the 1st (UTC) of each calendar month
+ *
+ * Principal model:
+ * - Today: the principal is `dataset.ownerId`, which is a Clerk user id
+ * - Future (orgs/teams): the principal can also be a Clerk `org_xxx` id;
+ * this module already resolves the principal through the dataset, so
+ * a future schema change that lets datasets be owned by an org will
+ * "just work" — `usage.userId` should be read as "principalId"
+ *
+ * Atomicity:
+ * - Convex mutations are atomic. `consumeQuota` + the actual row write
+ * happen in one transaction, so failed writes never charge the user.
+ * - Concurrent agent calls on the same principal serialize via the
+ * `usage` doc (Convex retries on optimistic-concurrency conflict).
+ *
+ * Architectural boundary with the agent runner:
+ * - The quota layer's job is HARD ENFORCEMENT — yes/no, atomic, simple.
+ * - The agent layer's job is BATCH SIZING — call `getUsageFor` first,
+ * split work to fit `remaining`, drive the retry/backoff strategy.
+ * - `insertBatch` is intentionally all-or-nothing: a partial accept
+ * would leak quota-aware policy ("which rows survived?") into the
+ * quota layer, which has no business making that decision.
+ */
+
+/** Any Convex context that can read the database (query or mutation). */
+type AnyCtx =
+  | GenericQueryCtx<DataModel>
+  | GenericMutationCtx<DataModel>;
+/** Mutation context; required by the helpers that write the `usage` table. */
+type WriteCtx = GenericMutationCtx<DataModel>;
+
+/**
+ * Monthly free-tier limit, counted in row operations per principal per
+ * UTC calendar month. Hardcoded today; will move onto the `usage`
+ * row (`plan` field + lookup table) when paid tiers exist.
+ */
+export const FREE_TIER_MONTHLY_QUOTA = 2500;
+
+/**
+ * Thrown when a principal's usage for the current period cannot absorb
+ * the requested number of row operations.
+ */
+export class QuotaExceededError extends Error {
+  constructor(consumed: number, limit: number, requested: number) {
+    const usedSoFar = `${consumed}/${limit} used this period`;
+    const stillWanted = `${requested} more requested`;
+    super(`Monthly free-tier quota exceeded: ${usedSoFar}, ${stillWanted}`);
+    this.name = "QuotaExceededError";
+  }
+}
+
+export interface UsageSnapshot {
+ consumed: number; // units charged so far in the period this snapshot describes
+ limit: number; // maximum units allowed per period
+ remaining: number; // limit minus consumed, floored at 0
+ /** Fraction of the limit consumed, in 0..1. Capped at 1 for display purposes. */
+ fractionUsed: number;
+ /** ms epoch of the start of the current billing period (1st of month UTC). */
+ periodStart: number;
+ /** ms epoch when the current period ends (1st of next month UTC). */
+ periodEndsAt: number;
+}
+
+/**
+ * Returns the epoch-ms timestamp of 00:00:00.000 UTC on the 1st of the
+ * calendar month that `ts` falls in. Pure; module-internal.
+ */
+function getMonthStartUTC(ts: number): number {
+  const when = new Date(ts);
+  const year = when.getUTCFullYear();
+  const month = when.getUTCMonth();
+  return Date.UTC(year, month, 1);
+}
+
+/**
+ * Returns the epoch-ms timestamp of 00:00:00.000 UTC on the 1st of the
+ * month following the one `ts` falls in. `Date.UTC` normalizes a month
+ * index of 12 to January of the next year, so December needs no special
+ * case.
+ */
+function getNextMonthStartUTC(ts: number): number {
+  const when = new Date(ts);
+  const followingMonth = when.getUTCMonth() + 1;
+  return Date.UTC(when.getUTCFullYear(), followingMonth, 1);
+}
+
+/**
+ * Builds the `UsageSnapshot` for `consumed` units used in the period that
+ * starts at `periodStart`. `remaining` is floored at 0 and `fractionUsed`
+ * is capped at 1, so over-limit counters still render sensibly.
+ */
+function snapshotOf(consumed: number, periodStart: number): UsageSnapshot {
+  const limit = FREE_TIER_MONTHLY_QUOTA;
+  return {
+    consumed,
+    limit,
+    remaining: Math.max(0, limit - consumed),
+    fractionUsed: Math.min(1, consumed / limit),
+    periodStart,
+    periodEndsAt: getNextMonthStartUTC(periodStart),
+  };
+}
+
+/**
+ * Read-only usage snapshot for `userId`, used for UI rendering and
+ * pre-flight checks.
+ *
+ * Returns a zero-usage snapshot for the current period when the user has
+ * no `usage` row yet, or when the stored row belongs to an earlier period
+ * (a row without `periodStart` counts as earlier). Never writes: the
+ * stored row is only rolled over by the next `consumeQuota` call.
+ *
+ * @param ctx Query or mutation context used to read the `usage` table.
+ * @param userId Principal id, matched against `usage.userId`.
+ * @returns The usage snapshot (see `UsageSnapshot`).
+ */
+export async function getUsageFor(
+  ctx: AnyCtx,
+  userId: string,
+): Promise<UsageSnapshot> {
+  const row = await ctx.db
+    .query("usage")
+    .withIndex("by_user", (q) => q.eq("userId", userId))
+    .unique();
+
+  const monthStart = getMonthStartUTC(Date.now());
+
+  // Either no row yet, or row belongs to a previous period → show 0.
+  // (The DB row is left alone here; the next write rolls it over.)
+  if (!row || (row.periodStart ?? 0) < monthStart) {
+    return snapshotOf(0, monthStart);
+  }
+  return snapshotOf(row.rowsConsumed, row.periodStart ?? monthStart);
+}
+
+/**
+ * Pre-flight check used by `datasets.create`: rejects the call when the
+ * user has fewer than `atLeast` units left in the current period. Dataset
+ * creation is blocked at full exhaustion because a dataset you can't
+ * populate is just clutter.
+ *
+ * Read-only: it checks the snapshot from `getUsageFor` and consumes
+ * nothing.
+ *
+ * @param ctx Query or mutation context used to read the `usage` table.
+ * @param userId Principal id whose remaining quota is checked.
+ * @param atLeast Minimum number of units that must remain (default 1).
+ * @throws QuotaExceededError when `remaining` is below `atLeast`.
+ */
+export async function requireQuotaRemaining(
+  ctx: AnyCtx,
+  userId: string,
+  atLeast: number = 1,
+): Promise<void> {
+  const usage = await getUsageFor(ctx, userId);
+  if (usage.remaining < atLeast) {
+    throw new QuotaExceededError(usage.consumed, usage.limit, atLeast);
+  }
+}
+
+/**
+ * Atomically check + consume `n` units against `dataset.ownerId` for the
+ * CURRENT period. Performs the monthly rollover if the existing row
+ * belongs to a past period.
+ *
+ * Call this BEFORE the row write inside the same mutation. If quota is
+ * exceeded, throws (the row write never happens, transaction rolls back).
+ *
+ * Returns without reading or writing anything when `n <= 0` or when the
+ * dataset's owner is a reserved owner id (system-owned datasets pass
+ * through with no accounting).
+ *
+ * @param ctx Mutation context; the `usage` row is patched or inserted here.
+ * @param dataset Dataset whose `ownerId` is the principal to charge.
+ * @param n Number of units (row operations) to consume.
+ * @throws QuotaExceededError when the period's usage plus `n` would exceed
+ *   `FREE_TIER_MONTHLY_QUOTA`.
+ */
+export async function consumeQuota(
+  ctx: WriteCtx,
+  dataset: Doc<"datasets">,
+  n: number,
+): Promise<void> {
+  if (n <= 0) return;
+  if (isReservedOwnerId(dataset.ownerId)) return;
+
+  const userId = dataset.ownerId;
+  const monthStart = getMonthStartUTC(Date.now());
+
+  const row = await ctx.db
+    .query("usage")
+    .withIndex("by_user", (q) => q.eq("userId", userId))
+    .unique();
+
+  // Rollover if the existing row belongs to a past period: a missing or
+  // older `periodStart` means nothing is carried into this period.
+  const carryConsumed =
+    row && (row.periodStart ?? 0) >= monthStart ? row.rowsConsumed : 0;
+  const next = carryConsumed + n;
+
+  if (next > FREE_TIER_MONTHLY_QUOTA) {
+    throw new QuotaExceededError(carryConsumed, FREE_TIER_MONTHLY_QUOTA, n);
+  }
+
+  if (row) {
+    // Patching `periodStart` along with the counter is what persists the reset.
+    await ctx.db.patch(row._id, {
+      rowsConsumed: next,
+      periodStart: monthStart,
+    });
+  } else {
+    await ctx.db.insert("usage", {
+      userId,
+      rowsConsumed: next,
+      periodStart: monthStart,
+    });
+  }
+}
+
+/**
+ * Resolve a row's parent dataset and consume `n` against its owner.
+ * Used by `datasetRows.update`, which only knows the rowId up front.
+ *
+ * @param ctx Mutation context.
+ * @param rowId Id of the row about to be modified.
+ * @param n Number of units to consume.
+ * @returns The loaded row, so the caller does not need to fetch it again.
+ * @throws Error "Row not found" / "Dataset not found" when either lookup
+ *   returns nothing; QuotaExceededError propagates from `consumeQuota`.
+ */
+export async function consumeQuotaForRow(
+  ctx: WriteCtx,
+  rowId: Id<"datasetRows">,
+  n: number,
+): Promise<Doc<"datasetRows">> {
+  const row = await ctx.db.get(rowId);
+  if (!row) throw new Error("Row not found");
+  const dataset = await ctx.db.get(row.datasetId);
+  if (!dataset) throw new Error("Dataset not found");
+  await consumeQuota(ctx, dataset, n);
+  return row;
+}
+
+/**
+ * Resolve a dataset by id (used by row writes that take datasetId) and
+ * consume `n` against its owner.
+ *
+ * @param ctx Mutation context.
+ * @param datasetId Id of the dataset the rows are written to.
+ * @param n Number of units to consume.
+ * @returns The loaded dataset, for callers that also need it.
+ * @throws Error "Dataset not found" when the id does not resolve;
+ *   QuotaExceededError propagates from `consumeQuota`.
+ */
+export async function consumeQuotaForDataset(
+  ctx: WriteCtx,
+  datasetId: Id<"datasets">,
+  n: number,
+): Promise<Doc<"datasets">> {
+  const dataset = await ctx.db.get(datasetId);
+  if (!dataset) throw new Error("Dataset not found");
+  await consumeQuota(ctx, dataset, n);
+  return dataset;
+}
diff --git a/frontend/convex/quota.ts b/frontend/convex/quota.ts
new file mode 100644
index 0000000..2a79f67
--- /dev/null
+++ b/frontend/convex/quota.ts
@@ -0,0 +1,21 @@
+import { query } from "./_generated/server.js";
+import { getUsageFor } from "./lib/quota.js";
+import { requireIdentity } from "./lib/authz.js";
+
+/**
+ * Returns the signed-in user's quota usage as a read-only snapshot; the
+ * dashboard's QuotaBadge component renders it.
+ *
+ * The result is a `UsageSnapshot` (see lib/quota.ts): consumed, limit,
+ * remaining, fractionUsed, periodStart, periodEndsAt. The limit and period
+ * bounds travel with `consumed` so the UI never hardcodes them — when paid
+ * plans land, the limit becomes per-user and this query stays the single
+ * source of truth.
+ */
+export const getMy = query({
+  args: {},
+  handler: async (ctx) => {
+    const { subject } = await requireIdentity(ctx);
+    const snapshot = await getUsageFor(ctx, subject);
+    return snapshot;
+  },
+});
diff --git a/frontend/convex/schema.ts b/frontend/convex/schema.ts
index ac98e54..68cd55f 100644
--- a/frontend/convex/schema.ts
+++ b/frontend/convex/schema.ts
@@ -53,4 +53,30 @@ export default defineSchema({
newValue: v.string(),
changedAt: v.number(),
}).index("by_row", ["datasetRowId"]),
+
+ // Per-user / per-account quota accounting. One row per principal, created
+ // lazily on first row modification. `rowsConsumed` tracks WORK done in
+ // the current period — deleting rows does NOT refund quota.
+ //
+ // Period model: calendar month, UTC. Rolls over on the 1st (UTC) of each
+ // month — the helper detects rollover lazily on the next read/write and
+ // resets the counter without a background job.
+ //
+ // The `userId` field is named for the current scope (per-Clerk-user) but
+ // semantically holds any principal id — when Clerk Organizations land,
+ // an `org_xxx` id will live here too without a schema change. See
+ // convex/lib/quota.ts for the resolution policy.
+ //
+ // Future fields (all optional — no migration needed when added):
+ // - plan: "free" | "pro" | "enterprise" (today: implicitly "free")
+ // - limitOverride (admin grants beyond plan default)
+ usage: defineTable({
+ userId: v.string(),
+ rowsConsumed: v.number(),
+ // ms epoch of the start of the period this counter belongs to (first
+ // ms of the current UTC calendar month). Optional for forward-compat
+ // with rows written before this field existed — missing = treated as
+ // "before current period", which forces a reset on next write.
+ periodStart: v.optional(v.number()),
+ }).index("by_user", ["userId"]),
});
diff --git a/frontend/convex/seed.ts b/frontend/convex/seed.ts
deleted file mode 100644
index 1e2999a..0000000
--- a/frontend/convex/seed.ts
+++ /dev/null
@@ -1,198 +0,0 @@
-import { mutation } from "./_generated/server.js";
-import { assertNotReservedOwner, requireIdentity } from "./lib/authz.js";
-
-type ColType = "text" | "number" | "boolean" | "url" | "date";
-
-interface DatasetDef {
- name: string;
- description: string;
- status: "live" | "paused" | "building";
- cadence: string;
- columns: { name: string; type: ColType; description?: string }[];
- rows: Record[];
-}
-
-const SEED_DATASETS: DatasetDef[] = [
- {
- name: "YC S25 Companies Hiring",
- description:
- "Active YC S25 companies with open engineering roles, tracking headcount and positions across career pages.",
- status: "live",
- cadence: "Every 6 hours",
- columns: [
- { name: "Company", type: "text" },
- { name: "Description", type: "text" },
- { name: "Website", type: "url" },
- { name: "Hiring", type: "boolean" },
- { name: "Open Roles", type: "number" },
- { name: "Stage", type: "text" },
- { name: "Location", type: "text" },
- { name: "Founded", type: "date" },
- { name: "Employees", type: "number" },
- { name: "LinkedIn", type: "url" },
- ],
- rows: [
- { Company: "Airbase", Description: "Spend management platform for mid-market...", Website: "airbase.com", Hiring: "Yes", "Open Roles": "12", Stage: "Series B", Location: "San Francisco", Founded: "2017", Employees: "250", LinkedIn: "linkedin.com/company/airbase" },
- { Company: "Brex", Description: "Financial OS for growing companies built...", Website: "brex.com", Hiring: "Yes", "Open Roles": "34", Stage: "Series D", Location: "New York", Founded: "2017", Employees: "1200", LinkedIn: "linkedin.com/company/brex" },
- { Company: "Clerk", Description: "Authentication and user management for...", Website: "clerk.com", Hiring: "Yes", "Open Roles": "8", Stage: "Series A", Location: "San Francisco", Founded: "2019", Employees: "85", LinkedIn: "linkedin.com/company/clerk-dev" },
- { Company: "Deel", Description: "Global payroll and compliance platform...", Website: "deel.com", Hiring: "Yes", "Open Roles": "21", Stage: "Series D", Location: "Remote", Founded: "2019", Employees: "3000", LinkedIn: "linkedin.com/company/deel" },
- { Company: "Expo", Description: "Framework and platform for universal React...", Website: "expo.dev", Hiring: "No", "Open Roles": "0", Stage: "Series B", Location: "Palo Alto", Founded: "2014", Employees: "60", LinkedIn: "linkedin.com/company/expo-dev" },
- { Company: "Fly.io", Description: "Deploy app servers close to users with a...", Website: "fly.io", Hiring: "Yes", "Open Roles": "5", Stage: "Series C", Location: "Chicago", Founded: "2017", Employees: "120", LinkedIn: "linkedin.com/company/fly-io" },
- { Company: "Graphite", Description: "Modern code review tool that stacks pull...", Website: "graphite.dev", Hiring: "Yes", "Open Roles": "4", Stage: "Series A", Location: "New York", Founded: "2020", Employees: "35", LinkedIn: "linkedin.com/company/graphitedev" },
- { Company: "Helicone", Description: "Open-source LLM observability platform...", Website: "helicone.ai", Hiring: "Yes", "Open Roles": "6", Stage: "Seed", Location: "San Francisco", Founded: "2023", Employees: "15", LinkedIn: "linkedin.com/company/helicone" },
- { Company: "Incident.io", Description: "Incident management platform that helps...", Website: "incident.io", Hiring: "Yes", "Open Roles": "11", Stage: "Series B", Location: "London", Founded: "2021", Employees: "130", LinkedIn: "linkedin.com/company/incident-io" },
- { Company: "Jasper", Description: "AI copilot for enterprise marketing teams...", Website: "jasper.ai", Hiring: "No", "Open Roles": "0", Stage: "Series A", Location: "Austin", Founded: "2021", Employees: "400", LinkedIn: "linkedin.com/company/jasper-ai" },
- { Company: "Knock", Description: "Notification infrastructure for developers...", Website: "knock.app", Hiring: "Yes", "Open Roles": "3", Stage: "Series A", Location: "New York", Founded: "2020", Employees: "30", LinkedIn: "linkedin.com/company/knocklabs" },
- { Company: "LangChain", Description: "Framework for developing applications...", Website: "langchain.com", Hiring: "Yes", "Open Roles": "9", Stage: "Series A", Location: "San Francisco", Founded: "2022", Employees: "60", LinkedIn: "linkedin.com/company/langchain" },
- { Company: "Mintlify", Description: "Modern documentation platform that makes...", Website: "mintlify.com", Hiring: "Yes", "Open Roles": "7", Stage: "Series A", Location: "San Francisco", Founded: "2021", Employees: "40", LinkedIn: "linkedin.com/company/mintlify" },
- { Company: "Neon", Description: "Serverless Postgres with branching and...", Website: "neon.tech", Hiring: "Yes", "Open Roles": "15", Stage: "Series B", Location: "San Francisco", Founded: "2021", Employees: "150", LinkedIn: "linkedin.com/company/neondatabase" },
- { Company: "OpenPipe", Description: "Fine-tuning platform that turns LLM logs...", Website: "openpipe.ai", Hiring: "Yes", "Open Roles": "4", Stage: "Seed", Location: "San Francisco", Founded: "2023", Employees: "12", LinkedIn: "linkedin.com/company/openpipe" },
- { Company: "Posthog", Description: "Open-source product analytics platform...", Website: "posthog.com", Hiring: "Yes", "Open Roles": "8", Stage: "Series B", Location: "Remote", Founded: "2020", Employees: "50", LinkedIn: "linkedin.com/company/posthog" },
- { Company: "Resend", Description: "Email API for developers with React Email...", Website: "resend.com", Hiring: "Yes", "Open Roles": "6", Stage: "Series A", Location: "San Francisco", Founded: "2022", Employees: "25", LinkedIn: "linkedin.com/company/resend" },
- { Company: "Supabase", Description: "Open-source Firebase alternative with...", Website: "supabase.com", Hiring: "Yes", "Open Roles": "18", Stage: "Series C", Location: "Remote", Founded: "2020", Employees: "200", LinkedIn: "linkedin.com/company/supabase" },
- { Company: "Trigger.dev", Description: "Background jobs framework for TypeScript...", Website: "trigger.dev", Hiring: "Yes", "Open Roles": "3", Stage: "Seed", Location: "London", Founded: "2022", Employees: "15", LinkedIn: "linkedin.com/company/triggerdev" },
- { Company: "Unkey", Description: "API authentication and rate limiting built...", Website: "unkey.dev", Hiring: "Yes", "Open Roles": "2", Stage: "Seed", Location: "Remote", Founded: "2023", Employees: "8", LinkedIn: "linkedin.com/company/unkey" },
- ],
- },
- {
- name: "Bay Area Vehicle Insurance Quotes",
- description:
- "Monthly premium quotes for a 2020 Honda Civic across major insurers in the Bay Area.",
- status: "live",
- cadence: "Daily",
- columns: [
- { name: "Provider", type: "text" },
- { name: "Description", type: "text" },
- { name: "Website", type: "url" },
- { name: "Monthly Premium", type: "number" },
- { name: "Deductible", type: "number" },
- { name: "Coverage Type", type: "text" },
- { name: "AM Best Rating", type: "text" },
- { name: "Customer Rating", type: "number" },
- { name: "Quote Date", type: "date" },
- ],
- rows: [
- { Provider: "Geico", Description: "Government Employees Insurance Company...", Website: "geico.com", "Monthly Premium": "$142", Deductible: "$500", "Coverage Type": "Full Coverage", "AM Best Rating": "A++", "Customer Rating": "4.2", "Quote Date": "May 17" },
- { Provider: "State Farm", Description: "Largest property and casualty insurance...", Website: "statefarm.com", "Monthly Premium": "$158", Deductible: "$500", "Coverage Type": "Full Coverage", "AM Best Rating": "A++", "Customer Rating": "4.5", "Quote Date": "May 17" },
- { Provider: "Progressive", Description: "American insurance company, third largest...", Website: "progressive.com", "Monthly Premium": "$131", Deductible: "$750", "Coverage Type": "Basic", "AM Best Rating": "A+", "Customer Rating": "3.9", "Quote Date": "May 17" },
- { Provider: "Allstate", Description: "Second largest personal lines insurer in...", Website: "allstate.com", "Monthly Premium": "$167", Deductible: "$500", "Coverage Type": "Full Coverage", "AM Best Rating": "A+", "Customer Rating": "4.1", "Quote Date": "May 17" },
- { Provider: "USAA", Description: "Financial services for military members...", Website: "usaa.com", "Monthly Premium": "$119", Deductible: "$500", "Coverage Type": "Full Coverage", "AM Best Rating": "A++", "Customer Rating": "4.8", "Quote Date": "May 17" },
- { Provider: "Liberty Mutual", Description: "American diversified global insurer...", Website: "libertymutual.com", "Monthly Premium": "$172", Deductible: "$500", "Coverage Type": "Full Coverage", "AM Best Rating": "A", "Customer Rating": "3.8", "Quote Date": "May 17" },
- { Provider: "Farmers", Description: "American insurer group of automobiles...", Website: "farmers.com", "Monthly Premium": "$155", Deductible: "$750", "Coverage Type": "Full Coverage", "AM Best Rating": "A", "Customer Rating": "4.0", "Quote Date": "May 17" },
- { Provider: "Nationwide", Description: "Insurance and financial services company...", Website: "nationwide.com", "Monthly Premium": "$148", Deductible: "$500", "Coverage Type": "Full Coverage", "AM Best Rating": "A+", "Customer Rating": "4.3", "Quote Date": "May 17" },
- ],
- },
- {
- name: "Competitor Blog Posts",
- description:
- "Latest blog posts from competitor companies, tracking publish date, title, and topic tags.",
- status: "live",
- cadence: "Every 12 hours",
- columns: [
- { name: "Company", type: "text" },
- { name: "Title", type: "text" },
- { name: "URL", type: "url" },
- { name: "Date", type: "date" },
- { name: "Topic", type: "text" },
- { name: "Author", type: "text" },
- { name: "Read Time", type: "text" },
- ],
- rows: [
- { Company: "Firecrawl", Title: "Announcing v2 API with Structured Extraction", URL: "firecrawl.dev/blog/v2-api", Date: "May 16", Topic: "Product", Author: "Mendable Team", "Read Time": "5 min" },
- { Company: "Apify", Title: "The Complete Web Scraping Guide for 2026", URL: "blog.apify.com/scraping-guide", Date: "May 15", Topic: "Tutorial", Author: "Ondra Urban", "Read Time": "12 min" },
- { Company: "Browserbase", Title: "Introducing Stealth Mode for Anti-Detection", URL: "browserbase.com/blog/stealth", Date: "May 14", Topic: "Feature", Author: "Paul Klein", "Read Time": "4 min" },
- { Company: "ScrapFly", Title: "Understanding Proxy Networks: A Deep Dive", URL: "scrapfly.io/blog/proxy-networks", Date: "May 13", Topic: "Guide", Author: "ScrapFly Team", "Read Time": "8 min" },
- { Company: "Bright Data", Title: "AI-Powered Data Collection at Scale", URL: "brightdata.com/blog/ai-collection", Date: "May 12", Topic: "Research", Author: "Or Lenchner", "Read Time": "6 min" },
- { Company: "Crawlee", Title: "How We Rebuilt Our Crawler from Scratch", URL: "crawlee.dev/blog/rebuild", Date: "May 11", Topic: "Engineering", Author: "Jan Curn", "Read Time": "10 min" },
- { Company: "Zyte", Title: "E-commerce Price Monitoring Best Practices", URL: "zyte.com/blog/price-monitoring", Date: "May 10", Topic: "Guide", Author: "Zyte Team", "Read Time": "7 min" },
- { Company: "Playwright", Title: "New Locator Strategies in v1.45", URL: "playwright.dev/blog/v145", Date: "May 9", Topic: "Release", Author: "MS Team", "Read Time": "3 min" },
- ],
- },
- {
- name: "GPU Prices β RTX 5090",
- description:
- "Price tracking for NVIDIA RTX 5090 across major retailers, including stock availability.",
- status: "live",
- cadence: "Every 30 min",
- columns: [
- { name: "Retailer", type: "text" },
- { name: "Product Name", type: "text" },
- { name: "Price", type: "number" },
- { name: "In Stock", type: "boolean" },
- { name: "Shipping", type: "text" },
- { name: "Seller Type", type: "text" },
- { name: "URL", type: "url" },
- { name: "Last Checked", type: "date" },
- ],
- rows: [
- { Retailer: "Newegg", "Product Name": "NVIDIA GeForce RTX 5090 Founders Edition", Price: "$1,999", "In Stock": "Yes", Shipping: "Free 2-day", "Seller Type": "Direct", URL: "newegg.com/nvidia-rtx-5090", "Last Checked": "12 min ago" },
- { Retailer: "Best Buy", "Product Name": "NVIDIA GeForce RTX 5090 FE 32GB GDDR7", Price: "$1,999", "In Stock": "No", Shipping: "β", "Seller Type": "Direct", URL: "bestbuy.com/nvidia-rtx-5090", "Last Checked": "12 min ago" },
- { Retailer: "Amazon", "Product Name": "NVIDIA RTX 5090 Founders Edition 32GB", Price: "$2,149", "In Stock": "Yes", Shipping: "$12.99", "Seller Type": "3rd Party", URL: "amazon.com/dp/B0DRTX5090", "Last Checked": "12 min ago" },
- { Retailer: "B&H Photo", "Product Name": "NVIDIA GeForce RTX 5090 FE 32GB", Price: "$1,999", "In Stock": "Yes", Shipping: "Free Expedited", "Seller Type": "Direct", URL: "bhphoto.com/nvidia-rtx-5090", "Last Checked": "12 min ago" },
- { Retailer: "Micro Center", "Product Name": "NVIDIA GeForce RTX 5090 Founders 32GB", Price: "$1,979", "In Stock": "Yes", Shipping: "In-store only", "Seller Type": "Direct", URL: "microcenter.com/rtx-5090", "Last Checked": "12 min ago" },
- { Retailer: "CDW", "Product Name": "NVIDIA RTX 5090 FE 32GB Graphics Card", Price: "$2,049", "In Stock": "Yes", Shipping: "$19.99", "Seller Type": "Direct", URL: "cdw.com/nvidia-rtx-5090", "Last Checked": "12 min ago" },
- ],
- },
- {
- name: "SG Startup Funding Rounds",
- description:
- "Recent funding rounds for Singapore-based startups, sourced from press releases and Crunchbase.",
- status: "building",
- cadence: "Daily",
- columns: [
- { name: "Startup", type: "text" },
- { name: "Description", type: "text" },
- { name: "Round", type: "text" },
- { name: "Amount", type: "number" },
- { name: "Lead Investor", type: "text" },
- { name: "Date", type: "date" },
- { name: "Sector", type: "text" },
- { name: "Valuation", type: "number" },
- { name: "Crunchbase", type: "url" },
- ],
- rows: [
- { Startup: "Grab", Description: "Southeast Asian super app for ride-hailing...", Round: "Series H", Amount: "$300M", "Lead Investor": "GIC", Date: "May 10", Sector: "Transportation", Valuation: "$14B", Crunchbase: "crunchbase.com/organization/grab" },
- { Startup: "Carousell", Description: "Consumer-to-consumer marketplace for...", Round: "Series D", Amount: "$100M", "Lead Investor": "Temasek", Date: "May 8", Sector: "Marketplace", Valuation: "$1.1B", Crunchbase: "crunchbase.com/organization/carousell" },
- { Startup: "Ninja Van", Description: "Logistics company providing last-mile...", Round: "Series E", Amount: "$150M", "Lead Investor": "B Capital", Date: "May 5", Sector: "Logistics", Valuation: "$2B", Crunchbase: "crunchbase.com/organization/ninjavan" },
- { Startup: "PatSnap", Description: "AI-powered innovation intelligence...", Round: "Series D", Amount: "$90M", "Lead Investor": "SoftBank", Date: "May 2", Sector: "Enterprise", Valuation: "$1B", Crunchbase: "crunchbase.com/organization/patsnap" },
- { Startup: "Endowus", Description: "Digital wealth platform for personal...", Round: "Series B", Amount: "$45M", "Lead Investor": "UBS", Date: "Apr 28", Sector: "Fintech", Valuation: "$400M", Crunchbase: "crunchbase.com/organization/endowus" },
- { Startup: "Nium", Description: "Global payments infrastructure platform...", Round: "Series D", Amount: "$200M", "Lead Investor": "Visa", Date: "Apr 20", Sector: "Fintech", Valuation: "$2.1B", Crunchbase: "crunchbase.com/organization/nium" },
- ],
- },
-];
-
-export const seed = mutation({
- args: {},
- handler: async (ctx) => {
- const identity = await requireIdentity(ctx);
- assertNotReservedOwner(identity.subject);
- const ownerId = identity.subject;
-
- const existing = await ctx.db
- .query("datasets")
- .withIndex("by_owner", (q) => q.eq("ownerId", ownerId))
- .first();
- if (existing) return { status: "already_seeded" };
-
- for (const ds of SEED_DATASETS) {
- const datasetId = await ctx.db.insert("datasets", {
- name: ds.name,
- description: ds.description,
- ownerId,
- status: ds.status,
- cadence: ds.cadence,
- visibility: "private",
- columns: ds.columns,
- });
-
- for (const row of ds.rows) {
- await ctx.db.insert("datasetRows", {
- datasetId,
- data: row,
- });
- }
- }
-
- return { status: "seeded", count: SEED_DATASETS.length };
- },
-});