diff --git a/README.md b/README.md index 2e167b9..ddee776 100644 --- a/README.md +++ b/README.md @@ -56,6 +56,8 @@ cp frontend/.env.example frontend/.env.local # Fill in all three Clerk keys (publishable, secret, and JWT issuer domain) ``` +> **Required for the create-dataset wizard:** set `OPENROUTER_API_KEY` (used by the schema-inference pipeline). Get one at [openrouter.ai](https://openrouter.ai). Without it the wizard's "Generate Schema" step will fail. + > **Optional:** to enable [PostHog](https://posthog.com) product analytics + session replay + error tracking, set `NEXT_PUBLIC_POSTHOG_KEY` and `NEXT_PUBLIC_POSTHOG_HOST`. Leave blank to disable cleanly (the app no-ops every event). ### 3. Start everything @@ -64,7 +66,11 @@ cp frontend/.env.example frontend/.env.local make dev ``` -This starts all Docker services, waits for Convex to be healthy, and deploys Convex functions automatically. +This starts all Docker services, waits for Convex to be healthy, and deploys Convex functions automatically. Once it's up: + +- App: http://localhost:3500 +- Convex dashboard: http://localhost:6791 +- [Mastra Studio](https://mastra.ai) (workflow inspector): http://localhost:4111 ### 4. Generate Convex admin key (first time only) @@ -93,6 +99,8 @@ Open [localhost:3500](http://localhost:3500) and click **Get started** to sign i > **Note:** Backend env needs no setup β€” `backend/.env.example` has correct defaults. If you edit Convex functions in `frontend/convex/`, run `make convex-push` to deploy the changes. +> **Free tier:** each signed-in account gets **2,500 row operations per calendar month** (resets on the 1st, UTC). The header shows a live usage badge; system-owned curated datasets bypass the quota. 
+ --- ## 🛠 Tech Stack @@ -104,7 +112,9 @@ Open [localhost:3500](http://localhost:3500) and click **Get started** to sign i | Auth | [Clerk](https://clerk.com) | | Database | [Convex](https://convex.dev) (self-hosted) | | Data Collection | [TinyFish](https://tinyfish.ai) APIs (Search, Fetch, Browser) | +| Schema inference | [Mastra](https://mastra.ai) workflows + [Vercel AI SDK](https://sdk.vercel.ai) + [OpenRouter](https://openrouter.ai) → Claude Sonnet | | Table view | [TanStack Table](https://tanstack.com/table) + [react-window](https://github.com/bvaughn/react-window) virtualization | +| Exports | CSV (built-in) + XLSX ([SheetJS](https://sheetjs.com), dynamic-imported) | | Analytics | [PostHog](https://posthog.com) — events, session replay, error tracking (optional) | ## 📁 Project Structure @@ -112,9 +122,12 @@ Open [localhost:3500](http://localhost:3500) and click **Get started** to sign i ```text bigset/ ├── frontend/ Next.js 16 — UI + Convex schema & functions -│ ├── convex/ Convex functions, schema, and auth config +│ ├── convex/ Convex functions, schema, authz + quota helpers │ └── .env.local Clerk + Convex keys (not committed) -├── backend/ Fastify — agent runner, writes to Convex via HTTP +├── backend/ Fastify + Mastra — schema inference + (future) agents +│ ├── src/pipeline/ Pure schema-inference fn (called by Fastify + Mastra) +│ └── src/mastra/ Mastra workflows (Studio at :4111 in dev) +├── scripts/ One-off scripts (e.g.
verify-authz.sh) β”œβ”€β”€ .env Clerk keys for docker-compose (not committed) β”œβ”€β”€ docker-compose.dev.yml └── Makefile diff --git a/frontend/app/dashboard/page.tsx b/frontend/app/dashboard/page.tsx index b7b0149..cc94b78 100644 --- a/frontend/app/dashboard/page.tsx +++ b/frontend/app/dashboard/page.tsx @@ -2,7 +2,7 @@ import { useEffect, useMemo, useRef, useState } from "react"; import Link from "next/link"; -import { useQuery, useMutation, useConvexAuth } from "convex/react"; +import { useQuery, useConvexAuth } from "convex/react"; import { useUser, useClerk } from "@clerk/nextjs"; import { api } from "@/convex/_generated/api"; import { @@ -10,6 +10,7 @@ import { type DatasetCardData, } from "@/components/dataset/DatasetCard"; import { ThemeToggle } from "@/components/ThemeToggle"; +import { QuotaBadge } from "@/components/QuotaBadge"; import { EVENTS, track } from "@/lib/analytics"; export default function DashboardPage() { @@ -25,17 +26,13 @@ export default function DashboardPage() { // Public datasets are open to anonymous users too, so no `skip` gate. const curated = useQuery(api.datasets.listPublic, {}); - const seedData = useMutation(api.seed.seed); - const hasSeeded = useRef(false); - - useEffect(() => { - if (mine && mine.length === 0 && isAuthenticated && !hasSeeded.current) { - hasSeeded.current = true; - void seedData({}).catch(() => { - hasSeeded.current = false; - }); - } - }, [mine, isAuthenticated, seedData]); + // Quota state drives the "+ New Dataset" button β€” disabled when the + // user is at their free-tier limit. `undefined` while loading. + const usage = useQuery( + api.quota.getMy, + isAuthenticated ? {} : "skip", + ); + const atLimit = usage !== undefined && usage.remaining === 0; // Fire dashboard_viewed once per mount when both queries have resolved, // so we attach accurate counts. `dashboardFired` prevents the effect @@ -86,6 +83,8 @@ export default function DashboardPage() { BigSet BigSet
+ +
{/* PII: mask the email in session replays */} @@ -148,12 +147,40 @@ export default function DashboardPage() { className="w-full rounded-lg border border-border bg-surface py-2.5 pl-10 pr-3 text-sm outline-none placeholder:text-muted/60 focus:border-foreground/30 transition-[border-color] duration-150" />
- - + New Dataset - + {atLimit ? ( +
+ + + New Dataset + + {/* + Custom popover beside the disabled button. Replaces the + native `title=""` tooltip so we can style consistently + with the rest of the UI and use the exact wording requested. + Shown on hover via Tailwind's `group-hover`. + */} + +
+ ) : ( + + + New Dataset + + )}
diff --git a/frontend/app/dataset/[id]/page.tsx b/frontend/app/dataset/[id]/page.tsx index 84e4234..3a158ae 100644 --- a/frontend/app/dataset/[id]/page.tsx +++ b/frontend/app/dataset/[id]/page.tsx @@ -2,12 +2,13 @@ import { useParams } from "next/navigation"; import Link from "next/link"; -import { useEffect, useRef, useState } from "react"; +import { useEffect, useMemo, useRef, useState } from "react"; import { useQuery, useConvexAuth } from "convex/react"; import { useAuth } from "@clerk/nextjs"; import { api } from "@/convex/_generated/api"; import type { Id } from "@/convex/_generated/dataModel"; import { DatasetTable } from "@/components/table"; +import { useSelection } from "@/components/table/use-selection"; import { ThemeToggle } from "@/components/ThemeToggle"; import { StatusBadge } from "@/components/dataset/StatusBadge"; import { downloadCSV, downloadXLSX } from "@/lib/export"; @@ -16,16 +17,24 @@ import { EVENTS, captureException, track } from "@/lib/analytics"; export default function DatasetPage() { const params = useParams(); - const { isLoading } = useConvexAuth(); + const { isLoading: authLoading } = useConvexAuth(); const { userId, getToken } = useAuth(); const [exporting, setExporting] = useState<"csv" | "xlsx" | null>(null); const [populating, setPopulating] = useState(false); const datasetId = params.id as Id<"datasets">; - const dataset = useQuery(api.datasets.get, isLoading ? "skip" : { id: datasetId }); - const rows = useQuery(api.datasetRows.listByDataset, isLoading ? "skip" : { - datasetId, - }); + const dataset = useQuery( + api.datasets.get, + authLoading ? "skip" : { id: datasetId }, + ); + const rows = useQuery( + api.datasetRows.listByDataset, + authLoading ? "skip" : { datasetId }, + ); + + const rowIds = useMemo(() => (rows ?? []).map((r) => r._id), [rows]); + const selection = useSelection(rowIds); + const selectedCount = selection.selected.size; // Fire dataset_opened once per dataset visit, after the dataset has // resolved. 
The ref keeps it idempotent across re-renders. @@ -44,16 +53,28 @@ export default function DatasetPage() { async function handleExport(format: "csv" | "xlsx") { if (!dataset || !rows || exporting) return; + + // If the user has rows selected, export ONLY those. Otherwise the + // entire dataset. Preserves column ordering (handled by the export + // util β€” it iterates `dataset.columns` in order). + const exportRows = + selectedCount > 0 + ? rows.filter((r) => selection.selected.has(r._id)) + : rows; + if (exportRows.length === 0) return; + setExporting(format); try { if (format === "csv") { - downloadCSV(dataset.name, dataset.columns, rows); + downloadCSV(dataset.name, dataset.columns, exportRows); } else { - await downloadXLSX(dataset.name, dataset.columns, rows); + await downloadXLSX(dataset.name, dataset.columns, exportRows); } track(EVENTS.DATASET_EXPORTED, { format, - row_count: rows.length, + row_count: exportRows.length, + total_rows: rows.length, + selected_only: selectedCount > 0, seedKey: dataset.seedKey, }); } catch (err) { @@ -62,7 +83,8 @@ export default function DatasetPage() { operation: "dataset_export", format, datasetId: dataset._id, - row_count: rows.length, + row_count: exportRows.length, + selected_only: selectedCount > 0, }); } finally { setExporting(null); @@ -98,7 +120,7 @@ export default function DatasetPage() { } } - if (isLoading || dataset === undefined || rows === undefined) { + if (authLoading || dataset === undefined || rows === undefined) { return (

Loading...

@@ -110,6 +132,20 @@ export default function DatasetPage() { // thrown instead — caught by /dataset/[id]/error.tsx, which renders // the "Dataset not found" UI. + const exportDisabled = exporting !== null || rows.length === 0; + const csvLabel = + exporting === "csv" + ? "Exporting…" + : selectedCount > 0 + ? `Export CSV (${selectedCount})` + : "Export CSV"; + const xlsxLabel = + exporting === "xlsx" + ? "Exporting…" + : selectedCount > 0 + ? `Export XLSX (${selectedCount})` + : "Export XLSX"; + return (
@@ -130,17 +166,27 @@ export default function DatasetPage() {
+ {error && ( +
+ {error} +
+ )} +
diff --git a/frontend/components/QuotaBadge.tsx b/frontend/components/QuotaBadge.tsx new file mode 100644 index 0000000..3317ac7 --- /dev/null +++ b/frontend/components/QuotaBadge.tsx @@ -0,0 +1,81 @@ +"use client"; + +import { useQuery, useConvexAuth } from "convex/react"; +import { api } from "@/convex/_generated/api"; + +/** + * Compact usage indicator in the dashboard header. + * + * Three visual states, all derived from server-returned usage so paid + * plans / per-user limits "just work" with no UI change: + * + * <80% muted text, neutral border + * 80–99% amber border + amber text + * 100% red border + red text + * + * Hidden for anonymous viewers β€” the badge is account-scoped. + */ +export function QuotaBadge() { + const { isAuthenticated } = useConvexAuth(); + const usage = useQuery( + api.quota.getMy, + isAuthenticated ? {} : "skip", + ); + + if (!isAuthenticated || !usage) return null; + + const exhausted = usage.remaining === 0; + const warning = !exhausted && usage.fractionUsed >= 0.8; + const resetLabel = formatResetDate(usage.periodEndsAt); + + const borderClass = exhausted + ? "border-red-500/40" + : warning + ? "border-amber-500/40" + : "border-border"; + + const textClass = exhausted + ? "text-red-600 dark:text-red-400" + : warning + ? "text-amber-700 dark:text-amber-400" + : "text-muted"; + + const tooltip = exhausted + ? `Monthly free-tier limit reached. Resets on ${resetLabel}.` + : `${usage.remaining.toLocaleString()} of ${usage.limit.toLocaleString()} row operations remaining this month. Resets on ${resetLabel}.`; + + return ( +
+ + {usage.consumed.toLocaleString()} / {usage.limit.toLocaleString()} + + + + +
+ ); +} + +/** + * "Dec 1" style label for the period-end timestamp. Uses the browser's + * locale; the underlying period is calendar-month UTC, so the displayed + * day will be 1 (or 31 in some timezones β€” acceptable rounding). + */ +function formatResetDate(periodEndsAt: number): string { + return new Date(periodEndsAt).toLocaleDateString(undefined, { + month: "short", + day: "numeric", + }); +} diff --git a/frontend/components/table/DatasetTable.tsx b/frontend/components/table/DatasetTable.tsx index 1c702d2..692226e 100644 --- a/frontend/components/table/DatasetTable.tsx +++ b/frontend/components/table/DatasetTable.tsx @@ -9,11 +9,13 @@ import { } from "@tanstack/react-table"; import { FixedSizeList } from "react-window"; import type { DatasetMeta, DatasetRow, DatasetColumn } from "./types"; -import { useSelection } from "./use-selection"; +import type { useSelection } from "./use-selection"; import { usePersistedColumnWidths } from "./use-persisted-widths"; import { TableHeader } from "./TableHeader"; import { DataRow, type DataRowData } from "./DataRow"; +type Selection = ReturnType; + const CHECKBOX_COL_WIDTH = 40; const DEFAULT_COL_WIDTH = 180; const MIN_COL_WIDTH = 80; @@ -45,14 +47,24 @@ function buildColumns( return [selectCol, ...dataCols]; } +/** + * Renders the dataset's rows in a TanStack-Table + react-window grid. + * + * `selection` is owned by the parent page (so the page can export only + * selected rows). Without it, this component is a pure view of rows + + * columns; with it, header/row checkboxes drive selection through the + * parent. 
+ */ export function DatasetTable({ dataset, rows, datasetId, + selection, }: { dataset: DatasetMeta; rows: DatasetRow[]; datasetId: string; + selection: Selection; }) { const tableContainerRef = useRef(null); const [containerHeight, setContainerHeight] = useState(600); @@ -69,8 +81,6 @@ export function DatasetTable({ return () => observer.disconnect(); }, []); - const rowIds = useMemo(() => rows.map((r) => r._id), [rows]); - const selection = useSelection(rowIds); const [storedWidths, setStoredWidths] = usePersistedColumnWidths(datasetId); const columns = useMemo( diff --git a/frontend/convex/datasetRows.ts b/frontend/convex/datasetRows.ts index dc3f318..567f85e 100644 --- a/frontend/convex/datasetRows.ts +++ b/frontend/convex/datasetRows.ts @@ -1,6 +1,10 @@ import { query, internalMutation, internalQuery } from "./_generated/server.js"; import { v } from "convex/values"; import { loadReadableDataset } from "./lib/authz.js"; +import { + consumeQuotaForDataset, + consumeQuotaForRow, +} from "./lib/quota.js"; /** * Read all rows of a dataset. @@ -28,9 +32,15 @@ export const listByDataset = query({ * functions or from a trusted backend authenticated with the Convex * admin key. * + * Quota: every row write charges the dataset's owner exactly once (see + * convex/lib/quota.ts). System-owned datasets bypass quota. The charge + * happens BEFORE the write in the same transaction, so failed writes + * never consume quota. + * * If user-facing row editing is ever introduced, add a separate purpose- * built public mutation (e.g. `userEditCell`) that performs ownership - * checks via `loadOwnedDataset` first. Do not relax these to public. + * checks via `loadOwnedDataset` first AND calls `consumeQuotaForRow`. + * Do not relax these to public. 
*/ export const insert = internalMutation({ args: { @@ -39,6 +49,7 @@ export const insert = internalMutation({ sources: v.optional(v.array(v.string())), }, handler: async (ctx, args) => { + await consumeQuotaForDataset(ctx, args.datasetId, 1); return await ctx.db.insert("datasetRows", args); }, }); @@ -49,8 +60,8 @@ export const update = internalMutation({ data: v.record(v.string(), v.any()), }, handler: async (ctx, args) => { - const existing = await ctx.db.get(args.id); - if (!existing) throw new Error("Row not found"); + // Resolves row → dataset → consumes 1 unit of owner's quota. + const existing = await consumeQuotaForRow(ctx, args.id, 1); const oldData = existing.data as Record; const newData = args.data; @@ -100,12 +111,24 @@ export const remove = internalMutation({ }, }); +/** + * Insert N rows in one transaction. + * + * All-or-nothing semantics by design: + * - The quota layer's only job is hard enforcement (yes/no, atomic). + * - The agent runner's job is batch sizing — call `quota:getMy` to + * see `remaining`, then call insertBatch with at most that many. + * - Partial accept would push policy decisions ("which rows survived?") + * into the quota layer, which has no business making them.
+ */ export const insertBatch = internalMutation({ args: { datasetId: v.id("datasets"), rows: v.array(v.record(v.string(), v.any())), }, handler: async (ctx, args) => { + await consumeQuotaForDataset(ctx, args.datasetId, args.rows.length); + for (const data of args.rows) { await ctx.db.insert("datasetRows", { datasetId: args.datasetId, diff --git a/frontend/convex/datasets.ts b/frontend/convex/datasets.ts index 95050e7..f0fb8cc 100644 --- a/frontend/convex/datasets.ts +++ b/frontend/convex/datasets.ts @@ -8,6 +8,7 @@ import { loadReadableDataset, requireIdentity, } from "./lib/authz.js"; +import { requireQuotaRemaining } from "./lib/quota.js"; const columnValidator = v.object({ name: v.string(), @@ -92,6 +93,10 @@ export const create = mutation({ handler: async (ctx, args) => { const identity = await requireIdentity(ctx); assertNotReservedOwner(identity.subject); + // Block dataset creation at full exhaustion β€” a dataset you can't + // populate is just clutter. Row generation later will re-check, so + // this is a UX safeguard, not the only line of defense. + await requireQuotaRemaining(ctx, identity.subject, 1); return await ctx.db.insert("datasets", { ...args, diff --git a/frontend/convex/lib/quota.ts b/frontend/convex/lib/quota.ts new file mode 100644 index 0000000..ee2d416 --- /dev/null +++ b/frontend/convex/lib/quota.ts @@ -0,0 +1,223 @@ +import type { GenericMutationCtx, GenericQueryCtx } from "convex/server"; +import type { DataModel, Doc, Id } from "../_generated/dataModel.js"; +import { isReservedOwnerId } from "./authz.js"; + +/** + * Per-principal quota enforcement for row modifications. + * + * One counter per principal (the `usage` table). Free tier is currently + * 2,500 row operations PER MONTH (calendar month, UTC). The counter resets + * lazily β€” the next read or write after the 1st of a new month rolls the + * counter back to 0. No background job needed. 
+ * + * Charging model: + * - 1 row inserted, updated, or replaced = 1 unit consumed this period + * - System-owned datasets (ownerId === "system") bypass quota entirely + * - Deletes do NOT refund; the counter tracks WORK in the period, not + * current row count. Deletion is just cleanup. + * - Period rolls over on the 1st (UTC) of each calendar month + * + * Principal model: + * - Today: the principal is `dataset.ownerId`, which is a Clerk user id + * - Future (orgs/teams): the principal can also be a Clerk `org_xxx` id; + * this module already resolves the principal through the dataset, so + * a future schema change that lets datasets be owned by an org will + * "just work" — `usage.userId` should be read as "principalId" + * + * Atomicity: + * - Convex mutations are atomic. `consumeQuota` + the actual row write + * happen in one transaction, so failed writes never charge the user. + * - Concurrent agent calls on the same principal serialize via the + * `usage` doc (Convex retries on optimistic-concurrency conflict). + * + * Architectural boundary with the agent runner: + * - The quota layer's job is HARD ENFORCEMENT — yes/no, atomic, simple. + * - The agent layer's job is BATCH SIZING — call `getUsageFor` first, + * split work to fit `remaining`, drive the retry/backoff strategy. + * - `insertBatch` is intentionally all-or-nothing: a partial accept + * would leak quota-aware policy ("which rows survived?") into the + * quota layer, which has no business making that decision. + */ + +type AnyCtx = + | GenericQueryCtx + | GenericMutationCtx; +type WriteCtx = GenericMutationCtx; + +/** + * Monthly free-tier limit. Hardcoded today; will move onto the `usage` + * row (`plan` field + lookup table) when paid tiers exist.
+ */ +export const FREE_TIER_MONTHLY_QUOTA = 2500; + +export class QuotaExceededError extends Error { + constructor(consumed: number, limit: number, requested: number) { + super( + `Monthly free-tier quota exceeded: ${consumed}/${limit} used this period, ${requested} more requested`, + ); + this.name = "QuotaExceededError"; + } +} + +export interface UsageSnapshot { + consumed: number; + limit: number; + remaining: number; + /** 0..1 fraction. Capped at 1 for display purposes. */ + fractionUsed: number; + /** ms epoch of the start of the current billing period (1st of month UTC). */ + periodStart: number; + /** ms epoch when the current period ends (1st of next month UTC). */ + periodEndsAt: number; +} + +/** + * First-millisecond of the UTC calendar month containing `ts`. + * Pure function; internal helper. + */ +function getMonthStartUTC(ts: number): number { + const d = new Date(ts); + return Date.UTC(d.getUTCFullYear(), d.getUTCMonth(), 1); +} + +/** First-millisecond of the UTC calendar month AFTER the one containing `ts`. */ +function getNextMonthStartUTC(ts: number): number { + const d = new Date(ts); + return Date.UTC(d.getUTCFullYear(), d.getUTCMonth() + 1, 1); +} + +function snapshotOf(consumed: number, periodStart: number): UsageSnapshot { + const remaining = Math.max(0, FREE_TIER_MONTHLY_QUOTA - consumed); + const fractionUsed = Math.min(1, consumed / FREE_TIER_MONTHLY_QUOTA); + return { + consumed, + limit: FREE_TIER_MONTHLY_QUOTA, + remaining, + fractionUsed, + periodStart, + periodEndsAt: getNextMonthStartUTC(periodStart), + }; +} + +/** + * Read-only snapshot for UI rendering. Returns a zero-state snapshot if + * the user has no usage row yet OR if the existing row belongs to a past + * period (no DB write β€” the actual reset happens on the next consumeQuota + * call). 
+ */ +export async function getUsageFor( + ctx: AnyCtx, + userId: string, +): Promise { + const row = await ctx.db + .query("usage") + .withIndex("by_user", (q) => q.eq("userId", userId)) + .unique(); + + const monthStart = getMonthStartUTC(Date.now()); + + // Either no row yet, or row belongs to a previous period β†’ show 0. + // (The DB row is left alone here; the next write rolls it over.) + if (!row || (row.periodStart ?? 0) < monthStart) { + return snapshotOf(0, monthStart); + } + return snapshotOf(row.rowsConsumed, row.periodStart ?? monthStart); +} + +/** + * Pre-flight check used by `datasets.create`: rejects the call if the + * user has zero quota left in the current period. We block dataset + * creation at full exhaustion because a dataset you can't populate is + * just clutter. + */ +export async function requireQuotaRemaining( + ctx: AnyCtx, + userId: string, + atLeast: number = 1, +): Promise { + const usage = await getUsageFor(ctx, userId); + if (usage.remaining < atLeast) { + throw new QuotaExceededError(usage.consumed, usage.limit, atLeast); + } +} + +/** + * Atomically check + consume `n` units against `dataset.ownerId` for the + * CURRENT period. Performs the monthly rollover if the existing row + * belongs to a past period. + * + * Call this BEFORE the row write inside the same mutation. If quota is + * exceeded, throws (the row write never happens, transaction rolls back). + * System-owned datasets pass through with no accounting. + */ +export async function consumeQuota( + ctx: WriteCtx, + dataset: Doc<"datasets">, + n: number, +): Promise { + if (n <= 0) return; + if (isReservedOwnerId(dataset.ownerId)) return; + + const userId = dataset.ownerId; + const monthStart = getMonthStartUTC(Date.now()); + + const row = await ctx.db + .query("usage") + .withIndex("by_user", (q) => q.eq("userId", userId)) + .unique(); + + // Rollover if the existing row belongs to a past period. + const carryConsumed = + row && (row.periodStart ?? 0) >= monthStart ? 
row.rowsConsumed : 0; + const next = carryConsumed + n; + + if (next > FREE_TIER_MONTHLY_QUOTA) { + throw new QuotaExceededError(carryConsumed, FREE_TIER_MONTHLY_QUOTA, n); + } + + if (row) { + await ctx.db.patch(row._id, { + rowsConsumed: next, + periodStart: monthStart, + }); + } else { + await ctx.db.insert("usage", { + userId, + rowsConsumed: next, + periodStart: monthStart, + }); + } +} + +/** + * Resolve a row's parent dataset and consume `n` against its owner. + * Used by `datasetRows.update`, which only knows the rowId up front. + */ +export async function consumeQuotaForRow( + ctx: WriteCtx, + rowId: Id<"datasetRows">, + n: number, +): Promise> { + const row = await ctx.db.get(rowId); + if (!row) throw new Error("Row not found"); + const dataset = await ctx.db.get(row.datasetId); + if (!dataset) throw new Error("Dataset not found"); + await consumeQuota(ctx, dataset, n); + return row; +} + +/** + * Resolve a dataset by id (used by row writes that take datasetId) and + * consume `n` against its owner. Returns the dataset for callers that + * also need it. + */ +export async function consumeQuotaForDataset( + ctx: WriteCtx, + datasetId: Id<"datasets">, + n: number, +): Promise> { + const dataset = await ctx.db.get(datasetId); + if (!dataset) throw new Error("Dataset not found"); + await consumeQuota(ctx, dataset, n); + return dataset; +} diff --git a/frontend/convex/quota.ts b/frontend/convex/quota.ts new file mode 100644 index 0000000..2a79f67 --- /dev/null +++ b/frontend/convex/quota.ts @@ -0,0 +1,21 @@ +import { query } from "./_generated/server.js"; +import { getUsageFor } from "./lib/quota.js"; +import { requireIdentity } from "./lib/authz.js"; + +/** + * Read-only snapshot of the signed-in user's quota usage. Used by the + * dashboard's QuotaBadge component. + * + * Returns a `UsageSnapshot` (see lib/quota.ts): consumed, limit, remaining, + * fractionUsed, periodStart, periodEndsAt. 
The limit + period bounds are + * returned alongside `consumed` so the UI never hardcodes them — when paid + * plans land, the limit becomes per-user and this query stays the single + * source of truth. + */ +export const getMy = query({ + args: {}, + handler: async (ctx) => { + const identity = await requireIdentity(ctx); + return await getUsageFor(ctx, identity.subject); + }, +}); diff --git a/frontend/convex/schema.ts b/frontend/convex/schema.ts index ac98e54..68cd55f 100644 --- a/frontend/convex/schema.ts +++ b/frontend/convex/schema.ts @@ -53,4 +53,30 @@ export default defineSchema({ newValue: v.string(), changedAt: v.number(), }).index("by_row", ["datasetRowId"]), + + // Per-user / per-account quota accounting. One row per principal, created + // lazily on first row modification. `rowsConsumed` tracks WORK done in + // the current period — deleting rows does NOT refund quota. + // + // Period model: calendar month, UTC. Rolls over on the 1st (UTC) of each + // month — rollover is detected lazily: reads report 0 for a stale period, + // and the next write resets the stored counter (no background job needed). + // + // The `userId` field is named for the current scope (per-Clerk-user) but + // semantically holds any principal id — when Clerk Organizations land, + // an `org_xxx` id will live here too without a schema change. See + // convex/lib/quota.ts for the resolution policy. + // + // Future fields (all optional → no migration needed when added): + // - plan: "free" | "pro" | "enterprise" (today: implicitly "free") + // - limitOverride (admin grants beyond plan default) + usage: defineTable({ + userId: v.string(), + rowsConsumed: v.number(), + // ms epoch of the start of the period this counter belongs to (first + // ms of the current UTC calendar month). Optional for forward-compat + // with rows written before this field existed — missing = treated as + // "before current period", which forces a reset on next write.
+ periodStart: v.optional(v.number()), + }).index("by_user", ["userId"]), }); diff --git a/frontend/convex/seed.ts b/frontend/convex/seed.ts deleted file mode 100644 index 1e2999a..0000000 --- a/frontend/convex/seed.ts +++ /dev/null @@ -1,198 +0,0 @@ -import { mutation } from "./_generated/server.js"; -import { assertNotReservedOwner, requireIdentity } from "./lib/authz.js"; - -type ColType = "text" | "number" | "boolean" | "url" | "date"; - -interface DatasetDef { - name: string; - description: string; - status: "live" | "paused" | "building"; - cadence: string; - columns: { name: string; type: ColType; description?: string }[]; - rows: Record[]; -} - -const SEED_DATASETS: DatasetDef[] = [ - { - name: "YC S25 Companies Hiring", - description: - "Active YC S25 companies with open engineering roles, tracking headcount and positions across career pages.", - status: "live", - cadence: "Every 6 hours", - columns: [ - { name: "Company", type: "text" }, - { name: "Description", type: "text" }, - { name: "Website", type: "url" }, - { name: "Hiring", type: "boolean" }, - { name: "Open Roles", type: "number" }, - { name: "Stage", type: "text" }, - { name: "Location", type: "text" }, - { name: "Founded", type: "date" }, - { name: "Employees", type: "number" }, - { name: "LinkedIn", type: "url" }, - ], - rows: [ - { Company: "Airbase", Description: "Spend management platform for mid-market...", Website: "airbase.com", Hiring: "Yes", "Open Roles": "12", Stage: "Series B", Location: "San Francisco", Founded: "2017", Employees: "250", LinkedIn: "linkedin.com/company/airbase" }, - { Company: "Brex", Description: "Financial OS for growing companies built...", Website: "brex.com", Hiring: "Yes", "Open Roles": "34", Stage: "Series D", Location: "New York", Founded: "2017", Employees: "1200", LinkedIn: "linkedin.com/company/brex" }, - { Company: "Clerk", Description: "Authentication and user management for...", Website: "clerk.com", Hiring: "Yes", "Open Roles": "8", Stage: "Series 
A", Location: "San Francisco", Founded: "2019", Employees: "85", LinkedIn: "linkedin.com/company/clerk-dev" }, - { Company: "Deel", Description: "Global payroll and compliance platform...", Website: "deel.com", Hiring: "Yes", "Open Roles": "21", Stage: "Series D", Location: "Remote", Founded: "2019", Employees: "3000", LinkedIn: "linkedin.com/company/deel" }, - { Company: "Expo", Description: "Framework and platform for universal React...", Website: "expo.dev", Hiring: "No", "Open Roles": "0", Stage: "Series B", Location: "Palo Alto", Founded: "2014", Employees: "60", LinkedIn: "linkedin.com/company/expo-dev" }, - { Company: "Fly.io", Description: "Deploy app servers close to users with a...", Website: "fly.io", Hiring: "Yes", "Open Roles": "5", Stage: "Series C", Location: "Chicago", Founded: "2017", Employees: "120", LinkedIn: "linkedin.com/company/fly-io" }, - { Company: "Graphite", Description: "Modern code review tool that stacks pull...", Website: "graphite.dev", Hiring: "Yes", "Open Roles": "4", Stage: "Series A", Location: "New York", Founded: "2020", Employees: "35", LinkedIn: "linkedin.com/company/graphitedev" }, - { Company: "Helicone", Description: "Open-source LLM observability platform...", Website: "helicone.ai", Hiring: "Yes", "Open Roles": "6", Stage: "Seed", Location: "San Francisco", Founded: "2023", Employees: "15", LinkedIn: "linkedin.com/company/helicone" }, - { Company: "Incident.io", Description: "Incident management platform that helps...", Website: "incident.io", Hiring: "Yes", "Open Roles": "11", Stage: "Series B", Location: "London", Founded: "2021", Employees: "130", LinkedIn: "linkedin.com/company/incident-io" }, - { Company: "Jasper", Description: "AI copilot for enterprise marketing teams...", Website: "jasper.ai", Hiring: "No", "Open Roles": "0", Stage: "Series A", Location: "Austin", Founded: "2021", Employees: "400", LinkedIn: "linkedin.com/company/jasper-ai" }, - { Company: "Knock", Description: "Notification infrastructure for 
developers...", Website: "knock.app", Hiring: "Yes", "Open Roles": "3", Stage: "Series A", Location: "New York", Founded: "2020", Employees: "30", LinkedIn: "linkedin.com/company/knocklabs" }, - { Company: "LangChain", Description: "Framework for developing applications...", Website: "langchain.com", Hiring: "Yes", "Open Roles": "9", Stage: "Series A", Location: "San Francisco", Founded: "2022", Employees: "60", LinkedIn: "linkedin.com/company/langchain" }, - { Company: "Mintlify", Description: "Modern documentation platform that makes...", Website: "mintlify.com", Hiring: "Yes", "Open Roles": "7", Stage: "Series A", Location: "San Francisco", Founded: "2021", Employees: "40", LinkedIn: "linkedin.com/company/mintlify" }, - { Company: "Neon", Description: "Serverless Postgres with branching and...", Website: "neon.tech", Hiring: "Yes", "Open Roles": "15", Stage: "Series B", Location: "San Francisco", Founded: "2021", Employees: "150", LinkedIn: "linkedin.com/company/neondatabase" }, - { Company: "OpenPipe", Description: "Fine-tuning platform that turns LLM logs...", Website: "openpipe.ai", Hiring: "Yes", "Open Roles": "4", Stage: "Seed", Location: "San Francisco", Founded: "2023", Employees: "12", LinkedIn: "linkedin.com/company/openpipe" }, - { Company: "Posthog", Description: "Open-source product analytics platform...", Website: "posthog.com", Hiring: "Yes", "Open Roles": "8", Stage: "Series B", Location: "Remote", Founded: "2020", Employees: "50", LinkedIn: "linkedin.com/company/posthog" }, - { Company: "Resend", Description: "Email API for developers with React Email...", Website: "resend.com", Hiring: "Yes", "Open Roles": "6", Stage: "Series A", Location: "San Francisco", Founded: "2022", Employees: "25", LinkedIn: "linkedin.com/company/resend" }, - { Company: "Supabase", Description: "Open-source Firebase alternative with...", Website: "supabase.com", Hiring: "Yes", "Open Roles": "18", Stage: "Series C", Location: "Remote", Founded: "2020", Employees: "200", 
LinkedIn: "linkedin.com/company/supabase" }, - { Company: "Trigger.dev", Description: "Background jobs framework for TypeScript...", Website: "trigger.dev", Hiring: "Yes", "Open Roles": "3", Stage: "Seed", Location: "London", Founded: "2022", Employees: "15", LinkedIn: "linkedin.com/company/triggerdev" }, - { Company: "Unkey", Description: "API authentication and rate limiting built...", Website: "unkey.dev", Hiring: "Yes", "Open Roles": "2", Stage: "Seed", Location: "Remote", Founded: "2023", Employees: "8", LinkedIn: "linkedin.com/company/unkey" }, - ], - }, - { - name: "Bay Area Vehicle Insurance Quotes", - description: - "Monthly premium quotes for a 2020 Honda Civic across major insurers in the Bay Area.", - status: "live", - cadence: "Daily", - columns: [ - { name: "Provider", type: "text" }, - { name: "Description", type: "text" }, - { name: "Website", type: "url" }, - { name: "Monthly Premium", type: "number" }, - { name: "Deductible", type: "number" }, - { name: "Coverage Type", type: "text" }, - { name: "AM Best Rating", type: "text" }, - { name: "Customer Rating", type: "number" }, - { name: "Quote Date", type: "date" }, - ], - rows: [ - { Provider: "Geico", Description: "Government Employees Insurance Company...", Website: "geico.com", "Monthly Premium": "$142", Deductible: "$500", "Coverage Type": "Full Coverage", "AM Best Rating": "A++", "Customer Rating": "4.2", "Quote Date": "May 17" }, - { Provider: "State Farm", Description: "Largest property and casualty insurance...", Website: "statefarm.com", "Monthly Premium": "$158", Deductible: "$500", "Coverage Type": "Full Coverage", "AM Best Rating": "A++", "Customer Rating": "4.5", "Quote Date": "May 17" }, - { Provider: "Progressive", Description: "American insurance company, third largest...", Website: "progressive.com", "Monthly Premium": "$131", Deductible: "$750", "Coverage Type": "Basic", "AM Best Rating": "A+", "Customer Rating": "3.9", "Quote Date": "May 17" }, - { Provider: "Allstate", 
Description: "Second largest personal lines insurer in...", Website: "allstate.com", "Monthly Premium": "$167", Deductible: "$500", "Coverage Type": "Full Coverage", "AM Best Rating": "A+", "Customer Rating": "4.1", "Quote Date": "May 17" }, - { Provider: "USAA", Description: "Financial services for military members...", Website: "usaa.com", "Monthly Premium": "$119", Deductible: "$500", "Coverage Type": "Full Coverage", "AM Best Rating": "A++", "Customer Rating": "4.8", "Quote Date": "May 17" }, - { Provider: "Liberty Mutual", Description: "American diversified global insurer...", Website: "libertymutual.com", "Monthly Premium": "$172", Deductible: "$500", "Coverage Type": "Full Coverage", "AM Best Rating": "A", "Customer Rating": "3.8", "Quote Date": "May 17" }, - { Provider: "Farmers", Description: "American insurer group of automobiles...", Website: "farmers.com", "Monthly Premium": "$155", Deductible: "$750", "Coverage Type": "Full Coverage", "AM Best Rating": "A", "Customer Rating": "4.0", "Quote Date": "May 17" }, - { Provider: "Nationwide", Description: "Insurance and financial services company...", Website: "nationwide.com", "Monthly Premium": "$148", Deductible: "$500", "Coverage Type": "Full Coverage", "AM Best Rating": "A+", "Customer Rating": "4.3", "Quote Date": "May 17" }, - ], - }, - { - name: "Competitor Blog Posts", - description: - "Latest blog posts from competitor companies, tracking publish date, title, and topic tags.", - status: "live", - cadence: "Every 12 hours", - columns: [ - { name: "Company", type: "text" }, - { name: "Title", type: "text" }, - { name: "URL", type: "url" }, - { name: "Date", type: "date" }, - { name: "Topic", type: "text" }, - { name: "Author", type: "text" }, - { name: "Read Time", type: "text" }, - ], - rows: [ - { Company: "Firecrawl", Title: "Announcing v2 API with Structured Extraction", URL: "firecrawl.dev/blog/v2-api", Date: "May 16", Topic: "Product", Author: "Mendable Team", "Read Time": "5 min" }, - { 
Company: "Apify", Title: "The Complete Web Scraping Guide for 2026", URL: "blog.apify.com/scraping-guide", Date: "May 15", Topic: "Tutorial", Author: "Ondra Urban", "Read Time": "12 min" }, - { Company: "Browserbase", Title: "Introducing Stealth Mode for Anti-Detection", URL: "browserbase.com/blog/stealth", Date: "May 14", Topic: "Feature", Author: "Paul Klein", "Read Time": "4 min" }, - { Company: "ScrapFly", Title: "Understanding Proxy Networks: A Deep Dive", URL: "scrapfly.io/blog/proxy-networks", Date: "May 13", Topic: "Guide", Author: "ScrapFly Team", "Read Time": "8 min" }, - { Company: "Bright Data", Title: "AI-Powered Data Collection at Scale", URL: "brightdata.com/blog/ai-collection", Date: "May 12", Topic: "Research", Author: "Or Lenchner", "Read Time": "6 min" }, - { Company: "Crawlee", Title: "How We Rebuilt Our Crawler from Scratch", URL: "crawlee.dev/blog/rebuild", Date: "May 11", Topic: "Engineering", Author: "Jan Curn", "Read Time": "10 min" }, - { Company: "Zyte", Title: "E-commerce Price Monitoring Best Practices", URL: "zyte.com/blog/price-monitoring", Date: "May 10", Topic: "Guide", Author: "Zyte Team", "Read Time": "7 min" }, - { Company: "Playwright", Title: "New Locator Strategies in v1.45", URL: "playwright.dev/blog/v145", Date: "May 9", Topic: "Release", Author: "MS Team", "Read Time": "3 min" }, - ], - }, - { - name: "GPU Prices — RTX 5090", - description: - "Price tracking for NVIDIA RTX 5090 across major retailers, including stock availability.", - status: "live", - cadence: "Every 30 min", - columns: [ - { name: "Retailer", type: "text" }, - { name: "Product Name", type: "text" }, - { name: "Price", type: "number" }, - { name: "In Stock", type: "boolean" }, - { name: "Shipping", type: "text" }, - { name: "Seller Type", type: "text" }, - { name: "URL", type: "url" }, - { name: "Last Checked", type: "date" }, - ], - rows: [ - { Retailer: "Newegg", "Product Name": "NVIDIA GeForce RTX 5090 Founders Edition", Price: "$1,999", "In Stock":
"Yes", Shipping: "Free 2-day", "Seller Type": "Direct", URL: "newegg.com/nvidia-rtx-5090", "Last Checked": "12 min ago" }, - { Retailer: "Best Buy", "Product Name": "NVIDIA GeForce RTX 5090 FE 32GB GDDR7", Price: "$1,999", "In Stock": "No", Shipping: "—", "Seller Type": "Direct", URL: "bestbuy.com/nvidia-rtx-5090", "Last Checked": "12 min ago" }, - { Retailer: "Amazon", "Product Name": "NVIDIA RTX 5090 Founders Edition 32GB", Price: "$2,149", "In Stock": "Yes", Shipping: "$12.99", "Seller Type": "3rd Party", URL: "amazon.com/dp/B0DRTX5090", "Last Checked": "12 min ago" }, - { Retailer: "B&H Photo", "Product Name": "NVIDIA GeForce RTX 5090 FE 32GB", Price: "$1,999", "In Stock": "Yes", Shipping: "Free Expedited", "Seller Type": "Direct", URL: "bhphoto.com/nvidia-rtx-5090", "Last Checked": "12 min ago" }, - { Retailer: "Micro Center", "Product Name": "NVIDIA GeForce RTX 5090 Founders 32GB", Price: "$1,979", "In Stock": "Yes", Shipping: "In-store only", "Seller Type": "Direct", URL: "microcenter.com/rtx-5090", "Last Checked": "12 min ago" }, - { Retailer: "CDW", "Product Name": "NVIDIA RTX 5090 FE 32GB Graphics Card", Price: "$2,049", "In Stock": "Yes", Shipping: "$19.99", "Seller Type": "Direct", URL: "cdw.com/nvidia-rtx-5090", "Last Checked": "12 min ago" }, - ], - }, - { - name: "SG Startup Funding Rounds", - description: - "Recent funding rounds for Singapore-based startups, sourced from press releases and Crunchbase.", - status: "building", - cadence: "Daily", - columns: [ - { name: "Startup", type: "text" }, - { name: "Description", type: "text" }, - { name: "Round", type: "text" }, - { name: "Amount", type: "number" }, - { name: "Lead Investor", type: "text" }, - { name: "Date", type: "date" }, - { name: "Sector", type: "text" }, - { name: "Valuation", type: "number" }, - { name: "Crunchbase", type: "url" }, - ], - rows: [ - { Startup: "Grab", Description: "Southeast Asian super app for ride-hailing...", Round: "Series H", Amount: "$300M", "Lead Investor":
"GIC", Date: "May 10", Sector: "Transportation", Valuation: "$14B", Crunchbase: "crunchbase.com/organization/grab" }, - { Startup: "Carousell", Description: "Consumer-to-consumer marketplace for...", Round: "Series D", Amount: "$100M", "Lead Investor": "Temasek", Date: "May 8", Sector: "Marketplace", Valuation: "$1.1B", Crunchbase: "crunchbase.com/organization/carousell" }, - { Startup: "Ninja Van", Description: "Logistics company providing last-mile...", Round: "Series E", Amount: "$150M", "Lead Investor": "B Capital", Date: "May 5", Sector: "Logistics", Valuation: "$2B", Crunchbase: "crunchbase.com/organization/ninjavan" }, - { Startup: "PatSnap", Description: "AI-powered innovation intelligence...", Round: "Series D", Amount: "$90M", "Lead Investor": "SoftBank", Date: "May 2", Sector: "Enterprise", Valuation: "$1B", Crunchbase: "crunchbase.com/organization/patsnap" }, - { Startup: "Endowus", Description: "Digital wealth platform for personal...", Round: "Series B", Amount: "$45M", "Lead Investor": "UBS", Date: "Apr 28", Sector: "Fintech", Valuation: "$400M", Crunchbase: "crunchbase.com/organization/endowus" }, - { Startup: "Nium", Description: "Global payments infrastructure platform...", Round: "Series D", Amount: "$200M", "Lead Investor": "Visa", Date: "Apr 20", Sector: "Fintech", Valuation: "$2.1B", Crunchbase: "crunchbase.com/organization/nium" }, - ], - }, -]; - -export const seed = mutation({ - args: {}, - handler: async (ctx) => { - const identity = await requireIdentity(ctx); - assertNotReservedOwner(identity.subject); - const ownerId = identity.subject; - - const existing = await ctx.db - .query("datasets") - .withIndex("by_owner", (q) => q.eq("ownerId", ownerId)) - .first(); - if (existing) return { status: "already_seeded" }; - - for (const ds of SEED_DATASETS) { - const datasetId = await ctx.db.insert("datasets", { - name: ds.name, - description: ds.description, - ownerId, - status: ds.status, - cadence: ds.cadence, - visibility: "private", - 
columns: ds.columns, - }); - - for (const row of ds.rows) { - await ctx.db.insert("datasetRows", { - datasetId, - data: row, - }); - } - } - - return { status: "seeded", count: SEED_DATASETS.length }; - }, -});