diff --git a/backend/package-lock.json b/backend/package-lock.json
index e231b48..8c4ca5a 100644
--- a/backend/package-lock.json
+++ b/backend/package-lock.json
@@ -1901,9 +1901,6 @@
"cpu": [
"arm64"
],
- "libc": [
- "glibc"
- ],
"license": "MIT",
"optional": true,
"os": [
@@ -1920,9 +1917,6 @@
"cpu": [
"arm64"
],
- "libc": [
- "musl"
- ],
"license": "MIT",
"optional": true,
"os": [
@@ -1939,9 +1933,6 @@
"cpu": [
"riscv64"
],
- "libc": [
- "glibc"
- ],
"license": "MIT",
"optional": true,
"os": [
@@ -1958,9 +1949,6 @@
"cpu": [
"x64"
],
- "libc": [
- "glibc"
- ],
"license": "MIT",
"optional": true,
"os": [
@@ -1977,9 +1965,6 @@
"cpu": [
"x64"
],
- "libc": [
- "musl"
- ],
"license": "MIT",
"optional": true,
"os": [
diff --git a/backend/src/clerk-auth.ts b/backend/src/clerk-auth.ts
index de28dca..570bf1b 100644
--- a/backend/src/clerk-auth.ts
+++ b/backend/src/clerk-auth.ts
@@ -116,8 +116,8 @@ export async function requireAuth(
const requestState = await req.server.clerk.authenticateRequest(
clerkRequest,
{
- // Anyone consuming our backend is our own frontend; lock to its origin.
- authorizedParties: [env.CLIENT_ORIGIN],
+ // Anyone consuming our backend is our own frontend; lock to allowed origins.
+ authorizedParties: env.CLIENT_ORIGINS,
},
);
diff --git a/backend/src/env.ts b/backend/src/env.ts
index 97c410f..63369f7 100644
--- a/backend/src/env.ts
+++ b/backend/src/env.ts
@@ -18,11 +18,21 @@ function numberFromEnv(name: string, fallback: number): number {
return Number.isFinite(parsed) ? parsed : fallback;
}
+/**
+ * Reads a comma-separated list from the environment variable `name`.
+ *
+ * Entries are trimmed and empty entries are dropped. `fallback` is returned
+ * when the variable is unset or empty, and also when it holds no usable entry
+ * (for example "," or " , ").
+ *
+ * @param name - Environment variable to read.
+ * @param fallback - List returned when no usable entry is configured.
+ * @returns The parsed entries, or `fallback`.
+ */
+function stringListFromEnv(name: string, fallback: string[]): string[] {
+  const raw = process.env[name];
+  if (!raw) return fallback;
+  const values = raw
+    .split(",")
+    .map((value) => value.trim())
+    .filter(Boolean);
+  // A separator-only value must not produce []: CLIENT_ORIGINS is used as the
+  // Clerk authorizedParties list and the CORS allow-list, so an empty result
+  // would silently change both instead of using the default origin.
+  return values.length > 0 ? values : fallback;
+}
+
export const env = {
PROD: process.env.PROD,
IS_PROD: process.env.PROD === "1",
IS_LOCAL_MODE: process.env.PROD !== "1",
CLIENT_ORIGIN: process.env.CLIENT_ORIGIN || "http://localhost:3500",
+ CLIENT_ORIGINS: stringListFromEnv("CLIENT_ORIGIN", ["http://localhost:3500"]),
CONVEX_URL: required("CONVEX_URL"),
PORT: numberFromEnv("PORT", 3501),
diff --git a/backend/src/index.ts b/backend/src/index.ts
index cb57cd1..12621a98 100644
--- a/backend/src/index.ts
+++ b/backend/src/index.ts
@@ -8,7 +8,7 @@ import { inferSchema } from "./pipeline/schema-inference.js";
import { datasetContextSchema, type DatasetContext } from "./pipeline/populate.js";
import { populateWorkflow } from "./mastra/workflows/populate.js";
import { updateWorkflow } from "./mastra/workflows/update.js";
-import { convex, internal } from "./convex.js";
+import { convex, api, internal } from "./convex.js";
import { sendTransactionalEmail } from "./email/send.js";
import { datasetReadyTemplate } from "./email/templates/dataset-ready.js";
import { capture, shutdown as shutdownAnalytics } from "./analytics/posthog.js";
@@ -629,23 +629,25 @@ function startLocalRefreshScheduler(
const fastify = Fastify({ logger: true });
-const allowedCorsOrigins = new Set([env.CLIENT_ORIGIN]);
+const allowedCorsOrigins = new Set(env.CLIENT_ORIGINS);
if (env.IS_LOCAL_MODE) {
- try {
- const clientOrigin = new URL(env.CLIENT_ORIGIN);
- if (
- clientOrigin.hostname === "localhost" ||
- clientOrigin.hostname === "127.0.0.1"
- ) {
- allowedCorsOrigins.add(
- `${clientOrigin.protocol}//localhost${clientOrigin.port ? `:${clientOrigin.port}` : ""}`,
- );
- allowedCorsOrigins.add(
- `${clientOrigin.protocol}//127.0.0.1${clientOrigin.port ? `:${clientOrigin.port}` : ""}`,
- );
+ for (const origin of env.CLIENT_ORIGINS) {
+ try {
+ const clientOrigin = new URL(origin);
+ if (
+ clientOrigin.hostname === "localhost" ||
+ clientOrigin.hostname === "127.0.0.1"
+ ) {
+ allowedCorsOrigins.add(
+ `${clientOrigin.protocol}//localhost${clientOrigin.port ? `:${clientOrigin.port}` : ""}`,
+ );
+ allowedCorsOrigins.add(
+ `${clientOrigin.protocol}//127.0.0.1${clientOrigin.port ? `:${clientOrigin.port}` : ""}`,
+ );
+ }
+ } catch {
+ // Keep the configured origin only if the origin is not URL-shaped.
}
- } catch {
- // Keep the configured origin only if CLIENT_ORIGIN is not URL-shaped.
}
}
@@ -682,6 +684,34 @@ fastify.addHook("onClose", async () => {
fastify.get("/health", async () => ({ status: "ok" }));
+// Metadata for a shared dataset, readable from any origin.
+// Responds with name/description/rowCount/columns when the dataset exists and
+// its visibility is "public"; 404 otherwise; 502 when the Convex query throws.
+fastify.get("/share/:id", async (request, reply) => {
+  const shareId = (request.params as { id: string }).id;
+  reply.header("Access-Control-Allow-Origin", "*");
+
+  let dataset;
+  try {
+    dataset = await convex.query(api.datasets.get, { id: shareId });
+  } catch (err) {
+    request.log.error({ err, id: shareId }, "Failed to fetch dataset for share route");
+    return reply.code(502).send({ error: "Failed to fetch dataset" });
+  }
+
+  // Missing and non-public datasets are indistinguishable to the caller.
+  if (dataset?.visibility !== "public") {
+    return reply.code(404).send({ error: "Dataset not found" });
+  }
+
+  // Expose only the fields needed for a preview.
+  const { name, description, rowCount, columns } = dataset;
+  return { name, description, rowCount, columns };
+});
+
+// CORS preflight for the share endpoint: any origin, GET/OPTIONS, Content-Type header.
+fastify.options("/share/:id", async (_request, reply) =>
+  reply
+    .headers({
+      "Access-Control-Allow-Origin": "*",
+      "Access-Control-Allow-Methods": "GET, OPTIONS",
+      "Access-Control-Allow-Headers": "Content-Type",
+    })
+    .code(204)
+    .send(),
+);
+
fastify.get("/local-setup/status", async (_req, reply) => {
if (!env.IS_LOCAL_MODE) {
return reply.code(404).send({ error: "Not found" });
diff --git a/backend/src/pipeline/schema-inference.ts b/backend/src/pipeline/schema-inference.ts
index 467a393..57248b4 100644
--- a/backend/src/pipeline/schema-inference.ts
+++ b/backend/src/pipeline/schema-inference.ts
@@ -18,6 +18,7 @@ Your job is to:
- \`hybrid\` — unclear; the pipeline will try search_fetch first and fall back to browser.
5. Set \`source_hint\` to a specific URL whenever possible (e.g. \`https://www.ycombinator.com/companies?industry=Fintech\`). Avoid vague descriptions.
6. Write a \`retrieval_hint\` for each column describing where/how the value can be found later. Downstream agents will use this to fill the column for each row.
+7. If the user's prompt mentions a specific number of items (e.g. "top 10", "list of 50", "25 companies"), set \`suggested_row_count\` to that number. Otherwise omit it.
Rules:
diff --git a/backend/src/pipeline/types.ts b/backend/src/pipeline/types.ts
index e0b95f9..1b754b3 100644
--- a/backend/src/pipeline/types.ts
+++ b/backend/src/pipeline/types.ts
@@ -38,6 +38,7 @@ export const datasetSchemaSchema = z
primary_key: z.union([z.string(), z.array(z.string())]),
retrieval_strategy: retrievalStrategySchema,
source_hint: z.string().min(1),
+ suggested_row_count: z.number().optional(),
})
.superRefine((data, ctx) => {
const names = data.columns.map((c) => c.name);
diff --git a/docker-compose.dev.yml b/docker-compose.dev.yml
index 3506ef9..3a3d923 100644
--- a/docker-compose.dev.yml
+++ b/docker-compose.dev.yml
@@ -28,7 +28,7 @@ services:
- ./backend/src:/app/src
- ./scripts:/scripts:ro
environment:
- CLIENT_ORIGIN: http://localhost:3500
+ CLIENT_ORIGIN: http://localhost:3500,http://localhost:3502
CONVEX_URL: http://convex:3210
PORT: 3501
PROD: ${PROD:-}
@@ -109,6 +109,7 @@ services:
- ./scripts:/scripts:ro
environment:
NEXT_PUBLIC_CONVEX_URL: http://localhost:3210
+ CONVEX_URL: http://convex:3210
NEXT_PUBLIC_PROD: ${PROD:-}
PROD: ${PROD:-}
NEXT_PUBLIC_CLERK_PUBLISHABLE_KEY: ${NEXT_PUBLIC_CLERK_PUBLISHABLE_KEY:-}
@@ -122,6 +123,7 @@ services:
NEXT_PUBLIC_CLERK_AFTER_SIGN_IN_URL: /dashboard
NEXT_PUBLIC_CLERK_AFTER_SIGN_UP_URL: /dashboard
NEXT_PUBLIC_BACKEND_URL: http://localhost:3501
+ BACKEND_URL: http://backend:3501
# PostHog — analytics no-ops if unset
NEXT_PUBLIC_POSTHOG_KEY: ${NEXT_PUBLIC_POSTHOG_KEY:-}
NEXT_PUBLIC_POSTHOG_HOST: ${NEXT_PUBLIC_POSTHOG_HOST:-https://us.i.posthog.com}
diff --git a/frontend/app/api/share/[id]/route.ts b/frontend/app/api/share/[id]/route.ts
new file mode 100644
index 0000000..72218db
--- /dev/null
+++ b/frontend/app/api/share/[id]/route.ts
@@ -0,0 +1,40 @@
+import { NextResponse } from "next/server";
+
+// Header attached to every response from this route so any origin may read it.
+const CORS = { "Access-Control-Allow-Origin": "*" };
+// Backend base URL: the server-only BACKEND_URL wins, then the public
+// NEXT_PUBLIC_BACKEND_URL, then the local default. `??` only falls through on
+// null/undefined, so an empty-string variable is used as-is.
+const BACKEND_URL =
+ process.env.BACKEND_URL ??
+ process.env.NEXT_PUBLIC_BACKEND_URL ??
+ "http://localhost:3501";
+
+/**
+ * Proxies shared-dataset metadata from the backend's `/share/:id` route.
+ *
+ * - Upstream 2xx: the JSON body is passed through.
+ * - Upstream 404: `{ error: "Dataset not found" }` with status 404.
+ * - Any other upstream status, a network failure, or the 5 s timeout:
+ *   `{ error: "Upstream service error" }` with status 502.
+ *
+ * Every response carries the permissive CORS header.
+ */
+export async function GET(
+  _req: Request,
+  { params }: { params: Promise<{ id: string }> },
+) {
+  const { id } = await params;
+  try {
+    // Encode the segment: a raw id containing "/", "..", "?" or "#" would let
+    // the caller re-target this server-side request to another backend path.
+    const res = await fetch(`${BACKEND_URL}/share/${encodeURIComponent(id)}`, {
+      signal: AbortSignal.timeout(5000),
+    });
+    if (res.ok) {
+      const data = await res.json();
+      return NextResponse.json(data, { headers: CORS });
+    }
+    if (res.status === 404) {
+      return NextResponse.json({ error: "Dataset not found" }, { status: 404, headers: CORS });
+    }
+    return NextResponse.json({ error: "Upstream service error" }, { status: 502, headers: CORS });
+  } catch {
+    // Network error, timeout, or an unparsable upstream body.
+    return NextResponse.json({ error: "Upstream service error" }, { status: 502, headers: CORS });
+  }
+}
+
+/** CORS preflight: 204 with the shared origin header plus allowed methods/headers. */
+export async function OPTIONS() {
+  const preflightHeaders = {
+    ...CORS,
+    "Access-Control-Allow-Methods": "GET, OPTIONS",
+    "Access-Control-Allow-Headers": "Content-Type",
+  };
+  return new Response(null, { status: 204, headers: preflightHeaders });
+}
diff --git a/frontend/app/dashboard/page.tsx b/frontend/app/dashboard/page.tsx
index 7c3ca80..fb9368e 100644
--- a/frontend/app/dashboard/page.tsx
+++ b/frontend/app/dashboard/page.tsx
@@ -3,8 +3,10 @@
import { useEffect, useMemo, useRef, useState } from "react";
import Link from "next/link";
import { useRouter } from "next/navigation";
-import { useQuery } from "convex/react";
+import { useMutation, useQuery } from "convex/react";
import { api } from "@/convex/_generated/api";
+import type { Id } from "@/convex/_generated/dataModel";
+import type { DatasetMeta } from "@/lib/fetch-dataset-meta";
import {
DatasetCard,
type DatasetCardData,
@@ -22,6 +24,7 @@ export default function DashboardPage() {
const { user } = useAppUser();
const { signOut } = useAppClerk();
const [search, setSearch] = useState("");
+ const [importOpen, setImportOpen] = useState(false);
const mine = useQuery(
api.datasets.listMine,
@@ -147,6 +150,14 @@ export default function DashboardPage() {
className="w-full rounded-lg border border-border bg-surface py-2.5 pl-10 pr-3 text-sm outline-none placeholder:text-muted/60 focus:border-foreground/30 transition-[border-color] duration-150"
/>
+
+
{atLimit ? (
)}
+
+ {importOpen && (
+ setImportOpen(false)} />
+ )}
+
+ );
+}
+
+function ImportModal({ onClose }: { onClose: () => void }) {
+ const router = useRouter();
+ const [url, setUrl] = useState("");
+ const [importing, setImporting] = useState(false);
+ const [error, setError] = useState(null);
+ const [crossPreview, setCrossPreview] = useState(null);
+ const importDataset = useMutation(api.datasets.importDataset);
+ const importDatasetFromSchema = useMutation(api.datasets.importDatasetFromSchema);
+
+  // Pulls the dataset id and origin out of a pasted `/dataset/<id>` or
+  // `/share/<id>` link. Returns null for non-URLs and for ids that are not at
+  // least 20 alphanumeric characters.
+  function parseShareUrl(input: string): { id: string; origin: string } | null {
+    let link: URL;
+    try {
+      link = new URL(input.trim());
+    } catch {
+      return null;
+    }
+    const candidate = /\/(?:dataset|share)\/([^/?#]+)/.exec(link.pathname)?.[1];
+    if (!candidate || !/^[a-zA-Z0-9]{20,}$/.test(candidate)) return null;
+    return { id: candidate, origin: link.origin };
+  }
+
+ const parsed = parseShareUrl(url);
+ const extractedId = parsed?.id ?? null;
+ const sourceOrigin = parsed?.origin ?? null;
+ const currentOrigin = typeof window !== "undefined" ? window.location.origin : "";
+ const isCrossInstance = !!sourceOrigin && sourceOrigin !== currentOrigin;
+
+ const sameInstancePreview = useQuery(
+ api.datasets.get,
+ !isCrossInstance && extractedId ? { id: extractedId as Id<"datasets"> } : "skip",
+ );
+
+  // Loads the preview for a link that points at a different origin.
+  // The remote `/api/share/<id>` response is preferred; the `schema` query
+  // parameter embedded in the link is the fallback when that request fails or
+  // returns a non-OK status.
+  useEffect(() => {
+    if (!isCrossInstance || !extractedId || !sourceOrigin) {
+      setCrossPreview(null);
+      return;
+    }
+    setCrossPreview("loading");
+    const controller = new AbortController();
+
+    // Both the link's `schema` parameter and the remote response are untrusted:
+    // accept only values with a string name and well-formed columns, since the
+    // preview rendering reads `name` and `columns.length` unconditionally.
+    const toDatasetMeta = (value: unknown): DatasetMeta | null => {
+      if (typeof value !== "object" || value === null) return null;
+      const { name, columns } = value as { name?: unknown; columns?: unknown };
+      if (typeof name !== "string" || !Array.isArray(columns)) return null;
+      const columnsValid = columns.every(
+        (col: unknown) =>
+          typeof col === "object" &&
+          col !== null &&
+          typeof (col as { name?: unknown }).name === "string" &&
+          typeof (col as { type?: unknown }).type === "string",
+      );
+      return columnsValid ? (value as DatasetMeta) : null;
+    };
+
+    let fallback: DatasetMeta | null = null;
+    try {
+      const schemaParam = new URL(url.trim()).searchParams.get("schema");
+      if (schemaParam) {
+        // URLSearchParams turns an unescaped "+" into a space; restore it so
+        // base64 produced without percent-encoding still decodes.
+        const base64 = schemaParam.replace(/ /g, "+");
+        const bytes = Uint8Array.from(atob(base64), (c) => c.charCodeAt(0));
+        fallback = toDatasetMeta(JSON.parse(new TextDecoder().decode(bytes)));
+      }
+    } catch {
+      // Best effort: a missing or malformed schema parameter means no fallback.
+    }
+
+    fetch(`${sourceOrigin}/api/share/${extractedId}`, {
+      signal: controller.signal,
+    })
+      .then((r) => (r.ok ? r.json() : null))
+      .then((data: unknown) => {
+        // Skip updates from a superseded or unmounted run.
+        if (controller.signal.aborted) return;
+        setCrossPreview(toDatasetMeta(data) ?? fallback);
+      })
+      .catch(() => {
+        // An abort also rejects the fetch; without this guard the stale run
+        // would overwrite the "loading" state set by the newer run.
+        if (controller.signal.aborted) return;
+        setCrossPreview(fallback);
+      });
+    return () => controller.abort();
+  }, [isCrossInstance, extractedId, sourceOrigin, url]);
+
+ useEffect(() => {
+ function handleKey(e: KeyboardEvent) {
+ if (e.key === "Escape") onClose();
+ }
+ document.addEventListener("keydown", handleKey);
+ return () => document.removeEventListener("keydown", handleKey);
+ }, [onClose]);
+
+ const preview: DatasetMeta | null | undefined = isCrossInstance
+ ? (crossPreview === "loading" ? undefined : crossPreview)
+ : (sameInstancePreview as DatasetMeta | null | undefined);
+
+ const previewLoading = isCrossInstance
+ ? crossPreview === "loading"
+ : extractedId !== null && sameInstancePreview === undefined;
+
+ const isValidUrl = extractedId !== null;
+ const isPublic = isCrossInstance
+ ? preview !== null && preview !== undefined
+ : preview !== null && preview !== undefined && (preview as { visibility?: string }).visibility === "public";
+
+  // Creates the user's copy of the previewed dataset, then closes the modal
+  // and navigates to it. Cross-instance links are imported from the previewed
+  // schema; same-instance links are imported by source id.
+  async function handleImport() {
+    if (importing || !extractedId || !preview) return;
+    setImporting(true);
+    setError(null);
+    try {
+      const newId: Id<"datasets"> = isCrossInstance
+        ? await importDatasetFromSchema({
+            name: preview.name,
+            description: preview.description,
+            columns: preview.columns,
+          })
+        : await importDataset({ sourceId: extractedId as Id<"datasets"> });
+      onClose();
+      router.push(`/dataset/${newId}`);
+    } catch (err) {
+      const message = err instanceof Error ? err.message : "Failed to import dataset.";
+      // Any error mentioning "quota" is replaced with an upgrade prompt.
+      const quotaHit = message.toLowerCase().includes("quota");
+      setError(
+        quotaHit
+          ? "You've reached your free-tier quota. Upgrade to import more datasets."
+          : message,
+      );
+    } finally {
+      setImporting(false);
+    }
+  }
+
+ return (
+ { if (e.target === e.currentTarget) onClose(); }}
+ role="presentation"
+ >
+
+
+
Import Dataset
+
+
+
+
Paste a BigSet dataset link to add it to your account.
+
+
{ setUrl(e.target.value); setError(null); }}
+ placeholder="https://..."
+ className="w-full rounded-lg border border-border bg-foreground/[0.03] px-3 py-2 text-xs text-foreground placeholder:text-muted/50 outline-none focus:border-foreground/30 transition-[border-color]"
+ />
+
+ {isValidUrl && previewLoading && (
+
Loading preview...
+ )}
+
+ {isValidUrl && !previewLoading && preview === null && (
+
Dataset not found or not accessible.
+ )}
+
+ {preview && (
+
+
{preview.name}
+ {preview.description && (
+
{preview.description}
+ )}
+
+ {preview.columns.length} column{preview.columns.length !== 1 ? "s" : ""}
+ {preview.rowCount ? ` · ${preview.rowCount} rows` : ""}
+
+ {!isPublic && (
+
This dataset is private and cannot be imported.
+ )}
+
+ )}
+
+ {error && (
+
{error}
+ )}
+
+
+
+
+
+
);
}
diff --git a/frontend/app/dataset/[id]/layout.tsx b/frontend/app/dataset/[id]/layout.tsx
new file mode 100644
index 0000000..4923d60
--- /dev/null
+++ b/frontend/app/dataset/[id]/layout.tsx
@@ -0,0 +1,32 @@
+import type { Metadata } from "next";
+import { fetchPublicDatasetMeta } from "@/lib/fetch-dataset-meta";
+
+/**
+ * Builds page metadata (title, description, Open Graph, Twitter card) for a
+ * dataset page. Falls back to the bare "BigSet" title when
+ * `fetchPublicDatasetMeta` returns nothing for the id.
+ */
+export async function generateMetadata({
+  params,
+}: {
+  params: Promise<{ id: string }>;
+}): Promise<Metadata> {
+  const { id } = await params;
+  const dataset = await fetchPublicDatasetMeta(id);
+  if (!dataset) return { title: "BigSet" };
+
+  // Description line: "<description> · N columns · M rows", skipping empty parts.
+  const summary: string[] = [];
+  if (dataset.description) summary.push(dataset.description);
+  if (dataset.columns.length) summary.push(`${dataset.columns.length} columns`);
+  if (dataset.rowCount != null) summary.push(`${dataset.rowCount} rows`);
+  const description = summary.join(" · ");
+
+  const title = dataset.name;
+  return {
+    title: `${title} | BigSet`,
+    description,
+    openGraph: { title, description, type: "website", siteName: "BigSet" },
+    twitter: { card: "summary", title, description },
+  };
+}
+
+export default function DatasetLayout({ children }: { children: React.ReactNode }) {
+ return <>{children}>;
+}
diff --git a/frontend/app/dataset/[id]/page.tsx b/frontend/app/dataset/[id]/page.tsx
index 697addd..7a15aa6 100644
--- a/frontend/app/dataset/[id]/page.tsx
+++ b/frontend/app/dataset/[id]/page.tsx
@@ -1,6 +1,6 @@
"use client";
-import { useParams } from "next/navigation";
+import { useParams, useRouter } from "next/navigation";
import Link from "next/link";
import { useCallback, useEffect, useMemo, useRef, useState } from "react";
import { useMutation, useQuery } from "convex/react";
@@ -36,6 +36,9 @@ export default function DatasetPage() {
const [exportOpen, setExportOpen] = useState(false);
const [settingsOpen, setSettingsOpen] = useState(false);
const [confirmPopulate, setConfirmPopulate] = useState(false);
+ const [shareOpen, setShareOpen] = useState(false);
+ const [addingToMyBigSet, setAddingToMyBigSet] = useState(false);
+ const [addToMyBigSetError, setAddToMyBigSetError] = useState(null);
const [savingRefreshCadence, setSavingRefreshCadence] = useState(false);
const [savingMaxRowCount, setSavingMaxRowCount] = useState(false);
const [maxRowCountSaveError, setMaxRowCountSaveError] = useState(null);
@@ -54,8 +57,11 @@ export default function DatasetPage() {
api.datasetRows.listByDataset,
authLoading ? "skip" : { datasetId },
);
+ const router = useRouter();
const updateRefreshSettings = useMutation(api.datasets.updateRefreshSettings);
const updateMaxRowCount = useMutation(api.datasets.updateMaxRowCount);
+ const updateVisibility = useMutation(api.datasets.updateVisibility);
+ const importDataset = useMutation(api.datasets.importDataset);
const usage = useQuery(
api.quota.getMy,
isAuthenticated ? {} : "skip",
@@ -303,6 +309,22 @@ export default function DatasetPage() {
}
}, [isDatasetBusy]);
+  // Imports the dataset being viewed into the current user's account and
+  // navigates to the new copy. Failures are reported to analytics and shown
+  // through `addToMyBigSetError`.
+  async function handleAddToMyBigSet() {
+    if (addingToMyBigSet || !dataset) return;
+    const sourceId = dataset._id;
+    setAddingToMyBigSet(true);
+    setAddToMyBigSetError(null);
+    try {
+      const newId = await importDataset({ sourceId });
+      router.push(`/dataset/${newId}`);
+    } catch (err) {
+      captureException(err, { operation: "dataset_import", sourceId });
+      setAddToMyBigSetError(err instanceof Error ? err.message : "Import failed.");
+    } finally {
+      setAddingToMyBigSet(false);
+    }
+  }
+
if (authLoading || dataset === undefined || rows === undefined) {
return (
@@ -389,6 +411,37 @@ export default function DatasetPage() {
)}
+ {isOwner && (
+
+ )}
+
+ {!isOwner && dataset.visibility === "public" && (
+
+ )}
+
setSettingsOpen((o) => !o)}
@@ -476,6 +529,18 @@ export default function DatasetPage() {
onCancel={() => setConfirmPopulate(false)}
/>
)}
+
+ {shareOpen && (
+ updateVisibility({ id: dataset._id, visibility: v })}
+ onClose={() => setShareOpen(false)}
+ />
+ )}
);
}
@@ -859,3 +924,150 @@ function ConfirmPopulateModal({
);
}
+
+/* ------------------------------------------------------------------ */
+/* Share modal */
+/* ------------------------------------------------------------------ */
+
+function ShareModal({
+ datasetId,
+ datasetName,
+ description,
+ columns,
+ visibility,
+ onVisibilityChange,
+ onClose,
+}: {
+ datasetId: string;
+ datasetName: string;
+ description: string;
+ columns: Array<{ name: string; type: string; description?: string; isPrimaryKey?: boolean }>;
+ visibility: "public" | "private";
+ onVisibilityChange: (v: "public" | "private") => Promise;
+ onClose: () => void;
+}) {
+ const [saving, setSaving] = useState(false);
+ const [saveError, setSaveError] = useState(null);
+ const [copied, setCopied] = useState(false);
+  // Share link shown in the dialog. It embeds a minimal schema (name,
+  // description, column name/type/isPrimaryKey) as base64-encoded UTF-8 JSON
+  // in the `schema` query parameter. Empty during server rendering, where
+  // `window` is unavailable.
+  const shareUrl = typeof window !== "undefined"
+    ? (() => {
+        const minimal = {
+          name: datasetName,
+          description,
+          columns: columns.map(({ name, type, isPrimaryKey }) => ({ name, type, ...(isPrimaryKey ? { isPrimaryKey } : {}) })),
+        };
+        const bytes = new TextEncoder().encode(JSON.stringify(minimal));
+        // Convert byte by byte: spreading a large array into
+        // String.fromCharCode can exceed the engine's argument limit.
+        const binary = Array.from(bytes, (byte) => String.fromCharCode(byte)).join("");
+        // Base64 output may contain "+", "/" and "="; an unescaped "+" is read
+        // back as a space by URLSearchParams, so percent-encode the value.
+        const schema = encodeURIComponent(btoa(binary));
+        return `${window.location.origin}/share/${datasetId}?schema=${schema}`;
+      })()
+    : "";
+
+ useEffect(() => {
+ function handleKey(e: KeyboardEvent) {
+ if (e.key === "Escape") onClose();
+ }
+ document.addEventListener("keydown", handleKey);
+ return () => document.removeEventListener("keydown", handleKey);
+ }, [onClose]);
+
+  // Applies the requested visibility through `onVisibilityChange`, tracking the
+  // in-flight state and surfacing any failure in `saveError`.
+  async function handleToggle(next: "public" | "private") {
+    setSaving(true);
+    setSaveError(null);
+    try {
+      await onVisibilityChange(next);
+    } catch (err) {
+      const message = err instanceof Error ? err.message : "Failed to update. Try again.";
+      setSaveError(message);
+    } finally {
+      setSaving(false);
+    }
+  }
+
+  // Copies the share link to the clipboard and sets `copied` for two seconds;
+  // when the clipboard write is rejected, asks the user to copy manually.
+  async function handleCopy() {
+    try {
+      await navigator.clipboard.writeText(shareUrl);
+    } catch {
+      setSaveError("Copy failed. Select and copy the link manually.");
+      return;
+    }
+    setCopied(true);
+    setTimeout(() => setCopied(false), 2000);
+  }
+
+ return (
+ { if (e.target === e.currentTarget) onClose(); }}
+ role="presentation"
+ >
+
+
+
+ Share “{datasetName}”
+
+
+
+
+
+
+
+ {visibility === "public" ? "Public" : "Private"}
+
+
+ {visibility === "public"
+ ? "Anyone with the link can view and add this dataset."
+ : "Only you can see this dataset."}
+
+
+
+
+
+ {saveError && (
+
{saveError}
+ )}
+
+ {visibility === "public" && (
+
+
Share link
+
+ e.target.select()}
+ />
+
+
+
+ )}
+
+
+ );
+}
diff --git a/frontend/app/dataset/new/page.tsx b/frontend/app/dataset/new/page.tsx
index 63b8ebf..ae01f24 100644
--- a/frontend/app/dataset/new/page.tsx
+++ b/frontend/app/dataset/new/page.tsx
@@ -143,6 +143,9 @@ export default function NewDatasetPage() {
);
setRetrievalStrategy(schema.retrieval_strategy);
setSourceHint(schema.source_hint);
+ if (schema.suggested_row_count) {
+ setMaxRowCountInput(String(schema.suggested_row_count));
+ }
track(EVENTS.DATASET_SCHEMA_GENERATED, {
column_count: schema.columns.length,
});
diff --git a/frontend/app/share/[id]/error.tsx b/frontend/app/share/[id]/error.tsx
new file mode 100644
index 0000000..08f44ac
--- /dev/null
+++ b/frontend/app/share/[id]/error.tsx
@@ -0,0 +1,22 @@
+"use client";
+
+import Link from "next/link";
+
+export default function ShareError() {
+ return (
+
+
+
Dataset not found.
+
+ It may have been deleted or made private.
+
+
+ Go to BigSet
+
+
+
+ );
+}
diff --git a/frontend/app/share/[id]/layout.tsx b/frontend/app/share/[id]/layout.tsx
new file mode 100644
index 0000000..fd39b57
--- /dev/null
+++ b/frontend/app/share/[id]/layout.tsx
@@ -0,0 +1,32 @@
+import type { Metadata } from "next";
+import { fetchPublicDatasetMeta } from "@/lib/fetch-dataset-meta";
+
+/**
+ * Builds page metadata (title, description, Open Graph, Twitter card) for a
+ * share page. Falls back to the bare "BigSet" title when
+ * `fetchPublicDatasetMeta` returns nothing for the id.
+ */
+export async function generateMetadata({
+  params,
+}: {
+  params: Promise<{ id: string }>;
+}): Promise<Metadata> {
+  const { id } = await params;
+  const dataset = await fetchPublicDatasetMeta(id);
+  if (!dataset) return { title: "BigSet" };
+
+  // Description line: "<description> · N columns · M rows", skipping empty parts.
+  const summary: string[] = [];
+  if (dataset.description) summary.push(dataset.description);
+  if (dataset.columns.length) summary.push(`${dataset.columns.length} columns`);
+  if (dataset.rowCount != null) summary.push(`${dataset.rowCount} rows`);
+  const description = summary.join(" · ");
+
+  const title = dataset.name;
+  return {
+    title: `${title} | BigSet`,
+    description,
+    openGraph: { title, description, type: "website", siteName: "BigSet" },
+    twitter: { card: "summary", title, description },
+  };
+}
+
+export default function ShareLayout({ children }: { children: React.ReactNode }) {
+ return <>{children}>;
+}
diff --git a/frontend/app/share/[id]/page.tsx b/frontend/app/share/[id]/page.tsx
new file mode 100644
index 0000000..862fee5
--- /dev/null
+++ b/frontend/app/share/[id]/page.tsx
@@ -0,0 +1,229 @@
+"use client";
+
+import { useParams, useRouter } from "next/navigation";
+import Link from "next/link";
+import { useState } from "react";
+import { useMutation, useQuery } from "convex/react";
+import { api } from "@/convex/_generated/api";
+import type { Id } from "@/convex/_generated/dataModel";
+import { useAppAuth, useAppConvexAuth } from "@/lib/app-auth";
+import { captureException } from "@/lib/analytics";
+
+// Display labels for column types. The column list falls back to the raw
+// type string when a type has no entry here.
+const TYPE_LABELS: Record = {
+ text: "Text",
+ number: "Number",
+ boolean: "Boolean",
+ url: "URL",
+ date: "Date",
+};
+
+export default function SharePreviewPage() {
+ const params = useParams();
+ const router = useRouter();
+ const id = params.id as string;
+ const { isAuthenticated, isLoading: authLoading } = useAppConvexAuth();
+ const { userId } = useAppAuth();
+ const [importing, setImporting] = useState(false);
+ const [error, setError] = useState(null);
+
+ const dataset = useQuery(api.datasets.get, { id: id as Id<"datasets"> });
+ const rows = useQuery(
+ api.datasetRows.listByDataset,
+ dataset ? { datasetId: id as Id<"datasets"> } : "skip",
+ );
+ const importDataset = useMutation(api.datasets.importDataset);
+
+  // Copies the shared dataset into the signed-in user's account and navigates
+  // to the new copy; failures are reported to analytics and shown inline.
+  async function handleImport() {
+    if (importing || !dataset) return;
+    setImporting(true);
+    setError(null);
+    try {
+      const sourceId = id as Id<"datasets">;
+      const newId = await importDataset({ sourceId });
+      router.push(`/dataset/${newId}`);
+    } catch (err) {
+      captureException(err, { operation: "dataset_import", sourceId: id });
+      const message = err instanceof Error ? err.message : "Failed to import dataset.";
+      setError(message);
+    } finally {
+      setImporting(false);
+    }
+  }
+
+ if (authLoading || dataset === undefined) {
+ return (
+
+ );
+ }
+
+ if (!dataset || dataset.visibility !== "public") {
+ return (
+
+
+
Dataset not found.
+
It may have been deleted or made private.
+
+ Go to BigSet
+
+
+
+ );
+ }
+
+ const isOwner = !!userId && userId === dataset.ownerId;
+ const previewRows = (rows ?? []).slice(0, 5);
+
+ return (
+
+
+
+
+
+
+
+ My Datasets
+
+
+
+
+
+
+ Shared Dataset
+
+
+
+
+ {dataset.name}
+
+
+ {dataset.description && (
+
+ {dataset.description}
+
+ )}
+
+
+ {dataset.columns.length} column{dataset.columns.length !== 1 ? "s" : ""}
+ ·
+ {dataset.rowCount ?? 0} row{(dataset.rowCount ?? 0) !== 1 ? "s" : ""}
+
+
+
+
+ Columns
+
+
+ {dataset.columns.map((col, i) => (
+
+
+ {TYPE_LABELS[col.type] ?? col.type}
+
+
+
{col.name}
+ {col.description && (
+
{col.description}
+ )}
+
+
+ ))}
+
+
+
+ {previewRows.length > 0 && (
+
+
+ Preview
+
+
+
+
+
+ {dataset.columns.map((col) => (
+ |
+ {col.name}
+ |
+ ))}
+
+
+
+ {previewRows.map((row, i) => (
+
+ {dataset.columns.map((col) => (
+ |
+ {String(row.data[col.name] ?? "")}
+ |
+ ))}
+
+ ))}
+
+
+
+ {(dataset.rowCount ?? 0) > 5 && (
+
+ Showing 5 of {dataset.rowCount} rows
+
+ )}
+
+ )}
+
+
+ {isOwner ? (
+ <>
+
This is your dataset
+
+ You shared this link. Others can use it to add a copy to their BigSet.
+
+
+ View your dataset
+
+ >
+ ) : (
+ <>
+
Add this dataset to your BigSet
+
+ Import the schema and populate it with fresh data from the web.
+
+
+ {error && (
+
{error}
+ )}
+
+ {isAuthenticated ? (
+
+ ) : (
+
+ Sign in to add this dataset
+
+ )}
+ >
+ )}
+
+
+
+ );
+}
diff --git a/frontend/convex/datasets.ts b/frontend/convex/datasets.ts
index d295327..6f33dab 100644
--- a/frontend/convex/datasets.ts
+++ b/frontend/convex/datasets.ts
@@ -489,6 +489,73 @@ export const updateStatus = mutation({
},
});
+/**
+ * Sets a dataset's visibility to "public" or "private".
+ * The dataset is resolved through `loadOwnedDataset` before it is patched.
+ * NOTE(review): presumably that helper rejects callers who are not the owner — confirm.
+ */
+export const updateVisibility = mutation({
+  args: {
+    id: v.id("datasets"),
+    visibility: v.union(v.literal("public"), v.literal("private")),
+  },
+  handler: async (ctx, { id, visibility }) => {
+    const owned = await loadOwnedDataset(ctx, id);
+    await ctx.db.patch(owned._id, { visibility });
+  },
+});
+
+/**
+ * Copies a public dataset's name, description and columns into a new private
+ * dataset owned by the caller. No rows are copied: the copy starts paused with
+ * rowCount 0 and manual, disabled refresh.
+ *
+ * Throws when the source is not public or is already owned by the caller; the
+ * quota check runs against the source's max row count (or the default).
+ *
+ * @returns The id of the newly inserted dataset.
+ */
+export const importDataset = mutation({
+  args: { sourceId: v.id("datasets") },
+  handler: async (ctx, { sourceId }) => {
+    const { subject: importerId } = await requireIdentity(ctx);
+    assertNotReservedOwner(importerId);
+
+    const source = await loadReadableDataset(ctx, sourceId);
+    if (source.visibility !== "public") {
+      throw new Error("Dataset is not public.");
+    }
+    if (source.ownerId === importerId) {
+      throw new Error("You already own this dataset.");
+    }
+
+    const maxRowCount = source.maxRowCount ?? DEFAULT_MAX_ROW_COUNT;
+    await requireQuotaRemaining(ctx, importerId, maxRowCount);
+
+    const { name, description, columns } = source;
+    return await ctx.db.insert("datasets", {
+      name,
+      description,
+      columns,
+      ownerId: importerId,
+      status: "paused",
+      visibility: "private",
+      rowCount: 0,
+      refreshCadence: "manual",
+      refreshEnabled: false,
+      maxRowCount,
+      nextRefreshAt: nextRefreshAtFor("manual", Date.now()),
+    });
+  },
+});
+
+/**
+ * Creates a new private, paused dataset for the caller from a schema supplied
+ * by the client (used for links that point at another instance). The quota
+ * check and the stored max row count both use DEFAULT_MAX_ROW_COUNT.
+ *
+ * NOTE(review): name, description and columns come straight from the client
+ * and are constrained only by the argument validators — confirm whether the
+ * checks applied on regular dataset creation should also run here.
+ *
+ * @returns The id of the newly inserted dataset.
+ */
+export const importDatasetFromSchema = mutation({
+  args: {
+    name: v.string(),
+    description: v.string(),
+    columns: v.array(columnValidator),
+  },
+  handler: async (ctx, { name, description, columns }) => {
+    const { subject: importerId } = await requireIdentity(ctx);
+    assertNotReservedOwner(importerId);
+    await requireQuotaRemaining(ctx, importerId, DEFAULT_MAX_ROW_COUNT);
+
+    return await ctx.db.insert("datasets", {
+      name,
+      description,
+      columns,
+      ownerId: importerId,
+      status: "paused",
+      visibility: "private",
+      rowCount: 0,
+      refreshCadence: "manual",
+      refreshEnabled: false,
+      maxRowCount: DEFAULT_MAX_ROW_COUNT,
+      nextRefreshAt: nextRefreshAtFor("manual", Date.now()),
+    });
+  },
+});
+
export const remove = mutation({
args: { id: v.id("datasets") },
handler: async (ctx, args) => {
diff --git a/frontend/lib/backend.ts b/frontend/lib/backend.ts
index f5ea3e5..c0889d1 100644
--- a/frontend/lib/backend.ts
+++ b/frontend/lib/backend.ts
@@ -5,6 +5,7 @@ export interface InferredSchema {
primary_key: string;
retrieval_strategy: "search_fetch" | "browser" | "hybrid";
source_hint: string;
+ suggested_row_count?: number;
}
export interface InferredColumn {
diff --git a/frontend/lib/fetch-dataset-meta.ts b/frontend/lib/fetch-dataset-meta.ts
new file mode 100644
index 0000000..b7ee70e
--- /dev/null
+++ b/frontend/lib/fetch-dataset-meta.ts
@@ -0,0 +1,36 @@
+/** Column value types a dataset column may declare. */
+export type ColumnType = "text" | "number" | "boolean" | "url" | "date";
+
+/**
+ * Dataset metadata used for page metadata and import previews: name,
+ * description, optional row count, and the column definitions.
+ */
+export interface DatasetMeta {
+ name: string;
+ description: string;
+ rowCount?: number;
+ columns: { name: string; type: ColumnType; description?: string; isPrimaryKey?: boolean }[];
+}
+
+/**
+ * Fetches a dataset through the Convex HTTP query endpoint (`datasets:get`)
+ * and returns it only when its visibility is "public".
+ *
+ * @param id - Dataset id passed to the query.
+ * @param options.noCache - When true the request uses `cache: "no-store"`;
+ *   otherwise the response is revalidated every 60 seconds.
+ * @returns The dataset, or null when it is missing, not public, the query does
+ *   not report success, or the request fails or exceeds the 5 s timeout.
+ */
+export async function fetchPublicDatasetMeta(
+  id: string,
+  { noCache = false } = {},
+): Promise<DatasetMeta | null> {
+  const convexUrl =
+    process.env.CONVEX_URL ??
+    process.env.NEXT_PUBLIC_CONVEX_URL ??
+    "http://localhost:3210";
+  const cacheOptions = noCache
+    ? { cache: "no-store" as const }
+    : { next: { revalidate: 60 } };
+
+  try {
+    const res = await fetch(`${convexUrl}/api/query`, {
+      method: "POST",
+      headers: { "Content-Type": "application/json" },
+      body: JSON.stringify({ path: "datasets:get", args: { id }, format: "json" }),
+      signal: AbortSignal.timeout(5000),
+      ...cacheOptions,
+    });
+    if (!res.ok) return null;
+
+    const { status, value } = await res.json();
+    if (status !== "success") return null;
+    return value?.visibility === "public" ? (value as DatasetMeta) : null;
+  } catch {
+    // Network failure, timeout, or an unparsable body all count as "no metadata".
+    return null;
+  }
+}
diff --git a/frontend/next.config.ts b/frontend/next.config.ts
index 6c1915e..ac0b3bb 100644
--- a/frontend/next.config.ts
+++ b/frontend/next.config.ts
@@ -2,7 +2,7 @@ import type { NextConfig } from "next";
const nextConfig: NextConfig = {
output: "standalone",
- allowedDevOrigins: ["127.0.0.1"],
+ allowedDevOrigins: ["127.0.0.1", "localhost"],
env: {
NEXT_PUBLIC_PROD: process.env.NEXT_PUBLIC_PROD ?? process.env.PROD ?? "",
},
diff --git a/frontend/proxy.ts b/frontend/proxy.ts
index 3e25634..46ede59 100644
--- a/frontend/proxy.ts
+++ b/frontend/proxy.ts
@@ -32,6 +32,7 @@ function isPublicPath(req: NextRequest): boolean {
path !== "/dataset/new" &&
!path.startsWith("/dataset/new/")
) return true;
+ if (path.startsWith("/share/")) return true;
return false;
}