diff --git a/.env.example b/.env.example deleted file mode 100644 index f522353..0000000 --- a/.env.example +++ /dev/null @@ -1,50 +0,0 @@ -# This is the only local env file BigSet expects. -# Copy this file to .env and fill in your values. - -# TinyFish (required) — web search + dataset population. -# Generate at https://agent.tinyfish.ai/api-keys?utm_source=github&utm_medium=organic&utm_campaign=bigset-developer-2026q2 -TINYFISH_API_KEY= - -# OpenRouter (required) — schema inference + AI agents. -# Generate at https://openrouter.ai/settings/keys -OPENROUTER_API_KEY=sk-or-... - -# OpenRouter model slugs for each AI task. -# Defaults (used when no user preference is saved): -# SCHEMA_INFERENCE_MODEL: anthropic/claude-sonnet-4.6 (powerful for schema inference) -# POPULATE_ORCHESTRATOR_MODEL: qwen/qwen3.7-max (cost-effective orchestrator) -# INVESTIGATE_SUBAGENT_MODEL: qwen/qwen3.7-max (cost-effective subagent) -# Find model IDs at https://openrouter.ai/models — any OpenRouter model slug is valid. -SCHEMA_INFERENCE_MODEL=anthropic/claude-sonnet-4.6 -POPULATE_ORCHESTRATOR_MODEL=qwen/qwen3.7-max -INVESTIGATE_SUBAGENT_MODEL=qwen/qwen3.7-max - -# Clerk (required) — user authentication. -# Create a free app at https://dashboard.clerk.com -# Enable the Clerk JWT Templates -> Convex template, then set your issuer URL. -NEXT_PUBLIC_CLERK_PUBLISHABLE_KEY=pk_test_... -CLERK_SECRET_KEY=sk_test_... -CLERK_JWT_ISSUER_DOMAIN=https://your-app.clerk.accounts.dev - -# Auto-generated by `make dev` on first run. Do not fill in manually. -CONVEX_SELF_HOSTED_ADMIN_KEY= - -# Local service URLs -CLIENT_ORIGIN=http://localhost:3500 -CONVEX_URL=http://localhost:3210 -NEXT_PUBLIC_CONVEX_URL=http://127.0.0.1:3210 -CONVEX_SELF_HOSTED_URL=http://127.0.0.1:3210 -NEXT_PUBLIC_BACKEND_URL=http://localhost:3501 -PORT=3501 - -# Optional — the following keys are not required to run BigSet. - -# Resend (optional — transactional emails when a populate workflow finishes). 
-# Unset → email module logs and no-ops. Generate at https://resend.com/api-keys -RESEND_API_KEY= -EMAIL_FROM="BigSet " - -# PostHog (optional — leave blank to disable analytics entirely in local dev). -# Get from https://us.posthog.com/project/settings/general. -NEXT_PUBLIC_POSTHOG_KEY= -NEXT_PUBLIC_POSTHOG_HOST=https://us.i.posthog.com diff --git a/.github/workflows/build-release-artifacts.yml b/.github/workflows/build-release-artifacts.yml new file mode 100644 index 0000000..53877f9 --- /dev/null +++ b/.github/workflows/build-release-artifacts.yml @@ -0,0 +1,101 @@ +--- +name: Build Release Artifacts + +on: # yamllint disable-line rule:truthy + workflow_dispatch: + inputs: + release_tag: + description: Existing release tag to upload assets to. Leave empty to only upload workflow artifacts. + required: false + type: string + release: + types: [published] + +permissions: + contents: write + +jobs: + build: + name: Build ${{ matrix.platform }} + runs-on: ${{ matrix.runner }} + strategy: + fail-fast: false + matrix: + include: + - platform: darwin-arm64 + runner: macos-15 + - platform: darwin-x64 + runner: macos-15-intel + - platform: linux-arm64 + runner: ubuntu-24.04-arm + - platform: linux-x64 + runner: ubuntu-24.04 + - platform: win32-arm64 + runner: windows-11-arm + - platform: win32-x64 + runner: windows-2025 + + env: + ARTIFACT_NAME: bigset-build-${{ matrix.platform }}.zip + RELEASE_TAG: ${{ github.event.release.tag_name || inputs.release_tag }} + + steps: + - name: Checkout + uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 + with: + persist-credentials: false + + - name: Setup Node + uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 + with: + node-version: "24" + + - name: Install frontend dependencies + working-directory: frontend + run: npm install --silent + + - name: Install backend dependencies + working-directory: backend + run: npm install --silent + + - name: Build release + run: node scripts/build-release.mjs + + - 
name: Rename artifact + run: node -e "const fs = require('fs'); fs.renameSync('dist/bigset-build.zip', 'dist/' + process.env.ARTIFACT_NAME);" + + - name: Upload workflow artifact + uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 + with: + name: ${{ matrix.platform }} + path: dist/${{ env.ARTIFACT_NAME }} + if-no-files-found: error + + - name: Validate release tag + if: github.event_name == 'release' || inputs.release_tag != '' + shell: bash + run: | + if [[ -z "$RELEASE_TAG" ]]; then + echo "Release tag is required when uploading release assets." >&2 + exit 1 + fi + if [[ ! "$RELEASE_TAG" =~ ^[A-Za-z0-9][A-Za-z0-9._/@+-]*$ ]]; then + echo "Release tag contains unsupported characters." >&2 + exit 1 + fi + + - name: Upload release asset + if: github.event_name == 'release' || inputs.release_tag != '' + shell: bash + env: + GH_TOKEN: ${{ github.token }} + run: gh release upload "$RELEASE_TAG" "dist/$ARTIFACT_NAME" --clobber + + - name: Upload legacy release asset + if: (github.event_name == 'release' || inputs.release_tag != '') && matrix.platform == 'darwin-arm64' + shell: bash + env: + GH_TOKEN: ${{ github.token }} + run: | + node -e "const fs = require('fs'); fs.copyFileSync('dist/' + process.env.ARTIFACT_NAME, 'dist/bigset-build.zip');" + gh release upload "$RELEASE_TAG" "dist/bigset-build.zip" --clobber diff --git a/.gitignore b/.gitignore index 60e55f6..65cd7ef 100644 --- a/.gitignore +++ b/.gitignore @@ -18,6 +18,8 @@ yarn-debug.log* # Local-only files *.bak +.local/ +dist/ tmp/ temp/ diff --git a/AGENTS.md b/AGENTS.md index fae66c0..d821a46 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -8,7 +8,6 @@ ## What not to do -- Do not add Clerk, Auth0, or any third-party auth service. We use Better Auth (self-hosted). - Do not add API routes to the frontend. All API logic belongs in the backend. - Do not hardcode ports. Read from env vars (`PORT`, `CLIENT_ORIGIN`, `BETTER_AUTH_URL`). - Do not commit `.env` files or secrets. 
diff --git a/README.md b/README.md index f63efee..25802f1 100644 --- a/README.md +++ b/README.md @@ -67,63 +67,72 @@ Any dataset. Any source. Always fresh. That's the idea. ## 🚀 Quick Start -**Prerequisites:** [Docker](https://docs.docker.com/get-docker/) and [Make](https://www.gnu.org/software/make/) +**Prerequisites:** [Node.js](https://nodejs.org/) 22+ with npm. -You'll also need API keys from three services (all free to set up): +```bash +npm install --global @adamexu/bigset +bigset +``` + +That's it. The `bigset` command downloads the current local BigSet release, +starts Convex, the backend, the frontend, and the local credential bridge, then +prints the app URL. Open [127.0.0.1:3500](http://127.0.0.1:3500) in your web browser to use it. + +The first run caches release files under `~/.bigset`; after that, starting +BigSet is designed to take only a few seconds. + +On first launch, BigSet sends you to setup. You'll connect two services: | Service | What it's for | Get your key | |---------|--------------|-------------| | **TinyFish** | Web search + page fetching | [tinyfish.ai/api-keys](https://agent.tinyfish.ai/api-keys?utm_source=github&utm_medium=organic&utm_campaign=bigset-developer-2026q2) | | **OpenRouter** | LLM calls (schema inference + agents) | [openrouter.ai/settings/keys](https://openrouter.ai/settings/keys) | -| **Clerk** | User authentication | [dashboard.clerk.com](https://dashboard.clerk.com) | -### Step 1: Clone the repo +Local API keys are stored in your OS keychain. + +For a one-off run without installing globally: ```bash -git clone https://github.com/tinyfish-io/bigset.git -cd bigset -cp .env.example .env +npx @adamexu/bigset ``` -### Step 2: Set up TinyFish (web access) +Useful local options: -TinyFish powers all web search and page fetching. Search and Fetch have generous rate limits. - -1. Go to [tinyfish.ai](https://www.tinyfish.ai?utm_source=github&utm_medium=organic&utm_campaign=bigset-developer-2026q2) and create an account -2. 
Go to [API Keys](https://agent.tinyfish.ai/api-keys?utm_source=github&utm_medium=organic&utm_campaign=bigset-developer-2026q2) and create a key -3. Paste it as `TINYFISH_API_KEY` in `.env` +| Command | What it does | +|---------|-------------| +| `bigset --force` | Redownload the latest cached release | +| `bigset --app-port 4500 --backend-port 4501` | Use alternate app/backend ports | +| `bigset --home ~/.bigset-dev` | Use a separate local cache directory | -### Step 3: Set up OpenRouter (LLM) +--- -OpenRouter routes LLM calls to Claude Sonnet (schema inference) and Qwen (agents). It's pay-as-you-go; a dataset costs a few dollars in LLM usage. +## Developing From Source -1. Go to [openrouter.ai](https://openrouter.ai) and create an account -2. Go to [Settings → Keys](https://openrouter.ai/settings/keys) and create an API key -3. Paste it as `OPENROUTER_API_KEY` in `.env` -4. Add some credits; $5-10 is plenty to start +Use this path when you're changing BigSet itself. The supported development +workflow is still `make dev`. -### Step 4: Set up Clerk (auth) +**Prerequisites:** [Node.js](https://nodejs.org/) 22+ with npm, +[Docker](https://docs.docker.com/get-docker/), and +[Make](https://www.gnu.org/software/make/). -Clerk handles user sign-in. The setup takes ~2 minutes: +### Step 1: Clone the repo -1. Go to [dashboard.clerk.com](https://dashboard.clerk.com) and create a new application -2. Pick a sign-in method (email, Google, GitHub, whatever you prefer) -3. Once created, go to **Configure → API Keys** in the sidebar - - Copy **Publishable Key** → paste as `NEXT_PUBLIC_CLERK_PUBLISHABLE_KEY` in `.env` - - Copy **Secret Key** → paste as `CLERK_SECRET_KEY` in `.env` -4. Go to **Configure → JWT Templates** in the sidebar - - Click **New template** → select the **Convex** template → click **Save** -5. 
Go to **Configure → Settings** (or **Domains**) - - Find your **Issuer URL** (looks like `https://your-app-name.clerk.accounts.dev`) - - Paste it as `CLERK_JWT_ISSUER_DOMAIN` in `.env` +```bash +git clone https://github.com/tinyfish-io/bigset.git +cd bigset +``` -### Step 5: Start everything +### Step 2: Start everything ```bash make dev ``` -This installs dependencies, builds and starts all Docker services (Postgres, Convex, frontend, backend, Mastra), and deploys the Convex schema. On first run, it automatically generates the Convex admin key — no manual steps needed. See [How `make dev` Works](#how-make-dev-works) for the full breakdown. +`make dev` creates a local `.env` if needed, installs dependencies, builds and +starts all Docker services (Postgres, Convex, frontend, backend, Mastra), and +deploys the Convex schema. On first run, it automatically generates the Convex +admin key. See [How `make dev` Works](#how-make-dev-works) for the full +breakdown. Once everything is ready, you'll see: @@ -133,13 +142,26 @@ Once everything is ready, you'll see: | **Convex dashboard** | [localhost:6791](http://localhost:6791) | | **Mastra Studio** (workflow inspector) | [localhost:4111](http://localhost:4111) | -Open [localhost:3500](http://localhost:3500) and click **Get started** to sign in. +Open [localhost:3500](http://localhost:3500). The setup screen will ask for +TinyFish and OpenRouter credentials and save them to your OS keychain for this +workspace. + +### Step 3: Connect TinyFish and OpenRouter + +TinyFish powers web search and page fetching. OpenRouter routes LLM calls to +the models BigSet uses for schema inference and agents. + +1. Create a TinyFish key at [agent.tinyfish.ai/api-keys](https://agent.tinyfish.ai/api-keys?utm_source=github&utm_medium=organic&utm_campaign=bigset-developer-2026q2) +2. Create an OpenRouter key at [openrouter.ai/settings/keys](https://openrouter.ai/settings/keys) +3. 
Paste both into BigSet's setup screen + +OpenRouter is pay-as-you-go; $5-10 is plenty to start. > **Note:** root `.env` is the only local env file. If you edit Convex functions in `frontend/convex/`, run `make convex-push` to deploy the changes. -> **Free tier:** each signed-in account gets **2,500 row operations per calendar month** (resets on the 1st, UTC). The header shows a live usage badge; system-owned curated datasets bypass the quota. +> **Free tier:** cloud signed-in accounts get **2,500 row operations per calendar month** (resets on the 1st, UTC). Local mode bypasses the cloud quota and uses your TinyFish/OpenRouter accounts directly. -### Step 6 (optional): Load curated datasets +### Step 4 (optional): Load curated datasets BigSet includes 9 curated public datasets (AI companies hiring, GPU prices, model pricing, etc.) that show on the landing page: @@ -155,15 +177,16 @@ This is idempotent; safe to run multiple times. `make dev` is designed to handle everything — first run, subsequent runs, and recovery from bad state. You should never need to run any other setup command. Here's what it does, in order: -1. **Validates your `.env`** — checks that all required API keys are set (Clerk, OpenRouter, TinyFish). Stops with a clear error if anything is missing. +1. **Prepares your `.env`** — creates the file if it is missing and generates the local keychain bridge settings automatically. 2. **Installs dependencies** — runs `npm install` in both `frontend/` and `backend/`. Silent if already up to date. -3. **Starts the database layer** — brings up Postgres and Convex (self-hosted) first, since other services depend on them. -4. **Waits for Convex** — polls the Convex health endpoint until it's ready (up to 120s). -5. **Ensures the admin key** — if `CONVEX_SELF_HOSTED_ADMIN_KEY` is empty in `.env`, generates one automatically and writes it. If a key exists, validates it against the running Convex instance. If the key is stale (e.g. 
you ran `make clean` and wiped the database), it detects the mismatch and regenerates. -6. **Pushes Convex config** — sets the Clerk JWT issuer URL in Convex so auth tokens are validated correctly. -7. **Deploys Convex schema** — pushes the table schema and functions from `frontend/convex/` to the running instance. -8. **Starts remaining services** — brings up the frontend, backend, and Mastra. These read the now-populated `.env` including the admin key. -9. **Streams logs** — tails all container logs so you can see what's happening. `Ctrl+C` to stop watching (containers keep running). +3. **Starts the local keychain bridge** — runs a host-side helper so Docker services can read/write this workspace's OS keychain entries. +4. **Starts the database layer** — brings up Postgres and Convex (self-hosted) first, since other services depend on them. +5. **Waits for Convex** — polls the Convex health endpoint until it's ready (up to 120s). +6. **Ensures the admin key** — if `CONVEX_SELF_HOSTED_ADMIN_KEY` is empty in `.env`, generates one automatically and writes it. If a key exists, validates it against the running Convex instance. If the key is stale (e.g. you ran `make clean` and wiped the database), it detects the mismatch and regenerates. +7. **Configures Convex auth** — sets `BIGSET_LOCAL_MODE=1` for the local app. +8. **Deploys Convex schema** — pushes the table schema and functions from `frontend/convex/` to the running instance. +9. **Starts remaining services** — brings up the frontend, backend, and Mastra. These read the now-populated `.env` including the admin key. +10. **Streams logs** — tails all container logs so you can see what's happening. `Ctrl+C` to stop watching (containers keep running). 
### Commands @@ -188,8 +211,7 @@ Other commands you might use during development: | Problem | What happens | |---------|-------------| -| Missing `.env` | Error: "Run: cp .env.example .env" | -| Missing API key | Error tells you exactly which key to set | +| Missing `.env` | `make dev` creates a local one automatically | | Stale admin key (after `make clean`) | Detected automatically, regenerated | | Containers already running | No-op for running services, starts any that are missing | | Convex won't start | Error after 120s timeout — check Docker is running | @@ -202,12 +224,10 @@ If you want a completely fresh start: `make clean` then `make dev`. | Variable | Required | Where to get it | |----------|----------|----------------| -| `TINYFISH_API_KEY` | ✅ | [tinyfish.ai](https://agent.tinyfish.ai/api-keys?utm_source=github&utm_medium=organic&utm_campaign=bigset-developer-2026q2) → API Keys | -| `OPENROUTER_API_KEY` | ✅ | openrouter.ai → Settings → Keys | -| `NEXT_PUBLIC_CLERK_PUBLISHABLE_KEY` | ✅ | Clerk dashboard → API Keys | -| `CLERK_SECRET_KEY` | ✅ | Clerk dashboard → API Keys | -| `CLERK_JWT_ISSUER_DOMAIN` | ✅ | Clerk dashboard → Settings/Domains | +| `TINYFISH_API_KEY` | Optional | Usually entered in setup and stored in your OS keychain | +| `OPENROUTER_API_KEY` | Optional | Usually entered in setup and stored in your OS keychain | | `CONVEX_SELF_HOSTED_ADMIN_KEY` | Auto | Auto-generated by `make dev` on first run | +| `LOCAL_KEYCHAIN_PORT`, `LOCAL_KEYCHAIN_TOKEN`, `BIGSET_LOCAL_WORKSPACE_ID` | Auto | Auto-generated by `make dev` for local OS keychain access | | `RESEND_API_KEY` | Optional | For "dataset ready" emails. Leave blank to skip. | | `NEXT_PUBLIC_POSTHOG_KEY` | Optional | For product analytics. Leave blank to disable. | @@ -219,7 +239,7 @@ If you want a completely fresh start: `make clean` then `make dev`. 
|-------|------| | Frontend | Next.js 16, React 19, Tailwind 4 | | Backend | Fastify, TypeScript (agent runner) | -| Auth | [Clerk](https://clerk.com) | +| Auth | Local auth | | Database | [Convex](https://convex.dev) (self-hosted) | | Data Collection | [TinyFish](https://www.tinyfish.ai?utm_source=github&utm_medium=organic&utm_campaign=bigset-developer-2026q2) APIs (Search, Fetch, Browser) | | AI orchestration | [Mastra](https://mastra.ai) workflows + [Vercel AI SDK](https://sdk.vercel.ai) + [OpenRouter](https://openrouter.ai) → Claude Sonnet (schema inference + populate agent) | diff --git a/backend/package-lock.json b/backend/package-lock.json index a95de5a..e231b48 100644 --- a/backend/package-lock.json +++ b/backend/package-lock.json @@ -11,6 +11,7 @@ "@clerk/backend": "^3.4.11", "@fastify/cors": "^11.0.0", "@mastra/core": "^1.36.0", + "@napi-rs/keyring": "^1.3.0", "@openrouter/ai-sdk-provider": "^2.9.0", "ai": "^6.0.0", "convex": "^1.39.1", @@ -1802,6 +1803,240 @@ } } }, + "node_modules/@napi-rs/keyring": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/@napi-rs/keyring/-/keyring-1.3.0.tgz", + "integrity": "sha512-WrOw/bcXm0f9qHkumlT1QlArXSTWqaY9sunsDpOk+yCCorCKMxvWT/a3xko4EYHVdeZoh00yI2TydXn6eyICDA==", + "license": "MIT", + "engines": { + "node": ">= 10" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/Brooooooklyn" + }, + "optionalDependencies": { + "@napi-rs/keyring-darwin-arm64": "1.3.0", + "@napi-rs/keyring-darwin-x64": "1.3.0", + "@napi-rs/keyring-freebsd-x64": "1.3.0", + "@napi-rs/keyring-linux-arm-gnueabihf": "1.3.0", + "@napi-rs/keyring-linux-arm64-gnu": "1.3.0", + "@napi-rs/keyring-linux-arm64-musl": "1.3.0", + "@napi-rs/keyring-linux-riscv64-gnu": "1.3.0", + "@napi-rs/keyring-linux-x64-gnu": "1.3.0", + "@napi-rs/keyring-linux-x64-musl": "1.3.0", + "@napi-rs/keyring-win32-arm64-msvc": "1.3.0", + "@napi-rs/keyring-win32-ia32-msvc": "1.3.0", + "@napi-rs/keyring-win32-x64-msvc": "1.3.0" + } + }, + 
"node_modules/@napi-rs/keyring-darwin-arm64": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/@napi-rs/keyring-darwin-arm64/-/keyring-darwin-arm64-1.3.0.tgz", + "integrity": "sha512-pl76hJvdYUBn6I24bXiOBMA9nbDapo3I5B+f3OorjDU4dUMSypXeKbOVehJe8fhgTiH24flMyTS3aAIy43xegQ==", + "cpu": [ + "arm64" + ], + "license": "MIT", + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@napi-rs/keyring-darwin-x64": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/@napi-rs/keyring-darwin-x64/-/keyring-darwin-x64-1.3.0.tgz", + "integrity": "sha512-YcJtEV5LA3cvA4z3BurgxH5IhTsW1JfIvcAAcqcecwk06Si9F9NqkxbZVIfDwQ8oRHgaBmT3zZJnLAotCrVahw==", + "cpu": [ + "x64" + ], + "license": "MIT", + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@napi-rs/keyring-freebsd-x64": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/@napi-rs/keyring-freebsd-x64/-/keyring-freebsd-x64-1.3.0.tgz", + "integrity": "sha512-vlLf31TGhfRAaxLDBhg8b89ss0HHD/lyNmL5F3UjSaz5CUXElsJmKYq9fqA/B+cZKUEUcLHHGhF0I/CqcFdaVw==", + "cpu": [ + "x64" + ], + "license": "MIT", + "optional": true, + "os": [ + "freebsd" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@napi-rs/keyring-linux-arm-gnueabihf": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/@napi-rs/keyring-linux-arm-gnueabihf/-/keyring-linux-arm-gnueabihf-1.3.0.tgz", + "integrity": "sha512-KiWdMMu/Inz/bHHIAGrnF7r54FZDYXuHO6UFF/rhIrshUsxbMG1Rl9lEymNtqqsVo927G0VYcb02FzWQ3iBQRQ==", + "cpu": [ + "arm" + ], + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@napi-rs/keyring-linux-arm64-gnu": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/@napi-rs/keyring-linux-arm64-gnu/-/keyring-linux-arm64-gnu-1.3.0.tgz", + "integrity": 
"sha512-eyKGpY40lm9Jvs1aD294XRH4y7+TlJM0YVAryZeXA6TX0mb4gMkxVXwSQv7MCwgah7raeUd0dKUb4BPAYIgcMg==", + "cpu": [ + "arm64" + ], + "libc": [ + "glibc" + ], + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@napi-rs/keyring-linux-arm64-musl": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/@napi-rs/keyring-linux-arm64-musl/-/keyring-linux-arm64-musl-1.3.0.tgz", + "integrity": "sha512-iIK6JWHXAJqDrEyLY3TmswwloVyt2vj+04TZnew+uSJ9gnDO8EwRbp3/iw3LpWaXiDO7VomGO6y8I0Id8uBZSw==", + "cpu": [ + "arm64" + ], + "libc": [ + "musl" + ], + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@napi-rs/keyring-linux-riscv64-gnu": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/@napi-rs/keyring-linux-riscv64-gnu/-/keyring-linux-riscv64-gnu-1.3.0.tgz", + "integrity": "sha512-/PGqrwn6EwgtK6vccASSXJRfOSP4vN1F4ASsIQ+7MdrK6hNvAJ1FZPrIuD5gGGdxezo3F++To2Wq7DbuGIeuNQ==", + "cpu": [ + "riscv64" + ], + "libc": [ + "glibc" + ], + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@napi-rs/keyring-linux-x64-gnu": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/@napi-rs/keyring-linux-x64-gnu/-/keyring-linux-x64-gnu-1.3.0.tgz", + "integrity": "sha512-2PDK1WKWTu9lBGq9VvNEkSlQD3O7YwVpmnyN2M3cy4v7NJ/8gDMd9GXv3G+FVXN13uhp4gnnPBS+ScefmEeD2A==", + "cpu": [ + "x64" + ], + "libc": [ + "glibc" + ], + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@napi-rs/keyring-linux-x64-musl": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/@napi-rs/keyring-linux-x64-musl/-/keyring-linux-x64-musl-1.3.0.tgz", + "integrity": "sha512-oJ2HkX8YUo46QBkn0pG+HuIKQNqr523q6vBobCn+P95s4C4K6/kLBqHY/1bg5J4ap31DzsznhnFKcfBNBsjCnw==", + "cpu": [ + "x64" + ], + "libc": [ + "musl" + ], + 
"license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@napi-rs/keyring-win32-arm64-msvc": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/@napi-rs/keyring-win32-arm64-msvc/-/keyring-win32-arm64-msvc-1.3.0.tgz", + "integrity": "sha512-tOd3c/uAaeoE4ycVlmAdSvygz0Zt3zdca6Y7gokBeIbaRDWpjDIUOpU3MvML59XAaqyuKGsVVu0F/DZb1lHPmw==", + "cpu": [ + "arm64" + ], + "license": "MIT", + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@napi-rs/keyring-win32-ia32-msvc": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/@napi-rs/keyring-win32-ia32-msvc/-/keyring-win32-ia32-msvc-1.3.0.tgz", + "integrity": "sha512-sPSqeAFZMGqP1R++M2JTza7GQJJ/TpCo6JU6Vcd4jnebvOaEDs9b7eipakU1PJdSvhpC2yXMCNRk9gXfrhuwHQ==", + "cpu": [ + "ia32" + ], + "license": "MIT", + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@napi-rs/keyring-win32-x64-msvc": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/@napi-rs/keyring-win32-x64-msvc/-/keyring-win32-x64-msvc-1.3.0.tgz", + "integrity": "sha512-4DnCWXwDc0HRKwyRlG5y0VhKZW2tNRQfKKfyj6IX/KWfDNyq9hn4n+GL1auyDcOO/v8PwnhmYo2+rOOqCkvvOg==", + "cpu": [ + "x64" + ], + "license": "MIT", + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": ">= 10" + } + }, "node_modules/@nodelib/fs.scandir": { "version": "2.1.5", "resolved": "https://registry.npmjs.org/@nodelib/fs.scandir/-/fs.scandir-2.1.5.tgz", diff --git a/backend/package.json b/backend/package.json index 433c853..11df78b 100644 --- a/backend/package.json +++ b/backend/package.json @@ -13,6 +13,7 @@ "@clerk/backend": "^3.4.11", "@fastify/cors": "^11.0.0", "@mastra/core": "^1.36.0", + "@napi-rs/keyring": "^1.3.0", "@openrouter/ai-sdk-provider": "^2.9.0", "ai": "^6.0.0", "convex": "^1.39.1", diff --git a/backend/src/clerk-auth.ts b/backend/src/clerk-auth.ts index 0a046ae..de28dca 100644 
--- a/backend/src/clerk-auth.ts +++ b/backend/src/clerk-auth.ts @@ -8,6 +8,7 @@ import fp from "fastify-plugin"; import { createClerkClient, type ClerkClient } from "@clerk/backend"; import { env } from "./env.js"; +import { LOCAL_USER_ID } from "./local-credentials.js"; /** * Clerk JWT verification for the Fastify backend. @@ -41,7 +42,7 @@ declare module "fastify" { } const clerkPlugin: FastifyPluginAsync = async (fastify: FastifyInstance) => { - if (!env.CLERK_SECRET_KEY) { + if (env.IS_PROD && !env.CLERK_SECRET_KEY) { fastify.log.warn( "CLERK_SECRET_KEY not set — protected routes will reject all requests. " + "Set it before adding routes that require auth.", @@ -66,6 +67,7 @@ export async function getUserEmail( clerk: ClerkClient, userId: string, ): Promise { + if (env.IS_LOCAL_MODE) return null; try { const user = await clerk.users.getUser(userId); return user.primaryEmailAddress?.emailAddress ?? null; @@ -87,6 +89,11 @@ export async function requireAuth( req: FastifyRequest, reply: FastifyReply, ): Promise { + if (env.IS_LOCAL_MODE) { + req.auth = { userId: LOCAL_USER_ID }; + return; + } + if (!env.CLERK_SECRET_KEY) { req.log.error("CLERK_SECRET_KEY is not set; cannot verify request"); await reply.code(500).send({ error: "Auth not configured" }); diff --git a/backend/src/config/models.ts b/backend/src/config/models.ts index f63b419..1d28cbf 100644 --- a/backend/src/config/models.ts +++ b/backend/src/config/models.ts @@ -6,6 +6,7 @@ import { api, internal, convex } from "../convex.js"; import { env } from "../env.js"; +import { requireOpenRouterApiKey } from "../local-credentials.js"; export interface OpenRouterModel { modelName: string; @@ -130,20 +131,19 @@ export async function getModelConfig( * for Convex storage. 
*/ export async function fetchModelsFromOpenRouter(): Promise { - const apiKey = env.OPENROUTER_API_KEY; - if (!apiKey) { - throw new Error("OPENROUTER_API_KEY is not set"); - } + const apiKey = await requireOpenRouterApiKey(); + + const baseUrl = (process.env.OPENROUTER_BASE_URL || "https://openrouter.ai/api/v1").replace(/\/+$/, ""); + const url = new URL(`${baseUrl}/models`); + url.searchParams.set("output_modalities", "text"); + url.searchParams.set("supported_parameters", "tools"); // Only text-based models that support tools - const response = await fetch( - "https://openrouter.ai/api/v1/models?output_modalities=text&supported_parameters=tools", - { - headers: { - Authorization: `Bearer ${apiKey}`, - }, - } - ); + const response = await fetch(url, { + headers: { + Authorization: `Bearer ${apiKey}`, + }, + }); if (!response.ok) { throw new Error(`OpenRouter API failed: ${response.status} ${response.statusText}`); @@ -152,8 +152,8 @@ export async function fetchModelsFromOpenRouter(): Promise { const json = (await response.json()) as { data: Array<{ id: string; - name: string; - context_length: number; + name?: string; + context_length?: number; pricing?: { completion?: string; prompt?: string }; }>; }; @@ -163,7 +163,7 @@ export async function fetchModelsFromOpenRouter(): Promise { const models = json.data .filter((m) => !EXCLUDED_MODEL_SLUGS.includes(m.id)) .map((model) => ({ - modelName: model.name, + modelName: model.name ?? model.id, canonicalSlug: model.id, contextLength: model.context_length ?? 0, promptCost: parseFloat(model.pricing?.prompt ?? 
"0") * 1_000_000, @@ -171,4 +171,4 @@ export async function fetchModelsFromOpenRouter(): Promise { })); return models; -} \ No newline at end of file +} diff --git a/backend/src/env.ts b/backend/src/env.ts index f1fbf17..97c410f 100644 --- a/backend/src/env.ts +++ b/backend/src/env.ts @@ -19,6 +19,9 @@ function numberFromEnv(name: string, fallback: number): number { } export const env = { + PROD: process.env.PROD, + IS_PROD: process.env.PROD === "1", + IS_LOCAL_MODE: process.env.PROD !== "1", CLIENT_ORIGIN: process.env.CLIENT_ORIGIN || "http://localhost:3500", CONVEX_URL: required("CONVEX_URL"), PORT: numberFromEnv("PORT", 3501), @@ -36,6 +39,10 @@ export const env = { process.env.NEXT_PUBLIC_CLERK_PUBLISHABLE_KEY, OPENROUTER_API_KEY: process.env.OPENROUTER_API_KEY, + BIGSET_LOCAL_WORKSPACE_ID: required("BIGSET_LOCAL_WORKSPACE_ID"), + LOCAL_KEYCHAIN_URL: process.env.LOCAL_KEYCHAIN_URL, + LOCAL_KEYCHAIN_TOKEN: process.env.LOCAL_KEYCHAIN_TOKEN, + LOCAL_KEYCHAIN_TIMEOUT_MS: numberFromEnv("LOCAL_KEYCHAIN_TIMEOUT_MS", 5_000), // Default models — used when a user has not saved a preference. // Each must be a valid OpenRouter model slug. 
diff --git a/backend/src/fetch-timeout.ts b/backend/src/fetch-timeout.ts new file mode 100644 index 0000000..ebd4816 --- /dev/null +++ b/backend/src/fetch-timeout.ts @@ -0,0 +1 @@ +export const FETCH_TIMEOUT_MS = 30_000; diff --git a/backend/src/index.ts b/backend/src/index.ts index 26c72ac..cb57cd1 100644 --- a/backend/src/index.ts +++ b/backend/src/index.ts @@ -1,4 +1,4 @@ -import Fastify, { type FastifyBaseLogger } from "fastify"; +import Fastify, { type FastifyBaseLogger, type FastifyReply } from "fastify"; import fastifyCors from "@fastify/cors"; import type { ClerkClient } from "@clerk/backend"; @@ -14,6 +14,15 @@ import { datasetReadyTemplate } from "./email/templates/dataset-ready.js"; import { capture, shutdown as shutdownAnalytics } from "./analytics/posthog.js"; import { EVENTS } from "./analytics/events.js"; import { registerDataset, deregisterDataset, abortDataset } from "./abort-registry.js"; +import { + clearLegacyPlaintextLocalCredentials, + exchangeOpenRouterOAuthCode, + getLocalSetupStatus, + requireLocalSetupComplete, + saveLocalCredential, + verifyOpenRouterApiKey, + verifyTinyFishApiKey, +} from "./local-credentials.js"; /** Domain part of an email, for analytics (we never log full addresses). */ function emailDomain(email: string): string { @@ -84,6 +93,8 @@ async function sendDatasetReadyNotification({ rowCount: number; workflowType?: "populate" | "update"; }): Promise { + if (env.IS_LOCAL_MODE) return; + const baseProps = { datasetId, datasetName, @@ -142,6 +153,18 @@ async function sendDatasetReadyNotification({ } } +async function ensureLocalSetupReady(reply: FastifyReply): Promise { + try { + await requireLocalSetupComplete(); + return true; + } catch { + await reply.code(428).send({ + error: "Local setup is incomplete. Connect TinyFish and OpenRouter first.", + }); + return false; + } +} + /** * Shared stop-success path: set the dataset live, send the ready email. 
* @@ -523,6 +546,11 @@ function startLocalRefreshScheduler( ticking = true; try { + if (env.IS_LOCAL_MODE) { + const setup = await getLocalSetupStatus(); + if (!setup.complete) return; + } + const now = Date.now(); const dueDatasets = await convex.query( internal.datasets.listDueForRefreshInternal, @@ -601,8 +629,28 @@ function startLocalRefreshScheduler( const fastify = Fastify({ logger: true }); +const allowedCorsOrigins = new Set([env.CLIENT_ORIGIN]); +if (env.IS_LOCAL_MODE) { + try { + const clientOrigin = new URL(env.CLIENT_ORIGIN); + if ( + clientOrigin.hostname === "localhost" || + clientOrigin.hostname === "127.0.0.1" + ) { + allowedCorsOrigins.add( + `${clientOrigin.protocol}//localhost${clientOrigin.port ? `:${clientOrigin.port}` : ""}`, + ); + allowedCorsOrigins.add( + `${clientOrigin.protocol}//127.0.0.1${clientOrigin.port ? `:${clientOrigin.port}` : ""}`, + ); + } + } catch { + // Keep the configured origin only if CLIENT_ORIGIN is not URL-shaped. + } +} + await fastify.register(fastifyCors, { - origin: env.CLIENT_ORIGIN, + origin: Array.from(allowedCorsOrigins), methods: ["GET", "POST", "PUT", "DELETE", "OPTIONS"], allowedHeaders: ["Content-Type", "Authorization", "Cookie"], credentials: true, @@ -614,6 +662,10 @@ await fastify.register(fastifyCors, { // protected routes — see the example block below. 
await fastify.register(clerkAuthPlugin); +await clearLegacyPlaintextLocalCredentials().catch((err) => { + fastify.log.warn({ err }, "Failed to clear legacy local credential plaintext"); +}); + await backfillDatasetRefreshSettings(fastify.log); const refreshScheduler = startLocalRefreshScheduler(fastify.log); @@ -630,6 +682,81 @@ fastify.addHook("onClose", async () => { fastify.get("/health", async () => ({ status: "ok" })); +fastify.get("/local-setup/status", async (_req, reply) => { + if (!env.IS_LOCAL_MODE) { + return reply.code(404).send({ error: "Not found" }); + } + return await getLocalSetupStatus(); +}); + +fastify.post("/local-setup/tinyfish", async (req, reply) => { + if (!env.IS_LOCAL_MODE) { + return reply.code(404).send({ error: "Not found" }); + } + + const body = req.body as { apiKey?: string }; + const apiKey = body?.apiKey?.trim(); + if (!apiKey) { + return reply.code(400).send({ error: "TinyFish API key is required" }); + } + + try { + await verifyTinyFishApiKey(apiKey); + await saveLocalCredential("tinyfish", apiKey, "api_key"); + return await getLocalSetupStatus(); + } catch (err) { + const message = err instanceof Error ? err.message : "TinyFish verification failed"; + req.log.warn({ err }, "TinyFish local setup verification failed"); + return reply.code(400).send({ error: message }); + } +}); + +fastify.post("/local-setup/openrouter-key", async (req, reply) => { + if (!env.IS_LOCAL_MODE) { + return reply.code(404).send({ error: "Not found" }); + } + + const body = req.body as { apiKey?: string }; + const apiKey = body?.apiKey?.trim(); + if (!apiKey) { + return reply.code(400).send({ error: "OpenRouter API key is required" }); + } + + try { + await verifyOpenRouterApiKey(apiKey); + await saveLocalCredential("openrouter", apiKey, "api_key"); + return await getLocalSetupStatus(); + } catch (err) { + const message = err instanceof Error ? 
err.message : "OpenRouter verification failed"; + req.log.warn({ err }, "OpenRouter local setup verification failed"); + return reply.code(400).send({ error: message }); + } +}); + +fastify.post("/local-setup/openrouter-oauth", async (req, reply) => { + if (!env.IS_LOCAL_MODE) { + return reply.code(404).send({ error: "Not found" }); + } + + const body = req.body as { code?: string; codeVerifier?: string }; + const code = body?.code?.trim(); + const codeVerifier = body?.codeVerifier?.trim(); + if (!code || !codeVerifier) { + return reply.code(400).send({ error: "OpenRouter OAuth code is required" }); + } + + try { + const apiKey = await exchangeOpenRouterOAuthCode({ code, codeVerifier }); + await verifyOpenRouterApiKey(apiKey); + await saveLocalCredential("openrouter", apiKey, "oauth"); + return await getLocalSetupStatus(); + } catch (err) { + const message = err instanceof Error ? err.message : "OpenRouter OAuth failed"; + req.log.warn({ err }, "OpenRouter OAuth setup failed"); + return reply.code(400).send({ error: message }); + } +}); + fastify.post("/openrouter/refresh", { preHandler: requireAuth }, async (req, reply) => { const { fetchModelsFromOpenRouter, upsertModelBatch } = await import("./config/models.js"); @@ -715,6 +842,7 @@ await fastify.register(async (instance) => { if (!body?.prompt || typeof body.prompt !== "string" || !body.prompt.trim()) { return reply.code(400).send({ error: "prompt is required" }); } + if (!(await ensureLocalSetupReady(reply))) return; try { const auth = req.auth; @@ -744,6 +872,7 @@ await fastify.register(async (instance) => { details: parsed.error.flatten().fieldErrors, }); } + if (!(await ensureLocalSetupReady(reply))) return; try { const auth = req.auth; @@ -826,6 +955,7 @@ await fastify.register(async (instance) => { details: parsed.error.flatten().fieldErrors, }); } + if (!(await ensureLocalSetupReady(reply))) return; try { const auth = req.auth; diff --git a/backend/src/keychain-server.ts b/backend/src/keychain-server.ts 
new file mode 100644 index 0000000..fc10630 --- /dev/null +++ b/backend/src/keychain-server.ts @@ -0,0 +1,174 @@ +import { createServer, type IncomingMessage, type ServerResponse } from "node:http"; +import { randomUUID } from "node:crypto"; +import { existsSync } from "node:fs"; +import { resolve } from "node:path"; +import { config as loadDotenv } from "dotenv"; +import { Entry } from "@napi-rs/keyring"; + +import { + isLocalCredentialService, + localKeychainAccount, +} from "./local-credential-types.js"; + +const rootEnvPath = resolve(process.cwd(), "../.env"); +if (existsSync(rootEnvPath)) { + loadDotenv({ path: rootEnvPath }); +} + +const KEYCHAIN_SERVICE = "ai.bigset.local-credentials"; +const MAX_BODY_BYTES = 64 * 1024; + +function requiredEnv(name: string): string { + const value = process.env[name]; + if (!value) { + throw new Error(`${name} is required for the local keychain bridge.`); + } + return value; +} + +function numberEnv(name: string): number { + const raw = requiredEnv(name); + const value = Number(raw); + if (!Number.isInteger(value) || value <= 0) { + throw new Error(`${name} must be a positive integer.`); + } + return value; +} + +const bindHost = process.env.LOCAL_KEYCHAIN_BIND_HOST || "127.0.0.1"; +const port = numberEnv("LOCAL_KEYCHAIN_PORT"); +const token = requiredEnv("LOCAL_KEYCHAIN_TOKEN"); +const workspaceId = requiredEnv("BIGSET_LOCAL_WORKSPACE_ID"); + +interface CredentialBody { + service?: unknown; + apiKey?: unknown; +} + +function writeJson( + res: ServerResponse, + statusCode: number, + body: Record, +): void { + res.writeHead(statusCode, { "Content-Type": "application/json" }); + res.end(JSON.stringify(body)); +} + +function credentialEntry(service: unknown): { entry: Entry; account: string } { + if (!isLocalCredentialService(service)) { + throw new Error("Unsupported credential service."); + } + + const account = localKeychainAccount(workspaceId, service); + return { + account, + entry: new Entry(KEYCHAIN_SERVICE, account), + 
}; +} + +async function readBody(req: IncomingMessage): Promise { + const chunks: Buffer[] = []; + let totalBytes = 0; + + for await (const chunk of req) { + const buffer = Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk); + totalBytes += buffer.byteLength; + if (totalBytes > MAX_BODY_BYTES) { + throw new Error("Request body is too large."); + } + chunks.push(buffer); + } + + if (chunks.length === 0) return {}; + const raw = Buffer.concat(chunks).toString("utf8"); + const parsed = JSON.parse(raw) as CredentialBody; + if (!parsed || typeof parsed !== "object") { + throw new Error("Request body must be a JSON object."); + } + return parsed; +} + +function isAuthorized(req: IncomingMessage): boolean { + return req.headers.authorization === `Bearer ${token}`; +} + +async function handleRequest( + req: IncomingMessage, + res: ServerResponse, +): Promise { + const requestId = randomUUID(); + const url = new URL(req.url || "/", `http://${bindHost}:${port}`); + + if (req.method === "GET" && url.pathname === "/health") { + writeJson(res, 200, { status: "ok", workspaceId }); + return; + } + + if (!isAuthorized(req)) { + writeJson(res, 401, { error: "Unauthorized" }); + return; + } + + if (req.method !== "POST") { + writeJson(res, 405, { error: "Method not allowed" }); + return; + } + + try { + const body = await readBody(req); + + if (url.pathname === "/credentials/get") { + const { entry, account } = credentialEntry(body.service); + writeJson(res, 200, { + apiKey: entry.getPassword(), + keychainAccount: account, + }); + return; + } + + if (url.pathname === "/credentials/set") { + if (typeof body.apiKey !== "string" || !body.apiKey.trim()) { + writeJson(res, 400, { error: "API key is required." 
}); + return; + } + + const apiKey = body.apiKey.trim(); + const { entry, account } = credentialEntry(body.service); + entry.setPassword(apiKey); + writeJson(res, 200, { keychainAccount: account }); + return; + } + + if (url.pathname === "/credentials/delete") { + const { entry } = credentialEntry(body.service); + writeJson(res, 200, { deleted: entry.deletePassword() }); + return; + } + + writeJson(res, 404, { error: "Not found" }); + } catch (err) { + const message = err instanceof Error ? err.message : "Keychain bridge failed."; + console.warn({ requestId, err }, "Local keychain bridge request failed"); + writeJson(res, 400, { error: message, requestId }); + } +} + +const server = createServer((req, res) => { + void handleRequest(req, res).catch((err) => { + const message = err instanceof Error ? err.message : "Keychain bridge failed."; + console.error({ err }, "Local keychain bridge crashed during request"); + writeJson(res, 500, { error: message }); + }); +}); + +server.listen(port, bindHost, () => { + console.log( + `Local keychain bridge listening on http://${bindHost}:${port} (${workspaceId})`, + ); +}); + +for (const signal of ["SIGINT", "SIGTERM"] as const) { + process.on(signal, () => { + server.close(() => process.exit(0)); + }); +} diff --git a/backend/src/local-credential-types.ts b/backend/src/local-credential-types.ts new file mode 100644 index 0000000..bcb4235 --- /dev/null +++ b/backend/src/local-credential-types.ts @@ -0,0 +1,20 @@ +export const LOCAL_CREDENTIAL_SERVICES = ["tinyfish", "openrouter"] as const; + +export type LocalCredentialService = (typeof LOCAL_CREDENTIAL_SERVICES)[number]; +export type ConnectionMethod = "api_key" | "oauth"; + +export function isLocalCredentialService( + value: unknown, +): value is LocalCredentialService { + return ( + typeof value === "string" && + (LOCAL_CREDENTIAL_SERVICES as readonly string[]).includes(value) + ); +} + +export function localKeychainAccount( + workspaceId: string, + service: 
LocalCredentialService, +): string { + return `${workspaceId}:${service}`; +} diff --git a/backend/src/local-credentials.ts b/backend/src/local-credentials.ts new file mode 100644 index 0000000..a6a5b4b --- /dev/null +++ b/backend/src/local-credentials.ts @@ -0,0 +1,309 @@ +import { convex, internal } from "./convex.js"; +import { env } from "./env.js"; +import { FETCH_TIMEOUT_MS } from "./fetch-timeout.js"; +import { + getKeychainCredential, + setKeychainCredential, +} from "./local-keychain-client.js"; +import type { + ConnectionMethod, + LocalCredentialService, +} from "./local-credential-types.js"; + +export const LOCAL_USER_ID = "local_user_default"; + +export interface ServiceSetupStatus { + configured: boolean; + source: "local" | "env" | null; + connectionMethod: ConnectionMethod | null; + verifiedAt: number | null; +} + +export interface LocalSetupStatus { + mode: "local" | "production"; + required: boolean; + complete: boolean; + services: Record; +} + +function isPlaceholder(value: string, service: LocalCredentialService): boolean { + if (!value.trim()) return true; + if (value.includes("...")) return true; + if (service === "openrouter" && value === "sk-or-...") return true; + return false; +} + +function envCredential(service: LocalCredentialService): string | undefined { + const value = + service === "tinyfish" ? 
process.env.TINYFISH_API_KEY : env.OPENROUTER_API_KEY; + if (!value || isPlaceholder(value, service)) return undefined; + return value; +} + +async function localCredential(service: LocalCredentialService): Promise<{ + apiKey: string; + connectionMethod: ConnectionMethod; + verifiedAt: number | null; + keychainAccount: string; +} | null> { + if (!env.IS_LOCAL_MODE) return null; + const keychain = await getKeychainCredential(service); + if (!keychain?.apiKey) return null; + + const row = await convex.query(internal.localCredentials.getInternal, { + service, + }); + + return { + apiKey: keychain.apiKey, + connectionMethod: row?.connectionMethod ?? "api_key", + verifiedAt: row?.verifiedAt ?? null, + keychainAccount: keychain.keychainAccount, + }; +} + +async function localCredentialForStatus( + service: LocalCredentialService, +): Promise>> { + try { + return await localCredential(service); + } catch { + return null; + } +} + +export async function resolveCredential( + service: LocalCredentialService, +): Promise<{ apiKey: string; source: "local" | "env" } | null> { + if (env.IS_LOCAL_MODE) { + const local = await localCredential(service); + return local ? { apiKey: local.apiKey, source: "local" } : null; + } + + const fromEnv = envCredential(service); + if (fromEnv) return { apiKey: fromEnv, source: "env" }; + + return null; +} + +export async function getOpenRouterApiKey(): Promise { + return (await resolveCredential("openrouter"))?.apiKey; +} + +export async function requireOpenRouterApiKey(): Promise { + const apiKey = await getOpenRouterApiKey(); + if (!apiKey) { + throw new Error("OpenRouter is not configured. 
Complete local setup first."); + } + return apiKey; +} + +export async function getTinyFishApiKey(): Promise { + return (await resolveCredential("tinyfish"))?.apiKey; +} + +export function tinyFishHeaders(apiKey: string): Record { + return { + "X-API-Key": apiKey, + "X-TF-ORIGIN": "BigSet", + "X-TF-Request-Origin": "BigSet", + }; +} + +async function withFetchTimeout( + operation: (signal: AbortSignal) => Promise, + timeoutMessage: string, +): Promise { + const controller = new AbortController(); + const timeout = setTimeout(() => controller.abort(), FETCH_TIMEOUT_MS); + + try { + return await operation(controller.signal); + } catch (err) { + if (err instanceof Error && err.name === "AbortError") { + throw new Error(timeoutMessage); + } + throw err; + } finally { + clearTimeout(timeout); + } +} + +export async function requireLocalSetupComplete(): Promise { + if (!env.IS_LOCAL_MODE) return; + const status = await getLocalSetupStatus(); + if (!status.complete) { + throw new Error("Local setup is incomplete."); + } +} + +export async function getLocalSetupStatus(): Promise { + if (!env.IS_LOCAL_MODE) { + const tinyfish = envCredential("tinyfish"); + const openrouter = envCredential("openrouter"); + return { + mode: "production", + required: false, + complete: true, + services: { + tinyfish: { + configured: !!tinyfish, + source: tinyfish ? "env" : null, + connectionMethod: tinyfish ? "api_key" : null, + verifiedAt: null, + }, + openrouter: { + configured: !!openrouter, + source: openrouter ? "env" : null, + connectionMethod: openrouter ? "api_key" : null, + verifiedAt: null, + }, + }, + }; + } + + const tinyfishLocal = await localCredentialForStatus("tinyfish"); + const openrouterLocal = await localCredentialForStatus("openrouter"); + + const tinyfish: ServiceSetupStatus = tinyfishLocal + ? 
{ + configured: true, + source: "local", + connectionMethod: tinyfishLocal.connectionMethod, + verifiedAt: tinyfishLocal.verifiedAt, + } + : { + configured: false, + source: null, + connectionMethod: null, + verifiedAt: null, + }; + + const openrouter: ServiceSetupStatus = openrouterLocal + ? { + configured: true, + source: "local", + connectionMethod: openrouterLocal.connectionMethod, + verifiedAt: openrouterLocal.verifiedAt, + } + : { + configured: false, + source: null, + connectionMethod: null, + verifiedAt: null, + }; + + return { + mode: "local", + required: true, + complete: tinyfish.configured && openrouter.configured, + services: { tinyfish, openrouter }, + }; +} + +export async function saveLocalCredential( + service: LocalCredentialService, + apiKey: string, + connectionMethod: ConnectionMethod, +): Promise { + if (!env.IS_LOCAL_MODE) { + throw new Error("Local credential storage is disabled when PROD=1."); + } + const { keychainAccount } = await setKeychainCredential(service, apiKey); + await convex.mutation(internal.localCredentials.upsertInternal, { + service, + keychainAccount, + connectionMethod, + verifiedAt: Date.now(), + }); +} + +export async function clearLegacyPlaintextLocalCredentials(): Promise { + if (!env.IS_LOCAL_MODE) return; + await convex.mutation(internal.localCredentials.clearLegacyPlaintextInternal, {}); +} + +export async function verifyTinyFishApiKey(apiKey: string): Promise { + const url = new URL("https://api.search.tinyfish.ai"); + url.searchParams.set("query", "BigSet"); + + await withFetchTimeout( + async (signal) => { + const response = await fetch(url, { + headers: tinyFishHeaders(apiKey), + signal, + }); + + if (!response.ok) { + if (response.status === 401) { + throw new Error("TinyFish rejected that API key."); + } + throw new Error( + `TinyFish verification failed with HTTP ${response.status}.`, + ); + } + }, + `TinyFish verification timed out after ${FETCH_TIMEOUT_MS / 1000} seconds.`, + ); +} + +export async function 
verifyOpenRouterApiKey(apiKey: string): Promise { + const baseUrl = ( + process.env.OPENROUTER_BASE_URL || "https://openrouter.ai/api/v1" + ).replace(/\/+$/, ""); + + await withFetchTimeout( + async (signal) => { + const response = await fetch(`${baseUrl}/key`, { + headers: { Authorization: `Bearer ${apiKey}` }, + signal, + }); + + if (!response.ok) { + if (response.status === 401 || response.status === 403) { + throw new Error("OpenRouter rejected that API key."); + } + throw new Error( + `OpenRouter verification failed with HTTP ${response.status}.`, + ); + } + }, + `OpenRouter verification timed out after ${FETCH_TIMEOUT_MS / 1000} seconds.`, + ); +} + +export async function exchangeOpenRouterOAuthCode({ + code, + codeVerifier, +}: { + code: string; + codeVerifier: string; +}): Promise { + return await withFetchTimeout( + async (signal) => { + const response = await fetch("https://openrouter.ai/api/v1/auth/keys", { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ + code, + code_verifier: codeVerifier, + code_challenge_method: "S256", + }), + signal, + }); + + if (!response.ok) { + throw new Error( + `OpenRouter OAuth exchange failed with HTTP ${response.status}.`, + ); + } + + const body = (await response.json()) as { key?: string }; + if (!body.key) { + throw new Error("OpenRouter OAuth exchange did not return an API key."); + } + return body.key; + }, + `OpenRouter OAuth exchange timed out after ${FETCH_TIMEOUT_MS / 1000} seconds.`, + ); +} diff --git a/backend/src/local-keychain-client.ts b/backend/src/local-keychain-client.ts new file mode 100644 index 0000000..480b167 --- /dev/null +++ b/backend/src/local-keychain-client.ts @@ -0,0 +1,148 @@ +import { env } from "./env.js"; +import { + type LocalCredentialService, + localKeychainAccount, +} from "./local-credential-types.js"; + +interface KeychainGetResponse { + apiKey: string | null; + keychainAccount: string; +} + +interface KeychainSetResponse { + 
keychainAccount: string; +} + +type KeychainResponseValidator = (payload: unknown) => payload is T; + +function isJsonObject(payload: unknown): payload is Record { + return ( + payload !== null && typeof payload === "object" && !Array.isArray(payload) + ); +} + +function isKeychainGetResponse( + payload: unknown, +): payload is KeychainGetResponse { + return ( + isJsonObject(payload) && + (typeof payload.apiKey === "string" || payload.apiKey === null) && + typeof payload.keychainAccount === "string" + ); +} + +function isKeychainSetResponse( + payload: unknown, +): payload is KeychainSetResponse { + return isJsonObject(payload) && typeof payload.keychainAccount === "string"; +} + +function requireKeychainConfig(): { url: string; token: string } { + if (!env.LOCAL_KEYCHAIN_URL || !env.LOCAL_KEYCHAIN_TOKEN) { + throw new Error( + "Local keychain bridge is not configured. Run `make dev` to start it.", + ); + } + return { url: env.LOCAL_KEYCHAIN_URL, token: env.LOCAL_KEYCHAIN_TOKEN }; +} + +function keychainUrl(path: string): string { + const { url } = requireKeychainConfig(); + return new URL(path, url).toString(); +} + +async function keychainRequest( + path: string, + body: Record, + validatePayload: KeychainResponseValidator, +): Promise { + const { token } = requireKeychainConfig(); + const controller = new AbortController(); + const timeout = setTimeout( + () => controller.abort(), + env.LOCAL_KEYCHAIN_TIMEOUT_MS, + ); + + try { + const response = await fetch(keychainUrl(path), { + method: "POST", + headers: { + Authorization: `Bearer ${token}`, + "Content-Type": "application/json", + }, + body: JSON.stringify(body), + signal: controller.signal, + }); + + const payload: unknown = await response.json().catch(() => null); + + if (!response.ok) { + const bridgeError = + isJsonObject(payload) && typeof payload.error === "string" + ? 
payload.error + : null; + throw new Error( + bridgeError || `Keychain bridge error (${response.status})`, + ); + } + + if (!isJsonObject(payload)) { + throw new Error( + `Keychain bridge returned an invalid response for ${path}: expected a JSON object.`, + ); + } + + if (!validatePayload(payload)) { + throw new Error( + `Keychain bridge returned an invalid response for ${path}.`, + ); + } + + return payload; + } catch (err) { + if (err instanceof Error && err.name === "AbortError") { + throw new Error("Local keychain bridge timed out."); + } + throw err; + } finally { + clearTimeout(timeout); + } +} + +export function expectedKeychainAccount( + service: LocalCredentialService, +): string { + return localKeychainAccount(env.BIGSET_LOCAL_WORKSPACE_ID, service); +} + +export async function getKeychainCredential( + service: LocalCredentialService, +): Promise<{ apiKey: string; keychainAccount: string } | null> { + const result = await keychainRequest( + "/credentials/get", + { + service, + }, + isKeychainGetResponse, + ); + + if (!result.apiKey) return null; + return { + apiKey: result.apiKey, + keychainAccount: result.keychainAccount, + }; +} + +export async function setKeychainCredential( + service: LocalCredentialService, + apiKey: string, +): Promise<{ keychainAccount: string }> { + return await keychainRequest( + "/credentials/set", + { + service, + apiKey, + }, + isKeychainSetResponse, + ); +} diff --git a/backend/src/mastra/agents/investigate.ts b/backend/src/mastra/agents/investigate.ts index 4cdc32e..63a7b8c 100644 --- a/backend/src/mastra/agents/investigate.ts +++ b/backend/src/mastra/agents/investigate.ts @@ -5,10 +5,6 @@ import { searchWebTool, fetchPageTool } from "../tools/web-tools.js"; import type { AuthContext } from "../workflows/populate.js"; import type { PopulateColumn } from "../../pipeline/populate.js"; -const openrouter = createOpenRouter({ - apiKey: process.env.OPENROUTER_API_KEY!, -}); - function buildInvestigateInstructions(columns: 
PopulateColumn[]): string { const columnNames = columns.map((c) => c.name); const columnsDesc = columns @@ -59,8 +55,13 @@ export function buildInvestigateAgent( authorizedDatasetId: string, authContext: AuthContext, columns: PopulateColumn[], + openRouterApiKey: string, ): Agent { const modelSlug = authContext.modelConfig!.investigateSubagent; + const openrouter = createOpenRouter({ + apiKey: openRouterApiKey, + baseURL: process.env.OPENROUTER_BASE_URL, + }); const { insert_row } = buildPopulateTools( authorizedDatasetId, diff --git a/backend/src/mastra/agents/populate.ts b/backend/src/mastra/agents/populate.ts index 155492a..eb9b26f 100644 --- a/backend/src/mastra/agents/populate.ts +++ b/backend/src/mastra/agents/populate.ts @@ -6,10 +6,6 @@ import type { AuthContext } from "../workflows/populate.js"; import type { PopulateColumn } from "../../pipeline/populate.js"; import type { RunMetrics } from "../run-metrics.js"; -const openrouter = createOpenRouter({ - apiKey: process.env.OPENROUTER_API_KEY!, -}); - function buildInstructions(maxRowCount: number): string { return `You are an expert dataset builder. You conduct research using your web tools. You do broad research to see which rows to add, and then you spin up sub-agents that can do the deep research and fill in each row for you. 
@@ -44,10 +40,15 @@ export function buildPopulateAgent( authorizedDatasetId: string, authContext: AuthContext, columns: PopulateColumn[], + openRouterApiKey: string, maxRowCount: number, metrics?: RunMetrics, ): Agent { const modelSlug = authContext.modelConfig!.populateOrchestrator; + const openrouter = createOpenRouter({ + apiKey: openRouterApiKey, + baseURL: process.env.OPENROUTER_BASE_URL, + }); return new Agent({ id: "populate-agent", @@ -61,6 +62,7 @@ export function buildPopulateAgent( authorizedDatasetId, authContext, columns, + openRouterApiKey, maxRowCount, metrics, ), diff --git a/backend/src/mastra/agents/refresh.ts b/backend/src/mastra/agents/refresh.ts index 2215686..144065a 100644 --- a/backend/src/mastra/agents/refresh.ts +++ b/backend/src/mastra/agents/refresh.ts @@ -5,10 +5,6 @@ import { searchWebTool, fetchPageTool } from "../tools/web-tools.js"; import type { AuthContext } from "../workflows/populate.js"; import type { PopulateColumn } from "../../pipeline/populate.js"; -const openrouter = createOpenRouter({ - apiKey: process.env.OPENROUTER_API_KEY!, -}); - function buildRefreshInstructions(columns: PopulateColumn[]): string { const columnNames = columns.map((c) => c.name); const columnsDesc = columns @@ -55,7 +51,13 @@ export function buildRefreshAgent( authorizedDatasetId: string, authContext: AuthContext, columns: PopulateColumn[], + openRouterApiKey: string, ): Agent { + const modelSlug = authContext.modelConfig!.investigateSubagent; + const openrouter = createOpenRouter({ + apiKey: openRouterApiKey, + baseURL: process.env.OPENROUTER_BASE_URL, + }); const { update_row } = buildPopulateTools( authorizedDatasetId, authContext, @@ -64,7 +66,7 @@ export function buildRefreshAgent( id: "refresh-agent", name: "Dataset Refresh Agent", instructions: buildRefreshInstructions(columns), - model: openrouter("qwen/qwen3.7-max"), + model: openrouter(modelSlug), tools: { update_row, search_web: searchWebTool, diff --git 
a/backend/src/mastra/tools/investigate-tool.ts b/backend/src/mastra/tools/investigate-tool.ts index c1d6b18..0139aa4 100644 --- a/backend/src/mastra/tools/investigate-tool.ts +++ b/backend/src/mastra/tools/investigate-tool.ts @@ -75,6 +75,7 @@ export function buildSubagentTool( authorizedDatasetId: string, authContext: AuthContext, columns: PopulateColumn[], + openRouterApiKey: string, maxRowCount: number, metrics?: RunMetrics, ) { @@ -107,6 +108,7 @@ export function buildSubagentTool( authorizedDatasetId, authContext, columns, + openRouterApiKey, ); const pkBlock = Object.entries(primary_keys) diff --git a/backend/src/mastra/tools/web-tools.ts b/backend/src/mastra/tools/web-tools.ts index 961897e..245ee43 100644 --- a/backend/src/mastra/tools/web-tools.ts +++ b/backend/src/mastra/tools/web-tools.ts @@ -1,7 +1,7 @@ import { createTool } from "@mastra/core/tools"; import { z } from "zod"; - -const FETCH_TIMEOUT_MS = 30_000; +import { FETCH_TIMEOUT_MS } from "../../fetch-timeout.js"; +import { getTinyFishApiKey, tinyFishHeaders } from "../../local-credentials.js"; const searchResultSchema = z.object({ title: z.string(), @@ -24,7 +24,7 @@ export const searchWebTool = createTool({ if (!query?.trim()) return { error: "query is required and cannot be empty." }; - const apiKey = process.env.TINYFISH_API_KEY; + const apiKey = await getTinyFishApiKey(); if (!apiKey) return { error: "TINYFISH_API_KEY is not configured. Web search is unavailable — use synthetic data instead." 
}; @@ -35,7 +35,7 @@ export const searchWebTool = createTool({ const timeout = setTimeout(() => controller.abort(), FETCH_TIMEOUT_MS); try { const res = await fetch(url, { - headers: { "X-API-Key": apiKey, "X-TF-Request-Origin": "bigset" }, + headers: tinyFishHeaders(apiKey), signal: controller.signal, }); clearTimeout(timeout); @@ -90,7 +90,7 @@ export const fetchPageTool = createTool({ if (!targetUrl.startsWith("http://") && !targetUrl.startsWith("https://")) return { error: `Invalid URL "${targetUrl}". Must start with http:// or https://.` }; - const apiKey = process.env.TINYFISH_API_KEY; + const apiKey = await getTinyFishApiKey(); if (!apiKey) return { error: "TINYFISH_API_KEY is not configured. Page fetch is unavailable — use data from search snippets instead." }; @@ -103,8 +103,7 @@ export const fetchPageTool = createTool({ method: "POST", headers: { "Content-Type": "application/json", - "X-API-Key": apiKey, - "X-TF-Request-Origin": "bigset", + ...tinyFishHeaders(apiKey), }, body: JSON.stringify({ urls: [targetUrl], format: "markdown" }), signal: controller.signal, diff --git a/backend/src/mastra/workflows/populate.ts b/backend/src/mastra/workflows/populate.ts index 35db3b1..e07ed8e 100644 --- a/backend/src/mastra/workflows/populate.ts +++ b/backend/src/mastra/workflows/populate.ts @@ -5,6 +5,7 @@ import { createOpenRouter } from "@openrouter/ai-sdk-provider"; import { datasetContextSchema, populateColumnSchema } from "../../pipeline/populate.js"; import { convex, internal } from "../../convex.js"; import { DEFAULT_MODEL_IDS } from "../../config/models.js"; +import { requireOpenRouterApiKey } from "../../local-credentials.js"; import { buildPopulateAgent } from "../agents/populate.js"; import { RunMetrics } from "../run-metrics.js"; import { saveRunMetrics } from "../save-run-metrics.js"; @@ -108,8 +109,10 @@ Respond with EXACTLY one word: scraper or search`; let classification: "scraper" | "search" = "search"; try { + const apiKey = await 
requireOpenRouterApiKey(); const openrouter = createOpenRouter({ - apiKey: process.env.OPENROUTER_API_KEY!, + apiKey, + baseURL: process.env.OPENROUTER_BASE_URL, }); const modelSlug = inputData.authContext?.modelConfig?.schemaInference ?? DEFAULT_MODEL_IDS.SCHEMA_INFERENCE; @@ -248,6 +251,7 @@ const agentStep = createStep({ inputData.authorizedDatasetId, inputData.authContext, inputData.columns, + await requireOpenRouterApiKey(), inputData.maxRowCount, metrics, ); diff --git a/backend/src/mastra/workflows/update.ts b/backend/src/mastra/workflows/update.ts index 45e3421..28aadee 100644 --- a/backend/src/mastra/workflows/update.ts +++ b/backend/src/mastra/workflows/update.ts @@ -4,6 +4,7 @@ import { datasetContextSchema, populateColumnSchema } from "../../pipeline/popul import { convex, internal } from "../../convex.js"; import { buildRefreshAgent } from "../agents/refresh.js"; import { authContextSchema } from "./populate.js"; +import { requireOpenRouterApiKey } from "../../local-credentials.js"; import { RunMetrics } from "../run-metrics.js"; import { saveRunMetrics } from "../save-run-metrics.js"; import { getSignal } from "../../abort-registry.js"; @@ -99,12 +100,18 @@ const refreshRowsStep = createStep({ const metrics = new RunMetrics(); const startedAt = Date.now(); + const openRouterApiKey = await requireOpenRouterApiKey(); const pkColumns = columns.filter((c) => c.isPrimaryKey); async function processRow(row: z.infer) { try { - const agent = buildRefreshAgent(datasetId, authContext, columns); + const agent = buildRefreshAgent( + datasetId, + authContext, + columns, + openRouterApiKey, + ); const pkBlock = pkColumns.length > 0 diff --git a/backend/src/pipeline/schema-inference.ts b/backend/src/pipeline/schema-inference.ts index 1f1ea2a..467a393 100644 --- a/backend/src/pipeline/schema-inference.ts +++ b/backend/src/pipeline/schema-inference.ts @@ -2,6 +2,7 @@ import { generateText, Output, NoObjectGeneratedError } from "ai"; import { createOpenRouter } from 
"@openrouter/ai-sdk-provider"; import { DEFAULT_MODEL_IDS } from "../config/models.js"; +import { requireOpenRouterApiKey } from "../local-credentials.js"; import { datasetSchemaSchema, type DatasetSchema } from "./types.js"; const SYSTEM_PROMPT = `You are a data engineering assistant that converts natural-language prompts into structured dataset schemas. Given a user prompt describing a dataset they want to build, you produce a precise schema definition. @@ -26,18 +27,18 @@ Rules: - Prefer concrete column choices over speculative ones — better to omit a column than guess wildly. - When a column is a scalar numeric rating (e.g. average score like 4.3/5 for restaurants, cafes, hotels, products, apps): name it generically (e.g. "rating" not "yelp_rating") and write a retrieval_hint explaining that review sites (Yelp, TripAdvisor, Google Maps) block direct page fetches, so the agent must extract ratings from **search result snippets**. The hint should say: "Search for \\" rating reviews\\" and include location terms only when location is part of the entity identity. Look for ratings in snippets from TripAdvisor (\\"rated X.X of 5\\"), Yelp search listings (\\"X.X (N reviews)\\"), or aggregator sites (Birdeye, joe.coffee, giftly, Uber Eats, menufyy). Do NOT try to fetch yelp.com or tripadvisor.com directly — they block automated access. Accept ratings from any reputable source." If including a rating column, also add a "rating_source" text column so the agent records where the rating came from. Do not rename review-count or review-text fields to "rating" — keep those as distinct columns (e.g. 
"review_count") when the user explicitly asks for them.`; -function getModel(modelSlug?: string) { - const apiKey = process.env.OPENROUTER_API_KEY; - if (!apiKey) { - throw new Error("Missing required environment variable: OPENROUTER_API_KEY"); - } - const openrouter = createOpenRouter({ apiKey }); +async function getModel(modelSlug?: string) { + const apiKey = await requireOpenRouterApiKey(); + const openrouter = createOpenRouter({ + apiKey, + baseURL: process.env.OPENROUTER_BASE_URL, + }); const resolvedSlug = modelSlug ?? DEFAULT_MODEL_IDS.SCHEMA_INFERENCE; return openrouter(resolvedSlug); } export async function inferSchema(prompt: string, modelSlug?: string): Promise { - const model = getModel(modelSlug); + const model = await getModel(modelSlug); try { return await callOnce(model, prompt); } catch (error) { diff --git a/docker-compose.dev.yml b/docker-compose.dev.yml index 1a12432..3506ef9 100644 --- a/docker-compose.dev.yml +++ b/docker-compose.dev.yml @@ -31,11 +31,16 @@ services: CLIENT_ORIGIN: http://localhost:3500 CONVEX_URL: http://convex:3210 PORT: 3501 + PROD: ${PROD:-} CONVEX_SELF_HOSTED_ADMIN_KEY: ${CONVEX_SELF_HOSTED_ADMIN_KEY:-} CLERK_SECRET_KEY: ${CLERK_SECRET_KEY:-} CLERK_PUBLISHABLE_KEY: ${NEXT_PUBLIC_CLERK_PUBLISHABLE_KEY:-} OPENROUTER_API_KEY: ${OPENROUTER_API_KEY:-} TINYFISH_API_KEY: ${TINYFISH_API_KEY:-} + BIGSET_LOCAL_WORKSPACE_ID: ${BIGSET_LOCAL_WORKSPACE_ID:-} + LOCAL_KEYCHAIN_URL: http://host.docker.internal:${LOCAL_KEYCHAIN_PORT:-3502} + LOCAL_KEYCHAIN_TOKEN: ${LOCAL_KEYCHAIN_TOKEN:-} + LOCAL_KEYCHAIN_TIMEOUT_MS: ${LOCAL_KEYCHAIN_TIMEOUT_MS:-5000} # Transactional email — when unset, the email module no-ops. 
RESEND_API_KEY: ${RESEND_API_KEY:-} EMAIL_FROM: ${EMAIL_FROM:-BigSet } @@ -47,6 +52,8 @@ services: REFRESH_SCHEDULER_POLL_MS: ${REFRESH_SCHEDULER_POLL_MS:-60000} REFRESH_SCHEDULER_BATCH_SIZE: ${REFRESH_SCHEDULER_BATCH_SIZE:-5} REFRESH_SCHEDULER_STALE_AFTER_MS: ${REFRESH_SCHEDULER_STALE_AFTER_MS:-21600000} + extra_hosts: + - "host.docker.internal:host-gateway" depends_on: convex: condition: service_healthy @@ -65,10 +72,17 @@ services: environment: HOST: 0.0.0.0 PORT: 4111 + PROD: ${PROD:-} OPENROUTER_API_KEY: ${OPENROUTER_API_KEY:-} CONVEX_URL: http://convex:3210 CONVEX_SELF_HOSTED_ADMIN_KEY: ${CONVEX_SELF_HOSTED_ADMIN_KEY:-} TINYFISH_API_KEY: ${TINYFISH_API_KEY:-} + BIGSET_LOCAL_WORKSPACE_ID: ${BIGSET_LOCAL_WORKSPACE_ID:-} + LOCAL_KEYCHAIN_URL: http://host.docker.internal:${LOCAL_KEYCHAIN_PORT:-3502} + LOCAL_KEYCHAIN_TOKEN: ${LOCAL_KEYCHAIN_TOKEN:-} + LOCAL_KEYCHAIN_TIMEOUT_MS: ${LOCAL_KEYCHAIN_TIMEOUT_MS:-5000} + extra_hosts: + - "host.docker.internal:host-gateway" depends_on: convex: condition: service_healthy @@ -95,8 +109,10 @@ services: - ./scripts:/scripts:ro environment: NEXT_PUBLIC_CONVEX_URL: http://localhost:3210 - NEXT_PUBLIC_CLERK_PUBLISHABLE_KEY: ${NEXT_PUBLIC_CLERK_PUBLISHABLE_KEY} - CLERK_SECRET_KEY: ${CLERK_SECRET_KEY} + NEXT_PUBLIC_PROD: ${PROD:-} + PROD: ${PROD:-} + NEXT_PUBLIC_CLERK_PUBLISHABLE_KEY: ${NEXT_PUBLIC_CLERK_PUBLISHABLE_KEY:-} + CLERK_SECRET_KEY: ${CLERK_SECRET_KEY:-} NEXT_PUBLIC_CLERK_SIGN_IN_URL: /sign-in NEXT_PUBLIC_CLERK_SIGN_UP_URL: /sign-up NEXT_PUBLIC_CLERK_SIGN_IN_FALLBACK_REDIRECT_URL: /dashboard diff --git a/frontend/.gitignore b/frontend/.gitignore index 598418e..995cb8f 100644 --- a/frontend/.gitignore +++ b/frontend/.gitignore @@ -38,9 +38,6 @@ yarn-error.log* package-lock.json yarn.lock -# convex generated -convex/_generated/ - # vercel .vercel diff --git a/frontend/app/convex-provider.tsx b/frontend/app/convex-provider.tsx index 7d876e0..be33d3e 100644 --- a/frontend/app/convex-provider.tsx +++ 
b/frontend/app/convex-provider.tsx @@ -1,15 +1,21 @@ "use client"; import { ConvexReactClient } from "convex/react"; +import { ConvexProvider } from "convex/react"; import { ConvexProviderWithClerk } from "convex/react-clerk"; -import { useAuth } from "@clerk/nextjs"; import { type ReactNode } from "react"; +import { useAuth } from "@clerk/nextjs"; +import { isLocalMode } from "@/lib/app-mode"; const convex = new ConvexReactClient( - process.env.NEXT_PUBLIC_CONVEX_URL as string + process.env.NEXT_PUBLIC_CONVEX_URL || "http://127.0.0.1:3210" ); export function ConvexClientProvider({ children }: { children: ReactNode }) { + if (isLocalMode) { + return {children}; + } + return ( {children} diff --git a/frontend/app/dashboard/page.tsx b/frontend/app/dashboard/page.tsx index 747e5c0..7c3ca80 100644 --- a/frontend/app/dashboard/page.tsx +++ b/frontend/app/dashboard/page.tsx @@ -3,8 +3,7 @@ import { useEffect, useMemo, useRef, useState } from "react"; import Link from "next/link"; import { useRouter } from "next/navigation"; -import { useQuery, useConvexAuth } from "convex/react"; -import { useUser, useClerk } from "@clerk/nextjs"; +import { useQuery } from "convex/react"; import { api } from "@/convex/_generated/api"; import { DatasetCard, @@ -12,29 +11,34 @@ import { } from "@/components/dataset/DatasetCard"; import { useTheme } from "@/components/ThemeToggle"; import { QuotaBadge } from "@/components/QuotaBadge"; +import { LocalUtilityMenu } from "@/components/LocalUtilityMenu"; import { EVENTS, track } from "@/lib/analytics"; import type { ProfileUser } from "@/lib/profile-user"; +import { useAppClerk, useAppConvexAuth, useAppUser } from "@/lib/app-auth"; +import { isLocalMode } from "@/lib/app-mode"; export default function DashboardPage() { - const { isAuthenticated, isLoading } = useConvexAuth(); - const { user } = useUser(); - const { signOut } = useClerk(); + const { isAuthenticated, isLoading } = useAppConvexAuth(); + const { user } = useAppUser(); + const { 
signOut } = useAppClerk(); const [search, setSearch] = useState(""); const mine = useQuery( api.datasets.listMine, isAuthenticated ? {} : "skip", ); - // Public datasets are open to anonymous users too, so no `skip` gate. - const curated = useQuery(api.datasets.listPublic, {}); + const showCurated = !isLocalMode; + const curated = useQuery( + api.datasets.listPublic, + showCurated ? {} : "skip", + ); - // Quota state drives the "+ New Dataset" button — disabled when the - // user is at their free-tier limit. `undefined` while loading. + // Quota limits are cloud-only. Local mode can create datasets without this gate. const usage = useQuery( api.quota.getMy, - isAuthenticated ? {} : "skip", + !isLocalMode && isAuthenticated ? {} : "skip", ); - const atLimit = usage !== undefined && usage.remaining === 0; + const atLimit = !isLocalMode && usage !== undefined && usage.remaining === 0; // Fire dashboard_viewed once per mount when both queries have resolved, // so we attach accurate counts. `dashboardFired` prevents the effect @@ -45,15 +49,15 @@ export default function DashboardPage() { !dashboardFired.current && isAuthenticated && mine !== undefined && - curated !== undefined + (!showCurated || curated !== undefined) ) { dashboardFired.current = true; track(EVENTS.DASHBOARD_VIEWED, { owned_count: mine.length, - curated_count: curated.length, + curated_count: showCurated ? (curated?.length ?? 0) : 0, }); } - }, [isAuthenticated, mine, curated]); + }, [isAuthenticated, mine, curated, showCurated]); const { filteredMine, filteredCurated } = useMemo(() => { const q = search.trim().toLowerCase(); @@ -85,9 +89,15 @@ export default function DashboardPage() { BigSet BigSet
- -
- signOut()} /> + {isLocalMode ? ( + + ) : ( + <> + +
+ signOut()} /> + + )}
@@ -187,19 +197,23 @@ export default function DashboardPage() { } /> -
+ {!isLocalMode && ( + <> +
-
+
+ + )}
); @@ -300,14 +314,16 @@ function ProfileMenu({ {open && ( )} diff --git a/frontend/app/dashboard/settings/layout.tsx b/frontend/app/dashboard/settings/layout.tsx index 5073466..1a4c0bf 100644 --- a/frontend/app/dashboard/settings/layout.tsx +++ b/frontend/app/dashboard/settings/layout.tsx @@ -1,17 +1,19 @@ "use client"; import Link from "next/link"; -import { useUser, useClerk } from "@clerk/nextjs"; import { useTheme } from "@/components/ThemeToggle"; +import { LocalUtilityMenu } from "@/components/LocalUtilityMenu"; import { useEffect, useRef, useState } from "react"; +import { useAppClerk, useAppUser } from "@/lib/app-auth"; +import { isLocalMode } from "@/lib/app-mode"; export default function SettingsLayout({ children, }: { children: React.ReactNode; }) { - const { user } = useUser(); - const { signOut } = useClerk(); + const { user } = useAppUser(); + const { signOut } = useAppClerk(); const [profileOpen, setProfileOpen] = useState(false); const profileRef = useRef(null); const { theme, toggle: toggleTheme } = useTheme(); @@ -31,7 +33,7 @@ export default function SettingsLayout({ const imageUrl = user?.imageUrl; return ( -
+
BigSet @@ -46,58 +48,62 @@ export default function SettingsLayout({ ← Back to Dashboard
-
- + {isLocalMode ? ( + + ) : ( +
+ - {profileOpen && ( -
-
-

{name}

- {email && ( -

- {email} -

- )} + {profileOpen && ( +
+
+

{name}

+ {email && ( +

+ {email} +

+ )} +
+
+ + +
-
- - -
-
- )} -
+ )} +
+ )}
-
+
{children}
); -} \ No newline at end of file +} diff --git a/frontend/app/dashboard/settings/models/page.tsx b/frontend/app/dashboard/settings/models/page.tsx index 5531f9e..6a3acb4 100644 --- a/frontend/app/dashboard/settings/models/page.tsx +++ b/frontend/app/dashboard/settings/models/page.tsx @@ -2,18 +2,19 @@ import { useState, useEffect } from "react"; import { useQuery } from "convex/react"; -import { useAuth } from "@clerk/nextjs"; import { api } from "@/convex/_generated/api"; import { getModelConfig, saveModelConfig, getOpenRouterModels, refreshOpenRouterModels, type EffectiveModelConfig, type OpenRouterModel } from "@/lib/backend"; import { SettingsPageLayout } from "@/components/settings/SettingsPageLayout"; import { SettingsHeader } from "@/components/settings/SettingsHeader"; import { SettingsTile } from "@/components/settings/SettingsTile"; +import { LocalCredentialsPanel } from "@/components/settings/LocalCredentialsPanel"; import { ModelSideSheet } from "@/components/settings/ModelSideSheet"; import { MODEL_ROLES, type ModelRole } from "@/components/settings/types"; import { SkeletonList } from "@/components/settings/Skeleton"; +import { useAppAuth } from "@/lib/app-auth"; export default function ModelSettingsPage() { - const { getToken } = useAuth(); + const { getToken } = useAppAuth(); const convexModels = useQuery(api.openRouterModels.list, {}); const [effectiveConfig, setEffectiveConfig] = useState(null); @@ -115,25 +116,29 @@ export default function ModelSettingsPage() { return ( - +
+ -
- {isLoading ? ( - - ) : ( - MODEL_ROLES.map((role) => ( - openSideSheet(role)} - /> - )) - )} + + +
+ {isLoading ? ( + + ) : ( + MODEL_ROLES.map((role) => ( + openSideSheet(role)} + /> + )) + )} +
{activeSheet && ( diff --git a/frontend/app/dataset/[id]/page.tsx b/frontend/app/dataset/[id]/page.tsx index d28abda..697addd 100644 --- a/frontend/app/dataset/[id]/page.tsx +++ b/frontend/app/dataset/[id]/page.tsx @@ -3,8 +3,7 @@ import { useParams } from "next/navigation"; import Link from "next/link"; import { useCallback, useEffect, useMemo, useRef, useState } from "react"; -import { useMutation, useQuery, useConvexAuth } from "convex/react"; -import { useAuth, useUser, useClerk } from "@clerk/nextjs"; +import { useMutation, useQuery } from "convex/react"; import { api } from "@/convex/_generated/api"; import type { Id } from "@/convex/_generated/dataModel"; import { DatasetTable } from "@/components/table"; @@ -22,13 +21,14 @@ import { type RefreshCadence, } from "@/lib/refresh-cadence"; import type { ProfileUser } from "@/lib/profile-user"; +import { useAppAuth, useAppClerk, useAppConvexAuth, useAppUser } from "@/lib/app-auth"; export default function DatasetPage() { const params = useParams(); - const { isLoading: authLoading, isAuthenticated } = useConvexAuth(); - const { userId, getToken } = useAuth(); - const { user } = useUser(); - const { signOut } = useClerk(); + const { isLoading: authLoading, isAuthenticated } = useAppConvexAuth(); + const { userId, getToken } = useAppAuth(); + const { user } = useAppUser(); + const { signOut } = useAppClerk(); const [exporting, setExporting] = useState<"csv" | "xlsx" | null>(null); const [populating, setPopulating] = useState(false); const [updating, setUpdating] = useState(false); @@ -624,7 +624,10 @@ function SettingsDropdown({ }, [open, onClose]); useEffect(() => { - if (!open) setMaxRowCountInput(String(maxRowCount)); + if (!open) { + const id = setTimeout(() => setMaxRowCountInput(String(maxRowCount)), 0); + return () => clearTimeout(id); + } }, [maxRowCount, open]); return ( diff --git a/frontend/app/dataset/new/page.tsx b/frontend/app/dataset/new/page.tsx index bb4dded..63b8ebf 100644 --- 
a/frontend/app/dataset/new/page.tsx +++ b/frontend/app/dataset/new/page.tsx @@ -3,11 +3,11 @@ import { useEffect, useState, useRef } from "react"; import { useRouter } from "next/navigation"; import Link from "next/link"; -import { useAuth } from "@clerk/nextjs"; -import { useMutation, useQuery, useConvexAuth } from "convex/react"; +import { useMutation, useQuery } from "convex/react"; import { api } from "@/convex/_generated/api"; import { EVENTS, track } from "@/lib/analytics"; import { inferSchema, type InferredColumn } from "@/lib/backend"; +import { useAppAuth, useAppConvexAuth } from "@/lib/app-auth"; import { REFRESH_CADENCE_OPTIONS, type RefreshCadence, @@ -78,7 +78,7 @@ function TypeSelector({ value, onChange }: { value: ColumnType; onChange: (v: Co export default function NewDatasetPage() { const router = useRouter(); - const { isAuthenticated, isLoading } = useConvexAuth(); + const { isAuthenticated, isLoading } = useAppConvexAuth(); const [step, setStep] = useState("describe"); const [prompt, setPrompt] = useState(""); @@ -94,7 +94,7 @@ export default function NewDatasetPage() { "search_fetch" | "browser" | "hybrid" | null >(null); const [sourceHint, setSourceHint] = useState(""); - const { getToken } = useAuth(); + const { getToken } = useAppAuth(); const createDataset = useMutation(api.datasets.create); const usage = useQuery( diff --git a/frontend/app/layout.tsx b/frontend/app/layout.tsx index c4955a3..d035ee2 100644 --- a/frontend/app/layout.tsx +++ b/frontend/app/layout.tsx @@ -1,8 +1,10 @@ import type { Metadata } from "next"; import { Geist, Geist_Mono } from "next/font/google"; -import { ClerkProvider } from "@clerk/nextjs"; import { ConvexClientProvider } from "./convex-provider"; +import { AppAuthProvider } from "@/lib/app-auth"; import { AnalyticsProvider } from "@/lib/analytics-provider"; +import { LocalSetupGate } from "./local-setup-gate"; +import { ThemeSync } from "@/components/ThemeToggle"; import "./globals.css"; const geistSans = 
Geist({ @@ -46,14 +48,14 @@ export default function RootLayout({