diff --git a/Dockerfile b/Dockerfile index b836b82..803c222 100644 --- a/Dockerfile +++ b/Dockerfile @@ -38,12 +38,14 @@ FROM debian:trixie-slim RUN apt-get update && apt-get install -y \ ca-certificates \ + curl \ && rm -rf /var/lib/apt/lists/* WORKDIR /app COPY --from=builder /app/target/release/compass /app/compass -RUN mkdir -p /app/data +COPY scripts/ /app/scripts/ +RUN chmod +x /app/scripts/*.sh && mkdir -p /app/data ENV PORT=4001 ENV DATA_DIR=/app/data diff --git a/Dockerfile.gpu b/Dockerfile.gpu index d9dae94..0a1b9eb 100644 --- a/Dockerfile.gpu +++ b/Dockerfile.gpu @@ -41,12 +41,14 @@ FROM nvidia/cuda:12.4.1-runtime-ubuntu22.04 RUN apt-get update && apt-get install -y \ ca-certificates \ + curl \ && rm -rf /var/lib/apt/lists/* WORKDIR /app COPY --from=builder /app/target/release/compass /app/compass -RUN mkdir -p /app/data +COPY scripts/ /app/scripts/ +RUN chmod +x /app/scripts/*.sh && mkdir -p /app/data ENV PORT=4001 ENV DATA_DIR=/app/data diff --git a/MOSAIC.md b/MOSAIC.md new file mode 100644 index 0000000..38c9181 --- /dev/null +++ b/MOSAIC.md @@ -0,0 +1,72 @@ +# mosaic-compass + +Fork of [runcaptain/compass](https://github.com/runcaptain/compass) tailored for +Captain's Mosaic production deployment at `api.mosaic.runcaptain.com` via Porter. 
+ +## What this fork changes + +| Concern | Upstream | This fork | +|---|---|---| +| Auth | None — all routes open | Bearer-token middleware on every route except `/health` | +| Healthcheck | `docker-compose.yml` calls `curl`, runtime image has no `curl` (always reports unhealthy) | `curl` installed in runtime stage of both Dockerfiles | +| Embedding models | Expected in `$DATA_DIR/models/bge-small/`, never auto-downloaded | `scripts/download-models.sh` runs as a Porter pre-deploy job on each deploy and fetches BGE-small from HuggingFace only if it is not already present (idempotent) | +| Deploy target | Generic Docker / docker-compose | Porter v2 (`porter.yaml` included) | +| Telemetry | On by default | `COMPASS_TELEMETRY=off` set in `porter.yaml` | + +## Required environment variables + +| Var | Required? | Notes | +|---|---|---| +| `COMPASS_API_KEY` | **Yes (prod)** | Shared secret. Clients must send `Authorization: Bearer <key>`. If unset, the server starts in unauthenticated dev mode and logs a loud warning. Configure as a Porter secret — do not commit. | +| `PORT` | No | Default `4001`. | +| `DATA_DIR` | No | Default `/app/data`. Must be a persistent volume in Porter. | +| `COMPASS_TELEMETRY` | No | Set to `off` (default in `porter.yaml`) to disable anonymous PostHog telemetry. | +| `COMPASS_BGE_REPO` | No | Override the HuggingFace repo used by the pre-deploy job. Default `BAAI/bge-small-en-v1.5`. | + +## Auth + +All routes require `Authorization: Bearer <key>` **except** `GET /health`, which is open so Porter and nginx probes can reach it without a key. + +```bash +# Allowed +curl https://api.mosaic.runcaptain.com/health + +# Requires Bearer +curl -H "Authorization: Bearer $COMPASS_API_KEY" \ + https://api.mosaic.runcaptain.com/collections +``` + +Returns `401 Unauthorized` on missing or mismatched key. The key check is constant-time (see `ct_eq` in `crates/compass/src/api/mod.rs`). + +## Deploying with Porter + +1. Create a Porter application from this repo. +2. 
Set the `COMPASS_API_KEY` secret in the Porter UI (generate with `openssl rand -hex 32`). +3. Attach a persistent volume (EBS) mounted at `/app/data`. +4. Point the `api.mosaic.runcaptain.com` DNS record at the Porter ingress. +5. Deploy. Each deploy will: + - Run the pre-deploy job (`scripts/download-models.sh`) which idempotently fetches BGE-small into `$DATA_DIR/models/bge-small/`. + - Start the `api` web service on port 4001. + - Mark healthy once `GET /health` returns 200. + +## Endpoint surface + +Unchanged from upstream. See `README.md` for full API documentation. All paths +require the Bearer header except `/health`. + +``` +GET /health (public) +POST /collections +GET /collections +GET /collections/{name} +DELETE /collections/{name} +POST /collections/{name}/vector-spaces +GET /collections/{name}/vector-spaces +DELETE /collections/{name}/vector-spaces/{space} +POST /collections/{name}/vector-spaces/{space}/rebuild +GET /collections/{name}/vector-spaces/{space}/status +PUT /collections/{name}/default-vector-space +POST /collections/{name}/ingest (64 MB body limit) +POST /collections/{name}/search +GET /collections/{name}/facets +``` diff --git a/crates/compass/src/api/mod.rs b/crates/compass/src/api/mod.rs index 10cdc17..668a5c7 100644 --- a/crates/compass/src/api/mod.rs +++ b/crates/compass/src/api/mod.rs @@ -2,6 +2,9 @@ // // All HTTP endpoints wired up here. Includes v2 endpoints for // vector space CRUD, rebuild triggers, and status checks. +// +// mosaic-compass: Bearer-token auth middleware is applied to all routes +// except /health. See `AuthConfig` and `auth_middleware` below. 
pub mod collections; pub mod ingest;
@@ -10,7 +13,10 @@ pub mod search;
 use crate::collections::CollectionManager;
 use crate::embed::EmbedState;
 use crate::models::HealthResponse;
-use axum::extract::State;
+use axum::extract::{Request, State};
+use axum::http::{header::AUTHORIZATION, StatusCode};
+use axum::middleware::{from_fn_with_state, Next};
+use axum::response::Response;
 use axum::routing::{delete, get, post, put};
 use axum::{Json, Router};
 use std::sync::Arc;
@@ -21,9 +27,50 @@ pub struct AppState {
     pub embed_state: Arc<EmbedState>,
 }
 
-/// Build the Axum router with all Compass endpoints.
-pub fn build_router(state: Arc<AppState>) -> Router {
-    Router::new()
+/// Auth configuration for the protected-route middleware.
+/// `None` disables auth (dev mode); a `Some(key)` requires `Authorization: Bearer <key>`.
+#[derive(Clone)]
+pub struct AuthConfig {
+    pub expected_key: Option<String>,
+}
+
+/// Constant-time byte comparison to avoid timing attacks on the API key check.
+/// Lengths are compared first, so timing can reveal the key's length but not its contents;
+/// equal-length inputs are always scanned in full.
+fn ct_eq(a: &[u8], b: &[u8]) -> bool {
+    // OR together every XOR-ed byte pair without short-circuiting; zero means no byte differed.
+    a.len() == b.len() && a.iter().zip(b).fold(0u8, |diff, (x, y)| diff | (x ^ y)) == 0
+}
+
+/// Bearer-token middleware. Applied to every route except `/health`.
+/// Returns `401 Unauthorized` unless no key is configured or the presented token matches it.
+async fn auth_middleware(
+    State(cfg): State<Arc<AuthConfig>>,
+    req: Request,
+    next: Next,
+) -> Result<Response, StatusCode> {
+    // Auth schemes are case-insensitive (RFC 9110 §11.1), so `bearer <key>` is accepted too.
+    let provided = req
+        .headers()
+        .get(AUTHORIZATION)
+        .and_then(|h| h.to_str().ok())
+        .and_then(|s| s.split_once(' '))
+        .filter(|(scheme, _)| scheme.eq_ignore_ascii_case("Bearer"))
+        .map(|(_, token)| token);
+
+    match (cfg.expected_key.as_deref(), provided) {
+        (Some(expected), Some(got)) if ct_eq(expected.as_bytes(), got.as_bytes()) => {
+            Ok(next.run(req).await)
+        }
+        (None, _) => Ok(next.run(req).await),
+        _ => Err(StatusCode::UNAUTHORIZED),
+    }
+}
+
+/// Build the Axum router with all Compass endpoints. `/health` is unauthenticated;
+/// every other route requires `Authorization: Bearer <key>` when the key is set.
+pub fn build_router(state: Arc<AppState>, auth: Arc<AuthConfig>) -> Router {
+    let protected = Router::new()
         // ── Collection CRUD ──────────────────────────────────────────────
         .route("/collections", post(collections::create_collection))
         .route("/collections", get(collections::list_collections))
@@ -65,8 +112,12 @@ pub fn build_router(state: Arc<AppState>) -> Router {
             post(search::search_collection),
         )
         .route("/collections/{name}/facets", get(search::get_facets))
-        // ── Health ───────────────────────────────────────────────────────
+        .layer(from_fn_with_state(auth, auth_middleware));
+
+    Router::new()
+        // ── Health (unauthenticated) ─────────────────────────────────────
         .route("/health", get(health_check))
+        .merge(protected)
         // 64 MB body limit. Default 2 MB is too small for batched ingest with embeddings.
         .layer(axum::extract::DefaultBodyLimit::max(64 * 1024 * 1024))
         .with_state(state)
diff --git a/crates/compass/src/main.rs b/crates/compass/src/main.rs
index 723b615..1456ce9 100644
--- a/crates/compass/src/main.rs
+++ b/crates/compass/src/main.rs
@@ -34,7 +34,7 @@ mod scoring;
 mod search;
 mod telemetry;
 
-use api::AppState;
+use api::{AppState, AuthConfig};
 use std::env;
 use std::path::PathBuf;
 use std::sync::Arc;
@@ -71,7 +71,18 @@ async fn main() -> Result<(), Box> {
     // Anonymous telemetry — opt out with COMPASS_TELEMETRY=off or DO_NOT_TRACK=1
     telemetry::spawn_telemetry(data_dir.clone(), app_state.manager.clone());
 
-    let app = api::build_router(app_state).layer(cors);
+    // mosaic-compass: Bearer-token auth via COMPASS_API_KEY. When unset, auth is disabled.
+ let api_key = env::var("COMPASS_API_KEY").ok().filter(|s| !s.is_empty()); + if api_key.is_some() { + tracing::info!("API key auth enabled (Authorization: Bearer required on all routes except /health)"); + } else { + tracing::warn!("COMPASS_API_KEY not set — API is unauthenticated (dev mode)"); + } + let auth_config = Arc::new(AuthConfig { + expected_key: api_key, + }); + + let app = api::build_router(app_state, auth_config).layer(cors); let addr = format!("0.0.0.0:{}", port); tracing::info!("Compass listening on {}", addr); diff --git a/porter.yaml b/porter.yaml new file mode 100644 index 0000000..b79ca02 --- /dev/null +++ b/porter.yaml @@ -0,0 +1,35 @@ +version: v2 +name: mosaic-compass + +build: + method: docker + dockerfile: ./Dockerfile + +# Pre-deploy job runs before the web service starts on each deploy and idempotently downloads +# BGE-small into $DATA_DIR/models/bge-small. NOTE(review): confirm this job mounts the same volume as `api`. +predeploy: + run: /app/scripts/download-models.sh + +services: + - name: api + type: web + run: /app/compass + port: 4001 + cpuCores: 1 + ramMegabytes: 2048 + instances: 1 + domain: + name: api.mosaic.runcaptain.com + healthCheck: + enabled: true + httpPath: /health + # No volume is declared in this file: the persistent volume for collections, indexes, and models + # is attached at /app/data in Porter (MOSAIC.md, step 3) and must survive deploys and restarts. + smartOptimization: false + +env: + PORT: "4001" + DATA_DIR: /app/data + COMPASS_TELEMETRY: "off" + RUST_LOG: compass=info + # COMPASS_API_KEY is set as a Porter secret, not committed here. diff --git a/scripts/download-models.sh b/scripts/download-models.sh new file mode 100755 index 0000000..0b9bf75 --- /dev/null +++ b/scripts/download-models.sh @@ -0,0 +1,36 @@ +#!/usr/bin/env bash +# mosaic-compass: Pre-deploy job to download BGE-small embedding weights into the +# persistent volume. Runs once per Porter deploy, before the web service starts. +# Idempotent — skips download if the model is already present. +# +# Uses plain curl so the runtime image needs no Python. 
+set -euo pipefail
+
+DATA_DIR="${DATA_DIR:-/app/data}"
+BGE_DIR="$DATA_DIR/models/bge-small"
+HF_REPO="${COMPASS_BGE_REPO:-BAAI/bge-small-en-v1.5}"
+BASE_URL="https://huggingface.co/${HF_REPO}/resolve/main"
+
+# Files candle_bge.rs expects — the single list every presence check below is derived from.
+FILES=("config.json" "model.safetensors" "tokenizer.json")
+missing=0
+for f in "${FILES[@]}"; do [ -s "$BGE_DIR/$f" ] || missing=1; done # -s: a zero-byte file is not a model
+if [ "$missing" -eq 0 ]; then
+  echo "[download-models] BGE-small already present at $BGE_DIR — skipping"
+  exit 0
+fi
+
+echo "[download-models] Fetching $HF_REPO into $BGE_DIR"
+mkdir -p "$BGE_DIR"
+trap 'rm -f "$BGE_DIR"/*.tmp' EXIT # a failed download must not leave partial *.tmp files behind
+for f in "${FILES[@]}"; do
+  if [ -s "$BGE_DIR/$f" ]; then
+    echo " $f already present, skipping"
+    continue
+  fi
+  echo " downloading $f"
+  curl -fsSL --retry 3 --retry-delay 2 --connect-timeout 15 -o "$BGE_DIR/$f.tmp" "$BASE_URL/$f"
+  mv "$BGE_DIR/$f.tmp" "$BGE_DIR/$f"
+done
+echo "[download-models] Done. Contents:"
+ls -la "$BGE_DIR"