dvcdsys · dvcdsys · May 4, 2026 · May 2, 2026 · May 3, 2026 · May 3, 2026
diff --git a/.cixignore b/.cixignore
@@ -0,0 +1,27 @@
+# Files / dirs the cix indexer skips for this repo.
+# Pattern syntax mirrors .gitignore.
+
+# Frontend toolchain noise — don't index 50k+ files of node_modules / build output
+server/dashboard/node_modules/
+server/internal/httpapi/dashboard/dist/
+
+# Build artefacts
+server/dist/
+cli/build/
+cli/dist/
+
+# Generated code (regenerate with `npm run gen:api`)
+server/dashboard/src/api/generated.ts
+
+# Embedded vendored bundles already covered by the linked sources elsewhere
+server/internal/httpapi/docs/swagger-ui/
+
+# Local data / runtime state
+data/
+.local-data/
+*.db
+*.db-wal
+*.db-shm
+
+# Legacy archived tree (see commit 063a14e)
+legacy/python-api/
diff --git a/.env.example b/.env.example
@@ -1,12 +1,46 @@
+# cix-server environment template. Copy to .env and fill in real values.
+
+# ── Auth ──────────────────────────────────────────────────────────────────
+# Header API key for direct CLI / CI traffic. Generate with:
+#   openssl rand -hex 32 | sed 's/^/cix_/'
 CIX_API_KEY=cix_<generated-64-hex>
+
+# First-boot admin seed (REQUIRED when the DB has no users yet — the server
+# refuses to start otherwise). The user is flagged must_change_password=true,
+# so the temporary password only works for the first login. Generate with:
+#   openssl rand -base64 18 | tr -d '/+=' | cut -c1-20
+CIX_BOOTSTRAP_ADMIN_EMAIL=admin@example.com
+CIX_BOOTSTRAP_ADMIN_PASSWORD=change-me-on-first-login
+
+# ── Networking + storage ──────────────────────────────────────────────────
 CIX_PORT=21847
-CIX_EMBEDDING_MODEL=awhiteside/CodeRankEmbed-Q8_0-GGUF
-CIX_MAX_FILE_SIZE=524288
-CIX_EXCLUDED_DIRS=node_modules,.git,.venv,__pycache__,dist,build,.next,.cache,.DS_Store
 CIX_CHROMA_PERSIST_DIR=~/.cix/data/chroma
 CIX_SQLITE_PATH=~/.cix/data/sqlite/projects.db
 CIX_GGUF_CACHE_DIR=~/.cix/data/models
+
+# ── Indexing ──────────────────────────────────────────────────────────────
+CIX_EMBEDDING_MODEL=awhiteside/CodeRankEmbed-Q8_0-GGUF
+CIX_MAX_FILE_SIZE=524288
+CIX_EXCLUDED_DIRS=node_modules,.git,.venv,__pycache__,dist,build,.next,.cache,.DS_Store
+
+# ── llama-server sidecar ──────────────────────────────────────────────────
 CIX_LLAMA_BIN_DIR=/app
+# 99 = offload all layers (CUDA / Metal). 0 = CPU only.
 CIX_N_GPU_LAYERS=0
 CIX_LLAMA_STARTUP_TIMEOUT=60
 CIX_EMBEDDINGS_ENABLED=true
+
+# ── PR-E runtime tunables (also editable from /dashboard/server) ──────────
+# 0 = auto. Threads → runtime.NumCPU()/2; batch → match n_ctx.
+CIX_LLAMA_THREADS=0
+CIX_LLAMA_BATCH=0
+# Embedding queue parallelism. 5 is the new default — pipelines host-side
+# prep with device inference. Drop to 1 if you observe contention.
+CIX_MAX_EMBEDDING_CONCURRENCY=5
+CIX_EMBEDDING_QUEUE_TIMEOUT=300
+
+# ── Optional: bootstrap the GGUF cache from a host-side file ──────────────
+# Paired with the bind-mount example in docker-compose.{yml,cuda.yml}.
+# Uncomment + set after wiring the bind, then drop both once the cache
+# is seeded.
+# CIX_BOOTSTRAP_GGUF_PATH=/bootstrap/model.gguf
diff --git a/.github/workflows/release-server.yml b/.github/workflows/release-server.yml
@@ -84,6 +84,11 @@ jobs:
           provenance: mode=max
           sbom: true
           build-args: VERSION=${{ steps.ver.outputs.version }}
+          # `openapi=doc` mounts the repo-root doc/ folder so the dashboard
+          # build stage can `COPY --from=openapi openapi.yaml` without us
+          # widening the primary build context (which is `server/`).
+          build-contexts: |
+            openapi=doc
           tags: ${{ steps.tags.outputs.tags }}
 
   docker-cuda:
@@ -139,6 +144,11 @@ jobs:
           provenance: mode=max
           sbom: true
           build-args: VERSION=${{ steps.ver.outputs.version }}
+          # `openapi=doc` mounts the repo-root doc/ folder so the dashboard
+          # build stage can `COPY --from=openapi openapi.yaml` without us
+          # widening the primary build context (which is `server/`).
+          build-contexts: |
+            openapi=doc
           tags: ${{ steps.tags.outputs.tags }}
 
   release:

diff --git a/.gitignore b/.gitignore
@@ -8,7 +8,11 @@ __pycache__/
 *$py.class
 *.egg-info/
 *.egg
-dist/
+# Note: the bare `dist/` rule was scoped to /dist/ (root only) so the
+# server/internal/httpapi/dashboard/dist/.gitkeep negation below
+# actually works — Git can't re-include a file if a parent directory
+# is broadly excluded.
+/dist/
 build/
 .eggs/
 *.whl
@@ -44,11 +48,25 @@ cli/dist/
 server/dist/
 server/exec.log
 
+# Dashboard build output — produced by `make dashboard-build`.
+# A committed `.gitkeep` keeps dist/ non-empty so `//go:embed all:dist` works
+# on a fresh clone (the embed.FS needs at least one entry). The real
+# index.html + assets/ tree is produced by `make dashboard-build` and never
+# tracked. The handler in dashboard.go returns an inline "please build"
+# placeholder when index.html is absent.
+server/dashboard/node_modules/
+server/internal/httpapi/dashboard/dist/*
+!server/internal/httpapi/dashboard/dist/.gitkeep
+
 # uv
 .python-version
 
-# Local docs
+# Local docs (top-level docs/ is a notebook directory per CLAUDE.md;
+# tracked project documentation lives in doc/). Exception below for the
+# embedded Swagger UI bundle inside the Go server package.
 docs/
+!server/internal/httpapi/docs/
+!server/internal/httpapi/docs/**
 
 # Claude Code
 .claude/

diff --git a/README.md b/README.md
@@ -667,8 +667,11 @@ idle draw. Embedding calls do not spike VRAM the way fp16 PyTorch attention
 used to — sequence length and batch size only change latency, not peak memory.
 
 `MAX_CHUNK_TOKENS` still caps the length of each code chunk (1 token ≈ 4 chars)
-and must stay ≤ `n_ctx` (8192). `MAX_EMBEDDING_CONCURRENCY` should stay at `1`
-for single-GPU setups — llama.cpp serialises through one context.
+and must stay ≤ `n_ctx` (8192). `MAX_EMBEDDING_CONCURRENCY` defaults to `5` —
+the indexing queue ships chunks in parallel; the llama-server sidecar still
+serialises requests through one context, but pipelining host-side prep with
+device inference at this depth saturates the GPU without measurable latency
+cost. Drop to `1` only if you observe contention.
 
 See [`doc/vram-profiling.md`](doc/vram-profiling.md) for methodology and numbers.
 

diff --git a/doc/SECURITY_DEPLOYMENT.md b/doc/SECURITY_DEPLOYMENT.md
@@ -0,0 +1,160 @@
+# Security & deployment notes
+
+This document captures the operational requirements that the cix-server
+codebase assumes but does not enforce on its own. Read it before exposing
+the dashboard to users beyond a single trusted operator.
+
+## Trusted-proxy posture for `X-Forwarded-For`
+
+The server reads `X-Forwarded-For` (first hop) when present and uses the
+result for two things:
+
+1. **Audit metadata** — stored as `sessions.last_seen_ip` and
+   `api_keys.last_used_ip`.
+2. **Per-IP login rate limit key** — see "Login brute-force resistance"
+   below. The per-(IP, email) key still binds independently of the IP
+   source, so password guessing against a known account is rate-limited
+   regardless; only the global per-IP sweep cap depends on the header
+   being trustworthy.
+
+This makes the trusted-proxy posture **load-bearing for security**, not
+just for audit honesty. Two safe deployments:
+
+- **Reverse proxy in front** (Cloudflare / Caddy / nginx / Traefik / ALB):
+  configure the proxy to *replace* the inbound `X-Forwarded-For` with the
+  real client IP, not append to it. Drop `X-Real-IP` if you don't need
+  it. This is the recommended posture for any internet-exposed
+  deployment.
+- **Direct exposure on a trusted network** (LAN / VPN only): no proxy
+  overwrites `X-Forwarded-For` for you, so an attacker who can reach the
+  port can also forge the header. The per-(IP, email) cap still slows
+  password guessing, but the global per-IP cap is bypassable. Acceptable
+  on a trusted network, never on the open internet.
+
+Example for nginx:
+
+```nginx
+location / {
+  proxy_set_header X-Forwarded-For $remote_addr;  # replace, not append
+  proxy_set_header Host $host;
+  proxy_pass http://cix-server:21847;
+}
+```
+
+## TLS
+
+The session cookie's `Secure` attribute is set automatically when the
+request arrives over TLS (`r.TLS != nil`). For any deployment beyond
+`localhost`, terminate TLS in front of the server and ensure the server
+sees TLS-marked requests so the cookie is not sent in cleartext.
+
+If you front the server with a TLS-terminating proxy that downgrades to
+plain HTTP for the upstream hop, the auto-detection will return false and
+`Secure` will be omitted. Two fixes:
+
+- Terminate TLS directly in cix-server (drop the proxy).
+- Or configure the proxy to make the upstream hop look TLS-marked — the
+  details vary; consult the proxy docs.
+
+## Login brute-force resistance
+
+POST `/api/v1/auth/login` is rate-limited in process (`internal/httpapi/loginlimiter.go`):
+
+- **5 failed attempts per (IP, email) per 15 minutes** — slows guessing
+  against a known account. Cleared on a successful login so a user who
+  fat-fingers their password a few times is not stuck.
+- **60 attempts per IP per minute** — slows horizontal sweeps across many
+  emails from a single source. Not cleared on a successful login.
+
+This is a single-process limiter; multi-replica deployments do not share
+state. If you scale out, put a shared throttle (Redis, your reverse proxy)
+in front of `/api/v1/auth/login` or accept that the effective caps scale
+with the replica count (N replicas allow up to N× the attempts).
+
+## Request body size limits
+
+A request-body middleware rejects oversize payloads up-front:
+
+- **Default cap: 1 MiB** for every endpoint.
+- **Indexing cap: 64 MiB** for `POST /api/v1/projects/{path}/index/files`,
+  which legitimately receives JSON-encoded source from a batch of files.
+  At default config (batch=20, max-file=512 KiB) a real payload is ~11 MiB;
+  the cap also covers operator-tuned worst case (batch=50 × max-file=1 MiB
+  ≈ 55 MiB) with headroom.
+
+The cap fires on `Content-Length` (clean 413) and on chunked-transfer
+overflow (the JSON decoder fails and the handler returns 422). If your
+indexer batches need more than 64 MiB, raise `indexingMaxBodyBytes` in
+`internal/httpapi/middleware.go` rather than asking operators to disable
+the cap.
+
+## Bootstrap admin
+
+On a fresh database the server reads `CIX_BOOTSTRAP_ADMIN_EMAIL` and
+`CIX_BOOTSTRAP_ADMIN_PASSWORD` and creates the first admin row, marked
+`must_change_password=1` so the operator must change the password on
+first login.
+
+- Both env vars must be set together; setting only one is a fatal
+  startup error.
+- Once the users table is non-empty, the env vars are ignored. Rotating
+  the bootstrap password by editing the env has no effect on an already
+  initialised installation — go through the dashboard or directly through SQLite.
+- The bootstrap path is **not transactional**. If two server instances
+  start simultaneously against the same fresh database, one of them will
+  fail with a UNIQUE-constraint error from the duplicate email. This is
+  intentional (better to fail loud than silently create two admins) but
+  operationally surprising under HA-style deployments — start a single
+  instance first, then scale out.
+
+## Password policy
+
+The server enforces only `len(password) >= 8`. There is no complexity
+rule, no breached-password dictionary check, no rotation prompt.
+
+For internet-exposed deployments, choose admin passwords accordingly: a
+20+ character random passphrase from a password manager beats anything
+the server could enforce. The rate limiter above caps the damage of
+weak passwords at ~480 guesses per (IP, email) per day.
+
+## No self-service password reset
+
+A user who forgets their password cannot reset it themselves. Recovery
+options (in order of preference):
+
+1. Another admin issues `POST /api/v1/admin/users` with a new initial
+   password and `must_change_password=1`, then disables the old account.
+2. Direct SQLite access to clear `users.disabled_at` and reset
+   `users.password_hash` (use bcrypt cost 12).
+
+Plan for this when designating admins — keep at least two so an admin
+reset never requires DB-level intervention.
+
+## API key scoping
+
+API keys inherit the full permissions of their owning user. A viewer's
+key can do anything a viewer can; an admin's key can do anything an
+admin can. There is no read-only scoping, no per-project scoping, no
+expiry.
+
+For automated callers (CI, scripts) that only need to read, create a
+dedicated viewer user and issue keys from that account. Rotate keys via
+`DELETE /api/v1/api-keys/{id}` rather than reusing them.
+
+## What the server does NOT do
+
+If your threat model needs any of these, build them in front of cix-server
+or accept the risk:
+
+- **CSRF tokens.** Protection relies on the cookie's `SameSite=Strict`
+  attribute, which modern browsers honour (`HttpOnly` is also set, but it
+  only hides the cookie from scripts). There is no separate token to validate.
+- **CORS.** No `Access-Control-Allow-*` headers are emitted; same-origin
+  is the assumption.
+- **WAF / IDS.** No IP allowlisting, no anomaly detection. Use your
+  reverse proxy or a host-level firewall.
+- **Multi-tenant project ownership.** All authenticated users see all
+  projects. Destructive mutations (PATCH/DELETE) are
+  admin-only; create/list/search are open to any authenticated user. If you
+  need true tenant separation, run separate cix-server instances per
+  tenant.