diff --git a/.github/workflows/openapi.yml b/.github/workflows/openapi.yml new file mode 100644 index 0000000..367720b --- /dev/null +++ b/.github/workflows/openapi.yml @@ -0,0 +1,88 @@ +--- +name: openapi + +# Validate the embedded OpenAPI specification on every change. +# Two checks run: +# * redocly lint — schema-level validation against the OpenAPI 3.1 +# meta-spec (broken refs, missing required fields, malformed +# types, etc.). +# * Go drift test — asserts every fiber route registered in +# `cmd/hypercache-server/main.go` has a matching path in +# `cmd/hypercache-server/openapi.yaml` (and vice-versa). +# +# Together they prevent the spec from silently drifting from the +# binary — neither one alone catches both classes of breakage. + +on: + pull_request: + paths: + - "cmd/hypercache-server/openapi.yaml" + - "cmd/hypercache-server/openapi.go" + - "cmd/hypercache-server/openapi_test.go" + - "cmd/hypercache-server/main.go" + - ".github/workflows/openapi.yml" + push: + branches: [ main ] + paths: + - "cmd/hypercache-server/openapi.yaml" + - "cmd/hypercache-server/openapi.go" + - "cmd/hypercache-server/openapi_test.go" + - "cmd/hypercache-server/main.go" + - ".github/workflows/openapi.yml" + +permissions: + contents: read + +jobs: + spec-lint: + name: redocly lint + runs-on: ubuntu-latest + timeout-minutes: 5 + steps: + - uses: actions/checkout@v6 + + - name: Setup Node + uses: actions/setup-node@v6 + with: + node-version: "22" + + # Pin a known-good redocly major. Patch updates are picked up + # automatically on next run; major bumps are explicit so a + # breaking change in the linter can't silently fail the build. 
+ - name: redocly lint + run: npx --yes @redocly/cli@1 lint cmd/hypercache-server/openapi.yaml + + drift-test: + name: code↔spec drift + runs-on: ubuntu-latest + timeout-minutes: 5 + steps: + - uses: actions/checkout@v6 + + - name: Load project settings + id: settings + run: | + set -a + source .project-settings.env + set +a + echo "go_version=${GO_VERSION}" >> "$GITHUB_OUTPUT" + + - name: Setup Go + uses: actions/setup-go@v6 + with: + go-version: "${{ steps.settings.outputs.go_version }}" + check-latest: true + + - name: Cache Go modules + uses: actions/cache@v5 + with: + path: | + ~/go/pkg/mod + ~/.cache/go-build + key: ${{ runner.os }}-go-${{ steps.settings.outputs.go_version }}-${{ + hashFiles('**/go.sum') }} + restore-keys: | + ${{ runner.os }}-go-${{ steps.settings.outputs.go_version }}- + + - name: Drift test + run: go test -run TestOpenAPISpecMatchesRoutes ./cmd/hypercache-server/ -count=1 diff --git a/.mdl_style.rb b/.mdl_style.rb index 884184f..88709e9 100644 --- a/.mdl_style.rb +++ b/.mdl_style.rb @@ -12,7 +12,7 @@ # allow_different_nesting permits same-text headings as long as they sit # under distinct parent headings — which is exactly the Keep-a-Changelog # shape, and still catches genuine duplicates within the same section. -rule "MD024", :allow_different_nesting => true +rule "MD024", :allow_different_nesting => true, :siblings_only => true # MkDocs pages start with YAML frontmatter (---\ntitle: ...\n---), so # the first line cannot be a top-level heading. MD041 fights that diff --git a/.yamllint.yaml b/.yamllint.yaml index 4d3cc6c..986ff13 100644 --- a/.yamllint.yaml +++ b/.yamllint.yaml @@ -14,3 +14,4 @@ ignore: | cspell.config.yaml FUNDING.yml codeql.yml + cmd/hypercache-server/openapi.yaml diff --git a/CHANGELOG.md b/CHANGELOG.md index 01cee24..233c957 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,110 @@ adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 
### Added +- **Client API auth v2: multi-token, scoped, mTLS-capable.** New + [`pkg/httpauth/`](pkg/httpauth/) package with `Policy`, + `TokenIdentity`, `CertIdentity`, `Scope` types and a + scope-enforcing fiber middleware. Replaces the single-token + bearerAuth helper in `cmd/hypercache-server/main.go`. Three + credential classes resolved in priority order (bearer → mTLS + cert → ServerVerify hook), with constant-time multi-token + compare that visits every configured token even on early match + to prevent token-cardinality timing leaks. Per-route scope + enforcement: `GET`/`HEAD`/owners-lookup/`batch-get` require + `ScopeRead`; `PUT`/`DELETE`/`batch-put`/`batch-delete` require + `ScopeWrite`. Anonymous identity (with `AllowAnonymous: true`) + receives all scopes — used by the binary to preserve the + zero-config dev posture. +- **YAML auth config + legacy env-var coexistence.** + `HYPERCACHE_AUTH_CONFIG=/etc/hypercache/auth.yaml` (new) loads + a multi-token policy with per-identity scopes: + + ```yaml + tokens: + - id: app-prod + token: "" + scopes: [read, write] + - id: ops + token: "" + scopes: [admin] + cert_identities: + - subject_cn: app.internal + scopes: [read] + allow_anonymous: false + ``` + + The legacy `HYPERCACHE_AUTH_TOKEN` keeps working byte-identical: + one synthesized identity with all three scopes. The two env + vars are NOT mutually exclusive — `HYPERCACHE_AUTH_CONFIG` + governs the client API, `HYPERCACHE_AUTH_TOKEN` continues to + drive the dist transport's symmetric peer auth (single trust + domain). Both can be set in the same deployment without + conflict. Missing or malformed config files exit the binary + non-zero rather than fall through to permissive open mode — + fail-closed by design. +- **mTLS on the client API.** New env vars + `HYPERCACHE_API_TLS_CERT`, `HYPERCACHE_API_TLS_KEY`, and + `HYPERCACHE_API_TLS_CLIENT_CA` wrap the listener with + `tls.NewListener`. 
With CA set, `RequireAndVerifyClientCert` + is enabled and the verified peer cert's Subject CN is matched + against the policy's `CertIdentities` to resolve the calling + identity. Plaintext, standard-TLS, and mTLS shapes all share + one listener path. End-to-end coverage at + [cmd/hypercache-server/mtls_e2e_test.go](cmd/hypercache-server/mtls_e2e_test.go) + drives a real handshake against an in-process CA / server-cert + / client-cert chain and asserts CN-to-identity resolution + works in both directions (matching CN → 200, non-matching + CN → 401). + +### Security + +- **Constant-time bearer-token compare on the client API.** Replaced + the plaintext `got != want` check at + [cmd/hypercache-server/main.go](cmd/hypercache-server/main.go) with + `crypto/subtle.ConstantTimeCompare` to defeat timing side-channels. + A naive string compare returns as soon as the first differing byte + is found, leaking per-byte equality of `HYPERCACHE_AUTH_TOKEN` to a + remote attacker who can measure response time. The fix mirrors the + dist transport's existing constant-time check at + [pkg/backend/dist_http_server.go:144-152](pkg/backend/dist_http_server.go#L144-L152). + No public API change; the env-var contract and "empty token → + open mode" back-compatible behavior are unchanged. New auth-test suite + at [cmd/hypercache-server/auth_test.go](cmd/hypercache-server/auth_test.go) + pins the contract: missing/wrong/malformed/lowercase/wrong-length + bearer headers all return 401, public meta routes (`/healthz`, + `/v1/openapi.yaml`) stay reachable without credentials, every + protected route fires the wrapper. The new `newAuthedServer` + helper drives `registerClientRoutes` directly so future wiring + regressions are caught (the existing `handlers_test.go::newTestServer` + deliberately bypasses auth for handler-correctness coverage). 
+ +### Added + +- **OpenAPI 3.1 specification + drift-detection.** The + `hypercache-server` binary now embeds its own contract via + [`cmd/hypercache-server/openapi.yaml`](cmd/hypercache-server/openapi.yaml) + (`//go:embed`) and serves it at `GET /v1/openapi.yaml` — every + running node is self-describing. The spec covers all ten client + routes (single-key PUT/GET/HEAD/DELETE, owners lookup, three + batch operations, plus the `/healthz` and `/v1/openapi.yaml` + meta endpoints), with reusable `ErrorResponse`, `ItemEnvelope`, + and batch-operation schemas, the `bearerAuth` security scheme, + and `operationId` on every operation for codegen-friendliness. + A drift detector at + [cmd/hypercache-server/openapi_test.go](cmd/hypercache-server/openapi_test.go) + drives `registerClientRoutes` directly and asserts every + fiber-registered route has a matching path in the spec — and + vice-versa — so the contract cannot silently fall out of sync + with the binary. Two CI jobs back this up at + [.github/workflows/openapi.yml](.github/workflows/openapi.yml): + `redocly lint` validates the schema against the OpenAPI 3.1 + meta-spec, and the Go drift test runs on every change to + `main.go` or the spec. The docs site renders the same spec + inline at the new + [API Reference](docs/api.md) page via the + `mkdocs-swagger-ui-tag` plugin — a single source of truth for + the binary, the docs, and any client codegen that points at a + live cluster. - **Documentation site on GitHub Pages**, built with MkDocs Material and published automatically on every push to `main`. Eight navigated pages — landing, quickstart, 5-node cluster tutorial, @@ -31,24 +135,24 @@ adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 
- **Richer client API — metadata inspection, JSON envelopes, batch operations.** Three additions to the `cmd/hypercache-server` HTTP surface: - - `HEAD /v1/cache/:key` returns the value's metadata in - `X-Cache-*` response headers (Version, Origin, Last-Updated, - TTL-Ms, Expires-At, Owners, Node) with no body — fast - existence + TTL inspection without paying the value-transfer - cost. 200 if present, 404 if not. - - `GET /v1/cache/:key` now honors `Accept: application/json` - and returns an `itemEnvelope` with the same metadata as - HEAD plus the base64-encoded value. The bare-`curl` default - remains raw bytes via `application/octet-stream` — current - clients are unaffected. - - `POST /v1/cache/batch/{get,put,delete}` enable bulk operations - in a single round-trip. Each request carries an array; the - response carries one result entry per item with per-item - status, owners, and error reporting. `batch-put` items - accept either UTF-8 strings (default) or base64-encoded byte - payloads via `value_encoding: "base64"`. Per-item errors are - surfaced in `error` + `code` fields without failing the - whole batch. + - `HEAD /v1/cache/:key` returns the value's metadata in + `X-Cache-*` response headers (Version, Origin, Last-Updated, + TTL-Ms, Expires-At, Owners, Node) with no body — fast + existence + TTL inspection without paying the value-transfer + cost. 200 if present, 404 if not. + - `GET /v1/cache/:key` now honors `Accept: application/json` + and returns an `itemEnvelope` with the same metadata as + HEAD plus the base64-encoded value. The bare-`curl` default + remains raw bytes via `application/octet-stream` — current + clients are unaffected. + - `POST /v1/cache/batch/{get,put,delete}` enable bulk operations + in a single round-trip. Each request carries an array; the + response carries one result entry per item with per-item + status, owners, and error reporting. 
`batch-put` items + accept either UTF-8 strings (default) or base64-encoded byte + payloads via `value_encoding: "base64"`. Per-item errors are + surfaced in `error` + `code` fields without failing the + whole batch. Six unit tests at [cmd/hypercache-server/handlers_test.go](cmd/hypercache-server/handlers_test.go) pin the contracts: HEAD present/missing, Accept-JSON envelope @@ -57,26 +161,26 @@ adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). - **SWIM self-refutation + cross-process gossip dissemination.** Closes the last `experimental` marker on the heartbeat path. Three pieces: - - **`acceptGossip` self-refute** — incoming entries that - reference the local node as Suspect or Dead at incarnation - ≥ ours now bump the local incarnation and re-mark Alive. - Higher-incarnation-wins propagation in the same function - disseminates the refutation cluster-wide, so a falsely- - suspected node can clear suspicion through gossip alone - (pre-fix the only path was a fresh probe). - - **HTTP gossip wire** — new `Gossip(ctx, targetID, members)` - method on `DistTransport`, new - `POST /internal/gossip` server endpoint (auth-wrapped), - new `GossipMember` wire DTO. `runGossipTick` now falls - through to the HTTP path when the transport isn't an - `InProcessTransport`, so cross-process clusters disseminate - membership state — pre-Phase-E this was an in-process-only - no-op. - - The `experimental` qualifier is removed from - `heartbeatLoop`'s comment + the heartbeat-section field - doc; SWIM-style indirect probes (Phase B.1) and - self-refutation (this round) together provide the SWIM - properties the marker was tracking. + - **`acceptGossip` self-refute** — incoming entries that + reference the local node as Suspect or Dead at incarnation + ≥ ours now bump the local incarnation and re-mark Alive. 
+ Higher-incarnation-wins propagation in the same function + disseminates the refutation cluster-wide, so a falsely- + suspected node can clear suspicion through gossip alone + (pre-fix the only path was a fresh probe). + - **HTTP gossip wire** — new `Gossip(ctx, targetID, members)` + method on `DistTransport`, new + `POST /internal/gossip` server endpoint (auth-wrapped), + new `GossipMember` wire DTO. `runGossipTick` now falls + through to the HTTP path when the transport isn't an + `InProcessTransport`, so cross-process clusters disseminate + membership state — pre-Phase-E this was an in-process-only + no-op. + - The `experimental` qualifier is removed from + `heartbeatLoop`'s comment + the heartbeat-section field + doc; SWIM-style indirect probes (Phase B.1) and + self-refutation (this round) together provide the SWIM + properties the marker was tracking. Regression coverage at [tests/integration/dist_swim_refute_test.go](tests/integration/dist_swim_refute_test.go): `TestDistSWIM_HTTPGossipExchange` exercises the wire (A pushes diff --git a/_mkdocs/hooks.py b/_mkdocs/hooks.py index f6a8152..5a9b214 100644 --- a/_mkdocs/hooks.py +++ b/_mkdocs/hooks.py @@ -1,23 +1,34 @@ """MkDocs hooks for the HyperCache site. -Rewrites repo-relative links to source files (`../pkg/foo.go`, -`../../hypercache.go`, etc.) into canonical GitHub URLs, so the same -markdown source renders correctly both on github.com and on the -GitHub Pages MkDocs build. - -Without this, the operations runbook and the RFCs reference dozens -of source files via paths like `../pkg/backend/dist_memory.go`. -GitHub renders those as in-repo links; MkDocs's strict mode flags -them as broken because `pkg/` is not part of the documentation -tree. Rewriting them at build time keeps the source markdown -GitHub-friendly while letting strict mode actually enforce -docs-internal correctness. +Two responsibilities: + +1. Rewrites repo-relative links to source files (`../pkg/foo.go`, + `../../hypercache.go`, etc.) 
into canonical GitHub URLs, so the + same markdown source renders correctly both on github.com and on + the GitHub Pages MkDocs build. Without this, the operations + runbook and the RFCs reference dozens of source files via paths + like `../pkg/backend/dist_memory.go`. GitHub renders those as + in-repo links; MkDocs's strict mode flags them as broken + because `pkg/` is not part of the documentation tree. Rewriting + them at build time keeps the source markdown GitHub-friendly + while letting strict mode actually enforce docs-internal + correctness. + +2. Injects `cmd/hypercache-server/openapi.yaml` into the docs build + as `api/openapi.yaml`, so the Swagger UI page on the docs site + renders the same spec the binary embeds. The spec lives next to + the binary (Go's `embed` cannot traverse `..`) but the docs + build needs it on its virtual filesystem — an `on_files` hook + adds it without requiring a duplicate file checked into `docs/`. """ import os import re +from pathlib import Path from typing import Any +from mkdocs.structure.files import File + GITHUB_REPO_BASE = "https://github.com/hyp3rd/hypercache/blob/main" # File extensions that we treat as "source code, not docs" — links @@ -108,6 +119,48 @@ def _resolve_to_repo_root(page_src_path: str, target: str) -> str: return docs_anchored +# Path of the canonical OpenAPI spec the server binary embeds. +# Resolved relative to the repo root (one above `_mkdocs/`), so +# this works whether MkDocs is invoked from the repo root or from +# inside `docs/`. +_REPO_ROOT = Path(__file__).resolve().parent.parent +_OPENAPI_SOURCE = _REPO_ROOT / "cmd" / "hypercache-server" / "openapi.yaml" + +# Where the spec appears on the rendered site — Swagger UI on +# `docs/api.md` references this URL. +_OPENAPI_DOCS_PATH = "api/openapi.yaml" + + +def on_files(files: Any, config: Any, **kwargs: Any) -> Any: + """Inject the embedded OpenAPI spec as a docs-site asset. 
+ + Without this, `docs/api.md`'s Swagger UI tag would reference a + file that does not exist in the docs tree (the spec lives + under `cmd/hypercache-server/`). Adding it as a virtual File + keeps a single source of truth — the binary's embedded spec + is what the docs site renders. + """ + if not _OPENAPI_SOURCE.exists(): + # Defensive: if the spec was renamed/moved, fail loud + # rather than silently render a stale asset. + raise FileNotFoundError( + f"OpenAPI spec not found at {_OPENAPI_SOURCE}; update " + f"_mkdocs/hooks.py:_OPENAPI_SOURCE if it moved." + ) + + files.append( + File( + path=_OPENAPI_SOURCE.name, + src_dir=str(_OPENAPI_SOURCE.parent), + dest_dir=config["site_dir"], + use_directory_urls=False, + dest_uri=_OPENAPI_DOCS_PATH, + ) + ) + + return files + + def on_page_markdown(markdown: str, page: Any, **kwargs: Any) -> str: """Rewrite source-code links on every page before MkDocs renders it.""" page_src = page.file.src_path diff --git a/cmd/hypercache-server/auth_test.go b/cmd/hypercache-server/auth_test.go new file mode 100644 index 0000000..af163a3 --- /dev/null +++ b/cmd/hypercache-server/auth_test.go @@ -0,0 +1,337 @@ +package main + +import ( + "context" + "net/http" + "net/http/httptest" + "strings" + "testing" + + fiber "github.com/gofiber/fiber/v3" + + "github.com/hyp3rd/hypercache" + "github.com/hyp3rd/hypercache/internal/constants" + "github.com/hyp3rd/hypercache/pkg/backend" + "github.com/hyp3rd/hypercache/pkg/httpauth" +) + +// newAuthedServer mirrors newTestServer but drives registerClientRoutes +// directly so the auth middleware actually runs — handlers_test.go's +// newTestServer wires routes inline without the auth wrapper, which +// means regressions to the auth wiring would not be caught by that +// suite. Tests in this file own the auth contract instead. 
+// +// Replication=1 keeps assertions deterministic, the 127.0.0.1:0 +// listener stays unbound (fiber.App.Test drives the wire in-memory), +// and the supplied policy shapes the auth wrapping. +func newAuthedServer(t *testing.T, policy httpauth.Policy) *fiber.App { + t.Helper() + + cfg, err := hypercache.NewConfig[backend.DistMemory](constants.DistMemoryBackend) + if err != nil { + t.Fatalf("new config: %v", err) + } + + cfg.DistMemoryOptions = []backend.DistMemoryOption{ + backend.WithDistNode("auth-test-node", "127.0.0.1:0"), + backend.WithDistReplication(1), + } + + hc, err := hypercache.New(t.Context(), hypercache.GetDefaultManager(), cfg) + if err != nil { + t.Fatalf("new hypercache: %v", err) + } + + t.Cleanup(func() { _ = hc.Stop(context.Background()) }) + + app := fiber.New() + registerClientRoutes(app, policy, &nodeContext{hc: hc, nodeID: "auth-test-node"}) + + return app +} + +// singleTokenPolicy is the test-helper that turns a bare bearer +// string into a single-token Policy. Mirrors what +// httpauth.LoadFromEnv synthesizes for the legacy +// HYPERCACHE_AUTH_TOKEN path: one identity, all three scopes. +func singleTokenPolicy(token string) httpauth.Policy { + return httpauth.Policy{ + Tokens: []httpauth.TokenIdentity{ + { + ID: "test", + Token: token, + Scopes: []httpauth.Scope{httpauth.ScopeRead, httpauth.ScopeWrite, httpauth.ScopeAdmin}, + }, + }, + } +} + +// openPolicy is the dev-mode policy: no credentials configured but +// AllowAnonymous flipped on, mirroring what main.go does when neither +// auth env var is set. Keeps the "open mode" test case identical to +// the binary's behavior. +func openPolicy() httpauth.Policy { + return httpauth.Policy{AllowAnonymous: true} +} + +// pathCacheKey is the canonical "/v1/cache/k" string reused across +// the test tables in this file. Hoisted to package scope so the +// goconst linter doesn't flag the repetition. 
+const pathCacheKey = "/v1/cache/k" + +// authRequest issues a single request against the auth-wrapped app +// and returns just the status — auth assertions never need the body. +// Returning the bare int keeps the test bodies one line per case. +func authRequest(t *testing.T, app *fiber.App, method, target, authHeader string) int { + t.Helper() + + req := httptest.NewRequestWithContext(t.Context(), method, target, strings.NewReader("")) + if authHeader != "" { + req.Header.Set("Authorization", authHeader) + } + + resp, err := app.Test(req) + if err != nil { + t.Fatalf("app.Test %s %s: %v", method, target, err) + } + + defer func() { _ = resp.Body.Close() }() + + return resp.StatusCode +} + +// TestBearerAuth_Required pins the wired-with-token contract. Each +// row is a single (header, expected status) tuple targeting a route +// that is supposed to be auth-protected — coverage here doubles as a +// regression test for `registerClientRoutes` forgetting to wrap a +// future route in bearerAuth. +func TestBearerAuth_Required(t *testing.T) { + t.Parallel() + + const token = "s3cret-token" + + app := newAuthedServer(t, singleTokenPolicy(token)) + + tests := []struct { + name string + header string + want int + }{ + { + name: "no header → 401", + header: "", + want: http.StatusUnauthorized, + }, + { + name: "wrong token → 401", + header: "Bearer wrong-token", + want: http.StatusUnauthorized, + }, + { + name: "missing Bearer prefix → 401", + // Same secret, but the canonical header form is + // "Bearer "; constant-time compare against the + // pre-built `Bearer ...` byte sequence rejects this + // without parsing the header. + header: token, + want: http.StatusUnauthorized, + }, + { + name: "lowercase scheme → 401", + header: "bearer " + token, + want: http.StatusUnauthorized, + }, + { + name: "wrong-length token → 401", + // Different length than the configured token — + // constant-time compare on unequal-length inputs + // returns 0 immediately. 
Catches a regression where + // someone "optimizes" by short-circuiting on length + // mismatch (which is exactly the timing leak the + // fix is meant to defeat). + header: "Bearer s", + want: http.StatusUnauthorized, + }, + { + name: "correct token → 200", + header: "Bearer " + token, + want: http.StatusOK, + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + + // /v1/owners/k is the cheapest auth-protected endpoint: + // no body to read, no key to seed, no GetWithInfo + // roundtrip. Any 401 here proves bearerAuth fired + // before the handler. + got := authRequest(t, app, http.MethodGet, "/v1/owners/k", tc.header) + if got != tc.want { + t.Fatalf("status: got %d, want %d", got, tc.want) + } + }) + } +} + +// TestBearerAuth_OpenWhenTokenEmpty preserves the zero-config dev +// posture: when neither HYPERCACHE_AUTH_CONFIG nor +// HYPERCACHE_AUTH_TOKEN is set, main.go's run() flips +// AllowAnonymous on so every route stays reachable without +// credentials. The openPolicy() helper mirrors that exact shape. +func TestBearerAuth_OpenWhenTokenEmpty(t *testing.T) { + t.Parallel() + + app := newAuthedServer(t, openPolicy()) + + got := authRequest(t, app, http.MethodGet, "/v1/owners/k", "") + if got != http.StatusOK { + t.Fatalf("open mode: status %d, want 200", got) + } +} + +// TestBearerAuth_PublicRoutes guarantees that the auth-free meta +// endpoints — /healthz and /v1/openapi.yaml — stay reachable even +// when a token is configured. K8s liveness probes and the +// self-describing spec endpoint must never require credentials. 
+func TestBearerAuth_PublicRoutes(t *testing.T) { + t.Parallel() + + app := newAuthedServer(t, singleTokenPolicy("s3cret-token")) + + publicRoutes := []string{"/healthz", "/v1/openapi.yaml"} + for _, route := range publicRoutes { + t.Run(route, func(t *testing.T) { + t.Parallel() + + got := authRequest(t, app, http.MethodGet, route, "") + if got != http.StatusOK { + t.Fatalf("public route %s: status %d, want 200", route, got) + } + }) + } +} + +// TestBearerAuth_AllProtectedRoutes asserts the auth wrapper fires +// for every cache and batch endpoint, not just /v1/owners. A bare +// route table here catches regressions where a new route gets added +// to registerClientRoutes without auth — the OpenAPI drift test +// covers documentation drift, this covers wiring drift. +func TestBearerAuth_AllProtectedRoutes(t *testing.T) { + t.Parallel() + + app := newAuthedServer(t, singleTokenPolicy("s3cret-token")) + + protected := []struct { + method string + path string + }{ + {http.MethodPut, pathCacheKey}, + {http.MethodGet, pathCacheKey}, + {http.MethodHead, pathCacheKey}, + {http.MethodDelete, pathCacheKey}, + {http.MethodGet, "/v1/owners/k"}, + {http.MethodPost, "/v1/cache/batch/get"}, + {http.MethodPost, "/v1/cache/batch/put"}, + {http.MethodPost, "/v1/cache/batch/delete"}, + } + + for _, route := range protected { + t.Run(route.method+" "+route.path, func(t *testing.T) { + t.Parallel() + + got := authRequest(t, app, route.method, route.path, "") + if got != http.StatusUnauthorized { + t.Fatalf("expected 401 without token, got %d", got) + } + }) + } +} + +// TestScope_ReadOnlyToken pins the Phase 2 multi-token scope +// enforcement contract: a token granted only ScopeRead can hit +// GET routes but is forbidden from PUT/DELETE/batch-mutating +// routes. This catches the regression class where a future +// route is mistakenly tagged with the wrong scope in +// registerClientRoutes (e.g. a write-mutating endpoint accidentally +// wrapped in `read` middleware). 
+func TestScope_ReadOnlyToken(t *testing.T) { + t.Parallel() + + policy := httpauth.Policy{ + Tokens: []httpauth.TokenIdentity{ + {ID: "ro", Token: "ro-token", Scopes: []httpauth.Scope{httpauth.ScopeRead}}, + }, + } + + app := newAuthedServer(t, policy) + authHeader := "Bearer ro-token" + + cases := []struct { + method string + path string + want int // 200 for read routes, 403 for write routes + }{ + // Read scope: 200. + {http.MethodGet, "/v1/owners/k", http.StatusOK}, + // Write scope: 403 (token has Read only). + {http.MethodPut, pathCacheKey, http.StatusForbidden}, + {http.MethodDelete, pathCacheKey, http.StatusForbidden}, + {http.MethodPost, "/v1/cache/batch/put", http.StatusForbidden}, + {http.MethodPost, "/v1/cache/batch/delete", http.StatusForbidden}, + } + + for _, tc := range cases { + t.Run(tc.method+" "+tc.path, func(t *testing.T) { + t.Parallel() + + got := authRequest(t, app, tc.method, tc.path, authHeader) + if got != tc.want { + t.Fatalf("got %d, want %d", got, tc.want) + } + }) + } +} + +// TestScope_MultipleIdentities exercises the multi-token resolution +// path: the policy carries two identities (ro + rw), and each is +// granted exactly the scopes its token presents. A read request +// with the rw token works (rw includes Read); a write request with +// the ro token does not (ro lacks Write). 
+func TestScope_MultipleIdentities(t *testing.T) { + t.Parallel() + + policy := httpauth.Policy{ + Tokens: []httpauth.TokenIdentity{ + {ID: "ro", Token: "ro-token", Scopes: []httpauth.Scope{httpauth.ScopeRead}}, + {ID: "rw", Token: "rw-token", Scopes: []httpauth.Scope{httpauth.ScopeRead, httpauth.ScopeWrite}}, + }, + } + + app := newAuthedServer(t, policy) + + cases := []struct { + name string + method string + path string + token string + want int + }{ + {"ro reads", http.MethodGet, "/v1/owners/k", "ro-token", http.StatusOK}, + {"rw reads", http.MethodGet, "/v1/owners/k", "rw-token", http.StatusOK}, + {"ro writes (forbidden)", http.MethodPut, pathCacheKey, "ro-token", http.StatusForbidden}, + {"rw writes", http.MethodPut, pathCacheKey, "rw-token", http.StatusOK}, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + + got := authRequest(t, app, tc.method, tc.path, "Bearer "+tc.token) + if got != tc.want { + t.Fatalf("got %d, want %d", got, tc.want) + } + }) + } +} diff --git a/cmd/hypercache-server/main.go b/cmd/hypercache-server/main.go index 6cf6814..c439a8c 100644 --- a/cmd/hypercache-server/main.go +++ b/cmd/hypercache-server/main.go @@ -18,10 +18,13 @@ package main import ( "context" + "crypto/tls" + "crypto/x509" "encoding/base64" "errors" "fmt" "log/slog" + "net" "net/http" "os" "os/signal" @@ -38,6 +41,7 @@ import ( "github.com/hyp3rd/hypercache/internal/sentinel" "github.com/hyp3rd/hypercache/pkg/backend" cache "github.com/hyp3rd/hypercache/pkg/cache/v2" + "github.com/hyp3rd/hypercache/pkg/httpauth" ) // Defaults applied when the corresponding env var is unset. 
Centralized @@ -71,7 +75,10 @@ type envConfig struct { Seeds []string Replication int Capacity int - AuthToken string + AuthPolicy httpauth.Policy + APITLSCert string + APITLSKey string + APITLSCA string LogLevel slog.Level HintTTL time.Duration HintReplay time.Duration @@ -81,9 +88,22 @@ type envConfig struct { } // loadConfig pulls every knob from the environment and applies sane -// defaults. Returns the parsed config and any non-fatal warnings the -// caller should log after the logger is wired. -func loadConfig() envConfig { +// defaults. The error return covers auth-policy load failures — +// e.g. HYPERCACHE_AUTH_CONFIG naming a missing or malformed file. +// (CHANGELOG: setting HYPERCACHE_AUTH_TOKEN too is allowed — +// TODO confirm in LoadFromEnv.) The binary exits non-zero rather than +// silently fall through to open mode (fail-closed by design; +// documented in CHANGELOG as a behavioral change vs pre-v2 where +// any token/config error mapped to permissive open mode). +// +// Other knobs use silent fallbacks to defaults — they are tunables, +// not security boundaries. 
+func loadConfig() (envConfig, error) { + policy, err := httpauth.LoadFromEnv() + if err != nil { + return envConfig{}, fmt.Errorf("load auth policy: %w", err) + } + cfg := envConfig{ NodeID: envOr("HYPERCACHE_NODE_ID", hostnameOrDefault()), APIAddr: envOr("HYPERCACHE_API_ADDR", ":8080"), @@ -92,7 +112,10 @@ func loadConfig() envConfig { Seeds: splitCSV(os.Getenv("HYPERCACHE_SEEDS")), Replication: envInt("HYPERCACHE_REPLICATION", defaultReplication), Capacity: envInt("HYPERCACHE_CAPACITY", defaultCapacity), - AuthToken: os.Getenv("HYPERCACHE_AUTH_TOKEN"), + AuthPolicy: policy, + APITLSCert: os.Getenv("HYPERCACHE_API_TLS_CERT"), + APITLSKey: os.Getenv("HYPERCACHE_API_TLS_KEY"), + APITLSCA: os.Getenv("HYPERCACHE_API_TLS_CLIENT_CA"), LogLevel: parseLogLevel(envOr("HYPERCACHE_LOG_LEVEL", "info")), HintTTL: envDuration("HYPERCACHE_HINT_TTL", defaultHintTTL), HintReplay: envDuration("HYPERCACHE_HINT_REPLAY", defaultHintReplay), @@ -101,7 +124,7 @@ func loadConfig() envConfig { RebalanceInt: envDuration("HYPERCACHE_REBALANCE_INTERVAL", defaultRebalance), } - return cfg + return cfg, nil } // envOr returns os.Getenv(key) or fallback when unset/empty. @@ -218,10 +241,16 @@ func buildHyperCache(ctx context.Context, cfg envConfig, logger *slog.Logger) (* backend.WithDistLogger(logger), } - if cfg.AuthToken != "" { + // Dist transport auth is intentionally separate from the + // client API's multi-token policy: the cluster is one trust + // domain (every node holds the same peer token), so reading + // HYPERCACHE_AUTH_TOKEN directly here keeps the dist symmetry + // invariant when operators set HYPERCACHE_AUTH_CONFIG for the + // client API but still want peer auth on the wire. 
+ if peerToken := os.Getenv(httpauth.EnvAuthToken); peerToken != "" { hcCfg.DistMemoryOptions = append( hcCfg.DistMemoryOptions, - backend.WithDistHTTPAuth(backend.DistHTTPAuth{Token: cfg.AuthToken}), + backend.WithDistHTTPAuth(backend.DistHTTPAuth{Token: peerToken}), ) } @@ -264,11 +293,83 @@ const ( codeInternal = "INTERNAL" ) +// TLS-config sentinel errors returned by buildAPITLSConfig. Wrapped +// via fmt.Errorf at construction time so callers see the field +// name + path; matched via errors.Is for control-flow. +var ( + errAPITLSPartial = errors.New("HYPERCACHE_API_TLS_CERT and HYPERCACHE_API_TLS_KEY must both be set") + errAPITLSNoPEMInCA = errors.New("HYPERCACHE_API_TLS_CLIENT_CA: no PEM certificates parsed from file") +) + +// registerClientRoutes wires every client-API route onto the +// provided fiber app. Extracted from runClientAPI so tests +// (handlers_test.go, auth_test.go, openapi_test.go) drive the same +// wiring without spinning up a real listener — and so the drift +// test can introspect routes from the *exact* production +// registration rather than a hand-maintained mirror. +// +// Routes are scope-tagged: read endpoints (GET/HEAD/owners-lookup, +// batch-get) require ScopeRead; mutating endpoints (PUT/DELETE, +// batch-put/delete) require ScopeWrite. /healthz and +// /v1/openapi.yaml are deliberately scope-less so liveness probes +// and spec-discovery work without credentials. +// +// When the policy is unconfigured (zero Policy with AllowAnonymous +// false), every protected route 401s — fail-closed by design. The +// hypercache-server binary's loadConfig handles the legacy +// "neither env var set" path by flipping AllowAnonymous on with a +// startup warning, so the zero-config dev posture still works. 
+func registerClientRoutes(app *fiber.App, policy httpauth.Policy, nodeCtx *nodeContext) { + read := policy.Middleware(httpauth.ScopeRead) + write := policy.Middleware(httpauth.ScopeWrite) + + app.Get("/healthz", func(c fiber.Ctx) error { return c.SendString("ok") }) + + // Self-describing — clients can discover the API surface + // without out-of-band docs. The spec is embedded at build + // time from cmd/hypercache-server/openapi.yaml so it stays + // in lockstep with whatever the binary was built against. + app.Get("/v1/openapi.yaml", func(c fiber.Ctx) error { + c.Set(fiber.HeaderContentType, "application/yaml") + + return c.Send(openapiSpec) + }) + + app.Put("/v1/cache/:key", write, func(c fiber.Ctx) error { return handlePut(c, nodeCtx) }) + app.Get("/v1/cache/:key", read, func(c fiber.Ctx) error { return handleGet(c, nodeCtx) }) + app.Head("/v1/cache/:key", read, func(c fiber.Ctx) error { return handleHead(c, nodeCtx) }) + app.Delete("/v1/cache/:key", write, func(c fiber.Ctx) error { return handleDelete(c, nodeCtx) }) + app.Get("/v1/owners/:key", read, func(c fiber.Ctx) error { return handleOwners(c, nodeCtx) }) + + app.Post("/v1/cache/batch/get", read, func(c fiber.Ctx) error { return handleBatchGet(c, nodeCtx) }) + app.Post("/v1/cache/batch/put", write, func(c fiber.Ctx) error { return handleBatchPut(c, nodeCtx) }) + app.Post("/v1/cache/batch/delete", write, func(c fiber.Ctx) error { return handleBatchDelete(c, nodeCtx) }) +} + // runClientAPI builds and starts the client REST API. Returns the -// fiber app so main can shut it down on signal. Handlers are -// auth-wrapped when the env carries an HYPERCACHE_AUTH_TOKEN, mirroring -// the dist + management HTTP auth posture. -func runClientAPI(addr, nodeID string, hc *hypercache.HyperCache[backend.DistMemory], authToken string, logger *slog.Logger) *fiber.App { +// fiber app so main can shut it down on signal. 
The provided +// httpauth.Policy gates every protected route — see +// registerClientRoutes for the per-route scope mapping. +// +// TLS posture (controlled via cfg, mirroring dist's pattern): +// +// - cfg.APITLSCert + cfg.APITLSKey both set → standard TLS. +// - Adding cfg.APITLSCA → mTLS with RequireAndVerifyClientCert. +// The verified peer cert's Subject CN is what +// httpauth.Policy.CertIdentities matches against. +// - Both fields empty → plaintext on cfg.APIAddr (preserves +// today's default behavior; dev mode and ingress-terminated TLS +// setups keep working). +// - Exactly one of the two set → startup error (see +// buildAPITLSConfig); never a silent plaintext fallback. +// +// TLS config and TLS listener-construction errors fail fast (the +// goroutine wouldn't have surfaced them anyway via app.Listener); +// operators see the failure at startup rather than silently bound +// on the wrong protocol. The plaintext path still binds inside its +// goroutine, so a bind failure there is only logged. +func runClientAPI( + cfg envConfig, + hc *hypercache.HyperCache[backend.DistMemory], + logger *slog.Logger, +) (*fiber.App, error) { app := fiber.New(fiber.Config{ AppName: "hypercache-server", ReadTimeout: clientAPIReadTimeout, @@ -276,29 +377,95 @@ func runClientAPI(addr, nodeID string, hc *hypercache.HyperCache[backend.DistMem IdleTimeout: clientAPIIdleTimeout, }) - auth := bearerAuth(authToken) - nodeCtx := &nodeContext{hc: hc, nodeID: nodeID} + registerClientRoutes(app, cfg.AuthPolicy, &nodeContext{hc: hc, nodeID: cfg.NodeID}) - app.Get("/healthz", func(c fiber.Ctx) error { return c.SendString("ok") }) + tlsCfg, err := buildAPITLSConfig(cfg) + if err != nil { + return nil, fmt.Errorf("build client API TLS config: %w", err) + } + + if tlsCfg == nil { + go runPlaintextListener(app, cfg.APIAddr, logger) - app.Put("/v1/cache/:key", auth(func(c fiber.Ctx) error { return handlePut(c, nodeCtx) })) - app.Get("/v1/cache/:key", auth(func(c fiber.Ctx) error { return handleGet(c, nodeCtx) })) - app.Head("/v1/cache/:key", auth(func(c fiber.Ctx) error { return handleHead(c, nodeCtx) })) - app.Delete("/v1/cache/:key", auth(func(c fiber.Ctx) error { return handleDelete(c, nodeCtx) })) -
app.Get("/v1/owners/:key", auth(func(c fiber.Ctx) error { return handleOwners(c, nodeCtx) })) + return app, nil + } - app.Post("/v1/cache/batch/get", auth(func(c fiber.Ctx) error { return handleBatchGet(c, nodeCtx) })) - app.Post("/v1/cache/batch/put", auth(func(c fiber.Ctx) error { return handleBatchPut(c, nodeCtx) })) - app.Post("/v1/cache/batch/delete", auth(func(c fiber.Ctx) error { return handleBatchDelete(c, nodeCtx) })) + ln, err := tls.Listen("tcp", cfg.APIAddr, tlsCfg) + if err != nil { + return nil, fmt.Errorf("client API tls listen %s: %w", cfg.APIAddr, err) + } - go func() { - err := app.Listen(addr) - if err != nil && !errors.Is(err, http.ErrServerClosed) { - logger.Error("client API listener exited", slog.Any("err", err)) - } - }() + go runWrappedListener(app, ln, logger) + + return app, nil +} + +// runPlaintextListener serves on the bare addr — the standard +// non-TLS path that shipped pre-v2. +func runPlaintextListener(app *fiber.App, addr string, logger *slog.Logger) { + err := app.Listen(addr) + if err != nil && !errors.Is(err, http.ErrServerClosed) { + logger.Error("client API listener exited", slog.Any("err", err)) + } +} + +// runWrappedListener serves on a pre-built net.Listener (used by +// the TLS path so the tls.Config — including ClientAuth and +// ClientCAs — is fully under our control). +func runWrappedListener(app *fiber.App, ln net.Listener, logger *slog.Logger) { + err := app.Listener(ln) + if err != nil && !errors.Is(err, http.ErrServerClosed) { + logger.Error("client API listener exited", slog.Any("err", err)) + } +} + +// buildAPITLSConfig assembles the *tls.Config the API listener +// should use, or nil for the plaintext default. CERT+KEY are the +// minimum for TLS; adding CA upgrades to mTLS with +// RequireAndVerifyClientCert (the only mode that gives the auth +// middleware a verified peer cert to map to a CertIdentity). 
+// +// Returns an error when CERT or KEY is set but the other is missing +// — that shape is operator-misconfiguration, not "TLS off." Don't +// silently fall through to plaintext when the operator clearly +// asked for TLS but typo'd one of the paths. +func buildAPITLSConfig(cfg envConfig) (*tls.Config, error) { + if cfg.APITLSCert == "" && cfg.APITLSKey == "" { + return nil, nil //nolint:nilnil // documented "no TLS" sentinel + } + + if cfg.APITLSCert == "" || cfg.APITLSKey == "" { + return nil, errAPITLSPartial + } + + cert, err := tls.LoadX509KeyPair(cfg.APITLSCert, cfg.APITLSKey) + if err != nil { + return nil, fmt.Errorf("load TLS keypair: %w", err) + } + + tlsCfg := &tls.Config{ + Certificates: []tls.Certificate{cert}, + MinVersion: tls.VersionTLS12, + } + + if cfg.APITLSCA == "" { + return tlsCfg, nil + } - return app + caPEM, err := os.ReadFile(cfg.APITLSCA) + if err != nil { + return nil, fmt.Errorf("read client CA bundle %s: %w", cfg.APITLSCA, err) + } + + pool := x509.NewCertPool() + if !pool.AppendCertsFromPEM(caPEM) { + return nil, fmt.Errorf("%w: %s", errAPITLSNoPEMInCA, cfg.APITLSCA) + } + + tlsCfg.ClientAuth = tls.RequireAndVerifyClientCert + tlsCfg.ClientCAs = pool + + return tlsCfg, nil } // jsonErr writes the canonical errorResponse with the given status @@ -996,42 +1163,43 @@ func handleOwners(c fiber.Ctx, nodeCtx *nodeContext) error { }) } -// bearerAuth returns a middleware that requires `Authorization: Bearer -// ` when token is non-empty; otherwise it's a passthrough. -// Mirrors the same posture as DistHTTPAuth — applied to the client -// API for symmetry. 
-func bearerAuth(token string) func(fiber.Handler) fiber.Handler { - if token == "" { - return func(h fiber.Handler) fiber.Handler { return h } - } - - want := "Bearer " + token - - return func(h fiber.Handler) fiber.Handler { - return func(c fiber.Ctx) error { - got := c.Get("Authorization") - if got != want { - return c.SendStatus(fiber.StatusUnauthorized) - } - - return h(c) - } - } -} - func main() { os.Exit(run()) } // run is the testable main body — separated so deferred cleanup // (context cancel, future cleanups) executes before process exit. // Returns 0 on clean shutdown, 1 on construction failure. func run() int { - cfg := loadConfig() + cfg, err := loadConfig() + if err != nil { + // Fall back to a minimal stderr logger because cfg.LogLevel + // is not yet populated. Auth-policy load errors are + // fail-closed: missing/malformed HYPERCACHE_AUTH_CONFIG + // must not silently degrade to open mode. + fmt.Fprintf(os.Stderr, "hypercache-server: %v\n", err) + + return 1 + } baseLogger := slog.New(slog.NewJSONHandler(os.Stderr, &slog.HandlerOptions{Level: cfg.LogLevel})) logger := baseLogger.With(slog.String("node_id", cfg.NodeID)) slog.SetDefault(logger) + // Preserve the pre-v2 zero-config dev posture: when neither + // HYPERCACHE_AUTH_CONFIG nor HYPERCACHE_AUTH_TOKEN is set, the + // loader returns the zero Policy, and we explicitly opt into + // AllowAnonymous mode here with a loud warning. Without this, + // every protected route would 401 — every existing + // `docker run hypercache` would break on upgrade. 
+ if !cfg.AuthPolicy.IsConfigured() { + logger.Warn( + "hypercache-server running with no client API auth configured; set " + + httpauth.EnvAuthConfig + " or " + httpauth.EnvAuthToken + " for production", + ) + + cfg.AuthPolicy.AllowAnonymous = true + } + logger.Info( "hypercache-server starting", slog.String("api_addr", cfg.APIAddr), @@ -1039,6 +1207,8 @@ func run() int { slog.String("dist_addr", cfg.DistAddr), slog.Any("seeds", cfg.Seeds), slog.Int("replication", cfg.Replication), + slog.Int("auth_token_identities", len(cfg.AuthPolicy.Tokens)), + slog.Int("auth_cert_identities", len(cfg.AuthPolicy.CertIdentities)), ) ctx, cancel := context.WithCancel(context.Background()) @@ -1051,7 +1221,14 @@ func run() int { return 1 } - apiApp := runClientAPI(cfg.APIAddr, cfg.NodeID, hc, cfg.AuthToken, logger) + apiApp, err := runClientAPI(cfg, hc, logger) + if err != nil { + logger.Error("client API construction failed", slog.Any("err", err)) + + _ = hc.Stop(ctx) + + return 1 + } awaitShutdown(ctx, hc, apiApp, logger) diff --git a/cmd/hypercache-server/mtls_e2e_test.go b/cmd/hypercache-server/mtls_e2e_test.go new file mode 100644 index 0000000..bc443a4 --- /dev/null +++ b/cmd/hypercache-server/mtls_e2e_test.go @@ -0,0 +1,354 @@ +package main + +import ( + "context" + "crypto/ecdsa" + "crypto/elliptic" + "crypto/rand" + "crypto/tls" + "crypto/x509" + "crypto/x509/pkix" + "encoding/pem" + "errors" + "io" + "math/big" + "net" + "net/http" + "net/url" + "os" + "path/filepath" + "testing" + "time" + + fiber "github.com/gofiber/fiber/v3" + + "github.com/hyp3rd/hypercache" + "github.com/hyp3rd/hypercache/internal/constants" + "github.com/hyp3rd/hypercache/pkg/backend" + "github.com/hyp3rd/hypercache/pkg/httpauth" +) + +// signedCert is one cert + matching private key, plus the DER bytes +// the CA pool needs to verify it. 
Returned by issueCert below so the +// E2E test can wire the same data into both PEM files (the binary +// reads them off disk) and the http.Client (the request side). +type signedCert struct { + cert *x509.Certificate + der []byte + key *ecdsa.PrivateKey + pemCert []byte + pemKey []byte +} + +// issueCert produces a self-signed CA-or-leaf cert. When signer is +// nil the resulting cert self-signs (used for the CA root). When +// signer is non-nil the CA signs the leaf — that's how the client +// and server certs get a verifiable chain back to the test CA. +func issueCert(t *testing.T, cn string, isCA bool, signer *signedCert, sans []string) *signedCert { + t.Helper() + + key, err := ecdsa.GenerateKey(elliptic.P256(), rand.Reader) + if err != nil { + t.Fatalf("generate key for %s: %v", cn, err) + } + + template := &x509.Certificate{ + SerialNumber: big.NewInt(time.Now().UnixNano()), + Subject: pkix.Name{CommonName: cn}, + NotBefore: time.Now().Add(-time.Hour), + NotAfter: time.Now().Add(time.Hour), + KeyUsage: x509.KeyUsageDigitalSignature | x509.KeyUsageKeyEncipherment, + ExtKeyUsage: []x509.ExtKeyUsage{x509.ExtKeyUsageServerAuth, x509.ExtKeyUsageClientAuth}, + } + + if isCA { + template.IsCA = true + + template.KeyUsage |= x509.KeyUsageCertSign + + template.BasicConstraintsValid = true + } + + for _, san := range sans { + if ip := net.ParseIP(san); ip != nil { + template.IPAddresses = append(template.IPAddresses, ip) + } else { + template.DNSNames = append(template.DNSNames, san) + } + } + + parent := template + parentKey := any(key) + + if signer != nil { + parent = signer.cert + parentKey = signer.key + } + + der, err := x509.CreateCertificate(rand.Reader, template, parent, &key.PublicKey, parentKey) + if err != nil { + t.Fatalf("create cert %s: %v", cn, err) + } + + parsed, err := x509.ParseCertificate(der) + if err != nil { + t.Fatalf("parse cert %s: %v", cn, err) + } + + keyDER, err := x509.MarshalECPrivateKey(key) + if err != nil { + t.Fatalf("marshal key: 
%v", err) + } + + return &signedCert{ + cert: parsed, + der: der, + key: key, + pemCert: pem.EncodeToMemory(&pem.Block{Type: "CERTIFICATE", Bytes: der}), + pemKey: pem.EncodeToMemory(&pem.Block{Type: "EC PRIVATE KEY", Bytes: keyDER}), + } +} + +// writePEM persists pem-encoded bytes to a tempfile and returns the +// path; sole purpose is feeding the binary's env-var-driven file +// paths. +func writePEM(t *testing.T, dir, name string, data []byte) string { + t.Helper() + + path := filepath.Join(dir, name) + + err := os.WriteFile(path, data, 0o600) + if err != nil { + t.Fatalf("write %s: %v", path, err) + } + + return path +} + +// TestMTLS_E2E_ClientCertResolvesIdentity is the integration test +// that proves the full mTLS path is wired correctly: a real TLS +// handshake against the configured envConfig, with a client +// presenting a cert whose Subject CN matches a configured +// CertIdentity. The auth middleware must resolve the cert into +// an Identity with the right scopes and let the request through +// to the handler. +// +// Without this test, every individual unit test passes but the +// composition (env-vars → tls.Config → fiber listener → +// Policy.resolveCert) could be silently broken. +// +// Cannot t.Parallel() — binds to a fresh ephemeral port and +// shares the test-process server until t.Cleanup; sequential +// execution avoids any port-reuse / goroutine-leak surprises in +// fiber's listener teardown. 
+// +//nolint:paralleltest // intentional: real listener owned by this test +func TestMTLS_E2E_ClientCertResolvesIdentity(t *testing.T) { + if testing.Short() { + t.Skip("E2E mTLS test starts a real listener; skip with -short") + } + + dir := t.TempDir() + + ca := issueCert(t, "test-ca", true, nil, nil) + server := issueCert(t, "test-server", false, ca, []string{"127.0.0.1"}) + client := issueCert(t, "test-client", false, ca, nil) + + caPath := writePEM(t, dir, "ca.pem", ca.pemCert) + serverCertPath := writePEM(t, dir, "server.crt", server.pemCert) + serverKeyPath := writePEM(t, dir, "server.key", server.pemKey) + + hc := newE2ECacheNode(t) + + cfg := envConfig{ + APIAddr: "127.0.0.1:0", // ephemeral port; real bind below + NodeID: "mtls-test-node", + APITLSCert: serverCertPath, + APITLSKey: serverKeyPath, + APITLSCA: caPath, + AuthPolicy: httpauth.Policy{ + CertIdentities: []httpauth.CertIdentity{ + {SubjectCN: "test-client", Scopes: []httpauth.Scope{httpauth.ScopeRead}}, + }, + }, + } + + addr, app := startTLSServer(t, cfg, hc) + + clientTLS := buildClientTLS(t, ca.cert, client.der, client.key) + + target := "https://" + addr + "/v1/owners/k" + + //nolint:paralleltest // subtests share the parent's listener + t.Run("client cert with matching CN → 200", func(_ *testing.T) { + status := doMTLSRequest(t, target, clientTLS) + if status != http.StatusOK { + t.Fatalf("got status %d, want 200", status) + } + }) + + //nolint:paralleltest // subtests share the parent's listener + t.Run("client cert without matching CN → 401", func(_ *testing.T) { + // Issue a fresh client cert with a CN that is NOT in + // CertIdentities; the policy should reject it (cert is + // validly signed by the CA, but the CN does not map to + // any configured identity). 
+ stranger := issueCert(t, "stranger", false, ca, nil) + strangerTLS := buildClientTLS(t, ca.cert, stranger.der, stranger.key) + + status := doMTLSRequest(t, target, strangerTLS) + if status != http.StatusUnauthorized { + t.Fatalf("got status %d, want 401", status) + } + }) + + t.Cleanup(func() { _ = app.ShutdownWithContext(context.Background()) }) +} + +// newE2ECacheNode spins up a single-replica DistMemory hypercache +// for the E2E test, returns it bound to t.Cleanup. Replication=1 +// avoids the wait-for-quorum dance — we only care about the auth +// middleware, not the cache semantics. +func newE2ECacheNode(t *testing.T) *hypercache.HyperCache[backend.DistMemory] { + t.Helper() + + cfg, err := hypercache.NewConfig[backend.DistMemory](constants.DistMemoryBackend) + if err != nil { + t.Fatalf("new config: %v", err) + } + + cfg.DistMemoryOptions = []backend.DistMemoryOption{ + backend.WithDistNode("mtls-test-node", "127.0.0.1:0"), + backend.WithDistReplication(1), + } + + hc, err := hypercache.New(t.Context(), hypercache.GetDefaultManager(), cfg) + if err != nil { + t.Fatalf("new hypercache: %v", err) + } + + t.Cleanup(func() { _ = hc.Stop(context.Background()) }) + + return hc +} + +// startTLSServer constructs the TLS config and binds to a real +// 127.0.0.1 port, hands the listener to fiber, and returns the +// resolved address. We bind ourselves rather than going through +// runClientAPI because runClientAPI binds inside a goroutine and +// the test needs the resolved port up-front. 
+func startTLSServer(t *testing.T, cfg envConfig, hc *hypercache.HyperCache[backend.DistMemory]) (string, *fiber.App) { + t.Helper() + + tlsCfg, err := buildAPITLSConfig(cfg) + if err != nil { + t.Fatalf("build TLS config: %v", err) + } + + if tlsCfg == nil { + t.Fatalf("expected non-nil TLS config") + } + + ln, err := tls.Listen("tcp", cfg.APIAddr, tlsCfg) + if err != nil { + t.Fatalf("tls listen: %v", err) + } + + app := fiber.New() + registerClientRoutes(app, cfg.AuthPolicy, &nodeContext{hc: hc, nodeID: cfg.NodeID}) + + go func() { + err := app.Listener(ln) + if err != nil && !errors.Is(err, http.ErrServerClosed) { + t.Logf("listener exited: %v", err) + } + }() + + // Wait briefly for fiber to start serving — without this the + // first request races the listener init and gets ECONNREFUSED. + // Use Dialer.DialContext rather than tls.Dial so the readiness + // probe inherits the test ctx (timeout/cancel propagate cleanly). + addr := ln.Addr().String() + awaitTLSReady(t, addr) + + return addr, app +} + +// awaitTLSReady polls the listener until it accepts a TLS dial, +// or the deadline expires. The skip-verify is intentional: this +// is a startup-readiness probe, not a real client. Cert validation +// happens on the actual test request a few lines later. +func awaitTLSReady(t *testing.T, addr string) { + t.Helper() + + dialer := &tls.Dialer{ + Config: &tls.Config{ + InsecureSkipVerify: true, //nolint:gosec // dev-mode readiness probe; not a trust boundary + MinVersion: tls.VersionTLS12, + }, + } + + deadline := time.Now().Add(2 * time.Second) + for time.Now().Before(deadline) { + conn, dialErr := dialer.DialContext(t.Context(), "tcp", addr) + if dialErr == nil { + _ = conn.Close() + + return + } + + time.Sleep(20 * time.Millisecond) + } +} + +// buildClientTLS assembles the per-test http.Client TLS config: +// trust the test CA, present the test client's cert. 
+func buildClientTLS(t *testing.T, caCert *x509.Certificate, clientDER []byte, clientKey *ecdsa.PrivateKey) *tls.Config { + t.Helper() + + pool := x509.NewCertPool() + pool.AddCert(caCert) + + return &tls.Config{ + RootCAs: pool, + Certificates: []tls.Certificate{{ + Certificate: [][]byte{clientDER}, + PrivateKey: clientKey, + }}, + MinVersion: tls.VersionTLS12, + ServerName: "127.0.0.1", + } +} + +// doMTLSRequest issues a GET against url with the supplied client +// TLS config and returns the status code. Body is drained and +// discarded — we only assert auth resolution, not handler logic. +func doMTLSRequest(t *testing.T, target string, tlsCfg *tls.Config) int { + t.Helper() + + parsed, err := url.Parse(target) + if err != nil { + t.Fatalf("parse url: %v", err) + } + + c := &http.Client{ + Transport: &http.Transport{TLSClientConfig: tlsCfg}, + Timeout: 3 * time.Second, + } + + req, err := http.NewRequestWithContext(t.Context(), http.MethodGet, parsed.String(), http.NoBody) + if err != nil { + t.Fatalf("new request: %v", err) + } + + resp, err := c.Do(req) + if err != nil { + t.Fatalf("do request: %v", err) + } + + defer func() { _ = resp.Body.Close() }() + + _, _ = io.Copy(io.Discard, resp.Body) + + return resp.StatusCode +} diff --git a/cmd/hypercache-server/mtls_test.go b/cmd/hypercache-server/mtls_test.go new file mode 100644 index 0000000..a32dda0 --- /dev/null +++ b/cmd/hypercache-server/mtls_test.go @@ -0,0 +1,213 @@ +package main + +import ( + "crypto/ecdsa" + "crypto/elliptic" + "crypto/rand" + "crypto/tls" + "crypto/x509" + "crypto/x509/pkix" + "encoding/pem" + "errors" + "math/big" + "os" + "path/filepath" + "testing" + "time" +) + +// writeTestCertPair generates a self-signed cert + key and writes +// them to tempfile paths. Returned paths are what the binary's TLS +// env vars would point at in production. Cert is valid for 127.0.0.1 +// only — sufficient for the in-process buildAPITLSConfig path, +// never to be reused outside tests. 
Returns (certPath, keyPath) in +// that order; caught between revive's confusing-results preference +// for named returns and nonamedreturns' preference against them, +// the docstring is the disambiguator. +// +//nolint:revive // unnamed results: see docstring; nonamedreturns disagrees with confusing-results here +func writeTestCertPair(t *testing.T) (string, string) { + t.Helper() + + priv, err := ecdsa.GenerateKey(elliptic.P256(), rand.Reader) + if err != nil { + t.Fatalf("generate key: %v", err) + } + + template := &x509.Certificate{ + SerialNumber: big.NewInt(1), + Subject: pkix.Name{CommonName: "hypercache-mtls-test"}, + NotBefore: time.Now().Add(-time.Hour), + NotAfter: time.Now().Add(time.Hour), + KeyUsage: x509.KeyUsageDigitalSignature | x509.KeyUsageKeyEncipherment, + ExtKeyUsage: []x509.ExtKeyUsage{x509.ExtKeyUsageServerAuth}, + } + + derBytes, err := x509.CreateCertificate(rand.Reader, template, template, &priv.PublicKey, priv) + if err != nil { + t.Fatalf("create cert: %v", err) + } + + keyDER, err := x509.MarshalECPrivateKey(priv) + if err != nil { + t.Fatalf("marshal key: %v", err) + } + + dir := t.TempDir() + certPath := filepath.Join(dir, "cert.pem") + keyPath := filepath.Join(dir, "key.pem") + + err = os.WriteFile(certPath, pem.EncodeToMemory(&pem.Block{Type: "CERTIFICATE", Bytes: derBytes}), 0o600) + if err != nil { + t.Fatalf("write cert: %v", err) + } + + err = os.WriteFile(keyPath, pem.EncodeToMemory(&pem.Block{Type: "EC PRIVATE KEY", Bytes: keyDER}), 0o600) + if err != nil { + t.Fatalf("write key: %v", err) + } + + return certPath, keyPath +} + +// TestBuildAPITLSConfig_Success walks the env shapes that +// buildAPITLSConfig must accept: nothing set (plaintext), cert+key +// (standard TLS), cert+key+CA (mTLS).
+func TestBuildAPITLSConfig_Success(t *testing.T) { + t.Parallel() + + certPath, keyPath := writeTestCertPair(t) + caPath := certPath // self-sign — same cert acts as its own CA + + t.Run("neither set → nil (plaintext)", func(t *testing.T) { + t.Parallel() + assertPlaintextConfig(t, envConfig{}) + }) + + t.Run("cert + key → standard TLS, no client auth", func(t *testing.T) { + t.Parallel() + assertStandardTLSConfig(t, envConfig{APITLSCert: certPath, APITLSKey: keyPath}) + }) + + t.Run("cert + key + CA → mTLS with RequireAndVerifyClientCert", func(t *testing.T) { + t.Parallel() + assertMTLSConfig(t, envConfig{APITLSCert: certPath, APITLSKey: keyPath, APITLSCA: caPath}) + }) +} + +// assertPlaintextConfig verifies buildAPITLSConfig returns +// (nil, nil) — the plaintext sentinel — for the given config. +func assertPlaintextConfig(t *testing.T, cfg envConfig) { + t.Helper() + + got, err := buildAPITLSConfig(cfg) + if err != nil { + t.Fatalf("err = %v, want nil", err) + } + + if got != nil { + t.Fatalf("got = %+v, want nil", got) + } +} + +// assertStandardTLSConfig verifies the returned *tls.Config is the +// standard TLS shape: cert chain populated, ClientAuth disabled +// (no CA → no client cert verification), TLS 1.2 floor. +func assertStandardTLSConfig(t *testing.T, cfg envConfig) { + t.Helper() + + got, err := buildAPITLSConfig(cfg) + if err != nil { + t.Fatalf("err = %v", err) + } + + if got == nil { + t.Fatalf("got nil, want *tls.Config") + } + + if got.ClientAuth != tls.NoClientCert { + t.Errorf("ClientAuth = %v, want NoClientCert (no CA was configured)", got.ClientAuth) + } + + if got.MinVersion != tls.VersionTLS12 { + t.Errorf("MinVersion = %d, want %d (TLS 1.2)", got.MinVersion, tls.VersionTLS12) + } +} + +// assertMTLSConfig verifies the returned *tls.Config is the mTLS +// shape: ClientAuth=RequireAndVerifyClientCert and ClientCAs +// populated. 
+func assertMTLSConfig(t *testing.T, cfg envConfig) { + t.Helper() + + got, err := buildAPITLSConfig(cfg) + if err != nil { + t.Fatalf("err = %v", err) + } + + if got.ClientAuth != tls.RequireAndVerifyClientCert { + t.Errorf("ClientAuth = %v, want RequireAndVerifyClientCert", got.ClientAuth) + } + + if got.ClientCAs == nil { + t.Error("ClientCAs is nil; CA bundle was not loaded") + } +} + +// TestBuildAPITLSConfig_Errors walks the misconfiguration shapes +// that must surface as startup errors rather than silently +// degrading to plaintext or empty mTLS pools. +func TestBuildAPITLSConfig_Errors(t *testing.T) { + t.Parallel() + + certPath, keyPath := writeTestCertPair(t) + + dir := t.TempDir() + emptyCA := filepath.Join(dir, "empty.pem") + + err := os.WriteFile(emptyCA, []byte("not a pem"), 0o600) + if err != nil { + t.Fatalf("write empty: %v", err) + } + + cases := []struct { + name string + cfg envConfig + }{ + {"cert without key", envConfig{APITLSCert: certPath}}, + {"key without cert", envConfig{APITLSKey: keyPath}}, + {"missing cert file", envConfig{APITLSCert: "/missing.pem", APITLSKey: keyPath}}, + {"missing CA file", envConfig{APITLSCert: certPath, APITLSKey: keyPath, APITLSCA: "/missing.pem"}}, + {"non-PEM CA bundle", envConfig{APITLSCert: certPath, APITLSKey: keyPath, APITLSCA: emptyCA}}, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + + _, err := buildAPITLSConfig(tc.cfg) + if err == nil { + t.Fatalf("expected an error for %q config", tc.name) + } + }) + } +} + +// TestBuildAPITLSConfig_PartialErrIsSentinel pins the error chain +// for the cert-without-key (and key-without-cert) misconfiguration +// — callers using errors.Is must be able to detect it. 
+func TestBuildAPITLSConfig_PartialErrIsSentinel(t *testing.T) { + t.Parallel() + + certPath, keyPath := writeTestCertPair(t) + + _, err := buildAPITLSConfig(envConfig{APITLSCert: certPath}) + if !errors.Is(err, errAPITLSPartial) { + t.Errorf("cert-without-key err = %v, want errors.Is(_, errAPITLSPartial)", err) + } + + _, err = buildAPITLSConfig(envConfig{APITLSKey: keyPath}) + if !errors.Is(err, errAPITLSPartial) { + t.Errorf("key-without-cert err = %v, want errors.Is(_, errAPITLSPartial)", err) + } +} diff --git a/cmd/hypercache-server/openapi.go b/cmd/hypercache-server/openapi.go new file mode 100644 index 0000000..3bf8be8 --- /dev/null +++ b/cmd/hypercache-server/openapi.go @@ -0,0 +1,19 @@ +package main + +import _ "embed" + +// openapiSpec is the raw YAML of the client API's OpenAPI 3.1 +// specification, embedded at build time from the sibling +// `openapi.yaml` (Go's `embed` directive cannot traverse `..`, +// so the spec lives next to the binary it describes). The +// server serves it at `GET /v1/openapi.yaml` so clients can +// discover the API surface without out-of-band docs — and so +// a deployed cluster's declared contract can never drift from +// the binary running behind it. +// +// The drift test in `openapi_test.go` asserts that every fiber +// route registered by `registerClientRoutes` is documented in +// this spec, and vice-versa; CI keeps the two in sync. +// +//go:embed openapi.yaml +var openapiSpec []byte diff --git a/cmd/hypercache-server/openapi.yaml b/cmd/hypercache-server/openapi.yaml new file mode 100644 index 0000000..d331fcf --- /dev/null +++ b/cmd/hypercache-server/openapi.yaml @@ -0,0 +1,633 @@ +openapi: 3.1.0 +info: + title: HyperCache HTTP API + version: 1.0.0 + summary: Distributed in-memory cache — client REST API. + description: | + The hypercache-server binary exposes this REST API for application + traffic. 
Three other listeners run alongside it (peer-to-peer dist + HTTP for replication, management HTTP for admin/observability) but + they are not part of this contract. + + **Auth.** When the server is started with `HYPERCACHE_AUTH_TOKEN` + set, every endpoint requires `Authorization: Bearer `. + Without that env var, auth is open. + + **Wire encoding.** Single-key `GET` returns raw bytes + (`application/octet-stream`) by default for binary fidelity. Send + `Accept: application/json` to receive an `ItemEnvelope` with + metadata + base64 value. Batch endpoints always emit the + base64-encoded form for binary safety. + + **Errors.** Every 4xx/5xx response carries the `ErrorResponse` + shape with stable `code` strings (`BAD_REQUEST`, `NOT_FOUND`, + `DRAINING`, `INTERNAL`, `UNAUTHORIZED`). + license: + name: LicenseRef-Hyperd + url: https://github.com/hyp3rd/hypercache/blob/main/LICENSE + contact: + name: hyp3rd + url: https://github.com/hyp3rd/hypercache + +servers: + - url: http://localhost:8080 + description: Default local binding (HYPERCACHE_API_ADDR). + - url: https://{host}:{port} + description: TLS-fronted production deployment. + variables: + host: + default: localhost + description: Public hostname or load-balancer DNS. + port: + default: "443" + description: TLS port (typically 443 behind an LB). + +security: + - bearerAuth: [] + +tags: + - name: cache + description: Single-key Set/Get/Delete and metadata inspection. + - name: batch + description: Bulk operations — Set/Get/Delete many keys per call. + - name: cluster + description: Ring ownership inspection for client-side debugging. + - name: meta + description: Liveness probe, OpenAPI self-description. + +paths: + /healthz: + get: + operationId: getHealthz + tags: [ meta ] + summary: Liveness probe. + description: | + Returns `200 ok` when the binary is running. Auth-free + regardless of `HYPERCACHE_AUTH_TOKEN` so external probes + (k8s, LBs) can reach it without credentials. 
+ security: [] + responses: + "200": + description: Server is up. + content: + text/plain: + schema: + type: string + example: ok + + /v1/openapi.yaml: + get: + operationId: getOpenAPISpec + tags: [ meta ] + summary: This OpenAPI specification. + description: | + Returns the embedded spec document so clients can discover + the API surface programmatically. Same content as the file + in this repo at `cmd/hypercache-server/openapi.yaml`. + security: [] + responses: + "200": + description: OpenAPI 3.1 YAML. + content: + application/yaml: + schema: + type: string + + /v1/cache/{key}: + parameters: + - $ref: "#/components/parameters/PathKey" + + put: + operationId: putCacheItem + tags: [ cache ] + summary: Store a value. + description: | + Body is the raw value (any bytes; JSON, text, binary). The + cache stores it as `[]byte`. Use the `ttl` query parameter + for time-bounded entries. + parameters: + - $ref: "#/components/parameters/QueryTTL" + requestBody: + required: true + description: Raw value bytes. + content: + application/octet-stream: + schema: + type: string + format: binary + application/json: + schema: {} + text/plain: + schema: + type: string + responses: + "200": + description: Stored. + content: + application/json: + schema: + $ref: "#/components/schemas/PutResponse" + examples: + stored: + value: + key: greeting + stored: true + ttl_ms: 300000 + bytes: 5 + node: node-1 + owners: [ node-1, node-2, node-3 ] + "400": { $ref: "#/components/responses/BadRequest" } + "401": { $ref: "#/components/responses/Unauthorized" } + "503": { $ref: "#/components/responses/Draining" } + + get: + operationId: getCacheItem + tags: [ cache ] + summary: Retrieve a value. + description: | + Default: raw value bytes (`application/octet-stream`) for + binary fidelity. Send `Accept: application/json` to get an + `ItemEnvelope` with TTL, version, owners, and a base64 value. + responses: + "200": + description: Found. 
+ content: + application/octet-stream: + schema: + type: string + format: binary + examples: + rawBytes: + summary: Default (raw bytes) + value: world + application/json: + schema: + $ref: "#/components/schemas/ItemEnvelope" + examples: + envelope: + value: + key: greeting + value: d29ybGQ= + value_encoding: base64 + ttl_ms: 28412 + expires_at: "2026-05-06T10:30:00Z" + version: 1 + origin: node-1 + last_updated: "2026-05-06T10:00:00Z" + node: node-1 + owners: [ node-1, node-2, node-3 ] + "400": { $ref: "#/components/responses/BadRequest" } + "401": { $ref: "#/components/responses/Unauthorized" } + "404": { $ref: "#/components/responses/NotFound" } + + head: + operationId: headCacheItem + tags: [ cache ] + summary: Inspect metadata without fetching the value. + description: | + Returns the same metadata as the `Accept: application/json` + GET, but in `X-Cache-*` response headers and with no body. + Cheap for cache-revalidation flows that just need to know + whether the key is current. + responses: + "200": + description: Found. + headers: + X-Cache-Version: { schema: { type: integer } } + X-Cache-Origin: { schema: { type: string } } + X-Cache-Last-Updated: { schema: { type: string, format: date-time } } + X-Cache-Ttl-Ms: { schema: { type: integer } } + X-Cache-Expires-At: { schema: { type: string, format: date-time } } + X-Cache-Owners: { schema: { type: string, description: "Comma-separated owner IDs." } } + X-Cache-Node: { schema: { type: string } } + "400": + description: Missing key in path. + "401": + description: Unauthorized. + "404": + description: Not found. + + delete: + operationId: deleteCacheItem + tags: [ cache ] + summary: Remove a value. + description: | + Idempotent — deleting a missing key returns 200 with + `deleted: true`. The `owners` list reflects the ring-owners + that should have held the key, useful for follow-up + verification. + responses: + "200": + description: Deleted (or never existed). 
+ content: + application/json: + schema: + $ref: "#/components/schemas/DeleteResponse" + "400": { $ref: "#/components/responses/BadRequest" } + "401": { $ref: "#/components/responses/Unauthorized" } + "503": { $ref: "#/components/responses/Draining" } + + /v1/owners/{key}: + parameters: + - $ref: "#/components/parameters/PathKey" + get: + operationId: getKeyOwners + tags: [ cluster ] + summary: Resolve ring owners for a key. + description: | + Pure visibility — returns the consistent-hash owners for the + given key without touching cache state. Works even when the + key has never been written. + responses: + "200": + description: Owner set (deterministic from membership). + content: + application/json: + schema: + $ref: "#/components/schemas/OwnersResponse" + "400": { $ref: "#/components/responses/BadRequest" } + "401": { $ref: "#/components/responses/Unauthorized" } + + /v1/cache/batch/get: + post: + operationId: batchGet + tags: [ batch ] + summary: Bulk fetch. + description: | + Each requested key is looked up independently — a missing key + produces `{found: false}` without failing the whole batch. + Found entries carry the same shape as a single-key + `Accept: application/json` GET. + requestBody: + required: true + content: + application/json: + schema: + $ref: "#/components/schemas/BatchGetRequest" + responses: + "200": + description: Per-key results. + content: + application/json: + schema: + $ref: "#/components/schemas/BatchGetResponse" + "400": { $ref: "#/components/responses/BadRequest" } + "401": { $ref: "#/components/responses/Unauthorized" } + + /v1/cache/batch/put: + post: + operationId: batchPut + tags: [ batch ] + summary: Bulk store. + description: | + Each item's `value_encoding` selects how the wire `value` + string is interpreted: `base64` decodes bytes-first; + anything else (default) treats the string as UTF-8 text and + stores the raw bytes. Per-item errors are surfaced — + a single failure doesn't void the whole batch. 
+ requestBody: + required: true + content: + application/json: + schema: + $ref: "#/components/schemas/BatchPutRequest" + responses: + "200": + description: Per-key results. + content: + application/json: + schema: + $ref: "#/components/schemas/BatchPutResponse" + "400": { $ref: "#/components/responses/BadRequest" } + "401": { $ref: "#/components/responses/Unauthorized" } + "503": { $ref: "#/components/responses/Draining" } + + /v1/cache/batch/delete: + post: + operationId: batchDelete + tags: [ batch ] + summary: Bulk delete. + description: | + Same per-item semantics as bulk-put. Idempotent — keys that + never existed return `deleted: true`. + requestBody: + required: true + content: + application/json: + schema: + $ref: "#/components/schemas/BatchDeleteRequest" + responses: + "200": + description: Per-key results. + content: + application/json: + schema: + $ref: "#/components/schemas/BatchDeleteResponse" + "400": { $ref: "#/components/responses/BadRequest" } + "401": { $ref: "#/components/responses/Unauthorized" } + "503": { $ref: "#/components/responses/Draining" } + +components: + securitySchemes: + bearerAuth: + type: http + scheme: bearer + bearerFormat: opaque-token + description: | + When `HYPERCACHE_AUTH_TOKEN` is set on the server, every + request must carry `Authorization: Bearer <token>`. Tokens + are opaque strings (constant-time compared on the server). + + parameters: + PathKey: + name: key + in: path + required: true + description: Cache key (URL-safe). + schema: + type: string + minLength: 1 + maxLength: 1024 + + QueryTTL: + name: ttl + in: query + required: false + description: | + Optional time-to-live as a Go duration string (`30s`, `5m`, + `2h30m`). Empty / absent stores without expiration. + schema: + type: string + pattern: "^([0-9]+(\\.[0-9]+)?(ns|us|µs|ms|s|m|h))+$" + + responses: + BadRequest: + description: Malformed request (missing key, invalid TTL, bad JSON, …).
+ content: + application/json: + schema: + $ref: "#/components/schemas/ErrorResponse" + examples: + badRequest: + value: + error: "invalid ttl: time: invalid duration \"junk\"" + code: BAD_REQUEST + + Unauthorized: + description: Missing or invalid bearer token. + content: + application/json: + schema: + $ref: "#/components/schemas/ErrorResponse" + examples: + unauthorized: + value: + error: unauthorized + code: UNAUTHORIZED + + NotFound: + description: Key not found. + content: + application/json: + schema: + $ref: "#/components/schemas/ErrorResponse" + examples: + notFound: + value: + error: key not found + code: NOT_FOUND + + Draining: + description: | + Node is draining (Drain has been called or `/dist/drain` + was POSTed). Clients should redirect writes to a peer. + content: + application/json: + schema: + $ref: "#/components/schemas/ErrorResponse" + examples: + draining: + value: + error: node is draining; redirect to a peer + code: DRAINING + + schemas: + ErrorResponse: + type: object + required: [ error, code ] + properties: + error: + type: string + description: Human-readable message. + code: + type: string + enum: [ BAD_REQUEST, NOT_FOUND, DRAINING, INTERNAL, UNAUTHORIZED ] + description: Stable machine-readable code. + + PutResponse: + type: object + required: [ key, stored, bytes, node, owners ] + properties: + key: { type: string } + stored: { type: boolean } + ttl_ms: + type: integer + format: int64 + description: TTL in milliseconds (omitted when no TTL was set). + bytes: + type: integer + format: int32 + description: Stored value size in bytes. + node: + type: string + description: ID of the node that handled the request. + owners: + type: array + description: Ring owners for the key — primary first. 
+ items: { type: string } + + DeleteResponse: + type: object + required: [ key, deleted, node, owners ] + properties: + key: { type: string } + deleted: { type: boolean } + node: { type: string } + owners: + type: array + items: { type: string } + + OwnersResponse: + type: object + required: [ key, owners, node ] + properties: + key: { type: string } + owners: + type: array + description: Ring owners — primary first, replicas after. + items: { type: string } + node: { type: string } + + ItemEnvelope: + type: object + required: [ key, value, value_encoding, version, node, owners ] + properties: + key: { type: string } + value: + type: string + description: Always base64-encoded bytes for binary safety. + value_encoding: + type: string + enum: [ base64 ] + ttl_ms: + type: integer + format: int64 + expires_at: + type: string + format: date-time + version: + type: integer + format: int64 + description: Monotonic per-key version assigned by the dist backend. + origin: + type: string + description: ID of the node that originally wrote this version. + last_updated: + type: string + format: date-time + node: { type: string } + owners: + type: array + items: { type: string } + + BatchGetRequest: + type: object + required: [ keys ] + properties: + keys: + type: array + items: { type: string } + minItems: 0 + + BatchGetResult: + type: object + required: [ key, found ] + properties: + key: { type: string } + found: { type: boolean } + value: { type: string, description: Base64 (only when found). 
} + value_encoding: + type: string + enum: [ base64 ] + ttl_ms: + type: integer + format: int64 + expires_at: + type: string + format: date-time + version: + type: integer + format: int64 + origin: { type: string } + last_updated: + type: string + format: date-time + owners: + type: array + items: { type: string } + + BatchGetResponse: + type: object + required: [ results, node ] + properties: + results: + type: array + items: { $ref: "#/components/schemas/BatchGetResult" } + node: { type: string } + + BatchPutItem: + type: object + required: [ key, value ] + properties: + key: { type: string } + value: + type: string + description: | + UTF-8 text by default, or base64-encoded bytes when + `value_encoding` is `base64`. + value_encoding: + type: string + enum: [ string, base64 ] + default: string + ttl_ms: + type: integer + format: int64 + + BatchPutRequest: + type: object + required: [ items ] + properties: + items: + type: array + items: { $ref: "#/components/schemas/BatchPutItem" } + minItems: 0 + + BatchPutResult: + type: object + required: [ key, stored ] + properties: + key: { type: string } + stored: { type: boolean } + bytes: + type: integer + format: int32 + owners: + type: array + items: { type: string } + error: + type: string + description: Per-item error message (only when stored=false). 
+ code: + type: string + enum: [ BAD_REQUEST, DRAINING, INTERNAL ] + + BatchPutResponse: + type: object + required: [ results, node ] + properties: + results: + type: array + items: { $ref: "#/components/schemas/BatchPutResult" } + node: { type: string } + + BatchDeleteRequest: + type: object + required: [ keys ] + properties: + keys: + type: array + items: { type: string } + minItems: 0 + + BatchDeleteResult: + type: object + required: [ key, deleted ] + properties: + key: { type: string } + deleted: { type: boolean } + owners: + type: array + items: { type: string } + error: { type: string } + code: + type: string + enum: [ BAD_REQUEST, DRAINING, INTERNAL ] + + BatchDeleteResponse: + type: object + required: [ results, node ] + properties: + results: + type: array + items: { $ref: "#/components/schemas/BatchDeleteResult" } + node: { type: string } diff --git a/cmd/hypercache-server/openapi_test.go b/cmd/hypercache-server/openapi_test.go new file mode 100644 index 0000000..fb28a4e --- /dev/null +++ b/cmd/hypercache-server/openapi_test.go @@ -0,0 +1,195 @@ +package main + +import ( + "slices" + "strings" + "testing" + + fiber "github.com/gofiber/fiber/v3" + "gopkg.in/yaml.v3" + + "github.com/hyp3rd/hypercache/pkg/httpauth" +) + +// TestOpenAPISpecMatchesRoutes is the drift detector. It registers +// every client-API route the production binary exposes onto a +// throwaway fiber app, then walks the embedded OpenAPI spec — and +// asserts the two sets of (method, path) tuples are equal. Any +// route added in main.go without a matching path in openapi.yaml +// (or vice-versa) trips this test, so the contract published at +// `GET /v1/openapi.yaml` cannot silently fall out of sync with +// what the binary actually serves. +// +// Approach notes: +// - We drive `registerClientRoutes` directly rather than spinning +// up the management/dist HTTP listeners — the spec only covers +// the client API. 
A zero httpauth.Policy is passed: handler wiring is +// identical to production, and auth outcomes are irrelevant +// because the test only reads the route table (no requests are sent). +// - Fiber stores params as `:key`; OpenAPI uses `{key}`. We +// normalize to OpenAPI form before comparing. +// - We ignore fiber's auto-registered HEAD-for-GET and OPTIONS +// handlers, only counting the methods we explicitly registered. +func TestOpenAPISpecMatchesRoutes(t *testing.T) { + t.Parallel() + + codeRoutes := registeredCodeRoutes(t) + specRoutes := documentedSpecRoutes(t) + + missingFromSpec := difference(codeRoutes, specRoutes) + if len(missingFromSpec) > 0 { + t.Errorf("routes registered in code but NOT documented in openapi.yaml:\n %s", strings.Join(missingFromSpec, "\n ")) + } + + missingFromCode := difference(specRoutes, codeRoutes) + if len(missingFromCode) > 0 { + t.Errorf("paths documented in openapi.yaml but NOT registered in code:\n %s", strings.Join(missingFromCode, "\n ")) + } +} + +// registeredCodeRoutes returns the canonical "METHOD path" set +// for routes the production binary actually serves on the client +// API. We skip fiber's auto-registered HEAD-for-GET (a route we +// did not declare) by tracking which methods we explicitly wire +// in registerClientRoutes — the route table includes every +// fiber.Method, but only the ones that appear in our wiring are +// part of the contract. +func registeredCodeRoutes(t *testing.T) map[string]struct{} { + t.Helper() + + app := fiber.New() + // Drift test only cares about route paths, not auth — the zero + // Policy 401s every protected route, but app.GetRoutes() reads + // the registration table without driving requests.
+ registerClientRoutes(app, httpauth.Policy{}, &nodeContext{nodeID: "drift-test"}) + + declared := declaredMethodsForPath() + out := map[string]struct{}{} + + for _, r := range app.GetRoutes() { + methods, ok := declared[r.Path] + if !ok { + continue + } + + if _, want := methods[r.Method]; !want { + continue + } + + out[normalize(r.Method, r.Path)] = struct{}{} + } + + return out +} + +// declaredMethodsForPath enumerates the (path, methods) pairs that +// registerClientRoutes wires by hand. Kept here rather than +// reflected from the fiber app because fiber auto-registers HEAD +// for every GET — and we want to assert against the methods we +// explicitly declared, not the implicit ones. If a new route is +// added to registerClientRoutes, it must be mirrored here too; +// this list is a small price for not coupling the test to fiber's +// internal route-expansion behavior. +func declaredMethodsForPath() map[string]map[string]struct{} { + return map[string]map[string]struct{}{ + "/healthz": {fiber.MethodGet: {}}, + "/v1/openapi.yaml": {fiber.MethodGet: {}}, + "/v1/cache/:key": {fiber.MethodPut: {}, fiber.MethodGet: {}, fiber.MethodHead: {}, fiber.MethodDelete: {}}, + "/v1/owners/:key": {fiber.MethodGet: {}}, + "/v1/cache/batch/get": {fiber.MethodPost: {}}, + "/v1/cache/batch/put": {fiber.MethodPost: {}}, + "/v1/cache/batch/delete": {fiber.MethodPost: {}}, + } +} + +// documentedSpecRoutes parses the embedded YAML and projects every +// (method, path) tuple it documents. Only the standard HTTP +// methods are considered — keys like `parameters`, `summary`, and +// `description` at the path-item level are skipped. 
+func documentedSpecRoutes(t *testing.T) map[string]struct{} { + t.Helper() + + type pathItem map[string]yaml.Node + + var doc struct { + Paths map[string]pathItem `yaml:"paths"` + } + + err := yaml.Unmarshal(openapiSpec, &doc) + if err != nil { + t.Fatalf("parse openapi.yaml: %v", err) + } + + httpMethods := map[string]string{ + "get": fiber.MethodGet, + "put": fiber.MethodPut, + "post": fiber.MethodPost, + "delete": fiber.MethodDelete, + "head": fiber.MethodHead, + "options": fiber.MethodOptions, + "patch": fiber.MethodPatch, + "trace": fiber.MethodTrace, + } + + out := map[string]struct{}{} + + for path, item := range doc.Paths { + for op := range item { + method, ok := httpMethods[strings.ToLower(op)] + if !ok { + continue + } + + out[normalize(method, path)] = struct{}{} + } + } + + return out +} + +// normalize converts a fiber-style or OpenAPI-style path into the +// shared comparison form: METHOD followed by the OpenAPI +// `{param}` representation. Fiber's `:key` becomes `{key}`; query +// strings are not part of the path identity (OpenAPI tracks them +// as separate `parameters` entries). +func normalize(method, path string) string { + out := make([]byte, 0, len(path)) + + for i := 0; i < len(path); i++ { + if path[i] != ':' { + out = append(out, path[i]) + + continue + } + + j := i + 1 + + for j < len(path) && path[j] != '/' { + j++ + } + + out = append(out, '{') + out = append(out, path[i+1:j]...) + out = append(out, '}') + i = j - 1 + } + + return method + " " + string(out) +} + +// difference returns sorted entries present in a but not in b. +// Sorted output keeps the failure message stable across runs so +// CI failures are diff-friendly. 
+func difference(a, b map[string]struct{}) []string { + var out []string + + for k := range a { + if _, ok := b[k]; !ok { + out = append(out, k) + } + } + + slices.Sort(out) + + return out +} diff --git a/cspell.config.yaml b/cspell.config.yaml index 2961ea9..ffeef84 100644 --- a/cspell.config.yaml +++ b/cspell.config.yaml @@ -35,6 +35,8 @@ dictionaries: [] words: - acks - ALPN + - APITLS + - APITLSCA - assertable - autosync - backpressure @@ -58,6 +60,7 @@ words: - cmap - Cmder - codacy + - codegen - codemod - containedctx - contextcheck @@ -69,6 +72,7 @@ words: - dels - depguard - derr + - disambiguator - distconfig - distroless - EDITMSG @@ -81,23 +85,23 @@ words: - exhaustruct - Fanout - fasthttp - - fontawesome - - frontmatter - fatals - fctx - ferr - FLUSHALL - FLUSHDB + - fontawesome - forcetypeassert - Fprintf - Fprintln - freqs + - frontmatter - funlen - geomean - - glightbox - gerr - gitversion - GITVERSION + - glightbox - goarch - gocache - goccy @@ -118,11 +122,13 @@ words: - healthz - histogramcollector - HMAC - - hostnames - honnef + - hostnames - hreq + - httpauth - HTTPTLS - hypercache + - Hyperd - idxs - Iface - ineff @@ -133,9 +139,10 @@ words: - ireturn - Itemm - keyf + - keypair - lamport - - linenums - LFUDA + - linenums - localmodule - logrus - longbridgeapp @@ -155,6 +162,7 @@ words: - mvdan - nestif - Newf + - nilnil - nindent - noctx - noinlineerr @@ -164,6 +172,7 @@ words: - nonroot - nosec - NOVENDOR + - oapi - paralleltest - Pipeliner - pluggability @@ -173,11 +182,12 @@ words: - productionization - protoc - pushd - - pygments - pyenv + - pygments - pymdownx - recvcheck - rediscluster + - Redocly - repls - Repls - rerr @@ -192,28 +202,30 @@ words: - skeys - SLRU - softprops - - staticcheck - statefulset + - staticcheck - stdlib - stretchr - - svcname - strfnv + - stringly - strs - subtest - subtests - superfences - sval + - svcname + - tasklist - thelper - toplevel - tparallel - - tasklist - tracetest - traefik - - twemoji - trunc - - 
tunables - TTLMs + - tunables + - twemoji - ugorji + - unconfigured - unmarshals - unpadded - unsharded diff --git a/docs/api.md b/docs/api.md new file mode 100644 index 0000000..e83352c --- /dev/null +++ b/docs/api.md @@ -0,0 +1,50 @@ +--- +title: API Reference +description: Interactive OpenAPI 3.1 reference for the hypercache-server client REST API. +--- + +# API Reference + +The HyperCache server exposes a REST API for application traffic on the +client port (default `:8080`). The interactive reference below is the +**same** OpenAPI 3.1 spec the binary embeds at build time and serves at +`GET /v1/openapi.yaml` — so the contract you read here is exactly what +the deployed cluster will honour. + +!!! tip "Self-describing servers" + +```text + Every running node serves its spec at `/v1/openapi.yaml`. Point your + own tooling at that URL to generate clients, run conformance checks, + or render this same UI against a live cluster. +``` + +## Auth + +When the server is started with `HYPERCACHE_AUTH_TOKEN` set, every +endpoint requires `Authorization: Bearer <token>`. Without that env +var, the API is open. Use the **Authorize** button in the UI below to +inject the header for live "Try it out" calls. + +## Wire encoding + +* Single-key `GET /v1/cache/{key}` returns raw bytes + (`application/octet-stream`) by default for binary fidelity. Send + `Accept: application/json` to receive an `ItemEnvelope` with metadata + and a base64-encoded value. +* Batch endpoints always emit base64 values for binary safety. +* Errors carry the canonical `ErrorResponse` shape with stable + machine-readable `code` strings (`BAD_REQUEST`, `NOT_FOUND`, + `DRAINING`, `INTERNAL`, `UNAUTHORIZED`). + +## Interactive reference + + + +## Downloading the spec + +The raw YAML lives in the repo at +[cmd/hypercache-server/openapi.yaml](../cmd/hypercache-server/openapi.yaml) +and is served by every node at `GET /v1/openapi.yaml`.
Use it as the +input to client-codegen tools (`openapi-generator`, `oapi-codegen`, +`@redocly/cli`, …). diff --git a/docs/distributed.md b/docs/distributed.md index 74ef7d6..aa3a1a4 100644 --- a/docs/distributed.md +++ b/docs/distributed.md @@ -100,11 +100,32 @@ Configuration knobs: - No persistent storage or WAL. - No network partitions / latency injection (future chaos tooling). - No tracing spans for distributed operations. -- Security (TLS/mTLS, auth) absent. - Compression unsupported. - Migration & repair actions are fire-and-forget (no retry backoff queues). - Migration retry queue absent. +## Security + +The `hypercache-server` binary supports three auth surfaces and TLS +on every listener: + +- **Client API** (port 8080): bearer-token + mTLS via the + [`pkg/httpauth/`](https://github.com/hyp3rd/hypercache/blob/main/pkg/httpauth) + package. Multi-token configs with per-identity scopes + (Read/Write/Admin) load from `HYPERCACHE_AUTH_CONFIG` (YAML); + the legacy `HYPERCACHE_AUTH_TOKEN` env var still works as a + one-token shortcut. mTLS is enabled with + `HYPERCACHE_API_TLS_CERT`/`KEY`/`CLIENT_CA`; the verified peer + cert's Subject CN maps to a configured `CertIdentity`. +- **Dist transport** (port 7946): symmetric peer auth via + `DistHTTPAuth{Token, ServerVerify, ClientSign}` with + constant-time compare. Reads `HYPERCACHE_AUTH_TOKEN` for the + shared peer token. TLS via `DistHTTPLimits.TLSConfig`. +- **Management HTTP** (port 8081): operator-supplied auth via + `WithMgmtAuth(func(fiber.Ctx) error)` — unauthenticated by + default. Unifying it with `pkg/httpauth/` is deferred to a + future round. + ## Near-Term Roadmap Deltas 1. Migration retry queue + success/failure counters. 
diff --git a/docs/requirements.txt b/docs/requirements.txt index 3953090..0179294 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -17,4 +17,5 @@ mkdocs-glightbox==0.5.2 mkdocs-include-markdown-plugin==7.2.2 mkdocs-material==9.7.6 mkdocs-material-extensions==1.3.1 +mkdocs-swagger-ui-tag==0.8.0 Pygments==2.20.0 diff --git a/go.mod b/go.mod index bc66b0a..e856050 100644 --- a/go.mod +++ b/go.mod @@ -16,6 +16,7 @@ require ( go.opentelemetry.io/otel/sdk v1.43.0 go.opentelemetry.io/otel/sdk/metric v1.43.0 go.opentelemetry.io/otel/trace v1.43.0 + gopkg.in/yaml.v3 v3.0.1 ) require ( @@ -40,5 +41,4 @@ require ( golang.org/x/net v0.53.0 // indirect golang.org/x/sys v0.43.0 // indirect golang.org/x/text v0.36.0 // indirect - gopkg.in/yaml.v3 v3.0.1 // indirect ) diff --git a/hypercache-server b/hypercache-server index a0f8bd9..222b5bc 100755 Binary files a/hypercache-server and b/hypercache-server differ diff --git a/mkdocs.yml b/mkdocs.yml index 4ed7355..7adb59e 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -56,6 +56,10 @@ plugins: - search - include-markdown - glightbox + # Renders embedded OpenAPI specs via `<swagger-ui src="..."/>` tags. + # `docs/api.md` uses this to render the binary's embedded spec + # (injected into the docs build by `_mkdocs/hooks.py:on_files`). + - swagger-ui-tag # Build-time hooks. _hooks.py rewrites repo-relative links to source # files (e.g. `../pkg/foo.go`) into absolute GitHub URLs so the same @@ -113,5 +117,6 @@ nav: - Architecture: - Distributed Backend: distributed.md - Reference: + - API Reference: api.md - Changelog: changelog.md - RFCs: rfcs/index.md diff --git a/pkg/httpauth/loader.go b/pkg/httpauth/loader.go new file mode 100644 index 0000000..352dcd4 --- /dev/null +++ b/pkg/httpauth/loader.go @@ -0,0 +1,228 @@ +package httpauth + +import ( + "errors" + "fmt" + "os" + + "gopkg.in/yaml.v3" +) + +// Env-var names that drive policy loading.
Kept as exported +// constants so the hypercache-server binary's documentation, +// the loader, and the tests all reference one canonical name. +const ( + // EnvAuthConfig points at a YAML file describing tokens + // and cert identities. Takes precedence over EnvAuthToken. + EnvAuthConfig = "HYPERCACHE_AUTH_CONFIG" + // EnvAuthToken is the legacy single-token shortcut: when + // set, the loader synthesizes one all-scopes TokenIdentity + // with ID "default" so existing zero-config deployments + // keep working byte-identical. + EnvAuthToken = "HYPERCACHE_AUTH_TOKEN" // #nosec G101 -- env-var name, not a credential value +) + +// Static loader errors. Wrapped with %w so callers get both +// errors.Is matching for control-flow and the contextual message +// (file path, field name) via Error(). Token bodies are NEVER +// included in any wrap — see fmt.Errorf sites below. +var ( + errEmptyScopes = errors.New("scopes is empty; at least one of read/write/admin required") + errUnknownScope = errors.New("unknown scope (valid: read, write, admin)") +) + +// fileSchema is the YAML wire shape of HYPERCACHE_AUTH_CONFIG. +// Field tags use snake_case to match the project's YAML aesthetic +// (mkdocs.yml, redocly.yaml, openapi.yaml all snake_case). +// +// Wire example: +// +// tokens: +// - id: app-prod +// token: "<secret>" +// scopes: [read, write] +// cert_identities: +// - subject_cn: app.internal +// scopes: [read] +// allow_anonymous: false +// +// Unrecognized fields are rejected via yaml.Decoder.KnownFields(true) +// in loadFromFile() — typos in scope names or field names should fail loudly, +// not silently drop the misnamed identity.
+type fileSchema struct { + Tokens []tokenFile `yaml:"tokens"` + CertIdentities []certFile `yaml:"cert_identities"` + AllowAnonymous bool `yaml:"allow_anonymous"` +} + +type tokenFile struct { + ID string `yaml:"id"` + Token string `yaml:"token"` + Scopes []string `yaml:"scopes"` +} + +type certFile struct { + SubjectCN string `yaml:"subject_cn"` + Scopes []string `yaml:"scopes"` +} + +// LoadFromEnv resolves a client-API auth Policy from the process +// environment. Precedence: +// +// 1. EnvAuthConfig set → load multi-token + cert-identity Policy +// from the YAML file. Missing or malformed file returns an +// error; the caller exits non-zero. This is a behavioral break +// vs the legacy "missing token = open mode" posture and is +// documented in CHANGELOG. +// 2. EnvAuthToken set → synthesize a single TokenIdentity with all +// three scopes. Mirrors the pre-v2 behavior where one token +// gated every protected route, so existing zero-config +// deployments keep working byte-identical. +// 3. Neither set → return the zero Policy. Caller should log a +// "running with no auth" warning and decide whether to opt +// into AllowAnonymous mode. +// +// EnvAuthConfig and EnvAuthToken are NOT mutually exclusive: the +// dist transport's symmetric peer auth still reads EnvAuthToken +// directly (see cmd/hypercache-server/main.go's buildHyperCache). +// When both are set, EnvAuthConfig wins for the client API and +// EnvAuthToken is reused for dist — this is the standard config +// for a multi-tenant client API on top of a single-trust-domain +// cluster. +// +// LoadFromEnv runs Policy.Validate before returning so the caller +// gets a single error path; misconfigured policies surface as +// errors here rather than as silent runtime auth bypasses. 
+func LoadFromEnv() (Policy, error) { + configPath := os.Getenv(EnvAuthConfig) + legacyToken := os.Getenv(EnvAuthToken) + + if configPath != "" { + return loadFromFile(configPath) + } + + if legacyToken != "" { + return synthesizeLegacyPolicy(legacyToken), nil + } + + return Policy{}, nil +} + +// synthesizeLegacyPolicy mirrors the pre-v2 single-token behavior: +// one token grants every scope. ID "default" is what shows up in +// future audit logs / Identity.ID for callers using the legacy env. +func synthesizeLegacyPolicy(token string) Policy { + return Policy{ + Tokens: []TokenIdentity{ + { + ID: "default", + Token: token, + Scopes: []Scope{ScopeRead, ScopeWrite, ScopeAdmin}, + }, + }, + } +} + +// loadFromFile parses the YAML config and returns a validated Policy. +// Read errors (missing file, permission denied) and parse errors are +// wrapped with ErrInvalidPolicy so callers can distinguish bad-config +// from other fatal conditions. The token strings inside the file are +// NEVER included in any error message — wrapper messages mention +// only field names and IDs. +func loadFromFile(path string) (Policy, error) { + // #nosec G304 G703 -- path is from operator-supplied HYPERCACHE_AUTH_CONFIG env var; the env is the trusted boundary, the same posture as every + // other config-file load in the binary. + f, err := os.Open(path) + if err != nil { + // os.PathError already contains the path; don't re-wrap. 
+ return Policy{}, fmt.Errorf("%w: %w", ErrInvalidPolicy, err) + } + + defer func() { _ = f.Close() }() + + dec := yaml.NewDecoder(f) + dec.KnownFields(true) + + var schema fileSchema + + err = dec.Decode(&schema) + if err != nil { + return Policy{}, fmt.Errorf("%w: parse %s: %w", ErrInvalidPolicy, path, err) + } + + policy, err := schemaToPolicy(schema) + if err != nil { + return Policy{}, fmt.Errorf("%w: %s: %w", ErrInvalidPolicy, path, err) + } + + err = policy.Validate() + if err != nil { + return Policy{}, fmt.Errorf("%w: %s: %w", ErrInvalidPolicy, path, err) + } + + return policy, nil +} + +// schemaToPolicy lifts the YAML wire shape into the public Policy +// type. The two-step path (file → schema → Policy) keeps the YAML +// parsing isolated from the runtime auth shape; users embedding +// httpauth without YAML never pull yaml.v3 into their build. +func schemaToPolicy(s fileSchema) (Policy, error) { + tokens := make([]TokenIdentity, 0, len(s.Tokens)) + + for i, t := range s.Tokens { + scopes, err := parseScopes(t.Scopes) + if err != nil { + return Policy{}, fmt.Errorf("tokens[%d] (%q): %w", i, t.ID, err) + } + + tokens = append(tokens, TokenIdentity{ + ID: t.ID, + Token: t.Token, + Scopes: scopes, + }) + } + + certs := make([]CertIdentity, 0, len(s.CertIdentities)) + + for i, c := range s.CertIdentities { + scopes, err := parseScopes(c.Scopes) + if err != nil { + return Policy{}, fmt.Errorf("cert_identities[%d] (%q): %w", i, c.SubjectCN, err) + } + + certs = append(certs, CertIdentity{ + SubjectCN: c.SubjectCN, + Scopes: scopes, + }) + } + + return Policy{ + Tokens: tokens, + CertIdentities: certs, + AllowAnonymous: s.AllowAnonymous, + }, nil +} + +// parseScopes converts the wire string scope names into typed Scope +// values. Unknown scope names error with the offending name (safe +// to log — scope names are public taxonomy, not secrets) so a typo +// in the YAML fails loudly rather than silently dropping the grant. 
+func parseScopes(raw []string) ([]Scope, error) { + if len(raw) == 0 { + return nil, errEmptyScopes + } + + out := make([]Scope, 0, len(raw)) + + for _, name := range raw { + switch Scope(name) { + case ScopeRead, ScopeWrite, ScopeAdmin: + out = append(out, Scope(name)) + default: + return nil, fmt.Errorf("%w: %q", errUnknownScope, name) + } + } + + return out, nil +} diff --git a/pkg/httpauth/loader_test.go b/pkg/httpauth/loader_test.go new file mode 100644 index 0000000..f3992ff --- /dev/null +++ b/pkg/httpauth/loader_test.go @@ -0,0 +1,299 @@ +package httpauth + +import ( + "os" + "path/filepath" + "strings" + "testing" +) + +// writeAuthYAML drops a YAML config in t.TempDir and returns its +// path. Failure here is a test-infra problem, never a Policy +// concern, so we t.Fatalf rather than t.Error. +func writeAuthYAML(t *testing.T, content string) string { + t.Helper() + + dir := t.TempDir() + path := filepath.Join(dir, "auth.yaml") + + err := os.WriteFile(path, []byte(content), 0o600) + if err != nil { + t.Fatalf("write tempfile: %v", err) + } + + return path +} + +// TestLoadFromEnv_Legacy pins the backwards-compat shortcut: when +// only HYPERCACHE_AUTH_TOKEN is set, the loader synthesizes a +// single all-scopes TokenIdentity. This is the legacy path every +// pre-v2 deployment relies on; if it ever breaks, every operator +// upgrading hits the wall on day one. +func TestLoadFromEnv_Legacy(t *testing.T) { + // Cannot t.Parallel() — mutates process env. Same constraint + // applies to every test in this file that touches Setenv. 
+ t.Setenv(EnvAuthConfig, "") + t.Setenv(EnvAuthToken, "legacy-token") + + p, err := LoadFromEnv() + if err != nil { + t.Fatalf("LoadFromEnv: %v", err) + } + + if len(p.Tokens) != 1 { + t.Fatalf("len(Tokens) = %d, want 1", len(p.Tokens)) + } + + t0 := p.Tokens[0] + if t0.ID != "default" { + t.Errorf("ID = %q, want %q", t0.ID, "default") + } + + if t0.Token != "legacy-token" { + t.Errorf("Token = %q, want %q", t0.Token, "legacy-token") + } + + want := []Scope{ScopeRead, ScopeWrite, ScopeAdmin} + if len(t0.Scopes) != len(want) { + t.Fatalf("Scopes = %v, want %v", t0.Scopes, want) + } +} + +// TestLoadFromEnv_BothCoexist verifies the loader treats +// EnvAuthConfig and EnvAuthToken as orthogonal — config wins for +// the client API, and EnvAuthToken stays available for the dist +// transport's symmetric peer auth. Concretely: when CONFIG points +// at a valid file, the returned Policy mirrors the file (it does +// NOT also include the legacy token), and no error fires. +// +// This is the standard production shape: a multi-tenant client API +// (config-driven) on top of a single-trust-domain cluster (one +// shared peer token). +func TestLoadFromEnv_BothCoexist(t *testing.T) { + yaml := ` +tokens: + - id: from-file + token: file-token + scopes: [read] +` + + path := writeAuthYAML(t, yaml) + + t.Setenv(EnvAuthConfig, path) + t.Setenv(EnvAuthToken, "dist-peer-token") + + p, err := LoadFromEnv() + if err != nil { + t.Fatalf("LoadFromEnv: %v", err) + } + + if len(p.Tokens) != 1 || p.Tokens[0].ID != "from-file" { + t.Fatalf("policy = %+v, want one token sourced from CONFIG (file wins)", p) + } + + for _, tok := range p.Tokens { + if tok.Token == "dist-peer-token" { + t.Fatalf("legacy EnvAuthToken leaked into Policy.Tokens: %+v", tok) + } + } +} + +// TestLoadFromEnv_Neither returns the zero Policy with no error +// when neither env is set. The hypercache-server caller is +// responsible for emitting the "running with no auth" warning. 
+func TestLoadFromEnv_Neither(t *testing.T) { + t.Setenv(EnvAuthConfig, "") + t.Setenv(EnvAuthToken, "") + + p, err := LoadFromEnv() + if err != nil { + t.Fatalf("LoadFromEnv: %v", err) + } + + if p.IsConfigured() { + t.Errorf("zero policy should not be configured: %+v", p) + } +} + +// TestLoadFromFile_Happy parses a complete config with both tokens +// and cert identities and pins each field through to the Policy. +func TestLoadFromFile_Happy(t *testing.T) { + yaml := ` +tokens: + - id: app-prod + token: app-prod-secret + scopes: [read, write] + - id: ops + token: ops-secret + scopes: [admin] +cert_identities: + - subject_cn: app.internal + scopes: [read] +allow_anonymous: false +` + + path := writeAuthYAML(t, yaml) + + t.Setenv(EnvAuthConfig, path) + t.Setenv(EnvAuthToken, "") + + p, err := LoadFromEnv() + if err != nil { + t.Fatalf("LoadFromEnv: %v", err) + } + + if len(p.Tokens) != 2 { + t.Errorf("len(Tokens) = %d, want 2", len(p.Tokens)) + } + + if len(p.CertIdentities) != 1 { + t.Errorf("len(CertIdentities) = %d, want 1", len(p.CertIdentities)) + } + + if p.AllowAnonymous { + t.Errorf("AllowAnonymous = true, want false") + } +} + +// loadFailureCase is one row of the loader failure-mode table. +// Hoisted to package scope so the test body stays under the +// function-length lint threshold and adding a new failure case +// is a single literal append. +type loadFailureCase struct { + name string + content string + setup func(t *testing.T) string +} + +// loadFailureCases enumerates every input the loader must reject. +// The "secret-leak-canary" token body in every YAML is the +// regression target: each test asserts the resulting error message +// does NOT contain that substring, so a future wrap-error change +// that accidentally chains the file body into the message gets +// caught by the suite (not by the security audit). 
+// +//nolint:gochecknoglobals // test-only fixture table; sharing across subtests is the point +var loadFailureCases = []loadFailureCase{ + { + name: "missing file", + setup: func(_ *testing.T) string { + return "/path/that/does/not/exist/auth.yaml" + }, + }, + { + name: "malformed YAML", + content: "tokens:\n - id: app\n token: x\n bad-indent", + }, + { + name: "unknown field rejected (typo guard)", + content: ` +tokens: + - id: x + token: secret-leak-canary + scopes: [read] + unknown_field: typo +`, + }, + { + name: "unknown scope name", + content: ` +tokens: + - id: x + token: secret-leak-canary + scopes: [readonly] +`, + }, + { + name: "empty scopes list", + content: ` +tokens: + - id: x + token: secret-leak-canary + scopes: [] +`, + }, + { + name: "empty token field", + content: ` +tokens: + - id: x + token: "" + scopes: [read] +`, + }, + { + name: "empty ID field", + content: ` +tokens: + - id: "" + token: secret-leak-canary + scopes: [read] +`, + }, + { + name: "empty cert subject_cn", + content: ` +cert_identities: + - subject_cn: "" + scopes: [read] +`, + }, +} + +// TestLoadFromFile_FailureModes covers the load-time errors the +// loader is meant to fail-closed on. Each row asserts the error +// is non-nil AND the message does NOT include any token body — +// regression coverage for accidental secret leaks via wrap-error +// chains. 
+func TestLoadFromFile_FailureModes(t *testing.T) { + for _, tc := range loadFailureCases { + t.Run(tc.name, func(t *testing.T) { + var path string + + if tc.setup != nil { + path = tc.setup(t) + } else { + path = writeAuthYAML(t, tc.content) + } + + t.Setenv(EnvAuthConfig, path) + t.Setenv(EnvAuthToken, "") + + _, err := LoadFromEnv() + if err == nil { + t.Fatalf("expected an error") + } + + if strings.Contains(err.Error(), "secret-leak-canary") { + t.Fatalf("error leaks token body: %q", err.Error()) + } + }) + } +} + +// TestLoadFromFile_PrecedenceOverLegacy confirms the file path wins +// when EnvAuthConfig is set. The "both set" shape is not an error +// (see TestLoadFromEnv_BothCoexist), so this only exercises +// "config set, token unset". +func TestLoadFromFile_PrecedenceOverLegacy(t *testing.T) { + yaml := ` +tokens: + - id: from-file + token: file-token + scopes: [read] +` + + path := writeAuthYAML(t, yaml) + + t.Setenv(EnvAuthConfig, path) + t.Setenv(EnvAuthToken, "") + + p, err := LoadFromEnv() + if err != nil { + t.Fatalf("LoadFromEnv: %v", err) + } + + if len(p.Tokens) != 1 || p.Tokens[0].ID != "from-file" { + t.Fatalf("policy = %+v, want one token from file", p) + } +} diff --git a/pkg/httpauth/policy.go b/pkg/httpauth/policy.go new file mode 100644 index 0000000..5ed501f --- /dev/null +++ b/pkg/httpauth/policy.go @@ -0,0 +1,357 @@ +// Package httpauth provides authentication policy for the +// hypercache-server client REST API. It is the v2 successor to the +// single-token bearerAuth helper that previously lived inside +// cmd/hypercache-server: that helper supports exactly one shared token +// with no per-identity granularity and no extension hooks. Real +// production deployments need multiple tokens (one per consuming +// service or operator), per-identity scopes (read-only vs read-write +// vs admin), mTLS as a peer mechanism to bearer auth, and a custom +// verify hook for JWT/OAuth/etc. 
+// +// The package is independent of the dist transport's DistHTTPAuth +// (pkg/backend/dist_http_server.go). Dist auth is intentionally +// symmetric — every node carries the same token because the cluster +// is one trust domain — so multi-identity has no operator meaning +// there. Client API auth is asymmetric (many callers, one server) +// and benefits from the multi-identity shape this package provides. +// +// Wire-shape: a Policy is loaded once at process start (typically +// from HYPERCACHE_AUTH_CONFIG / HYPERCACHE_AUTH_TOKEN; see loader.go) +// and used to build per-route middleware via Policy.Middleware(scope). +// The middleware's bearer check compares against every configured +// token with no early exit, whichever one matched (if any) — see +// the comment on Middleware for the timing-leak considerations. +package httpauth + +import ( + "crypto/subtle" + "crypto/tls" + "errors" + "fmt" + "slices" + + fiber "github.com/gofiber/fiber/v3" + + "github.com/hyp3rd/hypercache/internal/sentinel" +) + +// Scope is a coarse permission grant applied to an identity. The +// three-scope model maps cleanly to cache semantics: Read covers +// GET/HEAD/owners-lookup; Write covers PUT/DELETE plus their batch +// equivalents; Admin is reserved for management-plane endpoints (not +// yet wired — see plan §"Out of scope: Unifying management HTTP"). +// +// Scopes are inclusive, not hierarchical: an identity granted Write +// does NOT implicitly also have Read. Each route declares the exact +// scope it requires; identities carry the union of scopes they hold. +// Hierarchical inheritance (admin > write > read) was rejected as +// the wrong default — it makes "write-only" tokens impossible without +// inverting the polarity, and operators routinely want a service +// that can write but not read (think: ingest-only metrics writers). +type Scope string + +const ( + // ScopeRead permits cache lookups and metadata inspection. 
+ ScopeRead Scope = "read" + // ScopeWrite permits cache mutations. + ScopeWrite Scope = "write" + // ScopeAdmin permits management-plane operations (cluster + // control, eviction triggers, etc.). Unused by the client + // API today; reserved for the management HTTP unification. + ScopeAdmin Scope = "admin" +) + +// Identity is the resolved caller for an authorized request. Stored +// into fiber.Ctx locals under IdentityKey so handlers can attribute +// audit logs / metrics to the calling principal without re-deriving +// it. ID is the human-readable label the operator put in the auth +// config; Scopes is the union of grants for that principal. +type Identity struct { + ID string + Scopes []Scope +} + +// HasScope reports whether the identity carries the given scope. +// O(n) over the identity's scope list — n is small (3 max), so +// micro-optimizations like a bitmap are not justified. +func (i Identity) HasScope(s Scope) bool { + return slices.Contains(i.Scopes, s) +} + +// TokenIdentity is one bearer-token grant in a Policy. The Token +// field is the raw secret; never log it. ID is what shows up in +// audit logs / Identity.ID after a successful match. +type TokenIdentity struct { + ID string + Token string + Scopes []Scope +} + +// CertIdentity is one mTLS-cert-based grant in a Policy. SubjectCN +// is matched against tls.ConnectionState.VerifiedChains[0][0].Subject.CommonName +// of the peer certificate. Exact-match only — wildcard CN matching +// invites accidental over-grant and is deferred until a concrete +// operator request justifies the complexity. +type CertIdentity struct { + SubjectCN string + Scopes []Scope +} + +// Policy is the authoritative auth configuration for an HTTP +// listener. Build via the loader in this package or construct +// in-process for tests; pass the same value to every route via +// Middleware. 
+// +// Policy is value-semantic and safe for concurrent use after +// construction — the slices are read-only after load, the +// ServerVerify hook is the operator's responsibility to make +// goroutine-safe. +type Policy struct { + // Tokens are the bearer-token identities. Constant-time + // compared against the Authorization header. + Tokens []TokenIdentity + // CertIdentities are the mTLS-cert identities. Resolved + // from the verified peer cert when TLS is enabled with + // client-cert verification. + CertIdentities []CertIdentity + // ServerVerify (optional) is the universal escape hatch. + // When set and bearer + cert both miss, the hook is called + // last; returning a non-error Identity authorizes the + // request. Use for JWT, OIDC introspection, or any other + // auth scheme this package doesn't natively support. + ServerVerify func(fiber.Ctx) (Identity, error) + // AllowAnonymous permits requests with no credentials at + // all to pass — they get the "anonymous" Identity, which + // carries every scope (read, write, admin), so scoped + // routes accept them too. Defaults to false. Used by tests and + // dev-mode deployments; production should always require + // at least one credential class. + AllowAnonymous bool +} + +// IdentityKey is the fiber.Ctx.Locals key under which the resolved +// Identity is stored after a successful auth. Handlers that need +// to attribute audit / metrics can read it back via +// `c.Locals(httpauth.IdentityKey).(httpauth.Identity)`. +// +// Exported as a named constant so users don't have to remember the +// string literal; the "httpauth." prefix guards against accidental +// collisions with unrelated fiber locals. +const IdentityKey = "httpauth.identity" + +// IsConfigured reports whether the policy has at least one +// credential class configured. The zero Policy (no tokens, no +// certs, no ServerVerify, AllowAnonymous false) is unconfigured: +// its Middleware fails closed with 401.
Loaders and the +// hypercache-server binary use this to decide whether to emit a +// "running with no auth" startup warning. Callers should NOT use +// it to gate security checks — Middleware already handles the +// no-credentials-configured fall-through correctly. +func (p Policy) IsConfigured() bool { + return len(p.Tokens) > 0 || len(p.CertIdentities) > 0 || p.ServerVerify != nil +} + +// Validate enforces coherence at load time. Returns nil for the +// zero Policy (nothing is configured, so nothing can be invalid) +// and for any policy whose token and cert entries are complete. The +// AllowAnonymous-with-no-credentials shape is intentionally +// permitted: it's how the hypercache-server binary preserves the +// pre-v2 zero-config dev posture (no env vars set → open mode). +// +// Validate's failure modes are all caller-error rather than +// runtime-error. Loaders should call this once at startup and exit +// non-zero on failure, never silently continue. +func (p Policy) Validate() error { + for _, t := range p.Tokens { + if t.Token == "" { + return fmt.Errorf("%w: token identity %q has empty token", sentinel.ErrInsecureAuthConfig, t.ID) + } + + if t.ID == "" { + return fmt.Errorf("%w: token identity has empty ID (token redacted)", sentinel.ErrInsecureAuthConfig) + } + } + + for _, c := range p.CertIdentities { + if c.SubjectCN == "" { + return fmt.Errorf("%w: cert identity has empty subject_cn", sentinel.ErrInsecureAuthConfig) + } + } + + return nil +} + +// Middleware returns a fiber middleware that enforces the policy +// for the given required scope. Order of credential resolution: +// +// 1. Bearer token in Authorization header — constant-time +// compared against EVERY configured token even on early match, +// so the position of the matching token does not leak via timing. +// 2. mTLS verified peer cert (if TLSConnectionState present and +// VerifiedChains is non-empty) — Subject CN matched against +// CertIdentities. +// 3.
ServerVerify hook (if non-nil) — last-resort escape hatch. +// +// On any successful match the resolved Identity is stored under +// IdentityKey and the next handler runs. On no match the request +// gets 401 Unauthorized with no body — credential-class hints +// (which class missed) are deliberately omitted to avoid handing +// attackers a credential-discovery oracle. +// +// When the policy has no configured credentials AND the route +// requires a scope, every request fails 401 — this is fail-closed +// by design. Operators in dev mode should set AllowAnonymous=true +// to opt into permissive behavior. +// +// When the route requires no specific scope (the empty string is +// passed as `required`), the middleware skips scope-checking but +// still resolves the Identity for handlers that want to attribute +// the call. This shape is currently unused but reserved for routes +// that want any-authenticated-caller semantics. +func (p Policy) Middleware(required Scope) fiber.Handler { + return func(c fiber.Ctx) error { + identity, ok := p.resolve(c) + if !ok { + return c.SendStatus(fiber.StatusUnauthorized) + } + + if required != "" && !identity.HasScope(required) { + return c.SendStatus(fiber.StatusForbidden) + } + + c.Locals(IdentityKey, identity) + + return c.Next() + } +} + +// resolve walks the credential resolution chain in priority order: +// bearer → mTLS cert → ServerVerify hook → anonymous fallback. +// Returns (Identity, true) on the first successful match. Extracted +// from Middleware so each branch is its own short clause and +// reviewers can audit the ordering at a glance — the chain itself +// is the security-critical part, not the 401/403 status mapping. 
+func (p Policy) resolve(c fiber.Ctx) (Identity, bool) { + if id, ok := p.resolveBearer(c.Get("Authorization")); ok { + return id, true + } + + if id, ok := p.resolveCert(c); ok { + return id, true + } + + if p.ServerVerify != nil { + id, err := p.ServerVerify(c) + if err == nil { + return id, true + } + } + + if p.AllowAnonymous { + // Anonymous identities receive every scope. AllowAnonymous + // is the explicit operator opt-in to permissive mode (used + // by the binary's zero-config dev posture); refusing scoped + // routes here would 403 every legacy `docker run hypercache` + // without a paired auth config. + return Identity{ + ID: "anonymous", + Scopes: []Scope{ScopeRead, ScopeWrite, ScopeAdmin}, + }, true + } + + return Identity{}, false +} + +// resolveBearer matches the Authorization header against every +// configured TokenIdentity in constant time per token. CRITICAL: +// the loop runs to completion regardless of when (or whether) a +// match is found — a future contributor MUST NOT add an early +// `break` on match. Doing so would make the wall-clock duration +// of the auth check correlate with the index of the matching +// token, leaking the order of tokens in the config (and, with +// careful timing, the cardinality of the token set). +// +// Returns (zero Identity, false) when no Authorization header is +// present — short-circuiting on the empty case is safe because no +// secret comparison happens; the zero header is publicly observable +// from a network attacker's vantage anyway. +func (p Policy) resolveBearer(authHeader string) (Identity, bool) { + if authHeader == "" || len(p.Tokens) == 0 { + return Identity{}, false + } + + got := []byte(authHeader) + matched := -1 + + for i, t := range p.Tokens { + want := []byte("Bearer " + t.Token) + // ConstantTimeCompare returns 0 immediately on + // length mismatch, but the comparison itself runs + // in time independent of WHERE the first differing + // byte lives. 
We always run the compare; we never + // `break` after matched is set. + if subtle.ConstantTimeCompare(got, want) == 1 { + matched = i + } + } + + if matched < 0 { + return Identity{}, false + } + + t := p.Tokens[matched] + + return Identity{ID: t.ID, Scopes: t.Scopes}, true +} + +// resolveCert maps a verified peer certificate to a CertIdentity by +// Subject CN. Requires TLS with client-cert verification — the +// fiber.Ctx must report a tls.ConnectionState with at least one +// VerifiedChain. Unverified or missing chains return (zero, false) +// without checking the configured CertIdentities; we never trust a +// cert chain we did not verify ourselves. +func (p Policy) resolveCert(c fiber.Ctx) (Identity, bool) { + if len(p.CertIdentities) == 0 { + return Identity{}, false + } + + state := tlsConnectionState(c) + if state == nil || len(state.VerifiedChains) == 0 || len(state.VerifiedChains[0]) == 0 { + return Identity{}, false + } + + cn := state.VerifiedChains[0][0].Subject.CommonName + if cn == "" { + return Identity{}, false + } + + for _, ci := range p.CertIdentities { + if ci.SubjectCN == cn { + return Identity{ID: cn, Scopes: ci.Scopes}, true + } + } + + return Identity{}, false +} + +// tlsConnectionState extracts the per-connection TLS state from a +// fiber context, or nil when the request was plaintext. Indirection +// kept here so resolveCert stays clear of fiber's TLS plumbing. It +// is a plain function, so tests cannot stub it as written. +func tlsConnectionState(c fiber.Ctx) *tls.ConnectionState { + // fiber/fasthttp expose the TLS state via the request + // context. We read it through the standard interface that + // fiber exposes; nil means plaintext. + req := c.RequestCtx() + if req == nil { + return nil + } + + return req.TLSConnectionState() +} + +// ErrInvalidPolicy wraps a policy validation failure.
Loaders return +// this so callers can distinguish "config is wrong" from "filesystem +// is wrong" (which surfaces as os.PathError) or "secrets backend +// failed" (caller's concern). +var ErrInvalidPolicy = errors.New("httpauth: invalid policy") diff --git a/pkg/httpauth/policy_test.go b/pkg/httpauth/policy_test.go new file mode 100644 index 0000000..8a96725 --- /dev/null +++ b/pkg/httpauth/policy_test.go @@ -0,0 +1,368 @@ +package httpauth + +import ( + "errors" + "net/http" + "net/http/httptest" + "strings" + "testing" + + fiber "github.com/gofiber/fiber/v3" +) + +// errVerifyRejected is the canonical "ServerVerify said no" sentinel +// the policy_test stubs return. Defining it as a static error +// dodges err113 without reaching for fmt.Errorf in test bodies. +var errVerifyRejected = errors.New("rejected") + +// newTestApp wires a single auth-protected route and returns the +// fiber app for in-memory request driving. The route returns 200 +// with "ok" so the test bodies only need to assert status codes — +// auth is what's under test, not handler logic. +func newTestApp(t *testing.T, p Policy, scope Scope) *fiber.App { + t.Helper() + + app := fiber.New() + app.Get("/protected", p.Middleware(scope), func(c fiber.Ctx) error { return c.SendString("ok") }) + + return app +} + +// doStatus issues the request and returns just the status code. +// Auth assertions never need the body, so this keeps the test +// table-row bodies one line each. +func doStatus(t *testing.T, app *fiber.App, header string) int { + t.Helper() + + req := httptest.NewRequestWithContext(t.Context(), http.MethodGet, "/protected", strings.NewReader("")) + if header != "" { + req.Header.Set("Authorization", header) + } + + resp, err := app.Test(req) + if err != nil { + t.Fatalf("app.Test: %v", err) + } + + defer func() { _ = resp.Body.Close() }() + + return resp.StatusCode +} + +// TestPolicy_Bearer drives the multi-token + scope path end-to-end +// through fiber.App.Test. 
Each row is one (header, required scope, +// expected status) tuple. The shared policy carries two identities +// — a read-only token and a read+write token — so the same policy +// can be checked against both Read-required and Write-required +// routes per case. +func TestPolicy_Bearer(t *testing.T) { + t.Parallel() + + p := Policy{ + Tokens: []TokenIdentity{ + {ID: "ro", Token: "read-only-token", Scopes: []Scope{ScopeRead}}, + {ID: "rw", Token: "read-write-token", Scopes: []Scope{ScopeRead, ScopeWrite}}, + }, + } + + tests := []struct { + name string + header string + scope Scope + want int + }{ + {"no header → 401", "", ScopeRead, http.StatusUnauthorized}, + {"unknown token → 401", "Bearer nope", ScopeRead, http.StatusUnauthorized}, + {"missing Bearer prefix → 401", "read-only-token", ScopeRead, http.StatusUnauthorized}, + {"ro token + Read scope → 200", "Bearer read-only-token", ScopeRead, http.StatusOK}, + {"ro token + Write scope → 403", "Bearer read-only-token", ScopeWrite, http.StatusForbidden}, + {"rw token + Write scope → 200", "Bearer read-write-token", ScopeWrite, http.StatusOK}, + {"rw token + Admin scope → 403", "Bearer read-write-token", ScopeAdmin, http.StatusForbidden}, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + + app := newTestApp(t, p, tc.scope) + + got := doStatus(t, app, tc.header) + if got != tc.want { + t.Fatalf("got %d, want %d", got, tc.want) + } + }) + } +} + +// TestPolicy_BearerVisitsAllTokens pins the timing-leak guardrail +// from policy.go's resolveBearer comment: the loop must not break +// on early match, so the count of configured tokens does not leak +// via wall-clock duration. +// +// We can't directly assert "ran in constant time" portably, but we +// CAN assert the matched token is the LAST configured token — +// proving the loop continued past earlier matches. 
If a future +// contributor adds an early `break`, the loop would short-circuit +// after the first match and matching the duplicate-but-later token +// would not be possible. +// +// (The duplicate is constructed deliberately so two configured +// identities accept the same header. resolveBearer's contract is +// "last writer wins" by virtue of the no-break loop; that's what +// we pin here.) +func TestPolicy_BearerVisitsAllTokens(t *testing.T) { + t.Parallel() + + p := Policy{ + Tokens: []TokenIdentity{ + {ID: "first", Token: "shared-token", Scopes: []Scope{ScopeRead}}, + {ID: "second", Token: "shared-token", Scopes: []Scope{ScopeRead, ScopeWrite}}, + }, + } + + identity, ok := p.resolveBearer("Bearer shared-token") + if !ok { + t.Fatalf("expected a match") + } + + // If the loop short-circuited on first match, we'd see "first" + // here. The no-early-break invariant guarantees we see "second". + if identity.ID != "second" { + t.Fatalf("identity.ID = %q, want %q (proves loop continues past matches; see resolveBearer comment)", identity.ID, "second") + } +} + +// TestPolicy_NoCredentialsFailsClosed asserts the fail-closed +// default: a Policy with no configured credential class refuses +// every request unless AllowAnonymous is explicitly set. This is +// the difference between dev-mode and prod-mode posture. +func TestPolicy_NoCredentialsFailsClosed(t *testing.T) { + t.Parallel() + + app := newTestApp(t, Policy{}, ScopeRead) + + got := doStatus(t, app, "Bearer anything") + if got != http.StatusUnauthorized { + t.Fatalf("zero policy: got %d, want %d", got, http.StatusUnauthorized) + } +} + +// TestPolicy_AllowAnonymous opens the route to credential-free +// callers. Anonymous identity carries all three scopes — the +// AllowAnonymous flag is the operator's explicit opt-in to +// permissive mode (used by the binary's zero-config dev posture) +// and refusing scoped routes would 403 every legacy +// `docker run hypercache` without a paired auth config. 
+func TestPolicy_AllowAnonymous(t *testing.T) { + t.Parallel() + + p := Policy{AllowAnonymous: true} + + scopes := []Scope{"", ScopeRead, ScopeWrite, ScopeAdmin} + for _, s := range scopes { + t.Run("scope="+string(s), func(t *testing.T) { + t.Parallel() + + app := newTestApp(t, p, s) + + if got := doStatus(t, app, ""); got != http.StatusOK { + t.Fatalf("scope=%q: got %d, want 200", s, got) + } + }) + } +} + +// TestPolicy_ServerVerify exercises the universal escape hatch. +// Bearer fails (no tokens configured), cert fails (no TLS), and +// ServerVerify is the last resort. When it returns nil err, the +// caller is authorized; when it returns an error, the request 401s. +func TestPolicy_ServerVerify(t *testing.T) { + t.Parallel() + + p := Policy{ + ServerVerify: func(c fiber.Ctx) (Identity, error) { + if c.Get("X-Custom-Auth") == "yes" { + return Identity{ID: "custom", Scopes: []Scope{ScopeRead}}, nil + } + + return Identity{}, errVerifyRejected + }, + } + + app := fiber.New() + app.Get("/protected", p.Middleware(ScopeRead), func(c fiber.Ctx) error { return c.SendString("ok") }) + + t.Run("hook accepts → 200", func(t *testing.T) { + t.Parallel() + + req := httptest.NewRequestWithContext(t.Context(), http.MethodGet, "/protected", strings.NewReader("")) + req.Header.Set("X-Custom-Auth", "yes") + + resp, err := app.Test(req) + if err != nil { + t.Fatalf("app.Test: %v", err) + } + + defer func() { _ = resp.Body.Close() }() + + if resp.StatusCode != http.StatusOK { + t.Fatalf("got %d, want 200", resp.StatusCode) + } + }) + + t.Run("hook rejects → 401", func(t *testing.T) { + t.Parallel() + + req := httptest.NewRequestWithContext(t.Context(), http.MethodGet, "/protected", strings.NewReader("")) + + resp, err := app.Test(req) + if err != nil { + t.Fatalf("app.Test: %v", err) + } + + defer func() { _ = resp.Body.Close() }() + + if resp.StatusCode != http.StatusUnauthorized { + t.Fatalf("got %d, want 401", resp.StatusCode) + } + }) +} + +// TestPolicy_IdentityInLocals 
proves the identity is reachable +// from downstream handlers via fiber.Ctx.Locals(IdentityKey). +// This is the hook that future audit/metrics code will use to +// attribute calls to principals. +func TestPolicy_IdentityInLocals(t *testing.T) { + t.Parallel() + + p := Policy{ + Tokens: []TokenIdentity{ + {ID: "audit-target", Token: "tok", Scopes: []Scope{ScopeRead}}, + }, + } + + app := fiber.New() + app.Get("/who", p.Middleware(ScopeRead), func(c fiber.Ctx) error { + v := c.Locals(IdentityKey) + + id, ok := v.(Identity) + if !ok { + return c.Status(http.StatusInternalServerError).SendString("no identity") + } + + return c.SendString(id.ID) + }) + + req := httptest.NewRequestWithContext(t.Context(), http.MethodGet, "/who", strings.NewReader("")) + req.Header.Set("Authorization", "Bearer tok") + + resp, err := app.Test(req) + if err != nil { + t.Fatalf("app.Test: %v", err) + } + + defer func() { _ = resp.Body.Close() }() + + body := make([]byte, 64) + + n, _ := resp.Body.Read(body) + + got := string(body[:n]) + if got != "audit-target" { + t.Fatalf("locals identity ID = %q, want %q", got, "audit-target") + } +} + +// TestPolicy_Validate covers the load-time coherence checks. Cases +// here also pin the messages do NOT include token bodies — a future +// edit that does would leak secrets to anyone reading startup logs. 
+func TestPolicy_Validate(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + policy Policy + wantErr bool + mustNotIn string // substring that must not appear in any error + }{ + { + name: "zero policy is valid (open mode)", + policy: Policy{}, + wantErr: false, + }, + { + name: "valid token policy", + policy: Policy{ + Tokens: []TokenIdentity{{ID: "x", Token: "secret", Scopes: []Scope{ScopeRead}}}, + }, + wantErr: false, + }, + { + name: "empty token errors and does not leak ID-as-secret", + policy: Policy{ + Tokens: []TokenIdentity{{ID: "missing-token-id", Token: "", Scopes: []Scope{ScopeRead}}}, + }, + wantErr: true, + mustNotIn: "", // ID is fine to log; only secrets are. + }, + { + name: "empty ID errors without leaking the token", + policy: Policy{ + //nolint:gosec // test-fixture string, not a credential + Tokens: []TokenIdentity{ + {ID: "", Token: "leaked-if-included", Scopes: []Scope{ScopeRead}}, + }, + }, + wantErr: true, + mustNotIn: "leaked-if-included", + }, + { + name: "empty cert subject errors", + policy: Policy{ + CertIdentities: []CertIdentity{{SubjectCN: "", Scopes: []Scope{ScopeRead}}}, + }, + wantErr: true, + }, + { + // AllowAnonymous-without-credentials is the legacy + // "open dev mode" the binary falls back to when no + // auth env vars are set. Validate must accept it; the + // "running with no auth" warning is the binary's job. 
+ name: "AllowAnonymous with no creds is valid (open dev mode)", + policy: Policy{AllowAnonymous: true}, + wantErr: false, + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + + err := tc.policy.Validate() + if (err != nil) != tc.wantErr { + t.Fatalf("err = %v, wantErr = %v", err, tc.wantErr) + } + + if tc.mustNotIn != "" && err != nil && strings.Contains(err.Error(), tc.mustNotIn) { + t.Fatalf("error message %q contains forbidden substring %q (would leak secret)", err.Error(), tc.mustNotIn) + } + }) + } +} + +// TestPolicy_HasScope is a tiny cheap unit covering the inclusive +// (non-hierarchical) scope semantics — the deliberate counterpart +// to the fact that ScopeWrite does NOT imply ScopeRead. +func TestPolicy_HasScope(t *testing.T) { + t.Parallel() + + id := Identity{Scopes: []Scope{ScopeWrite}} + + if id.HasScope(ScopeRead) { + t.Errorf("Write should not imply Read (inclusive-not-hierarchical scope model)") + } + + if !id.HasScope(ScopeWrite) { + t.Errorf("Write should match Write") + } +} diff --git a/redocly.yaml b/redocly.yaml new file mode 100644 index 0000000..cded3e7 --- /dev/null +++ b/redocly.yaml @@ -0,0 +1,26 @@ +--- +# Redocly CLI configuration. Tells `redocly lint` what rules to +# enforce against `cmd/hypercache-server/openapi.yaml`. +# +# Two rules are intentionally disabled below; everything else +# stays on the `recommended` baseline (which is mostly schema- +# correctness checks plus codegen-friendliness hints). + +extends: + - recommended + +rules: + # Allow `http://localhost:8080` as a `servers[].url`. The default + # `recommended` ruleset flags any placeholder-looking URL + # (including localhost) so consumers don't accidentally publish + # a spec that points at example.com. We document localhost on + # purpose — it is the actual default binding for the binary — + # so the warning is noise here. 
+ no-server-example.com: off + + # `/healthz` and `/v1/openapi.yaml` legitimately have no 4xx + # response shape: the former is auth-free and accepts no input, + # the latter just emits a static document. Forcing a synthetic + # 4xx onto them would document behavior the binary doesn't + # actually have. + operation-4xx-response: off