diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml
new file mode 100644
index 0000000..4614af0
--- /dev/null
+++ b/.github/workflows/docs.yml
@@ -0,0 +1,92 @@
+---
+name: docs
+
+# Build and deploy the MkDocs site to GitHub Pages.
+# * pull_request — build only (validates the docs render
+# without publishing).
+# * push to main — build + deploy to gh-pages.
+# * workflow_dispatch — same as push (lets operators
+# re-publish without a docs change).
+
+on:
+ pull_request:
+ paths:
+ - "docs/**"
+ - "mkdocs.yml"
+ - "CHANGELOG.md"
+ - "cmd/hypercache-server/README.md"
+ - ".github/workflows/docs.yml"
+ push:
+ branches: [ main ]
+ paths:
+ - "docs/**"
+ - "mkdocs.yml"
+ - "CHANGELOG.md"
+ - "cmd/hypercache-server/README.md"
+ - ".github/workflows/docs.yml"
+ workflow_dispatch:
+
+# Pages deployments require these permissions; the build-only
+# branch (PR) doesn't actually use the deploy steps so the
+# extra permissions are harmless.
+permissions:
+ contents: read
+ pages: write
+ id-token: write
+
+# A single in-flight deploy at a time. Newer pushes cancel
+# older ones to avoid an out-of-order publish.
+concurrency:
+ group: pages
+ cancel-in-progress: true
+
+jobs:
+ build:
+ name: build
+ runs-on: ubuntu-latest
+ timeout-minutes: 10
+ steps:
+ - uses: actions/checkout@v6
+ with:
+ fetch-depth: 0 # docs/ may reference files via relative paths
+
+ - name: Setup Python
+ uses: actions/setup-python@v6
+ with:
+ python-version: "3.13"
+ cache: pip
+ # `actions/setup-python` uses this file's hash as the
+ # pip-cache key. docs/requirements.txt pins the MkDocs
+ # + plugin versions so cache hits are reproducible and
+ # the runner can find a key file at all.
+ cache-dependency-path: docs/requirements.txt
+
+ - name: Install MkDocs + plugins
+ run: |
+ python -m pip install --upgrade pip
+ pip install -r docs/requirements.txt
+
+ - name: Build site (strict)
+ # Strict in CI catches broken links / missing pages on PR;
+ # `mkdocs serve` locally relaxes this for fast iteration.
+ run: mkdocs build --strict
+
+ - name: Upload Pages artifact
+ if: github.event_name == 'push' || github.event_name == 'workflow_dispatch'
+ uses: actions/upload-pages-artifact@v5
+ with:
+ path: ./site
+
+ deploy:
+ name: deploy
+ needs: build
+ if: github.event_name == 'push' || github.event_name == 'workflow_dispatch'
+ runs-on: ubuntu-latest
+ timeout-minutes: 5
+ environment:
+ name: github-pages
+ url: ${{ steps.deployment.outputs.page_url }}
+ steps:
+ - name: Deploy to GitHub Pages
+ id: deployment
+ uses: actions/deploy-pages@v5
diff --git a/.gitignore b/.gitignore
index 6a5e0d7..81c7573 100644
--- a/.gitignore
+++ b/.gitignore
@@ -96,3 +96,9 @@ tags
### Project ###
.dccache
+
+### MkDocs site build output (CI publishes; local builds shouldn't be committed) ###
+/site/
+
+### Python bytecode caches from MkDocs hooks ###
+_mkdocs/__pycache__/
diff --git a/.gitleaksignore b/.gitleaksignore
index 623dba7..4f464ec 100644
--- a/.gitleaksignore
+++ b/.gitleaksignore
@@ -10,6 +10,11 @@ scripts/tests/10-test-cluster-api.sh:curl-auth-header:36
cmd/hypercache-server/README.md:curl-auth-header:50
cmd/hypercache-server/README.md:curl-auth-header:55
cmd/hypercache-server/README.md:curl-auth-header:59
+cmd/hypercache-server/README.md:curl-auth-header:77
+cmd/hypercache-server/README.md:curl-auth-header:92
cmd/hypercache-server/README.md:curl-auth-header:102
cmd/hypercache-server/README.md:curl-auth-header:108
cmd/hypercache-server/README.md:curl-auth-header:112
+cmd/hypercache-server/README.md:curl-auth-header:117
+cmd/hypercache-server/README.md:curl-auth-header:129
+cmd/hypercache-server/README.md:curl-auth-header:135
diff --git a/.mdl_style.rb b/.mdl_style.rb
index 7ea5620..884184f 100644
--- a/.mdl_style.rb
+++ b/.mdl_style.rb
@@ -13,3 +13,22 @@
# under distinct parent headings — which is exactly the Keep-a-Changelog
# shape, and still catches genuine duplicates within the same section.
rule "MD024", :allow_different_nesting => true
+
+# MkDocs pages start with YAML frontmatter (---\ntitle: ...\n---), so
+# the first line cannot be a top-level heading. MD041 fights that
+# convention; the alternative would be losing per-page metadata.
+exclude_rule 'MD041'
+
+# Hard tabs in code blocks are valid — Go source uses tabs by
+# convention (gofmt enforces it), and MkDocs preserves them. The
+# default rule flags every Go example as broken, which would push
+# us to manually convert tabs in every code block.
+exclude_rule 'MD010'
+
+# MkDocs Material's "grid cards" feature requires `<div>`
+# HTML wrappers around a markdown list. MD033 (no inline HTML) flags
+# every grid block. Ditto for the surrounding-blank-line rule (MD032)
+# which doesn't see the list inside the div as a list. Skipping both
+# is the standard Material-theme posture.
+exclude_rule 'MD033'
+exclude_rule 'MD032'
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 6616413..01cee24 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -8,6 +8,52 @@ adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
### Added
+- **Documentation site on GitHub Pages**, built with MkDocs Material
+ and published automatically on every push to `main`. Eight
+ navigated pages — landing, quickstart, 5-node cluster tutorial,
+ Helm chart guide, server-binary reference, distributed-backend
+ architecture, operations runbook, RFC index — plus the
+ CHANGELOG and the `cmd/hypercache-server/README.md` pulled in
+ via the include-markdown plugin so they don't drift. A
+ build-time hook at [`_mkdocs/hooks.py`](_mkdocs/hooks.py)
+ rewrites repo-relative source-code references (`../pkg/foo.go`)
+ into canonical GitHub URLs so the same markdown renders
+ correctly both on github.com and on the rendered Pages site.
+ Workflow at
+ [`.github/workflows/docs.yml`](.github/workflows/docs.yml)
+ builds with `--strict` on every PR (catches broken docs-internal
+  links on submission) and deploys via `actions/deploy-pages@v5`
+ on main pushes. The README now links to the rendered site.
+ Polishing pass on the existing markdown surface: relaxed
+ `mdl` rules that fight MkDocs/frontmatter idioms (MD041
+ for YAML frontmatter pages, MD010 for Go's tab-in-code-blocks
+ convention, MD033/MD032 for Material's grid-cards HTML).
+- **Richer client API — metadata inspection, JSON envelopes, batch
+ operations.** Three additions to the
+ `cmd/hypercache-server` HTTP surface:
+ - `HEAD /v1/cache/:key` returns the value's metadata in
+ `X-Cache-*` response headers (Version, Origin, Last-Updated,
+ TTL-Ms, Expires-At, Owners, Node) with no body — fast
+ existence + TTL inspection without paying the value-transfer
+ cost. 200 if present, 404 if not.
+ - `GET /v1/cache/:key` now honors `Accept: application/json`
+ and returns an `itemEnvelope` with the same metadata as
+ HEAD plus the base64-encoded value. The bare-`curl` default
+ remains raw bytes via `application/octet-stream` — current
+ clients are unaffected.
+ - `POST /v1/cache/batch/{get,put,delete}` enable bulk operations
+ in a single round-trip. Each request carries an array; the
+ response carries one result entry per item with per-item
+ status, owners, and error reporting. `batch-put` items
+ accept either UTF-8 strings (default) or base64-encoded byte
+ payloads via `value_encoding: "base64"`. Per-item errors are
+ surfaced in `error` + `code` fields without failing the
+ whole batch.
+ Six unit tests at
+ [cmd/hypercache-server/handlers_test.go](cmd/hypercache-server/handlers_test.go)
+ pin the contracts: HEAD present/missing, Accept-JSON envelope
+ shape, default-raw round-trip, mixed-encoding batch-put,
+ batch-get found/missing, batch-delete cycle.
- **SWIM self-refutation + cross-process gossip dissemination.**
Closes the last `experimental` marker on the heartbeat path.
Three pieces:
diff --git a/Makefile b/Makefile
index 7bc2f48..a19b78c 100644
--- a/Makefile
+++ b/Makefile
@@ -185,6 +185,15 @@ sec:
@echo "\nRunning gosec..."
gosec -exclude-generated -exclude-dir=__examples/size ./...
+docs-build:
+ PYENV_VERSION=mkdocs mkdocs build --strict
+
+docs-publish: docs-build
+ PYENV_VERSION=mkdocs mkdocs gh-deploy
+
+docs-serve: docs-build
+ PYENV_VERSION=mkdocs mkdocs serve
+
# check_command_exists is a helper function that checks if a command exists.
define check_command_exists
@which $(1) > /dev/null 2>&1 || (echo "$(1) command not found" && exit 1)
@@ -219,6 +228,11 @@ help:
@echo " update-deps\t\t\tUpdate all dependencies and tidy go.mod"
@echo
@echo
+ @echo "Documentation commands:"
	@echo "  docs-build\t\t\tBuild the MkDocs site with --strict"
	@echo "  docs-publish\t\t\tBuild and publish the site via mkdocs gh-deploy"
	@echo "  docs-serve\t\t\tBuild, then serve the site locally for preview"
+ @echo
@echo "For more information, see the project README."
.PHONY: init prepare-toolchain prepare-base-tools update-toolchain test test-race typecheck build ci bench bench-baseline vet update-deps lint sec help
diff --git a/README.md b/README.md
index cb3eb67..8cb86a8 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,8 @@
# HyperCache
-[][build-link] [][codeql-link]
+[][build-link] [][codeql-link] [](https://hyp3rd.github.io/hypercache/)
+
+> **📖 Full documentation**: <https://hyp3rd.github.io/hypercache/>
## Synopsis
diff --git a/_mkdocs/hooks.py b/_mkdocs/hooks.py
new file mode 100644
index 0000000..f6a8152
--- /dev/null
+++ b/_mkdocs/hooks.py
@@ -0,0 +1,136 @@
+"""MkDocs hooks for the HyperCache site.
+
+Rewrites repo-relative links to source files (`../pkg/foo.go`,
+`../../hypercache.go`, etc.) into canonical GitHub URLs, so the same
+markdown source renders correctly both on github.com and on the
+GitHub Pages MkDocs build.
+
+Without this, the operations runbook and the RFCs reference dozens
+of source files via paths like `../pkg/backend/dist_memory.go`.
+GitHub renders those as in-repo links; MkDocs's strict mode flags
+them as broken because `pkg/` is not part of the documentation
+tree. Rewriting them at build time keeps the source markdown
+GitHub-friendly while letting strict mode actually enforce
+docs-internal correctness.
+"""
+
+import os
+import re
+from typing import Any
+
+GITHUB_REPO_BASE = "https://github.com/hyp3rd/hypercache/blob/main"
+
+# File extensions that we treat as "source code, not docs" — links
+# to these get rewritten to GitHub URLs. .md is intentionally NOT
+# in this list because doc-to-doc links should stay intra-site so
+# MkDocs can validate them.
+SOURCE_EXTENSIONS = (
+ ".go",
+ ".yaml",
+ ".yml",
+ ".sh",
+ ".rb",
+ ".txt",
+ ".dockerignore",
+ ".gitignore",
+ ".env",
+ "Dockerfile",
+ "Makefile",
+)
+
+# Paths that are entire directories the docs reference for context
+# (e.g. "see internal/cluster/"). These get rewritten to GitHub
+# tree URLs — clicking takes the reader to a directory listing.
+SOURCE_DIR_PREFIXES = (
+ "pkg/",
+ "internal/",
+ "cmd/",
+ "chart/",
+ "scripts/",
+ "tests/",
+ "__examples/",
+ ".github/",
+ "docker/",
+ "_mkdocs/",
+)
+
+LINK_RE = re.compile(r"\[([^\]]+)\]\(([^)]+)\)")
+
+
+def _is_source_link(target: str) -> bool:
+ """Return True when the link target looks like a repo source ref
+ rather than an in-tree docs link."""
+ # Strip anchor before extension/prefix checks.
+ clean = target.split("#", 1)[0]
+
+ # Source files by extension or basename.
+ if clean.endswith(SOURCE_EXTENSIONS):
+ return True
+
+ # Directory references (no extension) that match known source
+ # roots. We resolve `..` segments first so the prefix match
+ # works against repo-rooted paths.
+ parts = [p for p in clean.split("/") if p and p != "."]
+
+ # Drop leading `..` segments — they all collapse to repo root
+ # for our purposes (rewrite-target side).
+ while parts and parts[0] == "..":
+ parts.pop(0)
+
+ if not parts:
+ return False
+
+ repo_path = "/".join(parts)
+ if any(repo_path.startswith(p) for p in SOURCE_DIR_PREFIXES):
+ return True
+
+ return False
+
+
+def _resolve_to_repo_root(page_src_path: str, target: str) -> str:
+ """Translate a relative target into a repo-rooted path.
+
+ Page src_path is relative to docs/ (e.g. `rfcs/0001-foo.md`).
+ Target is relative to the page (e.g. `../../pkg/foo.go`). The
+ returned path is relative to the repo root.
+ """
+ # `os.path.normpath` collapses `..` correctly; we anchor at
+    # `docs/` and resolve from there.
+ page_dir = os.path.dirname(page_src_path)
+ docs_anchored = os.path.normpath(os.path.join("docs", page_dir, target))
+
+ # The result may still start with `../` if the relative target
+ # walked above the repo root (it shouldn't in practice). Trim
+ # any leading `../` defensively.
+ while docs_anchored.startswith("../"):
+ docs_anchored = docs_anchored[3:]
+
+ return docs_anchored
+
+
+def on_page_markdown(markdown: str, page: Any, **kwargs: Any) -> str:
+ """Rewrite source-code links on every page before MkDocs renders it."""
+ page_src = page.file.src_path
+
+ def replace(match: re.Match[str]) -> str:
+ link_text = match.group(1)
+ link_target = match.group(2)
+
+ # Absolute URLs, mailtos, and pure anchors stay as-is.
+ if link_target.startswith(("http://", "https://", "mailto:", "#")):
+ return match.group(0)
+
+ if not _is_source_link(link_target):
+ return match.group(0)
+
+ repo_path = _resolve_to_repo_root(page_src, link_target)
+
+ # Preserve any anchor on the target (e.g. line ranges like
+ # `pkg/foo.go#L34-L58`).
+ if "#" in link_target and "#" not in repo_path:
+ anchor = "#" + link_target.split("#", 1)[1]
+ repo_path += anchor
+
+ return f"[{link_text}]({GITHUB_REPO_BASE}/{repo_path})"
+
+ return LINK_RE.sub(replace, markdown)
diff --git a/cmd/hypercache-server/README.md b/cmd/hypercache-server/README.md
index 3563c6f..88bab66 100644
--- a/cmd/hypercache-server/README.md
+++ b/cmd/hypercache-server/README.md
@@ -68,6 +68,79 @@ Bodies are treated as opaque bytes; `Content-Type` round-trips as
`application/octet-stream`. Strings round-trip cleanly; structured
values are JSON-encoded on response.
+### Metadata inspection
+
+`HEAD` returns the value's metadata in `X-Cache-*` response headers
+(no body — fast existence + TTL check):
+
+```sh
+curl -I -H 'Authorization: Bearer dev-token' \
+ 'http://localhost:8080/v1/cache/greeting'
+# X-Cache-Version: 1
+# X-Cache-Origin: node-1
+# X-Cache-Last-Updated: 2026-05-06T10:00:00Z
+# X-Cache-Ttl-Ms: 28412
+# X-Cache-Expires-At: 2026-05-06T10:30:00Z
+# X-Cache-Owners: node-1,node-2,node-3
+# X-Cache-Node: node-1
+```
+
+`GET` with `Accept: application/json` returns the same metadata as
+a JSON envelope (value is base64 for binary fidelity):
+
+```sh
+curl -H 'Authorization: Bearer dev-token' \
+ -H 'Accept: application/json' \
+ 'http://localhost:8080/v1/cache/greeting'
+# {
+# "key": "greeting",
+# "value": "d29ybGQ=",
+# "value_encoding": "base64",
+# "ttl_ms": 28412,
+# "expires_at": "2026-05-06T10:30:00Z",
+# "version": 1,
+# "origin": "node-1",
+# "last_updated": "2026-05-06T10:00:00Z",
+# "node": "node-1",
+# "owners": ["node-1", "node-2", "node-3"]
+# }
+```
+
+### Batch operations
+
+Three endpoints over `POST /v1/cache/batch/{get,put,delete}`. Each
+returns a `results` array with one entry per requested item; per-item
+errors are surfaced without failing the whole batch.
+
+```sh
+# Batch put — mixed UTF-8 strings and base64-encoded byte payloads.
+curl -H 'Authorization: Bearer dev-token' \
+ -X POST -H 'Content-Type: application/json' \
+ --data '{
+ "items": [
+ {"key": "greet-en", "value": "hello", "ttl_ms": 60000},
+ {"key": "greet-bin", "value": "d29ybGQ=", "value_encoding": "base64"}
+ ]
+ }' \
+ 'http://localhost:8080/v1/cache/batch/put'
+
+# Batch get — fetches many keys in one round-trip; results carry
+# the same envelope shape as the single-key Accept:json GET.
+curl -H 'Authorization: Bearer dev-token' \
+ -X POST -H 'Content-Type: application/json' \
+ --data '{"keys": ["greet-en", "greet-bin", "missing"]}' \
+ 'http://localhost:8080/v1/cache/batch/get'
+
+# Batch delete.
+curl -H 'Authorization: Bearer dev-token' \
+ -X POST -H 'Content-Type: application/json' \
+ --data '{"keys": ["greet-en", "greet-bin"]}' \
+ 'http://localhost:8080/v1/cache/batch/delete'
+```
+
+Default `value_encoding` for batch-put items is the literal UTF-8
+string. Pass `"value_encoding": "base64"` for binary payloads.
+
## Graceful shutdown
On `SIGTERM` / `SIGINT` the binary runs:
diff --git a/cmd/hypercache-server/handlers_test.go b/cmd/hypercache-server/handlers_test.go
new file mode 100644
index 0000000..0184da1
--- /dev/null
+++ b/cmd/hypercache-server/handlers_test.go
@@ -0,0 +1,354 @@
+package main
+
+import (
+ "context"
+ "encoding/base64"
+ "io"
+ "net/http"
+ "net/http/httptest"
+ "strings"
+ "testing"
+
+ "github.com/goccy/go-json"
+ fiber "github.com/gofiber/fiber/v3"
+
+ "github.com/hyp3rd/hypercache"
+ "github.com/hyp3rd/hypercache/internal/constants"
+ "github.com/hyp3rd/hypercache/pkg/backend"
+)
+
+// newTestServer builds a single-node hypercache + fiber app wired
+// with every handler under test. Returned together so test bodies
+// can drive the wire (fiber app.Test) without provisioning a real
+// listener.
+//
+// Replication=1 keeps assertions deterministic — no quorum / fan-out
+// concerns — and the in-memory backend's lifecycle is tied to t.
+func newTestServer(t *testing.T) *fiber.App {
+ t.Helper()
+
+ cfg, err := hypercache.NewConfig[backend.DistMemory](constants.DistMemoryBackend)
+ if err != nil {
+ t.Fatalf("new config: %v", err)
+ }
+
+ cfg.DistMemoryOptions = []backend.DistMemoryOption{
+ backend.WithDistNode("test-node", "127.0.0.1:0"),
+ backend.WithDistReplication(1),
+ }
+
+ hc, err := hypercache.New(t.Context(), hypercache.GetDefaultManager(), cfg)
+ if err != nil {
+ t.Fatalf("new hypercache: %v", err)
+ }
+
+ t.Cleanup(func() { _ = hc.Stop(context.Background()) })
+
+ app := fiber.New()
+ nodeCtx := &nodeContext{hc: hc, nodeID: "test-node"}
+
+ app.Get("/v1/cache/:key", func(c fiber.Ctx) error { return handleGet(c, nodeCtx) })
+ app.Head("/v1/cache/:key", func(c fiber.Ctx) error { return handleHead(c, nodeCtx) })
+ app.Put("/v1/cache/:key", func(c fiber.Ctx) error { return handlePut(c, nodeCtx) })
+ app.Delete("/v1/cache/:key", func(c fiber.Ctx) error { return handleDelete(c, nodeCtx) })
+ app.Post("/v1/cache/batch/get", func(c fiber.Ctx) error { return handleBatchGet(c, nodeCtx) })
+ app.Post("/v1/cache/batch/put", func(c fiber.Ctx) error { return handleBatchPut(c, nodeCtx) })
+ app.Post("/v1/cache/batch/delete", func(c fiber.Ctx) error { return handleBatchDelete(c, nodeCtx) })
+
+ return app
+}
+
+// doRequest is a small wrapper around fiber's in-memory test
+// transport. Returns status + body string + Content-Type so each
+// test only has to think about the assertion at hand.
+type doResult struct {
+ status int
+ body string
+ contentType string
+ headers http.Header
+}
+
+func doRequest(t *testing.T, app *fiber.App, method, target, body string, headers map[string]string) doResult {
+ t.Helper()
+
+ req := httptest.NewRequestWithContext(t.Context(), method, target, strings.NewReader(body))
+
+ if body != "" {
+ req.Header.Set("Content-Type", "application/json")
+ }
+
+ for k, v := range headers {
+ req.Header.Set(k, v)
+ }
+
+ resp, err := app.Test(req)
+ if err != nil {
+ t.Fatalf("app.Test %s %s: %v", method, target, err)
+ }
+
+ defer func() { _ = resp.Body.Close() }()
+
+ respBody, readErr := io.ReadAll(resp.Body)
+ if readErr != nil {
+ t.Fatalf("read body: %v", readErr)
+ }
+
+ return doResult{
+ status: resp.StatusCode,
+ body: string(respBody),
+ contentType: resp.Header.Get(fiber.HeaderContentType),
+ headers: resp.Header,
+ }
+}
+
+// TestHandleHead_PresentAndMissing pins the HEAD contract: 200 +
+// X-Cache-* headers when the key exists, 404 with no headers when
+// not. Header set must include version + node identity so cache
+// revalidation flows have everything they need without a body
+// transfer.
+func TestHandleHead_PresentAndMissing(t *testing.T) {
+ t.Parallel()
+
+ app := newTestServer(t)
+
+ // Seed a key with a TTL.
+ put := doRequest(t, app, http.MethodPut, "/v1/cache/k?ttl=30s", "world", nil)
+ if put.status != http.StatusOK {
+ t.Fatalf("put: %d", put.status)
+ }
+
+ head := doRequest(t, app, http.MethodHead, "/v1/cache/k", "", nil)
+ if head.status != http.StatusOK {
+ t.Fatalf("HEAD present: status %d", head.status)
+ }
+
+ if head.headers.Get("X-Cache-Version") == "" {
+ t.Fatal("HEAD response missing X-Cache-Version header")
+ }
+
+ if head.headers.Get("X-Cache-Node") != "test-node" {
+ t.Fatalf("X-Cache-Node = %q, want test-node", head.headers.Get("X-Cache-Node"))
+ }
+
+ if head.headers.Get("X-Cache-Ttl-Ms") == "" {
+ t.Fatal("HEAD with TTL missing X-Cache-Ttl-Ms header")
+ }
+
+ miss := doRequest(t, app, http.MethodHead, "/v1/cache/never", "", nil)
+ if miss.status != http.StatusNotFound {
+ t.Fatalf("HEAD missing: status %d, want 404", miss.status)
+ }
+}
+
+// TestHandleGet_AcceptJSONReturnsEnvelope pins the
+// response-consistency contract: a GET with `Accept:
+// application/json` returns the itemEnvelope shape with TTL,
+// version, owners, and a base64 value — same shape as a
+// batch-get result.
+func TestHandleGet_AcceptJSONReturnsEnvelope(t *testing.T) {
+ t.Parallel()
+
+ app := newTestServer(t)
+
+ put := doRequest(t, app, http.MethodPut, "/v1/cache/k?ttl=30s", "world", nil)
+ if put.status != http.StatusOK {
+ t.Fatalf("put: %d", put.status)
+ }
+
+ got := doRequest(t, app, http.MethodGet, "/v1/cache/k", "", map[string]string{
+ fiber.HeaderAccept: fiber.MIMEApplicationJSON,
+ })
+ if got.status != http.StatusOK {
+ t.Fatalf("GET: status %d", got.status)
+ }
+
+ if !strings.Contains(got.contentType, "json") {
+ t.Fatalf("content-type = %q, want application/json", got.contentType)
+ }
+
+ var env itemEnvelope
+
+ err := json.Unmarshal([]byte(got.body), &env)
+ if err != nil {
+ t.Fatalf("decode envelope: %v; body=%s", err, got.body)
+ }
+
+ if env.Key != "k" {
+ t.Errorf("key = %q, want k", env.Key)
+ }
+
+ if env.ValueEncoding != "base64" {
+ t.Errorf("value_encoding = %q, want base64", env.ValueEncoding)
+ }
+
+ decoded, decodeErr := base64.StdEncoding.DecodeString(env.Value)
+ if decodeErr != nil || string(decoded) != "world" {
+ t.Errorf("value decoded = %q (err=%v), want world", decoded, decodeErr)
+ }
+
+ if env.TTLMs <= 0 || env.TTLMs > 30_000 {
+ t.Errorf("ttl_ms = %d, want (0, 30000]", env.TTLMs)
+ }
+
+ if env.Version == 0 {
+ t.Error("version must be > 0 after a write")
+ }
+}
+
+// TestHandleGet_DefaultIsRawBytes pins the back-compat contract:
+// without an Accept header, GET returns raw bytes — operators
+// using bare `curl` keep seeing the literal value.
+func TestHandleGet_DefaultIsRawBytes(t *testing.T) {
+ t.Parallel()
+
+ app := newTestServer(t)
+
+ put := doRequest(t, app, http.MethodPut, "/v1/cache/k", "hello", nil)
+ if put.status != http.StatusOK {
+ t.Fatalf("put: %d", put.status)
+ }
+
+ got := doRequest(t, app, http.MethodGet, "/v1/cache/k", "", nil)
+ if got.body != "hello" {
+ t.Fatalf("body = %q, want hello", got.body)
+ }
+
+ if !strings.Contains(got.contentType, "octet-stream") {
+ t.Fatalf("content-type = %q, want octet-stream", got.contentType)
+ }
+}
+
+// TestHandleBatchPut_MixedEncodings pins the batch-put contract:
+// items can be UTF-8 strings (default) or base64-encoded bytes
+// via value_encoding. Per-item errors are surfaced without
+// failing the whole batch.
+func TestHandleBatchPut_MixedEncodings(t *testing.T) {
+ t.Parallel()
+
+ app := newTestServer(t)
+
+ body := `{
+ "items": [
+ {"key": "k1", "value": "hello", "ttl_ms": 30000},
+ {"key": "k2", "value": "d29ybGQ=", "value_encoding": "base64"},
+ {"key": "", "value": "rejected"}
+ ]
+ }`
+
+ got := doRequest(t, app, http.MethodPost, "/v1/cache/batch/put", body, nil)
+ if got.status != http.StatusOK {
+ t.Fatalf("batch-put: status %d, body=%s", got.status, got.body)
+ }
+
+ var resp batchPutResponse
+
+ err := json.Unmarshal([]byte(got.body), &resp)
+ if err != nil {
+ t.Fatalf("decode: %v", err)
+ }
+
+ if len(resp.Results) != 3 {
+ t.Fatalf("got %d results, want 3", len(resp.Results))
+ }
+
+ if !resp.Results[0].Stored || resp.Results[0].Bytes != 5 {
+ t.Errorf("k1 result = %+v", resp.Results[0])
+ }
+
+ if !resp.Results[1].Stored || resp.Results[1].Bytes != 5 {
+ t.Errorf("k2 result = %+v", resp.Results[1])
+ }
+
+ if resp.Results[2].Stored || resp.Results[2].Code != codeBadRequest {
+ t.Errorf("empty-key result must be rejected: %+v", resp.Results[2])
+ }
+}
+
+// TestHandleBatchGet_FoundAndMissing pins the batch-get contract:
+// each requested key returns its own result entry; missing keys
+// produce found:false rather than failing the whole batch.
+// Found entries carry the same metadata shape as
+// itemEnvelope — verified by checking the value round-trips
+// from base64 back to the original.
+func TestHandleBatchGet_FoundAndMissing(t *testing.T) {
+ t.Parallel()
+
+ app := newTestServer(t)
+
+ put := doRequest(t, app, http.MethodPut, "/v1/cache/k1", "alpha", nil)
+ if put.status != http.StatusOK {
+ t.Fatalf("seed put: %d", put.status)
+ }
+
+ body := `{"keys": ["k1", "missing", "k1"]}`
+
+ got := doRequest(t, app, http.MethodPost, "/v1/cache/batch/get", body, nil)
+ if got.status != http.StatusOK {
+ t.Fatalf("batch-get: status %d", got.status)
+ }
+
+ var resp batchGetResponse
+
+ err := json.Unmarshal([]byte(got.body), &resp)
+ if err != nil {
+ t.Fatalf("decode: %v", err)
+ }
+
+ if len(resp.Results) != 3 {
+ t.Fatalf("got %d results, want 3", len(resp.Results))
+ }
+
+ if !resp.Results[0].Found {
+ t.Errorf("k1 should be found: %+v", resp.Results[0])
+ }
+
+ decoded, decodeErr := base64.StdEncoding.DecodeString(resp.Results[0].Value)
+ if decodeErr != nil || string(decoded) != "alpha" {
+ t.Errorf("k1 decoded = %q (err=%v), want alpha", decoded, decodeErr)
+ }
+
+ if resp.Results[1].Found {
+ t.Errorf("missing key must be found:false: %+v", resp.Results[1])
+ }
+
+ // Duplicate request — returns the same result twice; pins that
+ // the iteration is per-key, not deduped.
+ if !resp.Results[2].Found || resp.Results[2].Key != "k1" {
+ t.Errorf("duplicate-k1 result = %+v", resp.Results[2])
+ }
+}
+
+// TestHandleBatchDelete_BasicFlow seeds a key, deletes it via
+// batch, and asserts the post-delete batch-get reports it
+// missing.
+func TestHandleBatchDelete_BasicFlow(t *testing.T) {
+ t.Parallel()
+
+ app := newTestServer(t)
+
+ put := doRequest(t, app, http.MethodPut, "/v1/cache/k", "v", nil)
+ if put.status != http.StatusOK {
+ t.Fatalf("put: %d", put.status)
+ }
+
+ del := doRequest(t, app, http.MethodPost, "/v1/cache/batch/delete", `{"keys":["k"]}`, nil)
+ if del.status != http.StatusOK {
+ t.Fatalf("batch-delete: status %d", del.status)
+ }
+
+ var resp batchDeleteResponse
+
+ err := json.Unmarshal([]byte(del.body), &resp)
+ if err != nil {
+ t.Fatalf("decode: %v", err)
+ }
+
+ if len(resp.Results) != 1 || !resp.Results[0].Deleted {
+ t.Fatalf("expected one deleted result; got %+v", resp.Results)
+ }
+
+ got := doRequest(t, app, http.MethodPost, "/v1/cache/batch/get", `{"keys":["k"]}`, nil)
+ if !strings.Contains(got.body, `"found":false`) {
+ t.Fatalf("batch-get post-delete should report found:false; got %s", got.body)
+ }
+}
diff --git a/cmd/hypercache-server/main.go b/cmd/hypercache-server/main.go
index 3946bf0..6cf6814 100644
--- a/cmd/hypercache-server/main.go
+++ b/cmd/hypercache-server/main.go
@@ -37,6 +37,7 @@ import (
"github.com/hyp3rd/hypercache/internal/constants"
"github.com/hyp3rd/hypercache/internal/sentinel"
"github.com/hyp3rd/hypercache/pkg/backend"
+ cache "github.com/hyp3rd/hypercache/pkg/cache/v2"
)
// Defaults applied when the corresponding env var is unset. Centralized
@@ -282,9 +283,14 @@ func runClientAPI(addr, nodeID string, hc *hypercache.HyperCache[backend.DistMem
app.Put("/v1/cache/:key", auth(func(c fiber.Ctx) error { return handlePut(c, nodeCtx) }))
app.Get("/v1/cache/:key", auth(func(c fiber.Ctx) error { return handleGet(c, nodeCtx) }))
+ app.Head("/v1/cache/:key", auth(func(c fiber.Ctx) error { return handleHead(c, nodeCtx) }))
app.Delete("/v1/cache/:key", auth(func(c fiber.Ctx) error { return handleDelete(c, nodeCtx) }))
app.Get("/v1/owners/:key", auth(func(c fiber.Ctx) error { return handleOwners(c, nodeCtx) }))
+ app.Post("/v1/cache/batch/get", auth(func(c fiber.Ctx) error { return handleBatchGet(c, nodeCtx) }))
+ app.Post("/v1/cache/batch/put", auth(func(c fiber.Ctx) error { return handleBatchPut(c, nodeCtx) }))
+ app.Post("/v1/cache/batch/delete", auth(func(c fiber.Ctx) error { return handleBatchDelete(c, nodeCtx) }))
+
go func() {
err := app.Listen(addr)
if err != nil && !errors.Is(err, http.ErrServerClosed) {
@@ -391,22 +397,470 @@ func handlePut(c fiber.Ctx, nodeCtx *nodeContext) error {
})
}
-// handleGet implements GET /v1/cache/:key — returns the raw bytes
-// with Content-Type application/octet-stream, or a JSON 404 when
-// the key is absent. JSON-on-error keeps the response shape
-// machine-friendly even when the value path returns raw bytes.
+// itemEnvelope is the JSON shape returned when the client asks for
+// `Accept: application/json` on a single-key GET. Values are always
+// emitted as base64 in the envelope so the response is binary-safe
+// without the heuristic decode dance the raw-bytes path uses —
+// callers that want the literal string can decode the base64
+// themselves.
+type itemEnvelope struct {
+ Key string `json:"key"`
+ Value string `json:"value"`
+ ValueEncoding string `json:"value_encoding"`
+ TTLMs int64 `json:"ttl_ms,omitempty"`
+ ExpiresAt string `json:"expires_at,omitempty"`
+ Version uint64 `json:"version"`
+ Origin string `json:"origin,omitempty"`
+ LastUpdated string `json:"last_updated,omitempty"`
+ Node string `json:"node"`
+ Owners []string `json:"owners"`
+}
+
+// wantsJSON reports whether the client explicitly asked for the JSON
+// envelope via Accept. A bare `*/*` or absent header keeps the
+// raw-bytes default — operators using `curl -X GET` with no Accept
+// header continue to see the literal value, not a base64 envelope.
+func wantsJSON(c fiber.Ctx) bool {
+ accept := c.Get(fiber.HeaderAccept)
+ if accept == "" {
+ return false
+ }
+
+ return strings.Contains(accept, fiber.MIMEApplicationJSON)
+}
+
+// itemValueAsBytes normalizes the cached value to its underlying
+// byte representation regardless of how it round-tripped through
+// the dist HTTP transport (writer-node []byte vs replica-node
+// base64-string vs non-owner json.RawMessage). Reuses the same
+// heuristics as writeValue so single-key and batch responses stay
+// in agreement.
+func itemValueAsBytes(v any) []byte {
+ switch x := v.(type) {
+ case []byte:
+ return x
+
+ case string:
+ if decoded, ok := decodeBase64Bytes(x); ok {
+ return decoded
+ }
+
+ return []byte(x)
+
+ case json.RawMessage:
+ var s string
+
+ err := json.Unmarshal(x, &s)
+ if err == nil {
+ if decoded, ok := decodeBase64Bytes(s); ok {
+ return decoded
+ }
+
+ return []byte(s)
+ }
+
+ return []byte(x)
+
+ default:
+ raw, err := json.Marshal(v)
+ if err != nil {
+ return nil
+ }
+
+ return raw
+ }
+}
+
+// itemRemainingTTL returns (ttl_ms, expires_at_iso) for an Item.
+// Returns (0, "") when the item has no expiration. Negative
+// remaining TTLs are clamped to 0 — a "currently expiring" item
+// is reported as 0ms left, not as a negative number.
+func itemRemainingTTL(it *cache.Item) (int64, string) {
+ if it.Expiration <= 0 {
+ return 0, ""
+ }
+
+ expiry := it.LastAccess.Add(it.Expiration)
+ remaining := max(time.Until(expiry).Milliseconds(), 0)
+
+ return remaining, expiry.UTC().Format(time.RFC3339)
+}
+
+// buildEnvelope constructs the JSON envelope for a cached item.
+// Centralized so the single-key GET and the batch-get response
+// emit identical shapes.
+func buildEnvelope(key string, it *cache.Item, nodeCtx *nodeContext) itemEnvelope {
+ bytes := itemValueAsBytes(it.Value)
+ ttlMs, expiresAt := itemRemainingTTL(it)
+
+ env := itemEnvelope{
+ Key: key,
+ Value: base64.StdEncoding.EncodeToString(bytes),
+ ValueEncoding: "base64",
+ TTLMs: ttlMs,
+ ExpiresAt: expiresAt,
+ Version: it.Version,
+ Origin: it.Origin,
+ Node: nodeCtx.nodeID,
+ Owners: nodeCtx.hc.ClusterOwners(key),
+ }
+
+ if !it.LastUpdated.IsZero() {
+ env.LastUpdated = it.LastUpdated.UTC().Format(time.RFC3339)
+ }
+
+ return env
+}
+
+// setItemHeaders mirrors buildEnvelope onto response headers — the
+// HEAD handler returns these without a body. Header names use the
+// `X-Cache-*` convention; values are best-effort string forms.
+func setItemHeaders(c fiber.Ctx, key string, it *cache.Item, nodeCtx *nodeContext) {
+ c.Set("X-Cache-Version", strconv.FormatUint(it.Version, 10))
+
+ if it.Origin != "" {
+ c.Set("X-Cache-Origin", it.Origin)
+ }
+
+ if !it.LastUpdated.IsZero() {
+ c.Set("X-Cache-Last-Updated", it.LastUpdated.UTC().Format(time.RFC3339))
+ }
+
+ ttlMs, expiresAt := itemRemainingTTL(it)
+ if ttlMs > 0 {
+ c.Set("X-Cache-TTL-Ms", strconv.FormatInt(ttlMs, 10))
+ c.Set("X-Cache-Expires-At", expiresAt)
+ }
+
+ owners := nodeCtx.hc.ClusterOwners(key)
+ if len(owners) > 0 {
+ c.Set("X-Cache-Owners", strings.Join(owners, ","))
+ }
+
+ c.Set("X-Cache-Node", nodeCtx.nodeID)
+}
+
+// handleGet implements GET /v1/cache/:key.
+//
+// Default response: raw bytes with Content-Type application/octet-stream
+// (binary fidelity, current behavior).
+//
+// Accept: application/json: itemEnvelope JSON with TTL, version,
+// owners, etc. Lets API clients fetch metadata in one round-trip
+// instead of GET + HEAD.
 func handleGet(c fiber.Ctx, nodeCtx *nodeContext) error {
 	key := c.Params("key")
 	if key == "" {
 		return jsonErr(c, fiber.StatusBadRequest, codeBadRequest, "missing key in path")
 	}
-	v, ok := nodeCtx.hc.Get(c.Context(), key)
+	it, ok := nodeCtx.hc.GetWithInfo(c.Context(), key)
 	if !ok {
 		return jsonErr(c, fiber.StatusNotFound, codeNotFound, "key not found")
 	}
-	return writeValue(c, v)
+	// Content negotiation: JSON envelope when the client asks for
+	// it, raw bytes otherwise — existing clients see no change.
+	if wantsJSON(c) {
+		return c.JSON(buildEnvelope(key, it, nodeCtx))
+	}
+
+	return writeValue(c, it.Value)
+}
+
+// batchGetRequest documents the request shape for
+// `POST /v1/cache/batch/get`. Empty `keys` returns an empty
+// `results` array with status 200.
+type batchGetRequest struct {
+	Keys []string `json:"keys"`
+}
+
+// batchGetResult is one entry in the batch-get response. `Found:
+// false` results carry no metadata; `Found: true` results carry
+// the same envelope shape as a single-key Accept:json GET.
+type batchGetResult struct {
+	Key   string `json:"key"`
+	Found bool   `json:"found"`
+	// Value is the base64-encoded payload; ValueEncoding is always
+	// "base64" on found results (see batchGetResultFromItem).
+	Value         string `json:"value,omitempty"`
+	ValueEncoding string `json:"value_encoding,omitempty"`
+	TTLMs         int64  `json:"ttl_ms,omitempty"`
+	ExpiresAt     string `json:"expires_at,omitempty"`
+	Version       uint64 `json:"version,omitempty"`
+	Origin        string `json:"origin,omitempty"`
+	// LastUpdated is RFC 3339 UTC; omitted when the item's
+	// timestamp is zero (never updated).
+	LastUpdated string   `json:"last_updated,omitempty"`
+	Owners      []string `json:"owners,omitempty"`
+}
+
+// batchGetResponse is the top-level wrapper so a future caller can
+// add cluster-wide stats (per-batch latency, owners-touched, etc.)
+// without breaking the wire shape.
+type batchGetResponse struct {
+	Results []batchGetResult `json:"results"`
+	// Node identifies which node served the batch.
+	Node string `json:"node"`
+}
+
+// batchPutItem is one entry in the batch-put request. `value` is
+// either a UTF-8 string (default) or a base64-encoded byte payload
+// when `value_encoding` is `"base64"` — the same convention the
+// single-key Accept:json GET emits, so a batch-put can round-trip
+// the result of an earlier batch-get verbatim.
+type batchPutItem struct {
+	Key           string `json:"key"`
+	Value         string `json:"value"`
+	ValueEncoding string `json:"value_encoding,omitempty"`
+	// TTLMs is converted to a millisecond time.Duration by
+	// applyBatchPutItem; presumably 0 means no expiry — confirm
+	// against the cache Set semantics.
+	TTLMs int64 `json:"ttl_ms,omitempty"`
+}
+
+// batchPutRequest is the request body for POST /v1/cache/batch/put.
+type batchPutRequest struct {
+	Items []batchPutItem `json:"items"`
+}
+
+// batchPutResult is one entry in the batch-put response. On
+// failure, `Stored` is false and `Error`/`Code` describe why —
+// per-item granularity so a single failing item doesn't void
+// the whole batch.
+type batchPutResult struct {
+	Key    string `json:"key"`
+	Stored bool   `json:"stored"`
+	// Bytes is the decoded payload size actually stored.
+	Bytes  int      `json:"bytes,omitempty"`
+	Owners []string `json:"owners,omitempty"`
+	Error  string   `json:"error,omitempty"`
+	Code   string   `json:"code,omitempty"`
+}
+
+// batchPutResponse wraps the per-item results plus the serving
+// node's ID — same shape convention as batchGetResponse.
+type batchPutResponse struct {
+	Results []batchPutResult `json:"results"`
+	Node    string           `json:"node"`
+}
+
+// batchDeleteResult is one entry in the batch-delete response.
+// `Deleted: false` with a populated Error/Code means the remove
+// failed (or the key was empty); Owners reflects ring ownership
+// captured before the remove (see handleBatchDelete).
+type batchDeleteResult struct {
+	Key     string   `json:"key"`
+	Deleted bool     `json:"deleted"`
+	Owners  []string `json:"owners,omitempty"`
+	Error   string   `json:"error,omitempty"`
+	Code    string   `json:"code,omitempty"`
+}
+
+// batchDeleteRequest is the request body for
+// POST /v1/cache/batch/delete.
+type batchDeleteRequest struct {
+	Keys []string `json:"keys"`
+}
+
+// batchDeleteResponse wraps the per-item results plus the serving
+// node's ID — same shape convention as batchGetResponse.
+type batchDeleteResponse struct {
+	Results []batchDeleteResult `json:"results"`
+	Node    string              `json:"node"`
+}
+
+// handleBatchGet implements POST /v1/cache/batch/get: one
+// round-trip, many keys, each found key carrying the same metadata
+// envelope as the single-key Accept:json GET. Lookups are
+// independent — a missing or empty key yields {found: false}
+// instead of failing the whole batch.
+func handleBatchGet(c fiber.Ctx, nodeCtx *nodeContext) error {
+	var req batchGetRequest
+	if err := json.Unmarshal(c.Body(), &req); err != nil {
+		return jsonErr(c, fiber.StatusBadRequest, codeBadRequest, "invalid JSON: "+err.Error())
+	}
+
+	ctx := c.Context()
+	results := make([]batchGetResult, 0, len(req.Keys))
+
+	for _, k := range req.Keys {
+		// Empty keys are never looked up; they report not-found.
+		var it *cache.Item
+
+		found := false
+		if k != "" {
+			it, found = nodeCtx.hc.GetWithInfo(ctx, k)
+		}
+
+		if !found {
+			results = append(results, batchGetResult{Key: k, Found: false})
+
+			continue
+		}
+
+		results = append(results, batchGetResultFromItem(k, it, nodeCtx))
+	}
+
+	return c.JSON(batchGetResponse{Results: results, Node: nodeCtx.nodeID})
+}
+
+// batchGetResultFromItem adapts buildEnvelope's projection into a
+// batchGetResult so the batch-get wire shape cannot drift from the
+// single-key Accept:json GET — one projection, two response types.
+// (The previous version duplicated the field-by-field projection
+// inline; delegating keeps the two paths mechanically in sync.)
+func batchGetResultFromItem(key string, it *cache.Item, nodeCtx *nodeContext) batchGetResult {
+	env := buildEnvelope(key, it, nodeCtx)
+
+	return batchGetResult{
+		Key:           env.Key,
+		Found:         true,
+		Value:         env.Value,
+		ValueEncoding: env.ValueEncoding,
+		TTLMs:         env.TTLMs,
+		ExpiresAt:     env.ExpiresAt,
+		Version:       env.Version,
+		Origin:        env.Origin,
+		LastUpdated:   env.LastUpdated,
+		Owners:        env.Owners,
+	}
+}
+
+// handleBatchPut implements POST /v1/cache/batch/put. Items are
+// applied one at a time via applyBatchPutItem; each item's outcome
+// (including decode or store failures) is reported in its own
+// result entry, so one bad item never voids the rest of the batch.
+// `value_encoding: "base64"` decodes bytes-first; anything else
+// (including absent) stores the UTF-8 bytes of the string.
+func handleBatchPut(c fiber.Ctx, nodeCtx *nodeContext) error {
+	var req batchPutRequest
+	if err := json.Unmarshal(c.Body(), &req); err != nil {
+		return jsonErr(c, fiber.StatusBadRequest, codeBadRequest, "invalid JSON: "+err.Error())
+	}
+
+	ctx := c.Context()
+	out := batchPutResponse{
+		Results: make([]batchPutResult, 0, len(req.Items)),
+		Node:    nodeCtx.nodeID,
+	}
+
+	for _, item := range req.Items {
+		out.Results = append(out.Results, applyBatchPutItem(ctx, nodeCtx, item))
+	}
+
+	return c.JSON(out)
+}
+
+// applyBatchPutItem validates, decodes, and stores one batch-put
+// item, returning its per-item result. Extracted so handleBatchPut
+// reads as a plain loop despite the value-encoding branch.
+func applyBatchPutItem(ctx context.Context, nodeCtx *nodeContext, item batchPutItem) batchPutResult {
+	res := batchPutResult{Key: item.Key}
+
+	if item.Key == "" {
+		res.Error = "missing key"
+		res.Code = codeBadRequest
+
+		return res
+	}
+
+	value, err := decodeBatchPutValue(item)
+	if err != nil {
+		res.Error = err.Error()
+		res.Code = codeBadRequest
+
+		return res
+	}
+
+	if setErr := nodeCtx.hc.Set(ctx, item.Key, value, time.Duration(item.TTLMs)*time.Millisecond); setErr != nil {
+		res.Error = setErr.Error()
+		res.Code = classifyErrCode(setErr)
+
+		return res
+	}
+
+	res.Stored = true
+	res.Bytes = len(value)
+	res.Owners = nodeCtx.hc.ClusterOwners(item.Key)
+
+	return res
+}
+
+// decodeBatchPutValue turns the wire `value` string into the byte
+// payload to store. Only `value_encoding: "base64"` triggers
+// decoding; any other (or absent) encoding stores the UTF-8 bytes
+// of the string as-is.
+func decodeBatchPutValue(item batchPutItem) ([]byte, error) {
+	if item.ValueEncoding == "base64" {
+		decoded, err := base64.StdEncoding.DecodeString(item.Value)
+		if err != nil {
+			return nil, fmt.Errorf("invalid base64 value: %w", err)
+		}
+
+		return decoded, nil
+	}
+
+	return []byte(item.Value), nil
+}
+
+// handleBatchDelete implements POST /v1/cache/batch/delete with
+// the same per-item granularity as handleBatchPut: each key's
+// outcome is reported independently.
+func handleBatchDelete(c fiber.Ctx, nodeCtx *nodeContext) error {
+	var req batchDeleteRequest
+	if err := json.Unmarshal(c.Body(), &req); err != nil {
+		return jsonErr(c, fiber.StatusBadRequest, codeBadRequest, "invalid JSON: "+err.Error())
+	}
+
+	ctx := c.Context()
+	results := make([]batchDeleteResult, 0, len(req.Keys))
+
+	for _, key := range req.Keys {
+		if key == "" {
+			results = append(results, batchDeleteResult{Key: key, Deleted: false, Error: "missing key", Code: codeBadRequest})
+
+			continue
+		}
+
+		// Capture ownership before the remove so the response
+		// still reflects which ring nodes held the key.
+		entry := batchDeleteResult{Key: key, Owners: nodeCtx.hc.ClusterOwners(key)}
+
+		if removeErr := nodeCtx.hc.Remove(ctx, key); removeErr != nil {
+			entry.Error = removeErr.Error()
+			entry.Code = classifyErrCode(removeErr)
+		} else {
+			entry.Deleted = true
+		}
+
+		results = append(results, entry)
+	}
+
+	return c.JSON(batchDeleteResponse{Results: results, Node: nodeCtx.nodeID})
+}
+
+// classifyErrCode maps a service-level error to the canonical
+// machine-readable code string. Mirrors classifyAndRespond's
+// status mapping but returns just the code so per-item batch
+// results can include it without overriding the batch's HTTP
+// status.
+//
+// NOTE(review): the original switch listed sentinel.ErrNotOwner
+// as a separate case that returned codeInternal — identical to
+// the default, so the branch was redundant and is collapsed
+// here. If a dedicated not-owner code is ever introduced,
+// reinstate an errors.Is(err, sentinel.ErrNotOwner) branch.
+func classifyErrCode(err error) string {
+	if errors.Is(err, sentinel.ErrDraining) {
+		return codeDraining
+	}
+
+	return codeInternal
+}
+
+// handleHead implements HEAD /v1/cache/:key: existence plus
+// metadata (TTL, version, owners) via X-Cache-* headers, no body.
+// Cheap revalidation — clients check a key without paying the
+// value-transfer cost of a GET.
+//
+// 200 with headers when the key is present, 404 when absent,
+// 400 on a missing key segment.
+func handleHead(c fiber.Ctx, nodeCtx *nodeContext) error {
+	key := c.Params("key")
+	if key == "" {
+		return c.SendStatus(fiber.StatusBadRequest)
+	}
+
+	item, found := nodeCtx.hc.GetWithInfo(c.Context(), key)
+	if !found {
+		return c.SendStatus(fiber.StatusNotFound)
+	}
+
+	setItemHeaders(c, key, item, nodeCtx)
+
+	return c.SendStatus(fiber.StatusOK)
 }
// writeValue emits a cached value back to the client with the right
diff --git a/cspell.config.yaml b/cspell.config.yaml
index 0efe518..2961ea9 100644
--- a/cspell.config.yaml
+++ b/cspell.config.yaml
@@ -81,6 +81,8 @@ words:
- exhaustruct
- Fanout
- fasthttp
+ - fontawesome
+ - frontmatter
- fatals
- fctx
- ferr
@@ -92,6 +94,7 @@ words:
- freqs
- funlen
- geomean
+ - glightbox
- gerr
- gitversion
- GITVERSION
@@ -123,6 +126,7 @@ words:
- idxs
- Iface
- ineff
+ - inlinehilite
- inmemory
- intrange
- ints
@@ -130,10 +134,12 @@ words:
- Itemm
- keyf
- lamport
+ - linenums
- LFUDA
- localmodule
- logrus
- longbridgeapp
+ - mailtos
- maxmemory
- memprofile
- Merkle
@@ -164,8 +170,12 @@ words:
- podname
- popd
- Prealloc
+ - productionization
- protoc
- pushd
+ - pygments
+ - pyenv
+ - pymdownx
- recvcheck
- rediscluster
- repls
@@ -191,12 +201,15 @@ words:
- strs
- subtest
- subtests
+ - superfences
- sval
- thelper
- toplevel
- tparallel
+ - tasklist
- tracetest
- traefik
+ - twemoji
- trunc
- tunables
- TTLMs
@@ -207,6 +220,7 @@ words:
- upserted
- upserts
- varnamelen
+ - venv
- vettool
- vnode
- vnodes
diff --git a/docs/changelog.md b/docs/changelog.md
new file mode 100644
index 0000000..ca59717
--- /dev/null
+++ b/docs/changelog.md
@@ -0,0 +1,8 @@
+---
+title: Changelog
+---
+
+{%
+ include-markdown "../CHANGELOG.md"
+ start="# Changelog"
+%}
diff --git a/docs/cluster.md b/docs/cluster.md
new file mode 100644
index 0000000..2ab9a5f
--- /dev/null
+++ b/docs/cluster.md
@@ -0,0 +1,94 @@
+---
+title: 5-Node Cluster
+---
+
+# 5-Node Cluster (docker-compose)
+
+The repo ships a ready-to-run 5-node cluster definition at
+[`docker-compose.cluster.yml`](https://github.com/hyp3rd/hypercache/blob/main/docker-compose.cluster.yml).
+Replication factor 3, quorum reads/writes, bearer-token auth, peer-to-peer
+DNS via container hostnames.
+
+## Bring it up
+
+```sh
+docker compose -f docker-compose.cluster.yml up --build -d
+```
+
+Wait for the listeners to bind:
+
+```sh
+bash scripts/tests/wait-for-cluster.sh
+```
+
+## Talk to it
+
+| Node | Client API host port | Management host port |
+|---|---|---|
+| `hypercache-1` | `8081` | `9081` |
+| `hypercache-2` | `8082` | `9082` |
+| `hypercache-3` | `8083` | `9083` |
+| `hypercache-4` | `8084` | `9084` |
+| `hypercache-5` | `8085` | `9085` |
+
+Every node accepts every operation — the dist backend's quorum and
+forwarding logic routes to the actual key owners under the hood:
+
+```sh
+TOKEN='dev-token'
+
+# Write to node-1.
+curl -H "Authorization: Bearer $TOKEN" \
+ -X PUT --data 'world' \
+ 'http://localhost:8081/v1/cache/greeting'
+
+# Read from any other node — same value.
+curl -H "Authorization: Bearer $TOKEN" \
+ 'http://localhost:8085/v1/cache/greeting' # -> world
+
+# See which ring nodes own the key.
+curl -H "Authorization: Bearer $TOKEN" \
+ 'http://localhost:8083/v1/owners/greeting'
+```
+
+## Inspect cluster state
+
+The management HTTP server (host ports `9081-9085`) exposes the admin
+endpoints:
+
+```sh
+curl -H "Authorization: Bearer $TOKEN" 'http://localhost:9081/cluster/members'
+curl -H "Authorization: Bearer $TOKEN" 'http://localhost:9081/dist/metrics' | jq
+curl -H "Authorization: Bearer $TOKEN" 'http://localhost:9081/cluster/heartbeat'
+```
+
+## Verify with the regression scripts
+
+Two scripts under `scripts/tests/` assert end-to-end behavior — a smoke
+covering propagation/wire-encoding/cross-node delete, and a resilience
+test that kills a node mid-run and asserts the cluster keeps serving:
+
+```sh
+bash scripts/tests/10-test-cluster-api.sh # 17 assertions
+bash scripts/tests/20-test-cluster-resilience.sh # 24 assertions, ~20s
+```
+
+Or chain everything via the Makefile:
+
+```sh
+make test-cluster # up + smoke + resilience + always-down
+```
+
+## What changed when
+
+The journey from "this didn't actually cluster" to a tested 5-node stack
+is documented in the [changelog](changelog.md). Notable fixes:
+
+- Factory was discarding `cfg.DistMemoryOptions` — every `WithDistNode` /
+ `WithDistSeeds` was a silent no-op until [v0.6.0].
+- Seeds without inline node IDs produced an unusable ring; the
+ `id@addr` syntax (`node-2@hypercache-2:7946`) is the production form.
+- Cross-process gossip dissemination + SWIM self-refutation
+ ([v0.6.0]) retired the `experimental` heartbeat marker.
+
+[v0.6.0]: changelog.md
diff --git a/docs/helm.md b/docs/helm.md
new file mode 100644
index 0000000..0b3a2ef
--- /dev/null
+++ b/docs/helm.md
@@ -0,0 +1,95 @@
+---
+title: Kubernetes (Helm)
+---
+
+# Kubernetes via Helm
+
+The repo ships a Helm chart at
+[`chart/hypercache/`](https://github.com/hyp3rd/hypercache/tree/main/chart/hypercache)
+that produces a production-shaped k8s deployment: StatefulSet for stable
+per-pod identity, headless Service for peer DNS, separate client and
+management Services, a PodDisruptionBudget that holds quorum during
+voluntary disruptions, and a hardened pod-security context.
+
+## Why StatefulSet, not Deployment
+
+Each peer's seed list pre-binds the others by stable DNS name
+(`<pod>.<headless-service>.<namespace>.svc.cluster.local`). A Deployment's pod
+names are random suffixes, which would force a runtime peer-discovery
+loop the dist HTTP transport doesn't have. StatefulSets give the
+deterministic hostnames the seed format needs.
+
+## Install
+
+From a checkout:
+
+```sh
+helm install hyc chart/hypercache \
+ --namespace hyc-prod --create-namespace
+```
+
+Default values produce 5 pods, replication factor 3, no auth, and a
+ClusterIP-only client API. See [`values.yaml`][values] for the full
+surface.
+
+[values]: https://github.com/hyp3rd/hypercache/blob/main/chart/hypercache/values.yaml
+
+## Common configuration
+
+```sh
+# Enable bearer-token auth via a chart-managed Secret.
+helm install hyc chart/hypercache \
+ --namespace hyc-prod --create-namespace \
+ --set auth.token.value=$(openssl rand -base64 32)
+
+# Use an operator-managed Secret (recommended for rotation).
+helm install hyc chart/hypercache \
+ --namespace hyc-prod --create-namespace \
+ --set auth.token.existingSecret=hyc-token \
+ --set auth.token.existingSecretKey=token
+
+# Smaller cluster (3 pods, replication 2).
+helm install hyc chart/hypercache \
+ --namespace hyc-prod --create-namespace \
+ --set replicaCount=3 --set cluster.replicationFactor=2 \
+ --set podDisruptionBudget.minAvailable=2
+
+# Expose the client API via LoadBalancer.
+helm install hyc chart/hypercache \
+ --namespace hyc-prod --create-namespace \
+ --set service.client.type=LoadBalancer
+```
+
+## What gets created
+
+Helm renders six resources by default (seven when auth is set inline):
+
+| Resource | Name | Purpose |
+|---|---|---|
+| StatefulSet | `-hypercache` | The pods themselves |
+| Service (headless) | `-hypercache-headless` | Per-pod DNS for peer discovery |
+| Service | `-hypercache` | Client API entry |
+| Service | `-hypercache-mgmt` | Management/observability |
+| PodDisruptionBudget | `-hypercache` | Holds quorum during drains |
+| ServiceAccount | `-hypercache` | Pod identity |
+| Secret | `-hypercache-auth` | (only when `auth.token.value` is set inline) |
+
+## Probes
+
+- **Liveness** hits the binary's `/healthz` on the client API port. If
+ this fails, k8s restarts the pod — the Go runtime is dead.
+- **Readiness** hits the dist HTTP `/health`. This endpoint flips to 503
+ when an operator calls Drain, so a pod removed from rotation by
+ `Drain` stops receiving Service traffic immediately, regardless of
+ liveness state.
+
+The binary runs SIGTERM → Drain → API stop → Cache.Stop with a 30 s
+internal deadline; `terminationGracePeriodSeconds: 45` in the chart
+gives it slack.
+
+## Operations
+
+The [runbook](operations.md) has split-brain, hint-queue overflow,
+rebalance-under-load, and replica-loss procedures. Every failure mode
+is mapped to a metric exposed by the management HTTP server (or the
+OpenTelemetry pipeline you wire via `WithDistMeterProvider`).
diff --git a/docs/index.md b/docs/index.md
new file mode 100644
index 0000000..9f74b3c
--- /dev/null
+++ b/docs/index.md
@@ -0,0 +1,82 @@
+---
+title: HyperCache
+hide:
+ - navigation
+---
+
+# HyperCache
+
+Distributed in-memory cache for Go. Sharded for concurrency, replicated for
+durability under partial failure, observable from the start, and shipped as
+both a library and a single-binary HTTP service.
+
+
+
+- :material-rocket-launch: **[Quickstart](quickstart.md)** — five minutes from `go get` to a working cache.
+- :material-server-network: **[5-Node Cluster](cluster.md)** — boot a real cluster with `docker compose`.
+- :fontawesome-brands-kubernetes: **[Helm Chart](helm.md)** — deploy on Kubernetes with stable identities.
+- :material-tools: **[Operations Runbook](operations.md)** — split-brain, hint queues, drain, capacity.
+
+
+
+## Why HyperCache
+
+| | What you get | Why it matters |
+|---|---|---|
+| **Sharded by default** | 32 per-shard mutexes routed by xxhash | Write throughput scales with cores, no global lock. |
+| **Distributed backend** | Consistent hashing, configurable replication, quorum reads/writes | A single failed node does not lose keys. |
+| **Hinted handoff** | Failed forwards queue with TTL, replay on the dist HTTP transport | Transient peer outages don't drop replicas. |
+| **SWIM heartbeat** | Direct + indirect probes; self-refute via incarnation gossip | Filters caller-side network blips, recovers from false suspicion. |
+| **Observable** | `slog` logger + OpenTelemetry tracing + OpenTelemetry metrics, all opt-in | Plug into your existing pipeline, no extra deps. |
+| **Operator-friendly** | `Drain` endpoint, cursor-paged key enumeration, JSON error envelopes | Designed for rolling deploys and on-call clarity. |
+
+## How it fits together
+
+```mermaid
+flowchart LR
+ subgraph App[Your Go application]
+ HC[HyperCache wrapper]
+ end
+
+ subgraph DM[DistMemory backend]
+ Shard1[Shard 1]
+ Shard2[Shard 2]
+ ShardN[... Shard N]
+ Ring[Consistent hash ring]
+ Members[Membership + heartbeat]
+ end
+
+ HC --> DM
+ DM --> Shard1
+ DM --> Shard2
+ DM --> ShardN
+ DM <--> Ring
+ DM <--> Members
+
+ Members <-.HTTP gossip.-> Peer1[(peer node)]
+ Shard1 <-.HTTP replicate.-> Peer1
+```
+
+The `HyperCache` wrapper is a thin facade you embed in your application.
+The `DistMemory` backend handles sharding, replication, and the cluster
+plane. Two HTTP listeners run per process: a peer-to-peer one for
+replication and gossip, and a separate management one for admin and
+observability.
+
+## Two ways to use it
+
+**As a library** — embed `HyperCache` directly in your Go application; it
+uses the in-memory or distributed backend in-process. See
+[Quickstart](quickstart.md).
+
+**As a service** — run the [`hypercache-server`](server.md) binary; clients
+talk to it over a REST API. See [5-Node Cluster](cluster.md) for the
+docker-compose recipe and [Helm Chart](helm.md) for Kubernetes.
+
+## Project status
+
+The distributed backend is production-ready as of v0.6.0 — see the
+[changelog](changelog.md) for the full list of features and fixes that
+landed during the productionization push (Phases A through E in the
+upstream history). Operations procedures live in the
+[runbook](operations.md).
diff --git a/docs/quickstart.md b/docs/quickstart.md
new file mode 100644
index 0000000..d63a5f0
--- /dev/null
+++ b/docs/quickstart.md
@@ -0,0 +1,98 @@
+---
+title: Quickstart
+---
+
+# Quickstart
+
+Five minutes from `go get` to a working cache. Two paths: embed the
+library in a Go program, or run the binary and talk to it over HTTP.
+
+## Library (single process, no cluster)
+
+```sh
+go get github.com/hyp3rd/hypercache@latest
+```
+
+```go
+package main
+
+import (
+ "context"
+ "fmt"
+ "time"
+
+ "github.com/hyp3rd/hypercache"
+)
+
+func main() {
+ ctx := context.Background()
+
+ cache, err := hypercache.NewInMemoryWithDefaults(ctx, 10_000)
+ if err != nil {
+ panic(err)
+ }
+
+ defer cache.Stop(ctx)
+
+ if err := cache.Set(ctx, "greeting", "hello", 5*time.Minute); err != nil {
+ panic(err)
+ }
+
+ v, ok := cache.Get(ctx, "greeting")
+ fmt.Printf("got: %v ok=%v\n", v, ok)
+}
+```
+
+That's the full library surface. Capacity, eviction algorithm, expiration
+interval, and per-shard tuning are all configurable via `hypercache.Config`
++ `WithDist*` / `With*` options.
+
+## Service (single node, HTTP API)
+
+Run the binary directly:
+
+```sh
+go install github.com/hyp3rd/hypercache/cmd/hypercache-server@latest
+
+HYPERCACHE_NODE_ID=demo \
+HYPERCACHE_API_ADDR=:8080 \
+HYPERCACHE_DIST_ADDR=127.0.0.1:7946 \
+hypercache-server
+```
+
+In another terminal:
+
+```sh
+# Store a value.
+curl -X PUT --data 'world' 'http://localhost:8080/v1/cache/greeting'
+
+# Read it back.
+curl 'http://localhost:8080/v1/cache/greeting' # -> world
+
+# Inspect metadata via headers (no body transfer).
+curl -I 'http://localhost:8080/v1/cache/greeting'
+
+# Or as a JSON envelope.
+curl -H 'Accept: application/json' 'http://localhost:8080/v1/cache/greeting'
+
+# Batch operations (3 endpoints under /v1/cache/batch/{get,put,delete}).
+curl -X POST -H 'Content-Type: application/json' \
+ --data '{"keys": ["greeting", "missing"]}' \
+ 'http://localhost:8080/v1/cache/batch/get'
+```
+
+The binary's full env-var reference and the response shapes are documented
+on the [Server Binary](server.md) page.
+
+## Service (5-node cluster on docker-compose)
+
+For a real cluster, see the [5-Node Cluster](cluster.md) tutorial — one
+command brings five nodes up on a docker network, replication factor 3,
+quorum reads/writes, with the same client API.
+
+## Production deployment
+
+[Kubernetes via Helm](helm.md) is the canonical production deployment.
+The chart wires up StatefulSet identities, headless DNS for peer
+discovery, anti-affinity, PodDisruptionBudget, and an optional
+operator-managed Secret for the bearer token.
diff --git a/docs/requirements.txt b/docs/requirements.txt
new file mode 100644
index 0000000..3953090
--- /dev/null
+++ b/docs/requirements.txt
@@ -0,0 +1,20 @@
+# Pinned dependency set for the MkDocs site build. Used by both
+# `make docs-build` (via the operator's pyenv `mkdocs` venv) and
+# `.github/workflows/docs.yml` (via actions/setup-python with
+# `cache: pip` — the file's hash is the cache key, so pinned
+# versions also produce reproducible cache hits).
+#
+# Bump deliberately: the Material theme moves fast and changes
+# can shift visual output. Verify with `mkdocs build --strict`
+# locally before bumping.
+
+
+# MkDocs itself, the Material theme, its plugins, and the key
+# transitive deps — all pinned so builds are reproducible.
+Markdown==3.10.2
+mkdocs==1.6.1
+mkdocs-glightbox==0.5.2
+mkdocs-include-markdown-plugin==7.2.2
+mkdocs-material==9.7.6
+mkdocs-material-extensions==1.3.1
+Pygments==2.20.0
diff --git a/docs/rfcs/index.md b/docs/rfcs/index.md
new file mode 100644
index 0000000..8921b4d
--- /dev/null
+++ b/docs/rfcs/index.md
@@ -0,0 +1,24 @@
+---
+title: RFCs
+---
+
+# RFCs
+
+Design proposals — accepted, rejected, or implemented — that informed
+the architecture. Every RFC is dated and tracked through to its
+final disposition.
+
+| # | Title | Status |
+|---|---|---|
+| [0001](0001-backend-owned-eviction.md) | Backend-owned eviction | **Closed — Rejected** (spike measured, hypothesis falsified, code removed) |
+| [0002](0002-generic-item-typing.md) | Generic `Item[V]` typing | **Phase 1 implemented** (the `Typed[T, V]` wrapper); Phase 2 (deep generics) deferred to v3 |
+
+## When to write one
+
+For changes whose blast radius extends beyond a single PR — wire formats,
+public API shape, multi-phase refactors, or anything that needs a paper
+trail of "we tried X and it didn't work, here's why" so future
+contributors don't re-tread the same ground.
+
+Skip the RFC for bug fixes, internal refactors, and feature work whose
+shape is already obvious from the code.
diff --git a/docs/server.md b/docs/server.md
new file mode 100644
index 0000000..0b99c68
--- /dev/null
+++ b/docs/server.md
@@ -0,0 +1,8 @@
+---
+title: Server Binary
+---
+
+{%
+ include-markdown "../cmd/hypercache-server/README.md"
+ start="# hypercache-server"
+%}
diff --git a/mkdocs.yml b/mkdocs.yml
new file mode 100644
index 0000000..4ed7355
--- /dev/null
+++ b/mkdocs.yml
@@ -0,0 +1,117 @@
+---
+# MkDocs configuration for the GitHub Pages docs site.
+# Build: mkdocs build --strict
+# Preview: mkdocs serve
+# Deploy: pushed automatically by .github/workflows/docs.yml on main.
+
+site_name: HyperCache
+site_description: Distributed in-memory cache for Go — sharded, replicated, observable.
+site_author: hyp3rd
+site_url: https://hyp3rd.github.io/hypercache/
+
+repo_name: hyp3rd/hypercache
+repo_url: https://github.com/hyp3rd/hypercache
+edit_uri: edit/main/docs/
+
+# Strict mode in CI catches broken links / missing pages on PR. Local
+# `mkdocs serve` ignores this so authoring stays low-friction.
+strict: false
+
+theme:
+ name: material
+ language: en
+ palette:
+ - media: "(prefers-color-scheme: light)"
+ scheme: default
+ primary: indigo
+ accent: indigo
+ toggle:
+ icon: material/weather-night
+ name: Switch to dark mode
+ - media: "(prefers-color-scheme: dark)"
+ scheme: slate
+ primary: indigo
+ accent: indigo
+ toggle:
+ icon: material/weather-sunny
+ name: Switch to light mode
+ features:
+ - navigation.tabs
+ - navigation.sections
+ - navigation.indexes
+ - navigation.top
+ - navigation.tracking
+ - toc.follow
+ - search.suggest
+ - search.highlight
+ - content.code.copy
+ - content.code.annotate
+ - content.tabs.link
+ - content.action.edit
+ icon:
+ repo: fontawesome/brands/github
+ logo: material/database
+
+plugins:
+ - search
+ - include-markdown
+ - glightbox
+
+# Build-time hooks. _mkdocs/hooks.py rewrites repo-relative links
+# to source files (e.g. `../pkg/foo.go`) into absolute GitHub URLs
+# so the same markdown renders correctly both on github.com and on
+# the rendered Pages site.
+hooks:
+ - _mkdocs/hooks.py
+
+markdown_extensions:
+ - admonition
+ - attr_list
+ - def_list
+ - footnotes
+ - md_in_html
+ - tables
+ - toc:
+ permalink: true
+ permalink_title: Anchor link to this section
+ - pymdownx.details
+ - pymdownx.superfences:
+ custom_fences:
+ - name: mermaid
+ class: mermaid
+ format: !!python/name:pymdownx.superfences.fence_code_format
+ - pymdownx.tabbed:
+ alternate_style: true
+ - pymdownx.highlight:
+ anchor_linenums: true
+ line_spans: __span
+ pygments_lang_class: true
+ - pymdownx.inlinehilite
+ - pymdownx.snippets
+ - pymdownx.tasklist:
+ custom_checkbox: true
+ - pymdownx.emoji:
+ emoji_index: !!python/name:material.extensions.emoji.twemoji
+ emoji_generator: !!python/name:material.extensions.emoji.to_svg
+ - pymdownx.keys
+
+extra:
+ social:
+ - icon: fontawesome/brands/github
+ link: https://github.com/hyp3rd/hypercache
+ generator: false
+
+nav:
+ - Home: index.md
+ - Getting Started:
+ - Quickstart: quickstart.md
+ - 5-Node Cluster: cluster.md
+ - Kubernetes (Helm): helm.md
+ - Operations:
+ - Runbook: operations.md
+ - Server Binary: server.md
+ - Architecture:
+ - Distributed Backend: distributed.md
+ - Reference:
+ - Changelog: changelog.md
+ - RFCs: rfcs/index.md