diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml new file mode 100644 index 0000000..7752b3d --- /dev/null +++ b/.github/workflows/release.yml @@ -0,0 +1,83 @@ +--- +name: release + +# Create the GitHub Release page on every `v*.*.*` tag push. Notes +# are auto-generated from PRs merged since the previous tag; a small +# header points readers at the CHANGELOG (which is the source of +# truth for the *operator-facing* summary) and at the matching +# container image published by image.yml. +# +# This workflow does NOT build artifacts — the container image is +# the canonical artifact and image.yml owns it. If a future release +# needs binary tarballs, add a separate matrix-build job here. + +on: + push: + tags: ['v*.*.*'] + workflow_dispatch: + inputs: + tag: + description: 'Existing tag to (re)create a release for' + required: true + +permissions: + contents: write + +jobs: + release: + name: Create GitHub Release + runs-on: ubuntu-latest + timeout-minutes: 10 + + steps: + - uses: actions/checkout@v6 + with: + # Fetch the full history so the auto-notes generator can + # diff against the previous tag. + fetch-depth: 0 + + - name: Resolve tag ref + id: tag + run: | + # workflow_dispatch passes `inputs.tag`; tag pushes use + # GITHUB_REF_NAME. Either way we end up with a clean + # `v1.2.3`-style identifier in the output. + if [[ -n "${{ inputs.tag }}" ]]; then + echo "name=${{ inputs.tag }}" >> "$GITHUB_OUTPUT" + else + echo "name=${GITHUB_REF_NAME}" >> "$GITHUB_OUTPUT" + fi + + - name: Compose release body header + id: body + run: | + # The header pins readers to the canonical sources of + # truth: the CHANGELOG (operator-facing summary) and the + # container image (the canonical artifact). The + # auto-generated PR list shows up directly underneath + # via softprops's `generate_release_notes: true`. 
+ tag="${{ steps.tag.outputs.name }}" + { + echo "## hypercache ${tag}" + echo "" + echo "**Container image:** \`ghcr.io/${{ github.repository }}/hypercache-server:${tag}\`" + echo "" + echo "See [CHANGELOG.md](https://github.com/${{ github.repository }}/blob/${tag}/CHANGELOG.md) for the operator-facing summary." + echo "" + echo "---" + } > /tmp/release-body.md + + echo "path=/tmp/release-body.md" >> "$GITHUB_OUTPUT" + + - name: Create GitHub Release + uses: softprops/action-gh-release@v2 + with: + tag_name: ${{ steps.tag.outputs.name }} + name: ${{ steps.tag.outputs.name }} + body_path: ${{ steps.body.outputs.path }} + # Append the auto-generated PR list to the body above. + generate_release_notes: true + # Pre-release detection: any tag with a `-` (e.g. + # `v1.2.3-rc1`, `v1.2.3-beta`) is flagged as pre-release. + # Stable `v1.2.3` tags get the green "Latest" badge. + prerelease: ${{ contains(steps.tag.outputs.name, '-') }} diff --git a/.pre-commit-ci-config.yaml b/.pre-commit-ci-config.yaml deleted file mode 100644 index f01ef95..0000000 --- a/.pre-commit-ci-config.yaml +++ /dev/null @@ -1,47 +0,0 @@ ---- -repos: - - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v6.0.0 - hooks: - - id: end-of-file-fixer - - id: mixed-line-ending - - id: trailing-whitespace - - id: fix-byte-order-marker - - id: check-executables-have-shebangs - - id: debug-statements - - id: check-yaml - files: .*\.(yaml|yml)$ - exclude: mkdocs.yml - args: [--allow-multiple-documents] - - repo: https://github.com/adrienverge/yamllint.git - rev: v1.38.0 - hooks: - - id: yamllint - files: \.(yaml|yml)$ - types: [file, yaml] - entry: yamllint --strict -f parsable - - repo: https://github.com/streetsidesoftware/cspell-cli - rev: v10.0.0 - hooks: - # Spell check changed files - - id: cspell - # Spell check the commit message - - id: cspell - name: check commit message spelling - args: - - --no-must-find-files - - --no-progress - - --no-summary - - --files - - .git/COMMIT_EDITMSG - 
stages: [commit-msg] - always_run: true - - repo: https://github.com/markdownlint/markdownlint.git - rev: v0.15.0 - hooks: - - id: markdownlint - name: Markdownlint - description: Run markdownlint on your Markdown files - entry: mdl - language: ruby - files: \.(md|mdown|markdown)$ diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 8f7c056..ebac1b1 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -14,16 +14,27 @@ repos: - id: debug-statements - id: check-yaml files: .*\.(yaml|yml)$ - exclude: mkdocs.yml - args: [--allow-multiple-documents] + # mkdocs.yml uses custom !! tags PyYAML doesn't grok. + # chart/**/templates/ contains Helm templates whose + # `{{ ... }}` Go-template syntax PyYAML can't parse — + # `helm lint` is the right validator for those. + exclude: ^(mkdocs\.yml|chart/.*/templates/.*)$ + args: [ --allow-multiple-documents ] - id: requirements-txt-fixer - id: no-commit-to-branch + - repo: https://github.com/gitleaks/gitleaks + rev: v8.30.0 + hooks: + - id: gitleaks - repo: https://github.com/adrienverge/yamllint.git rev: v1.38.0 hooks: - id: yamllint files: \.(yaml|yml)$ - types: [file, yaml] + # Same exclusion as check-yaml above — Helm templates + # have their own validator (`helm lint`). + exclude: ^chart/.*/templates/.*$ + types: [ file, yaml ] entry: yamllint --strict -f parsable - repo: https://github.com/hadolint/hadolint rev: v2.14.0 @@ -43,7 +54,7 @@ repos: - --no-summary - --files - .git/COMMIT_EDITMSG - stages: [commit-msg] + stages: [ commit-msg ] always_run: true - repo: https://github.com/markdownlint/markdownlint.git rev: v0.15.0 diff --git a/CHANGELOG.md b/CHANGELOG.md index c474227..888abf4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -39,6 +39,36 @@ adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). (replica path), `json.RawMessage` (non-owner-GET path), and the base64-heuristic length floors. Runs without docker for tight feedback during development. 
+- **GitHub Release automation** — + [.github/workflows/release.yml](.github/workflows/release.yml) + triggers on `v*.*.*` tag pushes and creates the GitHub Release + page via `softprops/action-gh-release@v2`. The release body + pins readers to the matching container image tag in GHCR and + the CHANGELOG.md at that ref; PR-since-previous-tag notes are + appended automatically. Pre-release tags (`v1.2.3-rc1`, + `v1.2.3-beta`) are flagged via the `prerelease` field; + `workflow_dispatch` lets operators (re-)create a release for + an existing tag without re-tagging. +- **Helm chart for k8s deployment** at + [chart/hypercache/](chart/hypercache). Renders into a + StatefulSet (stable per-pod hostnames so the `id@addr` seed + list resolves deterministically), a headless Service for peer + DNS, separate client and management Services, an optional + chart-managed Secret for the auth token (or external Secret + reference for production rotation), a PodDisruptionBudget + (default `minAvailable: 4`), pod anti-affinity, and a + hardened pod security context (non-root, read-only rootfs, + all caps dropped). The ServiceAccount + Service + StatefulSet + composition matches what `helm install` emits via `helm lint` + and `helm template` against any kube-version. Configure cluster + size, replication factor, capacity, heartbeat, hint TTL, + rebalance interval, and resources via standard Helm values — + see [chart/hypercache/values.yaml](chart/hypercache/values.yaml) + for the full surface. +- **Pre-commit excludes Helm templates** from `check-yaml` and + `yamllint`. Both validators choke on Go-template `{{ ... }}` + syntax inside the chart manifests; `helm lint` is the right + validator for those, and CI runs that separately. 
- **Multi-arch container image workflow** — [.github/workflows/image.yml](.github/workflows/image.yml) builds the `hypercache-server` Docker image for `linux/amd64` and diff --git a/chart/hypercache/Chart.yaml b/chart/hypercache/Chart.yaml new file mode 100644 index 0000000..7006a7f --- /dev/null +++ b/chart/hypercache/Chart.yaml @@ -0,0 +1,25 @@ +--- +apiVersion: v2 +name: hypercache +description: Distributed in-memory cache running the hypercache-server binary as a StatefulSet. +type: application + +# version is the chart's own version (bumped per chart change). +# appVersion tracks the upstream container image tag — keep in sync +# with the repo's release tags so `helm upgrade` reflects what's +# running. +version: 0.1.0 +appVersion: "v0.5.0" + +home: https://github.com/hyp3rd/hypercache +sources: + - https://github.com/hyp3rd/hypercache + +keywords: + - cache + - distributed + - go + +maintainers: + - name: hyp3rd + url: https://github.com/hyp3rd diff --git a/chart/hypercache/templates/NOTES.txt b/chart/hypercache/templates/NOTES.txt new file mode 100644 index 0000000..b59a675 --- /dev/null +++ b/chart/hypercache/templates/NOTES.txt @@ -0,0 +1,39 @@ +{{ .Chart.Name }} v{{ .Chart.AppVersion }} installed as release "{{ .Release.Name }}" in namespace "{{ .Release.Namespace }}". + +Cluster size: {{ .Values.replicaCount }} pods, replication factor {{ .Values.cluster.replicationFactor }}. + +Endpoints (from inside the cluster): + + Client API : http://{{ include "hypercache.fullname" . }}.{{ .Release.Namespace }}.svc.cluster.local:{{ .Values.service.client.port }} + Management : http://{{ include "hypercache.fullname" . 
}}-mgmt.{{ .Release.Namespace }}.svc.cluster.local:{{ .Values.service.mgmt.port }} + +Per-pod hostnames (for direct dist-HTTP debugging): +{{- range $i, $_ := until (.Values.replicaCount | int) }} + {{ include "hypercache.fullname" $ }}-{{ $i }}.{{ include "hypercache.headlessServiceName" $ }}.{{ $.Release.Namespace }}.svc.cluster.local:{{ $.Values.ports.dist }} +{{- end }} + +Quick verification: + + # Wait for all pods to become Ready. + kubectl -n {{ .Release.Namespace }} rollout status statefulset/{{ include "hypercache.fullname" . }} + + # Port-forward the client API to your workstation. + kubectl -n {{ .Release.Namespace }} port-forward svc/{{ include "hypercache.fullname" . }} 8080:{{ .Values.service.client.port }} & + + # PUT a value and read it back from a different pod via the + # service round-robin. + curl -X PUT --data 'world' http://localhost:8080/v1/cache/greeting + curl http://localhost:8080/v1/cache/greeting # should print: world + +{{- if or .Values.auth.token.value .Values.auth.token.existingSecret }} + +Auth is ENABLED — every request must carry an +`Authorization: Bearer <token>` header. The token is mounted +{{- if .Values.auth.token.existingSecret }} from existing secret `{{ .Values.auth.token.existingSecret }}` (key `{{ .Values.auth.token.existingSecretKey }}`). +{{- else }} from the chart-managed secret `{{ include "hypercache.fullname" . }}-auth`. +{{- end }} +{{- end }} + +Operations runbook: see docs/operations.md in the upstream +repository for split-brain handling, hint-queue overflow, +rebalance under load, and replica loss procedures. diff --git a/chart/hypercache/templates/_helpers.tpl b/chart/hypercache/templates/_helpers.tpl new file mode 100644 index 0000000..20a09c1 --- /dev/null +++ b/chart/hypercache/templates/_helpers.tpl @@ -0,0 +1,72 @@ +{{/* +Standard Helm helpers — name + fullname trimmed to k8s's 63-char +limit, common labels block, headless-service name + per-pod DNS +helper used by the seed-list template in the StatefulSet. 
+*/}} + +{{- define "hypercache.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" -}} +{{- end -}} + +{{- define "hypercache.fullname" -}} +{{- if .Values.fullnameOverride -}} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" -}} +{{- else -}} +{{- $name := default .Chart.Name .Values.nameOverride -}} +{{- if contains $name .Release.Name -}} +{{- .Release.Name | trunc 63 | trimSuffix "-" -}} +{{- else -}} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" -}} +{{- end -}} +{{- end -}} +{{- end -}} + +{{- define "hypercache.headlessServiceName" -}} +{{- printf "%s-headless" (include "hypercache.fullname" .) | trunc 63 | trimSuffix "-" -}} +{{- end -}} + +{{- define "hypercache.serviceAccountName" -}} +{{- if .Values.serviceAccount.create -}} +{{- default (include "hypercache.fullname" .) .Values.serviceAccount.name -}} +{{- else -}} +{{- default "default" .Values.serviceAccount.name -}} +{{- end -}} +{{- end -}} + +{{- define "hypercache.labels" -}} +helm.sh/chart: {{ printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +app.kubernetes.io/name: {{ include "hypercache.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +app.kubernetes.io/component: cache +{{- end -}} + +{{- define "hypercache.selectorLabels" -}} +app.kubernetes.io/name: {{ include "hypercache.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end -}} + +{{/* +hypercache.seedList builds the comma-separated `id@addr` value +the dist backend needs to bootstrap a multi-process ring. Every +pod gets the FULL list (including itself); the dist code's +parseSeedSpec drops the self-entry by ID match. This means the +SAME env value applies to every replica, so a StatefulSet (which +only supports a single pod template) can express it. 
+ +Format: `<node-id>@<pod>.<headless-svc>.<namespace>.svc.cluster.local:<dist-port>` +*/}} +{{- define "hypercache.seedList" -}} +{{- $fullname := include "hypercache.fullname" . -}} +{{- $svc := include "hypercache.headlessServiceName" . -}} +{{- $ns := .Release.Namespace -}} +{{- $port := .Values.ports.dist | int -}} +{{- $count := .Values.replicaCount | int -}} +{{- $entries := list -}} +{{- range $i, $_ := until $count -}} +{{- $entry := printf "%s-%d@%s-%d.%s.%s.svc.cluster.local:%d" $fullname $i $fullname $i $svc $ns $port -}} +{{- $entries = append $entries $entry -}} +{{- end -}} +{{- join "," $entries -}} +{{- end -}} diff --git a/chart/hypercache/templates/poddisruptionbudget.yaml b/chart/hypercache/templates/poddisruptionbudget.yaml new file mode 100644 index 0000000..b14bd38 --- /dev/null +++ b/chart/hypercache/templates/poddisruptionbudget.yaml @@ -0,0 +1,22 @@ +{{- if .Values.podDisruptionBudget.enabled }} +--- +# PodDisruptionBudget keeps quorum reachable during voluntary +# disruptions (node drains, rolling node upgrades, kubectl +# evict). With replicaCount=5 and replicationFactor=3, the +# default `minAvailable: 4` keeps every key reachable: at most +# one pod can be voluntarily down at a time, and any single +# pod down still leaves a quorum of 2-of-3 owners for every +# key. Operators on smaller clusters should override +# minAvailable accordingly. +apiVersion: policy/v1 +kind: PodDisruptionBudget +metadata: + name: {{ include "hypercache.fullname" . }} + labels: + {{- include "hypercache.labels" . | nindent 4 }} +spec: + minAvailable: {{ .Values.podDisruptionBudget.minAvailable }} + selector: + matchLabels: + {{- include "hypercache.selectorLabels" . 
| nindent 6 }} +{{- end }} diff --git a/chart/hypercache/templates/secret.yaml b/chart/hypercache/templates/secret.yaml new file mode 100644 index 0000000..1f57c9d --- /dev/null +++ b/chart/hypercache/templates/secret.yaml @@ -0,0 +1,17 @@ +{{- if and .Values.auth.token.value (not .Values.auth.token.existingSecret) }} +--- +# Chart-managed Secret holding the bearer token. Created only +# when `auth.token.value` is set AND `auth.token.existingSecret` +# is empty — operators using sealed-secrets / external-secrets / +# vault should provide an existing secret instead so the chart +# stays out of the secret-rotation loop. +apiVersion: v1 +kind: Secret +metadata: + name: {{ include "hypercache.fullname" . }}-auth + labels: + {{- include "hypercache.labels" . | nindent 4 }} +type: Opaque +stringData: + {{ .Values.auth.token.existingSecretKey }}: {{ .Values.auth.token.value | quote }} +{{- end }} diff --git a/chart/hypercache/templates/service-client.yaml b/chart/hypercache/templates/service-client.yaml new file mode 100644 index 0000000..09aec5c --- /dev/null +++ b/chart/hypercache/templates/service-client.yaml @@ -0,0 +1,48 @@ +--- +# Client API service. Routes external (or in-cluster app) traffic +# to whichever pod is ready — the dist backend's quorum / forward +# logic re-routes from there to the actual owners. Operators that +# want sticky sessions can override `service.client.type` to a +# LoadBalancer with `sessionAffinity: ClientIP`. +apiVersion: v1 +kind: Service +metadata: + name: {{ include "hypercache.fullname" . }} + labels: + {{- include "hypercache.labels" . | nindent 4 }} + {{- with .Values.service.client.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} +spec: + type: {{ .Values.service.client.type }} + selector: + {{- include "hypercache.selectorLabels" . | nindent 4 }} + ports: + - name: api + port: {{ .Values.service.client.port }} + targetPort: api + protocol: TCP +--- +# Management HTTP service. 
Kept on its own service so operators +# can lock it down independently (e.g. ClusterIP-only with +# NetworkPolicy restricting ingress to /metrics scrape pods). +apiVersion: v1 +kind: Service +metadata: + name: {{ include "hypercache.fullname" . }}-mgmt + labels: + {{- include "hypercache.labels" . | nindent 4 }} + {{- with .Values.service.mgmt.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} +spec: + type: {{ .Values.service.mgmt.type }} + selector: + {{- include "hypercache.selectorLabels" . | nindent 4 }} + ports: + - name: mgmt + port: {{ .Values.service.mgmt.port }} + targetPort: mgmt + protocol: TCP diff --git a/chart/hypercache/templates/service-headless.yaml b/chart/hypercache/templates/service-headless.yaml new file mode 100644 index 0000000..ce9586b --- /dev/null +++ b/chart/hypercache/templates/service-headless.yaml @@ -0,0 +1,32 @@ +--- +# Headless service. ClusterIP=None so each pod gets its own +# A record (`<pod>.<headless-svc>.<namespace>.svc.cluster.local`) which +# is what the seed list in the StatefulSet relies on for +# inter-node dist-HTTP routing. Don't expose this externally — +# it's a cluster-internal name resolution helper, not a +# load-balanced endpoint. +apiVersion: v1 +kind: Service +metadata: + name: {{ include "hypercache.headlessServiceName" . }} + labels: + {{- include "hypercache.labels" . | nindent 4 }} +spec: + type: ClusterIP + clusterIP: None + publishNotReadyAddresses: true # peer DNS must resolve before /healthz turns green + selector: + {{- include "hypercache.selectorLabels" . 
| nindent 4 }} + ports: + - name: dist + port: {{ .Values.ports.dist }} + targetPort: dist + protocol: TCP + - name: api + port: {{ .Values.ports.api }} + targetPort: api + protocol: TCP + - name: mgmt + port: {{ .Values.ports.mgmt }} + targetPort: mgmt + protocol: TCP diff --git a/chart/hypercache/templates/serviceaccount.yaml b/chart/hypercache/templates/serviceaccount.yaml new file mode 100644 index 0000000..5854b4b --- /dev/null +++ b/chart/hypercache/templates/serviceaccount.yaml @@ -0,0 +1,13 @@ +{{- if .Values.serviceAccount.create }} +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ include "hypercache.serviceAccountName" . }} + labels: + {{- include "hypercache.labels" . | nindent 4 }} + {{- with .Values.serviceAccount.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} +{{- end }} diff --git a/chart/hypercache/templates/statefulset.yaml b/chart/hypercache/templates/statefulset.yaml new file mode 100644 index 0000000..6805ef8 --- /dev/null +++ b/chart/hypercache/templates/statefulset.yaml @@ -0,0 +1,157 @@ +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: {{ include "hypercache.fullname" . }} + labels: + {{- include "hypercache.labels" . | nindent 4 }} +spec: + # StatefulSet (vs Deployment) gives us stable per-pod hostnames + # — required because each peer's seed list pre-binds the others + # by `...svc.cluster.local`. A + # Deployment's pod names are random suffixes, which would force + # us into a runtime peer-discovery loop the dist transport + # doesn't have. + replicas: {{ .Values.replicaCount }} + serviceName: {{ include "hypercache.headlessServiceName" . }} + podManagementPolicy: Parallel # all peers can come up together + selector: + matchLabels: + {{- include "hypercache.selectorLabels" . | nindent 6 }} + template: + metadata: + labels: + {{- include "hypercache.selectorLabels" . | nindent 8 }} + {{- with .Values.podLabels }} + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- with .Values.podAnnotations }} + annotations: + {{- toYaml . | nindent 8 }} + {{- end }} + spec: + serviceAccountName: {{ include "hypercache.serviceAccountName" . }} + terminationGracePeriodSeconds: {{ .Values.terminationGracePeriodSeconds }} + {{- with .Values.image.pullSecrets }} + imagePullSecrets: + {{- toYaml . | nindent 8 }} + {{- end }} + securityContext: + {{- toYaml .Values.podSecurityContext | nindent 8 }} + {{- with .Values.affinity }} + affinity: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.topologySpreadConstraints }} + topologySpreadConstraints: + {{- toYaml . | nindent 8 }} + {{- end }} + + containers: + - name: hypercache + image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" + imagePullPolicy: {{ .Values.image.pullPolicy }} + securityContext: + {{- toYaml .Values.containerSecurityContext | nindent 12 }} + ports: + - name: api + containerPort: {{ .Values.ports.api }} + protocol: TCP + - name: mgmt + containerPort: {{ .Values.ports.mgmt }} + protocol: TCP + - name: dist + containerPort: {{ .Values.ports.dist }} + protocol: TCP + env: + # POD_NAME flows from downward API → NODE_ID + DIST_ADDR. + # Same StatefulSet template, different identity per pod. + - name: POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name + - name: HYPERCACHE_NODE_ID + value: "$(POD_NAME)" + - name: HYPERCACHE_API_ADDR + value: ":{{ .Values.ports.api }}" + - name: HYPERCACHE_MGMT_ADDR + value: ":{{ .Values.ports.mgmt }}" + - name: HYPERCACHE_DIST_ADDR + value: "$(POD_NAME).{{ include "hypercache.headlessServiceName" . }}.{{ .Release.Namespace }}.svc.cluster.local:{{ .Values.ports.dist }}" + # The same seed list goes to every pod — the binary's + # parseSeedSpec drops the self-entry by ID match. 
+ - name: HYPERCACHE_SEEDS + value: "{{ include "hypercache.seedList" . }}" + - name: HYPERCACHE_REPLICATION + value: "{{ .Values.cluster.replicationFactor }}" + - name: HYPERCACHE_CAPACITY + value: "{{ .Values.cluster.capacity }}" + - name: HYPERCACHE_HEARTBEAT + value: "{{ .Values.cluster.heartbeat }}" + - name: HYPERCACHE_INDIRECT_PROBE_K + value: "{{ .Values.cluster.indirectProbeK }}" + - name: HYPERCACHE_HINT_TTL + value: "{{ .Values.cluster.hintTTL }}" + - name: HYPERCACHE_HINT_REPLAY + value: "{{ .Values.cluster.hintReplay }}" + - name: HYPERCACHE_REBALANCE_INTERVAL + value: "{{ .Values.cluster.rebalanceInterval }}" + - name: HYPERCACHE_LOG_LEVEL + value: "{{ .Values.cluster.logLevel }}" + {{- if or .Values.auth.token.value .Values.auth.token.existingSecret }} + - name: HYPERCACHE_AUTH_TOKEN + valueFrom: + secretKeyRef: + name: {{ default (printf "%s-auth" (include "hypercache.fullname" .)) .Values.auth.token.existingSecret }} + key: {{ .Values.auth.token.existingSecretKey }} + {{- end }} + + # Liveness probes the client API's /healthz (the binary + # itself is still up). Readiness probes the dist HTTP + # /health which flips to 503 on Drain — so a pod removed + # from rotation by an operator drain stops receiving new + # traffic immediately, regardless of liveness. + livenessProbe: + httpGet: + path: /healthz + port: api + initialDelaySeconds: {{ .Values.probes.liveness.initialDelaySeconds }} + periodSeconds: {{ .Values.probes.liveness.periodSeconds }} + timeoutSeconds: {{ .Values.probes.liveness.timeoutSeconds }} + failureThreshold: {{ .Values.probes.liveness.failureThreshold }} + readinessProbe: + httpGet: + path: /health + port: dist + {{- if or .Values.auth.token.value .Values.auth.token.existingSecret }} + httpHeaders: + # /health is auth-wrapped when a token is + # configured. The kubelet doesn't have a Secret + # ref here, so we trust the dist-token Secret is + # mounted; if not configured, no header is sent. 
+ - name: Authorization + value: "Bearer $(HYPERCACHE_AUTH_TOKEN)" + {{- end }} + initialDelaySeconds: {{ .Values.probes.readiness.initialDelaySeconds }} + periodSeconds: {{ .Values.probes.readiness.periodSeconds }} + timeoutSeconds: {{ .Values.probes.readiness.timeoutSeconds }} + failureThreshold: {{ .Values.probes.readiness.failureThreshold }} + + # No preStop hook by design: the hypercache-server binary + # already handles SIGTERM with the canonical Drain → API + # stop → Cache.Stop sequence (capped by the binary's 30s + # internal deadline; terminationGracePeriodSeconds above + # leaves slack). A POST-only /dist/drain rules out + # k8s's GET-only `preStop.httpGet`; relying on SIGTERM + # keeps the chart simple and avoids depending on the + # k8s ≥ 1.30 preStop sleep action. + resources: + {{- toYaml .Values.resources | nindent 12 }} diff --git a/chart/hypercache/values.yaml b/chart/hypercache/values.yaml new file mode 100644 index 0000000..3eb1850 --- /dev/null +++ b/chart/hypercache/values.yaml @@ -0,0 +1,166 @@ +--- +# Default values for the hypercache chart. +# Every key documented here can be overridden via -f or --set. + +# replicaCount is the cluster size. 5 is the documented production +# baseline (matches docker-compose.cluster.yml). Must be at least +# `cluster.replicationFactor` or writes will fail quorum on every +# request. +replicaCount: 5 + +image: + repository: ghcr.io/hyp3rd/hypercache/hypercache-server + # tag falls back to .Chart.AppVersion when empty. + tag: "" + pullPolicy: IfNotPresent + # imagePullSecrets entries are referenced by name from the + # cluster's image-pull configuration. + pullSecrets: [] + +# serviceAccount controls the ServiceAccount the StatefulSet runs +# under. `create: true` provisions one named after the release; +# operators with their own RBAC can set `create: false` and supply +# `name`. 
+serviceAccount: + create: true + name: "" + annotations: {} + +# ports controls the in-pod listener bindings (and matching +# container ports). The listener addresses are computed by combining +# `0.0.0.0` with these ports — clients on the same network reach +# them via the services exposed below. +ports: + api: 8080 + mgmt: 8081 + dist: 7946 + +# cluster bundles the dist-backend tunables. Defaults match the +# server binary's defaults so a `helm install` produces the same +# behavior as a bare `docker run`. +cluster: + replicationFactor: 3 + capacity: 100000 + heartbeat: 1s + indirectProbeK: 2 + hintTTL: 30s + hintReplay: 200ms + rebalanceInterval: 250ms + logLevel: info + +# auth.token controls the bearer-token policy applied to every +# listener. Three modes: +# * empty (default): no auth — fine for trusted networks / +# development. +# * value: inline string. Materialized as a chart-managed Secret. +# * existingSecret + existingSecretKey: pull from an +# operator-managed Secret. Use this for production rotation. +auth: + token: + value: "" + existingSecret: "" + existingSecretKey: token + +# service controls how clients reach the cluster. +# * client: routed via a regular Service for app traffic. +# * mgmt: separate Service so admin endpoints can be locked down +# independently (typically ClusterIP only, no LB). +# * headless: managed automatically (cluster needs stable per-pod +# DNS), not exposed to operators. +service: + client: + type: ClusterIP + port: 8080 + annotations: {} + mgmt: + type: ClusterIP + port: 8081 + annotations: {} + +# resources is intentionally conservative. Production deployments +# should size based on `cluster.capacity` × value size + GC +# headroom. The defaults give a single-pod working set of ~256 MiB, +# which is adequate for the 100k-item default capacity at small +# values. 
+resources: + requests: + cpu: 100m + memory: 256Mi + limits: + cpu: 1000m + memory: 512Mi + +# probes hook into the binary's /healthz (client API) and /health +# (dist) endpoints. Liveness on the binary itself, readiness on the +# dist HTTP /health so the pod stops receiving traffic the moment +# Drain is called (which flips /health to 503). +probes: + liveness: + initialDelaySeconds: 5 + periodSeconds: 10 + timeoutSeconds: 2 + failureThreshold: 3 + readiness: + initialDelaySeconds: 2 + periodSeconds: 5 + timeoutSeconds: 2 + failureThreshold: 2 + +# podDisruptionBudget keeps a quorum-safe number of pods up during +# voluntary disruptions (node drains, rolling updates). With +# replicaCount=5 and replicationFactor=3, minAvailable=4 keeps +# every key reachable. +podDisruptionBudget: + enabled: true + minAvailable: 4 + +# affinity defaults to soft anti-affinity per node — five pods +# *prefer* to spread across five nodes but won't refuse to schedule +# if the cluster is small. Override to `requiredDuringScheduling…` +# for hard spread on large clusters. +affinity: + podAntiAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 100 + podAffinityTerm: + labelSelector: + matchExpressions: + - key: app.kubernetes.io/name + operator: In + values: ["hypercache"] + topologyKey: kubernetes.io/hostname + +# nodeSelector / tolerations / topologySpreadConstraints are passed +# through verbatim — empty by default so the chart works on any +# vanilla cluster. +nodeSelector: {} +tolerations: [] +topologySpreadConstraints: [] + +# podAnnotations / podLabels are copied onto every pod. Useful for +# sidecar injection, scrape configuration, or org-level labels. +podAnnotations: {} +podLabels: {} + +# securityContext tightens the pod-level posture. The image +# (gcr.io/distroless/static-debian12:nonroot) already runs as a +# non-root user; these settings reinforce that contract. 
+podSecurityContext: + runAsNonRoot: true + runAsUser: 65532 + runAsGroup: 65532 + fsGroup: 65532 + seccompProfile: + type: RuntimeDefault + +containerSecurityContext: + allowPrivilegeEscalation: false + readOnlyRootFilesystem: true + capabilities: + drop: ["ALL"] + +# terminationGracePeriodSeconds gives the binary time to drain +# in-flight requests (Drain → API stop → Cache.Stop chain) before +# the kubelet sends SIGKILL. The binary's internal shutdown +# deadline is 30s; this leaves slack. +terminationGracePeriodSeconds: 45 diff --git a/cspell.config.yaml b/cspell.config.yaml index 2c5197b..0efe518 100644 --- a/cspell.config.yaml +++ b/cspell.config.yaml @@ -115,6 +115,7 @@ words: - healthz - histogramcollector - HMAC + - hostnames - honnef - hreq - HTTPTLS @@ -148,6 +149,7 @@ words: - mvdan - nestif - Newf + - nindent - noctx - noinlineerr - nolint @@ -159,6 +161,7 @@ words: - paralleltest - Pipeliner - pluggability + - podname - popd - Prealloc - protoc @@ -168,6 +171,7 @@ words: - repls - Repls - rerr + - rootfs - sarif - sdkmetric - sectools @@ -177,9 +181,12 @@ words: - shellcheck - skeys - SLRU + - softprops - staticcheck + - statefulset - stdlib - stretchr + - svcname - strfnv - strs - subtest @@ -190,6 +197,8 @@ words: - tparallel - tracetest - traefik + - trunc + - tunables - TTLMs - ugorji - unmarshals