From d645cbefe8f3e871296ee0d81516bc79acf9c9d1 Mon Sep 17 00:00:00 2001 From: "Jens W. Klein" Date: Wed, 8 Apr 2026 09:52:58 +0200 Subject: [PATCH 1/5] fix: add -j none to varnish args for non-root operation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Architecture §8.3 H4-Fix requires -j none when running as non-root. Without it, varnish tries chroot jail setup which needs root. Co-Authored-By: Claude Opus 4.6 (1M context) --- internal/controller/statefulset.go | 1 + 1 file changed, 1 insertion(+) diff --git a/internal/controller/statefulset.go b/internal/controller/statefulset.go index 3af479e..5c83292 100644 --- a/internal/controller/statefulset.go +++ b/internal/controller/statefulset.go @@ -65,6 +65,7 @@ func (r *VinylCacheReconciler) reconcileStatefulSet(ctx context.Context, vc *v1a Name: "varnish", Image: vc.Spec.Image, Args: []string{ + "-j", "none", "-T", "127.0.0.1:6082", "-S", "/etc/varnish/secret", }, From c8b9822419e390e5a00e7e56a888f414e0e2b13d Mon Sep 17 00:00:00 2001 From: "Jens W. Klein" Date: Wed, 8 Apr 2026 09:54:59 +0200 Subject: [PATCH 2/5] feat: add readiness probe on agent and preStop hook on varnish MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Readiness probe on agent /health (port 9090) ensures pods are only marked Ready when the agent can reach varnishd. preStop sleep(5) gives the endpoints controller time to remove the pod from routing before varnish stops (architecture §3.3 K3-Fix). 
Co-Authored-By: Claude Opus 4.6 (1M context) --- internal/controller/statefulset.go | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/internal/controller/statefulset.go b/internal/controller/statefulset.go index 5c83292..9349cd7 100644 --- a/internal/controller/statefulset.go +++ b/internal/controller/statefulset.go @@ -25,6 +25,7 @@ import ( appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/util/intstr" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" @@ -98,6 +99,13 @@ func (r *VinylCacheReconciler) reconcileStatefulSet(ctx context.Context, vc *v1a MountPath: "/tmp", }, }, + Lifecycle: &corev1.Lifecycle{ + PreStop: &corev1.LifecycleHandler{ + Exec: &corev1.ExecAction{ + Command: []string{"sleep", "5"}, + }, + }, + }, SecurityContext: &corev1.SecurityContext{ RunAsNonRoot: boolPtr(true), ReadOnlyRootFilesystem: boolPtr(true), @@ -149,6 +157,17 @@ func (r *VinylCacheReconciler) reconcileStatefulSet(ctx context.Context, vc *v1a ReadOnly: true, }, }, + ReadinessProbe: &corev1.Probe{ + ProbeHandler: corev1.ProbeHandler{ + HTTPGet: &corev1.HTTPGetAction{ + Path: "/health", + Port: intstr.FromInt32(agentPort), + }, + }, + InitialDelaySeconds: 5, + PeriodSeconds: 5, + FailureThreshold: 6, + }, SecurityContext: &corev1.SecurityContext{ RunAsNonRoot: boolPtr(true), ReadOnlyRootFilesystem: boolPtr(true), From cc6e6e3ac0c3290ceb2e182f89541a5ddcf2ebde Mon Sep 17 00:00:00 2001 From: "Jens W. Klein" Date: Wed, 8 Apr 2026 09:57:14 +0200 Subject: [PATCH 3/5] feat: health endpoint returns 503 until operator pushes VCL Pods are not ready (readiness probe fails) until the operator successfully pushes real VCL. The 'boot' VCL (varnish default) triggers a 503 response from /health. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- internal/agent/handler.go | 15 ++++++++++++--- internal/agent/handler_test.go | 33 ++++++++++++++++++++++++++++++++- internal/agent/server_test.go | 7 ++++++- 3 files changed, 50 insertions(+), 5 deletions(-) diff --git a/internal/agent/handler.go b/internal/agent/handler.go index 4ad6689..3daacb1 100644 --- a/internal/agent/handler.go +++ b/internal/agent/handler.go @@ -182,14 +182,23 @@ func (h *Handler) PurgeXkey(w http.ResponseWriter, r *http.Request) { writeJSON(w, http.StatusOK, xkeyPurgeResponse{Status: "ok", Purged: purged}) } -// Health handles GET /health (no auth required) +// Health handles GET /health (no auth required). +// Returns 503 until the operator pushes real VCL (active VCL name != "boot"). +// This drives the Kubernetes readiness probe — pods are not Ready until +// the operator successfully pushes VCL. func (h *Handler) Health(w http.ResponseWriter, r *http.Request) { ctx, cancel := context.WithTimeout(r.Context(), 5*time.Second) defer cancel() - _, err := h.admin.ActiveVCL(ctx) + name, err := h.admin.ActiveVCL(ctx) if err != nil { writeJSON(w, http.StatusServiceUnavailable, map[string]string{"status": "error", "varnish": "not responding"}) return } - writeJSON(w, http.StatusOK, map[string]string{"status": "ok", "varnish": "running"}) + // "boot" is the default VCL name loaded at varnish startup. + // The pod is not ready until the operator pushes a named VCL. 
+ if name == "boot" { + writeJSON(w, http.StatusServiceUnavailable, map[string]string{"status": "initializing", "varnish": "waiting for VCL push"}) + return + } + writeJSON(w, http.StatusOK, map[string]string{"status": "ok", "varnish": "running", "vcl": name}) } diff --git a/internal/agent/handler_test.go b/internal/agent/handler_test.go index 63c19d4..7b59bd1 100644 --- a/internal/agent/handler_test.go +++ b/internal/agent/handler_test.go @@ -212,7 +212,10 @@ func TestValidateVCL_WrongMethod_Returns405(t *testing.T) { } func TestHealth_VarnishRunning_Returns200(t *testing.T) { - h, _ := newTestHandler() // default mock returns "boot", nil + h, mock := newTestHandler() + mock.activeVCLFn = func(ctx context.Context) (string, error) { + return "operator-pushed-vcl", nil + } req := httptest.NewRequest(http.MethodGet, "/health", nil) rr := httptest.NewRecorder() h.Health(rr, req) @@ -234,6 +237,34 @@ func TestHealth_VarnishDown_Returns503(t *testing.T) { assert.Equal(t, http.StatusServiceUnavailable, rr.Code) } +func TestHealth_BootstrapVCL_Returns503(t *testing.T) { + h, mock := newTestHandler() + mock.activeVCLFn = func(ctx context.Context) (string, error) { + return "boot", nil + } + req := httptest.NewRequest(http.MethodGet, "/health", nil) + rr := httptest.NewRecorder() + h.Health(rr, req) + assert.Equal(t, http.StatusServiceUnavailable, rr.Code) + var resp map[string]string + require.NoError(t, json.Unmarshal(rr.Body.Bytes(), &resp)) + assert.Equal(t, "initializing", resp["status"]) +} + +func TestHealth_OperatorVCL_Returns200(t *testing.T) { + h, mock := newTestHandler() + mock.activeVCLFn = func(ctx context.Context) (string, error) { + return "aaf-prod-cache-abc12345", nil + } + req := httptest.NewRequest(http.MethodGet, "/health", nil) + rr := httptest.NewRecorder() + h.Health(rr, req) + assert.Equal(t, http.StatusOK, rr.Code) + var resp map[string]string + require.NoError(t, json.Unmarshal(rr.Body.Bytes(), &resp)) + assert.Equal(t, "ok", resp["status"]) +} + 
func TestPurgeXkey_EmptyKeys_Returns400(t *testing.T) { h, _ := newTestHandler() body := `{"keys":[]}` diff --git a/internal/agent/server_test.go b/internal/agent/server_test.go index 197a6c3..7c0046d 100644 --- a/internal/agent/server_test.go +++ b/internal/agent/server_test.go @@ -1,6 +1,7 @@ package agent_test import ( + "context" "net/http" "net/http/httptest" "testing" @@ -19,8 +20,12 @@ func newTestServer(t *testing.T) (*agent.Server, *mockAdmin) { } func TestServer_HealthEndpoint_NoAuth(t *testing.T) { - // Use the handler directly via httptest to avoid binding a real port + // Use the handler directly via httptest to avoid binding a real port. + // Set a non-boot VCL so the readiness check passes. mock := &mockAdmin{} + mock.activeVCLFn = func(ctx context.Context) (string, error) { + return "operator-pushed-vcl", nil + } xkey := agent.NewXkeyPurger("http://127.0.0.1:8080") h := agent.NewHandler(mock, xkey) From abdfa290a2cad571ecef3b7ac62a33378c74cd3d Mon Sep 17 00:00:00 2001 From: "Jens W. Klein" Date: Wed, 8 Apr 2026 09:58:39 +0200 Subject: [PATCH 4/5] fix: use hash-based VCL names to prevent vcl.inline collision VCL name is now {namespace}-{name}-{hash[:8]} (the first 8 characters of the generated VCL hash). Each VCL generation gets a unique name, preventing 'Already a VCL named ...' errors when the operator pushes updated VCL. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- internal/controller/vcl_push.go | 2 +- internal/controller/vcl_push_test.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/internal/controller/vcl_push.go b/internal/controller/vcl_push.go index 25b5d0e..a8819ed 100644 --- a/internal/controller/vcl_push.go +++ b/internal/controller/vcl_push.go @@ -56,7 +56,7 @@ func (r *VinylCacheReconciler) pushVCL( backoffBase = vc.Spec.Retry.BackoffBase.Duration } - vclName := fmt.Sprintf("%s-%s", vc.Namespace, vc.Name) + vclName := fmt.Sprintf("%s-%s-%s", vc.Namespace, vc.Name, result.Hash[:8]) type pushResult struct { peer generator.PeerBackend diff --git a/internal/controller/vcl_push_test.go b/internal/controller/vcl_push_test.go index dd72c07..475e8ed 100644 --- a/internal/controller/vcl_push_test.go +++ b/internal/controller/vcl_push_test.go @@ -60,7 +60,7 @@ func makeVC() *v1alpha1.VinylCache { } func makeResult() *generator.Result { - return &generator.Result{VCL: "vcl 4.1; backend default { .host = \"127.0.0.1\"; }", Hash: "abc123"} + return &generator.Result{VCL: "vcl 4.1; backend default { .host = \"127.0.0.1\"; }", Hash: "abc123def456789012345678901234567890123456789012345678901234"} } func makePeers(n int) []generator.PeerBackend { From 1ceb4c34a75b784fbcad1deb5b3886ca296ab209 Mon Sep 17 00:00:00 2001 From: "Jens W. Klein" Date: Wed, 8 Apr 2026 10:00:58 +0200 Subject: [PATCH 5/5] feat: bootstrap VCL via ConfigMap for clean pod startup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Operator creates a ConfigMap with placeholder VCL that returns 503 'Cache initializing'. Mounted as /etc/varnish/default.vcl so varnish starts cleanly. Combined with the readiness probe, pods are not marked Ready until the operator pushes real VCL. This replaces the need for a vinyl-init container (architecture §3.3 — simpler approach with same result). 
Co-Authored-By: Claude Opus 4.6 (1M context) --- internal/controller/configmap.go | 73 ++++++++++++++++++++ internal/controller/statefulset.go | 16 +++++ internal/controller/vinylcache_controller.go | 6 ++ 3 files changed, 95 insertions(+) create mode 100644 internal/controller/configmap.go diff --git a/internal/controller/configmap.go b/internal/controller/configmap.go new file mode 100644 index 0000000..d5d6b80 --- /dev/null +++ b/internal/controller/configmap.go @@ -0,0 +1,73 @@ +/* +Copyright 2026. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package controller + +import ( + "context" + "fmt" + + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" + + v1alpha1 "github.com/bluedynamics/cloud-vinyl/api/v1alpha1" +) + +const bootstrapVCL = `vcl 4.1; + +backend bootstrap_placeholder { + .host = "127.0.0.1"; + .port = "1"; +} + +sub vcl_recv { + return (synth(503, "Cache initializing — waiting for VCL push from cloud-vinyl operator")); +} + +sub vcl_synth { + set resp.http.Content-Type = "text/plain; charset=utf-8"; + set resp.http.Retry-After = "5"; + synthetic(resp.reason); + return (deliver); +} +` + +// reconcileConfigMap creates or updates the ConfigMap containing the bootstrap VCL. 
+func (r *VinylCacheReconciler) reconcileConfigMap(ctx context.Context, vc *v1alpha1.VinylCache) error { + cm := &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: vc.Name + "-bootstrap-vcl", + Namespace: vc.Namespace, + }, + } + + _, err := controllerutil.CreateOrUpdate(ctx, r.Client, cm, func() error { + if err := ctrl.SetControllerReference(vc, cm, r.Scheme); err != nil { + return err + } + cm.Labels = map[string]string{labelVinylCacheName: vc.Name} + cm.Data = map[string]string{ + "default.vcl": bootstrapVCL, + } + return nil + }) + if err != nil { + return fmt.Errorf("reconciling bootstrap VCL ConfigMap: %w", err) + } + return nil +} diff --git a/internal/controller/statefulset.go b/internal/controller/statefulset.go index 9349cd7..c37f6a5 100644 --- a/internal/controller/statefulset.go +++ b/internal/controller/statefulset.go @@ -98,6 +98,12 @@ func (r *VinylCacheReconciler) reconcileStatefulSet(ctx context.Context, vc *v1a Name: "varnish-tmp", MountPath: "/tmp", }, + { + Name: "bootstrap-vcl", + MountPath: "/etc/varnish/default.vcl", + SubPath: "default.vcl", + ReadOnly: true, + }, }, Lifecycle: &corev1.Lifecycle{ PreStop: &corev1.LifecycleHandler{ @@ -206,6 +212,16 @@ func (r *VinylCacheReconciler) reconcileStatefulSet(ctx context.Context, vc *v1a Name: "varnish-tmp", VolumeSource: corev1.VolumeSource{EmptyDir: &corev1.EmptyDirVolumeSource{}}, }, + { + Name: "bootstrap-vcl", + VolumeSource: corev1.VolumeSource{ + ConfigMap: &corev1.ConfigMapVolumeSource{ + LocalObjectReference: corev1.LocalObjectReference{ + Name: vc.Name + "-bootstrap-vcl", + }, + }, + }, + }, } uid := int64(65532) diff --git a/internal/controller/vinylcache_controller.go b/internal/controller/vinylcache_controller.go index 412b8e1..391f898 100644 --- a/internal/controller/vinylcache_controller.go +++ b/internal/controller/vinylcache_controller.go @@ -114,6 +114,11 @@ func (r *VinylCacheReconciler) Reconcile(ctx context.Context, req ctrl.Request) return ctrl.Result{}, err } + // 
9b. Reconcile bootstrap VCL ConfigMap. + if err := r.reconcileConfigMap(ctx, vc); err != nil { + return ctrl.Result{}, err + } + // 10. Debounce check. if remaining := r.debounceRemaining(vc); remaining > 0 { return ctrl.Result{RequeueAfter: remaining}, nil @@ -214,6 +219,7 @@ func (r *VinylCacheReconciler) SetupWithManager(mgr ctrl.Manager) error { Owns(&appsv1.StatefulSet{}). Owns(&corev1.Service{}). Owns(&corev1.Secret{}). + Owns(&corev1.ConfigMap{}). Watches( &corev1.Pod{}, handler.EnqueueRequestsFromMapFunc(r.podToVinylCache),