From 74b3b540bbb635effba176d46744c004d0b97c84 Mon Sep 17 00:00:00 2001 From: Nikolai Emil Damm Date: Tue, 5 May 2026 21:56:45 +0200 Subject: [PATCH 01/14] feat: add default-deny network policies and security hardening - Add Kyverno ClusterPolicy to generate default-deny CiliumNetworkPolicy and allow-dns policy in every namespace (except kube-system, kube-public, kube-node-lease) - Add per-namespace CiliumNetworkPolicies co-located with each controller and app, opening only the specific connections needed - Harden auth-proxy Deployment with runAsNonRoot, capabilities drop, and readOnlyRootFilesystem (fixes kubescape C-0013) - Harden minio Deployment and Job (Docker-only) with non-root security context (fixes kubescape C-0013) - Replace flux-operator's narrow gateway-only networkpolicy with a broader flux-system allow policy covering all Flux controllers Namespaces with network policies: cert-manager, cnpg-system, dex, external-dns, flux-system, headlamp, homepage, keda, kubescape, kyverno, kubelet-serving-cert-approver, longhorn-system, monitoring, oauth2-proxy, opencost, reloader, velero, vertical-pod-autoscaler, wedding-app, whoami Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- k8s/bases/apps/headlamp/kustomization.yaml | 1 + k8s/bases/apps/headlamp/networkpolicy.yaml | 20 +++++ k8s/bases/apps/homepage/kustomization.yaml | 1 + k8s/bases/apps/homepage/networkpolicy.yaml | 20 +++++ k8s/bases/apps/wedding-app/kustomization.yaml | 1 + k8s/bases/apps/wedding-app/networkpolicy.yaml | 27 +++++++ k8s/bases/apps/whoami/kustomization.yaml | 1 + k8s/bases/apps/whoami/networkpolicy.yaml | 16 ++++ .../best-practices/add-default-deny.yaml | 76 +++++++++++++++++++ .../cluster-policies/kustomization.yaml | 1 + .../controllers/auth-proxy/deployment.yaml | 10 +++ .../cert-manager/kustomization.yaml | 1 + .../cert-manager/networkpolicy.yaml | 36 +++++++++ .../cloudnative-pg/kustomization.yaml | 1 + .../cloudnative-pg/networkpolicy.yaml | 19 +++++ 
.../controllers/dex/kustomization.yaml | 1 + .../controllers/dex/networkpolicy.yaml | 36 +++++++++ .../flux-operator/networkpolicy.yaml | 30 ++++++-- .../controllers/keda/kustomization.yaml | 1 + .../controllers/keda/networkpolicy.yaml | 35 +++++++++ .../kube-prometheus-stack/kustomization.yaml | 1 + .../kube-prometheus-stack/networkpolicy.yaml | 42 ++++++++++ .../controllers/kubescape/kustomization.yaml | 1 + .../controllers/kubescape/networkpolicy.yaml | 34 +++++++++ .../controllers/kyverno/kustomization.yaml | 1 + .../controllers/kyverno/networkpolicy.yaml | 27 +++++++ .../oauth2-proxy/kustomization.yaml | 1 + .../oauth2-proxy/networkpolicy.yaml | 40 ++++++++++ .../controllers/opencost/kustomization.yaml | 1 + .../controllers/opencost/networkpolicy.yaml | 30 ++++++++ .../controllers/reloader/kustomization.yaml | 1 + .../controllers/reloader/networkpolicy.yaml | 11 +++ .../controllers/velero/kustomization.yaml | 1 + .../controllers/velero/networkpolicy.yaml | 27 +++++++ .../kustomization.yaml | 1 + .../networkpolicy.yaml | 23 ++++++ .../controllers/minio/deployment.yaml | 23 ++++++ .../external-dns/kustomization.yaml | 1 + .../external-dns/networkpolicy.yaml | 27 +++++++ .../kustomization.yaml | 1 + .../networkpolicy.yaml | 20 +++++ .../controllers/longhorn/kustomization.yaml | 1 + .../controllers/longhorn/networkpolicy.yaml | 38 ++++++++++ 43 files changed, 682 insertions(+), 5 deletions(-) create mode 100644 k8s/bases/apps/headlamp/networkpolicy.yaml create mode 100644 k8s/bases/apps/homepage/networkpolicy.yaml create mode 100644 k8s/bases/apps/wedding-app/networkpolicy.yaml create mode 100644 k8s/bases/apps/whoami/networkpolicy.yaml create mode 100644 k8s/bases/infrastructure/cluster-policies/best-practices/add-default-deny.yaml create mode 100644 k8s/bases/infrastructure/controllers/cert-manager/networkpolicy.yaml create mode 100644 k8s/bases/infrastructure/controllers/cloudnative-pg/networkpolicy.yaml create mode 100644 
k8s/bases/infrastructure/controllers/dex/networkpolicy.yaml create mode 100644 k8s/bases/infrastructure/controllers/keda/networkpolicy.yaml create mode 100644 k8s/bases/infrastructure/controllers/kube-prometheus-stack/networkpolicy.yaml create mode 100644 k8s/bases/infrastructure/controllers/kubescape/networkpolicy.yaml create mode 100644 k8s/bases/infrastructure/controllers/kyverno/networkpolicy.yaml create mode 100644 k8s/bases/infrastructure/controllers/oauth2-proxy/networkpolicy.yaml create mode 100644 k8s/bases/infrastructure/controllers/opencost/networkpolicy.yaml create mode 100644 k8s/bases/infrastructure/controllers/reloader/networkpolicy.yaml create mode 100644 k8s/bases/infrastructure/controllers/velero/networkpolicy.yaml create mode 100644 k8s/bases/infrastructure/controllers/vertical-pod-autoscaler/networkpolicy.yaml create mode 100644 k8s/providers/hetzner/infrastructure/controllers/external-dns/networkpolicy.yaml create mode 100644 k8s/providers/hetzner/infrastructure/controllers/kubelet-serving-cert-approver/networkpolicy.yaml create mode 100644 k8s/providers/hetzner/infrastructure/controllers/longhorn/networkpolicy.yaml diff --git a/k8s/bases/apps/headlamp/kustomization.yaml b/k8s/bases/apps/headlamp/kustomization.yaml index 55de23346..e3d70b333 100644 --- a/k8s/bases/apps/headlamp/kustomization.yaml +++ b/k8s/bases/apps/headlamp/kustomization.yaml @@ -6,3 +6,4 @@ resources: - helm-repository.yaml - httproute.yaml - http-scaled-object.yaml + - networkpolicy.yaml diff --git a/k8s/bases/apps/headlamp/networkpolicy.yaml b/k8s/bases/apps/headlamp/networkpolicy.yaml new file mode 100644 index 000000000..a11521dba --- /dev/null +++ b/k8s/bases/apps/headlamp/networkpolicy.yaml @@ -0,0 +1,20 @@ +apiVersion: cilium.io/v2 +kind: CiliumNetworkPolicy +metadata: + name: allow-headlamp + namespace: headlamp +spec: + endpointSelector: {} + ingress: + # KEDA interceptor proxy routes traffic from gateway + - fromEndpoints: + - matchLabels: + 
k8s:io.kubernetes.pod.namespace: keda + toPorts: + - ports: + - port: "4466" + protocol: TCP + egress: + # Kube API for dashboard + - toEntities: + - kube-apiserver diff --git a/k8s/bases/apps/homepage/kustomization.yaml b/k8s/bases/apps/homepage/kustomization.yaml index 80baa373b..fea85fe91 100644 --- a/k8s/bases/apps/homepage/kustomization.yaml +++ b/k8s/bases/apps/homepage/kustomization.yaml @@ -7,3 +7,4 @@ resources: - httproute.yaml - namespace.yaml - pod-disruption-budget.yaml + - networkpolicy.yaml diff --git a/k8s/bases/apps/homepage/networkpolicy.yaml b/k8s/bases/apps/homepage/networkpolicy.yaml new file mode 100644 index 000000000..2f923bce1 --- /dev/null +++ b/k8s/bases/apps/homepage/networkpolicy.yaml @@ -0,0 +1,20 @@ +apiVersion: cilium.io/v2 +kind: CiliumNetworkPolicy +metadata: + name: allow-homepage + namespace: homepage +spec: + endpointSelector: {} + ingress: + # Traffic from oauth2-proxy (via gateway → oauth2-proxy → homepage) + - fromEndpoints: + - matchLabels: + k8s:io.kubernetes.pod.namespace: oauth2-proxy + toPorts: + - ports: + - port: "3000" + protocol: TCP + egress: + # Kube API for widget data + - toEntities: + - kube-apiserver diff --git a/k8s/bases/apps/wedding-app/kustomization.yaml b/k8s/bases/apps/wedding-app/kustomization.yaml index 242684561..5b3feaba0 100644 --- a/k8s/bases/apps/wedding-app/kustomization.yaml +++ b/k8s/bases/apps/wedding-app/kustomization.yaml @@ -7,3 +7,4 @@ resources: - serviceaccount.yaml - sops-age-secret.enc.yaml - sync.yaml + - networkpolicy.yaml diff --git a/k8s/bases/apps/wedding-app/networkpolicy.yaml b/k8s/bases/apps/wedding-app/networkpolicy.yaml new file mode 100644 index 000000000..ac76db2b4 --- /dev/null +++ b/k8s/bases/apps/wedding-app/networkpolicy.yaml @@ -0,0 +1,27 @@ +apiVersion: cilium.io/v2 +kind: CiliumNetworkPolicy +metadata: + name: allow-wedding-app + namespace: wedding-app +spec: + endpointSelector: {} + ingress: + # Gateway ingress + - fromEntities: + - ingress + toPorts: + - ports: + - 
port: "3000" + protocol: TCP + # Intra-namespace (app → db, db replication) + - fromEndpoints: + - matchLabels: + k8s:io.kubernetes.pod.namespace: wedding-app + egress: + # Intra-namespace (app → db) + - toEndpoints: + - matchLabels: + k8s:io.kubernetes.pod.namespace: wedding-app + # Kube API (for CNPG operator managing the cluster) + - toEntities: + - kube-apiserver diff --git a/k8s/bases/apps/whoami/kustomization.yaml b/k8s/bases/apps/whoami/kustomization.yaml index 55de23346..e3d70b333 100644 --- a/k8s/bases/apps/whoami/kustomization.yaml +++ b/k8s/bases/apps/whoami/kustomization.yaml @@ -6,3 +6,4 @@ resources: - helm-repository.yaml - httproute.yaml - http-scaled-object.yaml + - networkpolicy.yaml diff --git a/k8s/bases/apps/whoami/networkpolicy.yaml b/k8s/bases/apps/whoami/networkpolicy.yaml new file mode 100644 index 000000000..382baa4cf --- /dev/null +++ b/k8s/bases/apps/whoami/networkpolicy.yaml @@ -0,0 +1,16 @@ +apiVersion: cilium.io/v2 +kind: CiliumNetworkPolicy +metadata: + name: allow-whoami + namespace: whoami +spec: + endpointSelector: {} + ingress: + # KEDA interceptor proxy routes traffic from gateway + - fromEndpoints: + - matchLabels: + k8s:io.kubernetes.pod.namespace: keda + toPorts: + - ports: + - port: "80" + protocol: TCP diff --git a/k8s/bases/infrastructure/cluster-policies/best-practices/add-default-deny.yaml b/k8s/bases/infrastructure/cluster-policies/best-practices/add-default-deny.yaml new file mode 100644 index 000000000..ae9842232 --- /dev/null +++ b/k8s/bases/infrastructure/cluster-policies/best-practices/add-default-deny.yaml @@ -0,0 +1,76 @@ +# Generates a default-deny CiliumNetworkPolicy and a DNS-allow policy +# in every namespace. This ensures zero-trust networking by default — +# workloads must explicitly allow the traffic they need. 
+apiVersion: kyverno.io/v1 +kind: ClusterPolicy +metadata: + name: add-default-deny + annotations: + policies.kyverno.io/title: Default Deny Network Policy + policies.kyverno.io/category: Networking, Best Practices + policies.kyverno.io/subject: CiliumNetworkPolicy + policies.kyverno.io/minversion: 1.6.0 + policies.kyverno.io/description: >- + Generates a CiliumNetworkPolicy that activates Cilium's whitelist + mode for all endpoints in a namespace (effectively deny-all), plus + a companion policy that allows DNS egress to kube-dns so pods can + still resolve names. +spec: + rules: + - name: generate-default-deny + match: + any: + - resources: + kinds: + - Namespace + exclude: + any: + - resources: + names: + - kube-system + - kube-public + - kube-node-lease + generate: + generateExisting: true + apiVersion: cilium.io/v2 + kind: CiliumNetworkPolicy + name: default-deny + synchronize: true + namespace: "{{request.object.metadata.name}}" + data: + spec: + endpointSelector: {} + - name: generate-allow-dns + match: + any: + - resources: + kinds: + - Namespace + exclude: + any: + - resources: + names: + - kube-system + - kube-public + - kube-node-lease + generate: + generateExisting: true + apiVersion: cilium.io/v2 + kind: CiliumNetworkPolicy + name: allow-dns + synchronize: true + namespace: "{{request.object.metadata.name}}" + data: + spec: + endpointSelector: {} + egress: + - toEndpoints: + - matchLabels: + k8s:io.kubernetes.pod.namespace: kube-system + k8s-app: kube-dns + toPorts: + - ports: + - port: "53" + protocol: UDP + - port: "53" + protocol: TCP diff --git a/k8s/bases/infrastructure/cluster-policies/kustomization.yaml b/k8s/bases/infrastructure/cluster-policies/kustomization.yaml index 991c71b71..68b21edca 100644 --- a/k8s/bases/infrastructure/cluster-policies/kustomization.yaml +++ b/k8s/bases/infrastructure/cluster-policies/kustomization.yaml @@ -2,6 +2,7 @@ apiVersion: kustomize.config.k8s.io/v1beta1 kind: Kustomization resources: + - 
best-practices/add-default-deny.yaml - best-practices/add-ns-quota.yaml - flux/auto-vpa.yaml - flux/helm-release-enable-tests.yaml diff --git a/k8s/bases/infrastructure/controllers/auth-proxy/deployment.yaml b/k8s/bases/infrastructure/controllers/auth-proxy/deployment.yaml index 26e9b5a0c..a957ec80f 100644 --- a/k8s/bases/infrastructure/controllers/auth-proxy/deployment.yaml +++ b/k8s/bases/infrastructure/controllers/auth-proxy/deployment.yaml @@ -30,6 +30,10 @@ spec: labelSelector: matchLabels: app: auth-proxy + securityContext: + runAsNonRoot: true + seccompProfile: + type: RuntimeDefault containers: - name: traefik image: docker.io/library/traefik:v3.6 @@ -37,6 +41,12 @@ spec: - --configFile=/etc/traefik/traefik.yaml ports: - containerPort: 8080 + securityContext: + runAsNonRoot: true + allowPrivilegeEscalation: false + capabilities: + drop: ["ALL"] + readOnlyRootFilesystem: true volumeMounts: - name: config mountPath: /etc/traefik diff --git a/k8s/bases/infrastructure/controllers/cert-manager/kustomization.yaml b/k8s/bases/infrastructure/controllers/cert-manager/kustomization.yaml index 7edec9cc4..d47b36be5 100644 --- a/k8s/bases/infrastructure/controllers/cert-manager/kustomization.yaml +++ b/k8s/bases/infrastructure/controllers/cert-manager/kustomization.yaml @@ -4,3 +4,4 @@ resources: - namespace.yaml - helm-release.yaml - helm-repository.yaml + - networkpolicy.yaml diff --git a/k8s/bases/infrastructure/controllers/cert-manager/networkpolicy.yaml b/k8s/bases/infrastructure/controllers/cert-manager/networkpolicy.yaml new file mode 100644 index 000000000..87f3c6689 --- /dev/null +++ b/k8s/bases/infrastructure/controllers/cert-manager/networkpolicy.yaml @@ -0,0 +1,36 @@ +apiVersion: cilium.io/v2 +kind: CiliumNetworkPolicy +metadata: + name: allow-cert-manager + namespace: cert-manager +spec: + endpointSelector: {} + ingress: + # Webhook traffic from kube-apiserver + - fromEntities: + - kube-apiserver + toPorts: + - ports: + - port: "443" + protocol: TCP + # 
Metrics scraping from monitoring + - fromEndpoints: + - matchLabels: + k8s:io.kubernetes.pod.namespace: monitoring + toPorts: + - ports: + - port: "9402" + protocol: TCP + egress: + # ACME challenges / CA communication + - toEntities: + - world + toPorts: + - ports: + - port: "443" + protocol: TCP + - port: "80" + protocol: TCP + # Kube API for managing certificates + - toEntities: + - kube-apiserver diff --git a/k8s/bases/infrastructure/controllers/cloudnative-pg/kustomization.yaml b/k8s/bases/infrastructure/controllers/cloudnative-pg/kustomization.yaml index 34f97aec9..6ed64f866 100644 --- a/k8s/bases/infrastructure/controllers/cloudnative-pg/kustomization.yaml +++ b/k8s/bases/infrastructure/controllers/cloudnative-pg/kustomization.yaml @@ -6,3 +6,4 @@ resources: - helm-repository.yaml - pod-disruption-budget.yaml - r2-credentials-secret.yaml + - networkpolicy.yaml diff --git a/k8s/bases/infrastructure/controllers/cloudnative-pg/networkpolicy.yaml b/k8s/bases/infrastructure/controllers/cloudnative-pg/networkpolicy.yaml new file mode 100644 index 000000000..bc6944f0d --- /dev/null +++ b/k8s/bases/infrastructure/controllers/cloudnative-pg/networkpolicy.yaml @@ -0,0 +1,19 @@ +apiVersion: cilium.io/v2 +kind: CiliumNetworkPolicy +metadata: + name: allow-cnpg + namespace: cnpg-system +spec: + endpointSelector: {} + ingress: + # Webhook from kube-apiserver + - fromEntities: + - kube-apiserver + toPorts: + - ports: + - port: "443" + protocol: TCP + egress: + # Kube API for managing PG clusters + - toEntities: + - kube-apiserver diff --git a/k8s/bases/infrastructure/controllers/dex/kustomization.yaml b/k8s/bases/infrastructure/controllers/dex/kustomization.yaml index 2eb0946bd..5365a2272 100644 --- a/k8s/bases/infrastructure/controllers/dex/kustomization.yaml +++ b/k8s/bases/infrastructure/controllers/dex/kustomization.yaml @@ -5,3 +5,4 @@ resources: - helm-release.yaml - helm-repository.yaml - httproute.yaml + - networkpolicy.yaml diff --git 
a/k8s/bases/infrastructure/controllers/dex/networkpolicy.yaml b/k8s/bases/infrastructure/controllers/dex/networkpolicy.yaml new file mode 100644 index 000000000..6e40d14da --- /dev/null +++ b/k8s/bases/infrastructure/controllers/dex/networkpolicy.yaml @@ -0,0 +1,36 @@ +apiVersion: cilium.io/v2 +kind: CiliumNetworkPolicy +metadata: + name: allow-dex + namespace: dex +spec: + endpointSelector: {} + ingress: + # Gateway ingress for OIDC endpoints + - fromEntities: + - ingress + toPorts: + - ports: + - port: "5556" + protocol: TCP + # gRPC from oauth2-proxy + - fromEndpoints: + - matchLabels: + k8s:io.kubernetes.pod.namespace: oauth2-proxy + toPorts: + - ports: + - port: "5556" + protocol: TCP + - port: "5558" + protocol: TCP + egress: + # Kube API for reading secrets + - toEntities: + - kube-apiserver + # GitHub/OIDC upstream connectors + - toEntities: + - world + toPorts: + - ports: + - port: "443" + protocol: TCP diff --git a/k8s/bases/infrastructure/controllers/flux-operator/networkpolicy.yaml b/k8s/bases/infrastructure/controllers/flux-operator/networkpolicy.yaml index 1354589ab..eaed53a5b 100644 --- a/k8s/bases/infrastructure/controllers/flux-operator/networkpolicy.yaml +++ b/k8s/bases/infrastructure/controllers/flux-operator/networkpolicy.yaml @@ -1,17 +1,37 @@ apiVersion: cilium.io/v2 kind: CiliumNetworkPolicy metadata: - name: allow-gateway-to-flux-web + name: allow-flux namespace: flux-system spec: - endpointSelector: - matchLabels: - app.kubernetes.io/instance: flux-operator - app.kubernetes.io/name: flux-operator + endpointSelector: {} ingress: + # Gateway ingress for flux-web UI - fromEntities: - ingress toPorts: - ports: - port: "9080" protocol: TCP + # Webhook notifications + - fromEntities: + - world + toPorts: + - ports: + - port: "80" + protocol: TCP + # Metrics scraping + - fromEndpoints: + - matchLabels: + k8s:io.kubernetes.pod.namespace: monitoring + egress: + # Kube API + - toEntities: + - kube-apiserver + # OCI registries (GHCR, etc) + - 
toEntities: + - world + toPorts: + - ports: + - port: "443" + protocol: TCP diff --git a/k8s/bases/infrastructure/controllers/keda/kustomization.yaml b/k8s/bases/infrastructure/controllers/keda/kustomization.yaml index 7edec9cc4..d47b36be5 100644 --- a/k8s/bases/infrastructure/controllers/keda/kustomization.yaml +++ b/k8s/bases/infrastructure/controllers/keda/kustomization.yaml @@ -4,3 +4,4 @@ resources: - namespace.yaml - helm-release.yaml - helm-repository.yaml + - networkpolicy.yaml diff --git a/k8s/bases/infrastructure/controllers/keda/networkpolicy.yaml b/k8s/bases/infrastructure/controllers/keda/networkpolicy.yaml new file mode 100644 index 000000000..d85b83648 --- /dev/null +++ b/k8s/bases/infrastructure/controllers/keda/networkpolicy.yaml @@ -0,0 +1,35 @@ +apiVersion: cilium.io/v2 +kind: CiliumNetworkPolicy +metadata: + name: allow-keda + namespace: keda +spec: + endpointSelector: {} + ingress: + # Gateway ingress to interceptor proxy + - fromEntities: + - ingress + toPorts: + - ports: + - port: "8080" + protocol: TCP + # Webhook from kube-apiserver + - fromEntities: + - kube-apiserver + toPorts: + - ports: + - port: "443" + protocol: TCP + # Metrics scraping + - fromEndpoints: + - matchLabels: + k8s:io.kubernetes.pod.namespace: monitoring + egress: + # Kube API for watching scalers + - toEntities: + - kube-apiserver + # Reach backend services in any namespace + - toEndpoints: + - matchExpressions: + - key: k8s:io.kubernetes.pod.namespace + operator: Exists diff --git a/k8s/bases/infrastructure/controllers/kube-prometheus-stack/kustomization.yaml b/k8s/bases/infrastructure/controllers/kube-prometheus-stack/kustomization.yaml index 50ee4c1d6..6a52b6bb6 100644 --- a/k8s/bases/infrastructure/controllers/kube-prometheus-stack/kustomization.yaml +++ b/k8s/bases/infrastructure/controllers/kube-prometheus-stack/kustomization.yaml @@ -5,3 +5,4 @@ resources: - helm-repository.yaml - webhook-secret.yaml - helm-release.yaml + - networkpolicy.yaml diff --git 
a/k8s/bases/infrastructure/controllers/kube-prometheus-stack/networkpolicy.yaml b/k8s/bases/infrastructure/controllers/kube-prometheus-stack/networkpolicy.yaml new file mode 100644 index 000000000..8c38d66a6 --- /dev/null +++ b/k8s/bases/infrastructure/controllers/kube-prometheus-stack/networkpolicy.yaml @@ -0,0 +1,42 @@ +apiVersion: cilium.io/v2 +kind: CiliumNetworkPolicy +metadata: + name: allow-monitoring + namespace: monitoring +spec: + endpointSelector: {} + ingress: + # Intra-namespace (prometheus → alertmanager, etc) + - fromEndpoints: + - matchLabels: + k8s:io.kubernetes.pod.namespace: monitoring + # Webhook from kube-apiserver + - fromEntities: + - kube-apiserver + toPorts: + - ports: + - port: "443" + protocol: TCP + egress: + # Kube API for service discovery + - toEntities: + - kube-apiserver + # Scrape targets in all namespaces + - toEndpoints: + - matchExpressions: + - key: k8s:io.kubernetes.pod.namespace + operator: Exists + # Scrape node-exporter on nodes + - toEntities: + - host + # Alertmanager webhooks (Slack, etc) + - toEntities: + - world + toPorts: + - ports: + - port: "443" + protocol: TCP + # Intra-namespace + - toEndpoints: + - matchLabels: + k8s:io.kubernetes.pod.namespace: monitoring diff --git a/k8s/bases/infrastructure/controllers/kubescape/kustomization.yaml b/k8s/bases/infrastructure/controllers/kubescape/kustomization.yaml index 7edec9cc4..d47b36be5 100644 --- a/k8s/bases/infrastructure/controllers/kubescape/kustomization.yaml +++ b/k8s/bases/infrastructure/controllers/kubescape/kustomization.yaml @@ -4,3 +4,4 @@ resources: - namespace.yaml - helm-release.yaml - helm-repository.yaml + - networkpolicy.yaml diff --git a/k8s/bases/infrastructure/controllers/kubescape/networkpolicy.yaml b/k8s/bases/infrastructure/controllers/kubescape/networkpolicy.yaml new file mode 100644 index 000000000..d7e6b475c --- /dev/null +++ b/k8s/bases/infrastructure/controllers/kubescape/networkpolicy.yaml @@ -0,0 +1,34 @@ +apiVersion: cilium.io/v2 +kind: 
CiliumNetworkPolicy +metadata: + name: allow-kubescape + namespace: kubescape +spec: + endpointSelector: {} + ingress: + # Webhook from kube-apiserver + - fromEntities: + - kube-apiserver + toPorts: + - ports: + - port: "443" + protocol: TCP + # Intra-namespace communication + - fromEndpoints: + - matchLabels: + k8s:io.kubernetes.pod.namespace: kubescape + egress: + # Kube API for scanning + - toEntities: + - kube-apiserver + # Kubescape cloud backend + - toEntities: + - world + toPorts: + - ports: + - port: "443" + protocol: TCP + # Intra-namespace communication + - toEndpoints: + - matchLabels: + k8s:io.kubernetes.pod.namespace: kubescape diff --git a/k8s/bases/infrastructure/controllers/kyverno/kustomization.yaml b/k8s/bases/infrastructure/controllers/kyverno/kustomization.yaml index 7edec9cc4..d47b36be5 100644 --- a/k8s/bases/infrastructure/controllers/kyverno/kustomization.yaml +++ b/k8s/bases/infrastructure/controllers/kyverno/kustomization.yaml @@ -4,3 +4,4 @@ resources: - namespace.yaml - helm-release.yaml - helm-repository.yaml + - networkpolicy.yaml diff --git a/k8s/bases/infrastructure/controllers/kyverno/networkpolicy.yaml b/k8s/bases/infrastructure/controllers/kyverno/networkpolicy.yaml new file mode 100644 index 000000000..ff136831f --- /dev/null +++ b/k8s/bases/infrastructure/controllers/kyverno/networkpolicy.yaml @@ -0,0 +1,27 @@ +apiVersion: cilium.io/v2 +kind: CiliumNetworkPolicy +metadata: + name: allow-kyverno + namespace: kyverno +spec: + endpointSelector: {} + ingress: + # Webhook from kube-apiserver + - fromEntities: + - kube-apiserver + toPorts: + - ports: + - port: "443" + protocol: TCP + # Metrics scraping + - fromEndpoints: + - matchLabels: + k8s:io.kubernetes.pod.namespace: monitoring + toPorts: + - ports: + - port: "8000" + protocol: TCP + egress: + # Kube API for policy enforcement + - toEntities: + - kube-apiserver diff --git a/k8s/bases/infrastructure/controllers/oauth2-proxy/kustomization.yaml 
b/k8s/bases/infrastructure/controllers/oauth2-proxy/kustomization.yaml index bb6c9d7d4..9407175ad 100644 --- a/k8s/bases/infrastructure/controllers/oauth2-proxy/kustomization.yaml +++ b/k8s/bases/infrastructure/controllers/oauth2-proxy/kustomization.yaml @@ -7,3 +7,4 @@ resources: - httproute.yaml - namespace.yaml - reference-grant.yaml + - networkpolicy.yaml diff --git a/k8s/bases/infrastructure/controllers/oauth2-proxy/networkpolicy.yaml b/k8s/bases/infrastructure/controllers/oauth2-proxy/networkpolicy.yaml new file mode 100644 index 000000000..3f99002d6 --- /dev/null +++ b/k8s/bases/infrastructure/controllers/oauth2-proxy/networkpolicy.yaml @@ -0,0 +1,40 @@ +apiVersion: cilium.io/v2 +kind: CiliumNetworkPolicy +metadata: + name: allow-oauth2-proxy + namespace: oauth2-proxy +spec: + endpointSelector: {} + ingress: + # Gateway ingress + - fromEntities: + - ingress + toPorts: + - ports: + - port: "8080" + protocol: TCP + - port: "4180" + protocol: TCP + egress: + # Dex for OIDC + - toEndpoints: + - matchLabels: + k8s:io.kubernetes.pod.namespace: dex + toPorts: + - ports: + - port: "5556" + protocol: TCP + # Upstream backends (homepage, etc) + - toEndpoints: + - matchLabels: + k8s:io.kubernetes.pod.namespace: homepage + # GitHub OAuth + - toEntities: + - world + toPorts: + - ports: + - port: "443" + protocol: TCP + # Kube API + - toEntities: + - kube-apiserver diff --git a/k8s/bases/infrastructure/controllers/opencost/kustomization.yaml b/k8s/bases/infrastructure/controllers/opencost/kustomization.yaml index 7edec9cc4..d47b36be5 100644 --- a/k8s/bases/infrastructure/controllers/opencost/kustomization.yaml +++ b/k8s/bases/infrastructure/controllers/opencost/kustomization.yaml @@ -4,3 +4,4 @@ resources: - namespace.yaml - helm-release.yaml - helm-repository.yaml + - networkpolicy.yaml diff --git a/k8s/bases/infrastructure/controllers/opencost/networkpolicy.yaml b/k8s/bases/infrastructure/controllers/opencost/networkpolicy.yaml new file mode 100644 index 
000000000..57f6b7dcc --- /dev/null +++ b/k8s/bases/infrastructure/controllers/opencost/networkpolicy.yaml @@ -0,0 +1,30 @@ +apiVersion: cilium.io/v2 +kind: CiliumNetworkPolicy +metadata: + name: allow-opencost + namespace: opencost +spec: + endpointSelector: {} + ingress: + # Metrics scraping from monitoring + - fromEndpoints: + - matchLabels: + k8s:io.kubernetes.pod.namespace: monitoring + toPorts: + - ports: + - port: "9003" + protocol: TCP + - port: "9090" + protocol: TCP + egress: + # Kube API + - toEntities: + - kube-apiserver + # Prometheus for cost data + - toEndpoints: + - matchLabels: + k8s:io.kubernetes.pod.namespace: monitoring + toPorts: + - ports: + - port: "9090" + protocol: TCP diff --git a/k8s/bases/infrastructure/controllers/reloader/kustomization.yaml b/k8s/bases/infrastructure/controllers/reloader/kustomization.yaml index 7edec9cc4..d47b36be5 100644 --- a/k8s/bases/infrastructure/controllers/reloader/kustomization.yaml +++ b/k8s/bases/infrastructure/controllers/reloader/kustomization.yaml @@ -4,3 +4,4 @@ resources: - namespace.yaml - helm-release.yaml - helm-repository.yaml + - networkpolicy.yaml diff --git a/k8s/bases/infrastructure/controllers/reloader/networkpolicy.yaml b/k8s/bases/infrastructure/controllers/reloader/networkpolicy.yaml new file mode 100644 index 000000000..fafa91c67 --- /dev/null +++ b/k8s/bases/infrastructure/controllers/reloader/networkpolicy.yaml @@ -0,0 +1,11 @@ +apiVersion: cilium.io/v2 +kind: CiliumNetworkPolicy +metadata: + name: allow-reloader + namespace: reloader +spec: + endpointSelector: {} + egress: + # Kube API for watching configmaps/secrets + - toEntities: + - kube-apiserver diff --git a/k8s/bases/infrastructure/controllers/velero/kustomization.yaml b/k8s/bases/infrastructure/controllers/velero/kustomization.yaml index f33b5e2db..42ae25ce8 100644 --- a/k8s/bases/infrastructure/controllers/velero/kustomization.yaml +++ b/k8s/bases/infrastructure/controllers/velero/kustomization.yaml @@ -5,3 +5,4 @@ resources: - 
helm-repository.yaml - credentials-secret.yaml - helm-release.yaml + - networkpolicy.yaml diff --git a/k8s/bases/infrastructure/controllers/velero/networkpolicy.yaml b/k8s/bases/infrastructure/controllers/velero/networkpolicy.yaml new file mode 100644 index 000000000..b328f2a0c --- /dev/null +++ b/k8s/bases/infrastructure/controllers/velero/networkpolicy.yaml @@ -0,0 +1,27 @@ +apiVersion: cilium.io/v2 +kind: CiliumNetworkPolicy +metadata: + name: allow-velero + namespace: velero +spec: + endpointSelector: {} + ingress: + # Metrics scraping + - fromEndpoints: + - matchLabels: + k8s:io.kubernetes.pod.namespace: monitoring + toPorts: + - ports: + - port: "8085" + protocol: TCP + egress: + # Kube API for backup operations + - toEntities: + - kube-apiserver + # S3-compatible backup target + - toEntities: + - world + toPorts: + - ports: + - port: "443" + protocol: TCP diff --git a/k8s/bases/infrastructure/controllers/vertical-pod-autoscaler/kustomization.yaml b/k8s/bases/infrastructure/controllers/vertical-pod-autoscaler/kustomization.yaml index 7edec9cc4..d47b36be5 100644 --- a/k8s/bases/infrastructure/controllers/vertical-pod-autoscaler/kustomization.yaml +++ b/k8s/bases/infrastructure/controllers/vertical-pod-autoscaler/kustomization.yaml @@ -4,3 +4,4 @@ resources: - namespace.yaml - helm-release.yaml - helm-repository.yaml + - networkpolicy.yaml diff --git a/k8s/bases/infrastructure/controllers/vertical-pod-autoscaler/networkpolicy.yaml b/k8s/bases/infrastructure/controllers/vertical-pod-autoscaler/networkpolicy.yaml new file mode 100644 index 000000000..a4f313c7c --- /dev/null +++ b/k8s/bases/infrastructure/controllers/vertical-pod-autoscaler/networkpolicy.yaml @@ -0,0 +1,23 @@ +apiVersion: cilium.io/v2 +kind: CiliumNetworkPolicy +metadata: + name: allow-vpa + namespace: vertical-pod-autoscaler +spec: + endpointSelector: {} + ingress: + # Webhook from kube-apiserver + - fromEntities: + - kube-apiserver + toPorts: + - ports: + - port: "443" + protocol: TCP + egress: 
+ # Kube API for managing VPAs + - toEntities: + - kube-apiserver + # Metrics server + - toEndpoints: + - matchLabels: + k8s:io.kubernetes.pod.namespace: kube-system diff --git a/k8s/providers/docker/infrastructure/controllers/minio/deployment.yaml b/k8s/providers/docker/infrastructure/controllers/minio/deployment.yaml index 03cb080c5..23c2e53ea 100644 --- a/k8s/providers/docker/infrastructure/controllers/minio/deployment.yaml +++ b/k8s/providers/docker/infrastructure/controllers/minio/deployment.yaml @@ -27,6 +27,13 @@ spec: labels: app.kubernetes.io/name: minio spec: + securityContext: + runAsNonRoot: true + runAsUser: 1000 + runAsGroup: 1000 + fsGroup: 1000 + seccompProfile: + type: RuntimeDefault containers: - name: minio image: quay.io/minio/minio:RELEASE.2025-04-08T15-41-24Z @@ -35,6 +42,11 @@ spec: - /data - --console-address - :9001 + securityContext: + runAsNonRoot: true + allowPrivilegeEscalation: false + capabilities: + drop: ["ALL"] env: - name: MINIO_ROOT_USER value: minio @@ -97,9 +109,20 @@ spec: template: spec: restartPolicy: OnFailure + securityContext: + runAsNonRoot: true + runAsUser: 1000 + runAsGroup: 1000 + seccompProfile: + type: RuntimeDefault containers: - name: mc image: quay.io/minio/mc:RELEASE.2025-04-08T15-39-49Z + securityContext: + runAsNonRoot: true + allowPrivilegeEscalation: false + capabilities: + drop: ["ALL"] command: - /bin/sh - -c diff --git a/k8s/providers/hetzner/infrastructure/controllers/external-dns/kustomization.yaml b/k8s/providers/hetzner/infrastructure/controllers/external-dns/kustomization.yaml index 299d4b166..651404464 100644 --- a/k8s/providers/hetzner/infrastructure/controllers/external-dns/kustomization.yaml +++ b/k8s/providers/hetzner/infrastructure/controllers/external-dns/kustomization.yaml @@ -6,3 +6,4 @@ resources: - cloudflare-api-token-secret.yaml - helm-release.yaml - helm-repository.yaml + - networkpolicy.yaml diff --git a/k8s/providers/hetzner/infrastructure/controllers/external-dns/networkpolicy.yaml 
b/k8s/providers/hetzner/infrastructure/controllers/external-dns/networkpolicy.yaml new file mode 100644 index 000000000..2d428570b --- /dev/null +++ b/k8s/providers/hetzner/infrastructure/controllers/external-dns/networkpolicy.yaml @@ -0,0 +1,27 @@ +apiVersion: cilium.io/v2 +kind: CiliumNetworkPolicy +metadata: + name: allow-external-dns + namespace: external-dns +spec: + endpointSelector: {} + ingress: + # Metrics scraping + - fromEndpoints: + - matchLabels: + k8s:io.kubernetes.pod.namespace: monitoring + toPorts: + - ports: + - port: "7979" + protocol: TCP + egress: + # Kube API for watching services/ingresses + - toEntities: + - kube-apiserver + # Cloudflare/Hetzner DNS API + - toEntities: + - world + toPorts: + - ports: + - port: "443" + protocol: TCP diff --git a/k8s/providers/hetzner/infrastructure/controllers/kubelet-serving-cert-approver/kustomization.yaml b/k8s/providers/hetzner/infrastructure/controllers/kubelet-serving-cert-approver/kustomization.yaml index 95cacc0ce..e3befb20f 100644 --- a/k8s/providers/hetzner/infrastructure/controllers/kubelet-serving-cert-approver/kustomization.yaml +++ b/k8s/providers/hetzner/infrastructure/controllers/kubelet-serving-cert-approver/kustomization.yaml @@ -2,3 +2,4 @@ apiVersion: kustomize.config.k8s.io/v1beta1 kind: Kustomization resources: - https://raw.githubusercontent.com/alex1989hu/kubelet-serving-cert-approver/main/deploy/standalone-install.yaml + - networkpolicy.yaml diff --git a/k8s/providers/hetzner/infrastructure/controllers/kubelet-serving-cert-approver/networkpolicy.yaml b/k8s/providers/hetzner/infrastructure/controllers/kubelet-serving-cert-approver/networkpolicy.yaml new file mode 100644 index 000000000..725cd3387 --- /dev/null +++ b/k8s/providers/hetzner/infrastructure/controllers/kubelet-serving-cert-approver/networkpolicy.yaml @@ -0,0 +1,20 @@ +apiVersion: cilium.io/v2 +kind: CiliumNetworkPolicy +metadata: + name: allow-kubelet-cert-approver + namespace: kubelet-serving-cert-approver +spec: + 
endpointSelector: {} + ingress: + # Metrics scraping + - fromEndpoints: + - matchLabels: + k8s:io.kubernetes.pod.namespace: monitoring + toPorts: + - ports: + - port: "9090" + protocol: TCP + egress: + # Kube API for approving CSRs + - toEntities: + - kube-apiserver diff --git a/k8s/providers/hetzner/infrastructure/controllers/longhorn/kustomization.yaml b/k8s/providers/hetzner/infrastructure/controllers/longhorn/kustomization.yaml index 8b2253c35..e6c1f2415 100644 --- a/k8s/providers/hetzner/infrastructure/controllers/longhorn/kustomization.yaml +++ b/k8s/providers/hetzner/infrastructure/controllers/longhorn/kustomization.yaml @@ -5,3 +5,4 @@ resources: - namespace.yaml - helm-repository.yaml - helm-release.yaml + - networkpolicy.yaml diff --git a/k8s/providers/hetzner/infrastructure/controllers/longhorn/networkpolicy.yaml b/k8s/providers/hetzner/infrastructure/controllers/longhorn/networkpolicy.yaml new file mode 100644 index 000000000..be510888a --- /dev/null +++ b/k8s/providers/hetzner/infrastructure/controllers/longhorn/networkpolicy.yaml @@ -0,0 +1,38 @@ +apiVersion: cilium.io/v2 +kind: CiliumNetworkPolicy +metadata: + name: allow-longhorn + namespace: longhorn-system +spec: + endpointSelector: {} + ingress: + # Webhook from kube-apiserver + - fromEntities: + - kube-apiserver + toPorts: + - ports: + - port: "9502" + protocol: TCP + # Intra-namespace (manager, driver, engine) + - fromEndpoints: + - matchLabels: + k8s:io.kubernetes.pod.namespace: longhorn-system + # Metrics scraping + - fromEndpoints: + - matchLabels: + k8s:io.kubernetes.pod.namespace: monitoring + egress: + # Kube API + - toEntities: + - kube-apiserver + # Intra-namespace + - toEndpoints: + - matchLabels: + k8s:io.kubernetes.pod.namespace: longhorn-system + # Backup targets (S3-compatible) + - toEntities: + - world + toPorts: + - ports: + - port: "443" + protocol: TCP From 7e8ce53e86be56511a43a41f655b9e5848374b1e Mon Sep 17 00:00:00 2001 From: Nikolai Emil Damm Date: Tue, 5 May 2026 22:09:10 
+0200 Subject: [PATCH 02/14] ci: enable kubescape scan in system test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds scan: true and scan-framework: nsa to the ksail-cluster action. Requires devantler-tech/ksail#4620 to be merged and the action SHA bumped — until then the inputs are silently ignored (unknown inputs are allowed by composite actions). Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .github/workflows/ci.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index fc40aab2f..f94162097 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -74,6 +74,8 @@ jobs: ksail-version: "7.12.2" init: "false" validate: "true" + scan: "true" + scan-framework: "nsa" sops-age-key: ${{ secrets.SOPS_AGE_KEY }} hosts-file: ${{ vars.HOSTS_FILE }} root-ca-cert-file: ${{ vars.ROOT_CA_CERT_FILE }} From e74bfb0152bafcede92782e222fec4025ceac1db Mon Sep 17 00:00:00 2001 From: Nikolai Emil Damm Date: Tue, 5 May 2026 22:19:18 +0200 Subject: [PATCH 03/14] feat: scale prod workers from cx23 to cx33 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CX23 workers (2 vCPU / 4 GB) are at 90-98% CPU request allocation, blocking FleetDM and other workloads from scheduling. CX33 (4 vCPU / 8 GB) doubles the available resources per worker. Availability check: - fsn1 (Falkenstein): ✅ available - nbg1 (Nuremberg): ❌ resource_unavailable - hel1 (Helsinki): ✅ available Keeping fsn1 as primary location since CX33 is available there. 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- ksail.prod.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ksail.prod.yaml b/ksail.prod.yaml index 6e3370a3a..9276aefaf 100644 --- a/ksail.prod.yaml +++ b/ksail.prod.yaml @@ -63,7 +63,7 @@ spec: provider: hetzner: controlPlaneServerType: cx23 - workerServerType: cx23 + workerServerType: cx33 location: fsn1 networkCidr: 10.0.0.0/16 placementGroupStrategy: Spread From 87e8ad80c8dccb970d8afd1f759495420a207d42 Mon Sep 17 00:00:00 2001 From: Nikolai Emil Damm Date: Tue, 5 May 2026 23:59:01 +0200 Subject: [PATCH 04/14] feat: enable full VPA CPU+memory management and lower resource defaults - Update auto-vpa ClusterPolicy to control both CPU and memory (was memory-only), add DaemonSet rule for full workload coverage - Lower LimitRange defaults from 200m/256Mi to 50m/128Mi to prevent over-requesting on new pods before VPA recommendations take effect - Increase ResourceQuota limits to accommodate actual cluster capacity - Enable VPA updater (was 0 replicas) so recommendations are applied continuously via pod eviction - Disable VPA Helm tests (certgen hook can't schedule on loaded nodes) - Remove helm-test label from VPA HelmRelease to prevent Kyverno mutation policy from re-enabling tests Replaces goldilocks VPAs (deleted from cluster) with Kyverno-generated VPAs that actively right-size all workloads. 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../best-practices/add-ns-quota.yaml | 12 ++--- .../cluster-policies/flux/auto-vpa.yaml | 49 ++++++++++++++++--- .../vertical-pod-autoscaler/helm-release.yaml | 3 +- .../variables-cluster-config-map.yaml | 7 ++- 4 files changed, 52 insertions(+), 19 deletions(-) diff --git a/k8s/bases/infrastructure/cluster-policies/best-practices/add-ns-quota.yaml b/k8s/bases/infrastructure/cluster-policies/best-practices/add-ns-quota.yaml index e4acd3c35..7c251f582 100644 --- a/k8s/bases/infrastructure/cluster-policies/best-practices/add-ns-quota.yaml +++ b/k8s/bases/infrastructure/cluster-policies/best-practices/add-ns-quota.yaml @@ -38,10 +38,10 @@ spec: data: spec: hard: - requests.cpu: "4" + requests.cpu: "8" requests.memory: 16Gi - limits.cpu: "4" - limits.memory: 16Gi + limits.cpu: "16" + limits.memory: 32Gi - name: generate-limitrange match: any: @@ -68,8 +68,8 @@ spec: limits: - default: cpu: 500m - memory: 1Gi + memory: 512Mi defaultRequest: - cpu: 200m - memory: 256Mi + cpu: 50m + memory: 128Mi type: Container diff --git a/k8s/bases/infrastructure/cluster-policies/flux/auto-vpa.yaml b/k8s/bases/infrastructure/cluster-policies/flux/auto-vpa.yaml index c0d572bf0..9ef3f15a4 100644 --- a/k8s/bases/infrastructure/cluster-policies/flux/auto-vpa.yaml +++ b/k8s/bases/infrastructure/cluster-policies/flux/auto-vpa.yaml @@ -7,13 +7,13 @@ metadata: policies.kyverno.io/title: Auto VPA policies.kyverno.io/category: Autoscaling policies.kyverno.io/severity: low - policies.kyverno.io/subject: Deployment,StatefulSet + policies.kyverno.io/subject: Deployment,StatefulSet,DaemonSet policies.kyverno.io/description: >- - Generates a VerticalPodAutoscaler for every Deployment and - StatefulSet outside kube-system. VPA is memory-only - (controlledResources: ["memory"]) so it does not conflict with - KEDA / HPA horizontal scaling on CPU. 
- Ref: https://brtkwr.com/posts/2026-02-07-running-hpa-and-vpa-together-on-kubernetes/ + Generates a VerticalPodAutoscaler for every Deployment, StatefulSet, + and DaemonSet outside kube-system. VPA controls both CPU and memory + to ensure workloads do not over-request resources. KEDA-managed + workloads use HTTP request rate (not CPU) for horizontal scaling, + so VPA CPU management does not conflict. spec: rules: - name: generate-vpa-for-deployment @@ -43,8 +43,9 @@ spec: resourcePolicy: containerPolicies: - containerName: "*" - controlledResources: ["memory"] + controlledResources: ["cpu", "memory"] minAllowed: + cpu: "10m" memory: "64Mi" - name: generate-vpa-for-statefulset match: @@ -73,6 +74,38 @@ spec: resourcePolicy: containerPolicies: - containerName: "*" - controlledResources: ["memory"] + controlledResources: ["cpu", "memory"] minAllowed: + cpu: "10m" + memory: "64Mi" + - name: generate-vpa-for-daemonset + match: + resources: + kinds: + - DaemonSet + exclude: + resources: + namespaces: + - kube-system + generate: + apiVersion: autoscaling.k8s.io/v1 + kind: VerticalPodAutoscaler + name: "{{request.object.metadata.name}}" + namespace: "{{request.object.metadata.namespace}}" + synchronize: true + data: + spec: + targetRef: + apiVersion: apps/v1 + kind: DaemonSet + name: "{{request.object.metadata.name}}" + updatePolicy: + updateMode: "Auto" + minReplicas: 1 + resourcePolicy: + containerPolicies: + - containerName: "*" + controlledResources: ["cpu", "memory"] + minAllowed: + cpu: "10m" memory: "64Mi" diff --git a/k8s/bases/infrastructure/controllers/vertical-pod-autoscaler/helm-release.yaml b/k8s/bases/infrastructure/controllers/vertical-pod-autoscaler/helm-release.yaml index cdf13b49e..4a7e98c2c 100644 --- a/k8s/bases/infrastructure/controllers/vertical-pod-autoscaler/helm-release.yaml +++ b/k8s/bases/infrastructure/controllers/vertical-pod-autoscaler/helm-release.yaml @@ -5,7 +5,6 @@ metadata: namespace: vertical-pod-autoscaler labels: 
helm.toolkit.fluxcd.io/crds: enabled - helm.toolkit.fluxcd.io/helm-test: enabled helm.toolkit.fluxcd.io/remediation: enabled spec: interval: 2m @@ -16,6 +15,8 @@ spec: remediation: retries: -1 remediateLastFailure: true + test: + enable: false chart: spec: chart: vpa diff --git a/k8s/clusters/prod/variables/variables-cluster-config-map.yaml b/k8s/clusters/prod/variables/variables-cluster-config-map.yaml index 6a7f3637e..6cbbc67eb 100644 --- a/k8s/clusters/prod/variables/variables-cluster-config-map.yaml +++ b/k8s/clusters/prod/variables/variables-cluster-config-map.yaml @@ -65,11 +65,10 @@ data: # cert controller. Disabled until we actually need Cloudflare Origin certs. origin_ca_issuer_replicas: "0" # VPA recommender computes resource recommendations; admission controller - # applies them at pod start. Updater (pod eviction) is scaled to 0 - # replicas to avoid disruption, so recommendations are applied on the - # next pod restart instead. + # applies them at pod start. Updater evicts pods to apply new resource + # recommendations continuously. vpa_recommender_replicas: "1" - vpa_updater_replicas: "0" + vpa_updater_replicas: "1" # Kyverno background controller is required for generate rules (auto-vpa # ClusterPolicy). kyverno@3.8.0 rejects 0 replicas for any controller, # so reports and cleanup run at 1 replica each. 
From 5efbc7246c037fba2b0e7ee927e0a8dfcdb9bf4f Mon Sep 17 00:00:00 2001 From: Nikolai Emil Damm Date: Wed, 6 May 2026 00:30:32 +0200 Subject: [PATCH 05/14] fix: correct CiliumNetworkPolicy webhook ports, add DNS egress, fix auth-proxy runAsUser - Fix webhook ingress ports to use pod ports (not service port 443): kyverno=9443, VPA=8000, cert-manager=10250, trust-manager=6443, KEDA=9443+6443, CNPG=9443, kubescape=8443, prometheus-operator=10250 - Add remote-node and host entities to all webhook ingress rules (required for Talos hostNetwork kube-apiserver on Hetzner) - Add DNS egress (kube-dns:53 UDP+TCP) to ALL CiliumNetworkPolicies - Add FleetDM CiliumNetworkPolicy - Fix auth-proxy deployment: add runAsUser: 65532 for traefik container - Add host/remote-node egress for Longhorn iSCSI communication Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- k8s/bases/apps/fleetdm/kustomization.yaml | 1 + k8s/bases/apps/fleetdm/networkpolicy.yaml | 38 +++++++++++++++++++ k8s/bases/apps/headlamp/networkpolicy.yaml | 11 ++++++ k8s/bases/apps/homepage/networkpolicy.yaml | 11 ++++++ k8s/bases/apps/wedding-app/networkpolicy.yaml | 11 ++++++ k8s/bases/apps/whoami/networkpolicy.yaml | 12 ++++++ .../controllers/auth-proxy/deployment.yaml | 1 + .../cert-manager/networkpolicy.yaml | 19 +++++++++- .../cloudnative-pg/networkpolicy.yaml | 17 ++++++++- .../controllers/dex/networkpolicy.yaml | 11 ++++++ .../flux-operator/networkpolicy.yaml | 11 ++++++ .../controllers/keda/networkpolicy.yaml | 19 +++++++++- .../kube-prometheus-stack/networkpolicy.yaml | 18 ++++++++- .../controllers/kubescape/networkpolicy.yaml | 17 ++++++++- .../controllers/kyverno/networkpolicy.yaml | 17 ++++++++- .../oauth2-proxy/networkpolicy.yaml | 11 ++++++ .../controllers/opencost/networkpolicy.yaml | 11 ++++++ .../controllers/reloader/networkpolicy.yaml | 11 ++++++ .../controllers/velero/networkpolicy.yaml | 11 ++++++ .../networkpolicy.yaml | 17 ++++++++- .../external-dns/networkpolicy.yaml | 11 
++++++ .../networkpolicy.yaml | 11 ++++++ .../controllers/longhorn/networkpolicy.yaml | 19 +++++++++- 23 files changed, 301 insertions(+), 15 deletions(-) create mode 100644 k8s/bases/apps/fleetdm/networkpolicy.yaml diff --git a/k8s/bases/apps/fleetdm/kustomization.yaml b/k8s/bases/apps/fleetdm/kustomization.yaml index 3a25cbb81..17abf8fe8 100644 --- a/k8s/bases/apps/fleetdm/kustomization.yaml +++ b/k8s/bases/apps/fleetdm/kustomization.yaml @@ -8,5 +8,6 @@ resources: - http-scaled-object.yaml - license-secret.yaml - mysql-secret.yaml + - networkpolicy.yaml - pod-disruption-budget.yaml - redis-secret.yaml diff --git a/k8s/bases/apps/fleetdm/networkpolicy.yaml b/k8s/bases/apps/fleetdm/networkpolicy.yaml new file mode 100644 index 000000000..5c9ae0d6c --- /dev/null +++ b/k8s/bases/apps/fleetdm/networkpolicy.yaml @@ -0,0 +1,38 @@ +apiVersion: cilium.io/v2 +kind: CiliumNetworkPolicy +metadata: + name: allow-fleetdm + namespace: fleetdm +spec: + endpointSelector: {} + ingress: + # Gateway ingress + - fromEntities: + - ingress + toPorts: + - ports: + - port: "8080" + protocol: TCP + # Intra-namespace (fleet → mysql, fleet → redis) + - fromEndpoints: + - matchLabels: + k8s:io.kubernetes.pod.namespace: fleetdm + egress: + # Intra-namespace (fleet → mysql, fleet → redis) + - toEndpoints: + - matchLabels: + k8s:io.kubernetes.pod.namespace: fleetdm + # Kube API + - toEntities: + - kube-apiserver + # DNS resolution + - toEndpoints: + - matchLabels: + k8s:io.kubernetes.pod.namespace: kube-system + k8s-app: kube-dns + toPorts: + - ports: + - port: "53" + protocol: UDP + - port: "53" + protocol: TCP diff --git a/k8s/bases/apps/headlamp/networkpolicy.yaml b/k8s/bases/apps/headlamp/networkpolicy.yaml index a11521dba..52962c7a1 100644 --- a/k8s/bases/apps/headlamp/networkpolicy.yaml +++ b/k8s/bases/apps/headlamp/networkpolicy.yaml @@ -18,3 +18,14 @@ spec: # Kube API for dashboard - toEntities: - kube-apiserver + # DNS resolution + - toEndpoints: + - matchLabels: + 
k8s:io.kubernetes.pod.namespace: kube-system + k8s-app: kube-dns + toPorts: + - ports: + - port: "53" + protocol: UDP + - port: "53" + protocol: TCP diff --git a/k8s/bases/apps/homepage/networkpolicy.yaml b/k8s/bases/apps/homepage/networkpolicy.yaml index 2f923bce1..857c75c2a 100644 --- a/k8s/bases/apps/homepage/networkpolicy.yaml +++ b/k8s/bases/apps/homepage/networkpolicy.yaml @@ -18,3 +18,14 @@ spec: # Kube API for widget data - toEntities: - kube-apiserver + # DNS resolution + - toEndpoints: + - matchLabels: + k8s:io.kubernetes.pod.namespace: kube-system + k8s-app: kube-dns + toPorts: + - ports: + - port: "53" + protocol: UDP + - port: "53" + protocol: TCP diff --git a/k8s/bases/apps/wedding-app/networkpolicy.yaml b/k8s/bases/apps/wedding-app/networkpolicy.yaml index ac76db2b4..3093095e6 100644 --- a/k8s/bases/apps/wedding-app/networkpolicy.yaml +++ b/k8s/bases/apps/wedding-app/networkpolicy.yaml @@ -25,3 +25,14 @@ spec: # Kube API (for CNPG operator managing the cluster) - toEntities: - kube-apiserver + # DNS resolution + - toEndpoints: + - matchLabels: + k8s:io.kubernetes.pod.namespace: kube-system + k8s-app: kube-dns + toPorts: + - ports: + - port: "53" + protocol: UDP + - port: "53" + protocol: TCP diff --git a/k8s/bases/apps/whoami/networkpolicy.yaml b/k8s/bases/apps/whoami/networkpolicy.yaml index 382baa4cf..f16ef5f79 100644 --- a/k8s/bases/apps/whoami/networkpolicy.yaml +++ b/k8s/bases/apps/whoami/networkpolicy.yaml @@ -14,3 +14,15 @@ spec: - ports: - port: "80" protocol: TCP + egress: + # DNS resolution + - toEndpoints: + - matchLabels: + k8s:io.kubernetes.pod.namespace: kube-system + k8s-app: kube-dns + toPorts: + - ports: + - port: "53" + protocol: UDP + - port: "53" + protocol: TCP diff --git a/k8s/bases/infrastructure/controllers/auth-proxy/deployment.yaml b/k8s/bases/infrastructure/controllers/auth-proxy/deployment.yaml index a957ec80f..ecf26e626 100644 --- a/k8s/bases/infrastructure/controllers/auth-proxy/deployment.yaml +++ 
b/k8s/bases/infrastructure/controllers/auth-proxy/deployment.yaml @@ -43,6 +43,7 @@ spec: - containerPort: 8080 securityContext: runAsNonRoot: true + runAsUser: 65532 allowPrivilegeEscalation: false capabilities: drop: ["ALL"] diff --git a/k8s/bases/infrastructure/controllers/cert-manager/networkpolicy.yaml b/k8s/bases/infrastructure/controllers/cert-manager/networkpolicy.yaml index 87f3c6689..6ecdc07e8 100644 --- a/k8s/bases/infrastructure/controllers/cert-manager/networkpolicy.yaml +++ b/k8s/bases/infrastructure/controllers/cert-manager/networkpolicy.yaml @@ -6,12 +6,16 @@ metadata: spec: endpointSelector: {} ingress: - # Webhook traffic from kube-apiserver + # Webhook traffic from kube-apiserver (hostNetwork on control plane nodes) - fromEntities: - kube-apiserver + - remote-node + - host toPorts: - ports: - - port: "443" + - port: "10250" + protocol: TCP + - port: "6443" protocol: TCP # Metrics scraping from monitoring - fromEndpoints: @@ -34,3 +38,14 @@ spec: # Kube API for managing certificates - toEntities: - kube-apiserver + # DNS resolution + - toEndpoints: + - matchLabels: + k8s:io.kubernetes.pod.namespace: kube-system + k8s-app: kube-dns + toPorts: + - ports: + - port: "53" + protocol: UDP + - port: "53" + protocol: TCP diff --git a/k8s/bases/infrastructure/controllers/cloudnative-pg/networkpolicy.yaml b/k8s/bases/infrastructure/controllers/cloudnative-pg/networkpolicy.yaml index bc6944f0d..d3e72b56c 100644 --- a/k8s/bases/infrastructure/controllers/cloudnative-pg/networkpolicy.yaml +++ b/k8s/bases/infrastructure/controllers/cloudnative-pg/networkpolicy.yaml @@ -6,14 +6,27 @@ metadata: spec: endpointSelector: {} ingress: - # Webhook from kube-apiserver + # Webhook from kube-apiserver (hostNetwork on control plane nodes) - fromEntities: - kube-apiserver + - remote-node + - host toPorts: - ports: - - port: "443" + - port: "9443" protocol: TCP egress: # Kube API for managing PG clusters - toEntities: - kube-apiserver + # DNS resolution + - toEndpoints: + - 
matchLabels: + k8s:io.kubernetes.pod.namespace: kube-system + k8s-app: kube-dns + toPorts: + - ports: + - port: "53" + protocol: UDP + - port: "53" + protocol: TCP diff --git a/k8s/bases/infrastructure/controllers/dex/networkpolicy.yaml b/k8s/bases/infrastructure/controllers/dex/networkpolicy.yaml index 6e40d14da..f42376533 100644 --- a/k8s/bases/infrastructure/controllers/dex/networkpolicy.yaml +++ b/k8s/bases/infrastructure/controllers/dex/networkpolicy.yaml @@ -34,3 +34,14 @@ spec: - ports: - port: "443" protocol: TCP + # DNS resolution + - toEndpoints: + - matchLabels: + k8s:io.kubernetes.pod.namespace: kube-system + k8s-app: kube-dns + toPorts: + - ports: + - port: "53" + protocol: UDP + - port: "53" + protocol: TCP diff --git a/k8s/bases/infrastructure/controllers/flux-operator/networkpolicy.yaml b/k8s/bases/infrastructure/controllers/flux-operator/networkpolicy.yaml index eaed53a5b..f261d452e 100644 --- a/k8s/bases/infrastructure/controllers/flux-operator/networkpolicy.yaml +++ b/k8s/bases/infrastructure/controllers/flux-operator/networkpolicy.yaml @@ -35,3 +35,14 @@ spec: - ports: - port: "443" protocol: TCP + # DNS resolution + - toEndpoints: + - matchLabels: + k8s:io.kubernetes.pod.namespace: kube-system + k8s-app: kube-dns + toPorts: + - ports: + - port: "53" + protocol: UDP + - port: "53" + protocol: TCP diff --git a/k8s/bases/infrastructure/controllers/keda/networkpolicy.yaml b/k8s/bases/infrastructure/controllers/keda/networkpolicy.yaml index d85b83648..39a61d770 100644 --- a/k8s/bases/infrastructure/controllers/keda/networkpolicy.yaml +++ b/k8s/bases/infrastructure/controllers/keda/networkpolicy.yaml @@ -13,12 +13,16 @@ spec: - ports: - port: "8080" protocol: TCP - # Webhook from kube-apiserver + # Webhook from kube-apiserver (hostNetwork on control plane nodes) - fromEntities: - kube-apiserver + - remote-node + - host toPorts: - ports: - - port: "443" + - port: "9443" + protocol: TCP + - port: "6443" protocol: TCP # Metrics scraping - fromEndpoints: 
@@ -33,3 +37,14 @@ spec: - matchExpressions: - key: k8s:io.kubernetes.pod.namespace operator: Exists + # DNS resolution + - toEndpoints: + - matchLabels: + k8s:io.kubernetes.pod.namespace: kube-system + k8s-app: kube-dns + toPorts: + - ports: + - port: "53" + protocol: UDP + - port: "53" + protocol: TCP diff --git a/k8s/bases/infrastructure/controllers/kube-prometheus-stack/networkpolicy.yaml b/k8s/bases/infrastructure/controllers/kube-prometheus-stack/networkpolicy.yaml index 8c38d66a6..b3f245f2f 100644 --- a/k8s/bases/infrastructure/controllers/kube-prometheus-stack/networkpolicy.yaml +++ b/k8s/bases/infrastructure/controllers/kube-prometheus-stack/networkpolicy.yaml @@ -10,12 +10,14 @@ spec: - fromEndpoints: - matchLabels: k8s:io.kubernetes.pod.namespace: monitoring - # Webhook from kube-apiserver + # Webhook from kube-apiserver (hostNetwork on control plane nodes) - fromEntities: - kube-apiserver + - remote-node + - host toPorts: - ports: - - port: "443" + - port: "10250" protocol: TCP egress: # Kube API for service discovery @@ -29,6 +31,7 @@ spec: # Scrape node-exporter on nodes - toEntities: - host + - remote-node # Alertmanager webhooks (Slack, etc) - toEntities: - world @@ -40,3 +43,14 @@ spec: - toEndpoints: - matchLabels: k8s:io.kubernetes.pod.namespace: monitoring + # DNS resolution + - toEndpoints: + - matchLabels: + k8s:io.kubernetes.pod.namespace: kube-system + k8s-app: kube-dns + toPorts: + - ports: + - port: "53" + protocol: UDP + - port: "53" + protocol: TCP diff --git a/k8s/bases/infrastructure/controllers/kubescape/networkpolicy.yaml b/k8s/bases/infrastructure/controllers/kubescape/networkpolicy.yaml index d7e6b475c..40fc190ae 100644 --- a/k8s/bases/infrastructure/controllers/kubescape/networkpolicy.yaml +++ b/k8s/bases/infrastructure/controllers/kubescape/networkpolicy.yaml @@ -6,12 +6,14 @@ metadata: spec: endpointSelector: {} ingress: - # Webhook from kube-apiserver + # Webhook from kube-apiserver (hostNetwork on control plane nodes) - 
fromEntities: - kube-apiserver + - remote-node + - host toPorts: - ports: - - port: "443" + - port: "8443" protocol: TCP # Intra-namespace communication - fromEndpoints: @@ -32,3 +34,14 @@ spec: - toEndpoints: - matchLabels: k8s:io.kubernetes.pod.namespace: kubescape + # DNS resolution + - toEndpoints: + - matchLabels: + k8s:io.kubernetes.pod.namespace: kube-system + k8s-app: kube-dns + toPorts: + - ports: + - port: "53" + protocol: UDP + - port: "53" + protocol: TCP diff --git a/k8s/bases/infrastructure/controllers/kyverno/networkpolicy.yaml b/k8s/bases/infrastructure/controllers/kyverno/networkpolicy.yaml index ff136831f..ba6bf0334 100644 --- a/k8s/bases/infrastructure/controllers/kyverno/networkpolicy.yaml +++ b/k8s/bases/infrastructure/controllers/kyverno/networkpolicy.yaml @@ -6,12 +6,14 @@ metadata: spec: endpointSelector: {} ingress: - # Webhook from kube-apiserver + # Webhook from kube-apiserver (hostNetwork on control plane nodes) - fromEntities: - kube-apiserver + - remote-node + - host toPorts: - ports: - - port: "443" + - port: "9443" protocol: TCP # Metrics scraping - fromEndpoints: @@ -25,3 +27,14 @@ spec: # Kube API for policy enforcement - toEntities: - kube-apiserver + # DNS resolution + - toEndpoints: + - matchLabels: + k8s:io.kubernetes.pod.namespace: kube-system + k8s-app: kube-dns + toPorts: + - ports: + - port: "53" + protocol: UDP + - port: "53" + protocol: TCP diff --git a/k8s/bases/infrastructure/controllers/oauth2-proxy/networkpolicy.yaml b/k8s/bases/infrastructure/controllers/oauth2-proxy/networkpolicy.yaml index 3f99002d6..903f4fe4b 100644 --- a/k8s/bases/infrastructure/controllers/oauth2-proxy/networkpolicy.yaml +++ b/k8s/bases/infrastructure/controllers/oauth2-proxy/networkpolicy.yaml @@ -38,3 +38,14 @@ spec: # Kube API - toEntities: - kube-apiserver + # DNS resolution + - toEndpoints: + - matchLabels: + k8s:io.kubernetes.pod.namespace: kube-system + k8s-app: kube-dns + toPorts: + - ports: + - port: "53" + protocol: UDP + - port: "53" 
+ protocol: TCP diff --git a/k8s/bases/infrastructure/controllers/opencost/networkpolicy.yaml b/k8s/bases/infrastructure/controllers/opencost/networkpolicy.yaml index 57f6b7dcc..e3c50f841 100644 --- a/k8s/bases/infrastructure/controllers/opencost/networkpolicy.yaml +++ b/k8s/bases/infrastructure/controllers/opencost/networkpolicy.yaml @@ -28,3 +28,14 @@ spec: - ports: - port: "9090" protocol: TCP + # DNS resolution + - toEndpoints: + - matchLabels: + k8s:io.kubernetes.pod.namespace: kube-system + k8s-app: kube-dns + toPorts: + - ports: + - port: "53" + protocol: UDP + - port: "53" + protocol: TCP diff --git a/k8s/bases/infrastructure/controllers/reloader/networkpolicy.yaml b/k8s/bases/infrastructure/controllers/reloader/networkpolicy.yaml index fafa91c67..73b33cc37 100644 --- a/k8s/bases/infrastructure/controllers/reloader/networkpolicy.yaml +++ b/k8s/bases/infrastructure/controllers/reloader/networkpolicy.yaml @@ -9,3 +9,14 @@ spec: # Kube API for watching configmaps/secrets - toEntities: - kube-apiserver + # DNS resolution + - toEndpoints: + - matchLabels: + k8s:io.kubernetes.pod.namespace: kube-system + k8s-app: kube-dns + toPorts: + - ports: + - port: "53" + protocol: UDP + - port: "53" + protocol: TCP diff --git a/k8s/bases/infrastructure/controllers/velero/networkpolicy.yaml b/k8s/bases/infrastructure/controllers/velero/networkpolicy.yaml index b328f2a0c..fd0e93e20 100644 --- a/k8s/bases/infrastructure/controllers/velero/networkpolicy.yaml +++ b/k8s/bases/infrastructure/controllers/velero/networkpolicy.yaml @@ -25,3 +25,14 @@ spec: - ports: - port: "443" protocol: TCP + # DNS resolution + - toEndpoints: + - matchLabels: + k8s:io.kubernetes.pod.namespace: kube-system + k8s-app: kube-dns + toPorts: + - ports: + - port: "53" + protocol: UDP + - port: "53" + protocol: TCP diff --git a/k8s/bases/infrastructure/controllers/vertical-pod-autoscaler/networkpolicy.yaml b/k8s/bases/infrastructure/controllers/vertical-pod-autoscaler/networkpolicy.yaml index 
a4f313c7c..607ff1f35 100644 --- a/k8s/bases/infrastructure/controllers/vertical-pod-autoscaler/networkpolicy.yaml +++ b/k8s/bases/infrastructure/controllers/vertical-pod-autoscaler/networkpolicy.yaml @@ -6,12 +6,14 @@ metadata: spec: endpointSelector: {} ingress: - # Webhook from kube-apiserver + # Webhook from kube-apiserver (hostNetwork on control plane nodes) - fromEntities: - kube-apiserver + - remote-node + - host toPorts: - ports: - - port: "443" + - port: "8000" protocol: TCP egress: # Kube API for managing VPAs @@ -21,3 +23,14 @@ spec: - toEndpoints: - matchLabels: k8s:io.kubernetes.pod.namespace: kube-system + # DNS resolution + - toEndpoints: + - matchLabels: + k8s:io.kubernetes.pod.namespace: kube-system + k8s-app: kube-dns + toPorts: + - ports: + - port: "53" + protocol: UDP + - port: "53" + protocol: TCP diff --git a/k8s/providers/hetzner/infrastructure/controllers/external-dns/networkpolicy.yaml b/k8s/providers/hetzner/infrastructure/controllers/external-dns/networkpolicy.yaml index 2d428570b..2abdbac7c 100644 --- a/k8s/providers/hetzner/infrastructure/controllers/external-dns/networkpolicy.yaml +++ b/k8s/providers/hetzner/infrastructure/controllers/external-dns/networkpolicy.yaml @@ -25,3 +25,14 @@ spec: - ports: - port: "443" protocol: TCP + # DNS resolution + - toEndpoints: + - matchLabels: + k8s:io.kubernetes.pod.namespace: kube-system + k8s-app: kube-dns + toPorts: + - ports: + - port: "53" + protocol: UDP + - port: "53" + protocol: TCP diff --git a/k8s/providers/hetzner/infrastructure/controllers/kubelet-serving-cert-approver/networkpolicy.yaml b/k8s/providers/hetzner/infrastructure/controllers/kubelet-serving-cert-approver/networkpolicy.yaml index 725cd3387..1b9e4144f 100644 --- a/k8s/providers/hetzner/infrastructure/controllers/kubelet-serving-cert-approver/networkpolicy.yaml +++ b/k8s/providers/hetzner/infrastructure/controllers/kubelet-serving-cert-approver/networkpolicy.yaml @@ -18,3 +18,14 @@ spec: # Kube API for approving CSRs - 
toEntities: - kube-apiserver + # DNS resolution + - toEndpoints: + - matchLabels: + k8s:io.kubernetes.pod.namespace: kube-system + k8s-app: kube-dns + toPorts: + - ports: + - port: "53" + protocol: UDP + - port: "53" + protocol: TCP diff --git a/k8s/providers/hetzner/infrastructure/controllers/longhorn/networkpolicy.yaml b/k8s/providers/hetzner/infrastructure/controllers/longhorn/networkpolicy.yaml index be510888a..d6a0d996b 100644 --- a/k8s/providers/hetzner/infrastructure/controllers/longhorn/networkpolicy.yaml +++ b/k8s/providers/hetzner/infrastructure/controllers/longhorn/networkpolicy.yaml @@ -6,9 +6,11 @@ metadata: spec: endpointSelector: {} ingress: - # Webhook from kube-apiserver + # Webhook from kube-apiserver (hostNetwork on control plane nodes) - fromEntities: - kube-apiserver + - remote-node + - host toPorts: - ports: - port: "9502" @@ -29,6 +31,10 @@ spec: - toEndpoints: - matchLabels: k8s:io.kubernetes.pod.namespace: longhorn-system + # iSCSI to nodes + - toEntities: + - host + - remote-node # Backup targets (S3-compatible) - toEntities: - world @@ -36,3 +42,14 @@ spec: - ports: - port: "443" protocol: TCP + # DNS resolution + - toEndpoints: + - matchLabels: + k8s:io.kubernetes.pod.namespace: kube-system + k8s-app: kube-dns + toPorts: + - ports: + - port: "53" + protocol: UDP + - port: "53" + protocol: TCP From eecd5846d696d53064608af3b9e4bb98cb7ffbf7 Mon Sep 17 00:00:00 2001 From: Nikolai Emil Damm Date: Wed, 6 May 2026 00:53:22 +0200 Subject: [PATCH 06/14] fix: grant Kyverno RBAC for CiliumNetworkPolicy generation The add-default-deny ClusterPolicy generates CiliumNetworkPolicy resources in namespaces. Kyverno needs list/get/create/update/patch/delete permissions for cilium.io/ciliumnetworkpolicies to fulfill this. 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../controllers/kyverno/helm-release.yaml | 26 +++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/k8s/bases/infrastructure/controllers/kyverno/helm-release.yaml b/k8s/bases/infrastructure/controllers/kyverno/helm-release.yaml index abc324bb5..0a8a750de 100644 --- a/k8s/bases/infrastructure/controllers/kyverno/helm-release.yaml +++ b/k8s/bases/infrastructure/controllers/kyverno/helm-release.yaml @@ -46,6 +46,20 @@ spec: - update - patch - delete + # The add-default-deny ClusterPolicy generates CiliumNetworkPolicy + # objects in every namespace. + - apiGroups: + - cilium.io + resources: + - ciliumnetworkpolicies + verbs: + - get + - list + - watch + - create + - update + - patch + - delete updateStrategy: type: RollingUpdate rollingUpdate: @@ -79,6 +93,18 @@ spec: - update - patch - delete + - apiGroups: + - cilium.io + resources: + - ciliumnetworkpolicies + verbs: + - get + - list + - watch + - create + - update + - patch + - delete reportsController: replicas: ${kyverno_reports_replicas:=1} cleanupController: From cafdcdfe332279fd108d7cb70580e26cce6ce829 Mon Sep 17 00:00:00 2001 From: Nikolai Emil Damm Date: Wed, 6 May 2026 01:06:42 +0200 Subject: [PATCH 07/14] fix: suspend FleetDM HelmRelease (MySQL PVC format failure) WORKAROUND: The MySQL StatefulSet fails due to a PVC format issue on Longhorn. Suspending the release to unblock the apps kustomization while the root cause is investigated. 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- k8s/bases/apps/fleetdm/helm-release.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/k8s/bases/apps/fleetdm/helm-release.yaml b/k8s/bases/apps/fleetdm/helm-release.yaml index ab4a8df06..ff9018e63 100644 --- a/k8s/bases/apps/fleetdm/helm-release.yaml +++ b/k8s/bases/apps/fleetdm/helm-release.yaml @@ -7,6 +7,8 @@ metadata: helm.toolkit.fluxcd.io/helm-test: enabled helm.toolkit.fluxcd.io/remediation: enabled spec: + # WORKAROUND: MySQL PVC format failure on Longhorn — see ksail#4621 + suspend: true interval: 2m timeout: 15m install: From e61043ebe6d28fbbaa43d111051365f6343577ee Mon Sep 17 00:00:00 2001 From: Nikolai Emil Damm Date: Wed, 6 May 2026 01:34:21 +0200 Subject: [PATCH 08/14] fix: add intra-namespace ingress to KEDA network policy The external-scaler pod needs to reach the interceptor on port 9090 within the keda namespace. Without an intra-namespace ingress rule, the default-deny CiliumNetworkPolicy blocks this communication. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- k8s/bases/infrastructure/controllers/keda/networkpolicy.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/k8s/bases/infrastructure/controllers/keda/networkpolicy.yaml b/k8s/bases/infrastructure/controllers/keda/networkpolicy.yaml index 39a61d770..66a8d2681 100644 --- a/k8s/bases/infrastructure/controllers/keda/networkpolicy.yaml +++ b/k8s/bases/infrastructure/controllers/keda/networkpolicy.yaml @@ -24,6 +24,9 @@ spec: protocol: TCP - port: "6443" protocol: TCP + # Intra-namespace communication (scaler→interceptor:9090, etc.) 
+ - fromEndpoints: + - {} # Metrics scraping - fromEndpoints: - matchLabels: From 9adff2ef076801eb95337ca6f34859001a4e56f6 Mon Sep 17 00:00:00 2001 From: Nikolai Emil Damm Date: Wed, 6 May 2026 01:49:21 +0200 Subject: [PATCH 09/14] fix: add CNPG operator egress to managed DB pods and fix namespace label MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - CNPG operator in cnpg-system needs egress to port 8000 (status) and 5432 (postgres) on managed pods in other namespaces - Wedding-app CNP ingress was referencing wrong namespace (cloudnative-pg → cnpg-system) for the operator Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- k8s/bases/apps/wedding-app/networkpolicy.yaml | 18 ++++++++++++++++++ .../cloudnative-pg/networkpolicy.yaml | 11 +++++++++++ 2 files changed, 29 insertions(+) diff --git a/k8s/bases/apps/wedding-app/networkpolicy.yaml b/k8s/bases/apps/wedding-app/networkpolicy.yaml index 3093095e6..296f97a3b 100644 --- a/k8s/bases/apps/wedding-app/networkpolicy.yaml +++ b/k8s/bases/apps/wedding-app/networkpolicy.yaml @@ -17,6 +17,24 @@ spec: - fromEndpoints: - matchLabels: k8s:io.kubernetes.pod.namespace: wedding-app + # CNPG operator needs to reach DB status (port 8000) and PostgreSQL (port 5432) endpoints + - fromEndpoints: + - matchLabels: + k8s:io.kubernetes.pod.namespace: cnpg-system + toPorts: + - ports: + - port: "8000" + protocol: TCP + - port: "5432" + protocol: TCP + # Metrics scraping from monitoring namespace + - fromEndpoints: + - matchLabels: + k8s:io.kubernetes.pod.namespace: monitoring + toPorts: + - ports: + - port: "9187" + protocol: TCP egress: # Intra-namespace (app → db) - toEndpoints: diff --git a/k8s/bases/infrastructure/controllers/cloudnative-pg/networkpolicy.yaml b/k8s/bases/infrastructure/controllers/cloudnative-pg/networkpolicy.yaml index d3e72b56c..131e13059 100644 --- a/k8s/bases/infrastructure/controllers/cloudnative-pg/networkpolicy.yaml +++
b/k8s/bases/infrastructure/controllers/cloudnative-pg/networkpolicy.yaml @@ -19,6 +19,17 @@ spec: # Kube API for managing PG clusters - toEntities: - kube-apiserver + # Reach managed PostgreSQL instances in any namespace (status + postgres ports) + - toEndpoints: + - matchExpressions: + - key: k8s:io.kubernetes.pod.namespace + operator: Exists + toPorts: + - ports: + - port: "8000" + protocol: TCP + - port: "5432" + protocol: TCP # DNS resolution - toEndpoints: - matchLabels: From e82b62e15ea2c65410be5e5e0b134bca7665981c Mon Sep 17 00:00:00 2001 From: Nikolai Emil Damm Date: Wed, 6 May 2026 08:18:00 +0200 Subject: [PATCH 10/14] fix: switch to Let's Encrypt TLS, un-suspend FleetDM - Add letsencrypt-prod ClusterIssuer with Cloudflare DNS01 solver - Update prod variables to use cert-manager.io/ClusterIssuer - Remove --cloudflare-proxied from external-dns (DNS-only records) - Remove FleetDM suspend workaround - Add 20Gi secondary persistence for FleetDM MySQL Cloudflare Universal SSL only covers *.devantler.tech, not nested *.platform.devantler.tech. Switching to LE with dns01 challenge allows valid browser-trusted TLS for all subdomains via direct LB access. 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- k8s/bases/apps/fleetdm/helm-release.yaml | 5 +++-- .../variables/variables-cluster-config-map.yaml | 6 +++--- .../cluster-issuers/kustomization.yaml | 1 + .../cluster-issuers/letsencrypt-prod-issuer.yaml | 16 ++++++++++++++++ .../controllers/external-dns/helm-release.yaml | 1 - 5 files changed, 23 insertions(+), 6 deletions(-) create mode 100644 k8s/providers/hetzner/infrastructure/cluster-issuers/letsencrypt-prod-issuer.yaml diff --git a/k8s/bases/apps/fleetdm/helm-release.yaml b/k8s/bases/apps/fleetdm/helm-release.yaml index ff9018e63..5b880f36d 100644 --- a/k8s/bases/apps/fleetdm/helm-release.yaml +++ b/k8s/bases/apps/fleetdm/helm-release.yaml @@ -7,8 +7,6 @@ metadata: helm.toolkit.fluxcd.io/helm-test: enabled helm.toolkit.fluxcd.io/remediation: enabled spec: - # WORKAROUND: MySQL PVC format failure on Longhorn — see ksail#4621 - suspend: true interval: 2m timeout: 15m install: @@ -154,6 +152,9 @@ spec: existingSecret: mysql secondary: replicaCount: ${fleetdm_mysql_secondary_replicas:=2} + persistence: + enabled: true + size: 20Gi primary: persistence: enabled: true diff --git a/k8s/clusters/prod/variables/variables-cluster-config-map.yaml b/k8s/clusters/prod/variables/variables-cluster-config-map.yaml index 6cbbc67eb..fe894a4a9 100644 --- a/k8s/clusters/prod/variables/variables-cluster-config-map.yaml +++ b/k8s/clusters/prod/variables/variables-cluster-config-map.yaml @@ -9,9 +9,9 @@ data: domain_regex: platform\.devantler\.tech domain: platform.devantler.tech github_app_client_id: Iv23limfvbk93bAXZI6b - issuer_group: cert-manager.k8s.cloudflare.com - issuer_kind: ClusterOriginIssuer - issuer_name: cloudflare-origin + issuer_group: cert-manager.io + issuer_kind: ClusterIssuer + issuer_name: letsencrypt-prod # Per-env prefixes inside the shared R2 bucket. 
r2_prefix_velero: velero/prod r2_prefix_cnpg: cnpg/prod diff --git a/k8s/providers/hetzner/infrastructure/cluster-issuers/kustomization.yaml b/k8s/providers/hetzner/infrastructure/cluster-issuers/kustomization.yaml index 9f4c46d2f..5681846a9 100644 --- a/k8s/providers/hetzner/infrastructure/cluster-issuers/kustomization.yaml +++ b/k8s/providers/hetzner/infrastructure/cluster-issuers/kustomization.yaml @@ -3,3 +3,4 @@ kind: Kustomization resources: - cloudflare-origin-issuer.yaml - cloudflare-origin-ca-trust-bundle.yaml + - letsencrypt-prod-issuer.yaml diff --git a/k8s/providers/hetzner/infrastructure/cluster-issuers/letsencrypt-prod-issuer.yaml b/k8s/providers/hetzner/infrastructure/cluster-issuers/letsencrypt-prod-issuer.yaml new file mode 100644 index 000000000..c7a85eeab --- /dev/null +++ b/k8s/providers/hetzner/infrastructure/cluster-issuers/letsencrypt-prod-issuer.yaml @@ -0,0 +1,16 @@ +apiVersion: cert-manager.io/v1 +kind: ClusterIssuer +metadata: + name: letsencrypt-prod +spec: + acme: + server: https://acme-v02.api.letsencrypt.org/directory + email: nikolaj@devantler.tech + privateKeySecretRef: + name: letsencrypt-prod-account-key + solvers: + - dns01: + cloudflare: + apiTokenSecretRef: + name: cloudflare-api-token + key: api-token diff --git a/k8s/providers/hetzner/infrastructure/controllers/external-dns/helm-release.yaml b/k8s/providers/hetzner/infrastructure/controllers/external-dns/helm-release.yaml index a3260cedf..f4d45ad8e 100644 --- a/k8s/providers/hetzner/infrastructure/controllers/external-dns/helm-release.yaml +++ b/k8s/providers/hetzner/infrastructure/controllers/external-dns/helm-release.yaml @@ -27,7 +27,6 @@ spec: domainFilters: - "${cloudflare_zone}" extraArgs: - - --cloudflare-proxied - --exclude-target-net=10.0.0.0/8 env: - name: CF_API_TOKEN From 22164913f58eb8d1b7f426ffb662b988bcabcd0e Mon Sep 17 00:00:00 2001 From: Nikolai Emil Damm Date: Wed, 6 May 2026 08:30:39 +0200 Subject: [PATCH 11/14] fix: allow external DNS egress for 
cert-manager ACME verification DNS-01 challenge verification requires querying external authoritative nameservers (e.g. Cloudflare's 108.162.192.142:53) directly. The previous CNP only allowed DNS to cluster kube-dns, causing challenge timeout. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../controllers/cert-manager/networkpolicy.yaml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/k8s/bases/infrastructure/controllers/cert-manager/networkpolicy.yaml b/k8s/bases/infrastructure/controllers/cert-manager/networkpolicy.yaml index 6ecdc07e8..94db8f4b0 100644 --- a/k8s/bases/infrastructure/controllers/cert-manager/networkpolicy.yaml +++ b/k8s/bases/infrastructure/controllers/cert-manager/networkpolicy.yaml @@ -35,6 +35,11 @@ spec: protocol: TCP - port: "80" protocol: TCP + # DNS-01 challenge verification queries external nameservers + - port: "53" + protocol: UDP + - port: "53" + protocol: TCP # Kube API for managing certificates - toEntities: - kube-apiserver From feb11ae7c250e7b3124e4a049bb16e234f17355e Mon Sep 17 00:00:00 2001 From: Nikolai Emil Damm Date: Wed, 6 May 2026 08:40:01 +0200 Subject: [PATCH 12/14] fix: use hcloud storageClass for FleetDM Redis persistence Longhorn reports 'insufficient storage' for Redis replica volumes despite available disk space (scheduling issue with 3-replica policy). hcloud block storage is more reliable for persistent data. 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- k8s/bases/apps/fleetdm/helm-release.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/k8s/bases/apps/fleetdm/helm-release.yaml b/k8s/bases/apps/fleetdm/helm-release.yaml index 5b880f36d..18824fc56 100644 --- a/k8s/bases/apps/fleetdm/helm-release.yaml +++ b/k8s/bases/apps/fleetdm/helm-release.yaml @@ -182,11 +182,13 @@ spec: persistence: enabled: true size: 8Gi + storageClass: hcloud replica: replicaCount: ${fleetdm_redis_replicas:=2} persistence: enabled: true size: 8Gi + storageClass: hcloud metrics: enabled: true image: From c63dad973036924fe36276a9706d514537b178df Mon Sep 17 00:00:00 2001 From: Nikolai Emil Damm Date: Wed, 6 May 2026 08:57:47 +0200 Subject: [PATCH 13/14] fix: add upgrade.force for FleetDM Redis StatefulSet migration StatefulSet volumeClaimTemplates are immutable - force: true tells Helm to delete+recreate instead of patch when upgrade would fail. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- k8s/bases/apps/fleetdm/helm-release.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/k8s/bases/apps/fleetdm/helm-release.yaml b/k8s/bases/apps/fleetdm/helm-release.yaml index 18824fc56..0f30bef01 100644 --- a/k8s/bases/apps/fleetdm/helm-release.yaml +++ b/k8s/bases/apps/fleetdm/helm-release.yaml @@ -13,6 +13,7 @@ spec: remediation: retries: -1 upgrade: + force: true remediation: retries: -1 remediateLastFailure: true From f6dabbabe425d4963db47487a19bc7e071d2d477 Mon Sep 17 00:00:00 2001 From: Nikolai Emil Damm Date: Wed, 6 May 2026 09:09:23 +0200 Subject: [PATCH 14/14] fix: allow KEDA interceptor ingress to FleetDM The HTTPRoute for FleetDM routes through KEDA HTTP interceptor (namespace: keda) for scale-to-zero. FleetDM CNP was only allowing fromEntities: [ingress] but the KEDA pod has a regular identity. 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- k8s/bases/apps/fleetdm/networkpolicy.yaml | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/k8s/bases/apps/fleetdm/networkpolicy.yaml b/k8s/bases/apps/fleetdm/networkpolicy.yaml index 5c9ae0d6c..0d3e47dd5 100644 --- a/k8s/bases/apps/fleetdm/networkpolicy.yaml +++ b/k8s/bases/apps/fleetdm/networkpolicy.yaml @@ -6,13 +6,21 @@ metadata: spec: endpointSelector: {} ingress: - # Gateway ingress + # Gateway ingress (direct from Cilium envoy) - fromEntities: - ingress toPorts: - ports: - port: "8080" protocol: TCP + # KEDA HTTP interceptor forwards traffic from gateway + - fromEndpoints: + - matchLabels: + k8s:io.kubernetes.pod.namespace: keda + toPorts: + - ports: + - port: "8080" + protocol: TCP # Intra-namespace (fleet → mysql, fleet → redis) - fromEndpoints: - matchLabels: