From 9320412b193ae4afa57700e8b75bfe2becdde12d Mon Sep 17 00:00:00 2001 From: Rick Gardner Date: Tue, 24 Feb 2026 00:12:38 -0500 Subject: [PATCH 1/3] Add ff infrastructure: shared namespaces, CNPG operator, Postgres clusters - infra/shared-namespaces: ff-dev and ff-production namespaces owned independently of any app or db Kustomization (ADR 008) - infra/databases/cloudnativepg: CNPG operator via Helm (cnpg-system) - infra/databases/postgres: ff-postgres Cluster in each namespace (dev: 5Gi, production: 10Gi), depends on shared-namespaces + cnpg - docs/adr.md: ADR 008 documents the shared-namespaces pattern --- .../cloudnativepg/chart/helmrelease.yaml | 23 +++++++++++++++++++ .../cloudnativepg/chart/helmrepository.yaml | 8 +++++++ .../cloudnativepg/chart/kustomization.yaml | 6 +++++ .../cloudnativepg/chart/namespace.yaml | 4 ++++ .../databases/cloudnativepg/ks-chart.yaml | 15 ++++++++++++ .../cloudnativepg/kustomization.yaml | 4 ++++ .../hlcl1/infra/databases/kustomization.yaml | 5 ++++ .../databases/postgres/config/ff-dev.yaml | 21 +++++++++++++++++ .../postgres/config/ff-production.yaml | 21 +++++++++++++++++ .../postgres/config/kustomization.yaml | 5 ++++ .../infra/databases/postgres/ks-config.yaml | 17 ++++++++++++++ .../databases/postgres/kustomization.yaml | 4 ++++ .../shared-namespaces/ks-namespaces.yaml | 14 +++++++++++ .../shared-namespaces/kustomization.yaml | 5 ++++ .../namespaces/kustomization.yaml | 4 ++++ .../namespaces/namespaces.yaml | 9 ++++++++ clusters/hlcl1/kustomization.yaml | 2 ++ docs/adr.md | 17 ++++++++++++++ 18 files changed, 184 insertions(+) create mode 100644 clusters/hlcl1/infra/databases/cloudnativepg/chart/helmrelease.yaml create mode 100644 clusters/hlcl1/infra/databases/cloudnativepg/chart/helmrepository.yaml create mode 100644 clusters/hlcl1/infra/databases/cloudnativepg/chart/kustomization.yaml create mode 100644 clusters/hlcl1/infra/databases/cloudnativepg/chart/namespace.yaml create mode 100644 
clusters/hlcl1/infra/databases/cloudnativepg/ks-chart.yaml create mode 100644 clusters/hlcl1/infra/databases/cloudnativepg/kustomization.yaml create mode 100644 clusters/hlcl1/infra/databases/kustomization.yaml create mode 100644 clusters/hlcl1/infra/databases/postgres/config/ff-dev.yaml create mode 100644 clusters/hlcl1/infra/databases/postgres/config/ff-production.yaml create mode 100644 clusters/hlcl1/infra/databases/postgres/config/kustomization.yaml create mode 100644 clusters/hlcl1/infra/databases/postgres/ks-config.yaml create mode 100644 clusters/hlcl1/infra/databases/postgres/kustomization.yaml create mode 100644 clusters/hlcl1/infra/shared-namespaces/ks-namespaces.yaml create mode 100644 clusters/hlcl1/infra/shared-namespaces/kustomization.yaml create mode 100644 clusters/hlcl1/infra/shared-namespaces/namespaces/kustomization.yaml create mode 100644 clusters/hlcl1/infra/shared-namespaces/namespaces/namespaces.yaml diff --git a/clusters/hlcl1/infra/databases/cloudnativepg/chart/helmrelease.yaml b/clusters/hlcl1/infra/databases/cloudnativepg/chart/helmrelease.yaml new file mode 100644 index 0000000..0a3b4d2 --- /dev/null +++ b/clusters/hlcl1/infra/databases/cloudnativepg/chart/helmrelease.yaml @@ -0,0 +1,23 @@ +apiVersion: helm.toolkit.fluxcd.io/v2 +kind: HelmRelease +metadata: + name: cloudnative-pg + namespace: cnpg-system +spec: + interval: 30m + chart: + spec: + chart: cloudnative-pg + version: ">=0.23.0 <1.0.0" + sourceRef: + kind: HelmRepository + name: cloudnative-pg + namespace: flux-system + install: + crds: CreateReplace + remediation: + retries: 3 + upgrade: + crds: CreateReplace + remediation: + retries: 3 diff --git a/clusters/hlcl1/infra/databases/cloudnativepg/chart/helmrepository.yaml b/clusters/hlcl1/infra/databases/cloudnativepg/chart/helmrepository.yaml new file mode 100644 index 0000000..668b9d2 --- /dev/null +++ b/clusters/hlcl1/infra/databases/cloudnativepg/chart/helmrepository.yaml @@ -0,0 +1,8 @@ +apiVersion: 
source.toolkit.fluxcd.io/v1 +kind: HelmRepository +metadata: + name: cloudnative-pg + namespace: flux-system +spec: + interval: 24h + url: https://cloudnative-pg.github.io/charts diff --git a/clusters/hlcl1/infra/databases/cloudnativepg/chart/kustomization.yaml b/clusters/hlcl1/infra/databases/cloudnativepg/chart/kustomization.yaml new file mode 100644 index 0000000..b4a3d7c --- /dev/null +++ b/clusters/hlcl1/infra/databases/cloudnativepg/chart/kustomization.yaml @@ -0,0 +1,6 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +resources: + - namespace.yaml + - helmrepository.yaml + - helmrelease.yaml diff --git a/clusters/hlcl1/infra/databases/cloudnativepg/chart/namespace.yaml b/clusters/hlcl1/infra/databases/cloudnativepg/chart/namespace.yaml new file mode 100644 index 0000000..8deac4c --- /dev/null +++ b/clusters/hlcl1/infra/databases/cloudnativepg/chart/namespace.yaml @@ -0,0 +1,4 @@ +apiVersion: v1 +kind: Namespace +metadata: + name: cnpg-system diff --git a/clusters/hlcl1/infra/databases/cloudnativepg/ks-chart.yaml b/clusters/hlcl1/infra/databases/cloudnativepg/ks-chart.yaml new file mode 100644 index 0000000..269bac0 --- /dev/null +++ b/clusters/hlcl1/infra/databases/cloudnativepg/ks-chart.yaml @@ -0,0 +1,15 @@ +apiVersion: kustomize.toolkit.fluxcd.io/v1 +kind: Kustomization +metadata: + name: infra-cloudnativepg + namespace: flux-system +spec: + interval: 30m + retryInterval: 1m + path: ./clusters/hlcl1/infra/databases/cloudnativepg/chart + prune: true + sourceRef: + kind: GitRepository + name: flux-system + wait: true + timeout: 10m diff --git a/clusters/hlcl1/infra/databases/cloudnativepg/kustomization.yaml b/clusters/hlcl1/infra/databases/cloudnativepg/kustomization.yaml new file mode 100644 index 0000000..3cf3936 --- /dev/null +++ b/clusters/hlcl1/infra/databases/cloudnativepg/kustomization.yaml @@ -0,0 +1,4 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +resources: + - ks-chart.yaml diff --git 
a/clusters/hlcl1/infra/databases/kustomization.yaml b/clusters/hlcl1/infra/databases/kustomization.yaml new file mode 100644 index 0000000..22049da --- /dev/null +++ b/clusters/hlcl1/infra/databases/kustomization.yaml @@ -0,0 +1,5 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +resources: + - cloudnativepg/ + - postgres/ diff --git a/clusters/hlcl1/infra/databases/postgres/config/ff-dev.yaml b/clusters/hlcl1/infra/databases/postgres/config/ff-dev.yaml new file mode 100644 index 0000000..6e59c72 --- /dev/null +++ b/clusters/hlcl1/infra/databases/postgres/config/ff-dev.yaml @@ -0,0 +1,21 @@ +apiVersion: postgresql.cnpg.io/v1 +kind: Cluster +metadata: + name: ff-postgres + namespace: ff-dev +spec: + instances: 1 + + imageName: ghcr.io/cloudnative-pg/postgresql:16 + + storage: + size: 5Gi + + postgresql: + parameters: + timezone: UTC + + bootstrap: + initdb: + database: flockfeed + owner: flockfeed diff --git a/clusters/hlcl1/infra/databases/postgres/config/ff-production.yaml b/clusters/hlcl1/infra/databases/postgres/config/ff-production.yaml new file mode 100644 index 0000000..e3c85aa --- /dev/null +++ b/clusters/hlcl1/infra/databases/postgres/config/ff-production.yaml @@ -0,0 +1,21 @@ +apiVersion: postgresql.cnpg.io/v1 +kind: Cluster +metadata: + name: ff-postgres + namespace: ff-production +spec: + instances: 1 + + imageName: ghcr.io/cloudnative-pg/postgresql:16 + + storage: + size: 10Gi + + postgresql: + parameters: + timezone: UTC + + bootstrap: + initdb: + database: flockfeed + owner: flockfeed diff --git a/clusters/hlcl1/infra/databases/postgres/config/kustomization.yaml b/clusters/hlcl1/infra/databases/postgres/config/kustomization.yaml new file mode 100644 index 0000000..70092d1 --- /dev/null +++ b/clusters/hlcl1/infra/databases/postgres/config/kustomization.yaml @@ -0,0 +1,5 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +resources: + - ff-dev.yaml + - ff-production.yaml diff --git 
a/clusters/hlcl1/infra/databases/postgres/ks-config.yaml b/clusters/hlcl1/infra/databases/postgres/ks-config.yaml new file mode 100644 index 0000000..ac4bdc3 --- /dev/null +++ b/clusters/hlcl1/infra/databases/postgres/ks-config.yaml @@ -0,0 +1,17 @@ +apiVersion: kustomize.toolkit.fluxcd.io/v1 +kind: Kustomization +metadata: + name: infra-ff-postgres + namespace: flux-system +spec: + interval: 30m + retryInterval: 1m + path: ./clusters/hlcl1/infra/databases/postgres/config + prune: true + sourceRef: + kind: GitRepository + name: flux-system + dependsOn: + - name: infra-shared-namespaces + - name: infra-cloudnativepg + timeout: 10m diff --git a/clusters/hlcl1/infra/databases/postgres/kustomization.yaml b/clusters/hlcl1/infra/databases/postgres/kustomization.yaml new file mode 100644 index 0000000..32312ba --- /dev/null +++ b/clusters/hlcl1/infra/databases/postgres/kustomization.yaml @@ -0,0 +1,4 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +resources: + - ks-config.yaml diff --git a/clusters/hlcl1/infra/shared-namespaces/ks-namespaces.yaml b/clusters/hlcl1/infra/shared-namespaces/ks-namespaces.yaml new file mode 100644 index 0000000..036663e --- /dev/null +++ b/clusters/hlcl1/infra/shared-namespaces/ks-namespaces.yaml @@ -0,0 +1,14 @@ +apiVersion: kustomize.toolkit.fluxcd.io/v1 +kind: Kustomization +metadata: + name: infra-shared-namespaces + namespace: flux-system +spec: + interval: 30m + retryInterval: 1m + path: ./clusters/hlcl1/infra/shared-namespaces/namespaces + prune: true + sourceRef: + kind: GitRepository + name: flux-system + timeout: 5m diff --git a/clusters/hlcl1/infra/shared-namespaces/kustomization.yaml b/clusters/hlcl1/infra/shared-namespaces/kustomization.yaml new file mode 100644 index 0000000..c6493c8 --- /dev/null +++ b/clusters/hlcl1/infra/shared-namespaces/kustomization.yaml @@ -0,0 +1,5 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +resources: + - ks-namespaces.yaml + diff --git 
a/clusters/hlcl1/infra/shared-namespaces/namespaces/kustomization.yaml b/clusters/hlcl1/infra/shared-namespaces/namespaces/kustomization.yaml new file mode 100644 index 0000000..a1eee58 --- /dev/null +++ b/clusters/hlcl1/infra/shared-namespaces/namespaces/kustomization.yaml @@ -0,0 +1,4 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +resources: + - namespaces.yaml diff --git a/clusters/hlcl1/infra/shared-namespaces/namespaces/namespaces.yaml b/clusters/hlcl1/infra/shared-namespaces/namespaces/namespaces.yaml new file mode 100644 index 0000000..78a4ef6 --- /dev/null +++ b/clusters/hlcl1/infra/shared-namespaces/namespaces/namespaces.yaml @@ -0,0 +1,9 @@ +apiVersion: v1 +kind: Namespace +metadata: + name: ff-dev +--- +apiVersion: v1 +kind: Namespace +metadata: + name: ff-production diff --git a/clusters/hlcl1/kustomization.yaml b/clusters/hlcl1/kustomization.yaml index d85b833..b43a267 100644 --- a/clusters/hlcl1/kustomization.yaml +++ b/clusters/hlcl1/kustomization.yaml @@ -5,5 +5,7 @@ resources: - infra/storage/nfs - infra/network/metallb - infra/secrets/external-secrets + - infra/shared-namespaces + - infra/databases - apps/pihole - apps/monitoring diff --git a/docs/adr.md b/docs/adr.md index 1114854..7d73c9d 100644 --- a/docs/adr.md +++ b/docs/adr.md @@ -127,3 +127,20 @@ Deploy the NFS CSI driver (csi-driver-nfs) as a second StorageClass alongside Lo - NFS CSI driver deploys a DaemonSet (node plugin) and a Deployment (controller) — lightweight resource footprint. - StorageClass `nfs` is explicitly not default — workloads must opt-in by specifying `storageClassName: nfs`. - If NAS is down, Prometheus stops writing but the cluster keeps running, acceptable. + +## 008 - Shared namespaces for multi-Kustomization workloads + +### Context + +When an application has multiple independent Flux Kustomizations that share a namespace (e.g. 
a CNPG database cluster managed by infra and an app deployment managed via a separate GitRepository), namespace ownership becomes ambiguous. If the namespace is created by one Kustomization and Flux prune is enabled, deleting that Kustomization would delete the namespace and take down the other workload with it. + +### Decision + +Introduce `infra/shared-namespaces/` as the single owner of any namespace that is shared between two or more independent Flux Kustomizations. Neither the app nor the database config creates or owns these namespaces — they are pre-created infrastructure. + +### Impacts + +- Deleting an app or its database config will not accidentally delete the namespace or affect other workloads in it. +- Namespaces are created early in the reconciliation order, before anything that depends on them. +- New apps with shared namespaces add their namespace entry to `infra/shared-namespaces/namespaces/namespaces.yaml` rather than creating their own namespace resource. +- Single-namespace apps that own their namespace entirely (e.g. pihole) are unaffected and continue to manage their namespace locally. 
From 36d151a166fc595e31585b530537866be857a9e9 Mon Sep 17 00:00:00 2001 From: Rick Gardner Date: Tue, 24 Feb 2026 00:22:29 -0500 Subject: [PATCH 2/3] Production postgres to 3 replicas, update disaster recovery docs - ff-production: bump instances to 3 (1 primary, 2 standbys) - disaster-recovery.md: add CNPG backup checklist items, RTO/RPO entries for ff-postgres, and recovery runbook section --- .../postgres/config/ff-production.yaml | 2 +- docs/disaster-recovery.md | 20 +++++++++++++++++++ 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/clusters/hlcl1/infra/databases/postgres/config/ff-production.yaml b/clusters/hlcl1/infra/databases/postgres/config/ff-production.yaml index e3c85aa..0fa4df3 100644 --- a/clusters/hlcl1/infra/databases/postgres/config/ff-production.yaml +++ b/clusters/hlcl1/infra/databases/postgres/config/ff-production.yaml @@ -4,7 +4,7 @@ metadata: name: ff-postgres namespace: ff-production spec: - instances: 1 + instances: 3 imageName: ghcr.io/cloudnative-pg/postgresql:16 diff --git a/docs/disaster-recovery.md b/docs/disaster-recovery.md index 9199335..47984a4 100644 --- a/docs/disaster-recovery.md +++ b/docs/disaster-recovery.md @@ -6,6 +6,8 @@ - [ ] Longhorn backup target configured (NAS NFS share) - [ ] Longhorn recurring snapshots scheduled - [X] Age private key backed up offline +- [ ] CNPG backup target configured (NAS NFS share or object storage) +- [ ] CNPG scheduled backups enabled for ff-postgres (production) - [ ] NAS app config directories backed up - [ ] Find per app solution for NAS backed apps - [ ] Sonarr @@ -29,6 +31,8 @@ | Longhorn volumes | Last snapshot | ~1 hr | Restore from Longhorn backup target (NFS Share) | | OpenBao data | Last Raft snapshot | ~1 hr | Requires restore + unseal | | Prometheus/Grafana data | Last Longhorn snapshot | ~1 hr | Dashboards are in Git (ConfigMaps) | +| ff-postgres (production) | Last CNPG backup | ~30 min | 3-replica HA; restore from CNPG backup target | +| ff-postgres (dev) | N/A | 
~5 min | Single instance, dev data is disposable — recreate from scratch | | Media library | N/A | N/A | Not backed up (re-downloadable, want to find a way to track what currently exists, will *arr config backups work here?) | | App configs (Sonarr, Radarr, etc.) | Last NAS backup | ~30 min | Restore config dirs, redeploy | | SOPS encryption key (age) | Offline backup and external Password Manager | Manual | Required to decrypt all secrets | @@ -65,6 +69,22 @@ MetalLB is fully managed by Flux via the chart/config split pattern (see [ADR-00 No manual intervention required. +### ff-postgres (CloudNativePG) recovery + +ff-postgres is managed by the CloudNativePG operator, reconciled by Flux via the `infra-ff-postgres` Kustomization. + +**Production** runs 3 instances (1 primary, 2 standbys). On single-node failure, CNPG promotes a standby automatically with no manual intervention; replication is asynchronous by default, so the most recent transactions can be lost unless synchronous replication is enabled. + +**On total cluster loss:** + +1. Flux reconciles `infra-shared-namespaces` → creates `ff-dev` and `ff-production` namespaces +2. Flux reconciles `infra-cloudnativepg` → installs CNPG operator (`wait: true`) +3. Flux reconciles `infra-ff-postgres` → creates `Cluster` resources +4. CNPG restores from backup target (once backup is configured and the `Cluster` uses `bootstrap.recovery` instead of `bootstrap.initdb` — see checklist above) +5. `ff-postgres-app` Secret is recreated by CNPG and available to the ff app + +**Dev** instance (single replica) is treated as disposable — recreate from scratch, run migrations. + ### PiHole recovery 1. One instance runs on k8s. If it dies, it should migrate to another node. 
If the entire cluster is down, we still have a second instance running on From 936323805db4d1f69beff51426ea2b97310a365e Mon Sep 17 00:00:00 2001 From: Richard S Gardner <8586577+rgardner4012@users.noreply.github.com> Date: Tue, 24 Feb 2026 18:00:27 -0500 Subject: [PATCH 3/3] Remove extra newline in kustomization.yaml --- clusters/hlcl1/infra/shared-namespaces/kustomization.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/clusters/hlcl1/infra/shared-namespaces/kustomization.yaml b/clusters/hlcl1/infra/shared-namespaces/kustomization.yaml index c6493c8..e3a45b3 100644 --- a/clusters/hlcl1/infra/shared-namespaces/kustomization.yaml +++ b/clusters/hlcl1/infra/shared-namespaces/kustomization.yaml @@ -2,4 +2,3 @@ apiVersion: kustomize.config.k8s.io/v1beta1 kind: Kustomization resources: - ks-namespaces.yaml -