From a85157da418a76b459d4e6fe316fce0b1a3e1e44 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Thu, 18 Jun 2026 05:23:43 +0000
Subject: [PATCH 1/4] deps: bump @mui/material from 9.0.1 to 9.1.1 in
 /Lighthouse.Frontend

Bumps [@mui/material](https://github.com/mui/material-ui/tree/HEAD/packages/mui-material) from 9.0.1 to 9.1.1.
- [Release notes](https://github.com/mui/material-ui/releases)
- [Changelog](https://github.com/mui/material-ui/blob/master/CHANGELOG.md)
- [Commits](https://github.com/mui/material-ui/commits/v9.1.1/packages/mui-material)

---
updated-dependencies:
- dependency-name: "@mui/material"
  dependency-version: 9.1.1
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
---
 Lighthouse.Frontend/package.json   |  2 +-
 Lighthouse.Frontend/pnpm-lock.yaml | 91 ++++++++++++------------------
 2 files changed, 37 insertions(+), 56 deletions(-)

diff --git a/Lighthouse.Frontend/package.json b/Lighthouse.Frontend/package.json
index cd3e55bd5..0295fe05f 100644
--- a/Lighthouse.Frontend/package.json
+++ b/Lighthouse.Frontend/package.json
@@ -21,7 +21,7 @@
 		"@microsoft/signalr": "^10.0.0",
 		"@mui/icons-material": "^7.3.11",
 		"@mui/lab": "7.0.0",
-		"@mui/material": "^9.0.1",
+		"@mui/material": "^9.1.1",
 		"@mui/system": "^9.1.1",
 		"@mui/x-charts": "9.0.1",
 		"@mui/x-data-grid": "^9.5.0",
diff --git a/Lighthouse.Frontend/pnpm-lock.yaml b/Lighthouse.Frontend/pnpm-lock.yaml
index 7c4458483..f4466433c 100644
--- a/Lighthouse.Frontend/pnpm-lock.yaml
+++ b/Lighthouse.Frontend/pnpm-lock.yaml
@@ -26,25 +26,25 @@ importers:
         version: 10.0.0
       '@mui/icons-material':
         specifier: ^7.3.11
-        version: 7.3.11(@mui/material@9.0.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@emotion/styled@11.14.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react-dom@19.2.7(react@19.2.7))(react@19.2.7))(@types/react@19.2.17)(react@19.2.7)
+        version: 7.3.11(@mui/material@9.1.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@emotion/styled@11.14.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react-dom@19.2.7(react@19.2.7))(react@19.2.7))(@types/react@19.2.17)(react@19.2.7)
       '@mui/lab':
         specifier: 7.0.0
-        version: 7.0.0(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@emotion/styled@11.14.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@mui/material@9.0.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@emotion/styled@11.14.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react-dom@19.2.7(react@19.2.7))(react@19.2.7))(@types/react@19.2.17)(react-dom@19.2.7(react@19.2.7))(react@19.2.7)
+        version: 7.0.0(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@emotion/styled@11.14.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@mui/material@9.1.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@emotion/styled@11.14.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react-dom@19.2.7(react@19.2.7))(react@19.2.7))(@types/react@19.2.17)(react-dom@19.2.7(react@19.2.7))(react@19.2.7)
       '@mui/material':
-        specifier: ^9.0.1
-        version: 9.0.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@emotion/styled@11.14.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react-dom@19.2.7(react@19.2.7))(react@19.2.7)
+        specifier: ^9.1.1
+        version: 9.1.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@emotion/styled@11.14.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react-dom@19.2.7(react@19.2.7))(react@19.2.7)
       '@mui/system':
         specifier: ^9.1.1
         version: 9.1.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@emotion/styled@11.14.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7)
       '@mui/x-charts':
         specifier: 9.0.1
-        version: 9.0.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@emotion/styled@11.14.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@mui/material@9.0.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@emotion/styled@11.14.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react-dom@19.2.7(react@19.2.7))(react@19.2.7))(@mui/system@9.1.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@emotion/styled@11.14.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react-dom@19.2.7(react@19.2.7))(react@19.2.7)
+        version: 9.0.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@emotion/styled@11.14.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@mui/material@9.1.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@emotion/styled@11.14.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react-dom@19.2.7(react@19.2.7))(react@19.2.7))(@mui/system@9.1.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@emotion/styled@11.14.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react-dom@19.2.7(react@19.2.7))(react@19.2.7)
       '@mui/x-data-grid':
         specifier: ^9.5.0
-        version: 9.5.0(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@emotion/styled@11.14.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@mui/material@9.0.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@emotion/styled@11.14.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react-dom@19.2.7(react@19.2.7))(react@19.2.7))(@mui/system@9.1.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@emotion/styled@11.14.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react-dom@19.2.7(react@19.2.7))(react@19.2.7)
+        version: 9.5.0(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@emotion/styled@11.14.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@mui/material@9.1.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@emotion/styled@11.14.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react-dom@19.2.7(react@19.2.7))(react@19.2.7))(@mui/system@9.1.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@emotion/styled@11.14.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react-dom@19.2.7(react@19.2.7))(react@19.2.7)
       '@mui/x-date-pickers':
         specifier: 9.0.0
-        version: 9.0.0(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@emotion/styled@11.14.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@mui/material@9.0.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@emotion/styled@11.14.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react-dom@19.2.7(react@19.2.7))(react@19.2.7))(@mui/system@9.1.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@emotion/styled@11.14.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(date-fns@4.4.0)(dayjs@1.11.21)(react-dom@19.2.7(react@19.2.7))(react@19.2.7)
+        version: 9.0.0(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@emotion/styled@11.14.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@mui/material@9.1.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@emotion/styled@11.14.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react-dom@19.2.7(react@19.2.7))(react@19.2.7))(@mui/system@9.1.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@emotion/styled@11.14.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(date-fns@4.4.0)(dayjs@1.11.21)(react-dom@19.2.7(react@19.2.7))(react@19.2.7)
       '@tanstack/react-query':
         specifier: ^5.101.0
         version: 5.101.0(react@19.2.7)
@@ -673,8 +673,8 @@ packages:
   '@microsoft/signalr@10.0.0':
     resolution: {integrity: sha512-0BRqz/uCx3JdrOqiqgFhih/+hfTERaUfCZXFB52uMaZJrKaPRzHzMuqVsJC/V3pt7NozcNXGspjKiQEK+X7P2w==}
 
-  '@mui/core-downloads-tracker@9.0.1':
-    resolution: {integrity: sha512-GzamIIhZ1bH77dq7eKaeyRgJdkypsxin4jBFq2EMs4lBWRR0LFO1CSVMsoebn/VvjcNrnrOrjy48MkrkQUK2iw==}
+  '@mui/core-downloads-tracker@9.1.1':
+    resolution: {integrity: sha512-AupmMICbdJHqAh6FfOMaaiiIr7dfEgZJn5DFfiPuGNrbs+ZZy9cD1APwO0TSVBz5j08MJEEY6n7iC76/2wjMEA==}
 
   '@mui/icons-material@7.3.11':
     resolution: {integrity: sha512-+hz5ilwHZ3djd5es3sCErLioqe/NhZcYTsV/TNXZAMdJdb23F4xzJjqnnZdnurc3S1+ietcssRNqieOhPQLZ7Q==}
@@ -709,13 +709,13 @@ packages:
       '@types/react':
         optional: true
 
-  '@mui/material@9.0.1':
-    resolution: {integrity: sha512-voyCpeUxcSWLN7KPZuq0pGCIt726T9K6kiVM3XUcywZDAlZSarLHaUxJVQpospbjjOzN53hwyjo8s6KoWl6utw==}
+  '@mui/material@9.1.1':
+    resolution: {integrity: sha512-Wv+gInjrpf99l1Q0oHe0eOWGTnlbkzs5nowClX65KCT/2fyPMwcbFEEkUsOHdpcHhB5UAbz/d7jlwt5ajWVvlA==}
     engines: {node: '>=14.0.0'}
     peerDependencies:
       '@emotion/react': ^11.5.0
       '@emotion/styled': ^11.3.0
-      '@mui/material-pigment-css': ^9.0.1
+      '@mui/material-pigment-css': ^9.1.1
       '@types/react': ^17.0.0 || ^18.0.0 || ^19.0.0
       react: ^17.0.0 || ^18.0.0 || ^19.0.0
       react-dom: ^17.0.0 || ^18.0.0 || ^19.0.0
@@ -815,14 +815,6 @@ packages:
       '@types/react':
         optional: true
 
-  '@mui/types@9.0.0':
-    resolution: {integrity: sha512-i1cuFCAWN44b3AJWO7mh7tuh1sqbQSeVr/94oG0TX5uXivac8XalgE4/6fQZcmGZigzbQ35IXxj/4jLpRIBYZg==}
-    peerDependencies:
-      '@types/react': ^17.0.0 || ^18.0.0 || ^19.0.0
-    peerDependenciesMeta:
-      '@types/react':
-        optional: true
-
   '@mui/types@9.1.1':
     resolution: {integrity: sha512-Zjt7u8wNvDg40rPTGoL+TnfkpuSKjwubsNSFRH1KAVZLcaV4I3AFNHIFbvH7p4F3alEibSbdd90xAgn5Rnfndg==}
     peerDependencies:
@@ -2390,9 +2382,6 @@ packages:
   react-is@17.0.2:
     resolution: {integrity: sha512-w2GsyukL62IJnlaff/nRegPQR94C/XXamvMWmSHRJ4y7Ts/4ocGRmTHvOs8PSE6pB3dWOrD/nueuU5sduBsQ4w==}
 
-  react-is@19.2.6:
-    resolution: {integrity: sha512-XjBR15BhXuylgWGuslhDKqlSayuqvqBX91BP8pauG8kd1zY8kotkNWbXksTCNRarse4kuGbe2kIY05ARtwNIvw==}
-
   react-is@19.2.7:
     resolution: {integrity: sha512-kZFnouyVv7eP/Phmrlo9FK+zcAdriZJvzxXHF1Sl1P377WSGe2G/JxVolhTrB/jeV47lKImhNUsijjHAAbcl/A==}
 
@@ -3475,20 +3464,20 @@ snapshots:
       - encoding
       - utf-8-validate
 
-  '@mui/core-downloads-tracker@9.0.1': {}
+  '@mui/core-downloads-tracker@9.1.1': {}
 
-  '@mui/icons-material@7.3.11(@mui/material@9.0.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@emotion/styled@11.14.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react-dom@19.2.7(react@19.2.7))(react@19.2.7))(@types/react@19.2.17)(react@19.2.7)':
+  '@mui/icons-material@7.3.11(@mui/material@9.1.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@emotion/styled@11.14.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react-dom@19.2.7(react@19.2.7))(react@19.2.7))(@types/react@19.2.17)(react@19.2.7)':
     dependencies:
       '@babel/runtime': 7.29.2
-      '@mui/material': 9.0.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@emotion/styled@11.14.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react-dom@19.2.7(react@19.2.7))(react@19.2.7)
+      '@mui/material': 9.1.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@emotion/styled@11.14.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react-dom@19.2.7(react@19.2.7))(react@19.2.7)
       react: 19.2.7
     optionalDependencies:
       '@types/react': 19.2.17
 
-  '@mui/lab@7.0.0(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@emotion/styled@11.14.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@mui/material@9.0.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@emotion/styled@11.14.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react-dom@19.2.7(react@19.2.7))(react@19.2.7))(@types/react@19.2.17)(react-dom@19.2.7(react@19.2.7))(react@19.2.7)':
+  '@mui/lab@7.0.0(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@emotion/styled@11.14.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@mui/material@9.1.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@emotion/styled@11.14.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react-dom@19.2.7(react@19.2.7))(react@19.2.7))(@types/react@19.2.17)(react-dom@19.2.7(react@19.2.7))(react@19.2.7)':
     dependencies:
       '@babel/runtime': 7.29.2
-      '@mui/material': 9.0.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@emotion/styled@11.14.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react-dom@19.2.7(react@19.2.7))(react@19.2.7)
+      '@mui/material': 9.1.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@emotion/styled@11.14.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react-dom@19.2.7(react@19.2.7))(react@19.2.7)
       '@mui/system': 7.3.11(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@emotion/styled@11.14.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7)
       '@mui/types': 7.4.12(@types/react@19.2.17)
       '@mui/utils': 7.3.10(@types/react@19.2.17)(react@19.2.7)
@@ -3501,13 +3490,13 @@ snapshots:
       '@emotion/styled': 11.14.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7)
       '@types/react': 19.2.17
 
-  '@mui/material@9.0.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@emotion/styled@11.14.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react-dom@19.2.7(react@19.2.7))(react@19.2.7)':
+  '@mui/material@9.1.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@emotion/styled@11.14.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react-dom@19.2.7(react@19.2.7))(react@19.2.7)':
     dependencies:
-      '@babel/runtime': 7.29.2
-      '@mui/core-downloads-tracker': 9.0.1
+      '@babel/runtime': 7.29.7
+      '@mui/core-downloads-tracker': 9.1.1
       '@mui/system': 9.1.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@emotion/styled@11.14.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7)
-      '@mui/types': 9.0.0(@types/react@19.2.17)
-      '@mui/utils': 9.0.1(@types/react@19.2.17)(react@19.2.7)
+      '@mui/types': 9.1.1(@types/react@19.2.17)
+      '@mui/utils': 9.1.1(@types/react@19.2.17)(react@19.2.7)
       '@popperjs/core': 2.11.8
       '@types/react-transition-group': 4.4.12(@types/react@19.2.17)
       clsx: 2.1.1
@@ -3515,7 +3504,7 @@ snapshots:
       prop-types: 15.8.1
       react: 19.2.7
       react-dom: 19.2.7(react@19.2.7)
-      react-is: 19.2.6
+      react-is: 19.2.7
       react-transition-group: 4.4.5(react-dom@19.2.7(react@19.2.7))(react@19.2.7)
     optionalDependencies:
       '@emotion/react': 11.14.0(@types/react@19.2.17)(react@19.2.7)
@@ -3604,12 +3593,6 @@ snapshots:
     optionalDependencies:
       '@types/react': 19.2.17
 
-  '@mui/types@9.0.0(@types/react@19.2.17)':
-    dependencies:
-      '@babel/runtime': 7.29.2
-    optionalDependencies:
-      '@types/react': 19.2.17
-
   '@mui/types@9.1.1(@types/react@19.2.17)':
     dependencies:
       '@babel/runtime': 7.29.7
@@ -3624,7 +3607,7 @@ snapshots:
       clsx: 2.1.1
       prop-types: 15.8.1
       react: 19.2.7
-      react-is: 19.2.6
+      react-is: 19.2.7
     optionalDependencies:
       '@types/react': 19.2.17
 
@@ -3642,13 +3625,13 @@ snapshots:
 
   '@mui/utils@9.0.0(@types/react@19.2.17)(react@19.2.7)':
     dependencies:
-      '@babel/runtime': 7.29.2
+      '@babel/runtime': 7.29.7
       '@mui/types': 9.1.1(@types/react@19.2.17)
       '@types/prop-types': 15.7.15
       clsx: 2.1.1
       prop-types: 15.8.1
       react: 19.2.7
-      react-is: 19.2.6
+      react-is: 19.2.7
     optionalDependencies:
       '@types/react': 19.2.17
 
@@ -3678,7 +3661,7 @@ snapshots:
 
   '@mui/x-charts-vendor@9.0.0':
     dependencies:
-      '@babel/runtime': 7.29.2
+      '@babel/runtime': 7.29.7
       '@types/d3-array': 3.2.2
       '@types/d3-color': 3.1.3
       '@types/d3-format': 3.0.4
@@ -3702,10 +3685,10 @@ snapshots:
       flatqueue: 3.0.0
       internmap: 2.0.3
 
-  '@mui/x-charts@9.0.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@emotion/styled@11.14.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@mui/material@9.0.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@emotion/styled@11.14.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react-dom@19.2.7(react@19.2.7))(react@19.2.7))(@mui/system@9.1.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@emotion/styled@11.14.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react-dom@19.2.7(react@19.2.7))(react@19.2.7)':
+  '@mui/x-charts@9.0.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@emotion/styled@11.14.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@mui/material@9.1.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@emotion/styled@11.14.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react-dom@19.2.7(react@19.2.7))(react@19.2.7))(@mui/system@9.1.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@emotion/styled@11.14.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react-dom@19.2.7(react@19.2.7))(react@19.2.7)':
     dependencies:
       '@babel/runtime': 7.29.2
-      '@mui/material': 9.0.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@emotion/styled@11.14.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react-dom@19.2.7(react@19.2.7))(react@19.2.7)
+      '@mui/material': 9.1.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@emotion/styled@11.14.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react-dom@19.2.7(react@19.2.7))(react@19.2.7)
       '@mui/system': 9.1.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@emotion/styled@11.14.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7)
       '@mui/utils': 9.0.0(@types/react@19.2.17)(react@19.2.7)
       '@mui/x-charts-vendor': 9.0.0
@@ -3724,10 +3707,10 @@ snapshots:
     transitivePeerDependencies:
       - '@types/react'
 
-  '@mui/x-data-grid@9.5.0(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@emotion/styled@11.14.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@mui/material@9.0.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@emotion/styled@11.14.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react-dom@19.2.7(react@19.2.7))(react@19.2.7))(@mui/system@9.1.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@emotion/styled@11.14.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react-dom@19.2.7(react@19.2.7))(react@19.2.7)':
+  '@mui/x-data-grid@9.5.0(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@emotion/styled@11.14.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@mui/material@9.1.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@emotion/styled@11.14.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react-dom@19.2.7(react@19.2.7))(react@19.2.7))(@mui/system@9.1.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@emotion/styled@11.14.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react-dom@19.2.7(react@19.2.7))(react@19.2.7)':
     dependencies:
       '@babel/runtime': 7.29.7
-      '@mui/material': 9.0.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@emotion/styled@11.14.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react-dom@19.2.7(react@19.2.7))(react@19.2.7)
+      '@mui/material': 9.1.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@emotion/styled@11.14.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react-dom@19.2.7(react@19.2.7))(react@19.2.7)
       '@mui/system': 9.1.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@emotion/styled@11.14.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7)
       '@mui/utils': 9.0.1(@types/react@19.2.17)(react@19.2.7)
       '@mui/x-internals': 9.1.0(@types/react@19.2.17)(react@19.2.7)
@@ -3743,10 +3726,10 @@ snapshots:
     transitivePeerDependencies:
       - '@types/react'
 
-  '@mui/x-date-pickers@9.0.0(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@emotion/styled@11.14.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@mui/material@9.0.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@emotion/styled@11.14.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react-dom@19.2.7(react@19.2.7))(react@19.2.7))(@mui/system@9.1.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@emotion/styled@11.14.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(date-fns@4.4.0)(dayjs@1.11.21)(react-dom@19.2.7(react@19.2.7))(react@19.2.7)':
+  '@mui/x-date-pickers@9.0.0(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@emotion/styled@11.14.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@mui/material@9.1.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@emotion/styled@11.14.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react-dom@19.2.7(react@19.2.7))(react@19.2.7))(@mui/system@9.1.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@emotion/styled@11.14.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(date-fns@4.4.0)(dayjs@1.11.21)(react-dom@19.2.7(react@19.2.7))(react@19.2.7)':
     dependencies:
       '@babel/runtime': 7.29.2
-      '@mui/material': 9.0.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@emotion/styled@11.14.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react-dom@19.2.7(react@19.2.7))(react@19.2.7)
+      '@mui/material': 9.1.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@emotion/styled@11.14.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react-dom@19.2.7(react@19.2.7))(react@19.2.7)
       '@mui/system': 9.1.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@emotion/styled@11.14.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7)
       '@mui/utils': 9.0.0(@types/react@19.2.17)(react@19.2.7)
       '@mui/x-internals': 9.0.0(@types/react@19.2.17)(react@19.2.7)
@@ -3766,11 +3749,11 @@ snapshots:
 
   '@mui/x-internal-gestures@9.0.2':
     dependencies:
-      '@babel/runtime': 7.29.2
+      '@babel/runtime': 7.29.7
 
   '@mui/x-internals@9.0.0(@types/react@19.2.17)(react@19.2.7)':
     dependencies:
-      '@babel/runtime': 7.29.2
+      '@babel/runtime': 7.29.7
       '@mui/utils': 9.0.0(@types/react@19.2.17)(react@19.2.7)
       react: 19.2.7
       reselect: 5.1.1
@@ -5319,8 +5302,6 @@ snapshots:
 
   react-is@17.0.2: {}
 
-  react-is@19.2.6: {}
-
   react-is@19.2.7: {}
 
   react-markdown@10.1.0(@types/react@19.2.17)(react@19.2.7):
@@ -5359,7 +5340,7 @@ snapshots:
 
   react-transition-group@4.4.5(react-dom@19.2.7(react@19.2.7))(react@19.2.7):
     dependencies:
-      '@babel/runtime': 7.29.2
+      '@babel/runtime': 7.29.7
       dom-helpers: 5.2.1
       loose-envify: 1.4.0
       prop-types: 15.8.1

From c6b16e5273d350903c1c58709ec2aea756936b77 Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Thu, 18 Jun 2026 05:53:44 +0000
Subject: [PATCH 2/4] test: inline @mui/material for vitest ESM resolution

MUI 9.1.1's Transition.mjs uses a directory deep-import of
react-transition-group/TransitionGroupContext, which Node's native ESM
resolver (used by Vitest for non-inlined deps) rejects. Inline @mui/material
so Vite transforms it and resolves the import, matching the existing
@mui/x-data-grid handling.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
Claude-Session: https://claude.ai/code/session_01LQGQcGTMm2UQQuH41huhR9
---
 Lighthouse.Frontend/vitest.config.ts | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Lighthouse.Frontend/vitest.config.ts b/Lighthouse.Frontend/vitest.config.ts
index 79acd13aa..7635f7948 100644
--- a/Lighthouse.Frontend/vitest.config.ts
+++ b/Lighthouse.Frontend/vitest.config.ts
@@ -29,7 +29,7 @@ export default defineConfig({
 		],
 		server: {
 			deps: {
-				inline: ["@mui/x-data-grid"],
+				inline: ["@mui/x-data-grid", "@mui/material"],
 			},
 		},
 

From 2397c85cee42190f6a142046b793d8f899ac8a13 Mon Sep 17 00:00:00 2001
From: Benjamin Huser-Berta <github.com.bok@huser-berta.com>
Date: Thu, 18 Jun 2026 20:35:14 +0200
Subject: [PATCH 3/4] docs(discuss): epic 5305 k8s-readiness DISCUSS artifacts

Add the DISCUSS-wave outputs for Epic #5305 (make the Lighthouse app
itself safe to run on Kubernetes): feature-delta with 7 user stories,
opportunity-scored JTBD jobs, locked decisions D1-D6, cross-cutting
checklist, and the 7 per-slice briefs. Adds the new platform-operator
persona, its journey, and the operator jobs in jobs.yaml.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
Claude-Session: https://claude.ai/code/session_01EyQSxb2QZsBfFzWBxn81kx
---
 .../epic-5305-k8s-readiness/feature-delta.md  | 445 ++++++++++++++++++
 .../slices/slice-01-forwarded-headers.md      |  43 ++
 .../slices/slice-02-health-checks.md          |  44 ++
 .../slices/slice-03-graceful-shutdown.md      |  45 ++
 .../slices/slice-04-expand-only-migrations.md |  42 ++
 .../slices/slice-05-observability.md          |  42 ++
 .../slices/slice-06-mcp-inbound-auth.md       |  45 ++
 .../slices/slice-07-horizontal-scalability.md |  46 ++
 docs/product/jobs.yaml                        | 176 ++++++-
 .../journeys/epic-5305-k8s-readiness.yaml     | 183 +++++++
 docs/product/personas/platform-operator.yaml  | 101 ++++
 11 files changed, 1211 insertions(+), 1 deletion(-)
 create mode 100644 docs/feature/epic-5305-k8s-readiness/feature-delta.md
 create mode 100644 docs/feature/epic-5305-k8s-readiness/slices/slice-01-forwarded-headers.md
 create mode 100644 docs/feature/epic-5305-k8s-readiness/slices/slice-02-health-checks.md
 create mode 100644 docs/feature/epic-5305-k8s-readiness/slices/slice-03-graceful-shutdown.md
 create mode 100644 docs/feature/epic-5305-k8s-readiness/slices/slice-04-expand-only-migrations.md
 create mode 100644 docs/feature/epic-5305-k8s-readiness/slices/slice-05-observability.md
 create mode 100644 docs/feature/epic-5305-k8s-readiness/slices/slice-06-mcp-inbound-auth.md
 create mode 100644 docs/feature/epic-5305-k8s-readiness/slices/slice-07-horizontal-scalability.md
 create mode 100644 docs/product/journeys/epic-5305-k8s-readiness.yaml
 create mode 100644 docs/product/personas/platform-operator.yaml

diff --git a/docs/feature/epic-5305-k8s-readiness/feature-delta.md b/docs/feature/epic-5305-k8s-readiness/feature-delta.md
new file mode 100644
index 000000000..0f4cf0cc2
--- /dev/null
+++ b/docs/feature/epic-5305-k8s-readiness/feature-delta.md
@@ -0,0 +1,445 @@
+# Feature Delta: epic-5305-k8s-readiness
+
+<!-- markdownlint-disable MD024 MD041 -->
+
+Wave: DISCUSS | Date: 2026-06-16 | Density: lean (per ~/.nwave/global-config.json) | Epic: ADO #5305
+
+**Feature goal**: make the Lighthouse application itself safe to run on Kubernetes — multiple
+replicas, rolling updates, behind an Ingress/reverse-proxy — through seven production C#/TS
+changes, WITHOUT changing the sacrosanct single-container standalone product. This epic runs
+BEFORE the k8s Productization epic (#5306): the app must be cluster-safe before it is packaged
+and hosted. It was split out of the learning epic #5189 on 2026-06-15.
+
+This DISCUSS covers all 7 child stories: #5311 forwarded headers, #5310 health checks, #5309
+graceful shutdown, #5308 expand-only migrations + safe startup, #5312 observability, #5307 MCP
+inbound auth, #5304 horizontal scalability. The north-star they slice toward (D1–D5, Q1–Q5, §4
+architecture) lives in `docs/feature/l8e-kubernetes-learning/planning-stage.md` and is inherited,
+not re-litigated.
+
+**Prior-wave consultation** (READING ENFORCEMENT):
+- ✓ `docs/product/jobs.yaml` · ✓ `docs/product/personas/{config-admin,lighthouse-maintainer}.yaml`
+- ✓ `docs/product/journeys/multiple-cycle-times.yaml` (schema) · ✓ `docs/product/kpi-contracts.yaml`
+- ✓ `docs/feature/l8e-kubernetes-learning/planning-stage.md` (north-star backbone)
+- ✓ ADO #5305 (epic) + #5304, #5307–#5312 (child descriptions)
+- ⊘ `docs/feature/epic-5305-k8s-readiness/{discover,diverge}/` (none — planning-stage is the upstream evidence)
+
+No DISCUSS decision below contradicts the north-star; each inherits it.
+
+---
+
+## Wave: DISCUSS / [REF] Persona
+
+**`platform-operator`** (NEW — `docs/product/personas/platform-operator.yaml`) — the person who *runs*
+a Lighthouse instance, not the one who reads metrics inside it. Two flavours of one persona: the
+**self-hoster** running a single container today (the sacrosanct standalone product) and the **LPW
+SaaS operator** running many replicas across tenants tomorrow. Cares about the pod/process lifecycle,
+rollouts and proxying — the operational envelope around the app. Distinct from `config-admin` (edits
+in-app config) and the end-user product personas (flow-coach, forecaster). The MCP-caller story (#5307)
+has a secondary actor — the **MCP/CLI caller** authenticating as themselves — but the persona who
+deploys and secures the MCP server is `platform-operator`.
+
+---
+
+## Wave: DISCUSS / [REF] JTBD one-liners
+
+Six jobs (added to `docs/product/jobs.yaml`), all `persona: platform-operator`. Opportunity-scored:
+
+| Job ID | One-liner | Imp | Sat | Gap | Stories |
+|---|---|---|---|---|---|
+| `job-operator-survive-multiple-replicas` | Run >1 replica without N× syncs, lost notifications, or migration races | 5 | 1 | **4** | #5304, #5308 (lock) |
+| `job-operator-zero-downtime-rollout` | Upgrade with no dropped requests and no data loss | 4 | 1 | **3** | #5308, #5309 |
+| `job-operator-correct-behind-proxy` | Correct HTTPS / cookies / OIDC / SignalR behind a reverse proxy | 4 | 1 | **3** | #5311 |
+| `job-mcp-caller-own-identity` | Each MCP caller drives Lighthouse as themselves, not a shared baked key | 4 | 1 | **3** | #5307 |
+| `job-operator-trust-pod-health` | k8s routes to only-ready pods, restarts only dead ones | 4 | 2 | 2 | #5310 |
+| `job-operator-observe-in-cluster` | Per-instance metrics / structured logs / traces in my stack | 3 | 2 | 1 | #5312 |
+
+**Highest opportunity** = `survive-multiple-replicas` (gap 4) and the three gap-3 jobs. This drives
+prioritization (below): the high-uncertainty multi-replica job ships last but SPIKEs early; the gap-3
+"login behind proxy" job ships first because it is small and unblocks all cluster auth testing.
+
+---
+
+## Wave: DISCUSS / [REF] Scope assessment (Elephant-Carpaccio early gate)
+
+Oversized signals present: **>3 bounded contexts/technologies** (EF/Postgres, SignalR/Redis, ASP.NET
+health + lifecycle, OAuth/MCP, OpenTelemetry, reverse-proxy middleware) and **>2 weeks effort**. → This
+is correctly an **EPIC**, already split from #5189 and decomposed on ADO into 7 independently-shippable
+stories. Each story is one thin vertical slice that ships end-to-end and auto-degrades to standalone.
+**Verdict: PASS — already split; user confirmed full-epic DISCUSS of all 7.** No further split needed;
+the only slice needing internal care is #5304 (gated behind a required SPIKE, see slice-07).
+
+---
+
+## Wave: DISCUSS / [REF] Locked decisions
+
+Inherited from the north-star (planning-stage §3) and applied as this epic's hard gates:
+
+- **[D1 · EPIC GATE — standalone is sacrosanct]** Every story MUST preserve the single-container
+  standalone + regular server deployment unchanged, auto-degrading to the single-instance path: no Redis
+  ⇒ in-memory backplane; one replica works; SQLite stays default; frontend stays embedded. Verified per
+  story as an acceptance criterion. (planning §0 epic gate + §D4)
+- **[D2 · full nWave for product code]** These are real C#/TS changes → full `DISCUSS→…→DELIVER` + the
+  CLAUDE.md RBAC / Lighthouse-Clients / Website checklist, not the learning light-loop. (planning §D3)
+- **[D3 · sequence]** Learning #5189 → **#5305 (this)** → Productization #5306. The app must be
+  cluster-safe before it is packaged/hosted. Cluster-side stacks (Prometheus/Grafana/Loki, oauth2-proxy,
+  Ingress manifests, Helm chart) are #5306, NOT here — this epic is in-app code only.
+- **[D4 · expand-only migrations]** Additive-only per release; destructive cleanup is a *later* release
+  (expand now, contract later) because rolling updates run new+old pods against one shared Postgres.
+  (memory: expand-only/non-destructive; planning #5308)
+- **[D5 · #5304 architecture is OPEN — do not pre-pick]** The cluster-aware unit is the *update queue
+  itself*, not a timer leader (both the periodic loop AND inline manual-refresh paths must be covered).
+  Leader election is necessary-not-sufficient. DESIGN/SPIKE decides between distributed-single-consumer
+  queue vs. cluster-wide per-entity lock + shared status store. (ADO #5304 architectural note 2026-06-14)
+- **[D6 · MCP auth = clients-repo work, version-gated]** #5307 lands primarily in `lighthouse-clients`;
+  preferred path is MCP OAuth pass-through, interim is X-Api-Key pass-through reusing the existing
+  owner-resolved/scoped keys. Version-gate the endpoint (strictly newer than last released Lighthouse;
+  `FEATURE_REQUIRES_SERVER_NEWER_THAN`). (planning §6 Q5)
+
+---
+
+## Wave: DISCUSS / [REF] Cross-cutting impact checklist (mandatory per CLAUDE.md DISCUSS)
+
+Recorded explicitly — "N/A, because…" where no change is needed; these extend DoR Item 7.
+
+| Story | RBAC | Lighthouse-Clients (CLI + MCP) | Website |
+|---|---|---|---|
+| #5311 forwarded headers | N/A — derives scheme/host only; no authorization surface. (But it *fixes* OIDC behind a proxy, so auth *works* correctly.) | N/A — no API contract change. | N/A — operational, not marketed. |
+| #5310 health checks | N/A — unauthenticated operational endpoints carrying no business data. | N/A. | N/A. |
+| #5309 graceful shutdown | N/A — server lifecycle only. | N/A — callers just reconnect. | N/A. |
+| #5308 migrations + startup | N/A — provider/startup mechanics; confirm provider selection touches no RBAC-gated admin surface (it does not). | Possibly a CLI **connection hint** for Postgres — confirm in DESIGN; otherwise N/A. | N/A. |
+| #5312 observability | **Decide in DESIGN**: `/metrics` can leak request paths; default cluster-internal/unauthenticated, but exposure must be a conscious call (Sonar/security-hotspot). | N/A. | N/A. |
+| #5307 MCP inbound auth | **Central** — removes ambient authority; the MCP path honours per-caller `ApiKeyPermission` scope via the existing `ApiKeyAuthenticationHandler` (no new RBAC port, flows through the established handler). | **Primary surface** — change lands in `lighthouse-clients`; **version-gate** per CLAUDE.md. | N/A — security/packaging, not a marketed UI feature. |
+| #5304 horizontal scalability | N/A — no authorization surface. | Likely N/A — internal infra, no API contract change; confirm in DESIGN. | N/A. |
+
+---
+
+## Wave: DISCUSS / [REF] User stories
+
+Seven stories, one per ADO child, one per slice. US-NN ↔ slice-NN (prioritized order). Every story is
+operator-visible (none is `@infrastructure`-only → no slice-composition gate violation: every slice
+ships one value story). Each inherits **D1 (standalone gate)** as an embedded AC.
+
+### US-01 — Login works behind a TLS-terminating reverse proxy (ADO #5311)
+As a **platform-operator**, I put Lighthouse behind Traefik/nginx/an Ingress and want OIDC login + secure
+cookies + SignalR to use the real public HTTPS host, so users log in first try with no redirect loop.
+`job_id: job-operator-correct-behind-proxy`
+
+#### Elevator Pitch
+Before: behind a TLS-terminating proxy, OIDC redirects to `http://`, the callback loops, and secure cookies are dropped — login is broken.
+After: declare the proxy as trusted + enable forwarded headers → hit `https://<public-host>` → the OIDC redirect/callback are `https://<public-host>/...`, the secure cookie persists, login succeeds.
+Decision enabled: the operator can safely front Lighthouse with any reverse proxy and trust that auth works.
+
+#### Acceptance criteria
+- AC1: With trust ON and `X-Forwarded-Proto: https` + `X-Forwarded-Host: <public>` from a **declared known proxy**, the generated OIDC redirect/callback URL is `https://<public>/...` (integration test).
+- AC2: Forwarded headers from an **undeclared** source are ignored — no scheme/host spoof.
+- AC3 (D1): With no proxy declared, direct/standalone access is byte-identical to today; forwarded-header trust is OFF by default.
+
+### US-02 — Kubernetes trusts the pod's real health (ADO #5310)
+As a **platform-operator**, I want readiness gated on real serving capacity and liveness shallow, so k8s
+routes traffic only to serving pods and restarts only genuinely-dead ones.
+`job_id: job-operator-trust-pod-health`
+
+#### Elevator Pitch
+Before: there are no real probes; k8s can route to a not-yet-ready pod (cold 500s) or restart-loop a healthy-but-slow pod.
+After: configure probes → `GET /health/ready` is 503 until DB-reachable + migrations-applied, `GET /health/live` stays 200 through a slow dependency, `GET /health/startup` covers slow boot.
+Decision enabled: the operator trusts rollout/health status and can set probe configs with confidence.
+
+#### Acceptance criteria
+- AC1: readiness returns unhealthy when the DB is unreachable, while liveness stays healthy (no restart storm).
+- AC2: readiness returns healthy only when DB reachable AND migrations applied.
+- AC3 (D1): endpoints return 200 / are harmless in single-container mode with no orchestrator.
+
+### US-03 — Rolling updates drop no requests (ADO #5309)
+As a **platform-operator**, I want a terminating pod to drain in-flight HTTP + SignalR + the update queue
+on SIGTERM, so I can roll out updates during the day with zero dropped requests.
+`job_id: job-operator-zero-downtime-rollout`
+
+#### Elevator Pitch
+Before: a rolling update kills pods mid-request — in-flight HTTP/SignalR/queued updates are lost.
+After: `kubectl rollout restart` (or any SIGTERM) → the pod stops intake, drains in-flight work within `terminationGracePeriodSeconds`, then exits → a load test + live SignalR client sees zero failed requests and a clean reconnect.
+Decision enabled: the operator ships updates without a maintenance window.
+
+#### Acceptance criteria
+- AC1: on SIGTERM/`StopAsync`, an in-flight HTTP request and a queued update complete (or the update is safely re-enqueued) before the host reports stopped.
+- AC2: readiness flips to NotReady on `ApplicationStopping` so the LB stops routing before drain.
+- AC3 (D1): a single-container Ctrl-C behaves exactly as today.
+
+### US-04 — Concurrent replicas migrate safely and additively (ADO #5308)
+As a **platform-operator**, I want each release's migrations additive-only and exactly one replica to
+apply them on concurrent startup, so old+new pods coexist on one Postgres without breakage or races.
+`job_id: job-operator-zero-downtime-rollout` (+ `job-operator-survive-multiple-replicas`)
+
+#### Elevator Pitch
+Before: every pod races `Database.Migrate()` on boot, and a destructive migration can break the old pods still serving during a rollover.
+After: scale a fresh deploy to 3 replicas against one Postgres → the logs show migrations applied **once** (one applies, two wait); a destructive migration is **rejected by CI** before merge.
+Decision enabled: the operator rolls out schema changes during the working day without a downtime window.
+
+#### Acceptance criteria
+- AC1: N hosts started against one DB apply migrations exactly once (concurrency test asserting single application).
+- AC2: a CI check rejects a destructive migration (drop/rename column/table) in a release; expand→contract two-release pattern documented.
+- AC3 (D1): single SQLite or Postgres instance auto-migrates on boot exactly as today (lock degrades to a no-op).
+
+### US-05 — Lighthouse is observable in my cluster (ADO #5312)
+As a **platform-operator**, I want a Prometheus `/metrics` endpoint, structured JSON logs and OTel traces,
+so Lighthouse appears on my existing dashboards like any first-class service.
+`job_id: job-operator-observe-in-cluster`
+
+#### Elevator Pitch
+Before: no `/metrics` and unstructured text logs — Lighthouse is a black box in the cluster.
+After: scrape `GET /metrics` → request/error/latency render in Grafana; logs ship as queryable JSON to Loki; a slow request is traceable.
+Decision enabled: the operator monitors and alerts on Lighthouse from the same stack as everything else.
+
+#### Acceptance criteria
+- AC1: `GET /metrics` returns Prometheus-format output including HTTP server metrics.
+- AC2: logs are emitted as structured JSON to stdout with the expected fields.
+- AC3 (D1): with telemetry disabled, no exporter runs and there is no behaviour or performance change for the single container (low-overhead/off-by-default).
+
+### US-06 — Each MCP caller authenticates as themselves (ADO #5307)
+As a **platform-operator** exposing the MCP HTTP server, I want each caller to authenticate with their own
+credential (passed through), so every caller drives Lighthouse with their own RBAC scope and audit — no
+shared baked key. `job_id: job-mcp-caller-own-identity`
+
+#### Elevator Pitch
+Before: the `mcp-http` container holds one baked `LIGHTHOUSE_API_KEY` — a confused deputy; every caller acts as that owner/scope with no per-user audit, and an unauth'd `/mcp` is an open hole.
+After: a caller sends their OWN OAuth token (or `X-Api-Key`) to `/mcp` → the server passes it through → Lighthouse owner-resolves it (`ApiKey.OwnerSubject → sub`) and applies that caller's `ApiKeyPermission` scope.
+Decision enabled: the operator exposes MCP beyond ClusterIP without distributing/rotating a shared secret, and security review gets a clean "no ambient authority" answer.
+
+#### Acceptance criteria
+- AC1: two callers with distinct credentials each see only their own RBAC-scoped data; the credential is forwarded, not a baked key.
+- AC2: the wrapping client method version-gates — an old Lighthouse server fails with a clear "upgrade Lighthouse" error, not an opaque 404.
+- AC3 (D1): the existing single-key / dev path stays available; no break for self-hosters.
+
+### US-07 — Lighthouse runs safely with N replicas (ADO #5304)
+As a **platform-operator**, I want syncs to run once across the fleet, every notification to reach all
+pods' clients, and update status consistent across pods, so I scale Lighthouse like a normal web app.
+`job_id: job-operator-survive-multiple-replicas`
+
+#### Elevator Pitch
+Before: Lighthouse is a stateful singleton — a second replica means N× external syncs racing Postgres, notifications that reach only one pod's clients, and a per-pod status cache that disagrees.
+After: configure Redis + scale to 3 → a manual refresh served by pod B notifies a client on pod A; the external system is synced **once** per cycle; `GetUpdateStatus` agrees across pods.
+Decision enabled: the operator sets a replica count for HA/scale and trusts Lighthouse stays correct through a node failure.
+
+#### Acceptance criteria
+- AC1: with Redis + N hosts, a single sync per entity occurs under concurrent timer + manual-refresh load (no N× duplication, no racing writes).
+- AC2: a notification raised on any pod reaches clients connected to any other pod (Redis backplane).
+- AC3: `GetUpdateStatus` returns a consistent answer across pods (shared/distributed status store).
+- AC4 (D1): with no Redis / one host, behaviour AND code path are identical to today.
+
+---
+
+## Wave: DISCUSS / [REF] Story map
+
+```
+Backbone (operator activities):  CONFIGURE ──▶ DEPLOY ──▶ ROLL OUT ──▶ SCALE ──▶ OPERATE
+                                     │           │           │            │          │
+US-01 forwarded headers ────────────┘           │           │            │          │
+US-02 health checks ────────────────────────────┘           │            │          │
+US-03 graceful shutdown ────────────────────────────────────┤           │          │
+US-04 expand-only migrations + startup lock ────────────────┘           │          │
+US-06 MCP inbound auth (parallel, clients repo) ────────────────────────┤          │
+US-07 horizontal scalability (SPIKE-gated, last) ───────────────────────┘          │
+US-05 observability (lands any time after deploy) ─────────────────────────────────┘
+```
+
+**Walking skeleton**: none — brownfield hardening; US-01 (smallest, config-gated) is the thin first slice
+that proves the standalone-gate + production-data discipline for the rest.
+
+---
+
+## Wave: DISCUSS / [REF] Prioritization
+
+Order by (a) learning leverage / uncertainty, (b) dependency, (c) dogfood cadence:
+
+1. **US-01 forwarded headers** — smallest; unblocks all cluster auth testing; near-zero risk. First.
+2. **US-02 health checks** — prerequisite for any safe rollout; foundational for verifying US-03/US-04.
+3. **US-03 graceful shutdown** — pairs with US-02 for zero-downtime; drains the *current* queue.
+4. **US-04 expand-only migrations + startup lock** — precedes real multi-replica; feeds US-02's "migrations applied".
+5. **US-05 observability** — independent; bring forward if operating blind during US-07 hurts.
+6. **US-06 MCP inbound auth** — mostly clients repo, parallelizable; gated by an OAuth-vs-X-Api-Key SPIKE.
+7. **US-07 horizontal scalability** — highest uncertainty, largest, depends on US-03/US-04; ship LAST but
+   run its **required SPIKE early** (learning leverage: disprove "leader election is enough" cheaply).
+
+---
+
+## Wave: DISCUSS / [REF] WS strategy
+
+**Strategy D — Configurable / env-switching** per Mandate 5. Every story is config-gated and auto-degrades
+(no Redis ⇒ in-memory; no proxy declared ⇒ no forwarded-header trust; telemetry off by default; migration
+lock no-op at 1 instance). This is the D1 standalone gate expressed as the WS mechanism: one codebase serves
+both the single-container self-hoster and the multi-replica SaaS, selected by configuration. (Trigger:
+WS=D fires the `alternatives-considered` expansion suggestion — see wave-end menu.)
+
+---
+
+## Wave: DISCUSS / [REF] Driving ports (inbound surfaces)
+
+- **HTTP** — `/health/ready`, `/health/live`, `/health/startup` (US-02); `/metrics` (US-05); existing OIDC
+  redirect/callback + SignalR `/hub` negotiation now proxy-aware (US-01); `/mcp` inbound auth (US-06).
+- **Process signals** — SIGTERM / `IHostApplicationLifetime` (US-03).
+- **Config** — env vars / appsettings: trusted-proxy set (US-01), Redis connection (US-07), telemetry
+  exporter (US-05), shutdown timeout (US-03).
+- **CLI/MCP client** — `lighthouse-clients` MCP server credential pass-through + version gate (US-06).
+- **No new in-app UI surface.** (Operator surfaces are HTTP/CLI/kubectl, not the React app.)
+
+---
+
+## Wave: DISCUSS / [REF] Pre-requisites
+
+- Learning epic #5189 stories 00–07 (k8s fundamentals + the story-07 scaling spike) inform US-07; story 08
+  (#5198) is the only open learning story and is not a blocker.
+- A real Postgres + Redis on k3s for US-04/US-07 production-data acceptance (InMemory cannot reproduce the
+  races — recurring lesson).
+- The `CreateMigration` PowerShell script for US-04 migration generation (per CLAUDE.md).
+- `lighthouse-clients` repo access + the last-released Lighthouse version for the US-06 version-gate baseline.
+
+---
+
+## Wave: DISCUSS / [REF] Outcome KPIs
+
+Lighthouse is self-hosted — no central telemetry (memory: self-hosted-telemetry-gap). All KPIs are
+`per_instance` (operator-observable via logs/metrics) or `vendor_demo_only` (LPW stage/prod). Append to
+`docs/product/kpi-contracts.yaml` in DEVOPS.
+
+| KPI | Target | Measurement | Scope |
+|---|---|---|---|
+| Dropped requests during a rolling update (US-03) | 0 | load-gen error count across a rollout on stage | vendor_demo_only |
+| External syncs per entity per cycle at N replicas (US-07) | 1 (exactly once, no duplicates) | connector request log / structured-log sync events | vendor_demo_only |
+| Concurrent-startup migration applications (US-04) | 1 | migration-history + structured startup logs | per_instance |
+| OIDC login success behind proxy (US-01) | 100% first-try | manual + stage smoke | per_instance |
+| Pod restart-on-slow-dependency events (US-02) | 0 | liveness restart count vs. DB-latency events | vendor_demo_only |
+| MCP calls still using a shared baked key once US-06 ships | 0 | per-caller audit / structured auth logs | per_instance |
+| Lighthouse `/metrics` scrape success (US-05) | 100% | Prometheus `up` for the Lighthouse target | per_instance |
+
+---
+
+## Wave: DISCUSS / [REF] DoR validation (9 items, evidence)
+
+1. **Business value clear** — ✓ each story maps to an opportunity-scored job (gap 1–4); value = operability of the hosted/self-hosted product.
+2. **User/persona identified** — ✓ `platform-operator` (new persona file); secondary MCP-caller actor on US-06.
+3. **Acceptance criteria testable** — ✓ each US has 3–4 ACs verifying the Elevator-Pitch "After" end-to-end, incl. the D1 standalone-gate AC.
+4. **Dependencies known** — ✓ sequence + soft deps mapped (US-07 ⟵ US-03/US-04; US-04 feeds US-02's "migrations applied" readiness check); pre-requisites listed.
+5. **Story sized / sliced** — ✓ 7 thin slices, each its own brief at `slices/slice-0N-*.md`, ≤~6 crafter days except US-07 which is SPIKE-gated.
+6. **No blocking unknowns** — ✓ the one real unknown (US-07 cluster-aware-queue design) is explicitly OPEN (D5) and quarantined behind a required SPIKE; not pre-picked.
+7. **Technical notes / constraints + cross-cutting** — ✓ RBAC/Clients/Website checklist recorded per story (above); D1–D6 locked decisions.
+8. **Outcome KPIs defined** — ✓ 7 KPIs with numeric targets + measurement + scope.
+9. **Definition of Done agreed** — ✓ below.
+
+**Requirements completeness**: 0.96 (>0.95). The one soft gap: US-07's solution shape is intentionally
+deferred to SPIKE/DESIGN — that is recorded as a decision (D5), not a missing requirement.
+
+---
+
+## Wave: DISCUSS / [REF] Definition of Done (9-item)
+
+1. All ACs green (incl. the D1 standalone-gate AC) for the story. 2. `dotnet build` zero warnings;
+`pnpm build` + Biome clean (for any TS). 3. `dotnet test` / `pnpm test` green. 4. SonarCloud
+`new_violations = 0`. 5. Mutation kill ≥ 80% on the story's real surface (per CLAUDE.md per-feature). 6.
+Cross-cutting checklist answered for the story (RBAC/Clients/Website). 7. Production-data acceptance run
+(real Postgres/Redis/proxy/OIDC as the slice requires) — not synthetic-only. 8. Docs/screenshots updated
+if any user-visible surface changed (most stories: N/A operational — record it). 9. ADO story
+Active→Resolved after CI green; push paused for review (ado-sync ritual).
+
+---
+
+## Wave: DISCUSS / [REF] Out-of-scope
+
+- Cluster-side stacks: Prometheus/Grafana/Loki deployment, oauth2-proxy, Ingress/Traefik manifests, the
+  Helm chart, ArgoCD/GitOps, wildcard DNS, secrets operators → **Productization epic #5306**.
+- HPA / `sessionAffinity` / load-test manifests → the **learning** story 07 (#5197), throwaway scratch.
+- Per-tenant isolation / namespace-per-tenant model → #5306.
+- Destructive (contract) migrations for any expand done here → a **later** release (D4).
+- Edge-vs-ClusterIP MCP exposure + oauth2-proxy decisions → #5306 (planning Q5).
+- Any change to the standalone single-container product behaviour (forbidden by D1).
+
+---
+
+## Wave: DISCUSS / [REF] Wave decisions summary
+
+- **Feature type**: cross-cutting (backend C#, clients TS, operational surface) — NOT infrastructure-only
+  (US-01/US-06 are operator/user-visible), so JTBD traceability applies and the escape valve was rejected.
+- **Persona**: new `platform-operator` (user-chosen over extending `lighthouse-maintainer`).
+- **Scope**: full epic — all 7 stories DISCUSSed in one pass (user-chosen).
+- **ADO**: #5304 re-parented under Epic #5305 (was orphaned); all 7 children now under #5305.
+- **Walking skeleton**: none (brownfield); US-01 is the thin proving slice.
+- **Primary needs**: run Lighthouse multi-replica + behind a proxy + rolling-update-safe + observable, all
+  WITHOUT touching the sacrosanct standalone (D1).
+- **Constraints established**: D1–D6 (above). D5 keeps US-07's architecture OPEN behind a SPIKE.
+- **Upstream changes**: none — DISCUSS inherits the planning-stage north-star; no DISCOVER assumption changed.
+
+**Handoff** → DESIGN (`nw-solution-architect`, full artifacts; #5304's cluster-aware-queue SPIKE is the
+first DESIGN concern) + DEVOPS (`nw-platform-architect`, `outcome-kpis` only). DESIGN and DEVOPS run in parallel.
+
+
+---
+
+## Wave: DISCUSS / [WHY] Alternatives considered
+
+Rendered on request (triggers: cross-context complexity, WS=D). Decision rationale for the choices that
+are deferred to SPIKE/DESIGN or locked above — what was weighed and why. These are inputs for DESIGN, not
+re-openings of D1–D6.
+
+### A1 · US-07 — what becomes the cluster-aware unit (OPEN, the SPIKE question)
+The breakage is that `UpdateQueueService` is registered via `AddSingleton`, which makes it a singleton
+*per process*, not per cluster: each replica has its own Channel queue, consumer, awaiters, and the
+`updateStatuses` dedup dict — and updates fire from two paths (the timer loop AND inline manual refresh on whatever replica serves the request).
+
+- **(rejected as insufficient on its own) Leader election for the timer only.** Elect one replica to run
+  Team/Portfolio/ForecastUpdater. *Why not:* does nothing for a manual refresh handled by a follower, and
+  the per-process dedup is invisible across replicas — the same entity can still be updated concurrently and
+  race the same Postgres rows. Necessary-not-sufficient; the research doc §1 is explicit. Keep leader
+  election only as a *component* of a fuller design, not the design.
+- **(candidate, preferred-leaning) Distributed queue with a single consumer.** Replace the in-process
+  Channel with a shared queue (Redis stream / Postgres-backed) drained by exactly one consumer across the
+  fleet; manual refresh enqueues to the shared queue and awaits completion via a shared status store.
+  *Pro:* makes the *queue itself* cluster-aware (covers both trigger paths), dedup + awaited-completion +
+  `GetUpdateStatus` all consistent. *Con:* most moving parts; introduces a queue technology.
+- **(candidate) Cluster-wide per-entity lock + shared status store.** Keep per-process queues but guard each
+  Team/Portfolio update with a distributed per-entity lock (e.g. Postgres advisory lock / Redis lock); back
+  `GetUpdateStatus` with a shared store so dedup and reads agree. *Pro:* smaller change, no new queue. *Con:*
+  lock-contention + liveness edge cases; awaited-completion across replicas still needs the shared store.
+- **Decision:** OPEN (D5). The SPIKE (slice-07) prototypes both candidates against real Postgres+Redis with
+  3 hosts driving timer + manual-refresh concurrently; the one that disproves double-work *and* keeps
+  awaited-completion consistent wins. Do NOT pre-pick in DISCUSS.
+
+### A2 · US-07 — SignalR fan-out backplane
+- **Redis backplane (chosen, config-gated).** Matches the north-star (§4 "API N replicas + Redis"), local
+  MinIO/Redis already in the rehearsal stack, no managed-service lock-in. No Redis ⇒ in-memory (D1).
+- **(rejected) Azure SignalR Service.** Offloads fan-out fully but is a managed Azure dependency — couples
+  the self-hostable product to a cloud service, violating the vendor-neutral, runs-anywhere posture.
+- **(rejected) Sticky sessions only (`sessionAffinity: ClientIP`).** Pins a client to one pod so in-memory
+  fan-out "works" — but it was the *learning* spike (story 07), doesn't deliver cross-pod notifications for
+  server-raised events, and breaks on rebalancing. Not a product answer.
+
+### A3 · US-04 — concurrent-startup migration coordination
+- **In-process lock (advisory lock / history sentinel), chosen for this epic.** One replica applies, others
+  wait; degrades to a no-op at one instance (D1). Keeps "migrate on boot" — the self-hoster's current model.
+- **(deferred, not rejected) Dedicated pre-deploy migration Job / ArgoCD sync-wave.** Cleaner separation
+  (migrate→deploy) but it is a *cluster/GitOps* mechanism → belongs to Productization #5306, and it would
+  break the single-container "auto-migrate on boot" the self-hoster relies on. The slice-04 hypothesis
+  explicitly allows falling back to this *if* the in-process lock proves fragile, recording the decision.
+- **(rejected) Do nothing / let pods race.** `Database.Migrate()` under concurrent start is undefined.
+
+### A4 · US-06 — MCP inbound auth model
+- **MCP OAuth pass-through (preferred).** Each caller brings their own OAuth token; no shared secret to bake,
+  seal, distribute, rotate; per-user RBAC + audit for free; an unauth'd `/mcp` is no longer an open hole.
+  *Risk:* MCP-spec (2025-06-18) OAuth maturity in our client SDK — the slice-06 SPIKE assesses this.
+- **X-Api-Key pass-through (interim, accepted fallback).** Caller sends its own Lighthouse API key; the MCP
+  server forwards it; reuses the existing owner-resolved (`ApiKey.OwnerSubject`) + scoped (`ApiKeyPermission`)
+  model with near-zero backend change. *Cost:* N user-held keys instead of one Secret. Ships if OAuth proves
+  too heavy now — recorded, not blocking.
+- **(rejected) Keep the single baked key + restrict to ClusterIP.** That is the confused-deputy status quo;
+  the moment MCP is exposed beyond ClusterIP it is an ambient-authority hole. Exposure topology is a #5306
+  concern, but the auth model must change regardless.
+
+### A5 · US-05 — metrics library
+- **OpenTelemetry .NET + Prometheus exporter (leaning).** One instrumentation surface for metrics+traces,
+  vendor-neutral OTLP, future-proof. *Con:* heavier setup; overhead must be measured (slice-05 SPIKE) to set
+  the off-by-default posture for the single container.
+- **(alternative) `prometheus-net` for metrics only.** Lighter for just `/metrics`, but a second mechanism
+  for traces — DESIGN picks one to avoid two telemetry stacks. Decision deferred to DESIGN/SPIKE.
+
+### A6 · Frontend topology (epic-wide, Q4 — already locked upstream, restated)
+- **Embedded (chosen for this epic and Bands A–C).** API serves the SPA; mirrors the standalone exactly
+  (D1). The `frontend.mode: split` nginx path is a Productization #5306 / Band-D optimization, built then,
+  defaulted off. Out of scope here — restated so DESIGN does not reopen it.
+
diff --git a/docs/feature/epic-5305-k8s-readiness/slices/slice-01-forwarded-headers.md b/docs/feature/epic-5305-k8s-readiness/slices/slice-01-forwarded-headers.md
new file mode 100644
index 000000000..6f7d68ac6
--- /dev/null
+++ b/docs/feature/epic-5305-k8s-readiness/slices/slice-01-forwarded-headers.md
@@ -0,0 +1,43 @@
+# Slice 01: Reverse-proxy forwarded headers
+
+**Feature**: epic-5305-k8s-readiness
+**Story**: US-01 (ADO #5311) → job-operator-correct-behind-proxy
+**Estimate**: ~0.5–1 crafter day
+**Reference class**: config-gated startup wiring, similar to `auth-allowedorigins-envvar-binding-fix` (env-bound ASP.NET Core middleware config, off unless declared)
+
+## Goal
+Make Lighthouse honour `X-Forwarded-Proto` / `-Host` / `-For` from a declared, trusted reverse proxy so HTTPS redirects, secure cookies, OIDC callback URLs and SignalR negotiation use the real public scheme + host — config-gated and OFF unless a proxy is declared.
+
+## IN scope
+- `UseForwardedHeaders` wired with a `ForwardedHeadersOptions` populated from configuration: known proxies / known networks (CIDR), forwarded-header count limit.
+- A single config switch (env var + appsettings) that turns forwarded-header trust on and declares the trusted proxy set; default OFF.
+- OIDC callback URL + `RequireHttpsMetadata`/redirect behaviour derive from the forwarded scheme/host when trust is on.
+- Secure-cookie + HTTPS-redirect behaviour consistent with the forwarded scheme.
+
+## OUT scope
+- The Ingress / Traefik manifests themselves (Productization epic #5306, chart story 09).
+- Edge auth (oauth2-proxy) — north-star, not this slice.
+- Health-check endpoints → slice 02.
+
+## Learning hypothesis
+**Confirms if it succeeds**: a real OIDC login through a TLS-terminating proxy completes first try (no http:// callback, no redirect loop, secure cookie persists).
+**Disproves if it fails**: ASP.NET Core forwarded-header handling is insufficient for our SignalR negotiation path and we need per-endpoint handling rather than one global middleware.
+
+## Acceptance criteria
+See US-01 in `../feature-delta.md`. Key: with trust ON and a simulated `X-Forwarded-Proto: https` + `X-Forwarded-Host`, an integration test asserts the generated OIDC redirect/callback URL is `https://<public-host>/...`; with trust OFF (no proxy declared), behaviour is byte-identical to today (standalone gate).
+
+## Dependencies
+None. Foundation slice — unblocks correct auth on any proxied deployment; should land before any cluster auth testing.
+
+## Production data requirement
+**Required.** Smoke a real OIDC login (Keycloak or the configured provider) through an actual reverse proxy (local Traefik/nginx), not just a unit test with synthetic headers.
+
+## Dogfood moment
+The dev instance, placed behind a local Traefik with TLS, logs in via OIDC over the HTTPS hostname within the same day.
+
+## Cross-cutting checklist (confirmed in feature-delta)
+RBAC: N/A — no authorization surface changes; only how the app derives scheme/host. Clients: N/A — no API contract change. Website: N/A — operational, not a marketed surface.
+
+## Pre-slice spike candidates
+- Confirm SignalR negotiation respects `UseForwardedHeaders` ordering relative to other middleware. (~1 hr)
+- Verify the existing OIDC setup reads the request scheme/host (not a hardcoded base URL) so forwarded headers actually flow through. (~30 min)
diff --git a/docs/feature/epic-5305-k8s-readiness/slices/slice-02-health-checks.md b/docs/feature/epic-5305-k8s-readiness/slices/slice-02-health-checks.md
new file mode 100644
index 000000000..bb155517a
--- /dev/null
+++ b/docs/feature/epic-5305-k8s-readiness/slices/slice-02-health-checks.md
@@ -0,0 +1,44 @@
+# Slice 02: Health checks (liveness / readiness / startup)
+
+**Feature**: epic-5305-k8s-readiness
+**Story**: US-02 (ADO #5310) → job-operator-trust-pod-health
+**Estimate**: ~1–1.5 crafter days
+**Reference class**: new read endpoints + DI wiring; learning story 04 (#5194) exercised probes as a spike — this is the product implementation
+
+## Goal
+Add real ASP.NET Core health checks driving the three k8s probes so traffic reaches only serving pods and only genuinely-dead pods restart.
+
+## IN scope
+- `AddHealthChecks()` with distinct tagged checks mapped to three endpoints:
+  - **readiness** (`/health/ready`): DB connectivity + migrations-applied → pod kept OUT of LB rotation until truly serving.
+  - **liveness** (`/health/live`): shallow — restart only on genuine deadlock, NOT on a slow dependency.
+  - **startup** (`/health/startup`): covers slow boot / migration window without tripping liveness.
+- Endpoints harmless / no-op-friendly in single-container mode (standalone gate).
+
+## OUT scope
+- The k8s probe manifests (chart story 09 / Productization #5306).
+- Migration-applied detection that requires the migration lock → coordinate with slice 04 (this slice checks "migrations applied", slice 04 owns "apply once across replicas").
+- /metrics, tracing → slice 05.
+
+## Learning hypothesis
+**Confirms if it succeeds**: a pod with an unreachable DB drops out of rotation (readiness red) WITHOUT being restarted (liveness green) — no restart storm.
+**Disproves if it fails**: a shallow liveness check can't distinguish deadlock from slow dependency cheaply, forcing a richer (and riskier) liveness signal.
+
+## Acceptance criteria
+See US-02 in `../feature-delta.md`. Key: integration tests assert (a) readiness returns unhealthy when DB is down but liveness stays healthy; (b) readiness returns healthy only when DB reachable AND migrations applied; (c) endpoints return 200 in single-container mode with no orchestrator.
+
+## Dependencies
+Soft on slice 04 for the precise "migrations applied" signal; can ship with a simpler "can open a DB connection" readiness first and tighten once slice 04 lands.
+
+## Production data requirement
+**Required.** Run the dev instance, kill the DB connection, observe readiness flip while the process is NOT restarted; restore and observe recovery.
+
+## Dogfood moment
+Dev instance deployed with the three probes wired; operator watches a clean rollout where a not-yet-migrated pod stays out of rotation until ready.
+
+## Cross-cutting checklist (confirmed in feature-delta)
+RBAC: N/A — health endpoints are unauthenticated operational surface (no business data). Clients: N/A. Website: N/A.
+
+## Pre-slice spike candidates
+- Decide whether health endpoints sit on the main port or a separate management port. (~30 min)
+- Confirm a cheap, reliable "migrations applied" query against EF Core for both SQLite and Postgres. (~1 hr)
diff --git a/docs/feature/epic-5305-k8s-readiness/slices/slice-03-graceful-shutdown.md b/docs/feature/epic-5305-k8s-readiness/slices/slice-03-graceful-shutdown.md
new file mode 100644
index 000000000..7459e75c2
--- /dev/null
+++ b/docs/feature/epic-5305-k8s-readiness/slices/slice-03-graceful-shutdown.md
@@ -0,0 +1,45 @@
+# Slice 03: Graceful shutdown (SIGTERM) + connection draining
+
+**Feature**: epic-5305-k8s-readiness
+**Story**: US-03 (ADO #5309) → job-operator-zero-downtime-rollout
+**Estimate**: ~1–1.5 crafter days
+**Reference class**: `IHostedService` / `IHostApplicationLifetime` lifecycle wiring; touches the same update-queue hosted services as Epic 5121 / #5304
+
+## Goal
+Handle SIGTERM cleanly so a terminating pod stops accepting new work, drains in-flight HTTP + SignalR connections, flushes/awaits the in-memory update queue, and finishes within `terminationGracePeriodSeconds` — enabling zero-downtime rolling updates.
+
+## IN scope
+- Wire `IHostApplicationLifetime` `ApplicationStopping`/`ApplicationStopped` and/or `IHostedService.StopAsync` to:
+  - stop accepting new HTTP requests and new SignalR negotiations,
+  - drain in-flight HTTP requests within a bounded window,
+  - flush/await the in-memory `UpdateQueueService` Channel so queued/in-flight updates complete (or are safely abandoned) before exit,
+  - close SignalR connections so clients reconnect to a surviving pod.
+- Configurable shutdown timeout aligned to `terminationGracePeriodSeconds`.
+
+## OUT scope
+- The cluster-wide single-consumer queue redesign → slice 07 (#5304). This slice drains the *current per-process* queue cleanly; it does not make the queue distributed.
+- SignalR Redis backplane → slice 07.
+- Probe manifests → Productization #5306.
+
+## Learning hypothesis
+**Confirms if it succeeds**: under a rolling update, a load test driving requests + an active SignalR client sees zero failed requests and a clean client reconnect as pods cycle.
+**Disproves if it fails**: the in-memory update queue can't be drained deterministically within a sane grace period (e.g. a long external sync mid-flight), forcing the queue-redesign (slice 07) to land *before* true zero-downtime is claimable.
+
+## Acceptance criteria
+See US-03 in `../feature-delta.md`. Key: an integration test issues SIGTERM/`StopAsync` while an HTTP request and a queued update are in flight and asserts both complete (or the update is safely re-enqueued) before the host reports stopped; a single-container Ctrl-C behaves exactly as today (standalone gate).
+
+## Dependencies
+Pairs with slice 02 (readiness must flip to NotReady on `ApplicationStopping` so the LB stops routing before drain). Soft-precedes slice 07.
+
+## Production data requirement
+**Required.** Drive the dev instance under a small load generator + live SignalR client through a simulated rolling restart; assert no dropped requests.
+
+## Dogfood moment
+Operator triggers a rolling restart of the dev deployment during active use and observes no user-visible error and a seamless SignalR reconnect.
+
+## Cross-cutting checklist (confirmed in feature-delta)
+RBAC: N/A. Clients: N/A — server-side lifecycle only; CLI/MCP callers just reconnect. Website: N/A.
+
+## Pre-slice spike candidates
+- Measure worst-case in-flight update duration (external sync) to size the grace period. (~1 hr)
+- Confirm Kestrel/ASP.NET shutdown ordering vs. our hosted services so drain runs before the server socket closes. (~1 hr)
diff --git a/docs/feature/epic-5305-k8s-readiness/slices/slice-04-expand-only-migrations.md b/docs/feature/epic-5305-k8s-readiness/slices/slice-04-expand-only-migrations.md
new file mode 100644
index 000000000..d9d55e219
--- /dev/null
+++ b/docs/feature/epic-5305-k8s-readiness/slices/slice-04-expand-only-migrations.md
@@ -0,0 +1,42 @@
+# Slice 04: Expand-only EF migrations + safe startup under N replicas
+
+**Feature**: epic-5305-k8s-readiness
+**Story**: US-04 (ADO #5308) → job-operator-zero-downtime-rollout + job-operator-survive-multiple-replicas
+**Estimate**: ~2–2.5 crafter days
+**Reference class**: EF migration mechanics (hit the stale-migration-DLL `--no-incremental` trap in `delivery-target-date-tracking`); concurrency coordination akin to Epic 5121
+
+## Goal
+Two coupled guarantees: (1) each release's migrations are additive-only (expand now; destructive cleanup deferred to a LATER release) so old pods never depend on a dropped column during a rollover; (2) when N replicas boot concurrently, exactly one applies migrations while the rest wait — no race on `Database.Migrate()`.
+
+## IN scope
+- **Expand-only discipline**: a guard/check (analyzer, test, or migration-review gate) that fails CI if a migration in this release is destructive (drop/rename column/table) — destructive ops must be a separate later release. Document the expand → contract two-release pattern.
+- **Startup migration coordination**: a migration lock, a dedicated init mechanism, or leader election so that exactly one replica runs `Database.Migrate()`; the others wait until migrations are applied, then start serving.
+- **Standalone gate**: a single SQLite or Postgres instance still auto-migrates on boot exactly as today (lock is a no-op / trivially-acquired with one instance).
+
+## OUT scope
+- The actual cluster-wide update-queue redesign → slice 07.
+- Provider-matrix migration generation uses the existing `CreateMigration` PowerShell script (per CLAUDE.md) — not new tooling.
+
+## Learning hypothesis
+**Confirms if it succeeds**: 3 replicas started simultaneously against one fresh Postgres apply the migration exactly once (one applies, two wait), and a destructive migration is rejected by CI before merge.
+**Disproves if it fails**: app-level migration coordination is too fragile under k8s and we must move migrations into a dedicated pre-deploy Job / ArgoCD sync-wave (decision pushed to Productization #5306) — in which case this slice delivers the expand-only guard + a documented "migrate via Job" path instead of an in-process lock.
+
+## Acceptance criteria
+See US-04 in `../feature-delta.md`. Key: an integration/concurrency test starts N hosts against one DB and asserts a single migration application (e.g. via a migration-history assertion / lock observation); a CI check rejects a destructive migration; single-instance boot auto-migrates unchanged.
+
+## Dependencies
+None hard. Feeds slice 02's "migrations applied" readiness signal. Precedes real multi-replica operation (slice 07).
+
+## Production data requirement
+**Required.** Reproduce concurrent startup against a real Postgres (k3s, 3 replicas) — InMemory tests will NOT catch the race (recurring lesson: persisted-model migration traps are invisible to InMemory).
+
+## Dogfood moment
+Operator scales a fresh deploy to 3 replicas against an empty Postgres and observes one migration application in the logs, all pods healthy.
+
+## Cross-cutting checklist (confirmed in feature-delta)
+RBAC: N/A. Clients: N/A — no API contract change; possibly a CLI connection hint for Postgres (confirm in DESIGN). Website: N/A.
+
+## Pre-slice spike candidates
+- Evaluate a PostgreSQL advisory lock vs. a migration-history sentinel vs. an init-Job approach for the boot lock. (~2 hr)
+- Prototype the destructive-migration CI guard (parse the generated migration for `DropColumn`/`DropTable`/`RenameColumn`/`RenameTable`). (~1 hr)
+- Confirm the SQLite path degrades the lock to a no-op. (~30 min)
diff --git a/docs/feature/epic-5305-k8s-readiness/slices/slice-05-observability.md b/docs/feature/epic-5305-k8s-readiness/slices/slice-05-observability.md
new file mode 100644
index 000000000..4d3b81c8e
--- /dev/null
+++ b/docs/feature/epic-5305-k8s-readiness/slices/slice-05-observability.md
@@ -0,0 +1,42 @@
+# Slice 05: App observability hooks (/metrics + structured logging + traces)
+
+**Feature**: epic-5305-k8s-readiness
+**Story**: US-05 (ADO #5312) → job-operator-observe-in-cluster
+**Estimate**: ~1.5 crafter days
+**Reference class**: new instrumentation wiring (OpenTelemetry .NET + Prometheus exporter + structured logging provider)
+
+## Goal
+Instrument the app for cluster observability: expose a Prometheus `/metrics` endpoint, emit structured JSON logs to stdout, and add OpenTelemetry traces — in-app instrumentation only, low-overhead / off-by-default where appropriate so the single-container self-hoster pays nothing.
+
+## IN scope
+- Prometheus `/metrics` endpoint (request rate / error rate / latency at minimum) via OpenTelemetry metrics + the Prometheus exporter.
+- Structured JSON logging to stdout (configurable), preserving today's log content but in queryable JSON.
+- OpenTelemetry tracing (ASP.NET Core + HttpClient + EF instrumentation) exporting via OTLP, exporter off/no-op unless configured.
+
+## OUT scope
+- The cluster-side Prometheus / Grafana / Loki stack — Productization epic #5306, story 16.
+- Per-tenant metric labelling / multi-tenant dashboards → #5306.
+- Business KPI instrumentation (those live in `docs/product/kpi-contracts.yaml`); this slice is operational telemetry, not product KPIs.
+
+## Learning hypothesis
+**Confirms if it succeeds**: a local Prometheus scrapes `/metrics` and a local Grafana shows Lighthouse request/error/latency; JSON logs parse field-wise in Loki; a slow request is traceable.
+**Disproves if it fails**: always-on instrumentation imposes measurable overhead on the single container, forcing a stricter off-by-default posture (and documentation that self-hosters must opt in).
+
+## Acceptance criteria
+See US-05 in `../feature-delta.md`. Key: an integration test asserts `/metrics` returns Prometheus-format output including HTTP server metrics; logs emitted in the JSON shape contain the expected fields; with telemetry disabled, no exporter runs and log-format behaviour matches the configured default (standalone gate — no perf change).
+
+## Dependencies
+None. Can land any time; valuable before slice 07's multi-replica work (so the operator isn't flying blind during scale-out).
+
+## Production data requirement
+**Recommended.** Scrape the dev instance with a real local Prometheus and confirm a dashboard renders; not strictly required for the unit-level acceptance.
+
+## Dogfood moment
+Operator points a local Prometheus + Grafana at the dev instance and sees a live Lighthouse dashboard within the day.
+
+## Cross-cutting checklist (confirmed in feature-delta)
+RBAC: confirm whether `/metrics` needs gating (it can leak request paths); default to an unauthenticated, cluster-internal surface, but DESIGN must decide exposure (Sonar/security). Clients: N/A. Website: N/A.
+
+## Pre-slice spike candidates
+- Pick the metrics surface (OpenTelemetry.Exporter.Prometheus vs. prometheus-net) and confirm it coexists with our logging. (~1 hr)
+- Measure overhead of always-on ASP.NET Core + EF tracing to decide the default. (~1 hr)
diff --git a/docs/feature/epic-5305-k8s-readiness/slices/slice-06-mcp-inbound-auth.md b/docs/feature/epic-5305-k8s-readiness/slices/slice-06-mcp-inbound-auth.md
new file mode 100644
index 000000000..916639ce6
--- /dev/null
+++ b/docs/feature/epic-5305-k8s-readiness/slices/slice-06-mcp-inbound-auth.md
@@ -0,0 +1,45 @@
+# Slice 06: MCP HTTP server inbound authentication (OAuth pass-through)
+
+**Feature**: epic-5305-k8s-readiness
+**Story**: US-06 (ADO #5307) → job-mcp-caller-own-identity
+**Estimate**: ~2–3 crafter days (primarily in the **lighthouse-clients** repo)
+**Reference class**: version-gated client endpoint wrapping (see `work-item-age-percentiles` clients wrapper + `FEATURE_REQUIRES_SERVER_NEWER_THAN`); reuses Lighthouse's existing owner-resolved/scoped API-key model
+
+## Goal
+Stop the published `mcp-http` container being a confused deputy. Each caller authenticates with their OWN credential (preferred: MCP spec rev 2025-06-18 OAuth pass-through; interim: `X-Api-Key` pass-through) that the MCP server forwards — so every caller drives Lighthouse as themselves, with their own RBAC scope and audit, no shared baked key.
+
+## IN scope
+- **lighthouse-clients repo (primary)**: the MCP HTTP server forwards the caller's credential instead of injecting one baked `LIGHTHOUSE_API_KEY`.
+  - Preferred: adopt the MCP Authorization framework (OAuth) — caller brings an OAuth token.
+  - Interim fallback: `X-Api-Key` pass-through reusing Lighthouse's owner-resolved (`ApiKey.OwnerSubject` → `sub`) + permission-scoped (`ApiKeyPermission`) keys.
+- **Version gate**: the wrapping client method pre-checks the Lighthouse server version (an old server returns an opaque 404) and fails with a clear "upgrade Lighthouse" error. Pin to **strictly newer than the last released Lighthouse version**; record the baseline in the clients' `FEATURE_REQUIRES_SERVER_NEWER_THAN` registry.
+- **Lighthouse backend (likely minimal/none)**: confirm the existing `ApiKeyAuthenticationHandler` owner-resolution + scope already satisfies pass-through; add only what's missing (e.g. an OAuth-token acceptance path if OAuth is chosen).
+- **Standalone gate**: the existing single-key / dev path stays available; no break for self-hosters.
+
+## OUT scope
+- Edge auth (oauth2-proxy) and ClusterIP-vs-edge exposure decisions → Productization #5306 (chart/SaaS boundary, planning Q5).
+- The MCP container's k8s deployment manifest → #5306.
+
+## Learning hypothesis
+**Confirms if it succeeds**: two different callers, each with their own credential, drive the MCP server and each sees only their own RBAC-scoped data, with per-caller audit — no shared-key ambient authority.
+**Disproves if it fails**: the MCP OAuth framework is too heavy / immature for our stack right now, so we ship the interim `X-Api-Key` pass-through and defer OAuth (recording the decision), rather than blocking the slice.
+
+## Acceptance criteria
+See US-06 in `../feature-delta.md`. Key: an integration/e2e test in lighthouse-clients shows a caller-supplied credential is forwarded and resolved to that caller's owner+scope (not a baked key); the version gate rejects an old server with a clear upgrade message; the legacy single-key dev path still works.
+
+## Dependencies
+Independent of the other slices (lives mostly in a different repo). The OAuth-vs-`X-Api-Key` decision is the open question — resolve in DESIGN.
+
+## Production data requirement
+**Required.** Exercise against a real Lighthouse backend with two distinct API-key owners and assert per-owner scoping; smoke the version gate against an older Lighthouse build.
+
+## Dogfood moment
+Operator exposes the dev MCP server and two team members call it with their own keys; each sees only their scoped teams/portfolios.
+
+## Cross-cutting checklist (confirmed in feature-delta)
+RBAC: **central** — this slice removes ambient authority and makes the MCP path honour per-caller `ApiKeyPermission` scope (flows through the existing handler, no new RBAC port). Clients: **primary surface** — change lands in lighthouse-clients; version-gate per CLAUDE.md. Website: N/A — security/packaging, not a marketed UI feature.
+
+## Pre-slice spike candidates
+- **SPIKE (required)**: assess MCP spec 2025-06-18 OAuth support in the client SDK we use vs. effort of `X-Api-Key` pass-through; pick the path. (~half day)
+- Confirm `ApiKeyAuthenticationHandler` needs no change for X-Api-Key pass-through. (~1 hr)
+- Confirm the last released Lighthouse version to set the `FEATURE_REQUIRES_SERVER_NEWER_THAN` baseline. (~15 min)
diff --git a/docs/feature/epic-5305-k8s-readiness/slices/slice-07-horizontal-scalability.md b/docs/feature/epic-5305-k8s-readiness/slices/slice-07-horizontal-scalability.md
new file mode 100644
index 000000000..b7335b16d
--- /dev/null
+++ b/docs/feature/epic-5305-k8s-readiness/slices/slice-07-horizontal-scalability.md
@@ -0,0 +1,46 @@
+# Slice 07: Horizontal scalability — SignalR backplane + cluster-aware update work
+
+**Feature**: epic-5305-k8s-readiness
+**Story**: US-07 (ADO #5304) → job-operator-survive-multiple-replicas
+**Estimate**: ~4–6 crafter days **after a required SPIKE** (highest uncertainty in the epic)
+**Reference class**: distributed-coordination work; closest analog is Epic 5121 (domain-events + concurrency), but larger — this makes a singleton app multi-replica-safe
+
+## Goal
+Make Lighthouse genuinely safe to run with N API replicas: a notification raised on any replica reaches clients on all replicas; external syncs + the update queue run once across the fleet (no N× syncs, no racing Postgres writes); and `GetUpdateStatus` is consistent across pods. Config-gated: no Redis / one replica ⇒ exactly today's single-instance behaviour (standalone gate, D4).
+
+## The three coupled breakages (from story-07-research.md §1)
+1. **(B) SignalR fan-out** is in-memory per process → a notification raised on pod A never reaches pod B's clients.
+2. **(C) Background updaters** (`TeamUpdater`, `PortfolioUpdater`, `UpdateQueueService`) run in *every* replica → N× external syncs + racing writes.
+3. **(C) Status cache** — the in-memory `ConcurrentDictionary<UpdateKey,UpdateStatus>` in `UpdateNotificationHub` answers differently per replica.
+
+## IN scope
+- **SignalR Redis backplane**, config-gated: Redis configured ⇒ cross-pod fan-out; no Redis ⇒ current in-memory behaviour.
+- **Cluster-aware update path** — the unit that must become cluster-aware is the **update queue itself**, not just a timer leader. Both trigger paths must be covered: the periodic timer loop AND request-triggered manual refresh (`TeamController`/`PortfolioController` → `UpdateQueueService.EnqueueAndAwaitAsync` inline on whatever replica serves the request). Fix space (DESIGN decides — do NOT pre-pick): a shared/distributed queue with a single consumer, or a cluster-wide per-entity lock, plus a shared status store so dedup + the awaited completion + `GetUpdateStatus` are consistent across replicas.
+- **Shared status store** backing `GetUpdateStatus` (Redis or sourced from Postgres).
+
+## OUT scope
+- HPA / `sessionAffinity` / load-test manifests — those were the **learning** story 07 (#5197) k8s-layer spike (throwaway), not this production slice.
+- Per-tenant isolation / namespace model → Productization #5306.
+
+## Learning hypothesis
+**Confirms if it succeeds**: with Redis + 3 replicas, a manual refresh served by pod B notifies a client on pod A; the external system is synced once per cycle across the fleet; `GetUpdateStatus` agrees across pods.
+**Disproves if it fails**: leader election alone is insufficient (a manual refresh handled by a follower still double-works), proving the queue itself must be the cluster-aware unit — which is exactly why this is one coupled slice, not three.
+
+## Acceptance criteria
+See US-07 in `../feature-delta.md`. Key: a multi-host integration/e2e test asserts (a) single sync per entity across N hosts under concurrent timer + manual-refresh load; (b) cross-pod notification delivery via the backplane; (c) consistent `GetUpdateStatus`; (d) with no Redis / 1 host, behaviour and code path are identical to today.
+
+## Dependencies
+Soft-depends on slice 03 (clean drain) and slice 04 (migration safety) being in place so multi-replica operation is tested on a safe base. This is the LAST slice to ship.
+
+## Production data requirement
+**Required.** Real Postgres + Redis, ≥3 replicas on k3s, real work-tracking connector driving syncs. InMemory/mock tests cannot reproduce the cross-replica races (recurring lesson).
+
+## Dogfood moment
+The dev/stage deployment runs 3 replicas with Redis; operator triggers concurrent refreshes and a node drain and observes single syncs, consistent status, and no lost notifications.
+
+## Cross-cutting checklist (confirmed in feature-delta)
+RBAC: N/A — no authorization surface. Clients: likely N/A — internal infra, no API contract change (confirm in DESIGN). Website: N/A — infra, not a marketed feature.
+
+## Pre-slice SPIKE (REQUIRED — high uncertainty; do BEFORE committing the slice)
+- **Probe the cluster-aware-queue design** (`nw-spike`, ~1–2 days): prototype the two candidate shapes — (i) distributed queue with a single consumer, (ii) cluster-wide per-entity lock + shared status store — against real Postgres+Redis with 3 hosts driving both timer and manual-refresh paths. Goal: disprove "leader election is enough" and pick the unit of coordination. Output feeds DESIGN; do NOT pre-pick a solution in DISCUSS.
+- Confirm `UpdateQueueService` singleton-per-process semantics (Channel queue, consumer, awaiters, `updateStatuses` dedup dict) match the research doc before designing the replacement. (~2 hr)
diff --git a/docs/product/jobs.yaml b/docs/product/jobs.yaml
index e1e63c066..ffa800e03 100644
--- a/docs/product/jobs.yaml
+++ b/docs/product/jobs.yaml
@@ -1,5 +1,5 @@
 schema_version: 1
-updated: 2026-06-14
+updated: 2026-06-16
 feature_context:
   - rbac-enhancements
   - work-tracking-oauth-authentication
@@ -21,6 +21,7 @@ feature_context:
   - multiple-cycle-times
   - work-item-age-percentiles
   - website-screenshot-freshness
+  - epic-5305-k8s-readiness
 
 jobs:
   - id: job-rbac-bootstrap
@@ -1914,3 +1915,176 @@ jobs:
         satisfaction: 2 - the screenshots exist and are broadly representative, just dated. Gap: 1 -
         small but real; this job is the downstream payoff of the maintainer job, not an independent ask.
 
+
+  - id: job-operator-survive-multiple-replicas
+    title: Run Lighthouse with more than one replica without it breaking
+    persona: platform-operator
+    feature: epic-5305-k8s-readiness
+    job_story: >
+      When I scale Lighthouse past a single replica behind a load balancer so it can absorb
+      load and survive a node failure,
+      I want each external work-tracking sync to run once across the fleet, every client to
+      receive every SignalR notification regardless of which pod raised it, and concurrent
+      pod startups to apply migrations exactly once,
+      so I can operate Lighthouse as a normal horizontally-scaled web app instead of a
+      single-instance singleton that corrupts itself the moment a second pod starts.
+    dimensions:
+      functional: One distributed update path (single consumer / cluster-wide per-entity lock + shared status store); SignalR Redis backplane; migration lock so exactly one pod migrates
+      emotional: Move from "I daren't run more than one pod" to "I scale this like any other service"
+      social: Stand behind an SLA — "Lighthouse stays up through a node failure" — to my own stakeholders
+    forces:
+      push: Lighthouse is a stateful singleton; >1 replica means N× syncs racing Postgres rows, notifications that reach only one pod's clients, and a per-pod status cache that answers inconsistently
+      pull: A genuinely cluster-safe app scales on CPU, rolls without flapping, and tolerates a lost node
+      anxiety: >
+        Will electing a leader for the timer fix it, when manual refreshes are handled inline on
+        whichever replica serves the request? Will I silently double-sync and not notice?
+      habit: Operators expect a web app to be stateless-enough to scale by changing a replica count
+    opportunity_score:
+      importance: 5
+      current_satisfaction: 1
+      gap: 4
+      rationale: >
+        Importance: 5 - hard blocker for any multi-replica / SaaS operation; nothing in Bands D-E
+        works without it. Current satisfaction: 1 - naive scale-out is actively broken in three ways.
+        Gap: 4 - the largest, highest-uncertainty job in the epic (the update-queue itself must
+        become cluster-aware; leader election is necessary-not-sufficient).
+
+  - id: job-operator-zero-downtime-rollout
+    title: Upgrade Lighthouse with no dropped requests and no data loss
+    persona: platform-operator
+    feature: epic-5305-k8s-readiness
+    job_story: >
+      When I roll out a new Lighthouse version under a Kubernetes rolling update, so old and
+      new pods briefly run side by side against one shared database,
+      I want each release's migrations to be additive-only (so an old pod never depends on a
+      column the new release dropped) and every terminating pod to drain its in-flight HTTP
+      requests, SignalR connections and queued updates before it exits,
+      so I can ship updates during the working day without a maintenance window and without a
+      user ever seeing a failed request or a half-written change.
+    dimensions:
+      functional: Expand-only (expand now / contract later) migration discipline; SIGTERM handling that stops intake and drains within terminationGracePeriodSeconds
+      emotional: Move from "upgrades are a scary after-hours event" to "I merge a PR and it rolls"
+      social: Deliver upgrades invisibly; no "Lighthouse will be down 22:00-23:00" email
+    forces:
+      push: A rolling update today kills pods mid-request (no drain) and a destructive migration can break the old pods still serving during the rollout
+      pull: Zero-downtime upgrades make Lighthouse safe to update often and cheaply
+      anxiety: Will a migration that drops/renames a column break the old replica before it's gone? Will an in-flight forecast write be lost when the pod dies?
+      habit: Operators are used to scheduling downtime for database migrations
+    opportunity_score:
+      importance: 4
+      current_satisfaction: 1
+      gap: 3
+      rationale: >
+        Importance: 4 - required for credible SaaS upgrades and for safe self-hoster updates.
+        Current satisfaction: 1 - no drain, no expand-contract discipline today. Gap: 3.
+
+  - id: job-operator-trust-pod-health
+    title: Let Kubernetes route to only-truly-ready pods and restart only dead ones
+    persona: platform-operator
+    feature: epic-5305-k8s-readiness
+    job_story: >
+      When Kubernetes decides whether to send traffic to a Lighthouse pod or to restart it,
+      I want readiness gated on real serving capacity (DB reachable, migrations applied),
+      liveness shallow enough that a slow dependency never triggers a restart, and a startup
+      probe that covers a slow boot/migration window,
+      so I can trust that traffic only reaches pods that can actually serve and that the
+      orchestrator never restart-loops a healthy-but-slow pod.
+    dimensions:
+      functional: ASP.NET Core health checks driving three distinct probes (readiness DB+migrations, shallow liveness, startup)
+      emotional: Move from "is this 503 the app or the probe?" to "the probes mean what they say"
+      social: Show a green, trustworthy deployment status to whoever watches the cluster
+    forces:
+      push: Without real probes, k8s sends traffic to not-yet-ready pods (cold 500s) or restart-loops on a slow dependency
+      pull: Correct probes give clean rollouts and accurate health signals
+      anxiety: Will a shared liveness/readiness endpoint restart a pod that's merely waiting on the DB?
+      habit: Operators expect every serious app to expose liveness/readiness/startup endpoints
+    opportunity_score:
+      importance: 4
+      current_satisfaction: 2
+      gap: 2
+      rationale: >
+        Importance: 4 - prerequisite for any safe rollout. Current satisfaction: 2 - story 04
+        exercised probes as a spike but there is no product implementation. Gap: 2.
+
+  - id: job-operator-correct-behind-proxy
+    title: Serve correct HTTPS, cookies, OIDC and SignalR behind a reverse proxy
+    persona: platform-operator
+    feature: epic-5305-k8s-readiness
+    job_story: >
+      When I put Lighthouse behind Traefik / nginx / an Ingress that terminates TLS,
+      I want the app to honour X-Forwarded-Proto / -Host / -For from the proxy I trust,
+      so that HTTPS redirects, secure cookies, OIDC callback URLs and SignalR negotiation all
+      use the real public scheme and host instead of the pod's internal http://hostname.
+    dimensions:
+      functional: UseForwardedHeaders gated on a declared set of known proxies/networks
+      emotional: Move from "why does OIDC redirect to http and loop?" to "login just works behind the proxy"
+      social: Hand users a clean HTTPS URL that logs in first try
+    forces:
+      push: Behind a proxy today, HTTPS redirects loop, OIDC callbacks come out http://, secure cookies drop
+      pull: One config switch makes every reverse-proxy deployment behave correctly
+      anxiety: If I trust forwarded headers, am I opening a spoofing hole from untrusted clients?
+      habit: Operators are used to setting UseForwardedHeaders / trusted-proxy config on .NET apps behind a proxy
+    opportunity_score:
+      importance: 4
+      current_satisfaction: 1
+      gap: 3
+      rationale: >
+        Importance: 4 - blocks correct auth on ANY reverse-proxy deployment, not just k8s.
+        Current satisfaction: 1 - broken behind a TLS-terminating proxy today. Gap: 3.
+
+  - id: job-operator-observe-in-cluster
+    title: See per-instance metrics, structured logs and traces in my monitoring stack
+    persona: platform-operator
+    feature: epic-5305-k8s-readiness
+    job_story: >
+      When I run Lighthouse in a cluster with Prometheus / Loki / an OTel collector,
+      I want the app to expose a /metrics endpoint, emit structured JSON logs to stdout, and
+      produce OpenTelemetry traces,
+      so I can monitor request rates, error rates and latency, query logs by field, and trace
+      a slow request — without bolting on a sidecar to scrape unstructured text.
+    dimensions:
+      functional: Prometheus /metrics endpoint; structured JSON logging to stdout; OpenTelemetry traces
+      emotional: Move from "Lighthouse is a black box in my cluster" to "it's a first-class citizen on my dashboards"
+      social: Report instance health/usage to stakeholders from the same Grafana everyone else uses
+    forces:
+      push: No /metrics and unstructured logs make Lighthouse invisible to Prometheus and painful in Loki
+      pull: Native instrumentation drops Lighthouse straight onto existing dashboards
+      anxiety: Will always-on tracing/metrics cost the single-container self-hoster performance?
+      habit: Operators expect cloud-native apps to expose /metrics + JSON logs out of the box
+    opportunity_score:
+      importance: 3
+      current_satisfaction: 2
+      gap: 1
+      rationale: >
+        Importance: 3 - valuable for operating the SaaS, less critical for a single self-hoster.
+        Current satisfaction: 2 - logging exists but unstructured; no /metrics. Gap: 1 - real but
+        the smallest in the epic; off-by-default keeps the self-hoster cost at zero.
+
+  - id: job-mcp-caller-own-identity
+    title: Drive the MCP server as myself, not as a shared baked-in key
+    persona: platform-operator
+    feature: epic-5305-k8s-readiness
+    job_story: >
+      When I expose the Lighthouse MCP HTTP server so colleagues' agents can call it,
+      I want each caller to authenticate with their own credential (an OAuth token, or their
+      own Lighthouse API key) that the MCP server passes through,
+      so that every caller drives Lighthouse as themselves with their own RBAC scope and audit
+      trail — instead of every caller sharing one baked LIGHTHOUSE_API_KEY whose owner and
+      scope they all silently inherit.
+    dimensions:
+      functional: MCP spec (2025-06-18) OAuth pass-through (preferred) or X-Api-Key pass-through (interim), reusing owner-resolved/scoped Lighthouse keys
+      emotional: Move from "one shared key I must bake, seal, distribute and rotate" to "callers bring their own"
+      social: Give security review a clean answer — no ambient authority, per-user audit
+    forces:
+      push: The mcp-http container is a confused deputy - one baked key means every caller acts as that owner/scope with no per-user audit
+      pull: Per-caller identity removes the shared secret entirely; an unauth'd /mcp is no longer an open hole
+      anxiety: Is adopting the MCP OAuth framework too heavy vs the interim X-Api-Key pass-through?
+      habit: Operators are used to giving a service one API key and accepting the blast radius
+    opportunity_score:
+      importance: 4
+      current_satisfaction: 1
+      gap: 3
+      rationale: >
+        Importance: 4 - the confused deputy is a real security gap the moment the MCP server is
+        exposed beyond ClusterIP. Current satisfaction: 1 - single baked key today. Gap: 3. Change
+        lands mostly in the lighthouse-clients repo; version-gate the endpoint per CLAUDE.md.
diff --git a/docs/product/journeys/epic-5305-k8s-readiness.yaml b/docs/product/journeys/epic-5305-k8s-readiness.yaml
new file mode 100644
index 000000000..c0d89c459
--- /dev/null
+++ b/docs/product/journeys/epic-5305-k8s-readiness.yaml
@@ -0,0 +1,183 @@
+schema_version: 1
+feature_id: epic-5305-k8s-readiness
+created: 2026-06-16
+research_depth: lightweight
+note: >
+  Operational journeys for Epic 5305 "Lighthouse k8s-readiness — production code changes".
+  The actor throughout is the platform-operator (self-hoster running a single container today;
+  LPW SaaS operator running many replicas across tenants tomorrow). These journeys are
+  operational, not in-app UX: the "screen" is mostly kubectl/Helm/Grafana and the app's HTTP
+  surface (health endpoints, /metrics, OIDC redirect, /mcp). Every journey inherits the #5305
+  EPIC GATE: the change must auto-degrade to the sacrosanct single-container standalone (no
+  Redis -> in-memory; one replica works; SQLite default; frontend embedded). The north-star
+  this epic slices toward lives in docs/feature/l8e-kubernetes-learning/planning-stage.md (D1-D5,
+  Q1-Q5, §4 architecture). Six jobs map onto seven stories: survive-multiple-replicas (#5304,
+  migration-lock half of #5308), zero-downtime-rollout (#5308 expand-only, #5309 drain),
+  trust-pod-health (#5310), correct-behind-proxy (#5311), observe-in-cluster (#5312),
+  mcp-caller-own-identity (#5307).
+
+journeys:
+
+  - name: scale-out-without-breaking
+    goal: >
+      A platform-operator raises the API replica count past 1 behind a load balancer and
+      observes that work-tracking syncs run once across the fleet, every connected client
+      receives every update notification regardless of which pod raised it, and the per-entity
+      update status is consistent across pods — i.e. Lighthouse behaves as a normal
+      horizontally-scaled web app, not a self-corrupting singleton.
+    persona: "platform-operator (LPW SaaS operator; also any self-hoster scaling for HA)"
+    jobs: [job-operator-survive-multiple-replicas]
+    stories: ["#5304", "#5308 (migration lock)"]
+    emotional_arc:
+      start: Wary - "Lighthouse is a stateful singleton; I've been told a second pod corrupts it
+        — N× syncs, notifications that reach only one pod's clients, an inconsistent status cache."
+      middle: Testing - "I configure Redis, scale to 3, trigger a manual refresh on one pod and
+        watch the notification land on a client connected to another; I check the external system
+        was synced once, not three times."
+      end: Confident - "replica count is just a number now; a node can die and Lighthouse stays up
+        and consistent. With no Redis and one replica it's exactly the old single-instance app."
+    steps:
+      - step: Configure Redis backplane (optional) and scale the API Deployment to N replicas.
+        output: N pods Running; SignalR uses the Redis backplane when configured, in-memory otherwise.
+      - step: A client is connected to pod A; a manual refresh is served by pod B (EnqueueAndAwait).
+        output: The update runs ONCE (cluster-aware queue / per-entity lock), and pod A's client
+          receives the completion notification via the backplane.
+      - step: Inspect the external work-tracking system's request log over one sync cycle.
+        output: Exactly one sync per team/portfolio across the fleet — no N× duplication, no racing
+          Postgres writes on the same rows.
+      - step: GetUpdateStatus is queried against different pods during an in-flight update.
+        output: Consistent answer across pods (shared/distributed status store, not per-process dict).
+    error_paths:
+      - trigger: Redis unreachable mid-operation.
+        recovery: Degrade to documented behaviour (configured failure mode); single-replica path
+          unaffected; surfaced in logs/health, never silent data corruption.
+      - trigger: Two pods both believe they own the timer loop.
+        recovery: Cluster-wide per-entity lock / single-consumer queue makes double-work impossible
+          even if leadership is ambiguous (leader election is necessary-not-sufficient — DESIGN solves).
+
+  - name: zero-downtime-upgrade
+    goal: >
+      A platform-operator rolls out a new Lighthouse version under a rolling update; old and new
+      pods coexist against one Postgres for the rollout window; no request is dropped and no
+      change is lost.
+    persona: "platform-operator"
+    jobs: [job-operator-zero-downtime-rollout]
+    stories: ["#5308 (expand-only)", "#5309"]
+    emotional_arc:
+      start: Cautious - "an upgrade today means killed in-flight requests and the fear that a
+        destructive migration breaks the old pods still serving."
+      middle: Reassured - "this release's migration is additive-only; terminating pods drain HTTP +
+        SignalR + the in-memory queue before they exit."
+      end: Routine - "I merge a PR during the working day and it rolls; nobody notices."
+    steps:
+      - step: Apply the new image; k8s starts new pods and sends SIGTERM to old ones.
+        output: Old pod stops accepting new work, drains in-flight requests/connections, flushes the
+          update queue, exits within terminationGracePeriodSeconds.
+      - step: New and old pods run migrations against the same Postgres during the overlap.
+        output: Migration is additive-only (expand); old pods keep working because nothing they need
+          was dropped. Destructive cleanup is deferred to a LATER release (contract).
+    error_paths:
+      - trigger: A pod cannot drain within terminationGracePeriodSeconds.
+        recovery: It exits at the deadline having stopped intake first, minimising loss; logged.
+      - trigger: A developer authors a destructive migration in the same release.
+        recovery: Caught by the expand-only guard/check (DESIGN) before merge — not at runtime.
+
+  - name: login-behind-the-proxy
+    goal: >
+      A platform-operator puts Lighthouse behind a TLS-terminating reverse proxy and a user logs
+      in via OIDC over the public HTTPS hostname with no redirect loop and a persisted session.
+    persona: "platform-operator (configures); end-user (logs in, benefits)"
+    jobs: [job-operator-correct-behind-proxy]
+    stories: ["#5311"]
+    emotional_arc:
+      start: Frustrated - "behind the proxy, OIDC redirects to http://, the callback loops, secure
+        cookies vanish — login is broken and I can't tell why."
+      middle: Diagnosing - "the app trusts its own internal http://hostname instead of the proxy's
+        X-Forwarded-Proto/-Host; I declare the proxy as trusted and enable forwarded headers."
+      end: Working - "login redirects to the real HTTPS hostname, the callback succeeds, the secure
+        cookie sticks — first try."
+    steps:
+      - step: Declare the proxy/network as trusted; enable UseForwardedHeaders (config-gated).
+        output: The app derives scheme=https and the public host from X-Forwarded-* from that proxy only.
+      - step: A user hits the public HTTPS URL and starts OIDC login.
+        output: Redirect and callback URLs use https + the public host; secure cookie set; session holds.
+    error_paths:
+      - trigger: Forwarded headers arrive from an untrusted client (spoof attempt).
+        recovery: Only the declared known-proxy set is trusted; others are ignored — no scheme/host spoof.
+      - trigger: No proxy declared (direct/standalone access).
+        recovery: Forwarded-header trust stays OFF; direct access behaves exactly as today (standalone gate).
+
+  - name: trustworthy-pod-health
+    goal: >
+      A platform-operator wires the three k8s probes to real ASP.NET Core health checks so traffic
+      reaches only serving pods and only genuinely-dead pods are restarted.
+    persona: "platform-operator"
+    jobs: [job-operator-trust-pod-health]
+    stories: ["#5310"]
+    emotional_arc:
+      start: Distrustful - "is this 503 the app or a naive shared health endpoint? a slow DB makes
+        k8s restart-loop a perfectly healthy pod."
+      middle: Wiring - "readiness checks DB + migrations-applied; liveness is shallow; a startup
+        probe covers the slow boot/migration window."
+      end: Trusting - "the probes mean what they say; rollouts are clean and nothing flaps."
+    steps:
+      - step: readiness probe -> /health/ready (DB reachable + migrations applied).
+        output: Pod stays OUT of the LB rotation until it can truly serve; no cold 500s to users.
+      - step: liveness probe -> shallow /health/live.
+        output: Pod restarts only on genuine deadlock, never because a dependency is slow.
+      - step: startup probe -> /health/startup.
+        output: Slow boot/migration tolerated without tripping liveness.
+    error_paths:
+      - trigger: DB unreachable.
+        recovery: Readiness fails (out of rotation) but liveness stays green (no restart storm).
+      - trigger: Single-container standalone with no orchestrator.
+        recovery: Endpoints are harmless/no-op-friendly; no behaviour change for self-hosters (standalone gate).
+
+  - name: observe-in-the-cluster
+    goal: >
+      A platform-operator scrapes Lighthouse's /metrics into Prometheus, ships its structured JSON
+      logs to Loki, and traces a slow request via OpenTelemetry — all from the same Grafana the
+      rest of the cluster uses.
+    persona: "platform-operator"
+    jobs: [job-operator-observe-in-cluster]
+    stories: ["#5312"]
+    emotional_arc:
+      start: Blind - "Lighthouse is a black box — no /metrics, unstructured text logs I can't query."
+      middle: Instrumenting - "I scrape /metrics, parse JSON logs by field, see traces."
+      end: Visible - "Lighthouse sits on my dashboards like any first-class service."
+    steps:
+      - step: Prometheus scrapes /metrics.
+        output: Request rate / error rate / latency visible per instance.
+      - step: Logs emitted as structured JSON to stdout; OTel traces exported.
+        output: Logs queryable by field in Loki; a slow request is traceable end to end.
+    error_paths:
+      - trigger: Self-hoster does not want the overhead.
+        recovery: Low-overhead / off-by-default where appropriate; no perf change for the single container.
+
+  - name: mcp-caller-brings-own-identity
+    goal: >
+      A platform-operator exposes the Lighthouse MCP HTTP server and each caller authenticates with
+      their OWN credential (OAuth token, or their own Lighthouse API key) that the server passes
+      through — so every caller drives Lighthouse as themselves, with their own RBAC scope and audit.
+    persona: "platform-operator (deploys/secures); MCP/CLI caller (authenticates as self)"
+    jobs: [job-mcp-caller-own-identity]
+    stories: ["#5307"]
+    emotional_arc:
+      start: Uneasy - "the mcp-http container is a confused deputy — one baked LIGHTHOUSE_API_KEY, so
+        every caller acts as that owner/scope with no per-user audit, and an unauth'd /mcp is an open hole."
+      middle: Adopting - "I move to MCP OAuth pass-through (preferred) — each caller brings their own
+        token; or the interim X-Api-Key pass-through reusing Lighthouse's owner-resolved/scoped keys."
+      end: Secured - "no shared secret to bake/seal/distribute/rotate; per-user RBAC + audit for free;
+        the single-key dev path still exists for self-hosters (standalone gate)."
+    steps:
+      - step: Caller sends its own OAuth token (or X-Api-Key) to the MCP HTTP server.
+        output: The server passes the credential through; Lighthouse owner-resolves it (ApiKey.OwnerSubject
+          -> sub) and applies that caller's ApiKeyPermission scope.
+      - step: The wrapping client method pre-checks the Lighthouse server version before calling.
+        output: An old server (no endpoint) fails with a clear "upgrade Lighthouse" message, not an opaque 404.
+    error_paths:
+      - trigger: Caller presents no credential to an exposed /mcp.
+        recovery: With pass-through there is no ambient authority to fall back on — the call is rejected,
+          not silently executed as a shared key.
+      - trigger: Self-hoster on the legacy single-key/dev path.
+        recovery: That path stays available; no break for self-hosters (standalone gate).
diff --git a/docs/product/personas/platform-operator.yaml b/docs/product/personas/platform-operator.yaml
new file mode 100644
index 000000000..8dd0e4bfb
--- /dev/null
+++ b/docs/product/personas/platform-operator.yaml
@@ -0,0 +1,101 @@
+schema_version: 1
+id: platform-operator
+created: 2026-06-16
+created_in_feature: epic-5305-k8s-readiness
+
+display_name: Platform Operator
+
+aliases:
+  - self-hoster
+  - k8s-operator
+  - sre
+  - devops-engineer
+  - lpw-saas-operator
+
+short_description: >
+  The person who runs a Lighthouse INSTANCE — not the one who reads flow metrics inside it.
+  They deploy Lighthouse to a server or a Kubernetes cluster, put it behind a reverse proxy,
+  wire OIDC, upgrade it without taking it down, and watch it from a monitoring stack. Two
+  flavours of the same persona: the SELF-HOSTER running a single container for their own
+  org (today's sacrosanct standalone product), and the LPW SaaS operator running many
+  replicas across many tenants. What makes them THIS persona is that they care about the
+  pod/process lifecycle, rollouts, and proxying — the operational envelope around the app —
+  not about cycle times, forecasts, or RBAC assignments. Distinct from config-admin (who
+  edits in-app configuration) and from the end-user product personas (flow-coach, etc.).
+
+primary_jobs:
+  - job-operator-survive-multiple-replicas
+  - job-operator-zero-downtime-rollout
+  - job-operator-trust-pod-health
+  - job-operator-correct-behind-proxy
+  - job-operator-observe-in-cluster
+  - job-mcp-caller-own-identity
+
+goals:
+  - Run Lighthouse with more than one replica behind a load balancer without duplicate
+    work-tracking syncs, lost SignalR notifications, or migrations racing each other
+  - Roll out a new version with zero dropped requests and zero data loss — old and new pods
+    coexisting against one database for the length of the rollout
+  - Let Kubernetes route traffic only to pods that are truly serving, and restart only pods
+    that are genuinely dead — never flap on a slow dependency
+  - Get correct HTTPS redirects, secure cookies, OIDC callback URLs and SignalR negotiation
+    when Lighthouse sits behind Traefik / nginx / an Ingress
+  - See per-instance metrics, structured logs and traces in my existing monitoring stack
+  - Never have to bake, seal, distribute and rotate a single shared API key just to expose
+    the MCP server — each caller should drive Lighthouse as themselves
+
+frustrations:
+  - Lighthouse is a stateful singleton by construction; naively running >1 replica breaks in
+    three independent ways (N× external syncs, SignalR notifications that only reach one pod's
+    clients, an in-memory status cache that answers differently per pod) with no warning
+  - Every pod races Database.Migrate() on boot — N replicas starting concurrently is undefined
+  - A rolling update kills pods mid-request because the app does not drain on SIGTERM
+  - Behind a proxy, HTTPS redirects loop, OIDC callback URLs come out as http://, secure
+    cookies get dropped — because the app trusts its own scheme/host instead of the forwarded
+    headers
+  - The published mcp-http container is a confused deputy — one baked LIGHTHOUSE_API_KEY means
+    every caller acts as that key's owner, with that key's scope, with no per-user audit
+  - The app exposes no /metrics and logs unstructured text, so it is invisible to Prometheus
+    and painful to query in Loki
+
+mental_model:
+  - The standalone single container is the simple, good-enough-for-many product and it must
+    NEVER change — every k8s-readiness change auto-degrades to the single-instance path (no
+    Redis -> in-memory; one replica works; SQLite stays the default; frontend stays embedded)
+  - Readiness != liveness — a pod that cannot reach the DB should leave the load-balancer
+    rotation (readiness) but must NOT be restarted (liveness); a slow boot needs a startup probe
+  - A release is two database states at once — new and old pods share one Postgres for the
+    rollout window, so each release's migrations must be additive (expand now, contract later)
+  - '"Behind a proxy" means the app is no longer the TLS terminator; it must learn its real
+    scheme/host/client-IP from X-Forwarded-* headers, and only from a proxy it has been told
+    to trust'
+  - The caller's own credential should map to the caller's own identity and rights — Lighthouse
+    already owner-resolves and scopes API keys, so the confused deputy is a packaging gap, not a
+    backend gap
+
+vocabulary:
+  - '"standalone gate" — the hard acceptance rule that every change preserves the single-container
+    self-hosted product unchanged (the #5305 epic gate)'
+  - '"auto-degrade" — with no Redis / one replica / SQLite, the change collapses to today''s
+    single-instance behaviour with no configuration required'
+  - '"expand-only / expand-contract" — additive migrations this release, destructive cleanup a
+    later release, so old pods never depend on a column the new release dropped'
+  - '"drain" — on SIGTERM, stop accepting new work and let in-flight HTTP + SignalR + queued
+    updates finish within terminationGracePeriodSeconds'
+  - '"confused deputy" — the mcp-http server acting on a single baked key''s authority regardless
+    of who called it'
+  - '"forwarded headers" — X-Forwarded-Proto / -Host / -For that a reverse proxy sets and the app
+    must honour (UseForwardedHeaders with known proxies)'
+  - '"backplane" — the SignalR fan-out mechanism (in-memory per-process today; Redis across pods)'
+
+related_personas:
+  - config-admin (runs INSIDE the instance this persona operates; the OIDC/RBAC config this
+    persona wires up is what config-admin then administers)
+  - lighthouse-maintainer (ships the chart + docs this persona consumes; sometimes the same person)
+  - first-time-system-admin (the human who bootstraps RBAC once this persona has the instance up)
+
+not_this_persona:
+  - A flow-coach / delivery-lead / forecaster — they read metrics; they do not run the process
+  - config-admin editing Team / Portfolio / Connection / RBAC settings inside the running app
+  - A developer USING the MCP/CLI client (that caller appears in job-mcp-caller-own-identity as
+    the actor, but the persona who deploys and secures the MCP server is THIS one)

From 0c0c11ebad06d2966dbd3d1c280522c27c5d0789 Mon Sep 17 00:00:00 2001
From: Benjamin Huser-Berta <github.com.bok@huser-berta.com>
Date: Thu, 18 Jun 2026 20:35:24 +0200
Subject: [PATCH 4/4] test: inline all @mui and react-transition-group for
 vitest ESM resolution
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The string "@mui/material" inline was insufficient: the failing tests also
vi.mock @mui/x-date-pickers, whose importActual pulls transitions that
deep-import react-transition-group/TransitionGroupContext (a directory with a
main/module redirect package.json). Node's native ESM resolver — used by Vitest
for any non-inlined dep, including importActual — rejects directory imports with
ERR_UNSUPPORTED_DIR_IMPORT. Vite/Rollup (the production build) resolve them fine,
which is why pnpm build passed while the suite failed.

Use regex inline so every @mui subpath and react-transition-group are transformed
by Vite, which resolves the directory import. Verified: full suite 264 files /
3474 tests green against @mui/material 9.1.1.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 Lighthouse.Frontend/vitest.config.ts | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Lighthouse.Frontend/vitest.config.ts b/Lighthouse.Frontend/vitest.config.ts
index 7635f7948..908d5a183 100644
--- a/Lighthouse.Frontend/vitest.config.ts
+++ b/Lighthouse.Frontend/vitest.config.ts
@@ -29,7 +29,7 @@ export default defineConfig({
 		],
 		server: {
 			deps: {
-				inline: ["@mui/x-data-grid", "@mui/material"],
+				inline: [/@mui\//, /react-transition-group/],
 			},
 		},