From a85157da418a76b459d4e6fe316fce0b1a3e1e44 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 18 Jun 2026 05:23:43 +0000 Subject: [PATCH 1/4] deps: bump @mui/material from 9.0.1 to 9.1.1 in /Lighthouse.Frontend Bumps [@mui/material](https://github.com/mui/material-ui/tree/HEAD/packages/mui-material) from 9.0.1 to 9.1.1. - [Release notes](https://github.com/mui/material-ui/releases) - [Changelog](https://github.com/mui/material-ui/blob/master/CHANGELOG.md) - [Commits](https://github.com/mui/material-ui/commits/v9.1.1/packages/mui-material) --- updated-dependencies: - dependency-name: "@mui/material" dependency-version: 9.1.1 dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- Lighthouse.Frontend/package.json | 2 +- Lighthouse.Frontend/pnpm-lock.yaml | 91 ++++++++++++------------------ 2 files changed, 37 insertions(+), 56 deletions(-) diff --git a/Lighthouse.Frontend/package.json b/Lighthouse.Frontend/package.json index cd3e55bd5..0295fe05f 100644 --- a/Lighthouse.Frontend/package.json +++ b/Lighthouse.Frontend/package.json @@ -21,7 +21,7 @@ "@microsoft/signalr": "^10.0.0", "@mui/icons-material": "^7.3.11", "@mui/lab": "7.0.0", - "@mui/material": "^9.0.1", + "@mui/material": "^9.1.1", "@mui/system": "^9.1.1", "@mui/x-charts": "9.0.1", "@mui/x-data-grid": "^9.5.0", diff --git a/Lighthouse.Frontend/pnpm-lock.yaml b/Lighthouse.Frontend/pnpm-lock.yaml index 7c4458483..f4466433c 100644 --- a/Lighthouse.Frontend/pnpm-lock.yaml +++ b/Lighthouse.Frontend/pnpm-lock.yaml @@ -26,25 +26,25 @@ importers: version: 10.0.0 '@mui/icons-material': specifier: ^7.3.11 - version: 
7.3.11(@mui/material@9.0.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@emotion/styled@11.14.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react-dom@19.2.7(react@19.2.7))(react@19.2.7))(@types/react@19.2.17)(react@19.2.7) + version: 7.3.11(@mui/material@9.1.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@emotion/styled@11.14.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react-dom@19.2.7(react@19.2.7))(react@19.2.7))(@types/react@19.2.17)(react@19.2.7) '@mui/lab': specifier: 7.0.0 - version: 7.0.0(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@emotion/styled@11.14.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@mui/material@9.0.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@emotion/styled@11.14.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react-dom@19.2.7(react@19.2.7))(react@19.2.7))(@types/react@19.2.17)(react-dom@19.2.7(react@19.2.7))(react@19.2.7) + version: 7.0.0(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@emotion/styled@11.14.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@mui/material@9.1.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@emotion/styled@11.14.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react-dom@19.2.7(react@19.2.7))(react@19.2.7))(@types/react@19.2.17)(react-dom@19.2.7(react@19.2.7))(react@19.2.7) '@mui/material': - specifier: ^9.0.1 - version: 
9.0.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@emotion/styled@11.14.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react-dom@19.2.7(react@19.2.7))(react@19.2.7) + specifier: ^9.1.1 + version: 9.1.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@emotion/styled@11.14.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react-dom@19.2.7(react@19.2.7))(react@19.2.7) '@mui/system': specifier: ^9.1.1 version: 9.1.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@emotion/styled@11.14.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7) '@mui/x-charts': specifier: 9.0.1 - version: 9.0.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@emotion/styled@11.14.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@mui/material@9.0.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@emotion/styled@11.14.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react-dom@19.2.7(react@19.2.7))(react@19.2.7))(@mui/system@9.1.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@emotion/styled@11.14.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react-dom@19.2.7(react@19.2.7))(react@19.2.7) + version: 
9.0.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@emotion/styled@11.14.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@mui/material@9.1.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@emotion/styled@11.14.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react-dom@19.2.7(react@19.2.7))(react@19.2.7))(@mui/system@9.1.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@emotion/styled@11.14.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react-dom@19.2.7(react@19.2.7))(react@19.2.7) '@mui/x-data-grid': specifier: ^9.5.0 - version: 9.5.0(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@emotion/styled@11.14.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@mui/material@9.0.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@emotion/styled@11.14.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react-dom@19.2.7(react@19.2.7))(react@19.2.7))(@mui/system@9.1.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@emotion/styled@11.14.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react-dom@19.2.7(react@19.2.7))(react@19.2.7) + version: 
9.5.0(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@emotion/styled@11.14.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@mui/material@9.1.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@emotion/styled@11.14.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react-dom@19.2.7(react@19.2.7))(react@19.2.7))(@mui/system@9.1.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@emotion/styled@11.14.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react-dom@19.2.7(react@19.2.7))(react@19.2.7) '@mui/x-date-pickers': specifier: 9.0.0 - version: 9.0.0(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@emotion/styled@11.14.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@mui/material@9.0.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@emotion/styled@11.14.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react-dom@19.2.7(react@19.2.7))(react@19.2.7))(@mui/system@9.1.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@emotion/styled@11.14.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(date-fns@4.4.0)(dayjs@1.11.21)(react-dom@19.2.7(react@19.2.7))(react@19.2.7) + version: 
9.0.0(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@emotion/styled@11.14.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@mui/material@9.1.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@emotion/styled@11.14.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react-dom@19.2.7(react@19.2.7))(react@19.2.7))(@mui/system@9.1.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@emotion/styled@11.14.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(date-fns@4.4.0)(dayjs@1.11.21)(react-dom@19.2.7(react@19.2.7))(react@19.2.7) '@tanstack/react-query': specifier: ^5.101.0 version: 5.101.0(react@19.2.7) @@ -673,8 +673,8 @@ packages: '@microsoft/signalr@10.0.0': resolution: {integrity: sha512-0BRqz/uCx3JdrOqiqgFhih/+hfTERaUfCZXFB52uMaZJrKaPRzHzMuqVsJC/V3pt7NozcNXGspjKiQEK+X7P2w==} - '@mui/core-downloads-tracker@9.0.1': - resolution: {integrity: sha512-GzamIIhZ1bH77dq7eKaeyRgJdkypsxin4jBFq2EMs4lBWRR0LFO1CSVMsoebn/VvjcNrnrOrjy48MkrkQUK2iw==} + '@mui/core-downloads-tracker@9.1.1': + resolution: {integrity: sha512-AupmMICbdJHqAh6FfOMaaiiIr7dfEgZJn5DFfiPuGNrbs+ZZy9cD1APwO0TSVBz5j08MJEEY6n7iC76/2wjMEA==} '@mui/icons-material@7.3.11': resolution: {integrity: sha512-+hz5ilwHZ3djd5es3sCErLioqe/NhZcYTsV/TNXZAMdJdb23F4xzJjqnnZdnurc3S1+ietcssRNqieOhPQLZ7Q==} @@ -709,13 +709,13 @@ packages: '@types/react': optional: true - '@mui/material@9.0.1': - resolution: {integrity: sha512-voyCpeUxcSWLN7KPZuq0pGCIt726T9K6kiVM3XUcywZDAlZSarLHaUxJVQpospbjjOzN53hwyjo8s6KoWl6utw==} + '@mui/material@9.1.1': + resolution: {integrity: sha512-Wv+gInjrpf99l1Q0oHe0eOWGTnlbkzs5nowClX65KCT/2fyPMwcbFEEkUsOHdpcHhB5UAbz/d7jlwt5ajWVvlA==} engines: {node: '>=14.0.0'} peerDependencies: '@emotion/react': ^11.5.0 '@emotion/styled': ^11.3.0 - 
'@mui/material-pigment-css': ^9.0.1 + '@mui/material-pigment-css': ^9.1.1 '@types/react': ^17.0.0 || ^18.0.0 || ^19.0.0 react: ^17.0.0 || ^18.0.0 || ^19.0.0 react-dom: ^17.0.0 || ^18.0.0 || ^19.0.0 @@ -815,14 +815,6 @@ packages: '@types/react': optional: true - '@mui/types@9.0.0': - resolution: {integrity: sha512-i1cuFCAWN44b3AJWO7mh7tuh1sqbQSeVr/94oG0TX5uXivac8XalgE4/6fQZcmGZigzbQ35IXxj/4jLpRIBYZg==} - peerDependencies: - '@types/react': ^17.0.0 || ^18.0.0 || ^19.0.0 - peerDependenciesMeta: - '@types/react': - optional: true - '@mui/types@9.1.1': resolution: {integrity: sha512-Zjt7u8wNvDg40rPTGoL+TnfkpuSKjwubsNSFRH1KAVZLcaV4I3AFNHIFbvH7p4F3alEibSbdd90xAgn5Rnfndg==} peerDependencies: @@ -2390,9 +2382,6 @@ packages: react-is@17.0.2: resolution: {integrity: sha512-w2GsyukL62IJnlaff/nRegPQR94C/XXamvMWmSHRJ4y7Ts/4ocGRmTHvOs8PSE6pB3dWOrD/nueuU5sduBsQ4w==} - react-is@19.2.6: - resolution: {integrity: sha512-XjBR15BhXuylgWGuslhDKqlSayuqvqBX91BP8pauG8kd1zY8kotkNWbXksTCNRarse4kuGbe2kIY05ARtwNIvw==} - react-is@19.2.7: resolution: {integrity: sha512-kZFnouyVv7eP/Phmrlo9FK+zcAdriZJvzxXHF1Sl1P377WSGe2G/JxVolhTrB/jeV47lKImhNUsijjHAAbcl/A==} @@ -3475,20 +3464,20 @@ snapshots: - encoding - utf-8-validate - '@mui/core-downloads-tracker@9.0.1': {} + '@mui/core-downloads-tracker@9.1.1': {} - '@mui/icons-material@7.3.11(@mui/material@9.0.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@emotion/styled@11.14.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react-dom@19.2.7(react@19.2.7))(react@19.2.7))(@types/react@19.2.17)(react@19.2.7)': + '@mui/icons-material@7.3.11(@mui/material@9.1.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@emotion/styled@11.14.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react-dom@19.2.7(react@19.2.7))(react@19.2.7))(@types/react@19.2.17)(react@19.2.7)': dependencies: '@babel/runtime': 
7.29.2 - '@mui/material': 9.0.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@emotion/styled@11.14.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react-dom@19.2.7(react@19.2.7))(react@19.2.7) + '@mui/material': 9.1.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@emotion/styled@11.14.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react-dom@19.2.7(react@19.2.7))(react@19.2.7) react: 19.2.7 optionalDependencies: '@types/react': 19.2.17 - '@mui/lab@7.0.0(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@emotion/styled@11.14.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@mui/material@9.0.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@emotion/styled@11.14.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react-dom@19.2.7(react@19.2.7))(react@19.2.7))(@types/react@19.2.17)(react-dom@19.2.7(react@19.2.7))(react@19.2.7)': + '@mui/lab@7.0.0(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@emotion/styled@11.14.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@mui/material@9.1.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@emotion/styled@11.14.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react-dom@19.2.7(react@19.2.7))(react@19.2.7))(@types/react@19.2.17)(react-dom@19.2.7(react@19.2.7))(react@19.2.7)': dependencies: '@babel/runtime': 7.29.2 - '@mui/material': 
9.0.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@emotion/styled@11.14.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react-dom@19.2.7(react@19.2.7))(react@19.2.7) + '@mui/material': 9.1.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@emotion/styled@11.14.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react-dom@19.2.7(react@19.2.7))(react@19.2.7) '@mui/system': 7.3.11(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@emotion/styled@11.14.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7) '@mui/types': 7.4.12(@types/react@19.2.17) '@mui/utils': 7.3.10(@types/react@19.2.17)(react@19.2.7) @@ -3501,13 +3490,13 @@ snapshots: '@emotion/styled': 11.14.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7) '@types/react': 19.2.17 - '@mui/material@9.0.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@emotion/styled@11.14.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react-dom@19.2.7(react@19.2.7))(react@19.2.7)': + '@mui/material@9.1.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@emotion/styled@11.14.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react-dom@19.2.7(react@19.2.7))(react@19.2.7)': dependencies: - '@babel/runtime': 7.29.2 - '@mui/core-downloads-tracker': 9.0.1 + '@babel/runtime': 7.29.7 + '@mui/core-downloads-tracker': 9.1.1 '@mui/system': 9.1.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@emotion/styled@11.14.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7) - '@mui/types': 
9.0.0(@types/react@19.2.17) - '@mui/utils': 9.0.1(@types/react@19.2.17)(react@19.2.7) + '@mui/types': 9.1.1(@types/react@19.2.17) + '@mui/utils': 9.1.1(@types/react@19.2.17)(react@19.2.7) '@popperjs/core': 2.11.8 '@types/react-transition-group': 4.4.12(@types/react@19.2.17) clsx: 2.1.1 @@ -3515,7 +3504,7 @@ snapshots: prop-types: 15.8.1 react: 19.2.7 react-dom: 19.2.7(react@19.2.7) - react-is: 19.2.6 + react-is: 19.2.7 react-transition-group: 4.4.5(react-dom@19.2.7(react@19.2.7))(react@19.2.7) optionalDependencies: '@emotion/react': 11.14.0(@types/react@19.2.17)(react@19.2.7) @@ -3604,12 +3593,6 @@ snapshots: optionalDependencies: '@types/react': 19.2.17 - '@mui/types@9.0.0(@types/react@19.2.17)': - dependencies: - '@babel/runtime': 7.29.2 - optionalDependencies: - '@types/react': 19.2.17 - '@mui/types@9.1.1(@types/react@19.2.17)': dependencies: '@babel/runtime': 7.29.7 @@ -3624,7 +3607,7 @@ snapshots: clsx: 2.1.1 prop-types: 15.8.1 react: 19.2.7 - react-is: 19.2.6 + react-is: 19.2.7 optionalDependencies: '@types/react': 19.2.17 @@ -3642,13 +3625,13 @@ snapshots: '@mui/utils@9.0.0(@types/react@19.2.17)(react@19.2.7)': dependencies: - '@babel/runtime': 7.29.2 + '@babel/runtime': 7.29.7 '@mui/types': 9.1.1(@types/react@19.2.17) '@types/prop-types': 15.7.15 clsx: 2.1.1 prop-types: 15.8.1 react: 19.2.7 - react-is: 19.2.6 + react-is: 19.2.7 optionalDependencies: '@types/react': 19.2.17 @@ -3678,7 +3661,7 @@ snapshots: '@mui/x-charts-vendor@9.0.0': dependencies: - '@babel/runtime': 7.29.2 + '@babel/runtime': 7.29.7 '@types/d3-array': 3.2.2 '@types/d3-color': 3.1.3 '@types/d3-format': 3.0.4 @@ -3702,10 +3685,10 @@ snapshots: flatqueue: 3.0.0 internmap: 2.0.3 - 
'@mui/x-charts@9.0.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@emotion/styled@11.14.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@mui/material@9.0.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@emotion/styled@11.14.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react-dom@19.2.7(react@19.2.7))(react@19.2.7))(@mui/system@9.1.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@emotion/styled@11.14.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react-dom@19.2.7(react@19.2.7))(react@19.2.7)': + '@mui/x-charts@9.0.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@emotion/styled@11.14.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@mui/material@9.1.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@emotion/styled@11.14.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react-dom@19.2.7(react@19.2.7))(react@19.2.7))(@mui/system@9.1.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@emotion/styled@11.14.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react-dom@19.2.7(react@19.2.7))(react@19.2.7)': dependencies: '@babel/runtime': 7.29.2 - '@mui/material': 9.0.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@emotion/styled@11.14.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react-dom@19.2.7(react@19.2.7))(react@19.2.7) + '@mui/material': 
9.1.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@emotion/styled@11.14.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react-dom@19.2.7(react@19.2.7))(react@19.2.7) '@mui/system': 9.1.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@emotion/styled@11.14.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7) '@mui/utils': 9.0.0(@types/react@19.2.17)(react@19.2.7) '@mui/x-charts-vendor': 9.0.0 @@ -3724,10 +3707,10 @@ snapshots: transitivePeerDependencies: - '@types/react' - '@mui/x-data-grid@9.5.0(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@emotion/styled@11.14.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@mui/material@9.0.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@emotion/styled@11.14.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react-dom@19.2.7(react@19.2.7))(react@19.2.7))(@mui/system@9.1.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@emotion/styled@11.14.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react-dom@19.2.7(react@19.2.7))(react@19.2.7)': + 
'@mui/x-data-grid@9.5.0(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@emotion/styled@11.14.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@mui/material@9.1.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@emotion/styled@11.14.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react-dom@19.2.7(react@19.2.7))(react@19.2.7))(@mui/system@9.1.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@emotion/styled@11.14.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react-dom@19.2.7(react@19.2.7))(react@19.2.7)': dependencies: '@babel/runtime': 7.29.7 - '@mui/material': 9.0.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@emotion/styled@11.14.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react-dom@19.2.7(react@19.2.7))(react@19.2.7) + '@mui/material': 9.1.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@emotion/styled@11.14.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react-dom@19.2.7(react@19.2.7))(react@19.2.7) '@mui/system': 9.1.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@emotion/styled@11.14.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7) '@mui/utils': 9.0.1(@types/react@19.2.17)(react@19.2.7) '@mui/x-internals': 9.1.0(@types/react@19.2.17)(react@19.2.7) @@ -3743,10 +3726,10 @@ snapshots: transitivePeerDependencies: - '@types/react' - 
'@mui/x-date-pickers@9.0.0(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@emotion/styled@11.14.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@mui/material@9.0.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@emotion/styled@11.14.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react-dom@19.2.7(react@19.2.7))(react@19.2.7))(@mui/system@9.1.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@emotion/styled@11.14.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(date-fns@4.4.0)(dayjs@1.11.21)(react-dom@19.2.7(react@19.2.7))(react@19.2.7)': + '@mui/x-date-pickers@9.0.0(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@emotion/styled@11.14.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@mui/material@9.1.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@emotion/styled@11.14.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react-dom@19.2.7(react@19.2.7))(react@19.2.7))(@mui/system@9.1.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@emotion/styled@11.14.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(date-fns@4.4.0)(dayjs@1.11.21)(react-dom@19.2.7(react@19.2.7))(react@19.2.7)': dependencies: '@babel/runtime': 7.29.2 - '@mui/material': 9.0.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@emotion/styled@11.14.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react-dom@19.2.7(react@19.2.7))(react@19.2.7) + '@mui/material': 
9.1.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@emotion/styled@11.14.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react-dom@19.2.7(react@19.2.7))(react@19.2.7) '@mui/system': 9.1.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@emotion/styled@11.14.1(@emotion/react@11.14.0(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7))(@types/react@19.2.17)(react@19.2.7) '@mui/utils': 9.0.0(@types/react@19.2.17)(react@19.2.7) '@mui/x-internals': 9.0.0(@types/react@19.2.17)(react@19.2.7) @@ -3766,11 +3749,11 @@ snapshots: '@mui/x-internal-gestures@9.0.2': dependencies: - '@babel/runtime': 7.29.2 + '@babel/runtime': 7.29.7 '@mui/x-internals@9.0.0(@types/react@19.2.17)(react@19.2.7)': dependencies: - '@babel/runtime': 7.29.2 + '@babel/runtime': 7.29.7 '@mui/utils': 9.0.0(@types/react@19.2.17)(react@19.2.7) react: 19.2.7 reselect: 5.1.1 @@ -5319,8 +5302,6 @@ snapshots: react-is@17.0.2: {} - react-is@19.2.6: {} - react-is@19.2.7: {} react-markdown@10.1.0(@types/react@19.2.17)(react@19.2.7): @@ -5359,7 +5340,7 @@ snapshots: react-transition-group@4.4.5(react-dom@19.2.7(react@19.2.7))(react@19.2.7): dependencies: - '@babel/runtime': 7.29.2 + '@babel/runtime': 7.29.7 dom-helpers: 5.2.1 loose-envify: 1.4.0 prop-types: 15.8.1 From c6b16e5273d350903c1c58709ec2aea756936b77 Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 18 Jun 2026 05:53:44 +0000 Subject: [PATCH 2/4] test: inline @mui/material for vitest ESM resolution MUI 9.1.1's Transition.mjs uses a directory deep-import of react-transition-group/TransitionGroupContext, which Node's native ESM resolver (used by Vitest for non-inlined deps) rejects. Inline @mui/material so Vite transforms it and resolves the import, matching the existing @mui/x-data-grid handling. 
Co-Authored-By: Claude Opus 4.8 Claude-Session: https://claude.ai/code/session_01LQGQcGTMm2UQQuH41huhR9 --- Lighthouse.Frontend/vitest.config.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lighthouse.Frontend/vitest.config.ts b/Lighthouse.Frontend/vitest.config.ts index 79acd13aa..7635f7948 100644 --- a/Lighthouse.Frontend/vitest.config.ts +++ b/Lighthouse.Frontend/vitest.config.ts @@ -29,7 +29,7 @@ export default defineConfig({ ], server: { deps: { - inline: ["@mui/x-data-grid"], + inline: ["@mui/x-data-grid", "@mui/material"], }, }, From 2397c85cee42190f6a142046b793d8f899ac8a13 Mon Sep 17 00:00:00 2001 From: Benjamin Huser-Berta Date: Thu, 18 Jun 2026 20:35:14 +0200 Subject: [PATCH 3/4] docs(discuss): epic 5305 k8s-readiness DISCUSS artifacts Add the DISCUSS-wave outputs for Epic #5305 (make the Lighthouse app itself safe to run on Kubernetes): feature-delta with 7 user stories, opportunity-scored JTBD jobs, locked decisions D1-D6, cross-cutting checklist, and the 7 per-slice briefs. Adds the new platform-operator persona, its journey, and the operator jobs in jobs.yaml. 
Co-Authored-By: Claude Opus 4.8 Claude-Session: https://claude.ai/code/session_01EyQSxb2QZsBfFzWBxn81kx --- .../epic-5305-k8s-readiness/feature-delta.md | 445 ++++++++++++++++++ .../slices/slice-01-forwarded-headers.md | 43 ++ .../slices/slice-02-health-checks.md | 44 ++ .../slices/slice-03-graceful-shutdown.md | 45 ++ .../slices/slice-04-expand-only-migrations.md | 42 ++ .../slices/slice-05-observability.md | 42 ++ .../slices/slice-06-mcp-inbound-auth.md | 45 ++ .../slices/slice-07-horizontal-scalability.md | 46 ++ docs/product/jobs.yaml | 176 ++++++- .../journeys/epic-5305-k8s-readiness.yaml | 183 +++++++ docs/product/personas/platform-operator.yaml | 101 ++++ 11 files changed, 1211 insertions(+), 1 deletion(-) create mode 100644 docs/feature/epic-5305-k8s-readiness/feature-delta.md create mode 100644 docs/feature/epic-5305-k8s-readiness/slices/slice-01-forwarded-headers.md create mode 100644 docs/feature/epic-5305-k8s-readiness/slices/slice-02-health-checks.md create mode 100644 docs/feature/epic-5305-k8s-readiness/slices/slice-03-graceful-shutdown.md create mode 100644 docs/feature/epic-5305-k8s-readiness/slices/slice-04-expand-only-migrations.md create mode 100644 docs/feature/epic-5305-k8s-readiness/slices/slice-05-observability.md create mode 100644 docs/feature/epic-5305-k8s-readiness/slices/slice-06-mcp-inbound-auth.md create mode 100644 docs/feature/epic-5305-k8s-readiness/slices/slice-07-horizontal-scalability.md create mode 100644 docs/product/journeys/epic-5305-k8s-readiness.yaml create mode 100644 docs/product/personas/platform-operator.yaml diff --git a/docs/feature/epic-5305-k8s-readiness/feature-delta.md b/docs/feature/epic-5305-k8s-readiness/feature-delta.md new file mode 100644 index 000000000..0f4cf0cc2 --- /dev/null +++ b/docs/feature/epic-5305-k8s-readiness/feature-delta.md @@ -0,0 +1,445 @@ +# Feature Delta: epic-5305-k8s-readiness + + + +Wave: DISCUSS | Date: 2026-06-16 | Density: lean (per ~/.nwave/global-config.json) | Epic: ADO #5305 + 
+**Feature goal**: make the Lighthouse application itself safe to run on Kubernetes — multiple +replicas, rolling updates, behind an Ingress/reverse-proxy — through seven production C#/TS +changes, WITHOUT changing the sacrosanct single-container standalone product. This epic runs +BEFORE the k8s Productization epic (#5306): the app must be cluster-safe before it is packaged +and hosted. It was split out of the learning epic #5189 on 2026-06-15. + +This DISCUSS covers all 7 child stories: #5311 forwarded headers, #5310 health checks, #5309 +graceful shutdown, #5308 expand-only migrations + safe startup, #5312 observability, #5307 MCP +inbound auth, #5304 horizontal scalability. The north-star they slice toward (D1–D5, Q1–Q5, §4 +architecture) lives in `docs/feature/l8e-kubernetes-learning/planning-stage.md` and is inherited, +not re-litigated. + +**Prior-wave consultation** (READING ENFORCEMENT): +- ✓ `docs/product/jobs.yaml` · ✓ `docs/product/personas/{config-admin,lighthouse-maintainer}.yaml` +- ✓ `docs/product/journeys/multiple-cycle-times.yaml` (schema) · ✓ `docs/product/kpi-contracts.yaml` +- ✓ `docs/feature/l8e-kubernetes-learning/planning-stage.md` (north-star backbone) +- ✓ ADO #5305 (epic) + #5304, #5307–#5312 (child descriptions) +- ⊘ `docs/feature/epic-5305-k8s-readiness/{discover,diverge}/` (none — planning-stage is the upstream evidence) + +No DISCUSS decision below contradicts the north-star; each inherits it. + +--- + +## Wave: DISCUSS / [REF] Persona + +**`platform-operator`** (NEW — `docs/product/personas/platform-operator.yaml`) — the person who *runs* +a Lighthouse instance, not the one who reads metrics inside it. Two flavours of one persona: the +**self-hoster** running a single container today (the sacrosanct standalone product) and the **LPW +SaaS operator** running many replicas across tenants tomorrow. Cares about the pod/process lifecycle, +rollouts and proxying — the operational envelope around the app. 
Distinct from `config-admin` (edits +in-app config) and the end-user product personas (flow-coach, forecaster). The MCP-caller story (#5307) +has a secondary actor — the **MCP/CLI caller** authenticating as themselves — but the persona who +deploys and secures the MCP server is `platform-operator`. + +--- + +## Wave: DISCUSS / [REF] JTBD one-liners + +Six jobs (added to `docs/product/jobs.yaml`), all `persona: platform-operator`. Opportunity-scored (Imp = importance, Sat = current satisfaction, Gap = Imp − Sat): + +| Job ID | One-liner | Imp | Sat | Gap | Stories | +|---|---|---|---|---|---| +| `job-operator-survive-multiple-replicas` | Run >1 replica without N× syncs, lost notifications, or migration races | 5 | 1 | **4** | #5304, #5308 (lock) | +| `job-operator-zero-downtime-rollout` | Upgrade with no dropped requests and no data loss | 4 | 1 | **3** | #5308, #5309 | +| `job-operator-correct-behind-proxy` | Correct HTTPS / cookies / OIDC / SignalR behind a reverse proxy | 4 | 1 | **3** | #5311 | +| `job-mcp-caller-own-identity` | Each MCP caller drives Lighthouse as themselves, not a shared baked key | 4 | 1 | **3** | #5307 | +| `job-operator-trust-pod-health` | k8s routes to only-ready pods, restarts only dead ones | 4 | 2 | 2 | #5310 | +| `job-operator-observe-in-cluster` | Per-instance metrics / structured logs / traces in my stack | 3 | 2 | 1 | #5312 | + +**Highest opportunity** = `survive-multiple-replicas` (gap 4) and the three gap-3 jobs. This drives +prioritization (below): the high-uncertainty multi-replica job ships last but SPIKEs early; the gap-3 +"login behind proxy" job ships first because it is small and unblocks all cluster auth testing. + +--- + +## Wave: DISCUSS / [REF] Scope assessment (Elephant-Carpaccio early gate) + +Oversized signals present: **>3 bounded contexts/technologies** (EF/Postgres, SignalR/Redis, ASP.NET +health + lifecycle, OAuth/MCP, OpenTelemetry, reverse-proxy middleware) and **>2 weeks effort**. 
→ This +is correctly an **EPIC**, already split from #5189 and decomposed on ADO into 7 independently-shippable +stories. Each story is one thin vertical slice that ships end-to-end and auto-degrades to standalone. +**Verdict: PASS — already split; user confirmed full-epic DISCUSS of all 7.** No further split needed; +the only slice needing internal care is #5304 (gated behind a required SPIKE, see slice-07). + +--- + +## Wave: DISCUSS / [REF] Locked decisions + +Inherited from the north-star (planning-stage §3) and applied as this epic's hard gates: + +- **[D1 · EPIC GATE — standalone is sacrosanct]** Every story MUST preserve the single-container + standalone + regular server deployment unchanged, auto-degrading to the single-instance path: no Redis + ⇒ in-memory backplane; one replica works; SQLite stays default; frontend stays embedded. Verified per + story as an acceptance criterion. (planning §0 epic gate + §D4) +- **[D2 · full nWave for product code]** These are real C#/TS changes → full `DISCUSS→…→DELIVER` + the + CLAUDE.md RBAC / Lighthouse-Clients / Website checklist, not the learning light-loop. (planning §D3) +- **[D3 · sequence]** Learning #5189 → **#5305 (this)** → Productization #5306. The app must be + cluster-safe before it is packaged/hosted. Cluster-side stacks (Prometheus/Grafana/Loki, oauth2-proxy, + Ingress manifests, Helm chart) are #5306, NOT here — this epic is in-app code only. +- **[D4 · expand-only migrations]** Additive-only per release; destructive cleanup is a *later* release + (expand now, contract later) because rolling updates run new+old pods against one shared Postgres. + (memory: expand-only/non-destructive; planning #5308) +- **[D5 · #5304 architecture is OPEN — do not pre-pick]** The cluster-aware unit is the *update queue + itself*, not a timer leader (both the periodic loop AND inline manual-refresh paths must be covered). + Leader election is necessary-not-sufficient. 
DESIGN/SPIKE decides between distributed-single-consumer + queue vs. cluster-wide per-entity lock + shared status store. (ADO #5304 architectural note 2026-06-14) +- **[D6 · MCP auth = clients-repo work, version-gated]** #5307 lands primarily in `lighthouse-clients`; + preferred path is MCP OAuth pass-through, interim is X-Api-Key pass-through reusing the existing + owner-resolved/scoped keys. Version-gate the endpoint (strictly newer than last released Lighthouse; + `FEATURE_REQUIRES_SERVER_NEWER_THAN`). (planning §6 Q5) + +--- + +## Wave: DISCUSS / [REF] Cross-cutting impact checklist (mandatory per CLAUDE.md DISCUSS) + +Recorded explicitly — "N/A, because…" where no change is needed; these extend DoR Item 7. + +| Story | RBAC | Lighthouse-Clients (CLI + MCP) | Website | +|---|---|---|---| +| #5311 forwarded headers | N/A — derives scheme/host only; no authorization surface. (But it *fixes* OIDC behind a proxy, so auth *works* correctly.) | N/A — no API contract change. | N/A — operational, not marketed. | +| #5310 health checks | N/A — unauthenticated operational endpoints carrying no business data. | N/A. | N/A. | +| #5309 graceful shutdown | N/A — server lifecycle only. | N/A — callers just reconnect. | N/A. | +| #5308 migrations + startup | N/A — provider/startup mechanics; confirm provider selection touches no RBAC-gated admin surface (it does not). | Possibly a CLI **connection hint** for Postgres — confirm in DESIGN; otherwise N/A. | N/A. | +| #5312 observability | **Decide in DESIGN**: `/metrics` can leak request paths; default cluster-internal/unauthenticated, but exposure must be a conscious call (Sonar/security-hotspot). | N/A. | N/A. | +| #5307 MCP inbound auth | **Central** — removes ambient authority; the MCP path honours per-caller `ApiKeyPermission` scope via the existing `ApiKeyAuthenticationHandler` (no new RBAC port, flows through the established handler). 
| **Primary surface** — change lands in `lighthouse-clients`; **version-gate** per CLAUDE.md. | N/A — security/packaging, not a marketed UI feature. | +| #5304 horizontal scalability | N/A — no authorization surface. | Likely N/A — internal infra, no API contract change; confirm in DESIGN. | N/A. | + +--- + +## Wave: DISCUSS / [REF] User stories + +Seven stories, one per ADO child, one per slice. US-NN ↔ slice-NN (prioritized order). Every story is +operator-visible (none is `@infrastructure`-only → no slice-composition gate violation: every slice +ships one value story). Each inherits **D1 (standalone gate)** as an embedded AC. + +### US-01 — Login works behind a TLS-terminating reverse proxy (ADO #5311) +As a **platform-operator**, I put Lighthouse behind Traefik/nginx/an Ingress and want OIDC login + secure +cookies + SignalR to use the real public HTTPS host, so users log in first try with no redirect loop. +`job_id: job-operator-correct-behind-proxy` + +#### Elevator Pitch +Before: behind a TLS-terminating proxy, OIDC redirects to `http://`, the callback loops, and secure cookies are dropped — login is broken. +After: declare the proxy as trusted + enable forwarded headers → hit `https://<public-host>/` → the OIDC redirect/callback are `https://<public-host>/...`, the secure cookie persists, login succeeds. +Decision enabled: the operator can safely front Lighthouse with any reverse proxy and trust that auth works. + +#### Acceptance criteria +- AC1: With trust ON and `X-Forwarded-Proto: https` + `X-Forwarded-Host: <public-host>` from a **declared known proxy**, the generated OIDC redirect/callback URL is `https://<public-host>/...` (integration test). +- AC2: Forwarded headers from an **undeclared** source are ignored — no scheme/host spoof. +- AC3 (D1): With no proxy declared, direct/standalone access is byte-identical to today; forwarded-header trust is OFF by default.
+ +### US-02 — Kubernetes trusts the pod's real health (ADO #5310) +As a **platform-operator**, I want readiness gated on real serving capacity and liveness shallow, so k8s +routes traffic only to serving pods and restarts only genuinely-dead ones. +`job_id: job-operator-trust-pod-health` + +#### Elevator Pitch +Before: there are no real probes; k8s can route to a not-yet-ready pod (cold 500s) or restart-loop a healthy-but-slow pod. +After: configure probes → `GET /health/ready` is 503 until DB-reachable + migrations-applied, `GET /health/live` stays 200 through a slow dependency, `GET /health/startup` covers slow boot. +Decision enabled: the operator trusts rollout/health status and can set probe configs with confidence. + +#### Acceptance criteria +- AC1: readiness returns unhealthy when the DB is unreachable, while liveness stays healthy (no restart storm). +- AC2: readiness returns healthy only when DB reachable AND migrations applied. +- AC3 (D1): endpoints return 200 / are harmless in single-container mode with no orchestrator. + +### US-03 — Rolling updates drop no requests (ADO #5309) +As a **platform-operator**, I want a terminating pod to drain in-flight HTTP + SignalR + the update queue +on SIGTERM, so I can roll out updates during the day with zero dropped requests. +`job_id: job-operator-zero-downtime-rollout` + +#### Elevator Pitch +Before: a rolling update kills pods mid-request — in-flight HTTP/SignalR/queued updates are lost. +After: `kubectl rollout restart` (or any SIGTERM) → the pod stops intake, drains in-flight work within `terminationGracePeriodSeconds`, then exits → a load test + live SignalR client sees zero failed requests and a clean reconnect. +Decision enabled: the operator ships updates without a maintenance window. + +#### Acceptance criteria +- AC1: on SIGTERM/`StopAsync`, an in-flight HTTP request and a queued update complete (or the update is safely re-enqueued) before the host reports stopped. 
+- AC2: readiness flips to NotReady on `ApplicationStopping` so the LB stops routing before drain. +- AC3 (D1): a single-container Ctrl-C behaves exactly as today. + +### US-04 — Concurrent replicas migrate safely and additively (ADO #5308) +As a **platform-operator**, I want each release's migrations additive-only and exactly one replica to +apply them on concurrent startup, so old+new pods coexist on one Postgres without breakage or races. +`job_id: job-operator-zero-downtime-rollout` (+ `job-operator-survive-multiple-replicas`) + +#### Elevator Pitch +Before: every pod races `Database.Migrate()` on boot, and a destructive migration can break the old pods still serving during a rollover. +After: scale a fresh deploy to 3 replicas against one Postgres → the logs show migrations applied **once** (one applies, two wait); a destructive migration is **rejected by CI** before merge. +Decision enabled: the operator rolls out schema changes during the working day without a downtime window. + +#### Acceptance criteria +- AC1: N hosts started against one DB apply migrations exactly once (concurrency test asserting single application). +- AC2: a CI check rejects a destructive migration (drop/rename column/table) in a release; expand→contract two-release pattern documented. +- AC3 (D1): single SQLite or Postgres instance auto-migrates on boot exactly as today (lock degrades to a no-op). + +### US-05 — Lighthouse is observable in my cluster (ADO #5312) +As a **platform-operator**, I want a Prometheus `/metrics` endpoint, structured JSON logs and OTel traces, +so Lighthouse appears on my existing dashboards like any first-class service. +`job_id: job-operator-observe-in-cluster` + +#### Elevator Pitch +Before: no `/metrics` and unstructured text logs — Lighthouse is a black box in the cluster. +After: scrape `GET /metrics` → request/error/latency render in Grafana; logs ship as queryable JSON to Loki; a slow request is traceable. 
+Decision enabled: the operator monitors and alerts on Lighthouse from the same stack as everything else. + +#### Acceptance criteria +- AC1: `GET /metrics` returns Prometheus-format output including HTTP server metrics. +- AC2: logs are emitted as structured JSON to stdout with the expected fields. +- AC3 (D1): with telemetry disabled, no exporter runs and there is no behaviour or performance change for the single container (low-overhead/off-by-default). + +### US-06 — Each MCP caller authenticates as themselves (ADO #5307) +As a **platform-operator** exposing the MCP HTTP server, I want each caller to authenticate with their own +credential (passed through), so every caller drives Lighthouse with their own RBAC scope and audit — no +shared baked key. `job_id: job-mcp-caller-own-identity` + +#### Elevator Pitch +Before: the `mcp-http` container holds one baked `LIGHTHOUSE_API_KEY` — a confused deputy; every caller acts as that owner/scope with no per-user audit, and an unauth'd `/mcp` is an open hole. +After: a caller sends their OWN OAuth token (or `X-Api-Key`) to `/mcp` → the server passes it through → Lighthouse owner-resolves it (`ApiKey.OwnerSubject → sub`) and applies that caller's `ApiKeyPermission` scope. +Decision enabled: the operator exposes MCP beyond ClusterIP without distributing/rotating a shared secret, and security review gets a clean "no ambient authority" answer. + +#### Acceptance criteria +- AC1: two callers with distinct credentials each see only their own RBAC-scoped data; the credential is forwarded, not a baked key. +- AC2: the wrapping client method version-gates — an old Lighthouse server fails with a clear "upgrade Lighthouse" error, not an opaque 404. +- AC3 (D1): the existing single-key / dev path stays available; no break for self-hosters. 
+ +### US-07 — Lighthouse runs safely with N replicas (ADO #5304) +As a **platform-operator**, I want syncs to run once across the fleet, every notification to reach all +pods' clients, and update status consistent across pods, so I scale Lighthouse like a normal web app. +`job_id: job-operator-survive-multiple-replicas` + +#### Elevator Pitch +Before: Lighthouse is a stateful singleton — a second replica means N× external syncs racing Postgres, notifications that reach only one pod's clients, and a per-pod status cache that disagrees. +After: configure Redis + scale to 3 → a manual refresh served by pod B notifies a client on pod A; the external system is synced **once** per cycle; `GetUpdateStatus` agrees across pods. +Decision enabled: the operator sets a replica count for HA/scale and trusts Lighthouse stays correct through a node failure. + +#### Acceptance criteria +- AC1: with Redis + N hosts, a single sync per entity occurs under concurrent timer + manual-refresh load (no N× duplication, no racing writes). +- AC2: a notification raised on any pod reaches clients connected to any other pod (Redis backplane). +- AC3: `GetUpdateStatus` returns a consistent answer across pods (shared/distributed status store). +- AC4 (D1): with no Redis / one host, behaviour AND code path are identical to today. 
+ +--- + +## Wave: DISCUSS / [REF] Story map + +``` +Backbone (operator activities): CONFIGURE ──▶ DEPLOY ──▶ ROLL OUT ──▶ SCALE ──▶ OPERATE + │ │ │ │ │ +US-01 forwarded headers ────────────┘ │ │ │ │ +US-02 health checks ────────────────────────────┘ │ │ │ +US-03 graceful shutdown ────────────────────────────────────┤ │ │ +US-04 expand-only migrations + startup lock ────────────────┘ │ │ +US-06 MCP inbound auth (parallel, clients repo) ────────────────────────┤ │ +US-07 horizontal scalability (SPIKE-gated, last) ───────────────────────┘ │ +US-05 observability (lands any time after deploy) ─────────────────────────────────┘ +``` + +**Walking skeleton**: none — brownfield hardening; US-01 (smallest, config-gated) is the thin first slice +that proves the standalone-gate + production-data discipline for the rest. + +--- + +## Wave: DISCUSS / [REF] Prioritization + +Order by (a) learning leverage / uncertainty, (b) dependency, (c) dogfood cadence: + +1. **US-01 forwarded headers** — smallest; unblocks all cluster auth testing; near-zero risk. First. +2. **US-02 health checks** — prerequisite for any safe rollout; foundational for verifying US-03/US-04. +3. **US-03 graceful shutdown** — pairs with US-02 for zero-downtime; drains the *current* queue. +4. **US-04 expand-only migrations + startup lock** — precedes real multi-replica; feeds US-02's "migrations applied". +5. **US-05 observability** — independent; bring forward if operating blind during US-07 hurts. +6. **US-06 MCP inbound auth** — mostly clients repo, parallelizable; gated by an OAuth-vs-X-Api-Key SPIKE. +7. **US-07 horizontal scalability** — highest uncertainty, largest, depends on US-03/US-04; ship LAST but + run its **required SPIKE early** (learning leverage: disprove "leader election is enough" cheaply). + +--- + +## Wave: DISCUSS / [REF] WS strategy + +**Strategy D — Configurable / env-switching** per Mandate 5. 
Every story is config-gated and auto-degrades +(no Redis ⇒ in-memory; no proxy declared ⇒ no forwarded-header trust; telemetry off by default; migration +lock no-op at 1 instance). This is the D1 standalone gate expressed as the WS mechanism: one codebase serves +both the single-container self-hoster and the multi-replica SaaS, selected by configuration. (Trigger: +WS=D fires the `alternatives-considered` expansion suggestion — see wave-end menu.) + +--- + +## Wave: DISCUSS / [REF] Driving ports (inbound surfaces) + +- **HTTP** — `/health/ready`, `/health/live`, `/health/startup` (US-02); `/metrics` (US-05); existing OIDC + redirect/callback + SignalR `/hub` negotiation now proxy-aware (US-01); `/mcp` inbound auth (US-06). +- **Process signals** — SIGTERM / `IHostApplicationLifetime` (US-03). +- **Config** — env vars / appsettings: trusted-proxy set (US-01), Redis connection (US-07), telemetry + exporter (US-05), shutdown timeout (US-03). +- **CLI/MCP client** — `lighthouse-clients` MCP server credential pass-through + version gate (US-06). +- **No new in-app UI surface.** (Operator surfaces are HTTP/CLI/kubectl, not the React app.) + +--- + +## Wave: DISCUSS / [REF] Pre-requisites + +- Learning epic #5189 stories 00–07 (k8s fundamentals + the story-07 scaling spike) inform US-07; story 08 + (#5198) is the only open learning story and is not a blocker. +- A real Postgres + Redis on k3s for US-04/US-07 production-data acceptance (InMemory cannot reproduce the + races — recurring lesson). +- The `CreateMigration` PowerShell script for US-04 migration generation (per CLAUDE.md). +- `lighthouse-clients` repo access + the last-released Lighthouse version for the US-06 version-gate baseline. + +--- + +## Wave: DISCUSS / [REF] Outcome KPIs + +Lighthouse is self-hosted — no central telemetry (memory: self-hosted-telemetry-gap). All KPIs are +`per_instance` (operator-observable via logs/metrics) or `vendor_demo_only` (LPW stage/prod). 
Append to +`docs/product/kpi-contracts.yaml` in DEVOPS. + +| KPI | Target | Measurement | Scope | +|---|---|---|---| +| Dropped requests during a rolling update (US-03) | 0 | load-gen error count across a rollout on stage | vendor_demo_only | +| Duplicate external syncs per cycle at N replicas (US-07) | 1 (exactly once) | connector request log / structured-log sync events | vendor_demo_only | +| Concurrent-startup migration applications (US-04) | 1 | migration-history + structured startup logs | per_instance | +| OIDC login success behind proxy (US-01) | 100% first-try | manual + stage smoke | per_instance | +| Pod restart-on-slow-dependency events (US-02) | 0 | liveness restart count vs. DB-latency events | vendor_demo_only | +| MCP calls using a shared baked key after US-06 (US-06) | 0 | per-caller audit / structured auth logs | per_instance | +| Lighthouse `/metrics` scrape success (US-05) | 100% | Prometheus `up` for the Lighthouse target | per_instance | + +--- + +## Wave: DISCUSS / [REF] DoR validation (9 items, evidence) + +1. **Business value clear** — ✓ each story maps to an opportunity-scored job (gap 1–4); value = operability of the hosted/self-hosted product. +2. **User/persona identified** — ✓ `platform-operator` (new persona file); secondary MCP-caller actor on US-06. +3. **Acceptance criteria testable** — ✓ each US has 3–4 ACs verifying the Elevator-Pitch "After" end-to-end, incl. the D1 standalone-gate AC. +4. **Dependencies known** — ✓ sequence + soft deps mapped (US-07 ⟵ US-03/US-04; US-02 feeds from US-04); pre-requisites listed. +5. **Story sized / sliced** — ✓ 7 thin slices, each its own brief at `slices/slice-0N-*.md`, ≤~6 crafter days except US-07 which is SPIKE-gated. +6. **No blocking unknowns** — ✓ the one real unknown (US-07 cluster-aware-queue design) is explicitly OPEN (D5) and quarantined behind a required SPIKE; not pre-picked. +7. 
**Technical notes / constraints + cross-cutting** — ✓ RBAC/Clients/Website checklist recorded per story (above); D1–D6 locked decisions. +8. **Outcome KPIs defined** — ✓ 7 KPIs with numeric targets + measurement + scope. +9. **Definition of Done agreed** — ✓ below. + +**Requirements completeness**: 0.96 (>0.95). The one soft gap: US-07's solution shape is intentionally +deferred to SPIKE/DESIGN — that is recorded as a decision (D5), not a missing requirement. + +--- + +## Wave: DISCUSS / [REF] Definition of Done (9-item) + +1. All ACs green (incl. the D1 standalone-gate AC) for the story. 2. `dotnet build` zero warnings; +`pnpm build` + Biome clean (for any TS). 3. `dotnet test` / `pnpm test` green. 4. SonarCloud +`new_violations = 0`. 5. Mutation kill ≥ 80% on the story's real surface (per CLAUDE.md per-feature). 6. +Cross-cutting checklist answered for the story (RBAC/Clients/Website). 7. Production-data acceptance run +(real Postgres/Redis/proxy/OIDC as the slice requires) — not synthetic-only. 8. Docs/screenshots updated +if any user-visible surface changed (most stories: N/A operational — record it). 9. ADO story +Active→Resolved after CI green; push paused for review (ado-sync ritual). + +--- + +## Wave: DISCUSS / [REF] Out-of-scope + +- Cluster-side stacks: Prometheus/Grafana/Loki deployment, oauth2-proxy, Ingress/Traefik manifests, the + Helm chart, ArgoCD/GitOps, wildcard DNS, secrets operators → **Productization epic #5306**. +- HPA / `sessionAffinity` / load-test manifests → the **learning** story 07 (#5197), throwaway scratch. +- Per-tenant isolation / namespace-per-tenant model → #5306. +- Destructive (contract) migrations for any expand done here → a **later** release (D4). +- Edge-vs-ClusterIP MCP exposure + oauth2-proxy decisions → #5306 (planning Q5). +- Any change to the standalone single-container product behaviour (forbidden by D1). 
+ +--- + +## Wave: DISCUSS / [REF] Wave decisions summary + +- **Feature type**: cross-cutting (backend C#, clients TS, operational surface) — NOT infrastructure-only + (US-01/US-06 are operator/user-visible), so JTBD traceability applies and the escape valve was rejected. +- **Persona**: new `platform-operator` (user-chosen over extending `lighthouse-maintainer`). +- **Scope**: full epic — all 7 stories DISCUSSed in one pass (user-chosen). +- **ADO**: #5304 re-parented under Epic #5305 (was orphaned); all 7 children now under #5305. +- **Walking skeleton**: none (brownfield); US-01 is the thin proving slice. +- **Primary needs**: run Lighthouse multi-replica + behind a proxy + rolling-update-safe + observable, all + WITHOUT touching the sacrosanct standalone (D1). +- **Constraints established**: D1–D6 (above). D5 keeps US-07's architecture OPEN behind a SPIKE. +- **Upstream changes**: none — DISCUSS inherits the planning-stage north-star; no DISCOVER assumption changed. + +**Handoff** → DESIGN (`nw-solution-architect`, full artifacts; #5304's cluster-aware-queue SPIKE is the +first DESIGN concern) + DEVOPS (`nw-platform-architect`, `outcome-kpis` only). DESIGN + DEVOPS parallel. +``` + +--- + +## Wave: DISCUSS / [WHY] Alternatives considered + +Rendered on request (triggers: cross-context complexity, WS=D). Decision rationale for the choices that +are deferred to SPIKE/DESIGN or locked above — what was weighed and why. These are inputs for DESIGN, not +re-openings of D1–D6. + +### A1 · US-07 — what becomes the cluster-aware unit (OPEN, the SPIKE question) +The breakage is that `UpdateQueueService` is `AddSingleton` but singleton-*per-process*: each replica has +its own Channel queue, consumer, awaiters, and the `updateStatuses` dedup dict — and updates fire from two +paths (the timer loop AND inline manual refresh on whatever replica serves the request). 
+ +- **(rejected as sufficient) Leader election for the timer only.** Elect one replica to run + Team/Portfolio/ForecastUpdater. *Why not:* does nothing for a manual refresh handled by a follower, and + the per-process dedup is invisible across replicas — the same entity can still be updated concurrently and + race the same Postgres rows. Necessary-not-sufficient; the research doc §1 is explicit. Keep leader + election only as a *component* of a fuller design, not the design. +- **(candidate, preferred-leaning) Distributed queue with a single consumer.** Replace the in-process + Channel with a shared queue (Redis stream / Postgres-backed) drained by exactly one consumer across the + fleet; manual refresh enqueues to the shared queue and awaits completion via a shared status store. + *Pro:* makes the *queue itself* cluster-aware (covers both trigger paths), dedup + awaited-completion + + `GetUpdateStatus` all consistent. *Con:* most moving parts; introduces a queue technology. +- **(candidate) Cluster-wide per-entity lock + shared status store.** Keep per-process queues but guard each + Team/Portfolio update with a distributed per-entity lock (e.g. Postgres advisory lock / Redis lock); back + `GetUpdateStatus` with a shared store so dedup and reads agree. *Pro:* smaller change, no new queue. *Con:* + lock-contention + liveness edge cases; awaited-completion across replicas still needs the shared store. +- **Decision:** OPEN (D5). The SPIKE (slice-07) prototypes both candidates against real Postgres+Redis with + 3 hosts driving timer + manual-refresh concurrently; the one that disproves double-work *and* keeps + awaited-completion consistent wins. Do NOT pre-pick in DISCUSS. + +### A2 · US-07 — SignalR fan-out backplane +- **Redis backplane (chosen, config-gated).** Matches the north-star (§4 "API N replicas + Redis"), local + MinIO/Redis already in the rehearsal stack, no managed-service lock-in. No Redis ⇒ in-memory (D1). 
+- **(rejected) Azure SignalR Service.** Offloads fan-out fully but is a managed Azure dependency — couples + the self-hostable product to a cloud service, violating the vendor-neutral, runs-anywhere posture. +- **(rejected) Sticky sessions only (`sessionAffinity: ClientIP`).** Pins a client to one pod so in-memory + fan-out "works" — but it was the *learning* spike (story 07), doesn't deliver cross-pod notifications for + server-raised events, and breaks on rebalancing. Not a product answer. + +### A3 · US-04 — concurrent-startup migration coordination +- **In-process lock (advisory lock / history sentinel), chosen for this epic.** One replica applies, others + wait; degrades to a no-op at one instance (D1). Keeps "migrate on boot" — the self-hoster's current model. +- **(deferred, not rejected) Dedicated pre-deploy migration Job / ArgoCD sync-wave.** Cleaner separation + (migrate→deploy) but it is a *cluster/GitOps* mechanism → belongs to Productization #5306, and it would + break the single-container "auto-migrate on boot" the self-hoster relies on. The slice-04 hypothesis + explicitly allows falling back to this *if* the in-process lock proves fragile, recording the decision. +- **(rejected) Do nothing / let pods race.** `Database.Migrate()` under concurrent start is undefined. + +### A4 · US-06 — MCP inbound auth model +- **MCP OAuth pass-through (preferred).** Each caller brings their own OAuth token; no shared secret to bake, + seal, distribute, rotate; per-user RBAC + audit for free; an unauth'd `/mcp` is no longer an open hole. + *Risk:* MCP-spec (2025-06-18) OAuth maturity in our client SDK — the slice-06 SPIKE assesses this. +- **X-Api-Key pass-through (interim, accepted fallback).** Caller sends its own Lighthouse API key; the MCP + server forwards it; reuses the existing owner-resolved (`ApiKey.OwnerSubject`) + scoped (`ApiKeyPermission`) + model with near-zero backend change. *Cost:* N user-held keys instead of one Secret. 
Ships if OAuth proves + too heavy now — recorded, not blocking. +- **(rejected) Keep the single baked key + restrict to ClusterIP.** That is the confused-deputy status quo; + the moment MCP is exposed beyond ClusterIP it is an ambient-authority hole. Exposure topology is a #5306 + concern, but the auth model must change regardless. + +### A5 · US-05 — metrics library +- **OpenTelemetry .NET + Prometheus exporter (leaning).** One instrumentation surface for metrics+traces, + vendor-neutral OTLP, future-proof. *Con:* heavier setup; overhead must be measured (slice-05 SPIKE) to set + the off-by-default posture for the single container. +- **(alternative) `prometheus-net` for metrics only.** Lighter for just `/metrics`, but a second mechanism + for traces — DESIGN picks one to avoid two telemetry stacks. Decision deferred to DESIGN/SPIKE. + +### A6 · Frontend topology (epic-wide, Q4 — already locked upstream, restated) +- **Embedded (chosen for this epic and Bands A–C).** API serves the SPA; mirrors the standalone exactly + (D1). The `frontend.mode: split` nginx path is a Productization #5306 / Band-D optimization, built then, + defaulted off. Out of scope here — restated so DESIGN does not reopen it. 
+``` diff --git a/docs/feature/epic-5305-k8s-readiness/slices/slice-01-forwarded-headers.md b/docs/feature/epic-5305-k8s-readiness/slices/slice-01-forwarded-headers.md new file mode 100644 index 000000000..6f7d68ac6 --- /dev/null +++ b/docs/feature/epic-5305-k8s-readiness/slices/slice-01-forwarded-headers.md @@ -0,0 +1,43 @@ +# Slice 01: Reverse-proxy forwarded headers + +**Feature**: epic-5305-k8s-readiness +**Story**: US-01 (ADO #5311) → job-operator-correct-behind-proxy +**Estimate**: ~0.5–1 crafter day +**Reference class**: config-gated startup wiring, similar to `auth-allowedorigins-envvar-binding-fix` (env-bound ASP.NET Core middleware config, off unless declared) + +## Goal +Make Lighthouse honour `X-Forwarded-Proto` / `-Host` / `-For` from a declared, trusted reverse proxy so HTTPS redirects, secure cookies, OIDC callback URLs and SignalR negotiation use the real public scheme + host — config-gated and OFF unless a proxy is declared. + +## IN scope +- `UseForwardedHeaders` wired with a `ForwardedHeadersOptions` populated from configuration: known proxies / known networks (CIDR), forwarded-header count limit. +- A single config switch (env var + appsettings) that turns forwarded-header trust on and declares the trusted proxy set; default OFF. +- OIDC callback URL + `RequireHttpsMetadata`/redirect behaviour derive from the forwarded scheme/host when trust is on. +- Secure-cookie + HTTPS-redirect behaviour consistent with the forwarded scheme. + +## OUT scope +- The Ingress / Traefik manifests themselves (Productization epic #5306, chart story 09). +- Edge auth (oauth2-proxy) — north-star, not this slice. +- Health-check endpoints → slice 02. + +## Learning hypothesis +**Confirms if it succeeds**: a real OIDC login through a TLS-terminating proxy completes first try (no http:// callback, no redirect loop, secure cookie persists). 
+**Disproves if it fails**: ASP.NET Core forwarded-header handling is insufficient for our SignalR negotiation path and we need per-endpoint handling rather than one global middleware. + +## Acceptance criteria +See US-01 in `../feature-delta.md`. Key: with trust ON and a simulated `X-Forwarded-Proto: https` + `X-Forwarded-Host: <public-host>`, an integration test asserts the generated OIDC redirect/callback URL is `https://<public-host>/...`; with trust OFF (no proxy declared), behaviour is byte-identical to today (standalone gate). + +## Dependencies +None. Foundation slice — unblocks correct auth on any proxied deployment; should land before any cluster auth testing. + +## Production data requirement +**Required.** Smoke a real OIDC login (Keycloak or the configured provider) through an actual reverse proxy (local Traefik/nginx), not just a unit test with synthetic headers. + +## Dogfood moment +The dev instance, placed behind a local Traefik with TLS, logs in via OIDC over the HTTPS hostname within the same day. + +## Cross-cutting checklist (confirmed in feature-delta) +RBAC: N/A — no authorization surface changes; only how the app derives scheme/host. Clients: N/A — no API contract change. Website: N/A — operational, not a marketed surface. + +## Pre-slice spike candidates +- Confirm SignalR negotiation respects `UseForwardedHeaders` ordering relative to other middleware. (~1 hr) +- Verify the existing OIDC setup reads the request scheme/host (not a hardcoded base URL) so forwarded headers actually flow through.
(~30 min) diff --git a/docs/feature/epic-5305-k8s-readiness/slices/slice-02-health-checks.md b/docs/feature/epic-5305-k8s-readiness/slices/slice-02-health-checks.md new file mode 100644 index 000000000..bb155517a --- /dev/null +++ b/docs/feature/epic-5305-k8s-readiness/slices/slice-02-health-checks.md @@ -0,0 +1,44 @@ +# Slice 02: Health checks (liveness / readiness / startup) + +**Feature**: epic-5305-k8s-readiness +**Story**: US-02 (ADO #5310) → job-operator-trust-pod-health +**Estimate**: ~1–1.5 crafter days +**Reference class**: new read endpoints + DI wiring; learning story 04 (#5194) exercised probes as a spike — this is the product implementation + +## Goal +Add real ASP.NET Core health checks driving the three k8s probes so traffic reaches only serving pods and only genuinely-dead pods restart. + +## IN scope +- `AddHealthChecks()` with distinct tagged checks mapped to three endpoints: + - **readiness** (`/health/ready`): DB connectivity + migrations-applied → pod kept OUT of LB rotation until truly serving. + - **liveness** (`/health/live`): shallow — restart only on genuine deadlock, NOT on a slow dependency. + - **startup** (`/health/startup`): covers slow boot / migration window without tripping liveness. +- Endpoints harmless / no-op-friendly in single-container mode (standalone gate). + +## OUT scope +- The k8s probe manifests (chart story 09 / Productization #5306). +- Migration-applied detection that requires the migration lock → coordinate with slice 04 (this slice checks "migrations applied", slice 04 owns "apply once across replicas"). +- /metrics, tracing → slice 05. + +## Learning hypothesis +**Confirms if it succeeds**: a pod with an unreachable DB drops out of rotation (readiness red) WITHOUT being restarted (liveness green) — no restart storm. +**Disproves if it fails**: a shallow liveness check can't distinguish deadlock from slow dependency cheaply, forcing a richer (and riskier) liveness signal. 
+ +## Acceptance criteria +See US-02 in `../feature-delta.md`. Key: integration tests assert (a) readiness returns unhealthy when DB is down but liveness stays healthy; (b) readiness returns healthy only when DB reachable AND migrations applied; (c) endpoints return 200 in single-container mode with no orchestrator. + +## Dependencies +Soft on slice 04 for the precise "migrations applied" signal; can ship with a simpler "can open a DB connection" readiness first and tighten once slice 04 lands. + +## Production data requirement +**Required.** Run the dev instance, kill the DB connection, observe readiness flip while the process is NOT restarted; restore and observe recovery. + +## Dogfood moment +Dev instance deployed with the three probes wired; operator watches a clean rollout where a not-yet-migrated pod stays out of rotation until ready. + +## Cross-cutting checklist (confirmed in feature-delta) +RBAC: N/A — health endpoints are unauthenticated operational surface (no business data). Clients: N/A. Website: N/A. + +## Pre-slice spike candidates +- Decide whether health endpoints sit on the main port or a separate management port. (~30 min) +- Confirm a cheap, reliable "migrations applied" query against EF Core for both SQLite and Postgres. 
(~1 hr) diff --git a/docs/feature/epic-5305-k8s-readiness/slices/slice-03-graceful-shutdown.md b/docs/feature/epic-5305-k8s-readiness/slices/slice-03-graceful-shutdown.md new file mode 100644 index 000000000..7459e75c2 --- /dev/null +++ b/docs/feature/epic-5305-k8s-readiness/slices/slice-03-graceful-shutdown.md @@ -0,0 +1,45 @@ +# Slice 03: Graceful shutdown (SIGTERM) + connection draining + +**Feature**: epic-5305-k8s-readiness +**Story**: US-03 (ADO #5309) → job-operator-zero-downtime-rollout +**Estimate**: ~1–1.5 crafter days +**Reference class**: `IHostedService` / `IHostApplicationLifetime` lifecycle wiring; touches the same update-queue hosted services as Epic 5121 / #5304 + +## Goal +Handle SIGTERM cleanly so a terminating pod stops accepting new work, drains in-flight HTTP + SignalR connections, flushes/awaits the in-memory update queue, and finishes within `terminationGracePeriodSeconds` — enabling zero-downtime rolling updates. + +## IN scope +- Wire `IHostApplicationLifetime` `ApplicationStopping`/`ApplicationStopped` and/or `IHostedService.StopAsync` to: + - stop accepting new HTTP requests and new SignalR negotiations, + - drain in-flight HTTP requests within a bounded window, + - flush/await the in-memory `UpdateQueueService` Channel so queued/in-flight updates complete (or are safely abandoned) before exit, + - close SignalR connections so clients reconnect to a surviving pod. +- Configurable shutdown timeout aligned to `terminationGracePeriodSeconds`. + +## OUT scope +- The cluster-wide single-consumer queue redesign → slice 07 (#5304). This slice drains the *current per-process* queue cleanly; it does not make the queue distributed. +- SignalR Redis backplane → slice 07. +- Probe manifests → Productization #5306. + +## Learning hypothesis +**Confirms if it succeeds**: under a rolling update, a load test driving requests + an active SignalR client sees zero failed requests and a clean client reconnect as pods cycle. 
+**Disproves if it fails**: the in-memory update queue can't be drained deterministically within a sane grace period (e.g. a long external sync mid-flight), forcing the queue-redesign (slice 07) to land *before* true zero-downtime is claimable. + +## Acceptance criteria +See US-03 in `../feature-delta.md`. Key: an integration test issues SIGTERM/`StopAsync` while an HTTP request and a queued update are in flight and asserts both complete (or the update is safely re-enqueued) before the host reports stopped; a single-container Ctrl-C behaves exactly as today (standalone gate). + +## Dependencies +Pairs with slice 02 (readiness must flip to NotReady on `ApplicationStopping` so the LB stops routing before drain). Soft-precedes slice 07. + +## Production data requirement +**Required.** Drive the dev instance under a small load generator + live SignalR client through a simulated rolling restart; assert no dropped requests. + +## Dogfood moment +Operator triggers a rolling restart of the dev deployment during active use and observes no user-visible error and a seamless SignalR reconnect. + +## Cross-cutting checklist (confirmed in feature-delta) +RBAC: N/A. Clients: N/A — server-side lifecycle only; CLI/MCP callers just reconnect. Website: N/A. + +## Pre-slice spike candidates +- Measure worst-case in-flight update duration (external sync) to size the grace period. (~1 hr) +- Confirm Kestrel/ASP.NET shutdown ordering vs. our hosted services so drain runs before the server socket closes. 
(~1 hr) diff --git a/docs/feature/epic-5305-k8s-readiness/slices/slice-04-expand-only-migrations.md b/docs/feature/epic-5305-k8s-readiness/slices/slice-04-expand-only-migrations.md new file mode 100644 index 000000000..d9d55e219 --- /dev/null +++ b/docs/feature/epic-5305-k8s-readiness/slices/slice-04-expand-only-migrations.md @@ -0,0 +1,42 @@ +# Slice 04: Expand-only EF migrations + safe startup under N replicas + +**Feature**: epic-5305-k8s-readiness +**Story**: US-04 (ADO #5308) → job-operator-zero-downtime-rollout + job-operator-survive-multiple-replicas +**Estimate**: ~2–2.5 crafter days +**Reference class**: EF migration mechanics (hit the stale-migration-DLL `--no-incremental` trap in `delivery-target-date-tracking`); concurrency coordination akin to Epic 5121 + +## Goal +Two coupled guarantees: (1) each release's migrations are additive-only (expand now; destructive cleanup deferred to a LATER release) so old pods never depend on a dropped column during a rollover; (2) when N replicas boot concurrently, exactly one applies migrations while the rest wait — no race on `Database.Migrate()`. + +## IN scope +- **Expand-only discipline**: a guard/check (analyzer, test, or migration-review gate) that fails CI if a migration in this release is destructive (drop/rename column/table) — destructive ops must be a separate later release. Document the expand → contract two-release pattern. +- **Startup migration coordination**: a migration lock / dedicated init mechanism / leader so exactly one replica runs `Migrate()`; others wait until migrations are applied, then start serving. +- **Standalone gate**: a single SQLite or Postgres instance still auto-migrates on boot exactly as today (lock is a no-op / trivially-acquired with one instance). + +## OUT scope +- The actual cluster-wide update-queue redesign → slice 07. +- Provider-matrix migration generation uses the existing `CreateMigration` PowerShell script (per CLAUDE.md) — not new tooling. 
+ +## Learning hypothesis +**Confirms if it succeeds**: 3 replicas started simultaneously against one fresh Postgres apply the migration exactly once (one applies, two wait), and a destructive migration is rejected by CI before merge. +**Disproves if it fails**: app-level migration coordination is too fragile under k8s and we must move migrations into a dedicated pre-deploy Job / ArgoCD sync-wave (decision pushed to Productization #5306) — in which case this slice delivers the expand-only guard + a documented "migrate via Job" path instead of an in-process lock. + +## Acceptance criteria +See US-04 in `../feature-delta.md`. Key: an integration/concurrency test starts N hosts against one DB and asserts a single migration application (e.g. via a migration-history assertion / lock observation); a CI check rejects a destructive migration; single-instance boot auto-migrates unchanged. + +## Dependencies +None hard. Feeds slice 02's "migrations applied" readiness signal. Precedes real multi-replica operation (slice 07). + +## Production data requirement +**Required.** Reproduce concurrent startup against a real Postgres (k3s, 3 replicas) — InMemory tests will NOT catch the race (recurring lesson: persisted-model migration traps are invisible to InMemory). + +## Dogfood moment +Operator scales a fresh deploy to 3 replicas against an empty Postgres and observes one migration application in the logs, all pods healthy. + +## Cross-cutting checklist (confirmed in feature-delta) +RBAC: N/A. Clients: N/A — no API contract; possibly a CLI connection hint for Postgres, confirm in DESIGN. Website: N/A. + +## Pre-slice spike candidates +- Evaluate `PostgreSQL advisory lock` vs. a migration-history sentinel vs. an init-Job approach for the boot lock. (~2 hr) +- Prototype the destructive-migration CI guard (parse generated migration for `DropColumn`/`DropTable`/`RenameColumn`). (~1 hr) +- Confirm the SQLite path degrades the lock to a no-op. 
(~30 min) diff --git a/docs/feature/epic-5305-k8s-readiness/slices/slice-05-observability.md b/docs/feature/epic-5305-k8s-readiness/slices/slice-05-observability.md new file mode 100644 index 000000000..4d3b81c8e --- /dev/null +++ b/docs/feature/epic-5305-k8s-readiness/slices/slice-05-observability.md @@ -0,0 +1,42 @@ +# Slice 05: App observability hooks (/metrics + structured logging + traces) + +**Feature**: epic-5305-k8s-readiness +**Story**: US-05 (ADO #5312) → job-operator-observe-in-cluster +**Estimate**: ~1.5 crafter days +**Reference class**: new instrumentation wiring (OpenTelemetry .NET + Prometheus exporter + structured logging provider) + +## Goal +Instrument the app for cluster observability: expose a Prometheus `/metrics` endpoint, emit structured JSON logs to stdout, and add OpenTelemetry traces — in-app instrumentation only, low-overhead / off-by-default where appropriate so the single-container self-hoster pays nothing. + +## IN scope +- Prometheus `/metrics` endpoint (request rate / error rate / latency at minimum) via OpenTelemetry metrics + the Prometheus exporter. +- Structured JSON logging to stdout (configurable), preserving today's log content but in queryable JSON. +- OpenTelemetry tracing (ASP.NET Core + HttpClient + EF instrumentation) exporting via OTLP, exporter off/no-op unless configured. + +## OUT scope +- The cluster-side Prometheus / Grafana / Loki stack — Productization epic #5306, story 16. +- Per-tenant metric labelling / multi-tenant dashboards → #5306. +- Business KPI instrumentation (those live in `docs/product/kpi-contracts.yaml`); this slice is operational telemetry, not product KPIs. + +## Learning hypothesis +**Confirms if it succeeds**: a local Prometheus scrapes `/metrics` and a local Grafana shows Lighthouse request/error/latency; JSON logs parse field-wise in Loki; a slow request is traceable. 
+**Disproves if it fails**: always-on instrumentation imposes measurable overhead on the single container, forcing a stricter off-by-default posture (and documentation that self-hosters must opt in). + +## Acceptance criteria +See US-05 in `../feature-delta.md`. Key: an integration test asserts `/metrics` returns Prometheus-format output including HTTP server metrics; logs emitted in the JSON shape contain the expected fields; with telemetry disabled, no exporter runs and log/format behaviour matches the configured default (standalone gate — no perf change). + +## Dependencies +None. Can land any time; valuable before slice 07's multi-replica work (so the operator isn't flying blind during scale-out). + +## Production data requirement +**Recommended.** Scrape the dev instance with a real local Prometheus and confirm a dashboard renders; not strictly required for the unit-level acceptance. + +## Dogfood moment +Operator points a local Prometheus + Grafana at the dev instance and sees a live Lighthouse dashboard within the day. + +## Cross-cutting checklist (confirmed in feature-delta) +RBAC: confirm whether `/metrics` needs gating (it can leak request paths); default to unauthenticated cluster-internal surface but DESIGN must decide exposure (Sonar/security). Clients: N/A. Website: N/A. + +## Pre-slice spike candidates +- Pick the metrics surface (OpenTelemetry.Exporter.Prometheus vs. prometheus-net) and confirm it coexists with our logging. (~1 hr) +- Measure overhead of always-on ASP.NET Core + EF tracing to decide the default. 
(~1 hr) diff --git a/docs/feature/epic-5305-k8s-readiness/slices/slice-06-mcp-inbound-auth.md b/docs/feature/epic-5305-k8s-readiness/slices/slice-06-mcp-inbound-auth.md new file mode 100644 index 000000000..916639ce6 --- /dev/null +++ b/docs/feature/epic-5305-k8s-readiness/slices/slice-06-mcp-inbound-auth.md @@ -0,0 +1,45 @@ +# Slice 06: MCP HTTP server inbound authentication (OAuth pass-through) + +**Feature**: epic-5305-k8s-readiness +**Story**: US-06 (ADO #5307) → job-mcp-caller-own-identity +**Estimate**: ~2–3 crafter days (primarily in the **lighthouse-clients** repo) +**Reference class**: version-gated client endpoint wrapping (see `work-item-age-percentiles` clients wrapper + `FEATURE_REQUIRES_SERVER_NEWER_THAN`); reuses Lighthouse's existing owner-resolved/scoped API-key model + +## Goal +Stop the published `mcp-http` container being a confused deputy. Each caller authenticates with their OWN credential (preferred: MCP spec rev 2025-06-18 OAuth pass-through; interim: `X-Api-Key` pass-through) that the MCP server forwards — so every caller drives Lighthouse as themselves, with their own RBAC scope and audit, no shared baked key. + +## IN scope +- **lighthouse-clients repo (primary)**: the MCP HTTP server forwards the caller's credential instead of injecting one baked `LIGHTHOUSE_API_KEY`. + - Preferred: adopt the MCP Authorization framework (OAuth) — caller brings an OAuth token. + - Interim fallback: `X-Api-Key` pass-through reusing Lighthouse's owner-resolved (`ApiKey.OwnerSubject` → `sub`) + permission-scoped (`ApiKeyPermission`) keys. +- **Version gate**: the wrapping client method pre-checks the Lighthouse server version (an old server returns an opaque 404) and fails with a clear "upgrade Lighthouse" error. Pin to **strictly newer than the last released Lighthouse version**; record the baseline in the clients' `FEATURE_REQUIRES_SERVER_NEWER_THAN` registry. 
+- **Lighthouse backend (likely minimal/none)**: confirm the existing `ApiKeyAuthenticationHandler` owner-resolution + scope already satisfies pass-through; add only what's missing (e.g. an OAuth-token acceptance path if OAuth is chosen). +- **Standalone gate**: the existing single-key / dev path stays available; no break for self-hosters. + +## OUT scope +- Edge auth (oauth2-proxy) and ClusterIP-vs-edge exposure decisions → Productization #5306 (chart/SaaS boundary, planning Q5). +- The MCP container's k8s deployment manifest → #5306. + +## Learning hypothesis +**Confirms if it succeeds**: two different callers, each with their own credential, drive the MCP server and each sees only their own RBAC-scoped data, with per-caller audit — no shared-key ambient authority. +**Disproves if it fails**: the MCP OAuth framework is too heavy / immature for our stack right now, so we ship the interim `X-Api-Key` pass-through and defer OAuth (recording the decision), rather than blocking the slice. + +## Acceptance criteria +See US-06 in `../feature-delta.md`. Key: an integration/e2e test in lighthouse-clients shows a caller-supplied credential is forwarded and resolved to that caller's owner+scope (not a baked key); the version gate rejects an old server with a clear upgrade message; the legacy single-key dev path still works. + +## Dependencies +Independent of the other slices (lives mostly in a different repo). The decision OAuth-vs-X-Api-Key is the open question — resolve in DESIGN. + +## Production data requirement +**Required.** Exercise against a real Lighthouse backend with two distinct API-key owners and assert per-owner scoping; smoke the version gate against an older Lighthouse build. + +## Dogfood moment +Operator exposes the dev MCP server and two team members call it with their own keys; each sees only their scoped teams/portfolios. 
+ +## Cross-cutting checklist (confirmed in feature-delta) +RBAC: **central** — this slice removes ambient authority and makes the MCP path honour per-caller `ApiKeyPermission` scope (flows through the existing handler, no new RBAC port). Clients: **primary surface** — change lands in lighthouse-clients; version-gate per CLAUDE.md. Website: N/A — security/packaging, not a marketed UI feature. + +## Pre-slice spike candidates +- **SPIKE (required)**: assess MCP spec 2025-06-18 OAuth support in the client SDK we use vs. effort of `X-Api-Key` pass-through; pick the path. (~half day) +- Confirm `ApiKeyAuthenticationHandler` needs no change for X-Api-Key pass-through. (~1 hr) +- Confirm the last released Lighthouse version to set the `FEATURE_REQUIRES_SERVER_NEWER_THAN` baseline. (~15 min) diff --git a/docs/feature/epic-5305-k8s-readiness/slices/slice-07-horizontal-scalability.md b/docs/feature/epic-5305-k8s-readiness/slices/slice-07-horizontal-scalability.md new file mode 100644 index 000000000..b7335b16d --- /dev/null +++ b/docs/feature/epic-5305-k8s-readiness/slices/slice-07-horizontal-scalability.md @@ -0,0 +1,46 @@ +# Slice 07: Horizontal scalability — SignalR backplane + cluster-aware update work + +**Feature**: epic-5305-k8s-readiness +**Story**: US-07 (ADO #5304) → job-operator-survive-multiple-replicas +**Estimate**: ~4–6 crafter days **after a required SPIKE** (highest uncertainty in the epic) +**Reference class**: distributed-coordination work; closest analog is Epic 5121 (domain-events + concurrency), but larger — this makes a singleton app multi-replica-safe + +## Goal +Make Lighthouse genuinely safe to run with N API replicas: a notification raised on any replica reaches clients on all replicas; external syncs + the update queue run once across the fleet (no N× syncs, no racing Postgres writes); and `GetUpdateStatus` is consistent across pods. Config-gated: no Redis / one replica ⇒ exactly today's single-instance behaviour (standalone gate, D4). 
+ +## The three coupled breakages (from story-07-research.md §1) +1. **(B) SignalR fan-out** is in-memory per process → a notification raised on pod A never reaches pod B's clients. +2. **(C) Background updaters** (`TeamUpdater`, `PortfolioUpdater`, `UpdateQueueService`) run in *every* replica → N× external syncs + racing writes. +3. **(C) Status cache** — the in-memory `ConcurrentDictionary` in `UpdateNotificationHub` answers differently per replica. + +## IN scope +- **SignalR Redis backplane**, config-gated: Redis configured ⇒ cross-pod fan-out; no Redis ⇒ current in-memory behaviour. +- **Cluster-aware update path** — the unit that must become cluster-aware is the **update queue itself**, not just a timer leader. Both trigger paths must be covered: the periodic timer loop AND request-triggered manual refresh (`TeamController`/`PortfolioController` → `UpdateQueueService.EnqueueAndAwaitAsync` inline on whatever replica serves the request). Fix space (DESIGN decides — do NOT pre-pick): a shared/distributed queue with a single consumer, or a cluster-wide per-entity lock, plus a shared status store so dedup + the awaited completion + `GetUpdateStatus` are consistent across replicas. +- **Shared status store** backing `GetUpdateStatus` (Redis or sourced from Postgres). + +## OUT scope +- HPA / `sessionAffinity` / load-test manifests — those were the **learning** story 07 (#5197) k8s-layer spike (throwaway), not this production slice. +- Per-tenant isolation / namespace model → Productization #5306. + +## Learning hypothesis +**Confirms if it succeeds**: with Redis + 3 replicas, a manual refresh served by pod B notifies a client on pod A; the external system is synced once per cycle across the fleet; `GetUpdateStatus` agrees across pods. 
+**Disproves if it fails**: leader election alone is insufficient (a manual refresh handled by a follower still double-works), proving the queue itself must be the cluster-aware unit — which is exactly why this is one coupled slice, not three. + +## Acceptance criteria +See US-07 in `../feature-delta.md`. Key: a multi-host integration/e2e test asserts (a) single sync per entity across N hosts under concurrent timer + manual-refresh load; (b) cross-pod notification delivery via the backplane; (c) consistent `GetUpdateStatus`; (d) with no Redis / 1 host, behaviour and code path are identical to today. + +## Dependencies +Soft-depends on slice 03 (clean drain) and slice 04 (migration safety) being in place so multi-replica operation is tested on a safe base. This is the LAST slice to ship. + +## Production data requirement +**Required.** Real Postgres + Redis, ≥3 replicas on k3s, real work-tracking connector driving syncs. InMemory/mock tests cannot reproduce the cross-replica races (recurring lesson). + +## Dogfood moment +The dev/stage deployment runs 3 replicas with Redis; operator triggers concurrent refreshes and a node drain and observes single syncs, consistent status, and no lost notifications. + +## Cross-cutting checklist (confirmed in feature-delta) +RBAC: N/A — no authorization surface. Clients: likely N/A — internal infra, no API contract change (confirm in DESIGN). Website: N/A — infra, not a marketed feature. + +## Pre-slice SPIKE (REQUIRED — high uncertainty; do BEFORE committing the slice) +- **Probe the cluster-aware-queue design** (`nw-spike`, ~1–2 days): prototype the two candidate shapes — (i) distributed queue with a single consumer, (ii) cluster-wide per-entity lock + shared status store — against real Postgres+Redis with 3 hosts driving both timer and manual-refresh paths. Goal: disprove "leader election is enough" and pick the unit of coordination. Output feeds DESIGN; do NOT pre-pick a solution in DISCUSS. 
+- Confirm `UpdateQueueService` singleton-per-process semantics (Channel queue, consumer, awaiters, `updateStatuses` dedup dict) match the research doc before designing the replacement. (~2 hr) diff --git a/docs/product/jobs.yaml b/docs/product/jobs.yaml index e1e63c066..ffa800e03 100644 --- a/docs/product/jobs.yaml +++ b/docs/product/jobs.yaml @@ -1,5 +1,5 @@ schema_version: 1 -updated: 2026-06-14 +updated: 2026-06-16 feature_context: - rbac-enhancements - work-tracking-oauth-authentication @@ -21,6 +21,7 @@ feature_context: - multiple-cycle-times - work-item-age-percentiles - website-screenshot-freshness + - epic-5305-k8s-readiness jobs: - id: job-rbac-bootstrap @@ -1914,3 +1915,176 @@ jobs: satisfaction: 2 - the screenshots exist and are broadly representative, just dated. Gap: 1 - small but real; this job is the downstream payoff of the maintainer job, not an independent ask. + + - id: job-operator-survive-multiple-replicas + title: Run Lighthouse with more than one replica without it breaking + persona: platform-operator + feature: epic-5305-k8s-readiness + job_story: > + When I scale Lighthouse past a single replica behind a load balancer so it can absorb + load and survive a node failure, + I want each external work-tracking sync to run once across the fleet, every client to + receive every SignalR notification regardless of which pod raised it, and concurrent + pod startups to apply migrations exactly once, + so I can operate Lighthouse as a normal horizontally-scaled web app instead of a + single-instance singleton that corrupts itself the moment a second pod starts. 
+ dimensions: + functional: One distributed update path (single consumer / cluster-wide per-entity lock + shared status store); SignalR Redis backplane; migration lock so exactly one pod migrates + emotional: Move from "I daren't run more than one pod" to "I scale this like any other service" + social: Stand behind an SLA — "Lighthouse stays up through a node failure" — to my own stakeholders + forces: + push: Lighthouse is a stateful singleton; >1 replica means N× syncs racing Postgres rows, notifications that reach only one pod's clients, and a per-pod status cache that answers inconsistently + pull: A genuinely cluster-safe app scales on CPU, rolls without flapping, and tolerates a lost node + anxiety: > + Will electing a leader for the timer fix it, when manual refreshes are handled inline on + whichever replica serves the request? Will I silently double-sync and not notice? + habit: Operators expect a web app to be stateless-enough to scale by changing a replica count + opportunity_score: + importance: 5 + current_satisfaction: 1 + gap: 4 + rationale: > + Importance: 5 - hard blocker for any multi-replica / SaaS operation; nothing in Bands D-E + works without it. Current satisfaction: 1 - naive scale-out is actively broken in three ways. + Gap: 4 - the largest, highest-uncertainty job in the epic (the update-queue itself must + become cluster-aware; leader election is necessary-not-sufficient). 
+ + - id: job-operator-zero-downtime-rollout + title: Upgrade Lighthouse with no dropped requests and no data loss + persona: platform-operator + feature: epic-5305-k8s-readiness + job_story: > + When I roll out a new Lighthouse version under a Kubernetes rolling update, so old and + new pods briefly run side by side against one shared database, + I want each release's migrations to be additive-only (so an old pod never depends on a + column the new release dropped) and every terminating pod to drain its in-flight HTTP + requests, SignalR connections and queued updates before it exits, + so I can ship updates during the working day without a maintenance window and without a + user ever seeing a failed request or a half-written change. + dimensions: + functional: Expand-only (expand now / contract later) migration discipline; SIGTERM handling that stops intake and drains within terminationGracePeriodSeconds + emotional: Move from "upgrades are a scary after-hours event" to "I merge a PR and it rolls" + social: Deliver upgrades invisibly; no "Lighthouse will be down 22:00-23:00" email + forces: + push: A rolling update today kills pods mid-request (no drain) and a destructive migration can break the old pods still serving during the rollout + pull: Zero-downtime upgrades make Lighthouse safe to update often and cheaply + anxiety: Will a migration that drops/renames a column break the old replica before it's gone? Will an in-flight forecast write be lost when the pod dies? + habit: Operators are used to scheduling downtime for database migrations + opportunity_score: + importance: 4 + current_satisfaction: 1 + gap: 3 + rationale: > + Importance: 4 - required for credible SaaS upgrades and for safe self-hoster updates. + Current satisfaction: 1 - no drain, no expand-contract discipline today. Gap: 3. 
+ + - id: job-operator-trust-pod-health + title: Let Kubernetes route to only-truly-ready pods and restart only dead ones + persona: platform-operator + feature: epic-5305-k8s-readiness + job_story: > + When Kubernetes decides whether to send traffic to a Lighthouse pod or to restart it, + I want readiness gated on real serving capacity (DB reachable, migrations applied), + liveness shallow enough that a slow dependency never triggers a restart, and a startup + probe that covers a slow boot/migration window, + so I can trust that traffic only reaches pods that can actually serve and that the + orchestrator never restart-loops a healthy-but-slow pod. + dimensions: + functional: ASP.NET Core health checks driving three distinct probes (readiness DB+migrations, shallow liveness, startup) + emotional: Move from "is this 503 the app or the probe?" to "the probes mean what they say" + social: Show a green, trustworthy deployment status to whoever watches the cluster + forces: + push: Without real probes, k8s sends traffic to not-yet-ready pods (cold 500s) or restart-loops on a slow dependency + pull: Correct probes give clean rollouts and accurate health signals + anxiety: Will a shared liveness/readiness endpoint restart a pod that's merely waiting on the DB? + habit: Operators expect every serious app to expose liveness/readiness/startup endpoints + opportunity_score: + importance: 4 + current_satisfaction: 2 + gap: 2 + rationale: > + Importance: 4 - prerequisite for any safe rollout. Current satisfaction: 2 - story 04 + exercised probes as a spike but there is no product implementation. Gap: 2. 
+ + - id: job-operator-correct-behind-proxy + title: Serve correct HTTPS, cookies, OIDC and SignalR behind a reverse proxy + persona: platform-operator + feature: epic-5305-k8s-readiness + job_story: > + When I put Lighthouse behind Traefik / nginx / an Ingress that terminates TLS, + I want the app to honour X-Forwarded-Proto / -Host / -For from the proxy I trust, + so that HTTPS redirects, secure cookies, OIDC callback URLs and SignalR negotiation all + use the real public scheme and host instead of the pod's internal http://hostname. + dimensions: + functional: UseForwardedHeaders gated on a declared set of known proxies/networks + emotional: Move from "why does OIDC redirect to http and loop?" to "login just works behind the proxy" + social: Hand users a clean HTTPS URL that logs in first try + forces: + push: Behind a proxy today, HTTPS redirects loop, OIDC callbacks come out http://, secure cookies drop + pull: One config switch makes every reverse-proxy deployment behave correctly + anxiety: If I trust forwarded headers, am I opening a spoofing hole from untrusted clients? + habit: Operators are used to setting UseForwardedHeaders / trusted-proxy config on .NET apps behind a proxy + opportunity_score: + importance: 4 + current_satisfaction: 1 + gap: 3 + rationale: > + Importance: 4 - blocks correct auth on ANY reverse-proxy deployment, not just k8s. + Current satisfaction: 1 - broken behind a TLS-terminating proxy today. Gap: 3. 
+ + - id: job-operator-observe-in-cluster + title: See per-instance metrics, structured logs and traces in my monitoring stack + persona: platform-operator + feature: epic-5305-k8s-readiness + job_story: > + When I run Lighthouse in a cluster with Prometheus / Loki / an OTel collector, + I want the app to expose a /metrics endpoint, emit structured JSON logs to stdout, and + produce OpenTelemetry traces, + so I can monitor request rates, error rates and latency, query logs by field, and trace + a slow request — without bolting on a sidecar to scrape unstructured text. + dimensions: + functional: Prometheus /metrics endpoint; structured JSON logging to stdout; OpenTelemetry traces + emotional: Move from "Lighthouse is a black box in my cluster" to "it's a first-class citizen on my dashboards" + social: Report instance health/usage to stakeholders from the same Grafana everyone else uses + forces: + push: No /metrics and unstructured logs make Lighthouse invisible to Prometheus and painful in Loki + pull: Native instrumentation drops Lighthouse straight onto existing dashboards + anxiety: Will always-on tracing/metrics cost the single-container self-hoster performance? + habit: Operators expect cloud-native apps to expose /metrics + JSON logs out of the box + opportunity_score: + importance: 3 + current_satisfaction: 2 + gap: 1 + rationale: > + Importance: 3 - valuable for operating the SaaS, less critical for a single self-hoster. + Current satisfaction: 2 - logging exists but unstructured; no /metrics. Gap: 1 - real but + the smallest in the epic; off-by-default keeps the self-hoster cost at zero. 
+ + - id: job-mcp-caller-own-identity + title: Drive the MCP server as myself, not as a shared baked-in key + persona: platform-operator + feature: epic-5305-k8s-readiness + job_story: > + When I expose the Lighthouse MCP HTTP server so colleagues' agents can call it, + I want each caller to authenticate with their own credential (an OAuth token, or their + own Lighthouse API key) that the MCP server passes through, + so that every caller drives Lighthouse as themselves with their own RBAC scope and audit + trail — instead of every caller sharing one baked LIGHTHOUSE_API_KEY whose owner and + scope they all silently inherit. + dimensions: + functional: MCP spec (2025-06-18) OAuth pass-through (preferred) or X-Api-Key pass-through (interim), reusing owner-resolved/scoped Lighthouse keys + emotional: Move from "one shared key I must bake, seal, distribute and rotate" to "callers bring their own" + social: Give security review a clean answer — no ambient authority, per-user audit + forces: + push: The mcp-http container is a confused deputy - one baked key means every caller acts as that owner/scope with no per-user audit + pull: Per-caller identity removes the shared secret entirely; an unauth'd /mcp is no longer an open hole + anxiety: Is adopting the MCP OAuth framework too heavy vs the interim X-Api-Key pass-through? + habit: Operators are used to giving a service one API key and accepting the blast radius + opportunity_score: + importance: 4 + current_satisfaction: 1 + gap: 3 + rationale: > + Importance: 4 - the confused deputy is a real security gap the moment the MCP server is + exposed beyond ClusterIP. Current satisfaction: 1 - single baked key today. Gap: 3. Change + lands mostly in the lighthouse-clients repo; version-gate the endpoint per CLAUDE.md. 
diff --git a/docs/product/journeys/epic-5305-k8s-readiness.yaml b/docs/product/journeys/epic-5305-k8s-readiness.yaml new file mode 100644 index 000000000..c0d89c459 --- /dev/null +++ b/docs/product/journeys/epic-5305-k8s-readiness.yaml @@ -0,0 +1,183 @@ +schema_version: 1 +feature_id: epic-5305-k8s-readiness +created: 2026-06-16 +research_depth: lightweight +note: > + Operational journeys for Epic 5305 "Lighthouse k8s-readiness — production code changes". + The actor throughout is the platform-operator (self-hoster running a single container today; + LPW SaaS operator running many replicas across tenants tomorrow). These journeys are + operational, not in-app UX: the "screen" is mostly kubectl/Helm/Grafana and the app's HTTP + surface (health endpoints, /metrics, OIDC redirect, /mcp). Every journey inherits the #5305 + EPIC GATE: the change must auto-degrade to the sacrosanct single-container standalone (no + Redis -> in-memory; one replica works; SQLite default; frontend embedded). The north-star + this slices toward lives in docs/feature/l8e-kubernetes-learning/planning-stage.md (D1-D5, + Q1-Q5, §4 architecture). Six jobs map onto seven stories: survive-multiple-replicas (#5304, + migration-lock half of #5308), zero-downtime-rollout (#5308 expand-only, #5309 drain), + trust-pod-health (#5310), correct-behind-proxy (#5311), observe-in-cluster (#5312), + mcp-caller-own-identity (#5307). + +journeys: + + - name: scale-out-without-breaking + goal: > + A platform-operator raises the API replica count past 1 behind a load balancer and + observes that work-tracking syncs run once across the fleet, every connected client + receives every update notification regardless of which pod raised it, and the per-entity + update status is consistent across pods — i.e. Lighthouse behaves as a normal + horizontally-scaled web app, not a self-corrupting singleton. 
+ persona: "platform-operator (LPW SaaS operator; also any self-hoster scaling for HA)" + jobs: [job-operator-survive-multiple-replicas] + stories: ["#5304", "#5308 (migration lock)"] + emotional_arc: + start: Wary - "Lighthouse is a stateful singleton; I've been told a second pod corrupts it + — N× syncs, notifications that reach only one pod's clients, an inconsistent status cache." + middle: Testing - "I configure Redis, scale to 3, trigger a manual refresh on one pod and + watch the notification land on a client connected to another; I check the external system + was synced once, not three times." + end: Confident - "replica count is just a number now; a node can die and Lighthouse stays up + and consistent. With no Redis and one replica it's exactly the old single-instance app." + steps: + - step: Configure Redis backplane (optional) and scale the API Deployment to N replicas. + output: N pods Running; SignalR uses the Redis backplane when configured, in-memory otherwise. + - step: A client connected to pod A; a manual refresh is served by pod B (EnqueueAndAwait). + output: The update runs ONCE (cluster-aware queue / per-entity lock), and pod A's client + receives the completion notification via the backplane. + - step: Inspect the external work-tracking system's request log over one sync cycle. + output: Exactly one sync per team/portfolio across the fleet — no N× duplication, no racing + Postgres writes on the same rows. + - step: GetUpdateStatus is queried against different pods during an in-flight update. + output: Consistent answer across pods (shared/distributed status store, not per-process dict). + error_paths: + - trigger: Redis unreachable mid-operation. + recovery: Degrade to documented behaviour (configured failure mode); single-replica path + unaffected; surfaced in logs/health, never silent data corruption. + - trigger: Two pods both believe they own the timer loop. 
+ recovery: Cluster-wide per-entity lock / single-consumer queue makes double-work impossible + even if leadership is ambiguous (leader election is necessary-not-sufficient — DESIGN solves). + + - name: zero-downtime-upgrade + goal: > + A platform-operator rolls out a new Lighthouse version under a rolling update; old and new + pods coexist against one Postgres for the rollout window; no request is dropped and no + change is lost. + persona: "platform-operator" + jobs: [job-operator-zero-downtime-rollout] + stories: ["#5308 (expand-only)", "#5309"] + emotional_arc: + start: Cautious - "an upgrade today means killed in-flight requests and the fear that a + destructive migration breaks the old pods still serving." + middle: Reassured - "this release's migration is additive-only; terminating pods drain HTTP + + SignalR + the in-memory queue before they exit." + end: Routine - "I merge a PR during the working day and it rolls; nobody notices." + steps: + - step: Apply the new image; k8s starts new pods and sends SIGTERM to old ones. + output: Old pod stops accepting new work, drains in-flight requests/connections, flushes the + update queue, exits within terminationGracePeriodSeconds. + - step: New and old pods run migrations against the same Postgres during the overlap. + output: Migration is additive-only (expand); old pods keep working because nothing they need + was dropped. Destructive cleanup is deferred to a LATER release (contract). + error_paths: + - trigger: A pod cannot drain within terminationGracePeriodSeconds. + recovery: It exits at the deadline having stopped intake first, minimising loss; logged. + - trigger: A developer authors a destructive migration in the same release. + recovery: Caught by the expand-only guard/check (DESIGN) before merge — not at runtime. 
+ + - name: login-behind-the-proxy + goal: > + A platform-operator puts Lighthouse behind a TLS-terminating reverse proxy and a user logs + in via OIDC over the public HTTPS hostname with no redirect loop and a persisted session. + persona: "platform-operator (configures); end-user (logs in, benefits)" + jobs: [job-operator-correct-behind-proxy] + stories: ["#5311"] + emotional_arc: + start: Frustrated - "behind the proxy, OIDC redirects to http://, the callback loops, secure + cookies vanish — login is broken and I can't tell why." + middle: Diagnosing - "the app trusts its own internal http://hostname instead of the proxy's + X-Forwarded-Proto/-Host; I declare the proxy as trusted and enable forwarded headers." + end: Working - "login redirects to the real HTTPS hostname, the callback succeeds, the secure + cookie sticks — first try." + steps: + - step: Declare the proxy/network as trusted; enable UseForwardedHeaders (config-gated). + output: The app derives scheme=https and the public host from X-Forwarded-* from that proxy only. + - step: A user hits the public HTTPS URL and starts OIDC login. + output: Redirect and callback URLs use https + the public host; secure cookie set; session holds. + error_paths: + - trigger: Forwarded headers arrive from an untrusted client (spoof attempt). + recovery: Only the declared known-proxy set is trusted; others are ignored — no scheme/host spoof. + - trigger: No proxy declared (direct/standalone access). + recovery: Forwarded-header trust stays OFF; direct access behaves exactly as today (standalone gate). + + - name: trustworthy-pod-health + goal: > + A platform-operator wires the three k8s probes to real ASP.NET Core health checks so traffic + reaches only serving pods and only genuinely-dead pods are restarted. + persona: "platform-operator" + jobs: [job-operator-trust-pod-health] + stories: ["#5310"] + emotional_arc: + start: Distrustful - "is this 503 the app or a naive shared health endpoint? 
a slow DB makes + k8s restart-loop a perfectly healthy pod." + middle: Wiring - "readiness checks DB + migrations-applied; liveness is shallow; a startup + probe covers the slow boot/migration window." + end: Trusting - "the probes mean what they say; rollouts are clean and nothing flaps." + steps: + - step: readiness probe -> /health/ready (DB reachable + migrations applied). + output: Pod stays OUT of the LB rotation until it can truly serve; no cold 500s to users. + - step: liveness probe -> shallow /health/live. + output: Pod restarts only on genuine deadlock, never because a dependency is slow. + - step: startup probe -> /health/startup. + output: Slow boot/migration tolerated without tripping liveness. + error_paths: + - trigger: DB unreachable. + recovery: Readiness fails (out of rotation) but liveness stays green (no restart storm). + - trigger: Single-container standalone with no orchestrator. + recovery: Endpoints are harmless/no-op-friendly; no behaviour change for self-hosters (standalone gate). + + - name: observe-in-the-cluster + goal: > + A platform-operator scrapes Lighthouse's /metrics into Prometheus, ships its structured JSON + logs to Loki, and traces a slow request via OpenTelemetry — all from the same Grafana the + rest of the cluster uses. + persona: "platform-operator" + jobs: [job-operator-observe-in-cluster] + stories: ["#5312"] + emotional_arc: + start: Blind - "Lighthouse is a black box - no /metrics, unstructured text logs I can't query." + middle: Instrumenting - "I scrape /metrics, parse JSON logs by field, see traces." + end: Visible - "Lighthouse sits on my dashboards like any first-class service." + steps: + - step: Prometheus scrapes /metrics. + output: Request rate / error rate / latency visible per instance. + - step: Logs emitted as structured JSON to stdout; OTel traces exported. + output: Logs queryable by field in Loki; a slow request is traceable end to end. + error_paths: + - trigger: Self-hoster does not want the overhead.
+ recovery: Low-overhead / off-by-default where appropriate; no perf change for the single container. + + - name: mcp-caller-brings-own-identity + goal: > + A platform-operator exposes the Lighthouse MCP HTTP server and each caller authenticates with + their OWN credential (OAuth token, or their own Lighthouse API key) that the server passes + through — so every caller drives Lighthouse as themselves, with their own RBAC scope and audit. + persona: "platform-operator (deploys/secures); MCP/CLI caller (authenticates as self)" + jobs: [job-mcp-caller-own-identity] + stories: ["#5307"] + emotional_arc: + start: Uneasy - "the mcp-http container is a confused deputy - one baked LIGHTHOUSE_API_KEY, so + every caller acts as that owner/scope with no per-user audit, and an unauth'd /mcp is an open hole." + middle: Adopting - "I move to MCP OAuth pass-through (preferred) — each caller brings their own + token; or the interim X-Api-Key pass-through reusing Lighthouse's owner-resolved/scoped keys." + end: Secured - "no shared secret to bake/seal/distribute/rotate; per-user RBAC + audit for free; + the single-key dev path still exists for self-hosters (standalone gate)." + steps: + - step: Caller sends its own OAuth token (or X-Api-Key) to the MCP HTTP server. + output: The server passes the credential through; Lighthouse owner-resolves it (ApiKey.OwnerSubject + -> sub) and applies that caller's ApiKeyPermission scope. + - step: The wrapping client method pre-checks the Lighthouse server version before calling. + output: An old server (no endpoint) fails with a clear "upgrade Lighthouse" message, not an opaque 404. + error_paths: + - trigger: Caller presents no credential to an exposed /mcp. + recovery: With pass-through there is no ambient authority to fall back on — the call is rejected, + not silently executed as a shared key. + - trigger: Self-hoster on the legacy single-key/dev path. + recovery: That path stays available; no break for self-hosters (standalone gate).
diff --git a/docs/product/personas/platform-operator.yaml b/docs/product/personas/platform-operator.yaml new file mode 100644 index 000000000..8dd0e4bfb --- /dev/null +++ b/docs/product/personas/platform-operator.yaml @@ -0,0 +1,101 @@ +schema_version: 1 +id: platform-operator +created: 2026-06-16 +created_in_feature: epic-5305-k8s-readiness + +display_name: Platform Operator + +aliases: + - self-hoster + - k8s-operator + - sre + - devops-engineer + - lpw-saas-operator + +short_description: > + The person who runs a Lighthouse INSTANCE — not the one who reads flow metrics inside it. + They deploy Lighthouse to a server or a Kubernetes cluster, put it behind a reverse proxy, + wire OIDC, upgrade it without taking it down, and watch it from a monitoring stack. Two + flavours of the same persona: the SELF-HOSTER running a single container for their own + org (today's sacrosanct standalone product), and the LPW SaaS operator running many + replicas across many tenants. What makes them THIS persona is that they care about the + pod/process lifecycle, rollouts, and proxying — the operational envelope around the app — + not about cycle times, forecasts, or RBAC assignments. Distinct from config-admin (who + edits in-app configuration) and from the end-user product personas (flow-coach, etc.). 
+ +primary_jobs: + - job-operator-survive-multiple-replicas + - job-operator-zero-downtime-rollout + - job-operator-trust-pod-health + - job-operator-correct-behind-proxy + - job-operator-observe-in-cluster + - job-mcp-caller-own-identity + +goals: + - Run Lighthouse with more than one replica behind a load balancer without duplicate + work-tracking syncs, lost SignalR notifications, or migrations racing each other + - Roll out a new version with zero dropped requests and zero data loss — old and new pods + coexisting against one database for the length of the rollout + - Let Kubernetes route traffic only to pods that are truly serving, and restart only pods + that are genuinely dead — never flap on a slow dependency + - Get correct HTTPS redirects, secure cookies, OIDC callback URLs and SignalR negotiation + when Lighthouse sits behind Traefik / nginx / an Ingress + - See per-instance metrics, structured logs and traces in my existing monitoring stack + - Never have to bake, seal, distribute and rotate a single shared API key just to expose + the MCP server — each caller should drive Lighthouse as themselves + +frustrations: + - Lighthouse is a stateful singleton by construction; naively running >1 replica breaks in + three independent ways (N× external syncs, SignalR notifications that only reach one pod's + clients, an in-memory status cache that answers differently per pod) with no warning + - Every pod races Database.Migrate() on boot — N replicas starting concurrently is undefined + - A rolling update kills pods mid-request because the app does not drain on SIGTERM + - Behind a proxy, HTTPS redirects loop, OIDC callback URLs come out as http://, secure + cookies get dropped — because the app trusts its own scheme/host instead of the forwarded + headers + - The published mcp-http container is a confused deputy - one baked LIGHTHOUSE_API_KEY means + every caller acts as that key's owner, with that key's scope, with no per-user audit + - The app exposes no
/metrics and logs unstructured text, so it is invisible to Prometheus + and painful to query in Loki + +mental_model: + - The standalone single container is the simple, good-enough-for-many product and it must + NEVER change — every k8s-readiness change auto-degrades to the single-instance path (no + Redis -> in-memory; one replica works; SQLite stays the default; frontend stays embedded) + - Readiness != liveness — a pod that cannot reach the DB should leave the load-balancer + rotation (readiness) but must NOT be restarted (liveness); a slow boot needs a startup probe + - A release is two database states at once — new and old pods share one Postgres for the + rollout window, so each release's migrations must be additive (expand now, contract later) + - '"Behind a proxy" means the app is no longer the TLS terminator; it must learn its real + scheme/host/client-IP from X-Forwarded-* headers, and only from a proxy it has been told + to trust' + - The caller's own credential should map to the caller's own identity and rights — Lighthouse + already owner-resolves and scopes API keys, so the confused deputy is a packaging gap, not a + backend gap + +vocabulary: + - '"standalone gate" — the hard acceptance rule that every change preserves the single-container + self-hosted product unchanged (the #5305 epic gate)' + - '"auto-degrade" — with no Redis / one replica / SQLite, the change collapses to today''s + single-instance behaviour with no configuration required' + - '"expand-only / expand-contract" — additive migrations this release, destructive cleanup a + later release, so old pods never depend on a column the new release dropped' + - '"drain" — on SIGTERM, stop accepting new work and let in-flight HTTP + SignalR + queued + updates finish within terminationGracePeriodSeconds' + - '"confused deputy" — the mcp-http server acting on a single baked key''s authority regardless + of who called it' + - '"forwarded headers" — X-Forwarded-Proto / -Host / -For that a reverse proxy sets and
the app + must honour (UseForwardedHeaders with known proxies)' + - '"backplane" — the SignalR fan-out mechanism (in-memory per-process today; Redis across pods)' + +related_personas: + - config-admin (runs INSIDE the instance this persona operates; the OIDC/RBAC config this + persona wires up is what config-admin then administers) + - lighthouse-maintainer (ships the chart + docs this persona consumes; sometimes the same person) + - first-time-system-admin (the human who bootstraps RBAC once this persona has the instance up) + +not_this_persona: + - A flow-coach / delivery-lead / forecaster — they read metrics; they do not run the process + - config-admin editing Team / Portfolio / Connection / RBAC settings inside the running app + - A developer USING the MCP/CLI client (that caller appears in job-mcp-caller-own-identity as + the actor, but the persona who deploys and secures the MCP server is THIS one) From 0c0c11ebad06d2966dbd3d1c280522c27c5d0789 Mon Sep 17 00:00:00 2001 From: Benjamin Huser-Berta Date: Thu, 18 Jun 2026 20:35:24 +0200 Subject: [PATCH 4/4] test: inline all @mui and react-transition-group for vitest ESM resolution MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The string "@mui/material" inline was insufficient: the failing tests also vi.mock @mui/x-date-pickers, whose importActual pulls transitions that deep-import react-transition-group/TransitionGroupContext (a directory with a main/module redirect package.json). Node's native ESM resolver — used by Vitest for any non-inlined dep, including importActual — rejects directory imports with ERR_UNSUPPORTED_DIR_IMPORT. Vite/Rollup (the production build) resolve them fine, which is why pnpm build passed while the suite failed. Use regex inline so every @mui subpath and react-transition-group are transformed by Vite, which resolves the directory import. Verified: full suite 264 files / 3474 tests green against @mui/material 9.1.1.
Co-Authored-By: Claude Opus 4.8 --- Lighthouse.Frontend/vitest.config.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lighthouse.Frontend/vitest.config.ts b/Lighthouse.Frontend/vitest.config.ts index 7635f7948..908d5a183 100644 --- a/Lighthouse.Frontend/vitest.config.ts +++ b/Lighthouse.Frontend/vitest.config.ts @@ -29,7 +29,7 @@ export default defineConfig({ ], server: { deps: { - inline: ["@mui/x-data-grid", "@mui/material"], + inline: [/@mui\//, /react-transition-group/], }, },