Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
83 commits
Select commit Hold shift + click to select a range
527060a
Simplify E2E layered model spec
jyaunches May 15, 2026
2097c70
Add test specification for 2026-05-14_new-e2e-model
jyaunches May 15, 2026
912cf2f
Add validation plan for 2026-05-14_new-e2e-model
jyaunches May 15, 2026
05d371e
Merge remote-tracking branch 'origin/main' into feat/e2e-layered-scen…
jyaunches May 15, 2026
40ce4b0
docs(spec): simplify e2e model review inputs
jyaunches May 15, 2026
15f77b1
docs(spec): add e2e model validation plan
jyaunches May 15, 2026
69a6a1f
Approve validation plan for 2026-05-14_new-e2e-model
jyaunches May 15, 2026
9e0182a
Apply spec review recommendation from section 1
jyaunches May 15, 2026
c70be6e
Apply spec review recommendation from section 5
jyaunches May 15, 2026
f3300b8
Apply spec review recommendation from section 6
jyaunches May 15, 2026
57cd725
feat(e2e): implement Phase 1 layered model
jyaunches May 15, 2026
c0f4e09
chore(spec): mark Phase 1 completed [57cd725]
jyaunches May 15, 2026
71fddfd
feat(e2e): implement Phase 2 layered coverage
jyaunches May 15, 2026
79abfa0
chore(spec): mark Phase 2 completed [71fddfdc9]
jyaunches May 15, 2026
9587add
feat(e2e): implement Phase 3 onboarding assertions
jyaunches May 15, 2026
80a6b66
chore(spec): mark Phase 3 completed [9587add9d]
jyaunches May 15, 2026
af628e2
feat(e2e): implement Phase 4 onboarding matrix
jyaunches May 15, 2026
84b0947
chore(spec): mark Phase 4 completed [af628e2e9]
jyaunches May 15, 2026
17aac25
feat(e2e): implement Phase 5 suite families
jyaunches May 15, 2026
8942b2e
chore(spec): mark Phase 5 completed [17aac254e]
jyaunches May 15, 2026
25fb912
feat(e2e): implement Phase 6 report visibility
jyaunches May 15, 2026
10f3154
chore(spec): mark Phase 6 completed [25fb912c3]
jyaunches May 15, 2026
d8889c4
chore(e2e): implement Phase 7 hygiene
jyaunches May 15, 2026
88d8a01
chore(spec): mark Phase 7 completed [d8889c4fe]
jyaunches May 15, 2026
f7e3133
test(e2e): validate layered scenario model spec
jyaunches May 15, 2026
53561e6
Merge remote-tracking branch 'origin/main' into feat/e2e-layered-scen…
jyaunches May 18, 2026
df1df2e
test(e2e): skip macos docker-dependent suites
jyaunches May 18, 2026
ed6ddde
ci(e2e): surface scenario report in logs
jyaunches May 18, 2026
1216e4e
Revert "ci(e2e): surface scenario report in logs"
jyaunches May 18, 2026
003f79c
fix(e2e): handle sparse scenario coverage rows
jyaunches May 18, 2026
479244d
fix(e2e): satisfy pre-push checks
jyaunches May 18, 2026
98f8f73
test(e2e): apply scenario runner formatting
jyaunches May 18, 2026
a05a1f3
test(e2e): address scenario review feedback
jyaunches May 18, 2026
3913fd7
test(e2e): harden preflight failure assertion
jyaunches May 18, 2026
c5cec44
docs(e2e): remove checked-in specs
jyaunches May 18, 2026
10e4200
Merge remote-tracking branch 'origin/main' into feat/e2e-layered-scen…
jyaunches May 18, 2026
b694e38
Merge remote-tracking branch 'origin/main' into feat/e2e-layered-scen…
jyaunches May 18, 2026
2ef5b64
docs(e2e): simplify hybrid scenario spec
jyaunches May 26, 2026
a1956ea
docs(e2e): add hybrid scenario test spec
jyaunches May 26, 2026
b819fa3
docs(e2e): add hybrid scenario validation plan
jyaunches May 26, 2026
032e87a
docs(e2e): align hybrid spec test commands
jyaunches May 26, 2026
903f038
feat: Implement Phase 1 hybrid E2E skeleton
jyaunches May 26, 2026
bf28a57
Mark Phase 1 as completed [903f03844]
jyaunches May 26, 2026
8618077
test: Add failing tests for Phase 2
jyaunches May 26, 2026
9f3f478
feat: Implement Phase 2 manifests
jyaunches May 26, 2026
b263bdd
Mark Phase 2 as completed [9f3f4786f]
jyaunches May 26, 2026
06323b2
test: Add failing tests for Phase 3
jyaunches May 26, 2026
b9e2fc1
feat: Implement Phase 3 scenario registry
jyaunches May 26, 2026
3f7fedf
Mark Phase 3 as completed [b9e2fc10e]
jyaunches May 26, 2026
a761b6f
test: Add failing tests for Phase 4
jyaunches May 26, 2026
c745253
feat: Implement Phase 4 assertion modules
jyaunches May 26, 2026
ded7717
Mark Phase 4 as completed [c74525326]
jyaunches May 26, 2026
476804d
test: Add failing tests for Phase 5
jyaunches May 26, 2026
5994821
feat: Implement Phase 5 plan compiler
jyaunches May 26, 2026
6b780ad
Mark Phase 5 as completed [59948215d]
jyaunches May 26, 2026
9e7d416
test: Add failing tests for Phase 6
jyaunches May 26, 2026
3c13dc2
feat: Implement Phase 6 orchestrators
jyaunches May 26, 2026
7c1864e
Mark Phase 6 as completed [3c13dc2c2]
jyaunches May 26, 2026
9074f3a
test: Add failing tests for Phase 7
jyaunches May 26, 2026
0a0199c
feat: Implement Phase 7 runtime workflow migration
jyaunches May 26, 2026
e0f51da
Mark Phase 7 as completed [0a0199ce6]
jyaunches May 26, 2026
558de3e
test: Add failing tests for Phase 8
jyaunches May 26, 2026
a0b5b4c
feat: Implement Phase 8 coverage reporting
jyaunches May 26, 2026
48ece2b
Mark Phase 8 as completed [a0b5b4cfb]
jyaunches May 26, 2026
843da6b
test: Add failing tests for Phase 9
jyaunches May 26, 2026
4eca7f0
feat: Implement Phase 9 YAML source retirement
jyaunches May 26, 2026
2a627fb
Mark Phase 9 as completed [4eca7f00c]
jyaunches May 26, 2026
4d7e92c
test: Add failing tests for Phase 10
jyaunches May 26, 2026
80e2a48
feat: Implement Phase 10 cleanup
jyaunches May 26, 2026
2c7da4c
Mark Phase 10 as completed [80e2a48f6]
jyaunches May 26, 2026
58d7037
ci(e2e): fix WSL scenario workflow shell
jyaunches May 26, 2026
d6c4dbc
ci(e2e): dry-run WSL scenarios on Windows host
jyaunches May 26, 2026
434dfef
merge(main): update PR #4270
jyaunches May 26, 2026
a394136
fix(e2e): reconcile scenario metadata after main merge
jyaunches May 26, 2026
846a01e
fix(ci): satisfy post-merge repository gates
jyaunches May 26, 2026
2b38a5c
style(ci): apply hook formatting
jyaunches May 27, 2026
585a6ac
Merge remote-tracking branch 'origin/main' into feat/hybrid-scenario-…
jyaunches May 27, 2026
461f333
fix(ci): merge main and refresh e2e allowlist
jyaunches May 27, 2026
469abed
fix(e2e): address scenario runner review feedback
jyaunches May 27, 2026
6f4bda8
Merge remote-tracking branch 'origin/main' into feat/hybrid-scenario-…
jyaunches May 27, 2026
369eba8
Merge remote-tracking branch 'origin/main' into feat/hybrid-scenario-…
jyaunches May 27, 2026
cabc8dc
fix(e2e): restore scenario fanout workflow call
jyaunches May 27, 2026
d804388
test(e2e): isolate scenario suite assets
jyaunches May 27, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
312 changes: 115 additions & 197 deletions .github/workflows/e2e-scenarios.yaml

Large diffs are not rendered by default.

186 changes: 69 additions & 117 deletions scripts/e2e/lint-conventions.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,26 +5,10 @@
/**
* E2E convention lint.
*
* Enforces the migration-spec conventions on
* `test/e2e/validation_suites/**` step scripts and the
* `test/e2e/test-*.sh` legacy frontier:
*
* - Suite step scripts MUST NOT re-export non-interactive env vars
* (use runtime/lib/env.sh::e2e_env_apply_noninteractive instead).
* - Suite step scripts MUST NOT register their own traps
* (runtime/lib/cleanup.sh owns teardown).
* - Suite step scripts MUST NOT call `section "..."` — filenames carry
* the phase label, and e2e_section is emitted by the runner.
* - Suite step scripts MUST NOT write to `/tmp/*.log` — use
* `$E2E_CONTEXT_DIR/logs/<scenario>/<suite>/<step>.log`.
* - Non-standard repo-root discovery (`git rev-parse --show-toplevel`)
* is rejected in suite step scripts; use
* `SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"` and
* walk up.
*
* Invocation:
* tsx scripts/e2e/lint-conventions.ts [--root <repo-root>]
* Exits 0 on success, 1 on violations, 2 on misuse.
* Enforces conventions for `test/e2e-scenario/validation_suites/**` step scripts and
* keeps the new typed scenario suite isolated under `test/e2e-scenario/**`.
* Existing top-level `test/e2e/test-*.sh` entrypoints remain valid until a
* separate migration explicitly retires them.
*/

import fs from "node:fs";
Expand All @@ -48,7 +32,7 @@ const STEP_RULES: Rule[] = [
];
for (const p of patterns) {
if (p.test(body))
return `matched ${p.source}; use runtime/lib/env.sh::e2e_env_apply_noninteractive`;
return `matched ${p.source}; non-interactive setup belongs to shared runtime helpers`;
}
return null;
},
Expand All @@ -57,53 +41,36 @@ const STEP_RULES: Rule[] = [
id: "no-own-trap",
describe: "suite step registers its own trap",
test: (body) => {
// Ignore commented lines and ignore `trap` inside quoted strings by
// requiring a leading non-quote character.
const lines = body.split("\n");
for (const raw of lines) {
const line = raw.replace(/^\s+/, "");
for (const raw of body.split("\n")) {
const line = raw.trimStart();
if (line.startsWith("#")) continue;
if (/^trap\s+[^#]/.test(line)) {
return "registered own trap; cleanup lives in runtime/lib/cleanup.sh";
}
if (/^trap\s+[^#]/.test(line))
return "registered own trap; cleanup belongs to orchestrators/shared helpers";
}
return null;
},
},
{
id: "no-section-call",
describe: "suite step calls section/e2e_section",
test: (body) => {
const lines = body.split("\n");
for (const raw of lines) {
const line = raw.replace(/^\s+/, "");
if (line.startsWith("#")) continue;
if (/^section\s+["']/.test(line)) {
return "calls section; filename carries the phase label";
}
}
return null;
},
id: "no-section-helper",
describe: "suite step calls section helper directly",
test: (body) =>
/^\s*section\s+["']/m.test(body) || /^\s*section\s*\(/m.test(body)
? "step calls section; plan/phase output owns sections"
: null,
},
{
id: "no-tmp-log",
describe: "suite step writes to /tmp/*.log",
test: (body) => {
if (/>\s*\/tmp\/[^\s]*\.log/.test(body)) {
return "writes to /tmp/*.log; use $E2E_CONTEXT_DIR/logs/<scenario>/<suite>/<step>.log";
}
return null;
},
describe: "suite step writes logs under /tmp",
test: (body) =>
/\/tmp\/[^\s'\"]+\.log/.test(body) ? "write logs under E2E_CONTEXT_DIR, not /tmp" : null,
},
{
id: "no-git-rev-parse-repo-root",
describe: "suite step uses `git rev-parse --show-toplevel` for repo root",
test: (body) => {
if (/git\s+rev-parse\s+--show-toplevel/.test(body)) {
return 'use SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" instead';
}
return null;
},
id: "no-git-rev-parse-root",
describe: "suite step uses non-standard repo-root discovery",
test: (body) =>
/git\s+rev-parse\s+--show-toplevel/.test(body)
? "avoid git rev-parse repo-root discovery in suite steps"
: null,
},
];

Expand All @@ -113,80 +80,65 @@ interface LintFinding {
message: string;
}

function walkShellScripts(root: string): string[] {
function walk(dir: string): string[] {
if (!fs.existsSync(dir)) return [];
const out: string[] = [];
const walk = (dir: string) => {
let entries: fs.Dirent[];
try {
entries = fs.readdirSync(dir, { withFileTypes: true });
} catch {
return;
}
for (const ent of entries) {
const full = path.join(dir, ent.name);
if (ent.isDirectory()) {
walk(full);
} else if (ent.isFile() && ent.name.endsWith(".sh")) {
out.push(full);
}
}
};
walk(root);
for (const entry of fs.readdirSync(dir, { withFileTypes: true })) {
const full = path.join(dir, entry.name);
if (entry.isDirectory()) out.push(...walk(full));
else out.push(full);
}
return out;
}

function lintSuiteSteps(root: string): LintFinding[] {
const suitesDir = path.join(root, "test/e2e-scenario/validation_suites");
const findings: LintFinding[] = [];
for (const file of walk(suitesDir).filter((entry) => entry.endsWith(".sh"))) {
const rel = path.relative(root, file);
const body = fs.readFileSync(file, "utf8");
for (const rule of STEP_RULES) {
const message = rule.test(body);
if (message) findings.push({ file: rel, rule: rule.id, message });
}
}
return findings;
}

function lint(root: string): LintFinding[] {
return lintSuiteSteps(root);
}

function parseArgs(argv: string[]): { root: string } {
let root: string | undefined;
let root = path.resolve(path.dirname(fileURLToPath(import.meta.url)), "../..");
const args = argv.slice(2);
while (args.length > 0) {
const a = args.shift()!;
if (a === "--root") root = args.shift();
else if (a === "-h" || a === "--help") {
const arg = args.shift();
if (arg === "--root") {
const value = args.shift();
if (!value) throw new Error("--root requires a value");
root = path.resolve(value);
} else if (arg === "--help" || arg === "-h") {
process.stdout.write("tsx scripts/e2e/lint-conventions.ts [--root <repo-root>]\n");
process.exit(0);
} else {
process.stderr.write(`lint-conventions: unexpected arg: ${a}\n`);
process.exit(2);
} else if (arg) {
throw new Error(`unexpected arg: ${arg}`);
}
}
if (!root) {
const scriptDir = path.dirname(fileURLToPath(import.meta.url));
root = path.resolve(scriptDir, "..", "..");
}
return { root };
}

function lintSuiteSteps(root: string): LintFinding[] {
const findings: LintFinding[] = [];
const suitesRoot = path.join(root, "test/e2e/validation_suites");
if (!fs.existsSync(suitesRoot)) return findings;
for (const file of walkShellScripts(suitesRoot)) {
const body = fs.readFileSync(file, "utf8");
for (const rule of STEP_RULES) {
const msg = rule.test(body);
if (msg) {
findings.push({
file: path.relative(root, file),
rule: rule.id,
message: msg,
});
}
}
}
return findings;
}

function main(): number {
try {
const { root } = parseArgs(process.argv);
const findings = lintSuiteSteps(root);
if (findings.length === 0) {
return 0;
}
for (const f of findings) {
process.stderr.write(`${f.file}: [${f.rule}] ${f.message}\n`);
const findings = lint(root);
if (findings.length > 0) {
for (const finding of findings) {
process.stderr.write(`${finding.file}: ${finding.rule}: ${finding.message}\n`);
}
process.exit(1);
}
process.stderr.write(`\ne2e-convention-lint: ${findings.length} violation(s)\n`);
return 1;
process.stdout.write("e2e convention lint passed\n");
} catch (err) {
process.stderr.write(`lint-conventions: ${(err as Error).message}\n`);
process.exit(2);
}

process.exit(main());
28 changes: 28 additions & 0 deletions src/lib/actions/gateway-drift-preflight.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,34 @@ describe("gateway drift preflight for maintenance actions", () => {
expect(backupSandboxStateSpy).not.toHaveBeenCalled();
});

// Verifies that backupAll() backs up only the sandbox whose PHASE column in the
// sandbox listing is "Ready", and reports the other one as skipped.
it("backup-all skips sandboxes that are not in Ready phase", async () => {
// Registry fixture: two registered sandboxes, "alpha" and "beta".
const registry = requireDist("../../../dist/lib/state/registry.js");
(registry.listSandboxes as ReturnType<typeof vi.fn>).mockReturnValue({
sandboxes: [
{ name: "alpha", provider: "nvidia-prod", model: "nemotron" },
{ name: "beta", provider: "nvidia-prod", model: "nemotron" },
],
});
// Stubbed listing succeeds (status 0) and contains both sandboxes:
// alpha is in phase Ready, beta is in phase Error.
captureOpenshellSpy.mockReturnValue({
status: 0,
output: [
"NAME NAMESPACE CREATED PHASE",
"alpha openshell 2026-03-24 10:00:00 Ready",
"beta openshell 2026-03-24 10:01:00 Error",
].join("\n"),
});
// Silence console.log and capture its calls so the skip message can be asserted.
const logSpy = vi.spyOn(console, "log").mockImplementation(() => undefined);
// NOTE(review): presumably `spies` is restored by shared teardown — confirm against the suite's hooks.
spies.push(logSpy);

await backupAll();

// Only the Ready sandbox is backed up; the Error-phase sandbox is never passed
// to the backup routine even though it appears in the listing.
expect(backupSandboxStateSpy).toHaveBeenCalledWith("alpha");
expect(backupSandboxStateSpy).not.toHaveBeenCalledWith("beta");
// All console.log arguments are flattened into one string before matching,
// so the assertion does not depend on which call emitted the skip notice.
expect(logSpy.mock.calls.flat().join("\n")).toContain(
"Skipping 'beta' (not running)",
);
});

it("backup-all fails closed on protobuf mismatch instead of treating sandboxes as stopped", async () => {
const protobufIssue: OpenShellStateRpcIssue = {
kind: "protobuf_mismatch",
Expand Down
6 changes: 3 additions & 3 deletions src/lib/actions/maintenance.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ import {
captureSandboxListWithGatewayRecovery,
printSandboxListFailureWithRecoveryContext,
} from "../openshell-sandbox-list";
import { parseLiveSandboxNames } from "../runtime-recovery";
import { parseReadySandboxNames } from "../runtime-recovery";
import * as registry from "../state/registry";
import * as sandboxState from "../state/sandbox";

Expand Down Expand Up @@ -62,13 +62,13 @@ export async function backupAll(): Promise<void> {
printSandboxListFailureWithRecoveryContext(liveListRecovery);
process.exit(liveList.status || 1);
}
const liveNames = parseLiveSandboxNames(liveList.output || "");
const readyNames = parseReadySandboxNames(liveList.output || "");

let backed = 0;
let failed = 0;
let skipped = 0;
for (const sb of sandboxes) {
if (!liveNames.has(sb.name)) {
if (!readyNames.has(sb.name)) {
console.log(` ${D}Skipping '${sb.name}' (not running)${R}`);
skipped++;
continue;
Expand Down
4 changes: 2 additions & 2 deletions src/lib/actions/upgrade-sandboxes.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ import {
captureSandboxListWithGatewayRecovery,
printSandboxListFailureWithRecoveryContext,
} from "../openshell-sandbox-list";
import { parseLiveSandboxNames } from "../runtime-recovery";
import { parseReadySandboxNames } from "../runtime-recovery";
import * as sandboxVersion from "../sandbox/version";
import * as registry from "../state/registry";
import { rebuildSandbox } from "./sandbox/rebuild";
Expand Down Expand Up @@ -68,7 +68,7 @@ export async function upgradeSandboxes(
printSandboxListFailureWithRecoveryContext(liveRecovery);
process.exit(liveResult.status || 1);
}
const liveNames = parseLiveSandboxNames(liveResult.output || "");
const liveNames = parseReadySandboxNames(liveResult.output || "");

// Classify sandboxes as stale, unknown, or current
const { stale, unknown } = classifyUpgradeableSandboxes(
Expand Down
Loading
Loading