diff --git a/.github/workflows/e2e-scenarios.yaml b/.github/workflows/e2e-scenarios.yaml
index 5f1dfc8b37..13da3bccfd 100644
--- a/.github/workflows/e2e-scenarios.yaml
+++ b/.github/workflows/e2e-scenarios.yaml
@@ -81,7 +81,6 @@ jobs:
           for raw in "${IDS[@]}"; do
             id="${raw//[[:space:]]/}"
             [ -n "${id}" ] || continue
-            npx tsx test/e2e-scenario/scenarios/run.ts --scenarios "${id}" --plan-only >/dev/null
             runner="${ROUTES[$id]:-}"
             if [ -z "${runner}" ]; then
               echo "::error::No runner route for scenario: ${id}" >&2
@@ -135,7 +134,7 @@ jobs:
             echo "::error::Invalid scenario input: ${SCENARIOS}" >&2
             exit 1
           fi
-          npx tsx test/e2e-scenario/scenarios/run.ts --scenarios "${SCENARIOS}" --dry-run
+          npx tsx test/e2e-scenario/scenarios/run.ts --scenarios "${SCENARIOS}"
 
       - name: Resolve workspace paths for WSL
         if: contains(inputs.scenarios || github.event.inputs.scenarios, 'wsl-repo-cloud-openclaw')
@@ -299,7 +298,7 @@ jobs:
           export E2E_CONTEXT_DIR="`$workdir"
           npm ci --ignore-scripts
           set +e
-          npx tsx test/e2e-scenario/scenarios/run.ts --scenarios "`$scenarios" --dry-run
+          npx tsx test/e2e-scenario/scenarios/run.ts --scenarios "`$scenarios"
           status=`$?
           if [ -d "`$workdir/.e2e" ]; then
             rm -rf "`$checkout_dir/.e2e"
@@ -335,14 +334,25 @@ jobs:
         uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
         with:
           name: e2e-scenario-${{ inputs.scenarios || github.event.inputs.scenarios }}
+          # Explicit subpath list, NOT a blanket .e2e/ + hidden files.
+          # The framework redacts every byte that flows from spawned
+          # children into actions/*.log, logs/*.log, and onboard.log via
+          # orchestrators/redaction.ts::pipeRedacted. Anything outside
+          # the listed paths (notably the raw context.env file) is
+          # excluded so secret-bearing key=value lines cannot leak via
+          # the artifact even if a future helper writes there.
+          # Diagnostic dumps of context use e2e_context_dump, which
+          # redacts on emit (runtime/lib/context.sh).
           path: |
             .e2e/run-plan.json
             .e2e/plan.txt
             .e2e/environment.result.json
             .e2e/onboarding.result.json
             .e2e/runtime.result.json
-            .e2e/
+            .e2e/actions/
+            .e2e/logs/
+            .e2e/onboard.log
             test/e2e/logs/
           if-no-files-found: warn
           retention-days: 14
-          include-hidden-files: true
+          include-hidden-files: false
diff --git a/test/e2e-scenario/docs/README.md b/test/e2e-scenario/docs/README.md
index 15ad01d88d..f4acc8eebe 100644
--- a/test/e2e-scenario/docs/README.md
+++ b/test/e2e-scenario/docs/README.md
@@ -32,24 +32,25 @@ test plan, expected state, and post-onboard suites. Test plans can also declare
 onboarding assertions that run after install/onboard and before expected-state
 validation.
 
-Plan-only resolution accepts either an alias or a test plan ID:
-
-```bash
-bash test/e2e-scenario/runtime/run-scenario.sh ubuntu-repo-cloud-openclaw --plan-only
-bash test/e2e-scenario/runtime/run-scenario.sh ubuntu-repo-docker__cloud-nvidia-openclaw --plan-only
-```
-
 ## How to run
 
+The TypeScript runner is the only supported entrypoint. There is one
+execution mode: live. There is no `--dry-run`, no `--validate-only`, and
+no fake-pass code path (`--plan-only` is kept for local debugging only).
+Plan output is emitted as a side effect of the live run.
+
 ```bash
-bash test/e2e-scenario/runtime/run-scenario.sh <id> --plan-only       # resolve + print plan, no side effects
-bash test/e2e-scenario/runtime/run-scenario.sh <id> --dry-run         # helpers short-circuit with trace
-bash test/e2e-scenario/runtime/run-scenario.sh <id> --validate-only   # assume setup done; validate expected state
-bash test/e2e-scenario/runtime/run-scenario.sh <id>                   # full live run
-bash test/e2e-scenario/runtime/run-suites.sh <suite-id> [<suite-id>…]
-bash test/e2e-scenario/runtime/coverage-report.sh                     # Markdown matrix of scenario × suite
+npx tsx test/e2e-scenario/scenarios/run.ts --scenarios <id[,id...]>     # live execution (the only mode)
+npx tsx test/e2e-scenario/scenarios/run.ts --list                       # list canonical scenario ids
+npx tsx test/e2e-scenario/scenarios/run.ts --emit-matrix                # JSON registry payload for CI matrix fan-out
+npx tsx test/e2e-scenario/scenarios/run.ts --scenarios <id> --plan-only # local debug only; MUST NOT appear in any workflow
+bash test/e2e-scenario/runtime/coverage-report.sh                       # Markdown matrix of scenario × suite
 ```
 
+The deprecated bash entrypoints `runtime/run-scenario.sh` and
+`runtime/run-suites.sh` exist only as fail-fast stubs; they print a
+pointer to `run.ts` and exit non-zero.
+
 Override the runtime context dir with `E2E_CONTEXT_DIR=<path>` (default
 `.e2e/`, gitignored). The scenario runner and suites communicate only
 through `$E2E_CONTEXT_DIR/context.env` — suites do not rediscover
@@ -72,7 +73,8 @@ test/e2e/
     assert/        # outcome assertions (inference, credentials, policy, messaging)
     smoke/ inference/ hermes/ platform/ security/   # suite scripts grouped by concern
   runtime/                           # entry points + cross-cutting shared libs
-    run-scenario.sh / run-suites.sh / coverage-report.sh
+    run-scenario.sh / run-suites.sh    # DEPRECATED fail-fast stubs (see above)
+    coverage-report.sh
     resolver/      # TypeScript: load, plan, validate, coverage (invoked via tsx)
     lib/           # shared shell helpers: context, env, cleanup, logging, artifacts, sandbox-teardown
 ```
@@ -89,7 +91,7 @@ three YAML files above, plus shell scripts under
 `validation_suites/assert/`, or `validation_suites/<category>/`. The
 schemas in
 [`../runtime/resolver/schema.ts`](../runtime/resolver/schema.ts)
-describe the required shape; `run-scenario.sh <id> --plan-only`
+describe the required shape; `npx tsx test/e2e-scenario/scenarios/run.ts --scenarios <id> --plan-only`
 validates your change without running anything destructive.
 
 When adding a suite assertion, emit or preserve a stable `PASS: <id>` /
diff --git a/test/e2e-scenario/framework-tests/e2e-context-helper.test.ts b/test/e2e-scenario/framework-tests/e2e-context-helper.test.ts
index 6a7c97959f..0134d6adc9 100644
--- a/test/e2e-scenario/framework-tests/e2e-context-helper.test.ts
+++ b/test/e2e-scenario/framework-tests/e2e-context-helper.test.ts
@@ -9,7 +9,6 @@ import path from "node:path";
 
 const REPO_ROOT = path.resolve(import.meta.dirname, "../../..");
 const CONTEXT_LIB = path.join(REPO_ROOT, "test/e2e-scenario/runtime/lib/context.sh");
-const RUN_SCENARIO = path.join(REPO_ROOT, "test/e2e-scenario/runtime/run-scenario.sh");
 
 function runBash(script: string, env: Record<string, string> = {}): SpawnSyncReturns<string> {
   return spawnSync("bash", ["-c", script], {
@@ -86,38 +85,4 @@ describe("E2E context helper (runtime/lib/context.sh)", () => {
     }
   });
 
-  it("scenario_plan_execution_should_emit_context_under_dry_run", () => {
-    const tmp = fs.mkdtempSync(path.join(os.tmpdir(), "e2e-ctx-"));
-    try {
-      const r = spawnSync(
-        "bash",
-        [RUN_SCENARIO, "ubuntu-repo-cloud-openclaw", "--dry-run"],
-        {
-          env: { ...process.env, E2E_CONTEXT_DIR: tmp },
-          encoding: "utf8",
-    timeout: Number(process.env.E2E_SPAWN_TIMEOUT_MS ?? 60_000),
-          cwd: REPO_ROOT,
-        },
-      );
-      expect(r.status, r.stderr).toBe(0);
-      const ctxPath = path.join(tmp, "context.env");
-      expect(fs.existsSync(ctxPath), `context.env missing in ${tmp}`).toBe(true);
-      const ctx = fs.readFileSync(ctxPath, "utf8");
-      for (const key of [
-        "E2E_SCENARIO",
-        "E2E_PLATFORM_OS",
-        "E2E_INSTALL_METHOD",
-        "E2E_ONBOARDING_PATH",
-        "E2E_AGENT",
-        "E2E_PROVIDER",
-        "E2E_SANDBOX_NAME",
-        "E2E_GATEWAY_URL",
-        "E2E_INFERENCE_ROUTE",
-      ]) {
-        expect(ctx, `${key} missing from context.env`).toMatch(new RegExp(`^${key}=`, "m"));
-      }
-    } finally {
-      fs.rmSync(tmp, { recursive: true, force: true });
-    }
-  });
 });
diff --git a/test/e2e-scenario/framework-tests/e2e-expected-state-validator.test.ts b/test/e2e-scenario/framework-tests/e2e-expected-state-validator.test.ts
deleted file mode 100644
index ba1f2b5f31..0000000000
--- a/test/e2e-scenario/framework-tests/e2e-expected-state-validator.test.ts
+++ /dev/null
@@ -1,235 +0,0 @@
-// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-// SPDX-License-Identifier: Apache-2.0
-
-import { describe, it, expect } from "vitest";
-import { spawnSync } from "node:child_process";
-import fs from "node:fs";
-import os from "node:os";
-import path from "node:path";
-
-import {
-  validateExpectedState,
-  type ProbeResults,
-} from "../runtime/resolver/validator.ts";
-import type { ExpectedStateConfig, ResolvedSuite } from "../runtime/resolver/schema.ts";
-
-const REPO_ROOT = path.resolve(import.meta.dirname, "../../..");
-const RUN_SCENARIO = path.join(REPO_ROOT, "test/e2e-scenario/runtime/run-scenario.sh");
-
-function cloudOpenclawReady(): ExpectedStateConfig {
-  return {
-    cli: { installed: true },
-    gateway: { expected: "present", health: "healthy" },
-    sandbox: { expected: "present", status: "running", agent: "openclaw" },
-    inference: {
-      expected: "available",
-      provider: "nvidia",
-      route: "inference-local",
-      mode: "gateway-routed",
-    },
-    credentials: { expected: "present", storage: "gateway-managed" },
-  };
-}
-
-function passingProbes(): ProbeResults {
-  return {
-    "cli.installed": true,
-    "gateway.health": "healthy",
-    "gateway.expected": "present",
-    "sandbox.status": "running",
-    "sandbox.expected": "present",
-    "sandbox.agent": "openclaw",
-    "inference.expected": "available",
-    "inference.provider": "nvidia",
-    "inference.route": "inference-local",
-    "inference.mode": "gateway-routed",
-    "credentials.expected": "present",
-    "credentials.storage": "gateway-managed",
-  };
-}
-
-describe("expected state validator", () => {
-  it("should_validate_matching_state", () => {
-    const report = validateExpectedState({
-      stateId: "cloud-openclaw-ready",
-      state: cloudOpenclawReady(),
-      probes: passingProbes(),
-      suites: [],
-    });
-    expect(report.ok).toBe(true);
-    expect(report.checks.every((c) => c.ok)).toBe(true);
-  });
-
-  it("should_fail_when_gateway_expected_but_unhealthy", () => {
-    const probes = passingProbes();
-    probes["gateway.health"] = "unhealthy";
-    const report = validateExpectedState({
-      stateId: "cloud-openclaw-ready",
-      state: cloudOpenclawReady(),
-      probes,
-      suites: [],
-    });
-    expect(report.ok).toBe(false);
-    const failing = report.checks.find((c) => c.key === "gateway.health");
-    expect(failing?.ok).toBe(false);
-    expect(failing?.expected).toBe("healthy");
-    expect(failing?.actual).toBe("unhealthy");
-  });
-
-  it("should_fail_when_sandbox_expected_but_absent", () => {
-    const probes = passingProbes();
-    probes["sandbox.status"] = "absent";
-    probes["sandbox.expected"] = "absent";
-    const report = validateExpectedState({
-      stateId: "cloud-openclaw-ready",
-      state: cloudOpenclawReady(),
-      probes,
-      suites: [],
-    });
-    expect(report.ok).toBe(false);
-    expect(report.checks.some((c) => c.key === "sandbox.status" && !c.ok)).toBe(true);
-  });
-
-  it("should_fail_when_suite_requires_state_unmet_at_runtime", () => {
-    // Expected state claims inference.expected=available, but the probe
-    // reports unavailable; the smoke suite happens to pass but an inference
-    // suite's requires_state should trigger a runtime failure before
-    // execution.
-    const state = cloudOpenclawReady();
-    const probes = passingProbes();
-    probes["inference.expected"] = "unavailable";
-    const inferenceSuite: ResolvedSuite = {
-      id: "inference",
-      requires_state: { "inference.expected": "available" },
-      steps: [{ id: "models-health", script: "suites/inference/cloud/00-models-health.sh" }],
-    };
-    const report = validateExpectedState({
-      stateId: "cloud-openclaw-ready",
-      state,
-      probes,
-      suites: [inferenceSuite],
-    });
-    expect(report.ok).toBe(false);
-    const msg = report.checks
-      .filter((c) => !c.ok)
-      .map((c) => `${c.key}=${c.actual ?? "<missing>"} (wanted ${c.expected})`)
-      .join("; ");
-    expect(msg).toMatch(/inference\.expected/);
-    expect(msg).toMatch(/available/);
-    expect(msg).toMatch(/unavailable/);
-    // Should also reference the suite that made the requirement.
-    expect(report.checks.some((c) => c.suite === "inference" && !c.ok)).toBe(true);
-  });
-});
-
-describe("runner_should_not_run_suites_when_expected_state_fails", () => {
-  it("runs expected-state validation and skips suites on failure", () => {
-    const tmp = fs.mkdtempSync(path.join(os.tmpdir(), "e2e-es-"));
-    try {
-      const trace = path.join(tmp, "trace.log");
-      // Simulate gateway-unhealthy probe by setting an override env var.
-      const r = spawnSync(
-        "bash",
-        [RUN_SCENARIO, "ubuntu-repo-cloud-openclaw", "--dry-run"],
-        {
-          env: {
-            ...process.env,
-            E2E_CONTEXT_DIR: tmp,
-            E2E_TRACE_FILE: trace,
-            // validator reads these overrides in dry-run mode to fake probes
-            E2E_PROBE_OVERRIDE_GATEWAY_HEALTH: "unhealthy",
-            E2E_VALIDATE_EXPECTED_STATE: "1",
-          },
-          encoding: "utf8",
-    timeout: Number(process.env.E2E_SPAWN_TIMEOUT_MS ?? 60_000),
-          cwd: REPO_ROOT,
-        },
-      );
-      // Dry-run execution should now fail because the expected state
-      // validation runs and sees gateway.health=unhealthy.
-      expect(r.status).not.toBe(0);
-      // Validator must run (its report file should exist) but suites must not.
-      const reportPath = path.join(tmp, "expected-state-report.json");
-      expect(fs.existsSync(reportPath), `missing ${reportPath}`).toBe(true);
-      const report = JSON.parse(fs.readFileSync(reportPath, "utf8"));
-      expect(report.ok).toBe(false);
-      expect(report.checks.some((c: { key: string; ok: boolean }) => c.key === "gateway.health" && !c.ok)).toBe(true);
-      // And the run's failure output should reference expected-state, not suites.
-      expect(`${r.stdout}${r.stderr}`).toMatch(/expected.state/i);
-    } finally {
-      fs.rmSync(tmp, { recursive: true, force: true });
-    }
-  });
-});
-
-// ─────────────────────────────────────────────────────────────────────────────
-// Phase 1.F — --validate-only flag on run-scenario.sh
-// ─────────────────────────────────────────────────────────────────────────────
-
-describe("run-scenario --validate-only flag", () => {
-  it("runs only validator and emits probe results json on stdout without running install/onboard/suites", () => {
-    const tmp = fs.mkdtempSync(path.join(os.tmpdir(), "e2e-validate-only-"));
-    try {
-      const trace = path.join(tmp, "trace.log");
-      // Pre-populate a context.env: --validate-only assumes setup has already run.
-      fs.writeFileSync(
-        path.join(tmp, "context.env"),
-        "E2E_SCENARIO=ubuntu-repo-cloud-openclaw\n",
-      );
-      const r = spawnSync(
-        "bash",
-        [RUN_SCENARIO, "ubuntu-repo-cloud-openclaw", "--validate-only"],
-        {
-          env: {
-            ...process.env,
-            E2E_CONTEXT_DIR: tmp,
-            E2E_TRACE_FILE: trace,
-            // Supply probe overrides for every key the expected state needs.
-            E2E_PROBE_OVERRIDE_CLI_INSTALLED: "true",
-            E2E_PROBE_OVERRIDE_GATEWAY_EXPECTED: "present",
-            E2E_PROBE_OVERRIDE_GATEWAY_HEALTH: "healthy",
-            E2E_PROBE_OVERRIDE_SANDBOX_EXPECTED: "present",
-            E2E_PROBE_OVERRIDE_SANDBOX_STATUS: "running",
-            E2E_PROBE_OVERRIDE_SANDBOX_AGENT: "openclaw",
-            E2E_PROBE_OVERRIDE_INFERENCE_EXPECTED: "available",
-            E2E_PROBE_OVERRIDE_INFERENCE_PROVIDER: "nvidia",
-            E2E_PROBE_OVERRIDE_INFERENCE_ROUTE: "inference-local",
-            E2E_PROBE_OVERRIDE_INFERENCE_MODE: "gateway-routed",
-            E2E_PROBE_OVERRIDE_CREDENTIALS_EXPECTED: "present",
-            E2E_PROBE_OVERRIDE_CREDENTIALS_STORAGE: "gateway-managed",
-            E2E_PROBE_OVERRIDE_SECURITY_SHIELDS: "supported",
-            // `security.policy_engine` has an embedded underscore, which the
-            // E2E_PROBE_OVERRIDE_* convention cannot express. Use the
-            // JSON escape hatch for this one.
-            E2E_PROBE_OVERRIDES_JSON: JSON.stringify({ "security.policy_engine": "supported" }),
-          },
-          encoding: "utf8",
-          timeout: Number(process.env.E2E_SPAWN_TIMEOUT_MS ?? 60_000),
-          cwd: REPO_ROOT,
-        },
-      );
-      expect(r.status, r.stderr).toBe(0);
-      // Must NOT have traced install or onboard.
-      const contents = fs.existsSync(trace) ? fs.readFileSync(trace, "utf8") : "";
-      expect(contents).not.toMatch(/install:/);
-      expect(contents).not.toMatch(/onboard:/);
-      // Must have emitted an expected-state-report.json (probe results).
-      const reportPath = path.join(tmp, "expected-state-report.json");
-      expect(fs.existsSync(reportPath), `missing ${reportPath}`).toBe(true);
-      const report = JSON.parse(fs.readFileSync(reportPath, "utf8"));
-      expect(report.ok).toBe(true);
-    } finally {
-      fs.rmSync(tmp, { recursive: true, force: true });
-    }
-  });
-
-  it("is_mutually_exclusive_with_plan_only", () => {
-    const r = spawnSync(
-      "bash",
-      [RUN_SCENARIO, "ubuntu-repo-cloud-openclaw", "--validate-only", "--plan-only"],
-      { encoding: "utf8", timeout: 15_000, cwd: REPO_ROOT },
-    );
-    expect(r.status).not.toBe(0);
-    expect(r.stdout + r.stderr).toMatch(/mutually.exclusive|cannot.*both|--plan-only.*--validate-only|--validate-only.*--plan-only/i);
-  });
-});
diff --git a/test/e2e-scenario/framework-tests/e2e-lib-helpers.test.ts b/test/e2e-scenario/framework-tests/e2e-lib-helpers.test.ts
index 1a5c1a8403..82862f5622 100644
--- a/test/e2e-scenario/framework-tests/e2e-lib-helpers.test.ts
+++ b/test/e2e-scenario/framework-tests/e2e-lib-helpers.test.ts
@@ -15,7 +15,6 @@ const ASSERT = path.join(VALIDATION_SUITES, "assert");
 const REBUILD_UPGRADE_LIB = path.join(VALIDATION_SUITES, "lib/rebuild_upgrade.sh");
 const FIXTURES = path.join(REPO_ROOT, "test/e2e-scenario/nemoclaw_scenarios/fixtures");
 const INSTALL_DIR = path.join(REPO_ROOT, "test/e2e-scenario/nemoclaw_scenarios/install");
-const RUN_SCENARIO = path.join(REPO_ROOT, "test/e2e-scenario/runtime/run-scenario.sh");
 
 function runBash(script: string, env: Record<string, string> = {}): SpawnSyncReturns<string> {
   return spawnSync("bash", ["-c", script], {
@@ -61,51 +60,6 @@ describe("E2E shell helpers", () => {
     }
   });
 
-  it("test_should_emit_plan_only_checks_without_live_infrastructure", () => {
-    const tmp = fs.mkdtempSync(path.join(os.tmpdir(), "e2e-inf-plan-"));
-    try {
-      const r = runBash(
-        `
-        set -euo pipefail
-        . "${RUNTIME_LIB}/context.sh"
-        . "${VALIDATION_SUITES}/lib/inference_routing.sh"
-        e2e_context_init
-        e2e_context_set E2E_SANDBOX_NAME sandbox-1
-        e2e_inference_routing_assert_chat_completion "post-onboard.inference-routing.inference-local-chat-completion"
-      `,
-        { E2E_CONTEXT_DIR: tmp, E2E_DRY_RUN: "1" },
-      );
-      expect(r.status, r.stderr).toBe(0);
-      expect(r.stdout).toContain("post-onboard.inference-routing.inference-local-chat-completion");
-      expect(r.stdout).toMatch(/dry-run|plan/i);
-    } finally {
-      fs.rmSync(tmp, { recursive: true, force: true });
-    }
-  });
-
-  it("test_should_not_print_secret_values_in_helper_output", () => {
-    const tmp = fs.mkdtempSync(path.join(os.tmpdir(), "e2e-inf-secret-"));
-    try {
-      const r = runBash(
-        `
-        set -euo pipefail
-        . "${RUNTIME_LIB}/context.sh"
-        . "${VALIDATION_SUITES}/lib/inference_routing.sh"
-        e2e_context_init
-        e2e_context_set E2E_SANDBOX_NAME sandbox-1
-        e2e_context_set E2E_PROVIDER_API_KEY super-secret-test-token
-        e2e_inference_routing_assert_auth_proxy "post-onboard.ollama-auth-proxy.authenticated-request-accepted" "valid"
-      `,
-        { E2E_CONTEXT_DIR: tmp, E2E_DRY_RUN: "1" },
-      );
-      expect(r.status, r.stderr).toBe(0);
-      expect(r.stdout + r.stderr).not.toContain("super-secret-test-token");
-      expect(r.stdout + r.stderr).toMatch(/REDACTED|dry-run|plan/i);
-    } finally {
-      fs.rmSync(tmp, { recursive: true, force: true });
-    }
-  });
-
   it("security_policy_credentials_helper_should_load_with_context_library", () => {
     const tmp = fs.mkdtempSync(path.join(os.tmpdir(), "spc-context-"));
     try {
@@ -117,7 +71,7 @@ describe("E2E shell helpers", () => {
         spc_require_context E2E_SCENARIO E2E_PROVIDER
         echo "provider=$(spc_context_get E2E_PROVIDER)"
         `,
-        { E2E_CONTEXT_DIR: tmp, E2E_DRY_RUN: "1" },
+        { E2E_CONTEXT_DIR: tmp },
       );
       expect(r.status, r.stderr).toBe(0);
       expect(r.stdout).toContain("provider=nvidia");
@@ -136,7 +90,7 @@ describe("E2E shell helpers", () => {
         . "${VALIDATION_SUITES}/lib/security_policy_credentials.sh"
         spc_require_context E2E_PROVIDER
         `,
-        { E2E_CONTEXT_DIR: tmp, E2E_DRY_RUN: "1" },
+        { E2E_CONTEXT_DIR: tmp },
       );
       expect(r.status).not.toBe(0);
       expect(r.stderr).toContain("E2E_PROVIDER");
@@ -474,38 +428,6 @@ exit 0
     }
   });
 
-  it("scenario_dry_run_should_trace_helper_sequence_in_order", () => {
-    const tmp = fs.mkdtempSync(path.join(os.tmpdir(), "e2e-trace-"));
-    try {
-      const trace = path.join(tmp, "trace.log");
-      const r = spawnSync(
-        "bash",
-        [RUN_SCENARIO, "ubuntu-repo-cloud-openclaw", "--dry-run"],
-        {
-          env: {
-            ...process.env,
-            E2E_CONTEXT_DIR: tmp,
-            E2E_TRACE_FILE: trace,
-          },
-          encoding: "utf8",
-    timeout: Number(process.env.E2E_SPAWN_TIMEOUT_MS ?? 60_000),
-          cwd: REPO_ROOT,
-        },
-      );
-      expect(r.status, r.stderr).toBe(0);
-      expect(fs.existsSync(trace), "trace log missing").toBe(true);
-      const contents = fs.readFileSync(trace, "utf8");
-      const order = ["env:noninteractive", "install:", "onboard:", "gateway:check", "sandbox:check"];
-      let pos = 0;
-      for (const marker of order) {
-        const idx = contents.indexOf(marker, pos);
-        expect(idx, `trace missing marker in order: ${marker}\nfull:\n${contents}`).toBeGreaterThanOrEqual(0);
-        pos = idx + marker.length;
-      }
-    } finally {
-      fs.rmSync(tmp, { recursive: true, force: true });
-    }
-  });
 });
 
 // ─────────────────────────────────────────────────────────────────────────────
@@ -675,7 +597,9 @@ exec "$@"
         e2e_sandbox_exec sb1 -- false
         echo "rc=$?"
       `,
-        { PATH: `${bin}:${process.env.PATH}` },
+        // Force the openshell-direct transport so the stubbed openshell
+        // (which has no `sandbox ssh-config` subcommand) is exercised.
+        { PATH: `${bin}:${process.env.PATH}`, E2E_SANDBOX_EXEC_VIA_OPENSHELL: "1" },
       );
       expect(r.stdout).toMatch(/rc=1/);
     } finally {
@@ -683,21 +607,6 @@ exec "$@"
     }
   });
 
-  it("sandbox_exec_should_dry_run_short_circuit_when_e2e_dry_run_set", () => {
-    // Use a PATH that has bash itself but no nemoclaw — dry-run must
-    // short-circuit before the CLI lookup.
-    const r = runBash(
-      `
-        set -euo pipefail
-        . "${VALIDATION_SUITES}/sandbox-exec.sh"
-        e2e_sandbox_exec sb1 -- rm -rf /
-      `,
-      { E2E_DRY_RUN: "1", PATH: "/usr/bin:/bin" },
-    );
-    expect(r.status, r.stderr).toBe(0);
-    expect(r.stdout + r.stderr).toMatch(/dry[- ]run/i);
-  });
-
   it("sandbox_exec_stdin_should_quote_args_safely_when_piped", () => {
     // Verify that $TOKEN is NOT expanded on the host side before being
     // delivered to the sandbox. We stub openshell to echo back stdin.
@@ -717,7 +626,12 @@ exec "$@"
           . "${VALIDATION_SUITES}/sandbox-exec.sh"
           printf 'hello $TOKEN' | e2e_sandbox_exec_stdin sb1 -- cat
         `,
-        { PATH: `${bin}:${process.env.PATH}`, TOKEN: "SHOULD_NOT_EXPAND" },
+        {
+          PATH: `${bin}:${process.env.PATH}`,
+          TOKEN: "SHOULD_NOT_EXPAND",
+          // Stub only handles the openshell-direct transport.
+          E2E_SANDBOX_EXEC_VIA_OPENSHELL: "1",
+        },
       );
       expect(r.status, r.stderr).toBe(0);
       expect(r.stdout).toContain("hello $TOKEN");
@@ -726,6 +640,111 @@ exec "$@"
       fs.rmSync(tmp, { recursive: true, force: true });
     }
   });
+
+  it("sandbox_exec_should_prefer_ssh_config_transport_when_openshell_offers_one", () => {
+    // Verify the new default: when `openshell sandbox ssh-config <name>`
+    // succeeds, the wrapper routes through `ssh -F <cfg>` instead of
+    // `openshell sandbox exec`.
+    const tmp = fs.mkdtempSync(path.join(os.tmpdir(), "e2e-sbex-ssh-"));
+    try {
+      const bin = path.join(tmp, "bin");
+      fs.mkdirSync(bin);
+      const trace = path.join(tmp, "ssh.trace");
+      fs.writeFileSync(
+        path.join(bin, "openshell"),
+        `#!/usr/bin/env bash
+set -euo pipefail
+if [[ "$1" == "sandbox" && "$2" == "ssh-config" ]]; then
+  printf 'Host openshell-%s\\n  HostName 127.0.0.1\\n  Port 2222\\n  User sandbox\\n' "$3"
+  exit 0
+fi
+echo "unexpected openshell call: $*" >&2
+exit 99
+`,
+        { mode: 0o755 },
+      );
+      fs.writeFileSync(
+        path.join(bin, "ssh"),
+        `#!/usr/bin/env bash
+set -euo pipefail
+printf '%s\\n' "ssh-args:$*" >> "${trace}"
+remote="\${@: -1}"
+printf '%s\\n' "remote-cmd:\${remote}" >> "${trace}"
+echo ok-from-ssh
+exit 0
+`,
+        { mode: 0o755 },
+      );
+      const ctxDir = path.join(tmp, "ctx");
+      fs.mkdirSync(ctxDir);
+      const r = runBash(
+        `
+          set -euo pipefail
+          . "${VALIDATION_SUITES}/sandbox-exec.sh"
+          e2e_sandbox_exec sb1 -- echo hello
+        `,
+        {
+          PATH: `${bin}:${process.env.PATH}`,
+          E2E_CONTEXT_DIR: ctxDir,
+        },
+      );
+      expect(r.status, r.stderr).toBe(0);
+      expect(r.stdout).toContain("ok-from-ssh");
+      const traceContents = fs.readFileSync(trace, "utf8");
+      expect(traceContents).toMatch(/ssh-args:.*-F /);
+      expect(traceContents).toContain("openshell-sb1");
+      expect(traceContents).toMatch(/remote-cmd:echo hello$/m);
+      const cfg = path.join(ctxDir, ".ssh-config-cache", "sb1.cfg");
+      expect(fs.existsSync(cfg)).toBe(true);
+    } finally {
+      fs.rmSync(tmp, { recursive: true, force: true });
+    }
+  });
+
+  it("sandbox_exec_should_fall_back_to_openshell_when_ssh_config_unavailable", () => {
+    // If `openshell sandbox ssh-config` fails, the wrapper must fall
+    // back to `openshell sandbox exec`.
+    const tmp = fs.mkdtempSync(path.join(os.tmpdir(), "e2e-sbex-fb-"));
+    try {
+      const bin = path.join(tmp, "bin");
+      fs.mkdirSync(bin);
+      fs.writeFileSync(
+        path.join(bin, "openshell"),
+        `#!/usr/bin/env bash
+set -uo pipefail
+if [[ "$1" == "sandbox" && "$2" == "ssh-config" ]]; then
+  exit 1
+fi
+if [[ "$1" == "sandbox" && "$2" == "exec" ]]; then
+  shift 2
+  while [[ "$#" -gt 0 && "$1" != "--" ]]; do shift; done
+  shift || true
+  exec "$@"
+fi
+exit 99
+`,
+        { mode: 0o755 },
+      );
+      const ctxDir = path.join(tmp, "ctx");
+      fs.mkdirSync(ctxDir);
+      const r = runBash(
+        `
+          set -euo pipefail
+          . "${VALIDATION_SUITES}/sandbox-exec.sh"
+          e2e_sandbox_exec sb1 -- echo fallback-ok
+        `,
+        {
+          PATH: `${bin}:${process.env.PATH}`,
+          E2E_CONTEXT_DIR: ctxDir,
+        },
+      );
+      expect(r.status, r.stderr).toBe(0);
+      expect(r.stdout).toContain("fallback-ok");
+      expect(r.stderr).toMatch(/ssh-config unavailable for sb1/);
+    } finally {
+      fs.rmSync(tmp, { recursive: true, force: true });
+    }
+  });
 });
 
 // ─────────────────────────────────────────────────────────────────────────────
@@ -968,53 +987,6 @@ describe("Issue #3810 messaging provider helper library", () => {
   });
 });
 
-// ─────────────────────────────────────────────────────────────────────────────
-// Phase 1.E — Install-method dispatcher splits
-// ─────────────────────────────────────────────────────────────────────────────
-
-describe("Phase 1.E install dispatcher splits", () => {
-  function dispatchDryRun(profile: string): SpawnSyncReturns<string> {
-    return runBash(
-      `
-        set -euo pipefail
-        . "${INSTALL_DIR}/dispatch.sh"
-        e2e_install "${profile}"
-      `,
-      { E2E_DRY_RUN: "1" },
-    );
-  }
-
-  it("install_should_dispatch_to_install_repo_helper_for_repo_current_profile", () => {
-    const r = dispatchDryRun("repo-current");
-    expect(r.status, r.stderr).toBe(0);
-    expect(r.stdout + r.stderr).toMatch(/install-repo/);
-    expect(r.stdout + r.stderr).not.toMatch(/install-curl|install-ollama|install-launchable/);
-  });
-
-  it("install_should_dispatch_to_install_curl_helper_for_public_installer_profile", () => {
-    const r = dispatchDryRun("public-installer");
-    expect(r.status, r.stderr).toBe(0);
-    expect(r.stdout + r.stderr).toMatch(/install-curl/);
-    expect(r.stdout + r.stderr).not.toMatch(/install-repo|install-ollama|install-launchable/);
-  });
-
-  it("install_should_dispatch_to_install_ollama_helper_for_ollama_profile", () => {
-    const r = dispatchDryRun("ollama");
-    expect(r.status, r.stderr).toBe(0);
-    expect(r.stdout + r.stderr).toMatch(/install-ollama/);
-    expect(r.stdout + r.stderr).not.toMatch(/install-repo|install-curl|install-launchable/);
-  });
-
-  it("install_should_dispatch_to_install_launchable_helper_for_launchable_profile", () => {
-    const r = dispatchDryRun("launchable");
-    expect(r.status, r.stderr).toBe(0);
-    expect(r.stdout + r.stderr).toMatch(/install-launchable/);
-    expect(r.stdout + r.stderr).not.toMatch(/install-repo|install-curl|install-ollama/);
-  });
-});
-
-
-
 describe("baseline onboarding validation helper", () => {
   it("baseline_helper_should_source_under_strict_shell_options", () => {
     const r = runBash(`set -euo pipefail; source "${VALIDATION_SUITES}/lib/baseline_onboarding.sh"`);
@@ -1080,7 +1052,7 @@ describe("sandbox lifecycle validation helper", () => {
     try {
       const bin = path.join(tmp, "bin"); fs.mkdirSync(bin);
       fs.writeFileSync(path.join(bin, "timeout"), "#!/usr/bin/env bash\necho timed out >&2\nexit 124\n", { mode: 0o755 });
-      const r = runBash(`set -e; unset E2E_DRY_RUN; . "${VALIDATION_SUITES}/lib/sandbox_lifecycle.sh"; sandbox_lifecycle_run_with_timeout 1 bash -c 'sleep 5'`, { PATH: `${bin}:${process.env.PATH}` });
+      const r = runBash(`set -e; . "${VALIDATION_SUITES}/lib/sandbox_lifecycle.sh"; sandbox_lifecycle_run_with_timeout 1 bash -c 'sleep 5'`, { PATH: `${bin}:${process.env.PATH}` });
       expect(r.status).toBe(124);
       expect(r.stderr).toMatch(/timed out/);
     } finally { fs.rmSync(tmp, { recursive: true, force: true }); }
@@ -1093,7 +1065,7 @@ describe("sandbox lifecycle validation helper", () => {
       fs.writeFileSync(path.join(bin, "nemoclaw"), `#!/usr/bin/env bash
 case "$*" in
   list) echo sb1;;
-  "sb1 status") echo 'status running gateway healthy sandbox running';;
+  "sb1 status") printf '  Sandbox: sb1\\n    Model:    nvidia/x\\n    OpenShell: 0.0.44\\n    Policies: npm\\n';;
   "sb1 logs") echo logline;;
   *) echo "unexpected nemoclaw args: $*" >&2; exit 64;;
 esac
diff --git a/test/e2e-scenario/framework-tests/e2e-phase-orchestrators.test.ts b/test/e2e-scenario/framework-tests/e2e-phase-orchestrators.test.ts
index 497dac3387..c0f08fd23a 100644
--- a/test/e2e-scenario/framework-tests/e2e-phase-orchestrators.test.ts
+++ b/test/e2e-scenario/framework-tests/e2e-phase-orchestrators.test.ts
@@ -3,19 +3,39 @@
 
 import { describe, expect, it } from "vitest";
 import fs from "node:fs";
+import os from "node:os";
 import path from "node:path";
 
 import { HostCliClient } from "../scenarios/clients/host-cli.ts";
 import { compileRunPlans } from "../scenarios/compiler.ts";
 import { PhaseOrchestrator } from "../scenarios/orchestrators/phase.ts";
 import { ScenarioRunner } from "../scenarios/orchestrators/runner.ts";
-import type { AssertionStep, PhaseName, PhaseResult, RunContext, RunPlanPhase } from "../scenarios/types.ts";
+import type {
+  AssertionStep,
+  PhaseAction,
+  PhaseName,
+  PhaseResult,
+  RunContext,
+  RunPlanPhase,
+} from "../scenarios/types.ts";
 
-function fakeCtx(): RunContext {
-  return { contextDir: fs.mkdtempSync(path.join(process.cwd(), ".tmp-e2e-phase-")), dryRun: true };
+const REPO_ROOT = path.resolve(import.meta.dirname, "../../..");
+
+function freshCtx(): RunContext { // builds a RunContext whose only field is contextDir
+  return { contextDir: fs.mkdtempSync(path.join(os.tmpdir(), "e2e-phase-")) }; // fresh, uniquely named directory under the OS temp dir
+}
+
+function shellStep(id: string, phase: PhaseName, ref: string, reliability?: AssertionStep["reliability"]): AssertionStep { // assertion step with a "shell" implementation
+  return {
+    id,
+    phase,
+    implementation: { kind: "shell", ref }, // tests in this file pass `ref` as a path relative to REPO_ROOT
+    evidencePath: `.e2e/assertions/${id}.log`,
+    reliability, // optional policy (tests here use timeoutSeconds and retry); undefined when the caller omits it
+  };
 }
 
-function fakeStep(id: string, phase: PhaseName, ref = "fake-pass"): AssertionStep {
+function probeStep(id: string, phase: PhaseName, ref = "no-such-probe"): AssertionStep {
   return {
     id,
     phase,
@@ -24,24 +44,69 @@ function fakeStep(id: string, phase: PhaseName, ref = "fake-pass"): AssertionSte
   };
 }
 
-function fakePhase(step: AssertionStep): RunPlanPhase {
+function pendingStep(id: string, phase: PhaseName): AssertionStep { // assertion step with a "pending" implementation
+  return {
+    id,
+    phase,
+    implementation: { kind: "pending", ref: "not-yet" }, // fixed placeholder ref; no evidencePath or reliability is set
+  };
+}
+
+function makePhase(steps: AssertionStep[]): RunPlanPhase { // wraps the steps in one assertion group of a phase that has no actions
   return {
-    name: step.phase,
+    name: steps[0].phase, // phase name and group id both come from the first step, so `steps` must be non-empty
     actions: [],
-    assertionGroups: [{ id: `group.${step.id}`, phase: step.phase, migrationStatus: "complete", steps: [step] }],
+    assertionGroups: [{ id: `group.${steps[0].id}`, phase: steps[0].phase, migrationStatus: "complete", steps }],
   };
 }
 
-describe("phase orchestrators", () => {
+function writeTempScript(dir: string, name: string, body: string): string { // writes a bash script into `dir` and returns its path
+  const p = path.join(dir, name);
+  fs.writeFileSync(p, `#!/usr/bin/env bash\nset -euo pipefail\n${body}\n`, { mode: 0o755 }); // shebang and strict mode are prepended to `body`; mode 0o755 makes it executable
+  return p;
+}
+
+function shellAction( // builds a PhaseAction of kind "shell" for the given script
+  id: string,
+  phase: PhaseName,
+  scriptRef: string,
+  opts: { timeoutSeconds?: number; arg?: string } = {}, // both keys optional; omitted keys are passed through as undefined
+): PhaseAction {
+  return {
+    id,
+    phase,
+    kind: "shell",
+    scriptRef,
+    arg: opts.arg,
+    timeoutSeconds: opts.timeoutSeconds,
+  };
+}
+
+function makePhaseWithActions( // builds a RunPlanPhase with explicit actions and at most one assertion group
+  phase: PhaseName,
+  actions: PhaseAction[],
+  steps: AssertionStep[],
+): RunPlanPhase {
+  return {
+    name: phase,
+    actions,
+    assertionGroups:
+      steps.length > 0 // an empty `steps` yields no groups, so steps[0] is only read when it exists
+        ? [{ id: `group.${steps[0].id}`, phase, migrationStatus: "complete", steps }]
+        : [],
+  };
+}
+
+describe("phase orchestrators - top-level delegation", () => {
   it("test_should_execute_phase_assertions_from_phase_orchestrators_not_top_level_runner", async () => {
-    const ctx = fakeCtx();
+    const ctx = freshCtx();
     try {
       const [plan] = compileRunPlans(["ubuntu-repo-cloud-openclaw"]);
       const calls: string[] = [];
       const fakeOrchestrator = (phase: PhaseName) => ({
         run: async (_ctx: RunContext, runPhase: RunPlanPhase, _prior?: PhaseResult[]): Promise<PhaseResult> => {
           calls.push(runPhase.name);
-          return { phase, status: "passed", assertions: [] };
+          return { phase, status: "passed", actions: [], assertions: [] };
         },
       });
       const runner = new ScenarioRunner({
@@ -58,63 +123,729 @@ describe("phase orchestrators", () => {
       fs.rmSync(ctx.contextDir, { recursive: true, force: true });
     }
   });
+});
 
-  it("test_should_record_step_status_attempts_duration_classifier_and_evidence", async () => {
-    const ctx = fakeCtx();
+describe("phase orchestrators - real shell execution", () => {
+  it("shell_step_passes_when_script_exits_zero", async () => {
+    const ctx = freshCtx();
     try {
-      const step = fakeStep("runtime.retry-pass", "runtime", "fake-retry-once-pass");
-      step.reliability = { retry: { attempts: 2, on: ["gateway-transient"] } };
+      const script = writeTempScript(ctx.contextDir, "ok.sh", "echo hello-from-real-shell");
+      const ref = path.relative(REPO_ROOT, script);
+      const step = shellStep("runtime.real-pass", "runtime", ref);
       const orchestrator = new PhaseOrchestrator("runtime");
 
-      const result = await orchestrator.run(ctx, fakePhase(step));
+      const result = await orchestrator.run(ctx, makePhase([step]));
 
       expect(result.status).toBe("passed");
       expect(result.assertions[0]).toEqual(
-        expect.objectContaining({
-          id: "runtime.retry-pass",
-          status: "passed",
-          attempts: 2,
-          classifier: "gateway-transient",
-          evidence: ".e2e/assertions/runtime.retry-pass.json",
-        }),
+        expect.objectContaining({ id: "runtime.real-pass", status: "passed", attempts: 1 }),
       );
-      expect(result.assertions[0].durationMs).toBeGreaterThanOrEqual(0);
+      const log = fs.readFileSync(result.assertions[0].evidence!, "utf8");
+      expect(log).toContain("hello-from-real-shell");
     } finally {
       fs.rmSync(ctx.contextDir, { recursive: true, force: true });
     }
   });
 
-  it("test_should_enforce_timeout_and_retry_policy_in_orchestrator", async () => {
-    const ctx = fakeCtx();
+  it("shell_step_fails_when_script_exits_nonzero_and_records_stderr_tail", async () => {
+    const ctx = freshCtx();
     try {
-      const step = fakeStep("runtime.retry-fail", "runtime", "fake-always-transient");
-      step.reliability = { timeoutSeconds: 1, retry: { attempts: 2, on: ["provider-transient"] } };
+      const script = writeTempScript(ctx.contextDir, "fail.sh", 'echo "boom: real failure" >&2; exit 7');
+      const ref = path.relative(REPO_ROOT, script);
+      const step = shellStep("runtime.real-fail", "runtime", ref);
       const orchestrator = new PhaseOrchestrator("runtime");
 
-      const result = await orchestrator.run(ctx, fakePhase(step));
+      const result = await orchestrator.run(ctx, makePhase([step]));
 
       expect(result.status).toBe("failed");
-      expect(result.assertions[0]).toEqual(
-        expect.objectContaining({
-          id: "runtime.retry-fail",
-          status: "failed",
-          attempts: 2,
-          classifier: "provider-transient",
+      expect(result.assertions[0].status).toBe("failed");
+      expect(result.assertions[0].message).toMatch(/exit 7/);
+      expect(result.assertions[0].message).toMatch(/boom: real failure/);
+    } finally {
+      fs.rmSync(ctx.contextDir, { recursive: true, force: true });
+    }
+  });
+
+  it("shell_step_times_out_via_orchestrator_policy_not_script", async () => {
+    const ctx = freshCtx();
+    try {
+      const script = writeTempScript(ctx.contextDir, "slow.sh", "sleep 30");
+      const ref = path.relative(REPO_ROOT, script);
+      const step = shellStep("runtime.real-timeout", "runtime", ref, { timeoutSeconds: 1 });
+      const orchestrator = new PhaseOrchestrator("runtime");
+
+      const started = Date.now();
+      const result = await orchestrator.run(ctx, makePhase([step]));
+      const elapsed = Date.now() - started;
+
+      expect(result.status).toBe("failed");
+      expect(result.assertions[0].message).toMatch(/exceeded 1s/);
+      expect(elapsed).toBeLessThan(15_000);
+    } finally {
+      fs.rmSync(ctx.contextDir, { recursive: true, force: true });
+    }
+  }, 20_000);
+
+  it("shell_step_retries_on_classified_transient_then_passes", async () => {
+    const ctx = freshCtx();
+    try {
+      const counterFile = path.join(ctx.contextDir, "counter");
+      fs.writeFileSync(counterFile, "0");
+      const script = writeTempScript(
+        ctx.contextDir,
+        "gateway-flaky.sh",
+        `n=$(cat "${counterFile}"); n=$((n+1)); echo "$n" > "${counterFile}"; if [ "$n" -lt 2 ]; then echo "gateway-transient: try again" >&2; exit 1; fi; echo ok`,
+      );
+      const ref = path.relative(REPO_ROOT, script);
+      const step = shellStep("runtime.gateway-retry", "runtime", ref, {
+        retry: { attempts: 2, on: ["gateway-transient"] },
+      });
+      const orchestrator = new PhaseOrchestrator("runtime");
+
+      const result = await orchestrator.run(ctx, makePhase([step]));
+
+      expect(result.status).toBe("passed");
+      expect(result.assertions[0].attempts).toBe(2);
+      expect(result.assertions[0].classifier).toBe("gateway-transient");
+    } finally {
+      fs.rmSync(ctx.contextDir, { recursive: true, force: true });
+    }
+  });
+
+  it("shell_step_fails_with_clear_message_when_script_missing", async () => {
+    const ctx = freshCtx();
+    try {
+      const step = shellStep("runtime.missing", "runtime", "test/e2e-scenario/does-not-exist.sh");
+      const orchestrator = new PhaseOrchestrator("runtime");
+
+      const result = await orchestrator.run(ctx, makePhase([step]));
+
+      expect(result.status).toBe("failed");
+      expect(result.assertions[0].message).toMatch(/script not found/);
+    } finally {
+      fs.rmSync(ctx.contextDir, { recursive: true, force: true });
+    }
+  });
+
+  it("probe_step_without_registered_probe_skips_visibly_never_passes_falsely", async () => {
+    const ctx = freshCtx();
+    try {
+      const step = probeStep("runtime.probe-pending", "runtime");
+      const orchestrator = new PhaseOrchestrator("runtime");
+
+      const result = await orchestrator.run(ctx, makePhase([step]));
+
+      expect(result.assertions[0].status).toBe("skipped");
+      expect(result.assertions[0].message).toMatch(/probe not registered/);
+    } finally {
+      fs.rmSync(ctx.contextDir, { recursive: true, force: true });
+    }
+  });
+
+  it("pending_step_skips_visibly_with_pending_marker", async () => {
+    const ctx = freshCtx();
+    try {
+      const step = pendingStep("runtime.pending", "runtime");
+      const orchestrator = new PhaseOrchestrator("runtime");
+
+      const result = await orchestrator.run(ctx, makePhase([step]));
+
+      expect(result.assertions[0].status).toBe("skipped");
+      expect(result.assertions[0].message).toMatch(/^pending:/);
+    } finally {
+      fs.rmSync(ctx.contextDir, { recursive: true, force: true });
+    }
+  });
+});
+
+describe("phase orchestrators - actions execute before assertions", () => {
+  it("phase_action_runs_before_assertions_and_records_evidence", async () => {
+    const ctx = freshCtx();
+    try {
+      const actionScript = writeTempScript(ctx.contextDir, "setup.sh", "echo phase-action-evidence");
+      const action = shellAction("environment.setup-ok", "environment", path.relative(REPO_ROOT, actionScript));
+      const stepScript = writeTempScript(ctx.contextDir, "after.sh", "echo after-action");
+      const step = shellStep("environment.assert-ok", "environment", path.relative(REPO_ROOT, stepScript));
+      const orchestrator = new PhaseOrchestrator("environment");
+
+      const result = await orchestrator.run(ctx, makePhaseWithActions("environment", [action], [step]));
+
+      expect(result.status).toBe("passed");
+      expect(result.actions).toHaveLength(1);
+      expect(result.actions[0]).toEqual(
+        expect.objectContaining({ id: "environment.setup-ok", status: "passed" }),
+      );
+      expect(result.actions[0].evidence).toBeTruthy();
+      const actionLog = fs.readFileSync(result.actions[0].evidence!, "utf8");
+      expect(actionLog).toContain("phase-action-evidence");
+      expect(result.assertions).toHaveLength(1);
+      expect(result.assertions[0].status).toBe("passed");
+    } finally {
+      fs.rmSync(ctx.contextDir, { recursive: true, force: true });
+    }
+  });
+
+  it("phase_action_failure_short_circuits_assertions", async () => {
+    const ctx = freshCtx();
+    try {
+      const failScript = writeTempScript(ctx.contextDir, "fail.sh", 'echo "setup boom" >&2; exit 5');
+      const action = shellAction("environment.setup-fail", "environment", path.relative(REPO_ROOT, failScript));
+      const stepScript = writeTempScript(ctx.contextDir, "after.sh", "echo should-not-run");
+      const step = shellStep("environment.never-runs", "environment", path.relative(REPO_ROOT, stepScript));
+      const orchestrator = new PhaseOrchestrator("environment");
+
+      const result = await orchestrator.run(ctx, makePhaseWithActions("environment", [action], [step]));
+
+      expect(result.status).toBe("failed");
+      expect(result.actions).toHaveLength(1);
+      expect(result.actions[0].status).toBe("failed");
+      expect(result.actions[0].message).toMatch(/exit 5/);
+      // Assertions must NOT have run, so they must NOT show a misleading
+      // pass for an environment that was never set up.
+      expect(result.assertions).toEqual([]);
+    } finally {
+      fs.rmSync(ctx.contextDir, { recursive: true, force: true });
+    }
+  });
+
+  it("phase_action_times_out_via_orchestrator_policy", async () => {
+    const ctx = freshCtx();
+    try {
+      const slow = writeTempScript(ctx.contextDir, "slow.sh", "sleep 30");
+      const action = shellAction("environment.setup-slow", "environment", path.relative(REPO_ROOT, slow), {
+        timeoutSeconds: 1,
+      });
+      const orchestrator = new PhaseOrchestrator("environment");
+
+      const started = Date.now();
+      const result = await orchestrator.run(ctx, makePhaseWithActions("environment", [action], []));
+
+      expect(result.status).toBe("failed");
+      expect(result.actions[0].status).toBe("failed");
+      expect(result.actions[0].message).toMatch(/exceeded 1s/);
+      // The orchestrator must enforce the timeout, not depend on the
+      // script self-killing. Allow some headroom but fail if we waited
+      // anywhere near the script's 30s sleep.
+      expect(Date.now() - started).toBeLessThan(15_000);
+    } finally {
+      fs.rmSync(ctx.contextDir, { recursive: true, force: true });
+    }
+  }, 20_000); // explicit per-test budget above the 15s bound asserted here, matching the sibling shell-step timeout test
+
+  it("phase_action_publishes_alias_path_on_success", async () => {
+    const ctx = freshCtx();
+    try {
+      const actionScript = writeTempScript(ctx.contextDir, "alias.sh", "echo aliased-output");
+      const action: PhaseAction = {
+        id: "onboarding.profile.alias-demo",
+        phase: "onboarding",
+        kind: "shell",
+        scriptRef: path.relative(REPO_ROOT, actionScript),
+        aliasPath: "onboard.log",
+      };
+      const orchestrator = new PhaseOrchestrator("onboarding");
+
+      const result = await orchestrator.run(ctx, makePhaseWithActions("onboarding", [action], []));
+
+      expect(result.actions[0].status).toBe("passed");
+      const aliasContents = fs.readFileSync(path.join(ctx.contextDir, "onboard.log"), "utf8");
+      expect(aliasContents).toContain("aliased-output");
+    } finally {
+      fs.rmSync(ctx.contextDir, { recursive: true, force: true });
+    }
+  });
+
+  it("phase_action_evidence_log_is_flushed_before_resolve", async () => {
+    const ctx = freshCtx();
+    try {
+      const actionScript = writeTempScript(ctx.contextDir, "flush.sh", "echo flushed-phase-action-output");
+      const action = shellAction("environment.flush", "environment", path.relative(REPO_ROOT, actionScript));
+      const orchestrator = new PhaseOrchestrator("environment");
+
+      const result = await orchestrator.run(ctx, makePhaseWithActions("environment", [action], []));
+
+      // Synchronous read must already see the output - the orchestrator
+      // must wait for the WriteStream's 'finish' before resolving.
+      const log = fs.readFileSync(result.actions[0].evidence!, "utf8");
+      expect(log).toContain("flushed-phase-action-output");
+    } finally {
+      fs.rmSync(ctx.contextDir, { recursive: true, force: true });
+    }
+  });
+});
+
+describe("plan compiler emits phase actions for canonical scenarios", () => {
+  it("compiler_emits_install_and_onboard_actions_for_canonical_scenarios", async () => {
+    const { compileRunPlans } = await import("../scenarios/compiler.ts");
+    const ids = [
+      "ubuntu-repo-cloud-openclaw",
+      "ubuntu-repo-cloud-hermes",
+      "gpu-repo-local-ollama-openclaw",
+      "macos-repo-cloud-openclaw",
+      "wsl-repo-cloud-openclaw",
+      "brev-launchable-cloud-openclaw",
+      "ubuntu-no-docker-preflight-negative",
+    ];
+    const plans = compileRunPlans(ids);
+    expect(plans).toHaveLength(ids.length);
+    for (const plan of plans) {
+      const env = plan.phases.find((p) => p.name === "environment")!;
+      const onb = plan.phases.find((p) => p.name === "onboarding")!;
+      expect(env.actions.some((a) => a.id.startsWith("environment.install."))).toBe(true);
+      expect(onb.actions.some((a) => a.id.startsWith("onboarding.profile."))).toBe(true);
+      // context.env emission is framework infrastructure (ScenarioRunner),
+      // not a shell action. The compiler must NOT emit a shell context
+      // action - if it did we'd be coupling back to the old resolver's
+      // plan.json shape.
+      expect(env.actions.map((a) => a.id)).not.toContain("environment.context.emit");
+      // Onboarding action must publish a stable alias path so legacy
+      // shell assertions referencing ${E2E_CONTEXT_DIR}/onboard.log
+      // keep working without coupling them to action ids.
+      const onboardingAction = onb.actions.find((a) => a.id.startsWith("onboarding.profile."));
+      expect(onboardingAction?.aliasPath).toBe("onboard.log");
+      // Every install/onboard action must be a typed shell-fn referencing
+      // the canonical dispatcher script - no free-form strings.
+      for (const action of [...env.actions, ...onb.actions]) {
+        if (action.id.startsWith("environment.install.") || action.id.startsWith("onboarding.profile.")) {
+          expect(action.kind).toBe("shell-fn");
+          expect(action.scriptRef).toMatch(/dispatch\.sh$/);
+          expect(action.fn).toMatch(/^e2e_(install|onboard)$/);
+          expect(action.arg).toBeTruthy();
+        }
+      }
+    }
+  });
+
+  it("compiler_routes_docker_missing_runtime_to_no_docker_onboarding_profile", async () => {
+    const { compileRunPlans } = await import("../scenarios/compiler.ts");
+    // Negative scenario declares runtime=docker-missing in scenarios.yaml.
+    // The compiler must substitute the onboarding profile id from the
+    // base 'cloud-openclaw' to 'cloud-openclaw-no-docker' so the
+    // dispatcher routes to the worker that installs the docker shim and
+    // captures negative-preflight.log. Without this routing, the
+    // 'onboarding.preflight.expected-failed' assertion has nothing to grep.
+    const [plan] = compileRunPlans(["ubuntu-no-docker-preflight-negative"]);
+    const onb = plan.phases.find((p) => p.name === "onboarding")!;
+    const action = onb.actions.find((a) => a.id.startsWith("onboarding.profile."));
+    expect(action?.id).toBe("onboarding.profile.cloud-openclaw-no-docker");
+    expect(action?.arg).toBe("cloud-openclaw-no-docker");
+    expect(action?.evidencePath).toBe(
+      ".e2e/actions/onboarding.profile.cloud-openclaw-no-docker.log",
+    );
+    // Secret env must still include NVIDIA_API_KEY so behavior matches
+    // a real user invocation (CLI loads creds even if preflight aborts).
+    expect(action?.secretEnv).toContain("NVIDIA_API_KEY");
+    // Positive scenarios must NOT pick up the -no-docker suffix.
+    const [posPlan] = compileRunPlans(["ubuntu-repo-cloud-openclaw"]);
+    const posAction = posPlan.phases
+      .find((p) => p.name === "onboarding")!
+      .actions.find((a) => a.id.startsWith("onboarding.profile."));
+    expect(posAction?.arg).toBe("cloud-openclaw");
+  });
+});
+
+describe("ScenarioRunner seeds context.env and short-circuits across phases", () => {
+  it("seedContextEnv_writes_normalized_keys_at_top_level_context_env_path", async () => {
+    const { compileRunPlans } = await import("../scenarios/compiler.ts");
+    const { seedContextEnv } = await import("../scenarios/orchestrators/context.ts");
+    const ctx = freshCtx();
+    try {
+      const [plan] = compileRunPlans(["ubuntu-repo-cloud-openclaw"]);
+      const result = seedContextEnv(ctx, plan);
+
+      // Path matches the shell helper's e2e_context_init: top-level,
+      // not under .e2e/. Runtime steps source ${E2E_CONTEXT_DIR}/context.env.
+      expect(result.path).toBe(path.join(ctx.contextDir, "context.env"));
+      const body = fs.readFileSync(result.path, "utf8");
+      // Required keys downstream shell assertions look up.
+      expect(body).toMatch(/^E2E_SCENARIO=ubuntu-repo-cloud-openclaw$/m);
+      expect(body).toMatch(/^E2E_PLATFORM_OS=ubuntu$/m);
+      expect(body).toMatch(/^E2E_AGENT=openclaw$/m);
+      expect(body).toMatch(/^E2E_PROVIDER=nvidia$/m);
+      expect(body).toMatch(/^E2E_GATEWAY_URL=http:\/\/127\.0\.0\.1:18789$/m);
+      expect(body).toMatch(/^E2E_SANDBOX_NAME=e2e-ubuntu-repo-cloud-openclaw$/m);
+    } finally {
+      fs.rmSync(ctx.contextDir, { recursive: true, force: true });
+    }
+  });
+
+  it("hermes_scenario_seeds_hermes_gateway_url", async () => {
+    const { compileRunPlans } = await import("../scenarios/compiler.ts");
+    const { seedContextEnv } = await import("../scenarios/orchestrators/context.ts");
+    const ctx = freshCtx();
+    try {
+      const [plan] = compileRunPlans(["ubuntu-repo-cloud-hermes"]);
+      const result = seedContextEnv(ctx, plan);
+      const body = fs.readFileSync(result.path, "utf8");
+      expect(body).toMatch(/^E2E_AGENT=hermes$/m);
+      expect(body).toMatch(/^E2E_GATEWAY_URL=http:\/\/127\.0\.0\.1:8642$/m);
+    } finally {
+      fs.rmSync(ctx.contextDir, { recursive: true, force: true });
+    }
+  });
+
+  it("runner_skips_downstream_phases_when_prior_phase_action_fails", async () => {
+    const { ScenarioRunner } = await import("../scenarios/orchestrators/runner.ts");
+    const { compileRunPlans } = await import("../scenarios/compiler.ts");
+    const ctx = freshCtx();
+    try {
+      const [plan] = compileRunPlans(["ubuntu-repo-cloud-openclaw"]);
+      // Inject a failing environment phase to simulate an install action
+      // failure. Onboarding and runtime must report skipped, not run
+      // their own actions or assertions.
+      const failingEnv = {
+        run: async () => ({
+          phase: "environment" as const,
+          status: "failed" as const,
+          actions: [
+            {
+              id: "environment.install.repo-current",
+              status: "failed" as const,
+              durationMs: 5,
+              message: "simulated install failure",
+            },
+          ],
+          assertions: [],
         }),
+      };
+      let onboardingCalled = false;
+      let runtimeCalled = false;
+      const onboarding = {
+        run: async () => {
+          onboardingCalled = true;
+          return { phase: "onboarding" as const, status: "passed" as const, actions: [], assertions: [] };
+        },
+      };
+      const runtime = {
+        run: async () => {
+          runtimeCalled = true;
+          return { phase: "runtime" as const, status: "passed" as const, actions: [], assertions: [] };
+        },
+      };
+      const runner = new ScenarioRunner({ environment: failingEnv, onboarding, runtime });
+
+      const results = await runner.run(ctx, plan);
+
+      // Downstream orchestrators must NOT have been invoked.
+      expect(onboardingCalled).toBe(false);
+      expect(runtimeCalled).toBe(false);
+      // Each phase still has a result, and the downstream ones are
+      // skipped with a message that names the blocking action.
+      expect(results.map((r) => r.phase)).toEqual(["environment", "onboarding", "runtime"]);
+      expect(results[1].status).toBe("skipped");
+      expect(results[2].status).toBe("skipped");
+      expect(results[1].assertions[0].message).toMatch(/blocked by prior failure/);
+      expect(results[1].assertions[0].message).toMatch(/environment.install.repo-current/);
+    } finally {
+      fs.rmSync(ctx.contextDir, { recursive: true, force: true });
+    }
+  });
+
+  it("runner_does_not_short_circuit_on_assertion_failure_only", async () => {
+    // Assertion failures (as opposed to action failures) must not block
+    // downstream phases - reviewers need to see all failure layers.
+    const { ScenarioRunner } = await import("../scenarios/orchestrators/runner.ts");
+    const { compileRunPlans } = await import("../scenarios/compiler.ts");
+    const ctx = freshCtx();
+    try {
+      const [plan] = compileRunPlans(["ubuntu-repo-cloud-openclaw"]);
+      const env = {
+        run: async () => ({
+          phase: "environment" as const,
+          status: "failed" as const,
+          actions: [],
+          assertions: [
+            { id: "environment.something", status: "failed" as const, attempts: 1, durationMs: 1 },
+          ],
+        }),
+      };
+      let onboardingCalled = false;
+      const onboarding = {
+        run: async () => {
+          onboardingCalled = true;
+          return { phase: "onboarding" as const, status: "passed" as const, actions: [], assertions: [] };
+        },
+      };
+      const runner = new ScenarioRunner({
+        environment: env,
+        onboarding,
+        runtime: {
+          run: async () => ({ phase: "runtime" as const, status: "passed" as const, actions: [], assertions: [] }),
+        },
+      });
+
+      await runner.run(ctx, plan);
+      expect(onboardingCalled).toBe(true);
+    } finally {
+      fs.rmSync(ctx.contextDir, { recursive: true, force: true });
+    }
+  });
+});
+
+describe("required probe and pending steps fail closed", () => {
+  it("test_required_probe_step_that_is_unregistered_fails_the_phase", async () => {
+    const ctx = freshCtx();
+    try {
+      const step: AssertionStep = {
+        id: "runtime.security.required-probe",
+        phase: "runtime",
+        implementation: { kind: "probe", ref: "unregisteredSecurityProbe" },
+        evidencePath: ".e2e/assertions/runtime.security.required-probe.json",
+        required: true,
+      };
+      const orchestrator = new PhaseOrchestrator("runtime");
+
+      const result = await orchestrator.run(ctx, makePhase([step]));
+
+      expect(result.status).toBe("failed");
+      expect(result.assertions[0].status).toBe("failed");
+      expect(result.assertions[0].message).toMatch(/required probe not registered/);
+      expect(result.assertions[0].message).toContain("unregisteredSecurityProbe");
+    } finally {
+      fs.rmSync(ctx.contextDir, { recursive: true, force: true });
+    }
+  });
+
+  it("test_non_required_probe_step_continues_to_skip_visibly", async () => {
+    const ctx = freshCtx();
+    try {
+      const step: AssertionStep = {
+        id: "runtime.diagnostics.non-required-probe",
+        phase: "runtime",
+        implementation: { kind: "probe", ref: "diagnosticsProbe" },
+        evidencePath: ".e2e/assertions/runtime.diagnostics.non-required-probe.json",
+        // required intentionally omitted (defaults to false)
+      };
+      const orchestrator = new PhaseOrchestrator("runtime");
+
+      const result = await orchestrator.run(ctx, makePhase([step]));
+
+      expect(result.assertions[0].status).toBe("skipped");
+      expect(result.assertions[0].message).toMatch(/probe not registered/);
+      // Non-required skipped step does not fail the phase.
+      expect(result.status).not.toBe("failed");
+    } finally {
+      fs.rmSync(ctx.contextDir, { recursive: true, force: true });
+    }
+  });
+
+  it("test_required_pending_step_fails_closed", async () => {
+    const ctx = freshCtx();
+    try {
+      const step: AssertionStep = {
+        id: "runtime.expected-failure.no-side-effects",
+        phase: "runtime",
+        implementation: { kind: "pending", ref: "expectedFailureNoSideEffectsProbe" },
+        evidencePath: ".e2e/assertions/runtime.expected-failure.no-side-effects.json",
+        required: true,
+      };
+      const orchestrator = new PhaseOrchestrator("runtime");
+
+      const result = await orchestrator.run(ctx, makePhase([step]));
+
+      expect(result.status).toBe("failed");
+      expect(result.assertions[0].status).toBe("failed");
+      expect(result.assertions[0].message).toMatch(/required pending step not implemented/);
+    } finally {
+      fs.rmSync(ctx.contextDir, { recursive: true, force: true });
+    }
+  });
+
+  it("test_security_suite_groups_in_registry_mark_their_steps_as_required", async () => {
+    const { assertionGroupForSuite } = await import("../scenarios/assertions/registry.ts");
+    for (const suiteId of ["security-shields", "security-policy", "security-injection"]) {
+      const group = assertionGroupForSuite(suiteId);
+      expect(group, `missing assertion group for suite ${suiteId}`).toBeDefined();
+      for (const step of group?.steps ?? []) {
+        expect(
+          step.required,
+          `${suiteId} step ${step.id} must be required so it fails closed`,
+        ).toBe(true);
+      }
+    }
+  });
+
+  it("test_expected_failure_no_side_effects_step_in_registry_is_required", async () => {
+    const { assertionRegistry } = await import("../scenarios/assertions/registry.ts");
+    const group = assertionRegistry.groups.find(
+      (g) => g.id === "runtime.expected-failure.no-side-effects",
+    );
+    expect(group).toBeDefined();
+    // An empty step list would make the per-step check pass vacuously, so require at least one step.
+    expect(group?.steps.length ?? 0).toBeGreaterThan(0);
+    expect((group?.steps ?? []).every((step) => step.required === true)).toBe(true);
+  });
+});
+
+describe("framework-owned secret hygiene at the spawn boundary", () => {
+  it("test_should_not_persist_secret_shaped_child_output_into_evidence", async () => {
+    const ctx = freshCtx();
+    try {
+      // Child writes secret-shaped tokens (NVIDIA, GitHub, OpenAI,
+      // Slack, Bearer-prefixed) on both stdout and stderr, then exits
+      // non-zero so stderrTail also flows into result.message. None of
+      // those literal tokens may persist anywhere in the evidence.
+      const body = [
+        'echo "step prints nvapi-1234567890abcdef0123456789"',
+        'echo "and ghp_abcdefghijklmnopqrstuvwxyz0123456789"',
+        'echo "and sk-abcdefghijklmnopqrstuvwxyz0123456789"',
+        'echo "and xoxb-9876543210-fake-bot-token-abc"',
+        'echo "Authorization: Bearer eyJhbGciOiJIUzI1NiJ9.payload.signature" 1>&2',
+        'exit 7',
+      ].join("\n");
+      const script = writeTempScript(ctx.contextDir, "leak.sh", body);
+      const ref = path.relative(REPO_ROOT, script);
+      const step = shellStep("runtime.leak", "runtime", ref);
+      const orchestrator = new PhaseOrchestrator("runtime");
+
+      const result = await orchestrator.run(ctx, makePhase([step]));
+      const assertion = result.assertions[0];
+      const logBody = fs.readFileSync(path.join(ctx.contextDir, ".e2e", "logs", `${step.id}.log`), "utf8");
+      const phaseResultJson = fs.readFileSync(
+        path.join(ctx.contextDir, ".e2e", "runtime.result.json"),
+        "utf8",
+      );
+      const surfaces = [logBody, assertion.message ?? "", phaseResultJson];
+
+      // Every secret-shaped token canonicalized in
+      // src/lib/security/secret-patterns.ts must be redacted on the
+      // way to disk, regardless of which surface is read.
+      const forbiddenPatterns = [
+        /nvapi-[A-Za-z0-9_-]{10,}/,
+        /ghp_[A-Za-z0-9_-]{10,}/,
+        /sk-[A-Za-z0-9_-]{20,}/,
+        /(?:xox[bpas]|xapp)-[A-Za-z0-9-]{10,}/,
+        /Bearer\s+[A-Za-z0-9_.+\/=-]{10,}/i,
+      ];
+      for (const surface of surfaces) {
+        for (const pat of forbiddenPatterns) {
+          expect(surface, `evidence surface must not contain ${pat}`).not.toMatch(pat);
+        }
+        expect(surface).toMatch(/<REDACTED>/);
+      }
+    } finally {
+      fs.rmSync(ctx.contextDir, { recursive: true, force: true });
+    }
+  });
+
+  it("test_should_drop_non_allowlisted_parent_env_unless_declared_in_secretEnv", async () => {
+    const ctx = freshCtx();
+    const sentinelKey = "SECRET_LEAK_PROBE_TOKEN";
+    const previous = process.env[sentinelKey];
+    process.env[sentinelKey] = "sentinel-value-that-must-not-leak";
+    try {
+      const script = writeTempScript(
+        ctx.contextDir,
+        "env-leak.sh",
+        `printenv | sort\n`,
+      );
+      const ref = path.relative(REPO_ROOT, script);
+      // Step does NOT declare SECRET_LEAK_PROBE_TOKEN in secretEnv,
+      // so the framework must drop it before spawn.
+      const step = shellStep("runtime.env-drop", "runtime", ref);
+      const orchestrator = new PhaseOrchestrator("runtime");
+
+      const result = await orchestrator.run(ctx, makePhase([step]));
+      const logBody = fs.readFileSync(path.join(ctx.contextDir, ".e2e", "logs", `${step.id}.log`), "utf8");
+
+      expect(result.assertions[0].status).toBe("passed");
+      expect(logBody, "non-allowlisted parent env must not reach the child").not.toContain(sentinelKey);
+      expect(logBody).not.toContain("sentinel-value-that-must-not-leak");
+      // Framework allowlist + overlay still arrive: PATH and E2E_PHASE.
+      expect(logBody).toMatch(/^PATH=/m);
+      expect(logBody).toMatch(/^E2E_PHASE=runtime$/m);
+    } finally {
+      if (previous === undefined) delete process.env[sentinelKey];
+      else process.env[sentinelKey] = previous;
+      fs.rmSync(ctx.contextDir, { recursive: true, force: true });
+    }
+  });
+
+  it("test_should_pass_declared_secretEnv_through_to_child", async () => {
+    const ctx = freshCtx();
+    const declaredKey = "NEMOCLAW_TEST_API_KEY"; // matches SECRET_ENV_KEY_SHAPE
+    const previous = process.env[declaredKey];
+    process.env[declaredKey] = "declared-secret-value-passes-through";
+    try {
+      const script = writeTempScript(
+        ctx.contextDir,
+        "declared.sh",
+        `printenv ${declaredKey} || echo MISSING\n`,
       );
+      const ref = path.relative(REPO_ROOT, script);
+      const step: AssertionStep = {
+        ...shellStep("runtime.env-declared", "runtime", ref),
+        secretEnv: [declaredKey],
+      };
+      const orchestrator = new PhaseOrchestrator("runtime");
+
+      const result = await orchestrator.run(ctx, makePhase([step]));
+      const logBody = fs.readFileSync(path.join(ctx.contextDir, ".e2e", "logs", `${step.id}.log`), "utf8");
+
+      expect(result.assertions[0].status).toBe("passed");
+      // Declared secret reaches the child verbatim.
+      expect(logBody).toContain("declared-secret-value-passes-through");
+      // It is NOT redacted in printenv output because nothing about
+      // the literal value matches a token-shape pattern. (Real
+      // secrets that match secret-patterns.ts WILL be redacted as a
+      // second line of defense; this synthetic value is intentionally
+      // shape-free to isolate the env-passthrough behavior.)
     } finally {
+      if (previous === undefined) delete process.env[declaredKey];
+      else process.env[declaredKey] = previous;
       fs.rmSync(ctx.contextDir, { recursive: true, force: true });
     }
   });
 
+  it("test_should_reject_non_secret_shaped_keys_in_secretEnv_at_runtime", async () => {
+    const { buildChildEnv } = await import("../scenarios/orchestrators/redaction.ts");
+    expect(() =>
+      buildChildEnv(process.env, { secretEnv: ["FOO_VAR"], frameworkOverlay: {} }),
+    ).toThrow(/secret-key shape/);
+  });
+
+  it("test_should_declare_NVIDIA_API_KEY_only_for_cloud_onboarding_actions", async () => {
+    const { compileRunPlans } = await import("../scenarios/compiler.ts");
+    const plans = compileRunPlans([
+      "ubuntu-repo-cloud-openclaw",
+      "gpu-repo-local-ollama-openclaw",
+    ]);
+    const cloudOnboard = plans[0].phases
+      .find((p) => p.name === "onboarding")
+      ?.actions.find((a) => a.id.startsWith("onboarding.profile."));
+    const localOnboard = plans[1].phases
+      .find((p) => p.name === "onboarding")
+      ?.actions.find((a) => a.id.startsWith("onboarding.profile."));
+    expect(cloudOnboard?.secretEnv).toEqual(["NVIDIA_API_KEY"]);
+    expect(localOnboard?.secretEnv).toEqual([]);
+  });
+});
+
+describe("clients are pass/fail/policy free", () => {
   it("test_should_keep_clients_free_of_pass_fail_and_retry_semantics", () => {
-    const source = fs.readFileSync(
-      path.join(process.cwd(), "test/e2e-scenario/scenarios/clients/host-cli.ts"),
-      "utf8",
-    );
     const observation = new HostCliClient().observeVersion();
 
+    // The client returns a raw act/observe shape only: the command it would
+    // run. It must NOT decide pass/fail, attach retry policy, surface a
+    // classifier, or expose AssertionResult/PhaseResult-shaped fields.
     expect(observation).toEqual(expect.objectContaining({ command: ["nemoclaw", "--version"] }));
-    expect(source).not.toMatch(/AssertionResult|PhaseResult|retry|timeout|passed|failed/);
+    // Raw act/observe fields are allowed (exitCode/stdout/stderr/timing).
+    // Pass/fail and reliability-policy fields are not.
+    const forbiddenKeys = [
+      "status",
+      "attempts",
+      "classifier",
+      "evidence",
+      "retry",
+      "timeout",
+      "timeoutSeconds",
+      "phase",
+      "assertions",
+      "passed",
+      "failed",
+    ];
+    for (const key of forbiddenKeys) {
+      expect(observation).not.toHaveProperty(key);
+    }
   });
 });
diff --git a/test/e2e-scenario/framework-tests/e2e-redaction-parity.test.ts b/test/e2e-scenario/framework-tests/e2e-redaction-parity.test.ts
new file mode 100644
index 0000000000..eb6c785a91
--- /dev/null
+++ b/test/e2e-scenario/framework-tests/e2e-redaction-parity.test.ts
@@ -0,0 +1,73 @@
+// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+/**
+ * Parity test: the framework's local secret-pattern set
+ * (test/e2e-scenario/scenarios/orchestrators/redaction.ts) must stay in
+ * lockstep with the canonical product source
+ * (src/lib/security/secret-patterns.ts).
+ *
+ * The framework deliberately mirrors rather than imports — see the
+ * "Framework-local mirror" comment in redaction.ts for why — but the
+ * mirror is only safe if it is actually a mirror. This test parses
+ * both source files at the textual level and compares the regex
+ * literals.
+ */
+
+import { describe, expect, it } from "vitest";
+import fs from "node:fs";
+import path from "node:path";
+
+const REPO_ROOT = path.resolve(import.meta.dirname, "../../..");
+
+// Pull only regex literals: trimmed lines that start with `/` and end with
+// `/`, optional flags (g, gi, ...), and an optional trailing comma. Comment
+// lines such as `// NVIDIA` also start with `/` but do not end that way.
+const REGEX_LITERAL_LINE = /^\/.+\/[a-z]*,?$/;
+
+function extractFromBlock(block: string): string[] {
+  return block
+    .split("\n")
+    .map((line) => line.trim())
+    .filter((line) => REGEX_LITERAL_LINE.test(line))
+    .map((line) => line.replace(/,\s*$/, ""));
+}
+
+function extractRegexLiterals(source: string, exportName: string): string[] {
+  const re = new RegExp(`export const ${exportName}[^=]*=\\s*\\[([\\s\\S]*?)\\];`, "m");
+  const m = source.match(re);
+  return m ? extractFromBlock(m[1]) : [];
+}
+
+function extractFrameworkArray(source: string, constName: string): string[] {
+  const re = new RegExp(`const ${constName}: RegExp\\[\\] = \\[([\\s\\S]*?)\\];`, "m");
+  const m = source.match(re);
+  return m ? extractFromBlock(m[1]) : [];
+}
+
+describe("framework redaction parity with product source-of-truth", () => {
+  const productSource = fs.readFileSync(
+    path.join(REPO_ROOT, "src/lib/security/secret-patterns.ts"),
+    "utf8",
+  );
+  const frameworkSource = fs.readFileSync(
+    path.join(REPO_ROOT, "test/e2e-scenario/scenarios/orchestrators/redaction.ts"),
+    "utf8",
+  );
+
+  it("test_framework_TOKEN_PREFIX_PATTERNS_matches_product_source", () => {
+    const product = extractRegexLiterals(productSource, "TOKEN_PREFIX_PATTERNS");
+    const framework = extractFrameworkArray(frameworkSource, "TOKEN_PREFIX_PATTERNS");
+    expect(framework.length).toBeGreaterThan(0);
+    expect(product.length).toBeGreaterThan(0);
+    expect(framework).toEqual(product);
+  });
+
+  it("test_framework_CONTEXT_PATTERNS_matches_product_source", () => {
+    const product = extractRegexLiterals(productSource, "CONTEXT_PATTERNS");
+    const framework = extractFrameworkArray(frameworkSource, "CONTEXT_PATTERNS");
+    expect(framework.length).toBeGreaterThan(0);
+    expect(product.length).toBeGreaterThan(0);
+    expect(framework).toEqual(product);
+  });
+});
diff --git a/test/e2e-scenario/framework-tests/e2e-scenario-additional-families.test.ts b/test/e2e-scenario/framework-tests/e2e-scenario-additional-families.test.ts
index 8c2e70caae..2d3c42fba0 100644
--- a/test/e2e-scenario/framework-tests/e2e-scenario-additional-families.test.ts
+++ b/test/e2e-scenario/framework-tests/e2e-scenario-additional-families.test.ts
@@ -2,17 +2,15 @@
 // SPDX-License-Identifier: Apache-2.0
 
 /**
- * Phase 9: Migrate Additional Scenario Families.
- * Verifies metadata for new scenarios (macOS, WSL, GPU local Ollama, Brev
- * launchable, Ubuntu cloud Hermes, and the no-docker negative preflight)
- * plus the deferred schema concepts (scenario-level overrides, negative
- * expected state).
+ * Phase 9: Additional Scenario Families - resolver-level metadata only.
+ *
+ * Plan-printout tests that exercised the deprecated bash entrypoint
+ * (run-scenario.sh --plan-only) were deleted alongside the bash runner.
+ * The TS runner is exercised by e2e-plan-compiler / e2e-scenario-registry
+ * / e2e-phase-orchestrators tests instead.
  */
 
 import { describe, it, expect } from "vitest";
-import { spawnSync } from "node:child_process";
-import fs from "node:fs";
-import os from "node:os";
 import path from "node:path";
 
 import { loadMetadataFromDir } from "../runtime/resolver/load.ts";
@@ -20,27 +18,6 @@ import { resolveScenario } from "../runtime/resolver/plan.ts";
 
 const REPO_ROOT = path.resolve(import.meta.dirname, "../../..");
 const E2E_DIR = path.join(REPO_ROOT, "test/e2e-scenario");
-const RUN_SCENARIO = path.join(E2E_DIR, "runtime", "run-scenario.sh");
-
-function planOnly(scenarioId: string): { stdout: string; stderr: string; status: number | null; plan: Record<string, unknown> } {
-  const tmp = fs.mkdtempSync(path.join(os.tmpdir(), "e2e-p9-"));
-  try {
-    const r = spawnSync("bash", [RUN_SCENARIO, scenarioId, "--plan-only"], {
-      env: { ...process.env, E2E_CONTEXT_DIR: tmp },
-      encoding: "utf8",
-    timeout: Number(process.env.E2E_SPAWN_TIMEOUT_MS ?? 60_000),
-      cwd: REPO_ROOT,
-    });
-    let plan = {};
-    const pj = path.join(tmp, "plan.json");
-    if (fs.existsSync(pj)) {
-      plan = JSON.parse(fs.readFileSync(pj, "utf8"));
-    }
-    return { stdout: r.stdout, stderr: r.stderr, status: r.status, plan };
-  } finally {
-    fs.rmSync(tmp, { recursive: true, force: true });
-  }
-}
 
 describe("Issue 3812: inference/provider suite families", () => {
   it("test_should_route_inference_suite_families_to_domain_specific_steps", () => {
@@ -74,37 +51,6 @@ describe("Phase 9: additional scenario families - metadata", () => {
   });
 });
 
-describe("Phase 9: macOS / WSL plan-only", () => {
-  it("macos scenario plan identifies macOS platform", () => {
-    const { status, plan } = planOnly("macos-repo-cloud-openclaw");
-    expect(status).toBe(0);
-    const dims = (plan as { dimensions: { platform: { profile: { os?: string } } } }).dimensions;
-    expect(dims.platform.profile.os).toBe("macos");
-  });
-
-  it("wsl scenario plan identifies WSL platform", () => {
-    const { status, plan } = planOnly("wsl-repo-cloud-openclaw");
-    expect(status).toBe(0);
-    const dims = (plan as { dimensions: { platform: { profile: { os?: string } } } }).dimensions;
-    expect(dims.platform.profile.os).toBe("wsl");
-  });
-});
-
-describe("Phase 9: GPU local Ollama plan-only", () => {
-  it("runtime indicates GPU/CDI and provider is ollama", () => {
-    const { status, plan } = planOnly("gpu-repo-local-ollama-openclaw");
-    expect(status).toBe(0);
-    const dims = (plan as {
-      dimensions: {
-        runtime: { profile: { gpu_runtime?: string } };
-        onboarding: { profile: { provider?: string } };
-      };
-    }).dimensions;
-    expect(dims.runtime.profile.gpu_runtime).toBe("cdi");
-    expect(dims.onboarding.profile.provider).toBe("ollama");
-  });
-});
-
 describe("Phase 9: Brev launchable scenario (overrides schema)", () => {
   it("should_support_scenario_overrides_on_brev_launchable", () => {
     const meta = loadMetadataFromDir(E2E_DIR);
@@ -116,21 +62,6 @@ describe("Phase 9: Brev launchable scenario (overrides schema)", () => {
     expect(overrides?.onboarding?.gateway?.bind_address).toBeTypeOf("string");
     expect(overrides?.onboarding?.gateway?.bind_address?.length).toBeGreaterThan(0);
   });
-
-  it("plan shows remote target, launchable install, and gateway bind override", () => {
-    const { status, stdout, plan } = planOnly("brev-launchable-cloud-openclaw");
-    expect(status).toBe(0);
-    const dims = (plan as {
-      dimensions: {
-        platform: { profile: { execution_target?: string } };
-        install: { id: string };
-      };
-    }).dimensions;
-    expect(dims.platform.profile.execution_target).toBe("remote");
-    expect(dims.install.id).toBe("launchable");
-    expect(stdout).toMatch(/Overrides:/);
-    expect(stdout).toMatch(/bind_address/);
-  });
 });
 
 describe("Phase 9: negative preflight", () => {
@@ -148,27 +79,4 @@ describe("Phase 9: negative preflight", () => {
     expect(es?.sandbox?.expected).toBe("absent");
     expect(es?.failure?.expected).toBe(true);
   });
-
-  it("negative scenario plan identifies docker missing and negative state", () => {
-    const { status, plan } = planOnly("ubuntu-no-docker-preflight-negative");
-    expect(status).toBe(0);
-    const p = plan as {
-      dimensions: { runtime: { profile: { container_daemon?: string } } };
-      expected_state: { id: string };
-      expected_failure?: {
-        phase?: string;
-        error_class?: string;
-        message_pattern?: string;
-        forbidden_side_effects?: string[];
-      };
-    };
-    expect(p.dimensions.runtime.profile.container_daemon).toBe("missing");
-    expect(p.expected_state.id).toBe("preflight-failure-no-sandbox");
-    expect(p.expected_failure?.phase).toBe("preflight");
-    expect(p.expected_failure?.error_class).toBe("docker-missing");
-    expect(p.expected_failure?.message_pattern).toBeTypeOf("string");
-    expect(p.expected_failure?.forbidden_side_effects).toEqual(
-      expect.arrayContaining(["sandbox-created", "gateway-started", "credentials-written"]),
-    );
-  });
 });
diff --git a/test/e2e-scenario/framework-tests/e2e-scenario-first-migration.test.ts b/test/e2e-scenario/framework-tests/e2e-scenario-first-migration.test.ts
deleted file mode 100644
index 0307ca9103..0000000000
--- a/test/e2e-scenario/framework-tests/e2e-scenario-first-migration.test.ts
+++ /dev/null
@@ -1,102 +0,0 @@
-// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-// SPDX-License-Identifier: Apache-2.0
-
-/**
- * Phase 6: Migrate First Scenario - ubuntu-repo-cloud-openclaw.
- * Verifies resolver output, plan printout, and dry-run phase ordering.
- */
-
-import { describe, it, expect } from "vitest";
-import { spawnSync } from "node:child_process";
-import fs from "node:fs";
-import os from "node:os";
-import path from "node:path";
-
-import { loadMetadataFromDir } from "../runtime/resolver/load.ts";
-import { resolveScenario } from "../runtime/resolver/plan.ts";
-
-const REPO_ROOT = path.resolve(import.meta.dirname, "../../..");
-const E2E_DIR = path.join(REPO_ROOT, "test/e2e-scenario");
-const RUN_SCENARIO = path.join(E2E_DIR, "runtime", "run-scenario.sh");
-
-describe("Phase 6: ubuntu-repo-cloud-openclaw migration", () => {
-  it("ubuntu_repo_cloud_openclaw_should_resolve_to_cloud_openclaw_ready", () => {
-    const meta = loadMetadataFromDir(E2E_DIR);
-    const plan = resolveScenario("ubuntu-repo-cloud-openclaw", meta);
-    expect(plan.expected_state.id).toBe("cloud-openclaw-ready");
-    const suiteIds = plan.suites.map((s) => s.id);
-    expect(suiteIds).toContain("smoke");
-    expect(suiteIds).toContain("inference");
-  });
-
-  it("ubuntu_repo_cloud_openclaw_plan_should_include_setup_install_onboard", () => {
-    const tmp = fs.mkdtempSync(path.join(os.tmpdir(), "e2e-first-"));
-    try {
-      const r = spawnSync(
-        "bash",
-        [RUN_SCENARIO, "ubuntu-repo-cloud-openclaw", "--plan-only"],
-        { env: { ...process.env, E2E_CONTEXT_DIR: tmp }, encoding: "utf8",
-    timeout: Number(process.env.E2E_SPAWN_TIMEOUT_MS ?? 60_000), cwd: REPO_ROOT },
-      );
-      expect(r.status, r.stderr).toBe(0);
-      expect(r.stdout).toMatch(/install=repo-current/);
-      expect(r.stdout).toMatch(/runtime=docker-running/);
-      expect(r.stdout).toMatch(/onboarding=cloud-openclaw/);
-    } finally {
-      fs.rmSync(tmp, { recursive: true, force: true });
-    }
-  });
-
-  it("ubuntu_repo_cloud_openclaw_dry_run_should_execute_phases_in_order", () => {
-    const tmp = fs.mkdtempSync(path.join(os.tmpdir(), "e2e-first-"));
-    try {
-      const trace = path.join(tmp, "trace.log");
-      const r = spawnSync(
-        "bash",
-        [RUN_SCENARIO, "ubuntu-repo-cloud-openclaw", "--dry-run"],
-        {
-          env: { ...process.env, E2E_CONTEXT_DIR: tmp, E2E_TRACE_FILE: trace },
-          encoding: "utf8",
-    timeout: Number(process.env.E2E_SPAWN_TIMEOUT_MS ?? 60_000),
-          cwd: REPO_ROOT,
-        },
-      );
-      expect(r.status, r.stderr).toBe(0);
-      expect(fs.existsSync(trace)).toBe(true);
-      const contents = fs.readFileSync(trace, "utf8");
-      const order = [
-        "env:noninteractive",
-        "install:repo-current",
-        "onboard:cloud-openclaw",
-        "gateway:check",
-        "sandbox:check",
-      ];
-      let pos = 0;
-      for (const marker of order) {
-        const idx = contents.indexOf(marker, pos);
-        expect(idx, `missing marker ${marker}. trace:\n${contents}`).toBeGreaterThanOrEqual(0);
-        pos = idx + marker.length;
-      }
-      // The run should also seed the context and produce plan.json.
-      expect(fs.existsSync(path.join(tmp, "context.env"))).toBe(true);
-      expect(fs.existsSync(path.join(tmp, "plan.json"))).toBe(true);
-      // After dry-run, suite runner should be able to execute the full
-      // suite sequence against the emitted context.
-      const suites = spawnSync(
-        "bash",
-        [path.join(E2E_DIR, "runtime", "run-suites.sh"), "smoke", "inference"],
-        {
-          env: { ...process.env, E2E_CONTEXT_DIR: tmp, E2E_DRY_RUN: "1" },
-          encoding: "utf8",
-    timeout: Number(process.env.E2E_SPAWN_TIMEOUT_MS ?? 60_000),
-          cwd: REPO_ROOT,
-        },
-      );
-      expect(suites.status, `suite stderr:${suites.stderr}\nstdout:${suites.stdout}`).toBe(0);
-      expect(suites.stdout).toMatch(/PASS smoke\/cli-available/);
-      expect(suites.stdout).toMatch(/PASS inference\/models-health/);
-    } finally {
-      fs.rmSync(tmp, { recursive: true, force: true });
-    }
-  });
-});
diff --git a/test/e2e-scenario/framework-tests/e2e-scenario-resolver.test.ts b/test/e2e-scenario/framework-tests/e2e-scenario-resolver.test.ts
index dc4f105884..0111aa0e42 100644
--- a/test/e2e-scenario/framework-tests/e2e-scenario-resolver.test.ts
+++ b/test/e2e-scenario/framework-tests/e2e-scenario-resolver.test.ts
@@ -199,62 +199,6 @@ suites:
   });
 });
 
-describe("run-scenario.sh --plan-only", () => {
-  it("run_scenario_plan_only_should_print_plan", () => {
-    const tmp = fs.mkdtempSync(path.join(os.tmpdir(), "e2e-plan-"));
-    try {
-      const result = spawnSync(
-        "bash",
-        [
-          path.join(E2E_DIR, "runtime", "run-scenario.sh"),
-          "ubuntu-repo-cloud-openclaw",
-          "--plan-only",
-        ],
-        {
-          env: { ...process.env, E2E_CONTEXT_DIR: tmp },
-          encoding: "utf8",
-    timeout: Number(process.env.E2E_SPAWN_TIMEOUT_MS ?? 60_000),
-          cwd: REPO_ROOT,
-        },
-      );
-      expect(result.status, result.stderr).toBe(0);
-      expect(result.stdout).toContain("ubuntu-repo-cloud-openclaw");
-      expect(result.stdout).toContain("cloud-openclaw-ready");
-      expect(result.stdout).toContain("smoke");
-      expect(result.stdout).toContain("inference");
-      const planJsonPath = path.join(tmp, "plan.json");
-      expect(fs.existsSync(planJsonPath)).toBe(true);
-      const doc = JSON.parse(fs.readFileSync(planJsonPath, "utf8"));
-      expect(doc.scenario_id).toBe("ubuntu-repo-cloud-openclaw");
-      expect(doc.expected_state.id).toBe("cloud-openclaw-ready");
-      expect(Array.isArray(doc.suites)).toBe(true);
-      expect(doc.suites.map((s: { id: string }) => s.id)).toContain("smoke");
-    } finally {
-      fs.rmSync(tmp, { recursive: true, force: true });
-    }
-  });
-
-  it("run_scenario_plan_only_should_fail_for_unknown_scenario", () => {
-    const tmp = fs.mkdtempSync(path.join(os.tmpdir(), "e2e-plan-"));
-    try {
-      const result = spawnSync(
-        "bash",
-        [
-          path.join(E2E_DIR, "runtime", "run-scenario.sh"),
-          "does-not-exist",
-          "--plan-only",
-        ],
-        {
-          env: { ...process.env, E2E_CONTEXT_DIR: tmp },
-          encoding: "utf8",
-    timeout: Number(process.env.E2E_SPAWN_TIMEOUT_MS ?? 60_000),
-          cwd: REPO_ROOT,
-        },
-      );
-      expect(result.status).not.toBe(0);
-      expect(`${result.stderr}${result.stdout}`).toMatch(/does-not-exist/);
-    } finally {
-      fs.rmSync(tmp, { recursive: true, force: true });
-    }
-  });
-});
+// run-scenario.sh-based plan-only tests removed: the bash runner is
+// now a fail-fast stub. Equivalent coverage of the typed runner lives in
+// e2e-plan-compiler.test.ts and e2e-scenario-registry.test.ts.
diff --git a/test/e2e-scenario/framework-tests/e2e-scenarios-workflow.test.ts b/test/e2e-scenario/framework-tests/e2e-scenarios-workflow.test.ts
index eb1be9ae19..5a1e3d8906 100644
--- a/test/e2e-scenario/framework-tests/e2e-scenarios-workflow.test.ts
+++ b/test/e2e-scenario/framework-tests/e2e-scenarios-workflow.test.ts
@@ -50,8 +50,9 @@ jobs:
           "run-scenario job must use the resolved runner output",
           "run-scenario job missing step: Run typed scenarios in WSL",
           "artifact upload name must include the scenarios input",
-          "artifact upload must include hidden .e2e files",
-          "artifact upload path must include .e2e/",
+          "artifact upload must set include-hidden-files: false (raw context.env must not leak)",
+          "artifact upload path must include .e2e/actions/ (redacted action evidence)",
+          "artifact upload path must include .e2e/logs/ (redacted shell-step evidence)",
         ]),
       );
     } finally {
diff --git a/test/e2e-scenario/framework-tests/e2e-suite-runner.test.ts b/test/e2e-scenario/framework-tests/e2e-suite-runner.test.ts
deleted file mode 100644
index ded16c1917..0000000000
--- a/test/e2e-scenario/framework-tests/e2e-suite-runner.test.ts
+++ /dev/null
@@ -1,250 +0,0 @@
-// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-// SPDX-License-Identifier: Apache-2.0
-
-import { describe, it, expect } from "vitest";
-import { spawnSync, type SpawnSyncReturns } from "node:child_process";
-import fs from "node:fs";
-import os from "node:os";
-import path from "node:path";
-const REPO_ROOT = path.resolve(import.meta.dirname, "../../..");
-const RUN_SUITES = path.join(REPO_ROOT, "test/e2e-scenario/runtime/run-suites.sh");
-
-function runSuites(args: string[], env: Record<string, string> = {}): SpawnSyncReturns<string> {
-  return spawnSync("bash", [RUN_SUITES, ...args], {
-    env: { ...process.env, ...env },
-    encoding: "utf8",
-    timeout: Number(process.env.E2E_SPAWN_TIMEOUT_MS ?? 60_000),
-    cwd: REPO_ROOT,
-  });
-}
-
-function seedContext(tmp: string, values: Record<string, string>): void {
-  fs.mkdirSync(tmp, { recursive: true });
-  const ctx = Object.entries(values)
-    .map(([k, v]) => `${k}=${v}`)
-    .join("\n");
-  fs.writeFileSync(path.join(tmp, "context.env"), `${ctx}\n`);
-}
-
-function fullContext(): Record<string, string> {
-  return {
-    E2E_SCENARIO: "ubuntu-repo-cloud-openclaw",
-    E2E_PLATFORM_OS: "ubuntu",
-    E2E_EXECUTION_TARGET: "local",
-    E2E_INSTALL_METHOD: "repo-checkout",
-    E2E_CONTAINER_ENGINE: "docker",
-    E2E_CONTAINER_DAEMON: "running",
-    E2E_ONBOARDING_PATH: "cloud",
-    E2E_AGENT: "openclaw",
-    E2E_PROVIDER: "nvidia",
-    E2E_SANDBOX_NAME: "e2e-ubuntu-repo-cloud-openclaw",
-    E2E_GATEWAY_URL: "http://127.0.0.1:18789",
-    E2E_INFERENCE_ROUTE: "inference-local",
-  };
-}
-
-describe("Issue #3810 messaging suite wiring", () => {
-  it("should_define_real_steps_for_messaging_provider_suites", () => {
-    const tmp = fs.mkdtempSync(path.join(os.tmpdir(), "e2e-messaging-suites-"));
-    try {
-      const baseContext = {
-        ...fullContext(),
-        E2E_PROVIDER: "telegram",
-        E2E_MESSAGING_PROVIDER: "telegram",
-        E2E_MESSAGING_BRIDGE_URL: "http://127.0.0.1:18789",
-        E2E_MESSAGING_CONFIG_CONTENT: "TELEGRAM_BOT_TOKEN=PLACEHOLDER",
-      };
-      seedContext(tmp, baseContext);
-      const telegram = runSuites(["messaging-telegram"], {
-        E2E_CONTEXT_DIR: tmp,
-        E2E_DRY_RUN: "1",
-      });
-      expect(telegram.status, `stderr:${telegram.stderr}\nstdout:${telegram.stdout}`).toBe(0);
-      seedContext(tmp, {
-        ...baseContext,
-        E2E_MESSAGING_PROVIDER: "discord",
-        E2E_MESSAGING_CONFIG_CONTENT: "DISCORD_BOT_TOKEN=PLACEHOLDER",
-      });
-      const discord = runSuites(["messaging-discord"], {
-        E2E_CONTEXT_DIR: tmp,
-        E2E_DRY_RUN: "1",
-      });
-      expect(discord.status, `stderr:${discord.stderr}\nstdout:${discord.stdout}`).toBe(0);
-      seedContext(tmp, {
-        ...baseContext,
-        E2E_MESSAGING_PROVIDER: "slack",
-        E2E_MESSAGING_CHANNEL: "bot",
-        E2E_MESSAGING_CONFIG_CONTENT: "SLACK_BOT_TOKEN=PLACEHOLDER",
-      });
-      const slack = runSuites(["messaging-slack"], {
-        E2E_CONTEXT_DIR: tmp,
-        E2E_DRY_RUN: "1",
-      });
-      expect(slack.status, `stderr:${slack.stderr}\nstdout:${slack.stdout}`).toBe(0);
-      const output = `${telegram.stdout}\n${discord.stdout}\n${slack.stdout}`;
-      for (const id of [
-        "messaging-provider-attached",
-        "messaging-placeholder-configured",
-        "messaging-no-secret-leak",
-        "messaging-bridge-reachable",
-        "telegram-injection-safety",
-        "discord-gateway-path",
-        "slack-provider-state",
-        "slack.runtime-discovery",
-      ]) {
-        expect(output).toContain(id);
-      }
-      expect(output).not.toContain("cli-available");
-    } finally {
-      fs.rmSync(tmp, { recursive: true, force: true });
-    }
-  });
-});
-
-describe("run-suites.sh", () => {
-  it("security_credentials_suite_should_emit_stable_assertion_ids", () => {
-    const tmp = fs.mkdtempSync(path.join(os.tmpdir(), "e2e-security-credentials-"));
-    try {
-      seedContext(tmp, { ...fullContext(), E2E_CREDENTIALS_EXPECTED: "present" });
-      const r = runSuites(["security-credentials"], { E2E_CONTEXT_DIR: tmp, E2E_DRY_RUN: "1", HOME: tmp });
-      expect(r.status, `stderr:${r.stderr}\nstdout:${r.stdout}`).toBe(0);
-      expect(r.stdout).toContain("post-onboard.credentials.gateway-list-redacts-values");
-      expect(r.stdout).toContain("post-onboard.credentials.no-plaintext-host-store");
-      expect(r.stdout).not.toMatch(/no-credentials-leaked|assert\//);
-    } finally {
-      fs.rmSync(tmp, { recursive: true, force: true });
-    }
-  });
-
-  it("run_suites_should_run_steps_in_declared_order", () => {
-    const tmp = fs.mkdtempSync(path.join(os.tmpdir(), "e2e-suite-"));
-    try {
-      seedContext(tmp, fullContext());
-      const r = runSuites(["smoke"], {
-        E2E_CONTEXT_DIR: tmp,
-        E2E_DRY_RUN: "1",
-      });
-      expect(r.status, `stderr:${r.stderr}\nstdout:${r.stdout}`).toBe(0);
-      // Smoke order is: cli-available, gateway-health, sandbox-listed, sandbox-shell
-      const order = ["cli-available", "gateway-health", "sandbox-listed", "sandbox-shell"];
-      let pos = 0;
-      for (const marker of order) {
-        const idx = r.stdout.indexOf(marker, pos);
-        expect(idx, `missing marker ${marker} after ${pos} in:\n${r.stdout}`).toBeGreaterThanOrEqual(0);
-        pos = idx + marker.length;
-      }
-    } finally {
-      fs.rmSync(tmp, { recursive: true, force: true });
-    }
-  });
-
-  it("run_suites_should_fail_on_unknown_suite", () => {
-    const tmp = fs.mkdtempSync(path.join(os.tmpdir(), "e2e-suite-"));
-    try {
-      seedContext(tmp, fullContext());
-      const r = runSuites(["does-not-exist"], { E2E_CONTEXT_DIR: tmp, E2E_DRY_RUN: "1" });
-      expect(r.status).not.toBe(0);
-      expect(`${r.stdout}${r.stderr}`).toMatch(/does-not-exist/);
-    } finally {
-      fs.rmSync(tmp, { recursive: true, force: true });
-    }
-  });
-
-  it("run_suites_should_stop_on_first_failed_step", () => {
-    const tmp = fs.mkdtempSync(path.join(os.tmpdir(), "e2e-suite-"));
-    try {
-      seedContext(tmp, fullContext());
-      // Use a fixture suites file with a failing middle step.
-      const fixtureSuites = path.join(tmp, "suites.yaml");
-      const fixtureDir = path.join(tmp, "suites", "fixture");
-      fs.mkdirSync(fixtureDir, { recursive: true });
-      fs.writeFileSync(path.join(fixtureDir, "00-a.sh"), "#!/usr/bin/env bash\necho A-RAN\nexit 0\n");
-      fs.writeFileSync(path.join(fixtureDir, "01-b.sh"), "#!/usr/bin/env bash\necho B-RAN\nexit 1\n");
-      fs.writeFileSync(path.join(fixtureDir, "02-c.sh"), "#!/usr/bin/env bash\necho C-RAN\nexit 0\n");
-      fs.chmodSync(path.join(fixtureDir, "00-a.sh"), 0o755);
-      fs.chmodSync(path.join(fixtureDir, "01-b.sh"), 0o755);
-      fs.chmodSync(path.join(fixtureDir, "02-c.sh"), 0o755);
-      fs.writeFileSync(
-        fixtureSuites,
-        `suites:
-  fixture:
-    steps:
-      - { id: a, script: suites/fixture/00-a.sh }
-      - { id: b, script: suites/fixture/01-b.sh }
-      - { id: c, script: suites/fixture/02-c.sh }
-`,
-      );
-      const r = runSuites(["fixture"], {
-        E2E_CONTEXT_DIR: tmp,
-        E2E_SUITES_FILE: fixtureSuites,
-        E2E_SUITES_DIR: tmp,
-      });
-      expect(r.status).not.toBe(0);
-      expect(r.stdout).toContain("A-RAN");
-      expect(r.stdout).toContain("B-RAN");
-      expect(r.stdout).not.toContain("C-RAN");
-      expect(`${r.stdout}${r.stderr}`).toMatch(/FAIL.*(fixture\/b|step=b)/i);
-    } finally {
-      fs.rmSync(tmp, { recursive: true, force: true });
-    }
-  });
-
-  it("smoke_suite_should_require_context", () => {
-    const tmp = fs.mkdtempSync(path.join(os.tmpdir(), "e2e-suite-"));
-    try {
-      // No context.env written to tmp.
-      const r = runSuites(["smoke"], { E2E_CONTEXT_DIR: tmp, E2E_DRY_RUN: "1" });
-      expect(r.status).not.toBe(0);
-      expect(`${r.stderr}${r.stdout}`).toMatch(/context\.env|E2E_SCENARIO|missing/i);
-    } finally {
-      fs.rmSync(tmp, { recursive: true, force: true });
-    }
-  });
-
-  it("rebuild_and_upgrade_suites_should_emit_stable_assertion_ids_in_dry_run", () => {
-    const tmp = fs.mkdtempSync(path.join(os.tmpdir(), "e2e-suite-"));
-    try {
-      seedContext(tmp, fullContext());
-      const r = runSuites(["rebuild", "upgrade"], { E2E_CONTEXT_DIR: tmp, E2E_DRY_RUN: "1" });
-      expect(r.status, `stderr:${r.stderr}\nstdout:${r.stdout}`).toBe(0);
-      for (const id of [
-        "suite.rebuild.workspace_state_preserved",
-        "suite.rebuild.agent_version_upgraded",
-        "suite.rebuild.inference_still_works",
-        "suite.rebuild.policy_presets_preserved",
-        "suite.rebuild.hermes_config_preserved",
-        "suite.upgrade.sandbox_registry_preserved",
-        "suite.upgrade.gateway_version_upgraded",
-        "suite.upgrade.survivor_agent_reachable",
-      ]) {
-        expect(r.stdout).toContain(id);
-      }
-    } finally {
-      fs.rmSync(tmp, { recursive: true, force: true });
-    }
-  });
-
-  it("smoke_and_inference_run_with_stub_context", () => {
-    const tmp = fs.mkdtempSync(path.join(os.tmpdir(), "e2e-suite-"));
-    try {
-      seedContext(tmp, fullContext());
-      const r = runSuites(["smoke", "inference"], { E2E_CONTEXT_DIR: tmp, E2E_DRY_RUN: "1" });
-      expect(r.status, `stderr:${r.stderr}\nstdout:${r.stdout}`).toBe(0);
-      for (const id of [
-        "cli-available",
-        "gateway-health",
-        "sandbox-listed",
-        "sandbox-shell",
-        "models-health",
-        "chat-completion",
-        "sandbox-inference-local",
-      ]) {
-        expect(r.stdout).toContain(id);
-      }
-      // Summary should call out PASS for each step.
-      expect(r.stdout).toMatch(/PASS/);
-    } finally {
-      fs.rmSync(tmp, { recursive: true, force: true });
-    }
-  });
-});
diff --git a/test/e2e-scenario/nemoclaw_scenarios/dispatch-action.sh b/test/e2e-scenario/nemoclaw_scenarios/dispatch-action.sh
new file mode 100755
index 0000000000..5aaca1b2c1
--- /dev/null
+++ b/test/e2e-scenario/nemoclaw_scenarios/dispatch-action.sh
@@ -0,0 +1,75 @@
+#!/usr/bin/env bash
+# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Phase-action launcher for the hybrid scenario E2E framework.
+#
+# The phase orchestrators (EnvironmentOrchestrator, OnboardingOrchestrator)
+# call this launcher to invoke a function defined in a sourced shell
+# dispatcher (install/dispatch.sh or onboard/dispatch.sh). Those
+# dispatchers are intentionally library-style (function definitions
+# only); this script gives them a deterministic executable entrypoint
+# the typed runner can spawn.
+#
+# Usage:
+#   dispatch-action.sh <fn> <arg> <dispatcher-script>
+#
+# Examples:
+#   dispatch-action.sh e2e_install repo-current \
+#     test/e2e-scenario/nemoclaw_scenarios/install/dispatch.sh
+#
+#   dispatch-action.sh e2e_onboard cloud-openclaw \
+#     test/e2e-scenario/nemoclaw_scenarios/onboard/dispatch.sh
+#
+# Environment (set by the orchestrator):
+#   E2E_CONTEXT_DIR  artifact directory
+#   E2E_PHASE        environment | onboarding
+#   E2E_ACTION_ID    stable action id, used for trace/log correlation
+
+set -euo pipefail
+
+if [[ $# -lt 3 ]]; then
+  echo "dispatch-action.sh: usage: <fn> <arg> <dispatcher-script>" >&2
+  exit 2
+fi
+
+ACTION_FN="$1"
+ACTION_ARG="$2"
+DISPATCHER="$3"
+
+if [[ ! -f "${DISPATCHER}" ]]; then
+  echo "dispatch-action.sh: dispatcher script not found: ${DISPATCHER}" >&2
+  exit 2
+fi
+
+# Source the runtime/lib helpers the dispatchers (and their workers) rely on.
+RUNTIME_LIB="$(cd "$(dirname "${BASH_SOURCE[0]}")/../runtime/lib" && pwd)"
+# shellcheck source=runtime/lib/env.sh
+. "${RUNTIME_LIB}/env.sh"
+# shellcheck source=runtime/lib/context.sh
+. "${RUNTIME_LIB}/context.sh"
+
+# Apply the standard non-interactive env for this action. Every action is a
+# separate launcher process, so this runs on each invocation rather than once
+# per run; e2e_env_apply_noninteractive is idempotent.
+e2e_env_apply_noninteractive
+e2e_env_trace "phase:${E2E_PHASE:-unknown}/action:${E2E_ACTION_ID:-unknown}"
+
+# IMPORTANT: do NOT call e2e_context_init here. The TS framework
+# (ScenarioRunner.seedContextEnv) is the single owner of context.env
+# initialization for the run; e2e_context_init opens with `: > ctx`
+# which would truncate the file and wipe seeded keys (E2E_SCENARIO,
+# E2E_GATEWAY_URL, ...) that runtime assertions require.
+# Workers may still call e2e_context_set to extend context.env in place.
+
+# Source the dispatcher last so its function definitions are in scope
+# when we invoke the requested function.
+# shellcheck source=/dev/null
+. "${DISPATCHER}"
+
+if ! declare -F "${ACTION_FN}" >/dev/null 2>&1; then
+  echo "dispatch-action.sh: function not found in dispatcher: ${ACTION_FN}" >&2
+  exit 2
+fi
+
+"${ACTION_FN}" "${ACTION_ARG}"
diff --git a/test/e2e-scenario/nemoclaw_scenarios/fixtures/older-base-image.sh b/test/e2e-scenario/nemoclaw_scenarios/fixtures/older-base-image.sh
index 3d49c03116..d10fbd2c9d 100755
--- a/test/e2e-scenario/nemoclaw_scenarios/fixtures/older-base-image.sh
+++ b/test/e2e-scenario/nemoclaw_scenarios/fixtures/older-base-image.sh
@@ -12,8 +12,6 @@
 #   older_base_image_prepare <tag> [--registry ghcr.io/nvidia/nemoclaw]
 #     Writes a minimal Dockerfile to a temp location whose first line is
 #     `FROM <registry>:<tag>`, and prints the Dockerfile path on stdout.
-#     Honors E2E_DRY_RUN: skips the `docker pull` step (but still writes
-#     the Dockerfile, which is what callers inspect).
 #   older_base_image_cleanup <dockerfile-path>
 #     Removes the generated Dockerfile and (if present) its build context.
 
@@ -50,11 +48,9 @@ LABEL nemoclaw.e2e.fixture=older-base-image
 EOF
 
   e2e_env_trace "fixture:older-base-image" "${registry}:${tag}"
-  if ! e2e_env_is_dry_run; then
-    if command -v docker >/dev/null 2>&1; then
-      docker pull "${registry}:${tag}" >&2 \
-        || echo "older_base_image_prepare: docker pull failed (continuing; build may still succeed on cached layers)" >&2
-    fi
+  if command -v docker >/dev/null 2>&1; then
+    docker pull "${registry}:${tag}" >&2 \
+      || echo "older_base_image_prepare: docker pull failed (continuing; build may still succeed on cached layers)" >&2
   fi
   printf '%s\n' "${dockerfile}"
 }
diff --git a/test/e2e-scenario/nemoclaw_scenarios/install/dispatch.sh b/test/e2e-scenario/nemoclaw_scenarios/install/dispatch.sh
index 7ea798cfdf..1a2ec2b0aa 100755
--- a/test/e2e-scenario/nemoclaw_scenarios/install/dispatch.sh
+++ b/test/e2e-scenario/nemoclaw_scenarios/install/dispatch.sh
@@ -4,7 +4,7 @@
 #
 # Install dispatcher. Routes by install-method / profile id to one of four
 # split helpers (repo-current.sh, public-curl.sh, ollama.sh,
-# launchable.sh). Honors E2E_DRY_RUN.
+# launchable.sh).
 #
 # Accepts both legacy install-method names (repo-checkout,
 # curl-install-script) and the new profile-centric names used by
diff --git a/test/e2e-scenario/nemoclaw_scenarios/install/launchable.sh b/test/e2e-scenario/nemoclaw_scenarios/install/launchable.sh
index 5ec638e90a..09d8aa3bbb 100755
--- a/test/e2e-scenario/nemoclaw_scenarios/install/launchable.sh
+++ b/test/e2e-scenario/nemoclaw_scenarios/install/launchable.sh
@@ -18,11 +18,6 @@ _E2E_INST_LNCH_RUNTIME_LIB="$(cd "${_E2E_INST_LNCH_DIR}/../../runtime/lib" && pw
 
 e2e_install_launchable() {
   e2e_env_trace "install-launchable"
-  if e2e_env_is_dry_run; then
-    echo "[dry-run] install-launchable (skipped)"
-    return 0
-  fi
-
   # Match nightly launchable-smoke-e2e: exercise the launchable bootstrap
   # script on the current runner instead of assuming a pre-provisioned Brev VM.
   # The script has no Brev API dependency; it installs Docker/OpenShell/NemoClaw
diff --git a/test/e2e-scenario/nemoclaw_scenarios/install/ollama.sh b/test/e2e-scenario/nemoclaw_scenarios/install/ollama.sh
index a9d5f81c14..449eae519a 100755
--- a/test/e2e-scenario/nemoclaw_scenarios/install/ollama.sh
+++ b/test/e2e-scenario/nemoclaw_scenarios/install/ollama.sh
@@ -17,10 +17,6 @@ _E2E_INST_OL_RUNTIME_LIB="$(cd "${_E2E_INST_OL_DIR}/../../runtime/lib" && pwd)"
 
 e2e_install_ollama() {
   e2e_env_trace "install-ollama"
-  if e2e_env_is_dry_run; then
-    echo "[dry-run] install-ollama (skipped)"
-    return 0
-  fi
   local ollama_url="${E2E_OLLAMA_INSTALL_URL:-https://ollama.ai/install.sh}"
   if ! command -v ollama >/dev/null 2>&1; then
     if ! curl -fsSL --retry 3 --retry-delay 2 "${ollama_url}" | bash; then
diff --git a/test/e2e-scenario/nemoclaw_scenarios/install/public-curl.sh b/test/e2e-scenario/nemoclaw_scenarios/install/public-curl.sh
index 143d097f0d..6628e332a2 100755
--- a/test/e2e-scenario/nemoclaw_scenarios/install/public-curl.sh
+++ b/test/e2e-scenario/nemoclaw_scenarios/install/public-curl.sh
@@ -16,10 +16,6 @@ _E2E_INST_CURL_RUNTIME_LIB="$(cd "${_E2E_INST_CURL_DIR}/../../runtime/lib" && pw
 
 e2e_install_curl() {
   e2e_env_trace "install-curl"
-  if e2e_env_is_dry_run; then
-    echo "[dry-run] install-curl (skipped)"
-    return 0
-  fi
   local url="${E2E_INSTALLER_URL:-https://raw.githubusercontent.com/NVIDIA/NemoClaw/main/scripts/install.sh}"
   local sha256="${E2E_INSTALLER_SHA256:-}"
   local tmp
diff --git a/test/e2e-scenario/nemoclaw_scenarios/install/repo-current.sh b/test/e2e-scenario/nemoclaw_scenarios/install/repo-current.sh
index 8c985dc3f7..000431a4b8 100755
--- a/test/e2e-scenario/nemoclaw_scenarios/install/repo-current.sh
+++ b/test/e2e-scenario/nemoclaw_scenarios/install/repo-current.sh
@@ -5,7 +5,7 @@
 # Install from a checked-out repo (repo-current / repo-checkout profile).
 #
 # Split from the install dispatcher to keep scenario setup logic flat and to
-# make the per-profile code discoverable by grep. Honors E2E_DRY_RUN.
+# make the per-profile code discoverable by grep.
 
 _E2E_INST_REPO_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
 _E2E_INST_REPO_RUNTIME_LIB="$(cd "${_E2E_INST_REPO_DIR}/../../runtime/lib" && pwd)"
@@ -16,10 +16,6 @@ _E2E_INST_REPO_RUNTIME_LIB="$(cd "${_E2E_INST_REPO_DIR}/../../runtime/lib" && pw
 
 e2e_install_repo() {
   e2e_env_trace "install-repo"
-  if e2e_env_is_dry_run; then
-    echo "[dry-run] install-repo (skipped)"
-    return 0
-  fi
   local repo_root
   repo_root="$(cd "${_E2E_INST_REPO_DIR}/../../../.." && pwd)"
   cd "${repo_root}" || return
diff --git a/test/e2e-scenario/nemoclaw_scenarios/onboard/cloud-openclaw-no-docker.sh b/test/e2e-scenario/nemoclaw_scenarios/onboard/cloud-openclaw-no-docker.sh
new file mode 100644
index 0000000000..9c7b9803f1
--- /dev/null
+++ b/test/e2e-scenario/nemoclaw_scenarios/onboard/cloud-openclaw-no-docker.sh
@@ -0,0 +1,74 @@
+#!/usr/bin/env bash
+# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Onboard worker: cloud-openclaw-no-docker profile.
+#
+# Drives the negative `ubuntu-no-docker-preflight-negative` scenario by:
+#
+#   1. Installing a `docker` shim earlier on PATH that exits non-zero
+#      with a "Cannot connect to the Docker daemon" message. This makes
+#      `commandExists("docker")` succeed (the binary is present) while
+#      `docker info` fails — matching the production failure mode users
+#      see when Docker is installed but the daemon is not running.
+#
+#   2. Running `nemoclaw onboard --non-interactive` with stdout+stderr
+#      captured to `${E2E_CONTEXT_DIR}/negative-preflight.log`. The
+#      `onboarding.preflight.expected-failed` assertion greps that file.
+#
+#   3. Asserting that nemoclaw exits non-zero (preflight DID fail). If
+#      onboard unexpectedly succeeds, the action fails so the operator
+#      sees a clear "expected failure did not happen" signal instead of a
+#      green light masking a regression.
+#
+#   4. Returning 0 on the *expected* failure path so the orchestrator
+#      reports the action as passed and the assertion phase runs against
+#      the captured log. Without this, the action would be marked failed
+#      and the dependent assertions would be skipped.
+#
+# Pattern mirrors test/e2e/e2e-cloud-experimental/test-port8080-conflict.sh,
+# which sets up a different failure condition (port 8080 occupied) but
+# follows the same capture-output / check-exit / grep-log shape.
+
+e2e_onboard_cloud_openclaw_no_docker() {
+  e2e_env_apply_noninteractive
+  # Deliberately no e2e_context_init: it truncates context.env, which would
+  # wipe the keys the TS runner seeded for this run (see dispatch-action.sh).
+  local log shim_dir rc=0
+  log="${E2E_CONTEXT_DIR}/negative-preflight.log"
+  shim_dir="$(mktemp -d -t e2e-no-docker-XXXXXX)"
+
+  cat >"${shim_dir}/docker" <<'SHIM'
+#!/usr/bin/env bash
+# Negative-preflight docker shim — preserves "docker is installed" while
+# breaking "docker info" / "docker version" so preflight fails with the
+# real "Cannot connect to the Docker daemon" message.
+printf 'Cannot connect to the Docker daemon at unix:///var/run/docker.sock. Is the docker daemon running?\n' >&2
+exit 1
+SHIM
+  chmod +x "${shim_dir}/docker"
+
+  echo "negative-preflight: shim docker installed at ${shim_dir}/docker"
+  echo "negative-preflight: log_file=${log}"
+  echo "negative-preflight: invoking nemoclaw onboard --non-interactive (expected to fail at preflight)"
+  # Prepend the shim for this one command only; `|| rc=$?` records the exit
+  PATH="${shim_dir}:${PATH}" \
+    nemoclaw onboard --non-interactive --yes-i-accept-third-party-software \
+    >"${log}" 2>&1 || rc=$?
+
+  rm -rf -- "${shim_dir:?}"
+
+  echo "negative-preflight: nemoclaw onboard exited ${rc}"
+  if [[ -f "${log}" ]]; then
+    echo "--- captured log tail (${log}) ---"
+    tail -50 "${log}" 2>/dev/null || true
+    echo "--- end captured log ---"
+  fi
+
+  if [[ "${rc}" -eq 0 ]]; then
+    echo "negative-preflight: ERROR: nemoclaw onboard unexpectedly exited 0; preflight should have failed when docker is unreachable" >&2
+    return 1
+  fi
+  # A non-zero onboard exit is the expected outcome, so the action succeeds.
+  return 0
+}
diff --git a/test/e2e-scenario/nemoclaw_scenarios/onboard/dispatch.sh b/test/e2e-scenario/nemoclaw_scenarios/onboard/dispatch.sh
index 2baf698986..fba1004559 100755
--- a/test/e2e-scenario/nemoclaw_scenarios/onboard/dispatch.sh
+++ b/test/e2e-scenario/nemoclaw_scenarios/onboard/dispatch.sh
@@ -14,6 +14,8 @@ _E2E_ONBOARD_RUNTIME_LIB="$(cd "${_E2E_ONBOARD_DIR}/../../runtime/lib" && pwd)"
 . "${_E2E_ONBOARD_RUNTIME_LIB}/context.sh"
 # shellcheck source=cloud-openclaw.sh
 . "${_E2E_ONBOARD_DIR}/cloud-openclaw.sh"
+# shellcheck source=cloud-openclaw-no-docker.sh
+. "${_E2E_ONBOARD_DIR}/cloud-openclaw-no-docker.sh"
 # shellcheck source=cloud-hermes.sh
 . "${_E2E_ONBOARD_DIR}/cloud-hermes.sh"
 # shellcheck source=local-ollama-openclaw.sh
@@ -26,14 +28,13 @@ e2e_onboard() {
     return 2
   fi
   e2e_env_trace "onboard:${profile}"
-  if e2e_env_is_dry_run; then
-    echo "[dry-run] onboard profile=${profile} (skipped)"
-    return 0
-  fi
   case "${profile}" in
     cloud-openclaw)
       e2e_onboard_cloud_openclaw
       ;;
+    cloud-openclaw-no-docker)
+      e2e_onboard_cloud_openclaw_no_docker
+      ;;
     cloud-openclaw-custom-policies)
       E2E_ONBOARDING_MODEL="${E2E_ONBOARDING_MODEL:-nvidia/nemotron-3-super-120b-a12b}"
       E2E_ONBOARDING_POLICY_PRESETS="${E2E_ONBOARDING_POLICY_PRESETS:-npm,pypi}"
diff --git a/test/e2e-scenario/onboarding_assertions/preflight/00-preflight-passed.sh b/test/e2e-scenario/onboarding_assertions/preflight/00-preflight-passed.sh
index 69bda6c47c..fb05606494 100755
--- a/test/e2e-scenario/onboarding_assertions/preflight/00-preflight-passed.sh
+++ b/test/e2e-scenario/onboarding_assertions/preflight/00-preflight-passed.sh
@@ -9,7 +9,14 @@ if [[ ! -f "${E2E_CONTEXT_DIR:-}/onboard.log" ]]; then
   exit 1
 fi
 
-if grep -Eiq "preflight.*(fail|error)|docker|container|daemon|socket" "${E2E_CONTEXT_DIR}/onboard.log"; then
+# The onboarding action already completed (exit 0) for this assertion to
+# run; we only need to confirm the captured onboard.log does not contain
+# explicit preflight FAILURE markers. The previous regex matched any
+# mention of 'docker' / 'container' / 'daemon' / 'socket', which a normal
+# successful onboarding always logs. Tighten to actual failure phrases.
+if grep -Eiq \
+  "preflight[[:space:]]+(failed|error)|cannot connect to[[:space:]]+(the[[:space:]]+)?docker daemon|permission denied[[:space:]]+while trying to connect to.*docker.*sock|onboarding aborted|FATAL: docker|ERROR: docker daemon" \
+  "${E2E_CONTEXT_DIR}/onboard.log"; then
   echo "FAIL: onboarding.preflight.passed - onboard log contains preflight failure evidence"
   exit 1
 fi
diff --git a/test/e2e-scenario/runtime/lib/env.sh b/test/e2e-scenario/runtime/lib/env.sh
index ed33fb8a6a..9c33af97cc 100755
--- a/test/e2e-scenario/runtime/lib/env.sh
+++ b/test/e2e-scenario/runtime/lib/env.sh
@@ -40,8 +40,3 @@ e2e_env_trace() {
     printf '%s %s\n' "${event}" "$*" >>"${E2E_TRACE_FILE}"
   fi
 }
-
-# e2e_env_is_dry_run: true if E2E_DRY_RUN=1
-e2e_env_is_dry_run() {
-  [[ "${E2E_DRY_RUN:-0}" == "1" ]]
-}
diff --git a/test/e2e-scenario/runtime/run-scenario.sh b/test/e2e-scenario/runtime/run-scenario.sh
index 58042c8523..2477ce79ec 100755
--- a/test/e2e-scenario/runtime/run-scenario.sh
+++ b/test/e2e-scenario/runtime/run-scenario.sh
@@ -2,482 +2,24 @@
 # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # SPDX-License-Identifier: Apache-2.0
 #
-# E2E scenario runner entrypoint.
-#
-# Usage:
-#   bash test/e2e-scenario/runtime/run-scenario.sh <scenario-id> [--plan-only|--validate-only|--dry-run]
-#
-# Flags:
-#   --plan-only      Resolve metadata and print the plan only. Writes
-#                    ${E2E_CONTEXT_DIR:-.e2e}/plan.json for artifact upload.
-#   --validate-only  Run the expected-state validator against the current
-#                    context.env without running install/onboard/suites.
-#                    Emits probe results JSON to stdout and writes
-#                    ${E2E_CONTEXT_DIR}/expected-state-report.json. Used by
-#                    the parity-compare workflow to collect per-assertion
-#                    probe results. Mutually exclusive with --plan-only.
-#   --dry-run        (reserved) Run orchestration with real side effects
-#                    replaced by trace-logged stubs. Sets E2E_DRY_RUN=1 for
-#                    helpers. Full dry-run orchestration lands in later phases.
-#
-# Environment:
-#   E2E_CONTEXT_DIR  Override the scenario artifact directory
-#                    (default: <repo-root>/.e2e/).
+# DEPRECATED. The hybrid scenario architecture has a single supported runtime
+# entrypoint: test/e2e-scenario/scenarios/run.ts. This bash runner duplicated
+# install/onboard/gateway-check/suite-execution that now belongs in TS phase
+# orchestrators (EnvironmentOrchestrator, OnboardingOrchestrator,
+# RuntimeOrchestrator) and shared clients (HostCliClient, GatewayClient,
+# SandboxClient). It is fail-fast so the deprecation is loud, not silent.
 
 set -euo pipefail
 
-SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
-E2E_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)"
-REPO_ROOT="$(cd "${SCRIPT_DIR}/../../.." && pwd)"
-
-SCENARIO_ID=""
-PLAN_ONLY=0
-VALIDATE_ONLY=0
-DRY_RUN=0
-
-usage() {
-  cat >&2 <<'USAGE'
-Usage: bash test/e2e-scenario/runtime/run-scenario.sh <scenario-id> [--plan-only|--validate-only|--dry-run]
-USAGE
-}
-
-while [[ $# -gt 0 ]]; do
-  case "$1" in
-    --plan-only)
-      PLAN_ONLY=1
-      shift
-      ;;
-    --validate-only)
-      VALIDATE_ONLY=1
-      shift
-      ;;
-    --dry-run)
-      DRY_RUN=1
-      shift
-      ;;
-    -h | --help)
-      usage
-      exit 0
-      ;;
-    --*)
-      echo "run-scenario: unknown flag: $1" >&2
-      usage
-      exit 2
-      ;;
-    *)
-      if [[ -z "${SCENARIO_ID}" ]]; then
-        SCENARIO_ID="$1"
-      else
-        echo "run-scenario: unexpected positional argument: $1" >&2
-        usage
-        exit 2
-      fi
-      shift
-      ;;
-  esac
-done
-
-if [[ -z "${SCENARIO_ID}" ]]; then
-  echo "run-scenario: missing scenario id" >&2
-  usage
-  exit 2
-fi
-
-if [[ "${PLAN_ONLY}" -eq 1 && "${VALIDATE_ONLY}" -eq 1 ]]; then
-  echo "run-scenario: --plan-only and --validate-only are mutually exclusive" >&2
-  usage
-  exit 2
-fi
-
-export E2E_CONTEXT_DIR="${E2E_CONTEXT_DIR:-${REPO_ROOT}/.e2e}"
-mkdir -p "${E2E_CONTEXT_DIR}"
-
-if [[ "${DRY_RUN}" -eq 1 ]]; then
-  export E2E_DRY_RUN=1
-fi
-
-# Prefer the locally-installed tsx if present, otherwise fall back to npx.
-TSX_BIN="${REPO_ROOT}/node_modules/.bin/tsx"
-if [[ ! -x "${TSX_BIN}" ]]; then
-  TSX_BIN=""
-fi
-
-run_resolver() {
-  if [[ -n "${TSX_BIN}" ]]; then
-    "${TSX_BIN}" "${SCRIPT_DIR}/resolver/index.ts" "$@"
-    return
-  fi
-  # CodeRabbit review item #10: fail closed with a clear hint instead of
-  # silently pulling tsx from the network via `npx --yes`.
-  if ! (cd "${REPO_ROOT}" && npx --no-install tsx "${SCRIPT_DIR}/resolver/index.ts" "$@"); then
-    echo "run-scenario: tsx is required but not installed. Run 'npm ci' at the repo root and retry." >&2
-    return 1
-  fi
-}
-
-run_resolver plan "${SCENARIO_ID}" --context-dir "${E2E_CONTEXT_DIR}"
-
-if [[ "${PLAN_ONLY}" -eq 1 ]]; then
-  exit 0
-fi
-
-# --validate-only: assume setup has already completed. Skip install /
-# onboard / suite execution and dispatch the expected-state validator
-# using probes resolved from E2E_PROBE_OVERRIDE_* env vars. Emits the
-# probe results JSON report to stdout and writes it to
-# ${E2E_CONTEXT_DIR}/expected-state-report.json.
-if [[ "${VALIDATE_ONLY}" -eq 1 ]]; then
-  validate_args=("${SCENARIO_ID}" --context-dir "${E2E_CONTEXT_DIR}")
-  if ! run_resolver validate-state "${validate_args[@]}"; then
-    echo "run-scenario: --validate-only: expected-state validation failed" >&2
-    exit 3
-  fi
-  exit 0
-fi
-
-# Source the shared helper library so we can exercise the full
-# setup → install → onboard → gateway/sandbox check sequence. In dry-run
-# mode each helper short-circuits (and writes to E2E_TRACE_FILE if set).
-# shellcheck source=lib/env.sh
-. "${SCRIPT_DIR}/lib/env.sh"
-# shellcheck source=lib/context.sh
-. "${SCRIPT_DIR}/lib/context.sh"
-# shellcheck source=lib/negative.sh
-. "${SCRIPT_DIR}/lib/negative.sh"
-# shellcheck source=lib/port-holder.sh
-. "${SCRIPT_DIR}/lib/port-holder.sh"
-# shellcheck source=../nemoclaw_scenarios/install/dispatch.sh
-. "${E2E_ROOT}/nemoclaw_scenarios/install/dispatch.sh"
-# shellcheck source=../nemoclaw_scenarios/onboard/dispatch.sh
-. "${E2E_ROOT}/nemoclaw_scenarios/onboard/dispatch.sh"
-# shellcheck source=../validation_suites/assert/gateway-alive.sh
-. "${E2E_ROOT}/validation_suites/assert/gateway-alive.sh"
-# shellcheck source=../validation_suites/assert/sandbox-alive.sh
-. "${E2E_ROOT}/validation_suites/assert/sandbox-alive.sh"
-
-# Apply standard non-interactive env (and trace it).
-e2e_env_apply_noninteractive
-e2e_env_trace "env:noninteractive"
-
-# Emit normalized context from the resolved plan.
-e2e_context_init
-"${E2E_ROOT}/nemoclaw_scenarios/helpers/emit-context-from-plan.sh" "${E2E_CONTEXT_DIR}/plan.json"
-
-# Extract the install method and onboarding profile from the plan so we can
-# dispatch to the right helpers.
-read_plan_string() {
-  local key="$1"
-  node -e "
-    const p = JSON.parse(require('fs').readFileSync(process.argv[1], 'utf8'));
-    const parts = process.argv[2].split('.');
-    let cur = p;
-    for (const part of parts) { if (cur == null) { cur = ''; break; } cur = cur[part]; }
-    process.stdout.write(cur == null ? '' : String(cur));
-  " "${E2E_CONTEXT_DIR}/plan.json" "${key}"
-}
-
-INSTALL_ID="$(read_plan_string dimensions.install.id)"
-INSTALL_METHOD="$(read_plan_string dimensions.install.profile.method)"
-ONBOARDING_ID="$(read_plan_string dimensions.onboarding.id)"
-RUNTIME_ID="$(read_plan_string dimensions.runtime.id)"
-RUNTIME_CONTAINER_DAEMON="$(read_plan_string dimensions.runtime.profile.container_daemon)"
-EXPECTED_STATE_ID="$(read_plan_string expected_state.id)"
-FAILURE_STAGE="$(read_plan_string expected_state.config.failure.stage)"
-FAILURE_EXIT_CODE="$(read_plan_string expected_state.config.failure.exit_code)"
-FAILURE_MESSAGE_CONTAINS="$(read_plan_string expected_state.config.failure.message_contains)"
-FAILURE_NO_STACK_TRACE="$(read_plan_string expected_state.config.failure.no_stack_trace)"
-
-# Trace the dimension id so scenario-level assertions can identify the
-# configured install (e.g. repo-current); e2e_install internally traces
-# the resolved method.
-e2e_env_trace "install:${INSTALL_ID}"
-
-install_log="${E2E_CONTEXT_DIR}/install.log"
-set +e
-e2e_install "${INSTALL_METHOD}" >"${install_log}" 2>&1
-install_status=$?
-set -e
-if [[ "${install_status}" -ne 0 ]]; then
-  cat "${install_log}" >&2
-  echo "run-scenario: install ${INSTALL_METHOD} failed with status ${install_status}" >&2
-  exit "${install_status}"
-fi
-export PATH="${HOME}/.local/bin:${PATH}"
-{
-  printf 'PATH=%s\n' "${PATH}"
-  command -v nemoclaw || true
-} >"${E2E_CONTEXT_DIR}/post-install-path.log" 2>&1
-if [[ "${DRY_RUN}" -eq 1 ]]; then
-  printf 'run-scenario: dry-run skipping post-install nemoclaw PATH verification\n' >&2
-else
-  nemoclaw_bin="$(command -v nemoclaw || true)"
-  if [[ -z "${nemoclaw_bin}" ]]; then
-    cat "${E2E_CONTEXT_DIR}/post-install-path.log" >&2
-    echo "run-scenario: nemoclaw not found on PATH after install" >&2
-    exit 127
-  fi
-  printf 'run-scenario: using nemoclaw at %s\n' "${nemoclaw_bin}" >&2
-fi
-
-# Negative scenarios declare an `expected_failure` block on their expected
-# state (see NemoClaw issue #3608). The runner forces the failure mode for
-# the scenario, captures the setup log, gathers a side-effect inventory, and
-# delegates structured matching to `resolver/index.ts match-failure`. The
-# matcher writes `expected-vs-actual.json` for CI artifact upload.
-
-read_plan_failure_field() {
-  local key="$1"
-  node -e "
-    (() => {
-      const p = JSON.parse(require('fs').readFileSync(process.argv[1], 'utf8'));
-      const ef = p.expected_failure;
-      if (!ef) { process.stdout.write(''); return; }
-      const v = ef[process.argv[2]];
-      process.stdout.write(v == null ? '' : Array.isArray(v) ? v.join(',') : String(v));
-    })();
-  " "${E2E_CONTEXT_DIR}/plan.json" "${key}"
-}
-
-EXPECTED_FAILURE_PHASE="$(read_plan_failure_field phase)"
-
-if [[ -n "${EXPECTED_FAILURE_PHASE}" ]]; then
-  expected_error_class="$(read_plan_failure_field error_class)"
-  negative_log="${E2E_CONTEXT_DIR}/negative-${EXPECTED_FAILURE_PHASE}.log"
-  sandbox_name="$(e2e_context_get E2E_SANDBOX_NAME)"
-
-  # Snapshot the side-effect baseline BEFORE forcing the failure so we only
-  # report effects newly introduced by this scenario. A pre-existing gateway
-  # or credentials file from an earlier run would otherwise look like a fresh
-  # side effect and falsely fail negative scenarios in dirty environments.
-  baseline_sandbox=0
-  if [[ -n "${sandbox_name}" ]] && openshell sandbox list 2>/dev/null | grep -Fq "${sandbox_name}"; then
-    baseline_sandbox=1
-  fi
-  baseline_gateway=0
-  if nemoclaw gateway status >/dev/null 2>&1; then
-    baseline_gateway=1
-  fi
-  baseline_credentials=0
-  if [[ -s "${HOME}/.nemoclaw/credentials.json" ]]; then
-    baseline_credentials=1
-  fi
-
-  # Force the failure mode declared by the scenario. Only `preflight` /
-  # `docker-missing` is implemented here; other phases are accepted by the
-  # schema but their forcing logic lands alongside the first consumer.
-  case "${EXPECTED_FAILURE_PHASE}:${expected_error_class}" in
-    preflight:docker-missing)
-      if [[ "${DRY_RUN}" -eq 1 ]]; then
-        printf 'Cannot connect to the Docker daemon during preflight\n' >"${negative_log}"
-      else
-        if DOCKER_HOST="unix:///tmp/nemoclaw-e2e-missing-docker.sock" \
-          e2e_onboard "${ONBOARDING_ID}" >"${negative_log}" 2>&1; then
-          echo "run-scenario: expected preflight failure, but onboarding succeeded" >&2
-          cat "${negative_log}" >&2
-          exit 4
-        fi
-      fi
-      ;;
-    *)
-      echo "run-scenario: expected_failure phase=${EXPECTED_FAILURE_PHASE} class=${expected_error_class} has no forcing implementation yet" >&2
-      exit 2
-      ;;
-  esac
-
-  # Compute the side-effect delta: only count effects that were absent in the
-  # baseline and present after the forced failure.
-  observed_side_effects=""
-  if [[ "${baseline_sandbox}" -eq 0 ]] && [[ -n "${sandbox_name}" ]] \
-    && openshell sandbox list 2>/dev/null | grep -Fq "${sandbox_name}"; then
-    observed_side_effects="${observed_side_effects:+${observed_side_effects},}sandbox-created"
-  fi
-  if [[ "${baseline_gateway}" -eq 0 ]] && nemoclaw gateway status >/dev/null 2>&1; then
-    observed_side_effects="${observed_side_effects:+${observed_side_effects},}gateway-started"
-  fi
-  if [[ "${baseline_credentials}" -eq 0 ]] && [[ -s "${HOME}/.nemoclaw/credentials.json" ]]; then
-    observed_side_effects="${observed_side_effects:+${observed_side_effects},}credentials-written"
-  fi
-
-  # `--observed-error-class` is intentionally omitted: the runner does not yet
-  # derive a structured error class from the actual failure output, and
-  # reporting the planned class back to the matcher would make the check
-  # tautological. The matcher logs this as a skipped check.
-  match_args=(
-    match-failure "${SCENARIO_ID}"
-    --context-dir "${E2E_CONTEXT_DIR}"
-    --log "${negative_log}"
-    --observed-phase "${EXPECTED_FAILURE_PHASE}"
-  )
-  if [[ -n "${observed_side_effects}" ]]; then
-    match_args+=(--observed-side-effects "${observed_side_effects}")
-  fi
-  if ! run_resolver "${match_args[@]}"; then
-    echo "run-scenario: expected-failure match failed; see ${E2E_CONTEXT_DIR}/expected-vs-actual.json" >&2
-    exit 4
-  fi
-  echo "run-scenario: negative scenario passed (phase=${EXPECTED_FAILURE_PHASE} class=${expected_error_class})"
-  exit 0
-fi
-
-if [[ "${EXPECTED_STATE_ID}" == "preflight-failure-no-sandbox" ]]; then
-  negative_log="${E2E_CONTEXT_DIR}/negative-preflight.log"
-  sandbox_name="$(e2e_context_get E2E_SANDBOX_NAME)"
-  if [[ "${DRY_RUN}" -eq 1 ]]; then
-    printf 'Cannot connect to the Docker daemon during preflight\n' >"${negative_log}"
-  elif DOCKER_HOST="unix:///tmp/nemoclaw-e2e-missing-docker.sock" e2e_onboard "${ONBOARDING_ID}" >"${negative_log}" 2>&1; then
-    echo "run-scenario: expected preflight failure, but onboarding succeeded" >&2
-    exit 4
-  fi
-  if ! grep -Eiq "docker|container|daemon|socket|preflight" "${negative_log}"; then
-    echo "run-scenario: negative preflight failed without a clear Docker/preflight reason" >&2
-    cat "${negative_log}" >&2
-    exit 4
-  fi
-  if openshell sandbox list 2>/dev/null | grep -Fq "${sandbox_name}"; then
-    echo "run-scenario: negative preflight left behind sandbox ${sandbox_name}" >&2
-    exit 4
-  fi
-  echo "run-scenario: negative preflight passed; Docker daemon unavailable and no sandbox was created"
-  exit 0
-fi
-
-if [[ "${FAILURE_STAGE}" == "onboarding" ]]; then
-  negative_log="${E2E_CONTEXT_DIR}/negative-onboarding.log"
-  sandbox_name="$(e2e_context_get E2E_SANDBOX_NAME)"
-  port_holder_started=0
-  onboard_env=(NEMOCLAW_SANDBOX_NAME="${sandbox_name}" NEMOCLAW_RECREATE_SANDBOX=1 NEMOCLAW_POLICY_MODE=skip)
-  case "${ONBOARDING_ID}" in
-    cloud-openclaw-invalid-nvidia-key)
-      onboard_env+=(NVIDIA_API_KEY=not-a-nvidia-key)
-      ;;
-    cloud-openclaw-gateway-port-conflict)
-      conflict_port="$(read_plan_string dimensions.onboarding.profile.gateway_port)"
-      : "${conflict_port:=18080}"
-      if e2e_port_holder_start "${conflict_port}"; then
-        port_holder_started=1
-      else
-        echo "run-scenario: could not start port holder on ${conflict_port}; continuing against any existing listener" >&2
-      fi
-      onboard_env+=(NEMOCLAW_GATEWAY_PORT="${conflict_port}")
-      ;;
-  esac
-  if [[ "${DRY_RUN}" -eq 1 ]]; then
-    printf '%s
-' "${FAILURE_MESSAGE_CONTAINS}" >"${negative_log}"
-    negative_status="${FAILURE_EXIT_CODE:-1}"
-  else
-    set +e
-    (
-      export "${onboard_env[@]}"
-      e2e_onboard "${ONBOARDING_ID}"
-    ) >"${negative_log}" 2>&1
-    negative_status=$?
-    set -e
-  fi
-  if [[ "${port_holder_started}" -eq 1 ]]; then
-    e2e_port_holder_stop
-  fi
-  if ! e2e_negative_assert_failure "${negative_log}" "${negative_status}" "${FAILURE_EXIT_CODE:-1}" "${FAILURE_MESSAGE_CONTAINS}" "$([[ "${FAILURE_NO_STACK_TRACE}" == "true" ]] && echo 1 || echo 0)"; then
-    exit 4
-  fi
-  if openshell sandbox list 2>/dev/null | grep -Fq "${sandbox_name}"; then
-    echo "run-scenario: negative onboarding left behind sandbox ${sandbox_name}" >&2
-    exit 4
-  fi
-  echo "run-scenario: negative onboarding ${ONBOARDING_ID} passed"
-  exit 0
-fi
-
-DOCKER_OPTIONAL_UNAVAILABLE=0
-if [[ "${RUNTIME_CONTAINER_DAEMON}" == "optional" ]] && ! docker info >/dev/null 2>&1; then
-  DOCKER_OPTIONAL_UNAVAILABLE=1
-  echo "SKIP: scenario.${SCENARIO_ID}.docker-dependent-suites Docker unavailable for optional runtime ${RUNTIME_ID}; gateway/sandbox/inference coverage skipped"
-  echo "run-scenario: Docker unavailable for optional runtime ${RUNTIME_ID}; scaling back to platform-only suites"
-else
-  onboard_log="${E2E_CONTEXT_DIR}/onboard.log"
-  set +e
-  e2e_onboard "${ONBOARDING_ID}" >"${onboard_log}" 2>&1
-  onboard_status=$?
-  set -e
-  if [[ "${onboard_status}" -ne 0 ]]; then
-    cat "${onboard_log}" >&2
-    echo "run-scenario: onboarding ${ONBOARDING_ID} failed with status ${onboard_status}" >&2
-    exit "${onboard_status}"
-  fi
-  if [[ "${RUNTIME_ID}" == "gpu-docker-cdi" ]] && ! e2e_env_is_dry_run; then
-    echo "run-scenario: GPU Docker CDI uses host-network gateway; validating gateway from suites"
-  else
-    e2e_gateway_assert_healthy
-  fi
-  e2e_sandbox_assert_running
-fi
-
-# Expected state validation. The validator reads E2E_PROBE_OVERRIDE_* env
-# variables to simulate real probe outputs in dry-run/test contexts.
-# Live probe wiring lands scenario-by-scenario; by default, live runs move
-# straight from setup checks to suites so migrated suite assertions can be
-# debugged against the real environment.
-if [[ "${E2E_VALIDATE_EXPECTED_STATE:-0}" == "1" || "${DRY_RUN}" -eq 1 ]]; then
-  validate_args=("${SCENARIO_ID}" --context-dir "${E2E_CONTEXT_DIR}")
-  if [[ "${DRY_RUN}" -eq 1 ]]; then
-    # CodeRabbit review item #9: explicitly opt in to seeding probes from
-    # the expected state in dry-run/test mode. Live runs go through real
-    # probes and must fail closed if any are missing.
-    validate_args+=(--probes-from-state)
-  fi
-  if ! run_resolver validate-state "${validate_args[@]}"; then
-    echo "run-scenario: expected-state validation failed; suites will NOT run" >&2
-    exit 3
-  fi
-fi
-
-if [[ "${DRY_RUN}" -eq 1 ]]; then
-  echo "run-scenario: dry-run complete; context.env emitted under ${E2E_CONTEXT_DIR}"
-  exit 0
-fi
-
-SUITE_IDS=()
-while IFS= read -r suite_id; do
-  SUITE_IDS+=("${suite_id}")
-done < <(node -e "
-  try {
-    const planPath = process.argv[1];
-    const p = JSON.parse(require('fs').readFileSync(planPath, 'utf8'));
-    if (!Array.isArray(p.suites)) {
-      throw new Error('missing or invalid suites array');
-    }
-    const filter = process.env.E2E_SUITE_FILTER || '';
-    const selected = filter ? filter.split(',').map((s) => s.trim()).filter(Boolean) : p.suites.map((s) => s.id);
-    for (const id of selected) console.log(id);
-  } catch (err) {
-    console.error('run-scenario: failed to parse plan.json ' + process.argv[1] + ': ' + err.message);
-    process.exit(1);
-  }
-" "${E2E_CONTEXT_DIR}/plan.json")
-
-if [[ "${#SUITE_IDS[@]}" -eq 0 ]]; then
-  echo "run-scenario: no suites selected for ${SCENARIO_ID}" >&2
-  exit 4
-fi
-
-if [[ "${DOCKER_OPTIONAL_UNAVAILABLE}" -eq 1 ]]; then
-  FILTERED_SUITE_IDS=()
-  for suite_id in "${SUITE_IDS[@]}"; do
-    case "${suite_id}" in
-      smoke | inference | credentials | hermes-specific | local-ollama-inference | ollama-proxy | gateway-health | sandbox-shell | cloud-inference | ollama-auth-proxy | security-credentials | messaging-telegram | messaging-discord | messaging-slack | security-shields | inference-routing | sandbox-lifecycle | sandbox-operations | snapshot | rebuild | upgrade | diagnostics | docs-validation | openai-compatible-inference | inference-switch | kimi-compatibility | messaging-token-rotation | security-policy | security-injection | model-router)
-        echo "SKIP: suite.${suite_id} skipped because optional Docker runtime ${RUNTIME_ID} is unavailable"
-        ;;
-      *)
-        FILTERED_SUITE_IDS+=("${suite_id}")
-        ;;
-    esac
-  done
-  SUITE_IDS=("${FILTERED_SUITE_IDS[@]}")
-fi
+cat >&2 <<'MSG'
+run-scenario.sh is deprecated. Use the TS runner instead:
 
-if [[ "${#SUITE_IDS[@]}" -eq 0 ]]; then
-  echo "run-scenario: all suites skipped for ${SCENARIO_ID}" >&2
-  exit 0
-fi
+  npx tsx test/e2e-scenario/scenarios/run.ts --scenarios <id[,id...]>
 
-bash "${SCRIPT_DIR}/run-suites.sh" "${SUITE_IDS[@]}"
+Other run.ts modes (read-only):
+  --list                List canonical scenario ids
+  --emit-matrix         Emit GitHub Actions matrix payload from the registry
+  --plan-only           Local debug: print the compiled plan, do not execute
+                        (must NOT appear in any CI workflow)
+MSG
+exit 2
diff --git a/test/e2e-scenario/runtime/run-suites.sh b/test/e2e-scenario/runtime/run-suites.sh
index e99c069408..dac69cd422 100755
--- a/test/e2e-scenario/runtime/run-suites.sh
+++ b/test/e2e-scenario/runtime/run-suites.sh
@@ -2,136 +2,20 @@
 # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # SPDX-License-Identifier: Apache-2.0
 #
-# Run one or more functional suites against a completed E2E environment.
-#
-# Usage:
-#   bash test/e2e-scenario/runtime/run-suites.sh <suite-id> [<suite-id> ...]
-#
-# Reads suite metadata from test/e2e-scenario/validation_suites/suites.yaml
-# (or $E2E_SUITES_FILE). Each suite script receives .e2e/context.env
-# via E2E_CONTEXT_DIR and is expected to source runtime/lib/context.sh if
-# it needs specific keys.
-#
-# Environment:
-#   E2E_CONTEXT_DIR   Directory containing context.env (default: <repo>/.e2e)
-#   E2E_SUITES_FILE   Override suites metadata file (for tests)
-#   E2E_SUITES_DIR    Override the directory that suite scripts are resolved
-#                     against (default: test/e2e-scenario/validation_suites/)
-#   E2E_DRY_RUN       When 1, suite scripts run in dry-run mode themselves.
-#
-# Exit code: 0 if all steps pass; non-zero at the first failing step.
+# DEPRECATED. Suite execution is now driven directly by the TS phase
+# orchestrator (RuntimeOrchestrator -> PhaseOrchestrator.runShellStep) which
+# spawns each migrated assertion step's implementation.ref shell script.
+# There is no longer a YAML-walking bash suite runner.
 
 set -euo pipefail
 
-SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
-E2E_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)"
-REPO_ROOT="$(cd "${SCRIPT_DIR}/../../.." && pwd)"
-VALIDATION_SUITES_DIR="${E2E_ROOT}/validation_suites"
-
-if (($# == 0)); then
-  echo "run-suites: at least one suite id required" >&2
-  echo "Usage: bash test/e2e-scenario/runtime/run-suites.sh <suite-id> [<suite-id> ...]" >&2
-  exit 2
-fi
-
-export E2E_CONTEXT_DIR="${E2E_CONTEXT_DIR:-${REPO_ROOT}/.e2e}"
-SUITES_FILE="${E2E_SUITES_FILE:-${VALIDATION_SUITES_DIR}/suites.yaml}"
-SUITES_DIR="${E2E_SUITES_DIR:-${VALIDATION_SUITES_DIR}}"
-
-CTX_FILE="${E2E_CONTEXT_DIR}/context.env"
-if [[ ! -f "${CTX_FILE}" ]]; then
-  echo "run-suites: missing ${CTX_FILE}; run-scenario.sh must emit context before running suites" >&2
-  exit 1
-fi
-
-# Sanity-check that the baseline scenario key is present.
-if ! grep -q '^E2E_SCENARIO=' "${CTX_FILE}"; then
-  echo "run-suites: ${CTX_FILE} is missing required key E2E_SCENARIO" >&2
-  exit 1
-fi
-
-# Resolve the suite step list by reading the YAML via node.
-resolve_suite() {
-  local suite_id="$1"
-  node -e "
-    const fs = require('fs');
-    const path = process.argv[1];
-    const wanted = process.argv[2];
-    const raw = fs.readFileSync(path, 'utf8');
-    // Minimal YAML reader: prefer js-yaml if available; else fall back.
-    let yaml;
-    try { yaml = require('js-yaml'); } catch (_) {
-      process.stderr.write('run-suites: js-yaml required to parse suite metadata\n');
-      process.exit(2);
-    }
-    const doc = yaml.load(raw);
-    if (!doc || !doc.suites || !doc.suites[wanted]) {
-      process.stderr.write('run-suites: unknown suite: ' + wanted + '\n');
-      process.exit(3);
-    }
-    const steps = doc.suites[wanted].steps || [];
-    for (const s of steps) {
-      if (!s || typeof s.id !== 'string' || typeof s.script !== 'string') {
-        process.stderr.write('run-suites: malformed step in ' + wanted + '\n');
-        process.exit(4);
-      }
-      process.stdout.write(s.id + '\t' + s.script + '\n');
-    }
-  " "${SUITES_FILE}" "${suite_id}"
-}
-
-declare -a FAILED_STEPS=()
-declare -a PASSED_STEPS=()
-OVERALL_STATUS=0
-
-run_one_suite() {
-  local suite_id="$1"
-  echo "== suite: ${suite_id} =="
-  local steps
-  if ! steps="$(resolve_suite "${suite_id}")"; then
-    OVERALL_STATUS=1
-    return 1
-  fi
-  if [[ -z "${steps}" ]]; then
-    echo "  (no steps)"
-    return 0
-  fi
-  while IFS=$'\t' read -r step_id script; do
-    [[ -z "${step_id}" ]] && continue
-    local full="${SUITES_DIR}/${script}"
-    echo "  -> step: ${step_id} (${script})"
-    if [[ ! -f "${full}" ]]; then
-      echo "    FAIL: script not found at ${full}" >&2
-      FAILED_STEPS+=("${suite_id}/${step_id}")
-      OVERALL_STATUS=1
-      return 1
-    fi
-    if ! bash "${full}"; then
-      echo "    FAIL: suite=${suite_id} step=${step_id}" >&2
-      FAILED_STEPS+=("${suite_id}/${step_id}")
-      OVERALL_STATUS=1
-      return 1
-    fi
-    echo "    PASS: ${step_id}"
-    PASSED_STEPS+=("${suite_id}/${step_id}")
-  done <<<"${steps}"
-}
-
-for suite_id in "$@"; do
-  if ! run_one_suite "${suite_id}"; then
-    break
-  fi
-done
+cat >&2 <<'MSG'
+run-suites.sh is deprecated. Suite assertions are now executed by
+test/e2e-scenario/scenarios/orchestrators/phase.ts via child_process.spawn,
+walking the typed assertionGroups defined in the scenario registry.
 
-echo
-echo "== suite summary =="
-# bash 3.2 (macOS) fails on "${arr[@]}" when the array is empty under `set -u`;
-# use the `${arr[@]+...}` guard to expand to nothing when empty.
-for p in ${PASSED_STEPS[@]+"${PASSED_STEPS[@]}"}; do
-  echo "  PASS ${p}"
-done
-for f in ${FAILED_STEPS[@]+"${FAILED_STEPS[@]}"}; do
-  echo "  FAIL ${f}"
-done
+Run scenarios via:
 
-exit "${OVERALL_STATUS}"
+  npx tsx test/e2e-scenario/scenarios/run.ts --scenarios <id[,id...]>
+MSG
+exit 2
diff --git a/test/e2e-scenario/scenarios/assertions/environment.ts b/test/e2e-scenario/scenarios/assertions/environment.ts
deleted file mode 100644
index be7a62e6fb..0000000000
--- a/test/e2e-scenario/scenarios/assertions/environment.ts
+++ /dev/null
@@ -1,22 +0,0 @@
-// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-// SPDX-License-Identifier: Apache-2.0
-
-import type { AssertionGroup } from "../types.ts";
-
-export function environmentBaseline(): AssertionGroup {
-  return {
-    id: "environment.baseline",
-    phase: "environment",
-    description: "Skeleton environment baseline assertion group.",
-    migrationStatus: "complete",
-    steps: [
-      {
-        id: "environment.plan.skeleton",
-        phase: "environment",
-        description: "Placeholder step until live environment orchestration is migrated.",
-        implementation: { kind: "pending", ref: "phase-1-skeleton" },
-        evidencePath: ".e2e/environment.result.json",
-      },
-    ],
-  };
-}
diff --git a/test/e2e-scenario/scenarios/assertions/onboarding.ts b/test/e2e-scenario/scenarios/assertions/onboarding.ts
deleted file mode 100644
index 9886a701fb..0000000000
--- a/test/e2e-scenario/scenarios/assertions/onboarding.ts
+++ /dev/null
@@ -1,21 +0,0 @@
-// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-// SPDX-License-Identifier: Apache-2.0
-
-import type { AssertionGroup } from "../types.ts";
-
-export function onboardingBaseline(): AssertionGroup {
-  return {
-    id: "onboarding.baseline",
-    phase: "onboarding",
-    description: "Skeleton onboarding assertion group.",
-    steps: [
-      {
-        id: "onboarding.plan.skeleton",
-        phase: "onboarding",
-        description: "Placeholder step until onboarding assertions are migrated.",
-        implementation: { kind: "pending", ref: "phase-1-skeleton" },
-        evidencePath: ".e2e/onboarding.result.json",
-      },
-    ],
-  };
-}
diff --git a/test/e2e-scenario/scenarios/assertions/registry.ts b/test/e2e-scenario/scenarios/assertions/registry.ts
index d6ef59fe1c..2a7d6603f4 100644
--- a/test/e2e-scenario/scenarios/assertions/registry.ts
+++ b/test/e2e-scenario/scenarios/assertions/registry.ts
@@ -3,7 +3,6 @@
 
 import fs from "node:fs";
 import path from "node:path";
-import { environmentBaseline } from "./environment.ts";
 import type { AssertionGroup, AssertionStep, PhaseName, ScenarioDefinition } from "../types.ts";
 
 type Reliability = AssertionStep["reliability"];
@@ -25,22 +24,42 @@ function shellStep(input: ShellStepInput): AssertionStep {
   };
 }
 
-function probeStep(id: string, phase: PhaseName, ref: string, reliability?: Reliability): AssertionStep {
+interface ProbeStepOptions {
+  reliability?: Reliability;
+  // When true, an unregistered probe fails the phase (and the run)
+  // instead of skipping. Use for security-sensitive probes the run
+  // is not safe without.
+  required?: boolean;
+}
+
+function probeStep(
+  id: string,
+  phase: PhaseName,
+  ref: string,
+  options: ProbeStepOptions = {},
+): AssertionStep {
   return {
     id,
     phase,
     implementation: { kind: "probe", ref },
     evidencePath: `.e2e/assertions/${id}.json`,
-    reliability,
+    reliability: options.reliability,
+    required: options.required,
   };
 }
 
-function pendingStep(id: string, phase: PhaseName, ref: string): AssertionStep {
+function pendingStep(
+  id: string,
+  phase: PhaseName,
+  ref: string,
+  options: { required?: boolean } = {},
+): AssertionStep {
   return {
     id,
     phase,
     implementation: { kind: "pending", ref },
     evidencePath: `.e2e/assertions/${id}.json`,
+    required: options.required,
   };
 }
 
@@ -186,7 +205,21 @@ export const runtimeControlGroups: AssertionGroup[] = [
     phase: "runtime",
     description: "Negative scenario runtime check ensuring forbidden side effects did not occur.",
     migrationStatus: "complete",
-    steps: [pendingStep("runtime.expected-failure.no-side-effects", "runtime", "expectedFailureNoSideEffectsProbe")],
+    steps: [
+      pendingStep(
+        "runtime.expected-failure.no-side-effects",
+        "runtime",
+        "expectedFailureNoSideEffectsProbe",
+        // Negative scenarios assert that a declared failure mode
+        // produced no forbidden side effects. Until the side-effect
+        // validator is implemented, this step must fail closed for
+        // any scenario that opts into runtimeControlGroups[0]
+        // (i.e. scenario.expectedFailure is set). Skipping it would
+        // let negative scenarios silently "pass" without verifying
+        // their core contract.
+        { required: true },
+      ),
+    ],
   },
 ];
 
@@ -219,9 +252,19 @@ export const validationSuiteGroups: AssertionGroup[] = [
   ]),
   suiteGroup("credentials", credentialsSteps),
   suiteGroup("security-credentials", credentialsSteps),
-  suiteGroup("security-shields", [probeStep("security.shields.config", "runtime", "shieldsConfigProbe")]),
-  suiteGroup("security-policy", [probeStep("security.policy.enforced", "runtime", "networkPolicyProbe")]),
-  suiteGroup("security-injection", [probeStep("security.injection.blocked", "runtime", "injectionBlockedProbe")]),
+  // Security-sensitive probes MUST fail closed until the probe
+  // registry lands. A skipped shields/policy/injection check would
+  // produce fake-green for the exact suites these scenarios exist to
+  // protect.
+  suiteGroup("security-shields", [
+    probeStep("security.shields.config", "runtime", "shieldsConfigProbe", { required: true }),
+  ]),
+  suiteGroup("security-policy", [
+    probeStep("security.policy.enforced", "runtime", "networkPolicyProbe", { required: true }),
+  ]),
+  suiteGroup("security-injection", [
+    probeStep("security.injection.blocked", "runtime", "injectionBlockedProbe", { required: true }),
+  ]),
   suiteGroup("messaging-telegram", [
     shellStep({ id: "messaging.telegram.injection-safety", phase: "runtime", ref: "test/e2e-scenario/validation_suites/messaging/telegram/00-telegram-injection-safety.sh", reliability: { timeoutSeconds: 30, retry: { attempts: 2, on: ["external-tunnel"] } } }),
     shellStep({ id: "messaging.telegram.injection-payload-classes", phase: "runtime", ref: "test/e2e-scenario/validation_suites/messaging/telegram/01-telegram-injection-payload-classes.sh", reliability: { timeoutSeconds: 30, retry: { attempts: 2, on: ["external-tunnel"] } } }),
@@ -254,7 +297,7 @@ export const validationSuiteGroups: AssertionGroup[] = [
 ];
 
 export const assertionRegistry = {
-  groups: [environmentBaseline(), ...onboardingAssertionGroups, ...runtimeControlGroups, ...validationSuiteGroups],
+  groups: [...onboardingAssertionGroups, ...runtimeControlGroups, ...validationSuiteGroups],
 };
 
 export function assertionGroupForSuite(suiteId: string): AssertionGroup | undefined {
@@ -349,8 +392,11 @@ export function assertionGroupsForScenario(scenario: ScenarioDefinition): Assert
     return group;
   });
 
+  // Environment phase work is performed by the typed PhaseAction
+  // (environment.install.<id>) emitted from compiler.phaseActions();
+  // context.env seeding is framework infrastructure (orchestrators/context.ts).
+  // No environment-phase assertion group is included in scenario plans.
   const groups: (AssertionGroup | undefined)[] = [
-    environmentBaseline(),
     ...onboardingGroups,
     ...suiteGroups,
     ...supplementalGroups,
diff --git a/test/e2e-scenario/scenarios/assertions/runtime.ts b/test/e2e-scenario/scenarios/assertions/runtime.ts
deleted file mode 100644
index 5ed7031279..0000000000
--- a/test/e2e-scenario/scenarios/assertions/runtime.ts
+++ /dev/null
@@ -1,21 +0,0 @@
-// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-// SPDX-License-Identifier: Apache-2.0
-
-import type { AssertionGroup } from "../types.ts";
-
-export function runtimeSmokeSkeleton(): AssertionGroup {
-  return {
-    id: "runtime.smoke.skeleton",
-    phase: "runtime",
-    description: "Skeleton runtime smoke assertion group.",
-    steps: [
-      {
-        id: "runtime.plan.skeleton",
-        phase: "runtime",
-        description: "Placeholder step until validation suites are migrated.",
-        implementation: { kind: "pending", ref: "phase-1-skeleton" },
-        evidencePath: ".e2e/runtime.result.json",
-      },
-    ],
-  };
-}
diff --git a/test/e2e-scenario/scenarios/compiler.ts b/test/e2e-scenario/scenarios/compiler.ts
index 5046c77dd2..796e8a05fc 100644
--- a/test/e2e-scenario/scenarios/compiler.ts
+++ b/test/e2e-scenario/scenarios/compiler.ts
@@ -6,7 +6,17 @@ import path from "node:path";
 import { fileURLToPath } from "node:url";
 import { loadManifest } from "./manifests.ts";
 import { requireScenarios } from "./registry.ts";
-import type { AssertionGroup, NemoClawInstanceManifest, PhaseName, RunPlan, ScenarioDefinition, SutBoundary } from "./types.ts";
+import type {
+  AssertionGroup,
+  ExpectedFailureContract,
+  ExpectedFailurePhase,
+  NemoClawInstanceManifest,
+  PhaseAction,
+  PhaseName,
+  RunPlan,
+  ScenarioDefinition,
+  SutBoundary,
+} from "./types.ts";
 
 const PHASES: PhaseName[] = ["environment", "onboarding", "runtime"];
 const REPO_ROOT = path.resolve(path.dirname(fileURLToPath(import.meta.url)), "../../..");
@@ -67,17 +77,114 @@ function validateManifestCompatibility(scenario: ScenarioDefinition, manifest?:
   }
 }
 
-function phaseActions(phase: PhaseName, scenario: ScenarioDefinition): string[] {
+// Centralized paths to the existing shell helpers. Spec rule: shell
+// scripts can remain as implementations, but invocation goes through
+// typed assertion/action definitions, not bare workflow YAML or a
+// resurrected bash runner.
+const INSTALL_DISPATCH = "test/e2e-scenario/nemoclaw_scenarios/install/dispatch.sh";
+const ONBOARD_DISPATCH = "test/e2e-scenario/nemoclaw_scenarios/onboard/dispatch.sh";
+
+// Default action timeouts. Install and onboarding can take a while on
+// cold runners (Docker pulls, image builds, sandbox bootstrap).
+const INSTALL_TIMEOUT_SECONDS = 900;
+const ONBOARD_TIMEOUT_SECONDS = 900;
+
+// Declared parent-env secrets each onboarding profile actually needs.
+// Anything not listed here (and not in the framework allowlist) is
+// dropped before spawn by buildChildEnv. Keep this list minimal —
+// every entry widens the secret blast radius if the child or one of
+// its descendants logs unredacted output.
+const ONBOARD_PROFILE_SECRET_ENV: Readonly<Record<string, readonly string[]>> = {
+  // Cloud profiles invoke `nemoclaw onboard` which authenticates to the
+  // NVIDIA cloud provider via NVIDIA_API_KEY.
+  "cloud-openclaw": ["NVIDIA_API_KEY"],
+  "cloud-openclaw-custom-policies": ["NVIDIA_API_KEY"],
+  "cloud-openclaw-invalid-nvidia-key": ["NVIDIA_API_KEY"],
+  "cloud-openclaw-gateway-port-conflict": ["NVIDIA_API_KEY"],
+  // Negative scenario: nemoclaw onboard runs against a docker shim that
+  // exits non-zero. Onboard never reaches the cloud auth step, but the
+  // CLI still loads NVIDIA_API_KEY when present — keep it in the secret
+  // env so behavior matches a real user invocation.
+  "cloud-openclaw-no-docker": ["NVIDIA_API_KEY"],
+  "cloud-hermes": ["NVIDIA_API_KEY"],
+  "cloud-hermes-discord": ["NVIDIA_API_KEY"],
+  "cloud-hermes-slack": ["NVIDIA_API_KEY"],
+  // Local profiles do not need any cloud secret.
+  "local-ollama-openclaw": [],
+};
+
+function phaseActions(phase: PhaseName, scenario: ScenarioDefinition): PhaseAction[] {
   if (phase === "environment") {
+    if (!scenario.environment) {
+      // Scenarios without any environment dimension (skeleton scenarios)
+      // legitimately have no actions yet. Don't fail-fast here.
+      return [];
+    }
+    const installId = scenario.environment.install;
+    if (!installId) {
+      // Environment is declared but install is missing - that IS a
+      // malformed scenario; fail fast so the caller sees a clear error
+      // rather than a phase that silently no-ops setup work.
+      throw new Error(`Scenario ${scenario.id} is missing environment.install`);
+    }
     return [
-      `install:${scenario.environment?.install ?? "unknown"}`,
-      `runtime:${scenario.environment?.runtime ?? "unknown"}`,
+      {
+        id: `environment.install.${installId}`,
+        phase: "environment",
+        description: `Run e2e_install ${installId} to set up the host control plane.`,
+        kind: "shell-fn",
+        scriptRef: INSTALL_DISPATCH,
+        fn: "e2e_install",
+        arg: installId,
+        timeoutSeconds: INSTALL_TIMEOUT_SECONDS,
+        evidencePath: `.e2e/actions/environment.install.${installId}.log`,
+      },
     ];
   }
   if (phase === "onboarding") {
-    return [`onboard:${scenario.environment?.onboarding ?? "unknown"}`];
+    if (!scenario.environment) {
+      return [];
+    }
+    const baseOnboardingId = scenario.environment.onboarding;
+    if (!baseOnboardingId) {
+      throw new Error(`Scenario ${scenario.id} is missing environment.onboarding`);
+    }
+    // Negative-runtime scenarios route to a dedicated onboarding profile
+    // that sets up the failure condition (e.g. docker-missing) BEFORE
+    // invoking `nemoclaw onboard` and captures the resulting output to
+    // the log file the assertion phase reads. The profile id convention
+    // is `<base>-no-docker`. New negative profiles register a worker in
+    // nemoclaw_scenarios/onboard/dispatch.sh and a secret-env mapping
+    // above.
+    const onboardingId =
+      scenario.environment.runtime === "docker-missing"
+        ? `${baseOnboardingId}-no-docker`
+        : baseOnboardingId;
+    // secretEnv defaults to [] (no parent-env secrets pass through)
+    // unless the profile is explicitly listed above. Unknown profiles
+    // get the safest setting and surface the gap loudly the first
+    // time they actually need a secret to authenticate.
+    const secretEnv = ONBOARD_PROFILE_SECRET_ENV[onboardingId] ?? [];
+    return [
+      {
+        id: `onboarding.profile.${onboardingId}`,
+        phase: "onboarding",
+        description: `Run e2e_onboard ${onboardingId} to bring the gateway and sandbox online.`,
+        kind: "shell-fn",
+        scriptRef: ONBOARD_DISPATCH,
+        fn: "e2e_onboard",
+        arg: onboardingId,
+        timeoutSeconds: ONBOARD_TIMEOUT_SECONDS,
+        evidencePath: `.e2e/actions/onboarding.profile.${onboardingId}.log`,
+        // Legacy preflight assertions look for ${E2E_CONTEXT_DIR}/onboard.log;
+        // publish a stable alias so they keep working without rewiring.
+        aliasPath: "onboard.log",
+        secretEnv,
+      },
+    ];
   }
-  return (scenario.suiteIds ?? []).map((suiteId) => `suite:${suiteId}`);
+  // Runtime phase has no actions; suites are assertion groups.
+  return [];
 }
 
 const SUT_BOUNDARIES: SutBoundary[] = [
@@ -89,6 +196,41 @@ const SUT_BOUNDARIES: SutBoundary[] = [
   { id: "state", client: "StateClient" },
 ];
 
+// Negative scenarios advertise their failure mode against one of these
+// user-facing phases. "preflight" is intentionally distinct from the
+// internal PhaseName union: scenario manifests speak the user's vocab
+// ("preflight failed") and the matcher resolves preflight to the
+// onboarding phase orchestrator. See orchestrators/negative-matcher.ts.
+const EXPECTED_FAILURE_PHASES: readonly ExpectedFailurePhase[] = [
+  "environment",
+  "onboarding",
+  "runtime",
+  "preflight",
+];
+
+function validateExpectedFailure(scenarioId: string, contract: ExpectedFailureContract): void {
+  if (!EXPECTED_FAILURE_PHASES.includes(contract.phase)) {
+    throw new Error(
+      `Scenario ${scenarioId} expectedFailure.phase invalid: ${String(contract.phase)} (allowed: ${EXPECTED_FAILURE_PHASES.join(", ")})`,
+    );
+  }
+  if (typeof contract.errorClass !== "string" || contract.errorClass.trim().length === 0) {
+    throw new Error(`Scenario ${scenarioId} expectedFailure.errorClass must be a non-empty string`);
+  }
+  if (contract.forbiddenSideEffects !== undefined) {
+    if (!Array.isArray(contract.forbiddenSideEffects)) {
+      throw new Error(`Scenario ${scenarioId} expectedFailure.forbiddenSideEffects must be an array`);
+    }
+    for (const entry of contract.forbiddenSideEffects) {
+      if (typeof entry !== "string" || entry.trim().length === 0) {
+        throw new Error(
+          `Scenario ${scenarioId} expectedFailure.forbiddenSideEffects entries must be non-empty strings`,
+        );
+      }
+    }
+  }
+}
+
 export function validateRunPlan(plan: RunPlan): void {
   if (!plan.scenarioId) {
     throw new Error("RunPlan missing scenarioId");
@@ -101,6 +243,9 @@ export function validateRunPlan(plan: RunPlan): void {
   if (plan.sutBoundaries.length === 0) {
     throw new Error(`RunPlan ${plan.scenarioId} missing SUT boundaries`);
   }
+  if (plan.expectedFailure) {
+    validateExpectedFailure(plan.scenarioId, plan.expectedFailure);
+  }
 }
 
 export function compileRunPlans(inputs: Array<string | ScenarioDefinition>): RunPlan[] {
@@ -112,7 +257,7 @@ export function compileRunPlans(inputs: Array<string | ScenarioDefinition>): Run
     const plan: RunPlan = {
       scenarioId: scenario.id,
       status: "compiled",
-      note: "compiled plan-only preview; live execution lands in later phases",
+      note: "compiled plan; phase orchestrators execute actions then assertions",
       manifestPath: scenario.manifestPath,
       manifest,
       environment: scenario.environment,
@@ -182,6 +327,18 @@ export function renderPlanText(plans: RunPlan[]): string {
     }
     for (const phase of plan.phases) {
       lines.push(`Phase: ${phase.name}`);
+      for (const action of phase.actions) {
+        const policy: string[] = [];
+        if (action.timeoutSeconds) {
+          policy.push(`timeout=${action.timeoutSeconds}s`);
+        }
+        const target = action.kind === "shell-fn"
+          ? `${action.fn ?? ""}${action.arg ? ` ${action.arg}` : ""}`.trim()
+          : action.scriptRef;
+        const policySuffix = policy.length > 0 ? ` (${policy.join(", ")})` : "";
+        const targetSuffix = target ? ` -> ${target}` : "";
+        lines.push(`  Action: ${action.id}${policySuffix}${targetSuffix}`);
+      }
       for (const group of phase.assertionGroups) {
         lines.push(`  Group: ${group.id}`);
         for (const step of group.steps) {
diff --git a/test/e2e-scenario/scenarios/orchestrators/context.ts b/test/e2e-scenario/scenarios/orchestrators/context.ts
new file mode 100644
index 0000000000..35394121fc
--- /dev/null
+++ b/test/e2e-scenario/scenarios/orchestrators/context.ts
@@ -0,0 +1,108 @@
+// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+import fs from "node:fs";
+import path from "node:path";
+import type { RunContext, RunPlan } from "../types.ts";
+
+// Spec ownership: emitting the normalized context.env that downstream
+// shell helpers consume is FRAMEWORK INFRASTRUCTURE, not a phase action.
+// Doing it as a shell action coupled the typed runner back to the old
+// resolver's plan.json shape; doing it here keeps the typed RunPlan as
+// the single source of truth.
+//
+// We seed context.env with values derivable from the typed RunPlan
+// (scenario id, install method, agent/provider/route, default sandbox
+// name and gateway URL). Onboarding helpers may overwrite these via
+// e2e_context_set (e.g. assigning a real sandbox name, real gateway
+// URL after the gateway boots).
+
+function platformOsFromManifest(plan: RunPlan): string {
+  const explicit = plan.manifest?.spec.setup.platform.os;
+  if (typeof explicit === "string" && explicit.length > 0) {
+    return explicit;
+  }
+  // Fall back to the scenario environment platform id ("ubuntu-local",
+  // "macos-local", "wsl-local", "gpu-runner", "brev-launchable").
+  const platform = plan.environment?.platform ?? "";
+  if (platform.startsWith("macos")) return "macos";
+  if (platform.startsWith("wsl")) return "wsl";
+  if (platform.startsWith("brev")) return "ubuntu";
+  if (platform.startsWith("gpu")) return "ubuntu";
+  return "ubuntu";
+}
+
+function executionTargetFromManifest(plan: RunPlan): string {
+  const explicit = plan.manifest?.spec.setup.platform.executionTarget;
+  if (typeof explicit === "string" && explicit.length > 0) {
+    return explicit;
+  }
+  return plan.environment?.platform === "brev-launchable" ? "remote" : "local";
+}
+
+function containerEngine(plan: RunPlan): string {
+  const explicit = plan.manifest?.spec.setup.runtime.containerEngine;
+  return typeof explicit === "string" && explicit.length > 0 ? explicit : "docker";
+}
+
+function containerDaemon(plan: RunPlan): string {
+  const explicit = plan.manifest?.spec.setup.runtime.containerDaemon;
+  if (typeof explicit === "string" && explicit.length > 0) {
+    return explicit;
+  }
+  return plan.environment?.runtime === "docker-missing" ? "missing" : "running";
+}
+
+function defaultGatewayUrl(agent: string): string {
+  // Mirrors the historical defaults from emit-context-from-plan.sh so
+  // existing shell helpers see the same seed values they used to.
+  return agent === "hermes" ? "http://127.0.0.1:8642" : "http://127.0.0.1:18789";
+}
+
+function escapeContextValue(value: string, key: string): string {
+  // The context library accepts plain `KEY=value` lines without quoting.
+  // Reject newlines (would corrupt the file), naming the offending key; otherwise pass through.
+  if (value.includes("\n")) {
+    throw new Error(`context.env value for ${key} must not contain newline: ${JSON.stringify(value)}`);
+  }
+  return value;
+}
+
+export interface ContextSeedResult {
+  path: string;
+  keys: string[];
+}
+
+export function seedContextEnv(ctx: RunContext, plan: RunPlan): ContextSeedResult {
+  const onboarding = plan.manifest?.spec.onboarding;
+  const agent = onboarding?.agent ?? "openclaw";
+  const provider = onboarding?.provider ?? "nvidia";
+  const inferenceRoute = onboarding?.modelRoute ?? "inference-local";
+  const onboardingPath = plan.environment?.onboarding ?? "unknown";
+  const installMethod = plan.environment?.install ?? "unknown";
+
+  const entries: Record<string, string> = {
+    E2E_SCENARIO: plan.scenarioId,
+    E2E_PLATFORM_OS: platformOsFromManifest(plan),
+    E2E_EXECUTION_TARGET: executionTargetFromManifest(plan),
+    E2E_INSTALL_METHOD: installMethod,
+    E2E_CONTAINER_ENGINE: containerEngine(plan),
+    E2E_CONTAINER_DAEMON: containerDaemon(plan),
+    E2E_ONBOARDING_PATH: onboardingPath,
+    E2E_AGENT: agent,
+    E2E_PROVIDER: provider,
+    E2E_INFERENCE_ROUTE: inferenceRoute,
+    E2E_SANDBOX_NAME: `e2e-${plan.scenarioId}`,
+    E2E_GATEWAY_URL: defaultGatewayUrl(agent),
+  };
+
+  // Path matches the shell helper's e2e_context_init: ${E2E_CONTEXT_DIR}/context.env
+  const contextPath = path.join(ctx.contextDir, "context.env");
+  fs.mkdirSync(ctx.contextDir, { recursive: true });
+  const lines = Object.entries(entries)
+    .map(([key, value]) => `${key}=${escapeContextValue(value, key)}`)
+    .join("\n");
+  fs.writeFileSync(contextPath, `${lines}\n`);
+
+  return { path: contextPath, keys: Object.keys(entries) };
+}
diff --git a/test/e2e-scenario/scenarios/orchestrators/phase.ts b/test/e2e-scenario/scenarios/orchestrators/phase.ts
index ae59a58e62..de952b23fc 100644
--- a/test/e2e-scenario/scenarios/orchestrators/phase.ts
+++ b/test/e2e-scenario/scenarios/orchestrators/phase.ts
@@ -1,31 +1,49 @@
 // SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 // SPDX-License-Identifier: Apache-2.0
 
+import { spawn } from "node:child_process";
 import fs from "node:fs";
 import path from "node:path";
+import { fileURLToPath } from "node:url";
 import type {
   AssertionResult,
   AssertionStep,
+  PhaseAction,
+  PhaseActionResult,
   PhaseName,
   PhaseResult,
   RunContext,
   RunPlanPhase,
   TransientClassifier,
 } from "../types.ts";
+import { buildChildEnv, pipeRedacted, redactString } from "./redaction.ts";
+
+const REPO_ROOT = path.resolve(path.dirname(fileURLToPath(import.meta.url)), "../../../..");
+const DEFAULT_STEP_TIMEOUT_SECONDS = 300;
 
 interface StepAttemptOutcome {
-  status: "passed" | "failed";
+  status: "passed" | "failed" | "skipped";
   classifier?: TransientClassifier;
   message?: string;
+  evidence?: string;
 }
 
-function transientForRef(ref: string): TransientClassifier {
-  if (ref.includes("provider") || ref.includes("transient")) {
-    return "provider-transient";
+// Heuristic transient classifier for shell step refs that don't print
+// their own classifier hint. Phase orchestrators own classification;
+// clients/scripts do not.
+function classifierForRef(ref: string): TransientClassifier {
+  if (/provider|inference|chat-completion|cloudflared|tunnel/i.test(ref)) {
+    // Both patterns are case-insensitive: the outer guard is /i, so
+    // mixed-case refs (Tunnel, Cloudflared) must still classify as
+    // external-tunnel rather than fall through to provider-transient.
+    return /tunnel|cloudflared/i.test(ref) ? "external-tunnel" : "provider-transient";
   }
-  if (ref.includes("gateway")) {
+  if (/gateway/i.test(ref)) {
     return "gateway-transient";
   }
+  if (/event-capture|tui|chat-events/i.test(ref)) {
+    return "empty-event-capture";
+  }
   return "runner-infra";
 }
 
@@ -33,35 +51,208 @@ export class PhaseOrchestrator {
   constructor(private readonly phaseName: PhaseName) {}
 
   async run(ctx: RunContext, phase: RunPlanPhase): Promise<PhaseResult> {
+    const actions: PhaseActionResult[] = [];
+    let actionFailed = false;
+    for (const action of phase.actions) {
+      const actionResult = await this.runAction(ctx, action);
+      actions.push(actionResult);
+      if (actionResult.status === "failed") {
+        actionFailed = true;
+        // Spec failure-layer rule: setup failure must not let assertions
+        // run and accidentally pass. Stop the phase here.
+        break;
+      }
+    }
     const assertions: AssertionResult[] = [];
-    for (const group of phase.assertionGroups) {
-      for (const step of group.steps) {
-        assertions.push(await this.runStep(ctx, step));
+    if (!actionFailed) {
+      for (const group of phase.assertionGroups) {
+        for (const step of group.steps) {
+          assertions.push(await this.runStep(ctx, step));
+        }
       }
     }
-    const status = assertions.some((assertion) => assertion.status === "failed") ? "failed" : "passed";
-    const result: PhaseResult = { phase: this.phaseName, status, assertions };
+    const assertionsFailed = assertions.some((assertion) => assertion.status === "failed");
+    const allSkipped =
+      !actionFailed &&
+      assertions.length > 0 &&
+      assertions.every((assertion) => assertion.status === "skipped");
+    let status: PhaseResult["status"];
+    if (actionFailed || assertionsFailed) {
+      status = "failed";
+    } else if (allSkipped || (actions.length === 0 && assertions.length === 0)) {
+      status = "skipped";
+    } else {
+      status = "passed";
+    }
+    const result: PhaseResult = { phase: this.phaseName, status, actions, assertions };
     this.writePhaseResult(ctx, result);
     return result;
   }
 
+  private async runAction(ctx: RunContext, action: PhaseAction): Promise<PhaseActionResult> {
+    const startedAt = Date.now();
+    const scriptPath = path.isAbsolute(action.scriptRef)
+      ? action.scriptRef
+      : path.resolve(REPO_ROOT, action.scriptRef);
+    if (!fs.existsSync(scriptPath)) {
+      return {
+        id: action.id,
+        status: "failed",
+        durationMs: Date.now() - startedAt,
+        message: `phase action ${action.id} script not found: ${scriptPath}`,
+      };
+    }
+    const timeoutSeconds = action.timeoutSeconds ?? DEFAULT_STEP_TIMEOUT_SECONDS;
+    const logDir = path.join(ctx.contextDir, ".e2e", "actions");
+    fs.mkdirSync(logDir, { recursive: true });
+    const logPath = path.join(logDir, `${action.id}.log`);
+
+    // Compose the bash invocation. shell-fn sources the dispatcher and
+    // calls the named function with its single positional arg; shell
+    // executes the script directly. We always go through bash -lc so
+    // sourced shell helpers see a normal interactive-style env.
+    const dispatchAction = path.join(REPO_ROOT, "test/e2e-scenario/nemoclaw_scenarios/dispatch-action.sh");
+    const useDispatchLauncher = action.kind === "shell-fn" && fs.existsSync(dispatchAction);
+    const bashArgs: string[] = useDispatchLauncher
+      ? [dispatchAction, action.fn ?? "", action.arg ?? "", scriptPath]
+      : [scriptPath, ...(action.arg ? [action.arg] : [])];
+
+    // Framework-owned secret hygiene at the spawn boundary. The child
+    // gets a minimal allowlisted env plus only the secrets this action
+    // explicitly declared via PhaseAction.secretEnv. See
+    // orchestrators/redaction.ts for the full contract.
+    const env = buildChildEnv(process.env, {
+      secretEnv: action.secretEnv,
+      frameworkOverlay: {
+        E2E_CONTEXT_DIR: ctx.contextDir,
+        E2E_PHASE: action.phase,
+        E2E_ACTION_ID: action.id,
+      },
+    });
+
+    return await new Promise<PhaseActionResult>((resolve) => {
+      const child = spawn("bash", bashArgs, { env, cwd: REPO_ROOT, detached: true });
+      const pgid = child.pid;
+      const logStream = fs.createWriteStream(logPath);
+      let stderrTail = "";
+      // Every byte from the child passes through redactString before
+      // hitting the evidence log or the stderr tail; raw output never
+      // touches disk or PhaseActionResult.message.
+      pipeRedacted(child.stdout, logStream);
+      pipeRedacted(child.stderr, logStream, (redactedChunk) => {
+        stderrTail = (stderrTail + redactedChunk).slice(-4096);
+      });
+
+      const killGroup = (signal: NodeJS.Signals) => {
+        if (typeof pgid !== "number") {
+          child.kill(signal);
+          return;
+        }
+        try {
+          process.kill(-pgid, signal);
+        } catch {
+          /* group already gone */
+        }
+      };
+
+      let timedOut = false;
+      const timeout = setTimeout(() => {
+        timedOut = true;
+        killGroup("SIGTERM");
+        setTimeout(() => {
+          if (!child.killed) {
+            killGroup("SIGKILL");
+          }
+        }, 5_000).unref();
+      }, timeoutSeconds * 1_000);
+
+      const finishLog = (): Promise<void> =>
+        new Promise((res) => {
+          if ((logStream as unknown as { closed?: boolean }).closed) {
+            res();
+            return;
+          }
+          logStream.once("finish", () => res());
+          logStream.once("error", () => res());
+          logStream.end();
+        });
+
+      child.on("error", (err) => {
+        clearTimeout(timeout);
+        void finishLog().then(() =>
+          resolve({
+            id: action.id,
+            status: "failed",
+            durationMs: Date.now() - startedAt,
+            evidence: logPath,
+            message: redactString(`phase action ${action.id} spawn error: ${err.message}`),
+          }),
+        );
+      });
+
+      child.on("close", (code, signal) => {
+        clearTimeout(timeout);
+        void finishLog().then(() => {
+          const durationMs = Date.now() - startedAt;
+          if (timedOut) {
+            resolve({
+              id: action.id,
+              status: "failed",
+              durationMs,
+              evidence: logPath,
+              message: `phase action ${action.id} exceeded ${timeoutSeconds}s (signal=${signal ?? "SIGTERM"})`,
+            });
+            return;
+          }
+          if (code === 0) {
+            // Publish the action's evidence log under a stable alias for
+            // legacy assertions that reference fixed filenames
+            // (onboard.log, install.log, ...). Best-effort; alias copy
+            // failures do not fail the action.
+            if (action.aliasPath) {
+              try {
+                const aliasFull = path.isAbsolute(action.aliasPath)
+                  ? action.aliasPath
+                  : path.join(ctx.contextDir, action.aliasPath);
+                fs.mkdirSync(path.dirname(aliasFull), { recursive: true });
+                fs.copyFileSync(logPath, aliasFull);
+              } catch {
+                /* alias is a convenience; never fail action on copy */
+              }
+            }
+            resolve({ id: action.id, status: "passed", durationMs, evidence: logPath });
+            return;
+          }
+          resolve({
+            id: action.id,
+            status: "failed",
+            durationMs,
+            evidence: logPath,
+            message: `phase action ${action.id} exit ${code ?? "null"}: ${stderrTail.split("\n").slice(-3).join(" | ").trim()}`,
+          });
+        });
+      });
+    });
+  }
+
   private async runStep(ctx: RunContext, step: AssertionStep): Promise<AssertionResult> {
     const startedAt = Date.now();
     const rawAttempts = step.reliability?.retry?.attempts;
-    const maxAttempts = typeof rawAttempts === "number" && Number.isFinite(rawAttempts) ? Math.max(1, Math.floor(rawAttempts)) : 1;
+    const maxAttempts =
+      typeof rawAttempts === "number" && Number.isFinite(rawAttempts) ? Math.max(1, Math.floor(rawAttempts)) : 1;
     let attempts = 0;
     let lastOutcome: StepAttemptOutcome = { status: "failed", message: "step did not run" };
     for (let attempt = 1; attempt <= maxAttempts; attempt += 1) {
       attempts = attempt;
       lastOutcome = await this.executeStep(ctx, step, attempt);
-      if (lastOutcome.status === "passed") {
+      if (lastOutcome.status === "passed" || lastOutcome.status === "skipped") {
         return {
           id: step.id,
-          status: "passed",
+          status: lastOutcome.status,
           attempts,
           durationMs: Date.now() - startedAt,
           classifier: attempt > 1 ? step.reliability?.retry?.on[0] : lastOutcome.classifier,
-          evidence: step.evidencePath,
+          evidence: lastOutcome.evidence ?? step.evidencePath,
           message: lastOutcome.message,
         };
       }
@@ -75,7 +266,7 @@ export class PhaseOrchestrator {
       attempts,
       durationMs: Date.now() - startedAt,
       classifier: lastOutcome.classifier,
-      evidence: step.evidencePath,
+      evidence: lastOutcome.evidence ?? step.evidencePath,
       message: lastOutcome.message,
     };
   }
@@ -92,26 +283,192 @@ export class PhaseOrchestrator {
     return step.reliability?.retry?.on.includes(classifier) ?? false;
   }
 
-  private async executeStep(_ctx: RunContext, step: AssertionStep, attempt: number): Promise<StepAttemptOutcome> {
-    const ref = step.implementation?.ref ?? "";
-    if (ref === "fake-pass" || ref === "phase-1-skeleton") {
-      return { status: "passed" };
+  private async executeStep(ctx: RunContext, step: AssertionStep, _attempt: number): Promise<StepAttemptOutcome> {
+    const kind = step.implementation?.kind;
+    if (kind === "shell") {
+      return this.runShellStep(ctx, step);
     }
-    if (ref === "fake-retry-once-pass") {
-      return attempt === 1
-        ? { status: "failed", classifier: step.reliability?.retry?.on[0] ?? "gateway-transient" }
-        : { status: "passed" };
+    if (kind === "probe") {
+      // Probe registry lands in a follow-up PR. Until then, probes
+      // surface as visibly skipped — never as fake green. For
+      // security-sensitive or otherwise required probes, the run
+      // must NOT pass on this gap; the typed registry marks those
+      // with `required: true` and we reclassify the skip as a
+      // failure so the phase result fails closed.
+      const ref = step.implementation?.ref ?? "<no ref>";
+      if (step.required) {
+        return {
+          status: "failed",
+          classifier: "runner-infra",
+          message: `required probe not registered: ${ref} (step ${step.id})`,
+        };
+      }
+      return { status: "skipped", message: `probe not registered: ${ref}` };
     }
-    if (ref === "fake-always-transient") {
-      return { status: "failed", classifier: step.reliability?.retry?.on[0] ?? transientForRef(ref) };
+    if (kind === "pending") {
+      // pending steps surface as skipped with the placeholder ref so
+      // gaps are visible in plan output and phase results. Required
+      // pending steps (e.g. expected-failure side-effect validators
+      // for negative scenarios) fail closed instead — the run cannot
+      // honestly pass while the contract is unimplemented.
+      const ref = step.implementation?.ref ?? "";
+      if (step.required) {
+        return {
+          status: "failed",
+          classifier: "runner-infra",
+          message: `required pending step not implemented: ${ref} (step ${step.id})`,
+        };
+      }
+      return { status: "skipped", message: `pending: ${ref}` };
+    }
+    throw new Error(`Unknown assertion step kind for ${step.id}: ${String(kind)}`);
+  }
+
+  private async runShellStep(ctx: RunContext, step: AssertionStep): Promise<StepAttemptOutcome> {
+    const ref = step.implementation?.ref;
+    if (!ref) {
+      return { status: "failed", message: `shell step ${step.id} missing implementation.ref` };
     }
-    if (step.implementation?.kind === "shell" && _ctx.dryRun) {
-      return { status: "passed", message: `dry-run shell ${ref}` };
+    const scriptPath = path.isAbsolute(ref) ? ref : path.resolve(REPO_ROOT, ref);
+    if (!fs.existsSync(scriptPath)) {
+      return { status: "failed", message: `shell step ${step.id} script not found: ${scriptPath}` };
     }
-    if (step.implementation?.kind === "probe" && _ctx.dryRun) {
-      return { status: "passed", message: `dry-run probe ${ref}` };
+
+    const timeoutSeconds = step.reliability?.timeoutSeconds ?? DEFAULT_STEP_TIMEOUT_SECONDS;
+    const logDir = path.join(ctx.contextDir, ".e2e", "logs");
+    fs.mkdirSync(logDir, { recursive: true });
+    const logPath = path.join(logDir, `${step.id}.log`);
+
+    // Framework-owned secret hygiene at the spawn boundary (mirrors
+    // runAction). The shell step's child gets only the framework
+    // allowlist + scenario context.env keys + step.secretEnv
+    // declarations. See orchestrators/redaction.ts.
+    const env = buildChildEnv(process.env, {
+      secretEnv: step.secretEnv,
+      frameworkOverlay: {
+        E2E_CONTEXT_DIR: ctx.contextDir,
+        E2E_STEP_ID: step.id,
+        E2E_PHASE: step.phase,
+      },
+    });
+    // Surface scenario-derived context (E2E_SCENARIO, E2E_SANDBOX_NAME,
+    // E2E_GATEWAY_URL, etc.) that the framework wrote at the start of the
+    // run and that environment+onboarding phases extended via
+    // e2e_context_set. The shell context library writes to
+    // ${E2E_CONTEXT_DIR}/context.env, NOT to ${E2E_CONTEXT_DIR}/.e2e/.
+    const contextEnvPath = path.join(ctx.contextDir, "context.env");
+    if (fs.existsSync(contextEnvPath)) {
+      const contextEnv = fs.readFileSync(contextEnvPath, "utf8");
+      for (const line of contextEnv.split("\n")) {
+        const trimmed = line.trim();
+        if (!trimmed || trimmed.startsWith("#")) {
+          continue;
+        }
+        const eq = trimmed.indexOf("=");
+        if (eq <= 0) {
+          continue;
+        }
+        const key = trimmed.slice(0, eq);
+        let value = trimmed.slice(eq + 1);
+        if ((value.startsWith('"') && value.endsWith('"')) || (value.startsWith("'") && value.endsWith("'"))) {
+          value = value.slice(1, -1);
+        }
+        env[key] = value;
+      }
     }
-    return { status: "failed", message: `unsupported live step ${step.id}` };
+
+    return await new Promise<StepAttemptOutcome>((resolve) => {
+      // detached: true puts the child (and any of its children, e.g. a `sleep`
+      // spawned by bash) into its own process group. We send signals to the
+      // negative pid so the whole group dies on timeout. Without this, bash
+      // ignores SIGTERM until its current foreground command (e.g. sleep)
+      // returns, and timeouts effectively don't work.
+      const child = spawn("bash", [scriptPath], { env, cwd: REPO_ROOT, detached: true });
+      const pgid = child.pid;
+      const logStream = fs.createWriteStream(logPath);
+      let stderrTail = "";
+      // Redact at the I/O boundary; raw bytes from the child must not
+      // reach the evidence log or the stderr tail that flows into
+      // step result.message.
+      pipeRedacted(child.stdout, logStream);
+      pipeRedacted(child.stderr, logStream, (redactedChunk) => {
+        stderrTail = (stderrTail + redactedChunk).slice(-4096);
+      });
+
+      const killGroup = (signal: NodeJS.Signals) => {
+        if (typeof pgid !== "number") {
+          child.kill(signal);
+          return;
+        }
+        try {
+          process.kill(-pgid, signal);
+        } catch {
+          /* group already gone */
+        }
+      };
+
+      let timedOut = false;
+      const timeout = setTimeout(() => {
+        timedOut = true;
+        killGroup("SIGTERM");
+        setTimeout(() => {
+          if (!child.killed) {
+            killGroup("SIGKILL");
+          }
+        }, 5_000).unref();
+      }, timeoutSeconds * 1_000);
+
+      // Wait for the log writeStream to fully flush before resolving so
+      // callers can synchronously read the evidence file. Without this, the
+      // 'close' event on the child fires before the WriteStream finishes
+      // draining, and tests/orchestrators see an empty log file.
+      const finishLog = (): Promise<void> =>
+        new Promise((res) => {
+          if ((logStream as unknown as { closed?: boolean }).closed) {
+            res();
+            return;
+          }
+          logStream.once("finish", () => res());
+          logStream.once("error", () => res());
+          logStream.end();
+        });
+
+      child.on("error", (err) => {
+        clearTimeout(timeout);
+        void finishLog().then(() =>
+          resolve({
+            status: "failed",
+            message: redactString(`shell step ${step.id} spawn error: ${err.message}`),
+            evidence: logPath,
+          }),
+        );
+      });
+
+      child.on("close", (code, signal) => {
+        clearTimeout(timeout);
+        void finishLog().then(() => {
+          if (timedOut) {
+            resolve({
+              status: "failed",
+              classifier: "runner-infra",
+              message: `shell step ${step.id} exceeded ${timeoutSeconds}s (signal=${signal ?? "SIGTERM"})`,
+              evidence: logPath,
+            });
+            return;
+          }
+          if (code === 0) {
+            resolve({ status: "passed", evidence: logPath });
+            return;
+          }
+          resolve({
+            status: "failed",
+            classifier: classifierForRef(ref),
+            message: `shell step ${step.id} exit ${code ?? "null"}: ${stderrTail.split("\n").slice(-3).join(" | ").trim()}`,
+            evidence: logPath,
+          });
+        });
+      });
+    });
   }
 
   private writePhaseResult(ctx: RunContext, result: PhaseResult) {
diff --git a/test/e2e-scenario/scenarios/orchestrators/redaction.ts b/test/e2e-scenario/scenarios/orchestrators/redaction.ts
new file mode 100644
index 0000000000..745ec61126
--- /dev/null
+++ b/test/e2e-scenario/scenarios/orchestrators/redaction.ts
@@ -0,0 +1,212 @@
+// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+/**
+ * Framework-owned secret hygiene at the spawn boundary.
+ *
+ * Spec ownership: redaction and child-env minimization are FRAMEWORK
+ * INFRASTRUCTURE, not a per-action / per-script / per-workflow concern.
+ * Children spawned by PhaseOrchestrator must (a) receive a minimal,
+ * typed env (framework allowlist + per-action declared `secretEnv`
+ * passthrough only), and (b) have their stdout/stderr passed through
+ * redaction before any byte reaches an evidence log or
+ * PhaseResult.message. There is no opt-out flag, no env switch, no
+ * helper that bypasses this. One execution mode, secrets always
+ * redacted in evidence — same one-mode discipline that motivates the
+ * rest of this PR.
+ *
+ * Pattern source-of-truth: src/lib/security/secret-patterns.ts. We
+ * mirror the canonical regex sets locally (no import) and apply them
+ * here; a parity test keeps framework redaction in lockstep with
+ * product-runtime redaction without coupling to product modules.
+ *
+ * Bash side: test/e2e-scenario/runtime/lib/context.sh::e2e_context_dump
+ * already redacts on dump via _e2e_context_is_sensitive_key. Bash
+ * helpers must continue to use that for diagnostic dumps; this module
+ * only covers the TS-spawned-child I/O path.
+ *
+ * Tests:
+ *   test/e2e-scenario/framework-tests/e2e-phase-orchestrators.test.ts
+ *     - test_should_not_persist_secret_shaped_child_output_into_evidence
+ *     - test_should_drop_non_allowlisted_parent_env_unless_declared_in_secretEnv
+ *     - test_should_pass_declared_secretEnv_through_to_child
+ */
+
+import type { Readable, Writable } from "node:stream";
+
+const REDACTED = "<REDACTED>";
+
+// Framework-local mirror of src/lib/security/secret-patterns.ts. The
+// framework deliberately does not import from src/lib/security/ so it
+// stays decoupled from product runtime modules and the cross-tsconfig
+// boundary. A parity test
+// (test/e2e-scenario/framework-tests/e2e-redaction-parity.test.ts)
+// asserts these regex sources stay in lockstep with the canonical
+// product source so adding a token shape there keeps both layers
+// honest at once.
+const TOKEN_PREFIX_PATTERNS: RegExp[] = [
+  /nvapi-[A-Za-z0-9_-]{10,}/g,
+  /nvcf-[A-Za-z0-9_-]{10,}/g,
+  /ghp_[A-Za-z0-9_-]{10,}/g,
+  /(?:github_pat_)[A-Za-z0-9_]{30,}/g,
+  /sk-proj-[A-Za-z0-9_-]{10,}/g,
+  /sk-ant-[A-Za-z0-9_-]{10,}/g,
+  /sk-[A-Za-z0-9_-]{20,}/g,
+  /(?:xox[bpas]|xapp)-[A-Za-z0-9-]{10,}/g,
+  /A(?:K|S)IA[A-Z0-9]{16}/g,
+  /hf_[A-Za-z0-9]{10,}/g,
+  /glpat-[A-Za-z0-9_-]{10,}/g,
+  /gsk_[A-Za-z0-9]{10,}/g,
+  /pypi-[A-Za-z0-9_-]{10,}/g,
+  /\bbot\d{8,10}:[A-Za-z0-9_-]{35}\b/g,
+  /\b\d{8,10}:[A-Za-z0-9_-]{35}\b/g,
+  /\b[A-Za-z0-9]{24}\.[A-Za-z0-9_-]{6}\.[A-Za-z0-9_-]{27,}\b/g,
+];
+
+const CONTEXT_PATTERNS: RegExp[] = [
+  /(?<=Bearer\s+)[A-Za-z0-9_.+/=-]{10,}/gi,
+  /(?<=(?:_KEY|API_KEY|SECRET|TOKEN|PASSWORD|CREDENTIAL)[=: ]['"]?)[A-Za-z0-9_.+/=-]{10,}/gi,
+];
+
+/**
+ * Replace every secret-shaped token in `text` with `<REDACTED>`. Uses
+ * the canonical TOKEN_PREFIX_PATTERNS + CONTEXT_PATTERNS sets.
+ *
+ * Best-effort against unknown token shapes. The actual defense is the
+ * env allowlist (buildChildEnv); pattern redaction catches what slips
+ * through (e.g. error messages that echo a secret value).
+ */
+export function redactString(text: string): string {
+  if (!text) {
+    return text;
+  }
+  // Token-prefix patterns run first, then the context-anchored ones;
+  // lastIndex is reset because the shared regexes carry the g flag.
+  const scrub = (acc: string, pattern: RegExp): string => {
+    pattern.lastIndex = 0;
+    return acc.replace(pattern, REDACTED);
+  };
+  const afterPrefixes = TOKEN_PREFIX_PATTERNS.reduce(scrub, text);
+  return CONTEXT_PATTERNS.reduce(scrub, afterPrefixes);
+}
+
+// Env keys the framework guarantees children may always see. Anything
+// outside this set, outside FRAMEWORK_ENV_PREFIXES, and not declared
+// in PhaseAction.secretEnv / AssertionStep.secretEnv is dropped before
+// the child spawns.
+const FRAMEWORK_ENV_ALLOWLIST: ReadonlySet<string> = new Set([
+  "PATH",
+  "HOME",
+  "SHELL",
+  "USER",
+  "LOGNAME",
+  "LANG",
+  "LC_ALL",
+  "LC_CTYPE",
+  "TZ",
+  "TERM",
+  "TMPDIR",
+  "RUNNER_TEMP",
+  "RUNNER_OS",
+  "GITHUB_ACTIONS",
+  "CI",
+  "NEMOCLAW_NON_INTERACTIVE",
+  "NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE",
+]);
+
+const FRAMEWORK_ENV_PREFIXES: readonly string[] = ["E2E_", "NEMOCLAW_LOG_"];
+
+// Shape required of any declared secretEnv key: upper-case, and ending
+// in a secret-bearing suffix after at least one leading character (a
+// bare `TOKEN` is rejected). Keeps non-secret values out of the
+// secretEnv channel and the allowlist-vs-declared-secret split honest.
+const SECRET_ENV_KEY_SHAPE =
+  /^[A-Z][A-Z0-9_]*(?:API[_]?KEY|TOKEN|SECRET|PASSWORD|CREDENTIAL|PASSPHRASE|PRIVATE[_]?KEY|ACCESS[_]?KEY)$/;
+
+export function isValidSecretEnvKey(key: string): boolean {
+  return SECRET_ENV_KEY_SHAPE.test(key);
+}
+
+export interface BuildChildEnvOptions {
+  /** Declared secret-bearing env keys to pass through; buildChildEnv throws unless each passes isValidSecretEnvKey. */
+  secretEnv?: readonly string[];
+  /** Framework-controlled overlay (E2E_CONTEXT_DIR, E2E_PHASE, E2E_*_ID); buildChildEnv applies it last, so it wins. */
+  frameworkOverlay: NodeJS.ProcessEnv;
+}
+
+/**
+ * Build the child's env from `base` (typically `process.env`) by
+ * keeping only:
+ *   1. keys in FRAMEWORK_ENV_ALLOWLIST
+ *   2. keys starting with one of FRAMEWORK_ENV_PREFIXES
+ *   3. keys explicitly declared in `opts.secretEnv` (validated shape)
+ * then layering `opts.frameworkOverlay` on top.
+ *
+ * Throws if a `secretEnv` entry doesn't match the secret-key shape;
+ * better to fail loudly at compile/runtime than silently leak a
+ * non-secret env var (which would defeat the allowlist purpose).
+ */
+export function buildChildEnv(
+  base: NodeJS.ProcessEnv,
+  opts: BuildChildEnvOptions,
+): NodeJS.ProcessEnv {
+  // Framework keys: exact allowlist match or a recognised prefix.
+  const isFrameworkKey = (name: string): boolean =>
+    FRAMEWORK_ENV_ALLOWLIST.has(name) ||
+    FRAMEWORK_ENV_PREFIXES.some((prefix) => name.startsWith(prefix));
+  const childEnv: NodeJS.ProcessEnv = {};
+  for (const name of Object.keys(base)) {
+    const value = base[name];
+    if (value !== undefined && isFrameworkKey(name)) {
+      childEnv[name] = value;
+    }
+  }
+  // Declared secrets: every entry is shape-checked, even when unset in base.
+  for (const declared of opts.secretEnv ?? []) {
+    if (!isValidSecretEnvKey(declared)) {
+      throw new Error(
+        `secretEnv entry '${declared}' does not match the secret-key shape ` +
+          `(must end with API_KEY, TOKEN, SECRET, PASSWORD, CREDENTIAL, ` +
+          `PASSPHRASE, PRIVATE_KEY, or ACCESS_KEY). Refusing to allowlist.`,
+      );
+    }
+    const secret = base[declared];
+    if (secret !== undefined) {
+      childEnv[declared] = secret;
+    }
+  }
+  return Object.assign(childEnv, opts.frameworkOverlay);
+}
+
+/**
+ * Pipe `src` into `log`, redacting every chunk on the way through.
+ * Optional `onChunk` receives the already-redacted text (used by the
+ * orchestrator to keep a redacted stderr tail for failure messages).
+ * `src` is switched to utf8 decoding so a multi-byte character that
+ * straddles two chunks is decoded intact instead of becoming U+FFFD.
+ */
+export function pipeRedacted(
+  src: Readable,
+  log: Writable,
+  onChunk?: (redactedChunk: string) => void,
+): void {
+  src.setEncoding("utf8").on("data", (chunk: string) => {
+    const redacted = redactString(chunk);
+    log.write(redacted);
+    onChunk?.(redacted);
+  });
+}
+
+/**
+ * Compact array of all framework env keys the child sees by default.
+ * Exported for tests/diagnostics; do not use to bypass the boundary.
+ */
+export function frameworkEnvAllowlistSnapshot(): {
+  keys: string[];
+  prefixes: string[];
+} {
+  // Fresh copies, so callers cannot mutate the framework's own sets.
+  const keys = Array.from(FRAMEWORK_ENV_ALLOWLIST).sort();
+  const prefixes = FRAMEWORK_ENV_PREFIXES.slice();
+  return { keys, prefixes };
+}
diff --git a/test/e2e-scenario/scenarios/orchestrators/runner.ts b/test/e2e-scenario/scenarios/orchestrators/runner.ts
index 6ab3b76c62..228d32d452 100644
--- a/test/e2e-scenario/scenarios/orchestrators/runner.ts
+++ b/test/e2e-scenario/scenarios/orchestrators/runner.ts
@@ -1,7 +1,8 @@
 // SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 // SPDX-License-Identifier: Apache-2.0
 
-import type { PhaseResult, RunContext, RunPlan, RunPlanPhase } from "../types.ts";
+import type { PhaseActionResult, PhaseResult, RunContext, RunPlan, RunPlanPhase } from "../types.ts";
+import { seedContextEnv } from "./context.ts";
 import { EnvironmentOrchestrator } from "./environment.ts";
 import { OnboardingOrchestrator } from "./onboarding.ts";
 import { RuntimeOrchestrator } from "./runtime.ts";
@@ -28,22 +29,65 @@ export class ScenarioRunner {
   }
 
   async run(ctx: RunContext, plan: RunPlan): Promise<PhaseResult[]> {
+    // Seed context.env from the typed RunPlan once, before any phase
+    // runs. Spec ownership: framework infrastructure (the runner), not
+    // a shell action. Onboarding may extend context.env via
+    // e2e_context_set; the runtime phase reads whatever is on disk.
+    seedContextEnv(ctx, plan);
+
     const results: PhaseResult[] = [];
     for (const phase of plan.phases) {
-      if (phase.name === "environment") {
-        results.push(await this.environment.run(ctx, phase, results));
-        continue;
-      }
-      if (phase.name === "onboarding") {
-        results.push(await this.onboarding.run(ctx, phase, results));
+      const blocked = blockingPriorResult(results);
+      if (blocked) {
+        // Cross-phase short-circuit: the previous phase's setup work
+        // failed, so this phase cannot meaningfully run. Synthesize a
+        // skipped PhaseResult with a clear reason so artifacts stay
+        // honest (no false greens, no <1s assertion explosion).
+        results.push({
+          phase: phase.name,
+          status: "skipped",
+          actions: [],
+          assertions: [
+            {
+              id: `${phase.name}.blocked`,
+              status: "skipped",
+              attempts: 0,
+              durationMs: 0,
+              message: `phase blocked by prior failure: ${blocked.phase} action ${blocked.action.id} failed (${blocked.action.message ?? "no message"})`,
+            },
+          ],
+        });
         continue;
       }
-      if (phase.name === "runtime") {
-        results.push(await this.runtime.run(ctx, phase, results));
-        continue;
-      }
-      throw new Error(`Unsupported phase: ${String(phase.name)}`);
+      const orchestrator = this.orchestratorFor(phase.name);
+      results.push(await orchestrator.run(ctx, phase, results));
     }
     return results;
   }
+
+  private orchestratorFor(name: RunPlanPhase["name"]): PhaseRunner {
+    if (name === "environment") return this.environment;
+    if (name === "onboarding") return this.onboarding;
+    if (name === "runtime") return this.runtime;
+    throw new Error(`Unsupported phase: ${String(name)}`);
+  }
+}
+
+interface BlockingFailure {
+  phase: PhaseResult["phase"];
+  action: PhaseActionResult;
+}
+
+function blockingPriorResult(results: PhaseResult[]): BlockingFailure | undefined {
+  // A phase action failure (real setup work didn't succeed) blocks
+  // downstream phases. Assertion failures do NOT block downstream
+  // phases - they are expected to be reported alongside other phase
+  // results so reviewers can see all failure layers at once.
+  for (const result of results) {
+    const failedAction = result.actions.find((action) => action.status === "failed");
+    if (failedAction) {
+      return { phase: result.phase, action: failedAction };
+    }
+  }
+  return undefined;
 }
diff --git a/test/e2e-scenario/scenarios/run.ts b/test/e2e-scenario/scenarios/run.ts
index e666e07844..2a16c85996 100644
--- a/test/e2e-scenario/scenarios/run.ts
+++ b/test/e2e-scenario/scenarios/run.ts
@@ -4,33 +4,29 @@
 import { compileRunPlans, renderPlanText, writePlanArtifacts } from "./compiler.ts";
 import { ScenarioRunner } from "./orchestrators/runner.ts";
 import { listScenarios } from "./registry.ts";
+import type { PhaseResult } from "./types.ts";
 
 interface Args {
   list: boolean;
+  emitMatrix: boolean;
   planOnly: boolean;
-  dryRun: boolean;
-  validateOnly: boolean;
   scenarios: string[];
 }
 
 function parseArgs(argv: string[]): Args {
-  const args: Args = { list: false, planOnly: false, dryRun: false, validateOnly: false, scenarios: [] };
+  const args: Args = { list: false, emitMatrix: false, planOnly: false, scenarios: [] };
   for (let i = 0; i < argv.length; i += 1) {
     const arg = argv[i];
     if (arg === "--list") {
       args.list = true;
       continue;
     }
-    if (arg === "--plan-only") {
-      args.planOnly = true;
+    if (arg === "--emit-matrix") {
+      args.emitMatrix = true;
       continue;
     }
-    if (arg === "--dry-run") {
-      args.dryRun = true;
-      continue;
-    }
-    if (arg === "--validate-only") {
-      args.validateOnly = true;
+    if (arg === "--plan-only") {
+      args.planOnly = true;
       continue;
     }
     if (arg === "--scenarios") {
@@ -54,17 +50,29 @@ function printList() {
   }
 }
 
+function emitMatrix() {
+  // Print the typed registry as a single-line GitHub Actions matrix
+  // payload ({ include: [...] }); nothing is written besides stdout.
+  // Consumed by the dynamic matrix workflow (PR #4359).
+  const include = [];
+  for (const { id, description } of listScenarios()) {
+    // Scenarios without a description are emitted with an empty string.
+    include.push({ id, description: description ?? "" });
+  }
+  console.log(JSON.stringify({ include }));
+}
+
 async function main() {
   const args = parseArgs(process.argv.slice(2));
   if (args.list) {
     printList();
     return;
   }
-
-  const modeCount = [args.planOnly, args.dryRun, args.validateOnly].filter(Boolean).length;
-  if (modeCount !== 1) {
-    throw new Error("Use exactly one of --plan-only, --dry-run, or --validate-only with --scenarios <id[,id...]>");
+  if (args.emitMatrix) {
+    emitMatrix();
+    return;
   }
+
   if (args.scenarios.length === 0) {
     throw new Error("scenario execution requires --scenarios <id[,id...]>");
   }
@@ -78,12 +86,43 @@ async function main() {
   writePlanArtifacts(plans, contextDir);
   console.log(renderPlanText(plans));
 
-  if (args.dryRun) {
-    const runner = new ScenarioRunner();
-    for (const plan of plans) {
-      await runner.run({ contextDir, dryRun: true }, plan);
+  if (args.planOnly) {
+    // Local debug only. Workflows must not pass --plan-only.
+    return;
+  }
+
+  const runner = new ScenarioRunner();
+  const allResults: PhaseResult[] = [];
+  let anyFailed = false;
+  for (const plan of plans) {
+    const results = await runner.run({ contextDir }, plan);
+    allResults.push(...results);
+    if (results.some((result) => result.status === "failed")) {
+      anyFailed = true;
     }
   }
+
+  // Surface a compact run summary so phase results don't have to be opened
+  // to see what passed.
+  console.log("");
+  console.log("Phase results:");
+  for (const result of allResults) {
+    const counts = result.assertions.reduce(
+      (acc, assertion) => {
+        acc[assertion.status] = (acc[assertion.status] ?? 0) + 1;
+        return acc;
+      },
+      {} as Record<string, number>,
+    );
+    const detail = Object.entries(counts)
+      .map(([status, count]) => `${status}=${count}`)
+      .join(" ");
+    console.log(`  ${result.phase}: ${result.status} (${detail || "no steps"})`);
+  }
+
+  if (anyFailed) {
+    process.exitCode = 1;
+  }
 }
 
 try {
diff --git a/test/e2e-scenario/scenarios/types.ts b/test/e2e-scenario/scenarios/types.ts
index b29f8458d6..46201f55a2 100644
--- a/test/e2e-scenario/scenarios/types.ts
+++ b/test/e2e-scenario/scenarios/types.ts
@@ -66,6 +66,21 @@ export interface AssertionStep {
   };
   evidencePath?: string;
   reliability?: AssertionStepReliability;
+  // Declared parent-env keys this step requires beyond the framework's
+  // allowlist. Anything not allowlisted and not declared here is
+  // dropped before spawn. See orchestrators/redaction.ts. Each entry
+  // must match the secret-key shape; the framework rejects non-secret
+  // names to keep the allowlist-vs-declared-secret boundary honest.
+  secretEnv?: readonly string[];
+  // When true, a probe/pending step that resolves as "skipped" is
+  // reclassified as "failed" by the phase orchestrator. Required
+  // steps fail closed when their underlying implementation isn't
+  // available yet (probe registry not landed, expected-failure
+  // side-effect validator not implemented, ...) instead of silently
+  // producing fake green. Defaults to false; set true for security-
+  // sensitive suites and expected-failure validators that the run
+  // is not safe without.
+  required?: boolean;
 }
 
 export interface AssertionGroup {
@@ -100,9 +115,53 @@ export interface ScenarioDefinition {
   expectedFailure?: Record<string, unknown>;
 }
 
+// A phase action is real, deterministic setup work the phase orchestrator
+// performs BEFORE running its assertions: install nemoclaw, run
+// onboarding, emit context.env, etc. Actions short-circuit assertions on
+// failure (assertions don't run if the action they depend on failed).
+//
+// Spec ownership: phase orchestrators own actions. The top-level runner
+// must not execute actions; clients must not embed action policy.
+export interface PhaseAction {
+  id: string;
+  phase: PhaseName;
+  description?: string;
+  // "shell-fn" sources the bash dispatcher and invokes the named function.
+  // "shell"    runs an executable script (used for context-emit helper).
+  kind: "shell-fn" | "shell";
+  // Repo-relative path to the script.
+  scriptRef: string;
+  // For "shell-fn": the bash function to invoke after sourcing scriptRef.
+  fn?: string;
+  // Single positional arg passed to the function/script (install method or
+  // onboarding profile id today). Kept as a single string to keep stable
+  // ids predictable; multi-arg variants can extend this later.
+  arg?: string;
+  // Per-action timeout. No retry by default - install/onboard must fail
+  // loudly so the regression is visible. Retry stays a property of
+  // assertion steps, not actions.
+  timeoutSeconds?: number;
+  // Repo-relative evidence log path.
+  evidencePath?: string;
+  // Optional stable alias the orchestrator copies the evidence log to
+  // after a successful action. Lets legacy shell assertions that
+  // reference well-known filenames (e.g. ${E2E_CONTEXT_DIR}/onboard.log)
+  // keep working without coupling them to the action's stable id.
+  aliasPath?: string;
+  // Declared parent-env keys this action requires beyond the
+  // framework's allowlist (PATH, HOME, E2E_*, NEMOCLAW_*, ...).
+  // Anything not allowlisted and not declared here is dropped before
+  // spawn. See orchestrators/redaction.ts. Each entry must match the
+  // secret-key shape; the framework rejects non-secret names so the
+  // allowlist-vs-declared-secret boundary stays honest. Cloud install
+  // declares ["NVIDIA_API_KEY"]; slack onboarding declares the slack
+  // tokens it actually needs; etc.
+  secretEnv?: readonly string[];
+}
+
 export interface RunPlanPhase {
   name: PhaseName;
-  actions: string[];
+  actions: PhaseAction[];
   assertionGroups: AssertionGroup[];
 }
 
@@ -126,7 +185,6 @@ export interface RunPlan {
 
 export interface RunContext {
   contextDir: string;
-  dryRun: boolean;
 }
 
 export interface AssertionResult {
@@ -139,8 +197,20 @@ export interface AssertionResult {
   message?: string;
 }
 
+export interface PhaseActionResult {
+  id: string;
+  status: "passed" | "failed" | "skipped";
+  durationMs: number;
+  evidence?: string;
+  message?: string;
+}
+
 export interface PhaseResult {
   phase: PhaseName;
   status: "passed" | "failed" | "skipped";
+  // Action results are recorded distinctly from assertion results so
+  // failure-layer attribution stays unambiguous: a failure in actions
+  // means setup never completed; assertions did not have a fair chance.
+  actions: PhaseActionResult[];
   assertions: AssertionResult[];
 }
diff --git a/test/e2e-scenario/validation_suites/assert/gateway-alive.sh b/test/e2e-scenario/validation_suites/assert/gateway-alive.sh
index a498602d35..42f33e1c50 100755
--- a/test/e2e-scenario/validation_suites/assert/gateway-alive.sh
+++ b/test/e2e-scenario/validation_suites/assert/gateway-alive.sh
@@ -9,6 +9,8 @@ _E2E_GW_LIB_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../../runtime/lib" && pwd)
 . "${_E2E_GW_LIB_DIR}/env.sh"
 # shellcheck source=../../runtime/lib/context.sh
 . "${_E2E_GW_LIB_DIR}/context.sh"
+# shellcheck source=../sandbox-exec.sh
+. "$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)/sandbox-exec.sh"
 
 # e2e_gateway_assert_healthy [url]
 # Defaults to E2E_GATEWAY_URL from context; returns non-zero with a clear
@@ -23,10 +25,6 @@ e2e_gateway_assert_healthy() {
     return 2
   fi
   e2e_env_trace "gateway:check" "${url}"
-  if e2e_env_is_dry_run; then
-    echo "[dry-run] gateway check ${url} (skipped)"
-    return 0
-  fi
   # Prefer /health if available, otherwise just hit the base URL.
   local http_code
   http_code="$(curl -fsS -o /dev/null -w '%{http_code}' --max-time 5 "${url%/}/health" 2>/dev/null || echo 000)"
@@ -41,7 +39,9 @@ e2e_gateway_assert_healthy() {
     local sandbox_name
     sandbox_name="$(e2e_context_get E2E_SANDBOX_NAME)"
     if [[ -n "${sandbox_name}" ]] && command -v openshell >/dev/null 2>&1; then
-      http_code="$(openshell sandbox exec -n "${sandbox_name}" -- curl -fsS -o /dev/null -w '%{http_code}' --max-time 5 http://localhost:18789/health 2>/dev/null || echo 000)"
+      # Wrapper applies a per-call timeout so a wedged ssh handshake here
+      # cannot consume the orchestrator's whole step budget.
+      http_code="$(E2E_SANDBOX_EXEC_TIMEOUT_SECONDS=15 e2e_sandbox_exec "${sandbox_name}" -- curl -fsS -o /dev/null -w '%{http_code}' --max-time 5 http://localhost:18789/health 2>/dev/null || echo 000)"
       if [[ "${http_code}" == "200" || "${http_code}" == "401" ]]; then
         return 0
       fi
diff --git a/test/e2e-scenario/validation_suites/assert/sandbox-alive.sh b/test/e2e-scenario/validation_suites/assert/sandbox-alive.sh
index b85ef9cd60..473061e972 100755
--- a/test/e2e-scenario/validation_suites/assert/sandbox-alive.sh
+++ b/test/e2e-scenario/validation_suites/assert/sandbox-alive.sh
@@ -12,7 +12,7 @@ _E2E_SB_LIB_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../../runtime/lib" && pwd)
 
 # e2e_sandbox_assert_running
 # Requires E2E_SANDBOX_NAME in context. Real implementation queries
-# `nemoclaw list`; honors E2E_DRY_RUN.
+# `nemoclaw list`.
 e2e_sandbox_assert_running() {
   if ! e2e_context_require E2E_SANDBOX_NAME; then
     return 1
@@ -20,10 +20,6 @@ e2e_sandbox_assert_running() {
   local name
   name="$(e2e_context_get E2E_SANDBOX_NAME)"
   e2e_env_trace "sandbox:check" "${name}"
-  if e2e_env_is_dry_run; then
-    echo "[dry-run] sandbox check ${name} (skipped)"
-    return 0
-  fi
   if ! command -v nemoclaw >/dev/null 2>&1; then
     echo "e2e_sandbox_assert_running: nemoclaw CLI not on PATH" >&2
     return 1
diff --git a/test/e2e-scenario/validation_suites/hermes/00-hermes-health.sh b/test/e2e-scenario/validation_suites/hermes/00-hermes-health.sh
index 0fff0fd9ab..4b8161aea4 100755
--- a/test/e2e-scenario/validation_suites/hermes/00-hermes-health.sh
+++ b/test/e2e-scenario/validation_suites/hermes/00-hermes-health.sh
@@ -16,10 +16,6 @@ LIB_DIR="$(cd "${SCRIPT_DIR}/../../runtime/lib" && pwd)"
 
 echo "hermes-specific:hermes-health"
 e2e_context_require E2E_AGENT
-if e2e_env_is_dry_run; then
-  echo "[dry-run] would run Hermes health checks"
-  exit 0
-fi
 agent="$(e2e_context_get E2E_AGENT)"
 if [[ "${agent}" != "hermes" ]]; then
   echo "hermes-specific: E2E_AGENT should be 'hermes', got '${agent}'" >&2
diff --git a/test/e2e-scenario/validation_suites/inference/cloud/00-models-health.sh b/test/e2e-scenario/validation_suites/inference/cloud/00-models-health.sh
index 64e1b086fc..8277f05f38 100755
--- a/test/e2e-scenario/validation_suites/inference/cloud/00-models-health.sh
+++ b/test/e2e-scenario/validation_suites/inference/cloud/00-models-health.sh
@@ -13,17 +13,16 @@ LIB_DIR="$(cd "${SCRIPT_DIR}/../../../runtime/lib" && pwd)"
 . "${LIB_DIR}/env.sh"
 # shellcheck source=../../../runtime/lib/context.sh
 . "${LIB_DIR}/context.sh"
+# shellcheck source=../../sandbox-exec.sh
+. "${SCRIPT_DIR}/../../sandbox-exec.sh"
 
 echo "inference:models-health"
 e2e_context_require E2E_SANDBOX_NAME
 
-if e2e_env_is_dry_run; then
-  echo "[dry-run] would GET inference.local/v1/models from inside the sandbox"
-  exit 0
-fi
-
 name="$(e2e_context_get E2E_SANDBOX_NAME)"
-body="$(openshell sandbox exec --name "${name}" -- curl -fsS --max-time 30 "https://inference.local/v1/models")"
+# Orchestrator step cap is 30s; wrapper default 25s applies. Inner curl
+# --max-time keeps a hung HTTP read from consuming the whole budget.
+body="$(e2e_sandbox_exec "${name}" -- curl -fsS --max-time 20 "https://inference.local/v1/models")"
 if [[ -z "${body}" ]]; then
   echo "inference:models-health: no response from models endpoint" >&2
   exit 1
diff --git a/test/e2e-scenario/validation_suites/inference/cloud/01-chat-completion.sh b/test/e2e-scenario/validation_suites/inference/cloud/01-chat-completion.sh
index f54ff8806b..20f481504e 100755
--- a/test/e2e-scenario/validation_suites/inference/cloud/01-chat-completion.sh
+++ b/test/e2e-scenario/validation_suites/inference/cloud/01-chat-completion.sh
@@ -12,18 +12,20 @@ LIB_DIR="$(cd "${SCRIPT_DIR}/../../../runtime/lib" && pwd)"
 . "${LIB_DIR}/env.sh"
 # shellcheck source=../../../runtime/lib/context.sh
 . "${LIB_DIR}/context.sh"
+# shellcheck source=../../sandbox-exec.sh
+. "${SCRIPT_DIR}/../../sandbox-exec.sh"
 
 echo "inference:chat-completion"
 e2e_context_require E2E_SANDBOX_NAME
 
-if e2e_env_is_dry_run; then
-  echo "[dry-run] would POST a chat completion to inference.local from inside the sandbox"
-  exit 0
-fi
-
 name="$(e2e_context_get E2E_SANDBOX_NAME)"
 payload='{"model":"nvidia/nemotron-3-super-120b-a12b","messages":[{"role":"user","content":"Reply with exactly one word: PONG"}],"max_tokens":100}'
-response="$(openshell sandbox exec --name "${name}" -- curl -fsS --max-time 60 -H 'Content-Type: application/json' \
+# Orchestrator step cap is 60s; widen the wrapper cap to 50s so a hung
+# upstream surfaces with a clear diagnostic before SIGTERM. Inner curl
+# --max-time stays ~10s under the wrapper cap.
+# shellcheck disable=SC2034 # consumed by e2e_sandbox_exec via env
+E2E_SANDBOX_EXEC_TIMEOUT_SECONDS=50 \
+response="$(e2e_sandbox_exec "${name}" -- curl -fsS --max-time 40 -H 'Content-Type: application/json' \
   -d "${payload}" "https://inference.local/v1/chat/completions")"
 # CodeRabbit review item #12: substring expansion instead of `| head`
 # avoids SIGPIPE-driven false failures under `set -o pipefail`.
diff --git a/test/e2e-scenario/validation_suites/inference/cloud/02-inference-local-from-sandbox.sh b/test/e2e-scenario/validation_suites/inference/cloud/02-inference-local-from-sandbox.sh
index 6d1343a736..f5102efd74 100755
--- a/test/e2e-scenario/validation_suites/inference/cloud/02-inference-local-from-sandbox.sh
+++ b/test/e2e-scenario/validation_suites/inference/cloud/02-inference-local-from-sandbox.sh
@@ -13,18 +13,18 @@ LIB_DIR="$(cd "${SCRIPT_DIR}/../../../runtime/lib" && pwd)"
 . "${LIB_DIR}/env.sh"
 # shellcheck source=../../../runtime/lib/context.sh
 . "${LIB_DIR}/context.sh"
+# shellcheck source=../../sandbox-exec.sh
+. "${SCRIPT_DIR}/../../sandbox-exec.sh"
 
 echo "inference:sandbox-inference-local"
 e2e_context_require E2E_SANDBOX_NAME E2E_INFERENCE_ROUTE
 
-if e2e_env_is_dry_run; then
-  echo "[dry-run] would resolve inference-local from inside the sandbox"
-  exit 0
-fi
-
 name="$(e2e_context_get E2E_SANDBOX_NAME)"
 route="$(e2e_context_get E2E_INFERENCE_ROUTE)"
+# Orchestrator step cap is 45s; widen wrapper cap to 35s.
 # CodeRabbit review item #13: capture then truncate to avoid `| head` racing
 # curl under `pipefail` and flagging a successful request as failed.
-body="$(openshell sandbox exec --name "${name}" -- curl -fsS --max-time 10 "https://${route}/v1/models")"
+# shellcheck disable=SC2034 # consumed by e2e_sandbox_exec via env
+E2E_SANDBOX_EXEC_TIMEOUT_SECONDS=35 \
+body="$(e2e_sandbox_exec "${name}" -- curl -fsS --max-time 25 "https://${route}/v1/models")"
 printf '%s\n' "${body:0:512}"
diff --git a/test/e2e-scenario/validation_suites/inference/ollama-auth-proxy/00-proxy-reachable.sh b/test/e2e-scenario/validation_suites/inference/ollama-auth-proxy/00-proxy-reachable.sh
index 77d4772c17..d172615795 100755
--- a/test/e2e-scenario/validation_suites/inference/ollama-auth-proxy/00-proxy-reachable.sh
+++ b/test/e2e-scenario/validation_suites/inference/ollama-auth-proxy/00-proxy-reachable.sh
@@ -12,18 +12,16 @@ LIB_DIR="$(cd "${SCRIPT_DIR}/../../../runtime/lib" && pwd)"
 . "${LIB_DIR}/env.sh"
 # shellcheck source=../../../runtime/lib/context.sh
 . "${LIB_DIR}/context.sh"
+# shellcheck source=../../sandbox-exec.sh
+. "${SCRIPT_DIR}/../../sandbox-exec.sh"
 
 echo "ollama-proxy:proxy-reachable"
 e2e_context_require E2E_SANDBOX_NAME
-if e2e_env_is_dry_run; then
-  echo "[dry-run] would verify the Ollama auth proxy is reachable from the sandbox"
-  exit 0
-fi
 name="$(e2e_context_get E2E_SANDBOX_NAME)"
 # The Ollama auth proxy intentionally rejects unauthenticated requests to
 # /api/tags (legacy test-gpu-e2e.sh accepts 401/403 as proof the proxy is
 # live and enforcing auth). Do not use curl -f here.
-status="$(openshell sandbox exec --name "${name}" -- curl -sS -o /dev/null -w '%{http_code}' --max-time 10 "http://inference-local/api/tags" 2>/dev/null || echo 000)"
+status="$(e2e_sandbox_exec "${name}" -- curl -sS -o /dev/null -w '%{http_code}' --max-time 10 "http://inference-local/api/tags" 2>/dev/null || echo 000)"
 case "${status}" in
   200 | 401 | 403)
     echo "ollama-proxy:proxy-reachable status=${status}"
diff --git a/test/e2e-scenario/validation_suites/inference/ollama-gpu/00-ollama-models-health.sh b/test/e2e-scenario/validation_suites/inference/ollama-gpu/00-ollama-models-health.sh
index 47e9f1fd43..d61ead2e98 100755
--- a/test/e2e-scenario/validation_suites/inference/ollama-gpu/00-ollama-models-health.sh
+++ b/test/e2e-scenario/validation_suites/inference/ollama-gpu/00-ollama-models-health.sh
@@ -15,10 +15,6 @@ LIB_DIR="$(cd "${SCRIPT_DIR}/../../../runtime/lib" && pwd)"
 
 echo "local-ollama-inference:ollama-models-health"
 e2e_context_require E2E_PROVIDER
-if e2e_env_is_dry_run; then
-  echo "[dry-run] would GET ollama /api/tags via host Ollama"
-  exit 0
-fi
 # GPU Ollama scenarios mirror legacy test-gpu-e2e.sh: validate the host
 # Ollama daemon directly because Docker GPU host networking bypasses the
 # normal dashboard/gateway forward path.
diff --git a/test/e2e-scenario/validation_suites/inference/ollama-gpu/01-ollama-chat-completion.sh b/test/e2e-scenario/validation_suites/inference/ollama-gpu/01-ollama-chat-completion.sh
index ad8ff54faa..5d18b4209a 100755
--- a/test/e2e-scenario/validation_suites/inference/ollama-gpu/01-ollama-chat-completion.sh
+++ b/test/e2e-scenario/validation_suites/inference/ollama-gpu/01-ollama-chat-completion.sh
@@ -15,10 +15,6 @@ LIB_DIR="$(cd "${SCRIPT_DIR}/../../../runtime/lib" && pwd)"
 
 echo "local-ollama-inference:ollama-chat-completion"
 e2e_context_require E2E_SANDBOX_NAME
-if e2e_env_is_dry_run; then
-  echo "[dry-run] would POST chat completion from sandbox to host-network Ollama"
-  exit 0
-fi
 name="$(e2e_context_get E2E_SANDBOX_NAME)"
 model="$(curl -fsS --max-time 10 http://127.0.0.1:11434/api/tags \
   | node -e "const fs=require('fs'); const data=JSON.parse(fs.readFileSync(0,'utf8')); process.stdout.write(data.models?.[0]?.name || data.models?.[0]?.model || 'default');")"
diff --git a/test/e2e-scenario/validation_suites/lib/inference_routing.sh b/test/e2e-scenario/validation_suites/lib/inference_routing.sh
index b4f4c1d63f..17db0bbedb 100755
--- a/test/e2e-scenario/validation_suites/lib/inference_routing.sh
+++ b/test/e2e-scenario/validation_suites/lib/inference_routing.sh
@@ -31,16 +31,6 @@ _e2e_inference_sandbox_name() {
   e2e_context_get E2E_SANDBOX_NAME
 }
 
-_e2e_inference_plan() {
-  local assertion_id="${1:-}"
-  local detail="${2:-planned inference/provider check}"
-  e2e_env_trace "inference:plan" "${assertion_id} ${detail}"
-  echo "[dry-run] ${assertion_id}: ${detail}"
-  if [[ -f "$(e2e_context_path)" ]]; then
-    e2e_context_dump | sed -E 's/(TOKEN|SECRET|API_KEY|APIKEY|CREDENTIAL|PASSWORD)([^=]*)=.*/\1\2=REDACTED/'
-  fi
-}
-
 _e2e_inference_curl_json() {
   local sandbox="$1"
   local url="$2"
@@ -64,10 +54,6 @@ e2e_inference_routing_assert_chat_completion() {
   local assertion_id="${1:-post-onboard.inference-routing.inference-local-chat-completion}"
   _e2e_inference_assertion "${assertion_id}"
   _e2e_inference_require_sandbox
-  if e2e_env_is_dry_run; then
-    _e2e_inference_plan "${assertion_id}" "POST https://inference.local/v1/chat/completions with bounded curl"
-    return 0
-  fi
   local sandbox payload output
   sandbox="$(_e2e_inference_sandbox_name)"
   payload='{"model":"default","messages":[{"role":"user","content":"Say ok"}],"max_tokens":8}'
@@ -84,10 +70,6 @@ e2e_inference_routing_assert_health() {
   local url="${2:-https://inference.local/v1/models}"
   _e2e_inference_assertion "${assertion_id}"
   _e2e_inference_require_sandbox
-  if e2e_env_is_dry_run; then
-    _e2e_inference_plan "${assertion_id}" "GET ${url} with bounded curl"
-    return 0
-  fi
   local sandbox status
   sandbox="$(_e2e_inference_sandbox_name)"
   status="$(_e2e_inference_status "${sandbox}" "${url}")"
@@ -103,10 +85,6 @@ e2e_inference_routing_assert_auth_proxy() {
   local mode="${2:-valid}"
   _e2e_inference_assertion "${assertion_id}"
   _e2e_inference_require_sandbox
-  if e2e_env_is_dry_run; then
-    _e2e_inference_plan "${assertion_id}" "auth-proxy ${mode} request; sensitive context redacted"
-    return 0
-  fi
   local sandbox status token
   sandbox="$(_e2e_inference_sandbox_name)"
   case "${mode}" in
diff --git a/test/e2e-scenario/validation_suites/lib/messaging_providers.sh b/test/e2e-scenario/validation_suites/lib/messaging_providers.sh
index 77eb1f1176..01250b784f 100755
--- a/test/e2e-scenario/validation_suites/lib/messaging_providers.sh
+++ b/test/e2e-scenario/validation_suites/lib/messaging_providers.sh
@@ -104,10 +104,6 @@ e2e_messaging_read_config_surface() {
     return 0
   fi
   path="$(e2e_messaging_agent_config_path)"
-  if [[ -n "${E2E_DRY_RUN:-}" ]]; then
-    printf '%s=PLACEHOLDER\n' "$(e2e_messaging_config_key)"
-    return 0
-  fi
   if [[ -f "${path}" ]]; then
     cat "${path}"
     return 0
@@ -177,9 +173,6 @@ e2e_messaging_assert_literal_payload() {
   local assertion_id="${1:?assertion id required}"
   local payload="${2:?payload required}"
   local observed="${3:-}"
-  if [[ -z "${observed}" && -n "${E2E_DRY_RUN:-}" ]]; then
-    observed="${payload}"
-  fi
   if [[ -z "${observed}" ]]; then
     e2e_fail "${assertion_id} missing observed payload output"
   fi
diff --git a/test/e2e-scenario/validation_suites/lib/rebuild_upgrade.sh b/test/e2e-scenario/validation_suites/lib/rebuild_upgrade.sh
index c6483c99fb..4870a68c64 100755
--- a/test/e2e-scenario/validation_suites/lib/rebuild_upgrade.sh
+++ b/test/e2e-scenario/validation_suites/lib/rebuild_upgrade.sh
@@ -10,6 +10,15 @@ _REBUILD_UPGRADE_REPO_ROOT="$(cd "${_REBUILD_UPGRADE_DIR}/../../../.." && pwd)"
 . "${_REBUILD_UPGRADE_REPO_ROOT}/test/e2e-scenario/runtime/lib/context.sh"
 # shellcheck source=../../runtime/lib/logging.sh
 . "${_REBUILD_UPGRADE_REPO_ROOT}/test/e2e-scenario/runtime/lib/logging.sh"
+# shellcheck source=../sandbox-exec.sh
+. "${_REBUILD_UPGRADE_REPO_ROOT}/test/e2e-scenario/validation_suites/sandbox-exec.sh"
+
+# Sandbox-exec calls in this lib feed the lifecycle.rebuild/upgrade
+# orchestrator steps, which carry 120s caps. Default the per-call wrapper
+# cap to 100s so a hung 'openshell sandbox exec'/'ssh -F' surfaces as a
+# classified exit 124 well before the orchestrator's SIGTERM. Callers
+# may still override per-call.
+: "${E2E_SANDBOX_EXEC_TIMEOUT_SECONDS:=100}"
 
 rebuild_upgrade_require_context() {
   e2e_context_require E2E_SCENARIO E2E_AGENT E2E_SANDBOX_NAME E2E_GATEWAY_URL
@@ -30,15 +39,30 @@ _rebuild_upgrade_run() {
   "$@"
 }
 
+# _rebuild_upgrade_sandbox_exec <sandbox> <cmd> [args...]
+# Routes through the canonical `e2e_sandbox_exec` wrapper (ssh-config
+# preferred, openshell-exec fallback, per-call timeout, classified
+# diagnostic on hang) for production; honors the legacy
+# REBUILD_UPGRADE_SANDBOX_CMD override so tests can inject a fake. The
+# override contract preserves the original argv shape
+# (`<override> -n <sandbox> -- <cmd>...`) so existing test fakes
+# (e.g. `REBUILD_UPGRADE_SANDBOX_CMD=fake_sandbox`) keep working.
+_rebuild_upgrade_sandbox_exec() {
+  local sandbox="$1"
+  shift
+  if [[ -n "${REBUILD_UPGRADE_SANDBOX_CMD:-}" ]]; then
+    # shellcheck disable=SC2086
+    ${REBUILD_UPGRADE_SANDBOX_CMD} -n "${sandbox}" -- "$@"
+    return $?
+  fi
+  e2e_sandbox_exec "${sandbox}" -- "$@"
+}
+
 rebuild_upgrade_assert_sandbox_reachable() {
   rebuild_upgrade_require_context || return 1
-  if [[ "${E2E_DRY_RUN:-0}" == "1" ]]; then
-    e2e_pass "suite.upgrade.survivor_agent_reachable dry-run"
-    return 0
-  fi
   local sandbox
   sandbox="$(_rebuild_upgrade_ctx E2E_SANDBOX_NAME)"
-  if _rebuild_upgrade_run REBUILD_UPGRADE_SANDBOX_CMD openshell sandbox exec -n "${sandbox}" -- true; then
+  if _rebuild_upgrade_sandbox_exec "${sandbox}" true; then
     e2e_pass "suite.upgrade.survivor_agent_reachable"
   else
     e2e_fail "suite.upgrade.survivor_agent_reachable"
@@ -47,15 +71,11 @@ rebuild_upgrade_assert_sandbox_reachable() {
 
 rebuild_upgrade_assert_marker_preserved() {
   rebuild_upgrade_require_context || return 1
-  if [[ "${E2E_DRY_RUN:-0}" == "1" ]]; then
-    e2e_pass "suite.rebuild.workspace_state_preserved dry-run"
-    return 0
-  fi
   local sandbox marker_path expected actual
   sandbox="$(_rebuild_upgrade_ctx E2E_SANDBOX_NAME)"
   marker_path="${E2E_REBUILD_MARKER_PATH:-/workspace/.nemoclaw-rebuild-marker}"
   expected="${E2E_REBUILD_MARKER_EXPECTED:-${E2E_STATE_MARKER_EXPECTED:-}}"
-  actual="$(_rebuild_upgrade_run REBUILD_UPGRADE_SANDBOX_CMD openshell sandbox exec -n "${sandbox}" -- cat "${marker_path}" 2>/dev/null || true)"
+  actual="$(_rebuild_upgrade_sandbox_exec "${sandbox}" cat "${marker_path}" 2>/dev/null || true)"
   if [[ -n "${actual}" && (-z "${expected}" || "${actual}" == "${expected}") ]]; then
     e2e_pass "suite.rebuild.workspace_state_preserved"
   else
@@ -65,16 +85,12 @@ rebuild_upgrade_assert_marker_preserved() {
 
 rebuild_upgrade_assert_agent_version_upgraded() {
   rebuild_upgrade_require_context || return 1
-  if [[ "${E2E_DRY_RUN:-0}" == "1" ]]; then
-    e2e_pass "suite.rebuild.agent_version_upgraded dry-run"
-    return 0
-  fi
   local sandbox old expected actual cmd
   sandbox="$(_rebuild_upgrade_ctx E2E_SANDBOX_NAME)"
   old="${E2E_OLD_AGENT_VERSION:-}"
   expected="${E2E_EXPECTED_AGENT_VERSION:-}"
   cmd="${E2E_AGENT_VERSION_COMMAND:-openclaw --version}"
-  actual="$(_rebuild_upgrade_run REBUILD_UPGRADE_SANDBOX_CMD openshell sandbox exec -n "${sandbox}" -- bash -lc "${cmd}" 2>/dev/null || true)"
+  actual="$(_rebuild_upgrade_sandbox_exec "${sandbox}" bash -lc "${cmd}" 2>/dev/null || true)"
   if [[ -n "${actual}" && (-z "${old}" || "${actual}" != *"${old}"*) && (-z "${expected}" || "${actual}" == *"${expected}"*) ]]; then
     e2e_pass "suite.rebuild.agent_version_upgraded"
   else
@@ -84,14 +100,10 @@ rebuild_upgrade_assert_agent_version_upgraded() {
 
 rebuild_upgrade_assert_inference_works() {
   rebuild_upgrade_require_context || return 1
-  if [[ "${E2E_DRY_RUN:-0}" == "1" ]]; then
-    e2e_pass "suite.rebuild.inference_still_works dry-run"
-    return 0
-  fi
   local sandbox cmd output
   sandbox="$(_rebuild_upgrade_ctx E2E_SANDBOX_NAME)"
   cmd="${E2E_INFERENCE_CHECK_COMMAND:-curl -fsS http://inference.local/v1/models}"
-  output="$(_rebuild_upgrade_run REBUILD_UPGRADE_SANDBOX_CMD openshell sandbox exec -n "${sandbox}" -- bash -lc "${cmd}" 2>/dev/null || true)"
+  output="$(_rebuild_upgrade_sandbox_exec "${sandbox}" bash -lc "${cmd}" 2>/dev/null || true)"
   if [[ -n "${output}" ]]; then
     e2e_pass "suite.rebuild.inference_still_works"
   else
@@ -101,10 +113,6 @@ rebuild_upgrade_assert_inference_works() {
 
 rebuild_upgrade_assert_policy_presets_preserved() {
   rebuild_upgrade_require_context || return 1
-  if [[ "${E2E_DRY_RUN:-0}" == "1" ]]; then
-    e2e_pass "suite.rebuild.policy_presets_preserved dry-run"
-    return 0
-  fi
   local presets output preset
   presets="${E2E_EXPECTED_POLICY_PRESETS:-npm pypi}"
   output="$(_rebuild_upgrade_run REBUILD_UPGRADE_NEMOCLAW_CMD nemoclaw policy status 2>/dev/null || true)"
@@ -123,13 +131,9 @@ rebuild_upgrade_assert_hermes_config_preserved() {
     e2e_pass "suite.rebuild.hermes_config_preserved skipped non-hermes"
     return 0
   fi
-  if [[ "${E2E_DRY_RUN:-0}" == "1" ]]; then
-    e2e_pass "suite.rebuild.hermes_config_preserved dry-run"
-    return 0
-  fi
   local sandbox output
   sandbox="$(_rebuild_upgrade_ctx E2E_SANDBOX_NAME)"
-  output="$(_rebuild_upgrade_run REBUILD_UPGRADE_SANDBOX_CMD openshell sandbox exec -n "${sandbox}" -- bash -lc "grep -R 'platforms.discord\|DISCORD' ~/.hermes . 2>/dev/null" || true)"
+  output="$(_rebuild_upgrade_sandbox_exec "${sandbox}" bash -lc "grep -R 'platforms.discord\|DISCORD' ~/.hermes . 2>/dev/null" || true)"
   if [[ "${output}" == *"discord"* || "${output}" == *"DISCORD"* ]]; then
     e2e_pass "suite.rebuild.hermes_config_preserved"
   else
@@ -139,10 +143,6 @@ rebuild_upgrade_assert_hermes_config_preserved() {
 
 rebuild_upgrade_assert_sandbox_registry_preserved() {
   rebuild_upgrade_require_context || return 1
-  if [[ "${E2E_DRY_RUN:-0}" == "1" ]]; then
-    e2e_pass "suite.upgrade.sandbox_registry_preserved dry-run"
-    return 0
-  fi
   local sandbox output
   sandbox="$(_rebuild_upgrade_ctx E2E_SANDBOX_NAME)"
   output="$(_rebuild_upgrade_run REBUILD_UPGRADE_NEMOCLAW_CMD nemoclaw list 2>/dev/null || true)"
@@ -155,10 +155,6 @@ rebuild_upgrade_assert_sandbox_registry_preserved() {
 
 rebuild_upgrade_assert_gateway_version_upgraded() {
   rebuild_upgrade_require_context || return 1
-  if [[ "${E2E_DRY_RUN:-0}" == "1" ]]; then
-    e2e_pass "suite.upgrade.gateway_version_upgraded dry-run"
-    return 0
-  fi
   local expected output
   expected="${E2E_EXPECTED_OPENSHELL_VERSION:-}"
   output="$(_rebuild_upgrade_run REBUILD_UPGRADE_GATEWAY_CMD curl -fsS "$(_rebuild_upgrade_ctx E2E_GATEWAY_URL)/version" 2>/dev/null || true)"
diff --git a/test/e2e-scenario/validation_suites/lib/sandbox_lifecycle.sh b/test/e2e-scenario/validation_suites/lib/sandbox_lifecycle.sh
index df942487e7..3cca8966b4 100755
--- a/test/e2e-scenario/validation_suites/lib/sandbox_lifecycle.sh
+++ b/test/e2e-scenario/validation_suites/lib/sandbox_lifecycle.sh
@@ -37,11 +37,6 @@ sandbox_lifecycle_run_with_timeout() {
   local seconds="$1"
   shift
   SANDBOX_LIFECYCLE_LAST_OUTPUT=""
-  if [[ "${E2E_DRY_RUN:-0}" == "1" ]]; then
-    SANDBOX_LIFECYCLE_LAST_OUTPUT="dry-run: $*"
-    printf '%s\n' "${SANDBOX_LIFECYCLE_LAST_OUTPUT}"
-    return 0
-  fi
   if command -v timeout >/dev/null 2>&1; then
     SANDBOX_LIFECYCLE_LAST_OUTPUT="$(timeout "${seconds}" "$@" 2>&1)" || {
       local rc=$?
@@ -64,7 +59,10 @@ sandbox_lifecycle_assert_nemoclaw_list_contains_sandbox() {
     sandbox_lifecycle_fail "${id}" "nemoclaw list failed"
     return 1
   }
-  [[ "${E2E_DRY_RUN:-0}" == "1" || "${SANDBOX_LIFECYCLE_LAST_OUTPUT}" == *"${E2E_SANDBOX_NAME}"* ]] || {
+  # Match the sandbox name exactly against the first field of each line;
+  # a substring match would let `sb1` falsely match `sb10`.
+  awk -v n="${E2E_SANDBOX_NAME}" '$1 == n { found = 1 } END { exit !found }' \
+    <<<"${SANDBOX_LIFECYCLE_LAST_OUTPUT}" || {
     sandbox_lifecycle_fail "${id}" "sandbox not listed: ${E2E_SANDBOX_NAME}"
     return 1
   }
@@ -77,16 +75,25 @@ sandbox_lifecycle_assert_status_fields_present() {
     sandbox_lifecycle_fail "${id}" "nemoclaw status failed"
     return 1
   }
-  if [[ "${E2E_DRY_RUN:-0}" != "1" ]]; then
-    local status_output_lower
-    status_output_lower="$(printf '%s' "${SANDBOX_LIFECYCLE_LAST_OUTPUT}" | tr '[:upper:]' '[:lower:]')"
-    for field in status gateway sandbox; do
-      [[ "${status_output_lower}" == *"${field}"* ]] || {
-        sandbox_lifecycle_fail "${id}" "missing status field: ${field}"
-        return 1
-      }
-    done
+  # The real `nemoclaw <name> status` output (src/lib/actions/sandbox/status.ts)
+  # always emits a 'Sandbox: <name>' header plus structured fields like
+  # 'Model:', 'OpenShell:', 'Policies:'. The original assertion required
+  # literal 'status' and 'gateway' tokens that never appear in normal
+  # output — it only passed against the test-suite mock. Align with the
+  # production CLI: require the sandbox name plus the 'Sandbox', 'Model',
+  # and 'OpenShell' labels, which are unconditionally printed.
+  local output="${SANDBOX_LIFECYCLE_LAST_OUTPUT}"
+  if [[ "${output}" != *"${E2E_SANDBOX_NAME}"* ]]; then
+    sandbox_lifecycle_fail "${id}" "status output did not mention sandbox '${E2E_SANDBOX_NAME}'"
+    return 1
   fi
+  local field
+  for field in Sandbox Model OpenShell; do
+    [[ "${output}" == *"${field}"* ]] || {
+      sandbox_lifecycle_fail "${id}" "missing status field: ${field}"
+      return 1
+    }
+  done
   sandbox_lifecycle_pass "${id}" "status fields present"
 }
 
@@ -96,7 +103,7 @@ sandbox_lifecycle_assert_logs_available() {
     sandbox_lifecycle_fail "${id}" "nemoclaw logs failed"
     return 1
   }
-  [[ "${E2E_DRY_RUN:-0}" == "1" || -n "${SANDBOX_LIFECYCLE_LAST_OUTPUT}" ]] || {
+  [[ -n "${SANDBOX_LIFECYCLE_LAST_OUTPUT}" ]] || {
     sandbox_lifecycle_fail "${id}" "logs empty"
     return 1
   }
@@ -109,7 +116,7 @@ sandbox_lifecycle_assert_openshell_exec_ok() {
     sandbox_lifecycle_fail "${id}" "openshell exec failed"
     return 1
   }
-  [[ "${E2E_DRY_RUN:-0}" == "1" || "${SANDBOX_LIFECYCLE_LAST_OUTPUT}" == *"lifecycle-ok"* ]] || {
+  [[ "${SANDBOX_LIFECYCLE_LAST_OUTPUT}" == *"lifecycle-ok"* ]] || {
     sandbox_lifecycle_fail "${id}" "unexpected exec output"
     return 1
   }
diff --git a/test/e2e-scenario/validation_suites/lib/security_policy_credentials.sh b/test/e2e-scenario/validation_suites/lib/security_policy_credentials.sh
index 3e1872d62a..8d34a5444f 100755
--- a/test/e2e-scenario/validation_suites/lib/security_policy_credentials.sh
+++ b/test/e2e-scenario/validation_suites/lib/security_policy_credentials.sh
@@ -55,10 +55,6 @@ spc_assert_credentials_expected() {
     return 1
   fi
   spc_log_provider_metadata "$(spc_context_get E2E_PROVIDER)" "gateway"
-  if e2e_env_is_dry_run; then
-    echo "[dry-run] would list gateway credentials without raw values"
-    return 0
-  fi
   local raw_file listed_raw listed list_rc
   raw_file="$(mktemp "${TMPDIR:-/tmp}/nemoclaw-credentials-list.XXXXXX")"
   chmod 600 "${raw_file}"
@@ -105,10 +101,6 @@ spc_assert_policy_preset_present() {
   spc_assertion_id "post-onboard.security-policy.${preset}-preset-applied"
   spc_require_context E2E_SCENARIO E2E_SANDBOX_NAME
   echo "policy preset expected: ${preset}"
-  if e2e_env_is_dry_run; then
-    echo "[dry-run] would verify policy preset ${preset}"
-    return 0
-  fi
   local sandbox_name active
   sandbox_name="$(spc_context_get E2E_SANDBOX_NAME)"
   if ! active="$(nemoclaw "${sandbox_name}" policy-list 2>&1)"; then
@@ -143,10 +135,6 @@ spc_semver_ge() {
 spc_assert_openshell_credential_rewrite_supported() {
   spc_assertion_id "post-onboard.gateway.openshell-version-supports-credential-rewrite"
   spc_require_context E2E_SCENARIO
-  if e2e_env_is_dry_run; then
-    echo "[dry-run] would verify OpenShell gateway capability metadata"
-    return 0
-  fi
   local openshell_bin version_output version minimum_version binary_strings feature
   minimum_version="0.0.39"
   openshell_bin="$(command -v openshell 2>/dev/null || true)"
@@ -221,10 +209,6 @@ spc_assert_shields_permissions_match_state() {
 spc_assert_shields_config_consistent() {
   spc_assertion_id "post-onboard.security-shields.config-consistent"
   spc_require_context E2E_SCENARIO E2E_SANDBOX_NAME E2E_AGENT
-  if e2e_env_is_dry_run; then
-    echo "[dry-run] would verify shields config consistency"
-    return 0
-  fi
   local sandbox_name status observed expected
   sandbox_name="$(spc_context_get E2E_SANDBOX_NAME)"
   if ! status="$(nemoclaw "${sandbox_name}" shields status 2>&1)"; then
@@ -262,10 +246,6 @@ spc_assert_telegram_payload_not_shell_executed() {
   if [[ -n "${fixture_payload}" ]]; then
     printf 'telegram payload fixture loaded (%s bytes)\n' "${#fixture_payload}"
   fi
-  if e2e_env_is_dry_run; then
-    echo "[dry-run] would submit payload without shell evaluation"
-    return 0
-  fi
   local sandbox_name marker payload send_output marker_state
   sandbox_name="$(spc_context_get E2E_SANDBOX_NAME)"
   marker="/tmp/nemoclaw-telegram-injection-proof-$RANDOM-$$"
diff --git a/test/e2e-scenario/validation_suites/messaging/common/03-bridge-reachable.sh b/test/e2e-scenario/validation_suites/messaging/common/03-bridge-reachable.sh
index 9fc2156ad0..8ec82f8aeb 100755
--- a/test/e2e-scenario/validation_suites/messaging/common/03-bridge-reachable.sh
+++ b/test/e2e-scenario/validation_suites/messaging/common/03-bridge-reachable.sh
@@ -5,9 +5,4 @@
 set -euo pipefail
 . "$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)/lib/messaging_providers.sh"
 e2e_messaging_load_context
-if [[ -n "${E2E_DRY_RUN:-}" ]]; then
-  provider="$(e2e_messaging_provider_name)"
-  e2e_pass "expected-state.messaging.${provider}.bridge-reachable dry-run"
-  exit 0
-fi
 e2e_messaging_assert_bridge_reachable
diff --git a/test/e2e-scenario/validation_suites/messaging/slack/00-slack-provider-state.sh b/test/e2e-scenario/validation_suites/messaging/slack/00-slack-provider-state.sh
index 0f1afa2e14..bac54bb501 100755
--- a/test/e2e-scenario/validation_suites/messaging/slack/00-slack-provider-state.sh
+++ b/test/e2e-scenario/validation_suites/messaging/slack/00-slack-provider-state.sh
@@ -3,7 +3,10 @@
 # SPDX-License-Identifier: Apache-2.0
 
 set -euo pipefail
-. "$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)/lib/messaging_providers.sh"
+_SLACK_SUITES_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
+. "${_SLACK_SUITES_DIR}/lib/messaging_providers.sh"
+# shellcheck source=../../sandbox-exec.sh
+. "${_SLACK_SUITES_DIR}/sandbox-exec.sh"
 e2e_messaging_load_context
 provider="$(e2e_messaging_provider_name)"
 case "${provider}" in
@@ -12,25 +15,25 @@ case "${provider}" in
 esac
 e2e_messaging_assert_provider_attached
 if [[ "$(e2e_context_get E2E_AGENT)" == "openclaw" ]]; then
-  if [[ -n "${E2E_DRY_RUN:-}" ]]; then
-    e2e_pass "expected-state.messaging.slack.openclaw-enabled dry-run"
-    e2e_pass "expected-state.messaging.slack.runtime-discovery dry-run"
-  else
-    content="$(e2e_messaging_read_config_surface)"
-    if ! printf '%s\n' "${content}" | python3 -c '
+  content="$(e2e_messaging_read_config_surface)"
+  if ! printf '%s\n' "${content}" | python3 -c '
 import json
 import sys
 cfg = json.load(sys.stdin)
 assert cfg["channels"]["slack"]["enabled"] is True
 assert cfg["plugins"]["entries"]["slack"]["enabled"] is True
 '; then
-      e2e_fail "expected-state.messaging.slack.openclaw-enabled missing channels.slack.enabled or plugins.entries.slack.enabled"
-    fi
-    e2e_pass "expected-state.messaging.slack.openclaw-enabled channel and plugin enabled"
+    e2e_fail "expected-state.messaging.slack.openclaw-enabled missing channels.slack.enabled or plugins.entries.slack.enabled"
+  fi
+  e2e_pass "expected-state.messaging.slack.openclaw-enabled channel and plugin enabled"
 
-    sandbox_name="$(e2e_context_get E2E_SANDBOX_NAME)"
-    runtime_json="$(openshell sandbox exec --name "${sandbox_name}" -- timeout 45 openclaw channels list --all --json --no-color 2>/dev/null || true)"
-    runtime_state="$(printf '%s\n' "${runtime_json}" | python3 -c '
+  sandbox_name="$(e2e_context_get E2E_SANDBOX_NAME)"
+  # Wrapper cap (50s) sits just above the inner `timeout 45` so the inner
+  # cap is what fires under normal upstream slowness; the wrapper only
+  # catches the case where openshell itself wedges before delivering the
+  # `timeout` invocation to the sandbox.
+  runtime_json="$(E2E_SANDBOX_EXEC_TIMEOUT_SECONDS=50 e2e_sandbox_exec "${sandbox_name}" -- timeout 45 openclaw channels list --all --json --no-color 2>/dev/null || true)"
+  runtime_state="$(printf '%s\n' "${runtime_json}" | python3 -c '
 import json
 import sys
 try:
@@ -44,10 +47,9 @@ try:
 except Exception as exc:
     print("error %s" % exc)
 ' 2>/dev/null || true)"
-    if [[ "${runtime_state}" != "yes" ]]; then
-      e2e_fail "expected-state.messaging.slack.runtime-discovery OpenClaw did not report Slack installed/configured (${runtime_state}; output=${runtime_json:0:300})"
-    fi
-    e2e_pass "expected-state.messaging.slack.runtime-discovery OpenClaw reports Slack installed and configured"
+  if [[ "${runtime_state}" != "yes" ]]; then
+    e2e_fail "expected-state.messaging.slack.runtime-discovery OpenClaw did not report Slack installed/configured (${runtime_state}; output=${runtime_json:0:300})"
   fi
+  e2e_pass "expected-state.messaging.slack.runtime-discovery OpenClaw reports Slack installed and configured"
 fi
 e2e_pass "expected-state.messaging.slack.provider-state ${provider} provider state configured"
diff --git a/test/e2e-scenario/validation_suites/platform/macos/00-macos-smoke.sh b/test/e2e-scenario/validation_suites/platform/macos/00-macos-smoke.sh
index 2f42115f5e..4f2f094c67 100755
--- a/test/e2e-scenario/validation_suites/platform/macos/00-macos-smoke.sh
+++ b/test/e2e-scenario/validation_suites/platform/macos/00-macos-smoke.sh
@@ -19,11 +19,6 @@ LIB_DIR="$(cd "${SCRIPT_DIR}/../../../runtime/lib" && pwd)"
 echo "platform-macos:macos-smoke"
 e2e_context_require E2E_PLATFORM_OS
 
-if e2e_env_is_dry_run; then
-  echo "[dry-run] would run macOS-specific smoke checks"
-  exit 0
-fi
-
 os="$(e2e_context_get E2E_PLATFORM_OS)"
 if [[ "${os}" != "macos" ]]; then
   echo "platform-macos: E2E_PLATFORM_OS should be 'macos', got '${os}'" >&2
diff --git a/test/e2e-scenario/validation_suites/platform/wsl/00-wsl-smoke.sh b/test/e2e-scenario/validation_suites/platform/wsl/00-wsl-smoke.sh
index 1aeb39fe7c..ef96795a0c 100755
--- a/test/e2e-scenario/validation_suites/platform/wsl/00-wsl-smoke.sh
+++ b/test/e2e-scenario/validation_suites/platform/wsl/00-wsl-smoke.sh
@@ -17,11 +17,6 @@ LIB_DIR="$(cd "${SCRIPT_DIR}/../../../runtime/lib" && pwd)"
 echo "platform-wsl:wsl-smoke"
 e2e_context_require E2E_PLATFORM_OS E2E_SANDBOX_NAME
 
-if e2e_env_is_dry_run; then
-  echo "[dry-run] would run WSL-specific smoke checks"
-  exit 0
-fi
-
 os="$(e2e_context_get E2E_PLATFORM_OS)"
 if [[ "${os}" != "wsl" ]]; then
   echo "platform-wsl: E2E_PLATFORM_OS should be 'wsl', got '${os}'" >&2
diff --git a/test/e2e-scenario/validation_suites/sandbox-exec.sh b/test/e2e-scenario/validation_suites/sandbox-exec.sh
index 0682c4cf2f..44e4288111 100755
--- a/test/e2e-scenario/validation_suites/sandbox-exec.sh
+++ b/test/e2e-scenario/validation_suites/sandbox-exec.sh
@@ -12,7 +12,6 @@
 # Functions:
 #   e2e_sandbox_exec       <sandbox> -- <cmd> [args...]
 #       Run <cmd> inside <sandbox> via `openshell sandbox exec`. No stdin passed.
-#       Exit code propagates from <cmd>. Honors E2E_DRY_RUN.
 #
 #   e2e_sandbox_exec_stdin <sandbox> -- <cmd> [args...]
 #       Like e2e_sandbox_exec but pipes the caller's stdin into the
@@ -23,6 +22,174 @@ _E2E_SBEX_LIB_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../runtime/lib" && pwd)"
 # shellcheck source=../runtime/lib/env.sh
 . "${_E2E_SBEX_LIB_DIR}/env.sh"
 
+# Per-call timeout (seconds) applied to every `openshell sandbox exec`
+# invocation routed through this wrapper. Callers MAY override per call:
+#   E2E_SANDBOX_EXEC_TIMEOUT_SECONDS=50 e2e_sandbox_exec ...
+#
+# Why a wrapper-level cap exists:
+#   The orchestrator (phase.ts) enforces step-level timeouts via SIGTERM on
+#   the script's process group. When openshell ssh-into-sandbox hangs,
+#   SIGTERM eventually kills the script — but the script has no chance to
+#   emit a structured diagnostic, so logs end mid-line. An inner per-call
+#   `timeout` lets the wrapper observe the hang, emit a classified
+#   diagnostic, and exit cleanly *before* the orchestrator's SIGTERM.
+#
+# The default (25s) sits below the most common orchestrator step caps
+# (30s smoke / kimi, 45s sandbox-local). Steps with longer caps (60s
+# chat-completion, 120s rebuild) export a larger value before calling.
+: "${E2E_SANDBOX_EXEC_TIMEOUT_SECONDS:=25}"
+
+# Print the first timeout binary found on PATH (timeout, then gtimeout); print nothing if neither exists.
+_e2e_sbex_resolve_timeout_cmd() {
+  local candidate
+  for candidate in timeout gtimeout; do
+    if command -v "${candidate}" >/dev/null 2>&1; then
+      printf '%s' "${candidate}"
+      return
+    fi
+  done
+}
+
+# ----------------------------------------------------------------------
+# ssh-config transport (preferred)
+#
+# `openshell sandbox exec` has been observed to wedge in CI (PR #4380
+# scenario run — host can curl the gateway but `openshell sandbox exec`
+# never returns). The legacy test/e2e/ scripts have always entered the
+# sandbox via `openshell sandbox ssh-config` + `ssh -F`, which works in
+# the same environments. We mirror that pattern here:
+#
+#   1. On first call per sandbox, materialize an ssh-config under
+#      ${E2E_CONTEXT_DIR:-/tmp}/.ssh-config-cache/<sandbox>.cfg.
+#   2. Subsequent calls reuse the cached config.
+#   3. Each ssh invocation gets `-o ConnectTimeout=10`,
+#      `-o StrictHostKeyChecking=no`, `-o UserKnownHostsFile=/dev/null`,
+#      `-o LogLevel=ERROR` to mirror the legacy pattern.
+#
+# Opt-out: set E2E_SANDBOX_EXEC_VIA_OPENSHELL=1 to force the original
+# `openshell sandbox exec` transport (e.g. for debugging or for runners
+# where ssh-config is unavailable).
+# ----------------------------------------------------------------------
+
+_e2e_sbex_ssh_cfg_dir() {
+  # The cache sits under the scenario context dir, or under /tmp when E2E_CONTEXT_DIR is unset or empty.
+  printf '%s/.ssh-config-cache' "${E2E_CONTEXT_DIR:-/tmp}"
+}
+
+# _e2e_sbex_ssh_config_for <sandbox>
+# Prints the path to a populated (non-empty) ssh-config for <sandbox> on stdout.
+# Returns non-zero (and prints nothing) when none can be produced — callers fall back.
+_e2e_sbex_ssh_config_for() {
+  local sandbox="$1"
+  local dir cfg
+  dir="$(_e2e_sbex_ssh_cfg_dir)"
+  mkdir -p "${dir}" || return 1
+  cfg="${dir}/${sandbox}.cfg"
+  if [[ ! -s "${cfg}" ]]; then
+    # A zero-exit run that wrote nothing is still a failure: an empty config has no Host entry to use.
+    if ! openshell sandbox ssh-config "${sandbox}" >"${cfg}" 2>/dev/null || [[ ! -s "${cfg}" ]]; then
+      rm -f "${cfg}"
+      return 1
+    fi
+  fi
+  printf '%s' "${cfg}"
+}
+
+# _e2e_sbex_quote_args <args...>: prints the args %q-quoted and space-joined, for use as the ssh remote command.
+# NOTE(review): %q may emit bash-only $'...' quoting for control characters — confirm the sandbox shell is bash.
+_e2e_sbex_quote_args() {
+  local word quoted joined=""
+  for word in "$@"; do
+    printf -v quoted '%q' "${word}"
+    joined+="${quoted} "
+  done
+  printf '%s' "${joined% }"
+}
+
+# _e2e_sbex_invoke_via_ssh <cfg> <stdin_mode> <seconds> <timeout_cmd>
+# Runs _E2E_SBEX_CMD on host openshell-<_E2E_SBEX_SB_NAME> with `ssh -F <cfg>`.
+# stdin_mode 'none' redirects stdin from /dev/null; any other value
+# forwards the caller's stdin. An empty <timeout_cmd> means no per-call cap.
+# Returns ssh's exit code (124 if timed out, 137 if SIGKILLed).
+_e2e_sbex_invoke_via_ssh() {
+  local cfg="$1" stdin_mode="$2" seconds="$3" timeout_cmd="$4"
+  local remote_cmd
+  remote_cmd="$(_e2e_sbex_quote_args "${_E2E_SBEX_CMD[@]}")"
+  local -a argv=()
+  # Prefix the timeout wrapper only when one was resolved.
+  if [[ -n "${timeout_cmd}" ]]; then
+    argv+=("${timeout_cmd}" --kill-after=5s "${seconds}")
+  fi
+  argv+=(
+    ssh
+    -F "${cfg}"
+    -o ConnectTimeout=10
+    -o StrictHostKeyChecking=no
+    -o UserKnownHostsFile=/dev/null
+    -o LogLevel=ERROR
+    "openshell-${_E2E_SBEX_SB_NAME}"
+    "${remote_cmd}"
+  )
+  if [[ "${stdin_mode}" == "none" ]]; then
+    "${argv[@]}" </dev/null
+  else
+    "${argv[@]}"
+  fi
+}
+
+# _e2e_sbex_invoke_via_openshell <stdin_mode> <seconds> <timeout_cmd>
+# Fallback path that uses `openshell sandbox exec`, capped by <timeout_cmd> when it is non-empty.
+# stdin_mode 'none' reads stdin from /dev/null, as the ssh path does; other values forward caller stdin.
+_e2e_sbex_invoke_via_openshell() {
+  local stdin_mode="$1" seconds="$2" timeout_cmd="$3"
+  local -a argv=(openshell sandbox exec --name "${_E2E_SBEX_SB_NAME}" -- "${_E2E_SBEX_CMD[@]}")
+  [[ -z "${timeout_cmd}" ]] || argv=("${timeout_cmd}" --kill-after=5s "${seconds}" "${argv[@]}")
+  # Without the redirect a 'none' call would hand the caller's stdin
+  # (e.g. the input of an enclosing read loop) to the sandbox command.
+  if [[ "${stdin_mode}" == "none" ]]; then "${argv[@]}" </dev/null; else "${argv[@]}"; fi
+}
+
+# _e2e_sbex_dispatch <stdin_mode>
+# Shared body for e2e_sandbox_exec / e2e_sandbox_exec_stdin. Picks the
+# transport (ssh-config preferred; openshell sandbox exec on opt-out or
+# ssh-config failure), applies the per-call timeout, and emits a
+# classified diagnostic on hang. Returns the transport's exit status.
+_e2e_sbex_dispatch() {
+  local stdin_mode="$1"
+  if ! command -v openshell >/dev/null 2>&1; then
+    echo "e2e_sandbox_exec: openshell CLI not on PATH" >&2
+    return 127
+  fi
+  local timeout_cmd seconds="${E2E_SANDBOX_EXEC_TIMEOUT_SECONDS}"
+  timeout_cmd="$(_e2e_sbex_resolve_timeout_cmd)"
+  if [[ -z "${timeout_cmd}" ]]; then
+    # Make the missing safety net visible so CI can flag it; do not
+    # abort — the orchestrator's step-level timeout still applies.
+    echo "e2e_sandbox_exec: 'timeout' not available; running without per-call cap (sandbox=${_E2E_SBEX_SB_NAME})" >&2
+  fi
+
+  local cfg="" via="ssh" rc=0
+  if [[ "${E2E_SANDBOX_EXEC_VIA_OPENSHELL:-0}" == "1" ]]; then
+    via="openshell"
+  elif ! cfg="$(_e2e_sbex_ssh_config_for "${_E2E_SBEX_SB_NAME}")"; then
+    echo "e2e_sandbox_exec: ssh-config unavailable for ${_E2E_SBEX_SB_NAME}; falling back to 'openshell sandbox exec'" >&2
+    via="openshell"
+  fi
+
+  # Capture the status with `|| rc=$?`: suite scripts run under `set -e`, and a
+  # bare failing call would abort the shell before the diagnostic below prints.
+  if [[ "${via}" == "ssh" ]]; then
+    _e2e_sbex_invoke_via_ssh "${cfg}" "${stdin_mode}" "${seconds}" "${timeout_cmd}" || rc=$?
+  else
+    _e2e_sbex_invoke_via_openshell "${stdin_mode}" "${seconds}" "${timeout_cmd}" || rc=$?
+  fi
+
+  if [[ -n "${timeout_cmd}" && ( "${rc}" -eq 124 || "${rc}" -eq 137 ) ]]; then
+    echo "e2e_sandbox_exec: ${via} transport hung after ${seconds}s (sandbox=${_E2E_SBEX_SB_NAME}, cmd=${_E2E_SBEX_CMD[0]:-?}; classifier=gateway-transient)" >&2
+  fi
+  return "${rc}"
+}
+
 # _e2e_sbex_split_args <sandbox> -- <cmd> [args...]
 # Parses the shared calling convention. Prints on stderr on misuse and
 # returns 2. On success, sets the two global arrays _E2E_SBEX_SB_NAME and
@@ -52,15 +219,7 @@ _e2e_sbex_parse() {
 e2e_sandbox_exec() {
   _e2e_sbex_parse "$@" || return $?
   e2e_env_trace "sandbox:exec" "${_E2E_SBEX_SB_NAME}" "${_E2E_SBEX_CMD[*]}"
-  if e2e_env_is_dry_run; then
-    echo "[dry-run] sandbox_exec ${_E2E_SBEX_SB_NAME} -- ${_E2E_SBEX_CMD[*]} (skipped)"
-    return 0
-  fi
-  if ! command -v openshell >/dev/null 2>&1; then
-    echo "e2e_sandbox_exec: openshell CLI not on PATH" >&2
-    return 127
-  fi
-  openshell sandbox exec --name "${_E2E_SBEX_SB_NAME}" -- "${_E2E_SBEX_CMD[@]}"
+  _e2e_sbex_dispatch none
 }
 
 # e2e_sandbox_exec_stdin <sandbox> -- <cmd> [args...]
@@ -70,15 +229,5 @@ e2e_sandbox_exec() {
 e2e_sandbox_exec_stdin() {
   _e2e_sbex_parse "$@" || return $?
   e2e_env_trace "sandbox:exec_stdin" "${_E2E_SBEX_SB_NAME}" "${_E2E_SBEX_CMD[*]}"
-  if e2e_env_is_dry_run; then
-    # Consume stdin so the caller's pipeline doesn't SIGPIPE.
-    cat >/dev/null 2>&1 || true
-    echo "[dry-run] sandbox_exec_stdin ${_E2E_SBEX_SB_NAME} -- ${_E2E_SBEX_CMD[*]} (skipped)"
-    return 0
-  fi
-  if ! command -v openshell >/dev/null 2>&1; then
-    echo "e2e_sandbox_exec_stdin: openshell CLI not on PATH" >&2
-    return 127
-  fi
-  openshell sandbox exec --name "${_E2E_SBEX_SB_NAME}" -- "${_E2E_SBEX_CMD[@]}"
+  _e2e_sbex_dispatch pipe
 }
diff --git a/test/e2e-scenario/validation_suites/smoke/00-cli-available.sh b/test/e2e-scenario/validation_suites/smoke/00-cli-available.sh
index e56925b1f9..ab733f039d 100755
--- a/test/e2e-scenario/validation_suites/smoke/00-cli-available.sh
+++ b/test/e2e-scenario/validation_suites/smoke/00-cli-available.sh
@@ -18,11 +18,6 @@ echo "smoke:cli-available"
 
 e2e_context_require E2E_SCENARIO
 
-if e2e_env_is_dry_run; then
-  echo "[dry-run] would check that nemoclaw CLI is on PATH"
-  exit 0
-fi
-
 if ! command -v nemoclaw >/dev/null 2>&1; then
   echo "smoke:cli-available: nemoclaw CLI not on PATH" >&2
   exit 1
diff --git a/test/e2e-scenario/validation_suites/smoke/03-sandbox-shell.sh b/test/e2e-scenario/validation_suites/smoke/03-sandbox-shell.sh
index b92dc33e8a..966efeb2d8 100755
--- a/test/e2e-scenario/validation_suites/smoke/03-sandbox-shell.sh
+++ b/test/e2e-scenario/validation_suites/smoke/03-sandbox-shell.sh
@@ -4,7 +4,6 @@
 #
 # smoke step: sandbox-shell
 # Verifies that OpenShell can execute a trivial command inside the sandbox.
-# Honors E2E_DRY_RUN.
 
 set -euo pipefail
 
@@ -14,17 +13,15 @@ LIB_DIR="$(cd "${SCRIPT_DIR}/../../runtime/lib" && pwd)"
 . "${LIB_DIR}/env.sh"
 # shellcheck source=../../runtime/lib/context.sh
 . "${LIB_DIR}/context.sh"
+# shellcheck source=../sandbox-exec.sh
+. "${SCRIPT_DIR}/../sandbox-exec.sh"
 
 echo "smoke:sandbox-shell"
 e2e_context_require E2E_SANDBOX_NAME
 
-if e2e_env_is_dry_run; then
-  echo "[dry-run] would run: openshell sandbox exec --name <sandbox> -- echo ok"
-  exit 0
-fi
-
 name="$(e2e_context_get E2E_SANDBOX_NAME)"
-output="$(openshell sandbox exec --name "${name}" -- echo ok 2>&1)"
+# Orchestrator step cap is 30s; wrapper default 25s applies.
+output="$(e2e_sandbox_exec "${name}" -- echo ok 2>&1)"
 echo "${output}"
 if ! echo "${output}" | grep -q '^ok$'; then
   echo "smoke:sandbox-shell: did not receive expected 'ok' from sandbox" >&2
diff --git a/tools/e2e-scenarios/workflow-boundary.mts b/tools/e2e-scenarios/workflow-boundary.mts
index 26394d1b4c..a06b21f3ea 100644
--- a/tools/e2e-scenarios/workflow-boundary.mts
+++ b/tools/e2e-scenarios/workflow-boundary.mts
@@ -49,6 +49,13 @@ function requireRunContains(errors: string[], step: WorkflowStep | undefined, ex
   }
 }
 
+// Records an error when the step's run script contains `forbidden`; an undefined step is ignored.
+function requireRunDoesNotContain(errors: string[], step: WorkflowStep | undefined, forbidden: string): void {
+  const script = step ? stringValue(step.run) : "";
+  if (!step || !script.includes(forbidden)) return;
+  errors.push(`step '${step.name ?? "<unnamed>"}' run script must not include ${forbidden}`);
+}
+
 export function validateE2eScenariosWorkflowBoundary(
   workflowPath = DEFAULT_WORKFLOW_PATH,
 ): string[] {
@@ -92,7 +99,11 @@ export function validateE2eScenariosWorkflowBoundary(
   const normalRun = requireStep(errors, steps, "Run typed scenarios");
   requireRunContains(errors, normalRun, "npx tsx test/e2e-scenario/scenarios/run.ts");
   requireRunContains(errors, normalRun, "--scenarios");
-  requireRunContains(errors, normalRun, "--dry-run");
+  // The TS runner has one execution mode: live. Workflows must not pass
+  // --dry-run, --plan-only, or --validate-only — they hide real test runs.
+  requireRunDoesNotContain(errors, normalRun, "--dry-run");
+  requireRunDoesNotContain(errors, normalRun, "--plan-only");
+  requireRunDoesNotContain(errors, normalRun, "--validate-only");
 
   const wslInstall = requireStep(errors, steps, "Ensure Ubuntu WSL exists");
   requireRunContains(errors, wslInstall, "wsl --install");
@@ -113,7 +124,16 @@ export function validateE2eScenariosWorkflowBoundary(
   const wslRun = requireStep(errors, steps, "Run typed scenarios in WSL");
   requireRunContains(errors, wslRun, "npx tsx test/e2e-scenario/scenarios/run.ts");
   requireRunContains(errors, wslRun, "--scenarios");
-  requireRunContains(errors, wslRun, "--dry-run");
+  // From this PR: the typed runner is the only execution path; the
+  // bash runner / dry-run / validate-only / plan-only modes are
+  // removed from CI.
+  requireRunDoesNotContain(errors, wslRun, "--dry-run");
+  requireRunDoesNotContain(errors, wslRun, "--plan-only");
+  requireRunDoesNotContain(errors, wslRun, "--validate-only");
+  // From main (#4346): the WSL step must use the robust PowerShell
+  // wrapper that materializes a bash script, copies it into WSL via
+  // wslpath, and invokes it with `bash -l` so Docker WSL integration
+  // and Ubuntu first-run races are handled.
   requireRunContains(errors, wslRun, "$env:WSL_WORKDIR");
   requireRunContains(errors, wslRun, "WriteAllText");
   requireRunContains(errors, wslRun, "bash -l $wslTmp");
@@ -123,11 +143,28 @@ export function validateE2eScenariosWorkflowBoundary(
   if (uploadWith.name !== "e2e-scenario-${{ inputs.scenarios || github.event.inputs.scenarios }}") {
     errors.push("artifact upload name must include the scenarios input");
   }
-  if (uploadWith["include-hidden-files"] !== true) {
-    errors.push("artifact upload must include hidden .e2e files");
+  // Framework-owned secret hygiene: include-hidden-files MUST be false.
+  // Hidden dotfiles under the workspace can carry raw secrets (notably
+  // .e2e/context.env, written by e2e_context_set without redaction).
+  // The redacted surfaces are explicit subpaths under .e2e/ that the
+  // framework writes via orchestrators/redaction.ts::pipeRedacted.
+  if (uploadWith["include-hidden-files"] !== false) {
+    errors.push("artifact upload must set include-hidden-files: false (raw context.env must not leak)");
+  }
+  const uploadPath = stringValue(uploadWith.path);
+  if (!uploadPath.includes(".e2e/actions/")) {
+    errors.push("artifact upload path must include .e2e/actions/ (redacted action evidence)");
+  }
+  if (!uploadPath.includes(".e2e/logs/")) {
+    errors.push("artifact upload path must include .e2e/logs/ (redacted shell-step evidence)");
   }
-  if (!stringValue(uploadWith.path).includes(".e2e/")) {
-    errors.push("artifact upload path must include .e2e/");
+  // Bare blanket '.e2e/' (without a trailing subdir) would re-include
+  // the raw context.env file. Reject it so the explicit-subpath
+  // contract stays honest. Subpaths like '.e2e/actions/' are fine.
+  for (const line of uploadPath.split("\n")) {
+    if (line.trim() === ".e2e/") {
+      errors.push("artifact upload path must not list bare .e2e/ (use explicit subpaths to avoid context.env leakage)");
+    }
   }
 
   return errors;