From 8fe051d9d60f6b5dbd36d979295057b989f4b3c9 Mon Sep 17 00:00:00 2001 From: Tinson Lai Date: Wed, 27 May 2026 04:32:15 +0000 Subject: [PATCH 01/12] fix(shields): cross-check sandbox filesystem in shields status Signed-off-by: Tinson Lai --- src/lib/shields/index.test.ts | 162 ++++++++++++++++++++++++++++++++++ src/lib/shields/index.ts | 103 +++++++++++++++------ 2 files changed, 237 insertions(+), 28 deletions(-) diff --git a/src/lib/shields/index.test.ts b/src/lib/shields/index.test.ts index 454e7a3a23..700bbc48fd 100644 --- a/src/lib/shields/index.test.ts +++ b/src/lib/shields/index.test.ts @@ -655,6 +655,168 @@ describe("shields — unit logic", () => { expect(exitSpy).toHaveBeenCalledWith(1); }); }); + + // ------------------------------------------------------------------- + // NC-2243: verifyShieldsLockState cross-checks the sandbox filesystem + // so a host-root tamper that reverts /sandbox/.openclaw perms to a + // sandbox-writable state is surfaced as drift instead of reported as a + // clean lockdown. + // ------------------------------------------------------------------- + describe("NC-2243: verifyShieldsLockState detects sandbox-filesystem drift", () => { + async function loadShieldsModule() { + const distModulePath = path.join( + process.cwd(), + "dist", + "lib", + "shields", + "index.js", + ); + return import(distModulePath); + } + + const target = { + configPath: "/sandbox/.openclaw/openclaw.json", + configDir: "/sandbox/.openclaw", + sensitiveFiles: ["/sandbox/.openclaw/.config-hash"], + }; + + type StatLookup = Record; + + function makeExec(perms: StatLookup): (cmd: string[]) => string { + return (cmd: string[]) => { + if (cmd[0] === "stat") { + const file = cmd[cmd.length - 1]; + if (file in perms) return perms[file]; + } + return ""; + }; + } + + it("returns ok when all locked files and the config dir match the expected perms", async () => { + const { verifyShieldsLockState } = await loadShieldsModule(); + const exec = makeExec({ + "/sandbox/.openclaw/openclaw.json": "444 root:root", + "/sandbox/.openclaw/.config-hash": "444 root:root", + "/sandbox/.openclaw": "755 root:root", + }); + + const result = verifyShieldsLockState("openclaw", target, { + exec, + assertLegacyLayout: () => {}, + }); + + expect(result.ok).toBe(true); + expect(result.issues).toEqual([]); + }); + + it("flags drift when host-root tamper reverts dir + files to sandbox-writable perms", async () => { + const { verifyShieldsLockState } = await loadShieldsModule(); + // Reproduce the issue #4243 host-root tamper sequence: + // chmod 2770 /sandbox/.openclaw + // chown sandbox:sandbox /sandbox/.openclaw + // chmod 660 /sandbox/.openclaw/openclaw.json + // chown sandbox:sandbox /sandbox/.openclaw/openclaw.json + // chmod 660 /sandbox/.openclaw/.config-hash + // chown sandbox:sandbox /sandbox/.openclaw/.config-hash + const exec = makeExec({ + "/sandbox/.openclaw/openclaw.json": "660 sandbox:sandbox", + "/sandbox/.openclaw/.config-hash": "660 sandbox:sandbox", + "/sandbox/.openclaw": "2770 sandbox:sandbox", + }); + + const result = verifyShieldsLockState("openclaw", target, { + exec, + assertLegacyLayout: () => {}, + }); + + expect(result.ok).toBe(false); + expect(result.issues).toEqual( + expect.arrayContaining([ + "/sandbox/.openclaw/openclaw.json mode=660 (expected 444)", + "/sandbox/.openclaw/openclaw.json owner=sandbox:sandbox (expected root:root)", + "/sandbox/.openclaw/.config-hash mode=660 (expected 444)", + "/sandbox/.openclaw/.config-hash owner=sandbox:sandbox (expected root:root)", + "dir mode=2770 (expected 755)", + "dir owner=sandbox:sandbox (expected root:root)", + ]), + ); + }); + + it("reports stat failures as drift when the sandbox cannot be reached", async () => { + const { verifyShieldsLockState } = await loadShieldsModule(); + const exec = (_cmd: string[]): string => { + throw new Error("Container not found"); + }; + + const result = verifyShieldsLockState("openclaw", target, { + exec, + assertLegacyLayout: () => {}, + }); + + expect(result.ok).toBe(false); + expect(result.issues.some((issue) => issue.includes("stat failed"))).toBe( + true, + ); + expect(result.issues.some((issue) => issue.includes("Container not found"))).toBe( + true, + ); + }); + + it("flags missing immutable bit only when verifyChattr is requested", async () => { + const { verifyShieldsLockState } = await loadShieldsModule(); + const exec = (cmd: string[]): string => { + if (cmd[0] === "stat") { + if (cmd[cmd.length - 1] === "/sandbox/.openclaw") return "755 root:root"; + return "444 root:root"; + } + if (cmd[0] === "lsattr") { + // No 'i' flag present. + return `----e----- ${cmd[cmd.length - 1]}`; + } + return ""; + }; + + const withoutChattrCheck = verifyShieldsLockState("openclaw", target, { + exec, + assertLegacyLayout: () => {}, + }); + expect(withoutChattrCheck.ok).toBe(true); + + const withChattrCheck = verifyShieldsLockState("openclaw", target, { + exec, + verifyChattr: true, + assertLegacyLayout: () => {}, + }); + expect(withChattrCheck.ok).toBe(false); + expect(withChattrCheck.issues).toEqual( + expect.arrayContaining([ + "/sandbox/.openclaw/openclaw.json immutable bit not set", + "/sandbox/.openclaw/.config-hash immutable bit not set", + ]), + ); + }); + + it("surfaces a legacy state layout violation when the asserter throws", async () => { + const { verifyShieldsLockState } = await loadShieldsModule(); + const exec = makeExec({ + "/sandbox/.openclaw/openclaw.json": "444 root:root", + "/sandbox/.openclaw/.config-hash": "444 root:root", + "/sandbox/.openclaw": "755 root:root", + }); + + const result = verifyShieldsLockState("openclaw", target, { + exec, + assertLegacyLayout: () => { + throw new Error("legacy data dir exists: /sandbox/.openclaw-data"); + }, + }); + + expect(result.ok).toBe(false); + expect(result.issues).toContain( + "legacy data dir exists: /sandbox/.openclaw-data", + ); + }); + }); }); // ------------------------------------------------------------------- diff --git a/src/lib/shields/index.ts b/src/lib/shields/index.ts index d9de3700f3..8c3e4bf34a 100644 --- a/src/lib/shields/index.ts +++ b/src/lib/shields/index.ts @@ -712,15 +712,44 @@ function lockAgentConfig( // Verify the lock actually took effect. // Mode + ownership are mandatory (layers 1+2 depend on them). // Immutable bit is only verified if chattr succeeded above. + const { issues } = verifyShieldsLockState(sandboxName, target, { + verifyChattr: chattrSucceeded, + }); + + if (issues.length > 0) { + throw new Error(`Config not locked: ${issues.join(", ")}`); + } +} + +// Re-verify that the sandbox filesystem still matches what shields-up +// established: 444 root:root on each locked file, 755 root:root on the config +// directory, no legacy state layout, and (when the caller knows chattr was +// applied) the immutable bit. Returns the list of mismatches so callers can +// report drift after a host-root tamper. Stat/lsattr failures are folded into +// `issues` so the caller can decide whether to treat them as drift. +// +// `options.exec` and `options.assertLegacyLayout` are injection points for +// tests; production callers use the defaults that go through the sandbox's +// privileged exec channel. +function verifyShieldsLockState( + sandboxName: string, + target: AgentConfigTarget, + options: { + verifyChattr?: boolean; + exec?: (cmd: string[]) => string; + assertLegacyLayout?: (sandboxName: string, configDir: string) => void; + } = {}, +): { ok: boolean; issues: string[] } { + const exec = + options.exec ?? ((cmd: string[]) => privilegedSandboxExecCapture(sandboxName, cmd)); + const assertLegacyLayout = + options.assertLegacyLayout ?? assertNoLegacyStateLayout; const issues: string[] = []; - for (const f of filesToLock) { + const filesToVerify = [target.configPath, ...(target.sensitiveFiles || [])]; + + for (const f of filesToVerify) { try { - const perms = privilegedSandboxExecCapture(sandboxName, [ - "stat", - "-c", - "%a %U:%G", - f, - ]); + const perms = exec(["stat", "-c", "%a %U:%G", f]); const [mode, owner] = perms.split(" "); if (!/^4[0-4][0-4]$/.test(mode)) issues.push(`${f} mode=${mode} (expected 444)`); @@ -733,12 +762,7 @@ function lockAgentConfig( } try { - const dirPerms = privilegedSandboxExecCapture(sandboxName, [ - "stat", - "-c", - "%a %U:%G", - target.configDir, - ]); + const dirPerms = exec(["stat", "-c", "%a %U:%G", target.configDir]); const [dirMode, dirOwner] = dirPerms.split(" "); if (dirMode !== "755") issues.push(`dir mode=${dirMode} (expected 755)`); if (dirOwner !== "root:root") @@ -748,14 +772,10 @@ function lockAgentConfig( issues.push(`dir stat failed: ${msg}`); } - if (chattrSucceeded) { - for (const f of filesToLock) { + if (options.verifyChattr) { + for (const f of filesToVerify) { try { - const attrs = privilegedSandboxExecCapture(sandboxName, [ - "lsattr", - "-d", - f, - ]); + const attrs = exec(["lsattr", "-d", f]); // lsattr format: "----i---------e----- /path/to/file" // First whitespace-delimited token is the flags field. const [flags] = attrs.trim().split(/\s+/, 1); @@ -767,15 +787,13 @@ function lockAgentConfig( } try { - assertNoLegacyStateLayout(sandboxName, target.configDir); + assertLegacyLayout(sandboxName, target.configDir); } catch (err) { const msg = err instanceof Error ? err.message : String(err); issues.push(msg); } - if (issues.length > 0) { - throw new Error(`Config not locked: ${issues.join(", ")}`); - } + return { ok: issues.length === 0, issues }; } function rollbackShieldsDown( @@ -1300,15 +1318,43 @@ function shieldsStatus(sandboxName: string, allowInlineRecovery = true): void { ); return; - case "locked": + case "locked": { + // NC-2243: cross-check the sandbox filesystem so a host-root tamper that + // reverts /sandbox/.openclaw perms back to a sandbox-writable state is + // surfaced as drift instead of reported as a clean lockdown. + let driftIssues: string[] = []; + try { + const target = resolveAgentConfig(sandboxName); + driftIssues = verifyShieldsLockState(sandboxName, target).issues; + } catch (err) { + const msg = err instanceof Error ? err.message : String(err); + driftIssues = [`unable to resolve agent config target: ${msg}`]; + } + const policyLine = ` Policy: restrictive${state.shieldsPolicySnapshotPath ? " (snapshot preserved)" : ""}`; + if (driftIssues.length > 0) { + console.error( + " Shields: UP (DRIFTED — declared locked but sandbox filesystem differs)", + ); + console.error(policyLine); + if (state.shieldsDownAt) { + console.error(` Last unlocked: ${state.shieldsDownAt}`); + } + console.error(" Drift:"); + for (const issue of driftIssues) { + console.error(` - ${issue}`); + } + console.error( + ` Recovery: nemoclaw ${sandboxName} shields up # re-lock and re-verify`, + ); + process.exit(2); + } console.log(` Shields: ${posture.statusText}`); - console.log( - ` Policy: restrictive${state.shieldsPolicySnapshotPath ? " (snapshot preserved)" : ""}`, - ); + console.log(policyLine); if (state.shieldsDownAt) { console.log(` Last unlocked: ${state.shieldsDownAt}`); } return; + } case "temporarily_unlocked": { const downSince = state.shieldsDownAt @@ -1368,6 +1414,7 @@ export { parseDuration, lockAgentConfig, unlockAgentConfig, + verifyShieldsLockState, MAX_TIMEOUT_SECONDS, DEFAULT_TIMEOUT_SECONDS, }; From 0c3b6a4fa9a418c643bab7c981bf7d7f22f71717 Mon Sep 17 00:00:00 2001 From: Tinson Lai Date: Wed, 27 May 2026 05:24:38 +0000 Subject: [PATCH 02/12] fix(shields): tighten lock verifier + split into focused module Signed-off-by: Tinson Lai --- src/lib/shields/index.test.ts | 219 ++++++++++++---------------- src/lib/shields/index.ts | 108 ++++---------- src/lib/shields/verify-lock.test.ts | 190 ++++++++++++++++++++++++ src/lib/shields/verify-lock.ts | 99 +++++++++++++ 4 files changed, 406 insertions(+), 210 deletions(-) create mode 100644 src/lib/shields/verify-lock.test.ts create mode 100644 src/lib/shields/verify-lock.ts diff --git a/src/lib/shields/index.test.ts b/src/lib/shields/index.test.ts index 700bbc48fd..4e0ea6ed63 100644 --- a/src/lib/shields/index.test.ts +++ b/src/lib/shields/index.test.ts @@ -657,12 +657,9 @@ describe("shields — unit logic", () => { }); // ------------------------------------------------------------------- - // NC-2243: verifyShieldsLockState cross-checks the sandbox filesystem - // so a host-root tamper that reverts /sandbox/.openclaw perms to a - // sandbox-writable state is surfaced as drift instead of reported as a - // clean lockdown. + // shieldsStatus: locked-state drift surface // ------------------------------------------------------------------- - describe("NC-2243: verifyShieldsLockState detects sandbox-filesystem drift", () => { + describe("shieldsStatus surfaces drift returned by the verifier", () => { async function loadShieldsModule() { const distModulePath = path.join( process.cwd(), @@ -674,147 +671,113 @@ describe("shields — unit logic", () => { return import(distModulePath); } - const target = { - configPath: "/sandbox/.openclaw/openclaw.json", - configDir: "/sandbox/.openclaw", - sensitiveFiles: ["/sandbox/.openclaw/.config-hash"], - }; - - type StatLookup = Record; - - function makeExec(perms: StatLookup): (cmd: string[]) => string { - return (cmd: string[]) => { - if (cmd[0] === "stat") { - const file = cmd[cmd.length - 1]; - if (file in perms) return perms[file]; - } - return ""; - }; + function stateDir(): string { + return path.join(tmpDir, ".nemoclaw", "state"); } - it("returns ok when all locked files and the config dir match the expected perms", async () => { - const { verifyShieldsLockState } = await loadShieldsModule(); - const exec = makeExec({ - "/sandbox/.openclaw/openclaw.json": "444 root:root", - "/sandbox/.openclaw/.config-hash": "444 root:root", - "/sandbox/.openclaw": "755 root:root", - }); - - const result = verifyShieldsLockState("openclaw", target, { - exec, - assertLegacyLayout: () => {}, - }); - - expect(result.ok).toBe(true); - expect(result.issues).toEqual([]); - }); - - it("flags drift when host-root tamper reverts dir + files to sandbox-writable perms", async () => { - const { verifyShieldsLockState } = await loadShieldsModule(); - // Reproduce the issue #4243 host-root tamper sequence: - // chmod 2770 /sandbox/.openclaw - // chown sandbox:sandbox /sandbox/.openclaw - // chmod 660 /sandbox/.openclaw/openclaw.json - // chown sandbox:sandbox /sandbox/.openclaw/openclaw.json - // chmod 660 /sandbox/.openclaw/.config-hash - // chown sandbox:sandbox /sandbox/.openclaw/.config-hash - const exec = makeExec({ - "/sandbox/.openclaw/openclaw.json": "660 sandbox:sandbox", - "/sandbox/.openclaw/.config-hash": "660 sandbox:sandbox", - "/sandbox/.openclaw": "2770 sandbox:sandbox", - }); - - const result = verifyShieldsLockState("openclaw", target, { - exec, - assertLegacyLayout: () => {}, - }); - - expect(result.ok).toBe(false); - expect(result.issues).toEqual( - expect.arrayContaining([ - "/sandbox/.openclaw/openclaw.json mode=660 (expected 444)", - "/sandbox/.openclaw/openclaw.json owner=sandbox:sandbox (expected root:root)", - "/sandbox/.openclaw/.config-hash mode=660 (expected 444)", - "/sandbox/.openclaw/.config-hash owner=sandbox:sandbox (expected root:root)", - "dir mode=2770 (expected 755)", - "dir owner=sandbox:sandbox (expected root:root)", - ]), + function writeLockedState(sandboxName: string): void { + fs.mkdirSync(stateDir(), { recursive: true }); + fs.writeFileSync( + path.join(stateDir(), `shields-${sandboxName}.json`), + JSON.stringify( + { + shieldsDown: false, + updatedAt: new Date().toISOString(), + }, + null, + 2, + ), + { mode: 0o600 }, ); - }); + } - it("reports stat failures as drift when the sandbox cannot be reached", async () => { - const { verifyShieldsLockState } = await loadShieldsModule(); - const exec = (_cmd: string[]): string => { - throw new Error("Container not found"); - }; + it("prints DRIFTED with the issue list and exits 2 when the verifier reports drift", async () => { + const sandboxName = "openclaw"; + writeLockedState(sandboxName); + const driftIssues = [ + "/sandbox/.openclaw/openclaw.json mode=660 (expected 444)", + "/sandbox/.openclaw/openclaw.json owner=sandbox:sandbox (expected root:root)", + "dir mode=2770 (expected 755)", + "dir owner=sandbox:sandbox (expected root:root)", + ]; + const errorSpy = vi.spyOn(console, "error").mockImplementation(() => {}); + const exitSpy = vi + .spyOn(process, "exit") + .mockImplementation((code?: string | number | null) => { + throw new Error(`exit ${String(code)}`); + }); - const result = verifyShieldsLockState("openclaw", target, { - exec, - assertLegacyLayout: () => {}, - }); + const { shieldsStatus } = await loadShieldsModule(); + expect(() => + shieldsStatus(sandboxName, true, { + verifyLockState: () => ({ ok: false, issues: driftIssues }), + resolveConfig: () => ({ + agentName: "openclaw", + configPath: "/sandbox/.openclaw/openclaw.json", + configDir: "/sandbox/.openclaw", + }), + }), + ).toThrow("exit 2"); - expect(result.ok).toBe(false); - expect(result.issues.some((issue) => issue.includes("stat failed"))).toBe( - true, + expect(errorSpy).toHaveBeenCalledWith( + " Shields: UP (DRIFTED — declared locked but sandbox filesystem differs)", ); - expect(result.issues.some((issue) => issue.includes("Container not found"))).toBe( - true, + expect(errorSpy).toHaveBeenCalledWith(" Drift:"); + for (const issue of driftIssues) { + expect(errorSpy).toHaveBeenCalledWith(` - ${issue}`); + } + expect(errorSpy).toHaveBeenCalledWith( + ` Recovery: nemoclaw ${sandboxName} shields up # re-lock and re-verify`, ); + expect(exitSpy).toHaveBeenCalledWith(2); }); - it("flags missing immutable bit only when verifyChattr is requested", async () => { - const { verifyShieldsLockState } = await loadShieldsModule(); - const exec = (cmd: string[]): string => { - if (cmd[0] === "stat") { - if (cmd[cmd.length - 1] === "/sandbox/.openclaw") return "755 root:root"; - return "444 root:root"; - } - if (cmd[0] === "lsattr") { - // No 'i' flag present. - return `----e----- ${cmd[cmd.length - 1]}`; - } - return ""; - }; + it("prints a clean locked status when the verifier reports no drift", async () => { + const sandboxName = "openclaw"; + writeLockedState(sandboxName); + const logSpy = vi.spyOn(console, "log").mockImplementation(() => {}); + const errorSpy = vi.spyOn(console, "error").mockImplementation(() => {}); - const withoutChattrCheck = verifyShieldsLockState("openclaw", target, { - exec, - assertLegacyLayout: () => {}, + const { shieldsStatus } = await loadShieldsModule(); + shieldsStatus(sandboxName, true, { + verifyLockState: () => ({ ok: true, issues: [] }), + resolveConfig: () => ({ + agentName: "openclaw", + configPath: "/sandbox/.openclaw/openclaw.json", + configDir: "/sandbox/.openclaw", + }), }); - expect(withoutChattrCheck.ok).toBe(true); - const withChattrCheck = verifyShieldsLockState("openclaw", target, { - exec, - verifyChattr: true, - assertLegacyLayout: () => {}, - }); - expect(withChattrCheck.ok).toBe(false); - expect(withChattrCheck.issues).toEqual( - expect.arrayContaining([ - "/sandbox/.openclaw/openclaw.json immutable bit not set", - "/sandbox/.openclaw/.config-hash immutable bit not set", - ]), - ); + expect(logSpy).toHaveBeenCalledWith(" Shields: UP (lockdown active)"); + expect(logSpy).toHaveBeenCalledWith(" Policy: restrictive"); + expect(errorSpy).not.toHaveBeenCalled(); }); - it("surfaces a legacy state layout violation when the asserter throws", async () => { - const { verifyShieldsLockState } = await loadShieldsModule(); - const exec = makeExec({ - "/sandbox/.openclaw/openclaw.json": "444 root:root", - "/sandbox/.openclaw/.config-hash": "444 root:root", - "/sandbox/.openclaw": "755 root:root", - }); + it("treats a resolveConfig throw as drift so the locked status cannot mask a setup gap", async () => { + const sandboxName = "openclaw"; + writeLockedState(sandboxName); + const errorSpy = vi.spyOn(console, "error").mockImplementation(() => {}); + const exitSpy = vi + .spyOn(process, "exit") + .mockImplementation((code?: string | number | null) => { + throw new Error(`exit ${String(code)}`); + }); - const result = verifyShieldsLockState("openclaw", target, { - exec, - assertLegacyLayout: () => { - throw new Error("legacy data dir exists: /sandbox/.openclaw-data"); - }, - }); + const { shieldsStatus } = await loadShieldsModule(); + expect(() => + shieldsStatus(sandboxName, true, { + verifyLockState: () => ({ ok: true, issues: [] }), + resolveConfig: () => { + throw new Error("agent config not found"); + }, + }), + ).toThrow("exit 2"); - expect(result.ok).toBe(false); - expect(result.issues).toContain( - "legacy data dir exists: /sandbox/.openclaw-data", + const allErrors = errorSpy.mock.calls.map((args) => args[0]).join("\n"); + expect(allErrors).toContain( + "unable to resolve agent config target: agent config not found", ); + expect(exitSpy).toHaveBeenCalledWith(2); }); }); }); diff --git a/src/lib/shields/index.ts b/src/lib/shields/index.ts index 8c3e4bf34a..59a132fc1d 100644 --- a/src/lib/shields/index.ts +++ b/src/lib/shields/index.ts @@ -46,6 +46,9 @@ const { buildRuntimePermissivePolicy, }: typeof import("./permissive-runtime") = require("./permissive-runtime"); const { cleanupTempDir } = require("../onboard/temp-files"); +const { + verifyShieldsLockState, +}: typeof import("./verify-lock") = require("./verify-lock"); const STATE_DIR = resolveNemoclawStateDir(); @@ -714,6 +717,8 @@ function lockAgentConfig( // Immutable bit is only verified if chattr succeeded above. const { issues } = verifyShieldsLockState(sandboxName, target, { verifyChattr: chattrSucceeded, + exec: (cmd: string[]) => privilegedSandboxExecCapture(sandboxName, cmd), + assertLegacyLayout: assertNoLegacyStateLayout, }); if (issues.length > 0) { @@ -721,81 +726,6 @@ function lockAgentConfig( } } -// Re-verify that the sandbox filesystem still matches what shields-up -// established: 444 root:root on each locked file, 755 root:root on the config -// directory, no legacy state layout, and (when the caller knows chattr was -// applied) the immutable bit. Returns the list of mismatches so callers can -// report drift after a host-root tamper. Stat/lsattr failures are folded into -// `issues` so the caller can decide whether to treat them as drift. -// -// `options.exec` and `options.assertLegacyLayout` are injection points for -// tests; production callers use the defaults that go through the sandbox's -// privileged exec channel. -function verifyShieldsLockState( - sandboxName: string, - target: AgentConfigTarget, - options: { - verifyChattr?: boolean; - exec?: (cmd: string[]) => string; - assertLegacyLayout?: (sandboxName: string, configDir: string) => void; - } = {}, -): { ok: boolean; issues: string[] } { - const exec = - options.exec ?? ((cmd: string[]) => privilegedSandboxExecCapture(sandboxName, cmd)); - const assertLegacyLayout = - options.assertLegacyLayout ?? assertNoLegacyStateLayout; - const issues: string[] = []; - const filesToVerify = [target.configPath, ...(target.sensitiveFiles || [])]; - - for (const f of filesToVerify) { - try { - const perms = exec(["stat", "-c", "%a %U:%G", f]); - const [mode, owner] = perms.split(" "); - if (!/^4[0-4][0-4]$/.test(mode)) - issues.push(`${f} mode=${mode} (expected 444)`); - if (owner !== "root:root") - issues.push(`${f} owner=${owner} (expected root:root)`); - } catch (err) { - const msg = err instanceof Error ? err.message : String(err); - issues.push(`${f} stat failed: ${msg}`); - } - } - - try { - const dirPerms = exec(["stat", "-c", "%a %U:%G", target.configDir]); - const [dirMode, dirOwner] = dirPerms.split(" "); - if (dirMode !== "755") issues.push(`dir mode=${dirMode} (expected 755)`); - if (dirOwner !== "root:root") - issues.push(`dir owner=${dirOwner} (expected root:root)`); - } catch (err) { - const msg = err instanceof Error ? err.message : String(err); - issues.push(`dir stat failed: ${msg}`); - } - - if (options.verifyChattr) { - for (const f of filesToVerify) { - try { - const attrs = exec(["lsattr", "-d", f]); - // lsattr format: "----i---------e----- /path/to/file" - // First whitespace-delimited token is the flags field. - const [flags] = attrs.trim().split(/\s+/, 1); - if (!flags.includes("i")) issues.push(`${f} immutable bit not set`); - } catch { - // lsattr may not be available on all images — skip - } - } - } - - try { - assertLegacyLayout(sandboxName, target.configDir); - } catch (err) { - const msg = err instanceof Error ? err.message : String(err); - issues.push(msg); - } - - return { ok: issues.length === 0, issues }; -} - function rollbackShieldsDown( sandboxName: string, target: AgentConfigTarget, @@ -1293,9 +1223,21 @@ function shieldsUp(sandboxName: string, opts: { throwOnError?: boolean } = {}): // shields status // --------------------------------------------------------------------------- -function shieldsStatus(sandboxName: string, allowInlineRecovery = true): void { +type ShieldsStatusDeps = { + verifyLockState?: typeof verifyShieldsLockState; + resolveConfig?: typeof resolveAgentConfig; +}; + +function shieldsStatus( + sandboxName: string, + allowInlineRecovery = true, + deps: ShieldsStatusDeps = {}, +): void { validateName(sandboxName, "sandbox name"); + const verify = deps.verifyLockState ?? verifyShieldsLockState; + const resolveConfig = deps.resolveConfig ?? resolveAgentConfig; + const posture = getShieldsPosture(sandboxName, allowInlineRecovery); const { state } = posture; if (state._isCorrupt) { @@ -1319,13 +1261,16 @@ function shieldsStatus(sandboxName: string, allowInlineRecovery = true): void { return; case "locked": { - // NC-2243: cross-check the sandbox filesystem so a host-root tamper that - // reverts /sandbox/.openclaw perms back to a sandbox-writable state is - // surfaced as drift instead of reported as a clean lockdown. + // Cross-check the sandbox filesystem so a host-root tamper that reverts + // protected perms back to a sandbox-writable state is surfaced as drift + // instead of reported as a clean lockdown. let driftIssues: string[] = []; try { - const target = resolveAgentConfig(sandboxName); - driftIssues = verifyShieldsLockState(sandboxName, target).issues; + const target = resolveConfig(sandboxName); + driftIssues = verify(sandboxName, target, { + exec: (cmd: string[]) => privilegedSandboxExecCapture(sandboxName, cmd), + assertLegacyLayout: assertNoLegacyStateLayout, + }).issues; } catch (err) { const msg = err instanceof Error ? err.message : String(err); driftIssues = [`unable to resolve agent config target: ${msg}`]; @@ -1414,7 +1359,6 @@ export { parseDuration, lockAgentConfig, unlockAgentConfig, - verifyShieldsLockState, MAX_TIMEOUT_SECONDS, DEFAULT_TIMEOUT_SECONDS, }; diff --git a/src/lib/shields/verify-lock.test.ts b/src/lib/shields/verify-lock.test.ts new file mode 100644 index 0000000000..8d19c11eb6 --- /dev/null +++ b/src/lib/shields/verify-lock.test.ts @@ -0,0 +1,190 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +import { describe, expect, it } from "vitest"; +import path from "node:path"; + +// Import from compiled dist/ for correct coverage attribution. +async function loadVerifier(): Promise { + const distModulePath = path.join( + process.cwd(), + "dist", + "lib", + "shields", + "verify-lock.js", + ); + return import(distModulePath); +} + +const target = { + configPath: "/sandbox/.openclaw/openclaw.json", + configDir: "/sandbox/.openclaw", + sensitiveFiles: ["/sandbox/.openclaw/.config-hash"], +}; + +type StatLookup = Record; + +function makeExec(perms: StatLookup): (cmd: string[]) => string { + return (cmd: string[]) => { + if (cmd[0] === "stat") { + const file = cmd[cmd.length - 1]; + if (file in perms) return perms[file]; + } + return ""; + }; +} + +describe("verifyShieldsLockState", () => { + it("returns ok when all locked files and the config dir match the expected perms", async () => { + const { verifyShieldsLockState } = await loadVerifier(); + const exec = makeExec({ + "/sandbox/.openclaw/openclaw.json": "444 root:root", + "/sandbox/.openclaw/.config-hash": "444 root:root", + "/sandbox/.openclaw": "755 root:root", + }); + + const result = verifyShieldsLockState("openclaw", target, { exec }); + + expect(result.ok).toBe(true); + expect(result.issues).toEqual([]); + }); + + it("flags drift when host-root tamper reverts dir + files to sandbox-writable perms", async () => { + const { verifyShieldsLockState } = await loadVerifier(); + const exec = makeExec({ + "/sandbox/.openclaw/openclaw.json": "660 sandbox:sandbox", + "/sandbox/.openclaw/.config-hash": "660 sandbox:sandbox", + "/sandbox/.openclaw": "2770 sandbox:sandbox", + }); + + const result = verifyShieldsLockState("openclaw", target, { exec }); + + expect(result.ok).toBe(false); + expect(result.issues).toEqual( + expect.arrayContaining([ + "/sandbox/.openclaw/openclaw.json mode=660 (expected 444)", + "/sandbox/.openclaw/openclaw.json owner=sandbox:sandbox (expected root:root)", + "/sandbox/.openclaw/.config-hash mode=660 (expected 444)", + "/sandbox/.openclaw/.config-hash owner=sandbox:sandbox (expected root:root)", + "dir mode=2770 (expected 755)", + "dir owner=sandbox:sandbox (expected root:root)", + ]), + ); + }); + + it.each([ + ["402", "world-writable file"], + ["420", "group-writable file"], + ["422", "group + world-writable file"], + ["440", "missing world read"], + ["404", "missing group read"], + ["644", "owner-writable file"], + ["445", "world-execute file"], + ])( + "rejects mode %s (%s) so writable perms cannot masquerade as locked", + async (mode, _description) => { + const { verifyShieldsLockState } = await loadVerifier(); + const exec = makeExec({ + "/sandbox/.openclaw/openclaw.json": `${mode} root:root`, + "/sandbox/.openclaw/.config-hash": "444 root:root", + "/sandbox/.openclaw": "755 root:root", + }); + + const result = verifyShieldsLockState("openclaw", target, { exec }); + + expect(result.ok).toBe(false); + expect(result.issues).toContain( + `/sandbox/.openclaw/openclaw.json mode=${mode} (expected 444)`, + ); + }, + ); + + it("rejects any non-755 dir mode even when the file modes are clean", async () => { + const { verifyShieldsLockState } = await loadVerifier(); + const exec = makeExec({ + "/sandbox/.openclaw/openclaw.json": "444 root:root", + "/sandbox/.openclaw/.config-hash": "444 root:root", + "/sandbox/.openclaw": "775 root:root", + }); + + const result = verifyShieldsLockState("openclaw", target, { exec }); + + expect(result.ok).toBe(false); + expect(result.issues).toContain("dir mode=775 (expected 755)"); + }); + + it("reports stat failures as drift when the sandbox cannot be reached", async () => { + const { verifyShieldsLockState } = await loadVerifier(); + const exec = (_cmd: string[]): string => { + throw new Error("Container not found"); + }; + + const result = verifyShieldsLockState("openclaw", target, { exec }); + + expect(result.ok).toBe(false); + expect(result.issues.some((issue: string) => issue.includes("stat failed"))).toBe( + true, + ); + expect( + result.issues.some((issue: string) => issue.includes("Container not found")), + ).toBe(true); + }); + + it("flags missing immutable bit only when verifyChattr is requested", async () => { + const { verifyShieldsLockState } = await loadVerifier(); + const exec = (cmd: string[]): string => { + if (cmd[0] === "stat") { + if (cmd[cmd.length - 1] === "/sandbox/.openclaw") return "755 root:root"; + return "444 root:root"; + } + if (cmd[0] === "lsattr") { + // No 'i' flag present. + return `----e----- ${cmd[cmd.length - 1]}`; + } + return ""; + }; + + const withoutChattrCheck = verifyShieldsLockState("openclaw", target, { exec }); + expect(withoutChattrCheck.ok).toBe(true); + + const withChattrCheck = verifyShieldsLockState("openclaw", target, { + exec, + verifyChattr: true, + }); + expect(withChattrCheck.ok).toBe(false); + expect(withChattrCheck.issues).toEqual( + expect.arrayContaining([ + "/sandbox/.openclaw/openclaw.json immutable bit not set", + "/sandbox/.openclaw/.config-hash immutable bit not set", + ]), + ); + }); + + it("surfaces a legacy state layout violation when the asserter throws", async () => { + const { verifyShieldsLockState } = await loadVerifier(); + const exec = makeExec({ + "/sandbox/.openclaw/openclaw.json": "444 root:root", + "/sandbox/.openclaw/.config-hash": "444 root:root", + "/sandbox/.openclaw": "755 root:root", + }); + + const result = verifyShieldsLockState("openclaw", target, { + exec, + assertLegacyLayout: () => { + throw new Error("legacy data dir exists: /sandbox/.openclaw-data"); + }, + }); + + expect(result.ok).toBe(false); + expect(result.issues).toContain( + "legacy data dir exists: /sandbox/.openclaw-data", + ); + }); + + it("rejects calls without an exec dependency so production paths cannot silently no-op", async () => { + const { verifyShieldsLockState } = await loadVerifier(); + expect(() => verifyShieldsLockState("openclaw", target)).toThrow( + /requires options\.exec/, + ); + }); +}); diff --git a/src/lib/shields/verify-lock.ts b/src/lib/shields/verify-lock.ts new file mode 100644 index 0000000000..79e3d90b42 --- /dev/null +++ b/src/lib/shields/verify-lock.ts @@ -0,0 +1,99 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +// Re-verify that the sandbox filesystem still matches what `shields up` +// established: 444 root:root on each locked file, 755 root:root on the +// config directory, no legacy state layout, and (when the caller knows +// chattr was applied) the immutable bit. Returns the list of mismatches +// so callers can either fail the lock operation or surface drift after a +// host-root tamper. Stat/lsattr failures are folded into `issues` so the +// caller can decide whether to treat them as drift. + +export type LockTarget = { + configPath: string; + configDir: string; + sensitiveFiles?: string[]; +}; + +export type VerifyShieldsLockOptions = { + verifyChattr?: boolean; + exec?: (cmd: string[]) => string; + assertLegacyLayout?: (sandboxName: string, configDir: string) => void; +}; + +export type VerifyShieldsLockResult = { + ok: boolean; + issues: string[]; +}; + +const EXPECTED_FILE_MODE = "444"; +const EXPECTED_DIR_MODE = "755"; +const EXPECTED_OWNER = "root:root"; + +function noopAssertLegacyLayout(_sandboxName: string, _configDir: string): void { + // Production callers replace this with the real legacy-layout assertion; + // when omitted, the verifier treats legacy-layout state as "no issue". +} + +export function verifyShieldsLockState( + sandboxName: string, + target: LockTarget, + options: VerifyShieldsLockOptions = {}, +): VerifyShieldsLockResult { + if (!options.exec) { + throw new Error("verifyShieldsLockState requires options.exec"); + } + const exec = options.exec; + const assertLegacyLayout = options.assertLegacyLayout ?? noopAssertLegacyLayout; + const issues: string[] = []; + const filesToVerify = [target.configPath, ...(target.sensitiveFiles || [])]; + + for (const f of filesToVerify) { + try { + const perms = exec(["stat", "-c", "%a %U:%G", f]); + const [mode, owner] = perms.split(" "); + if (mode !== EXPECTED_FILE_MODE) + issues.push(`${f} mode=${mode} (expected ${EXPECTED_FILE_MODE})`); + if (owner !== EXPECTED_OWNER) + issues.push(`${f} owner=${owner} (expected ${EXPECTED_OWNER})`); + } catch (err) { + const msg = err instanceof Error ? err.message : String(err); + issues.push(`${f} stat failed: ${msg}`); + } + } + + try { + const dirPerms = exec(["stat", "-c", "%a %U:%G", target.configDir]); + const [dirMode, dirOwner] = dirPerms.split(" "); + if (dirMode !== EXPECTED_DIR_MODE) + issues.push(`dir mode=${dirMode} (expected ${EXPECTED_DIR_MODE})`); + if (dirOwner !== EXPECTED_OWNER) + issues.push(`dir owner=${dirOwner} (expected ${EXPECTED_OWNER})`); + } catch (err) { + const msg = err instanceof Error ? err.message : String(err); + issues.push(`dir stat failed: ${msg}`); + } + + if (options.verifyChattr) { + for (const f of filesToVerify) { + try { + const attrs = exec(["lsattr", "-d", f]); + // lsattr format: "----i---------e----- /path/to/file" + // First whitespace-delimited token is the flags field. + const [flags] = attrs.trim().split(/\s+/, 1); + if (!flags.includes("i")) issues.push(`${f} immutable bit not set`); + } catch { + // lsattr may not be available on all images — skip + } + } + } + + try { + assertLegacyLayout(sandboxName, target.configDir); + } catch (err) { + const msg = err instanceof Error ? err.message : String(err); + issues.push(msg); + } + + return { ok: issues.length === 0, issues }; +} From 0edc2689b431ea5bd41129a5d713545987b76acb Mon Sep 17 00:00:00 2001 From: Tinson Lai Date: Wed, 27 May 2026 05:51:46 +0000 Subject: [PATCH 03/12] fix(shields): require exec + assertLegacyLayout deps, record lsattr failures Signed-off-by: Tinson Lai --- docs/security/best-practices.mdx | 4 +- src/lib/shields/verify-lock.test.ts | 78 +++++++++++++++++++++++++---- src/lib/shields/verify-lock.ts | 36 +++++++------ 3 files changed, 92 insertions(+), 26 deletions(-) diff --git a/docs/security/best-practices.mdx b/docs/security/best-practices.mdx index dd2b75569c..69c4a823f4 100644 --- a/docs/security/best-practices.mdx +++ b/docs/security/best-practices.mdx @@ -202,13 +202,13 @@ Writable agent state such as plugins, skills, hooks, and workspace metadata live By default, this directory starts writable so the agent can manage its own config, install skills, and write to standard home-directory paths natively. For sensitive workloads, use a reviewed host-side immutability workflow after initial setup so config and writable state entry points cannot be changed by the sandbox user. -- **DAC permissions (default).** The sandbox user owns `/sandbox/.openclaw` with mode `700` and `openclaw.json` with mode `600`, so the agent can read and write config directly. +- **DAC permissions (default).** The sandbox user owns `/sandbox/.openclaw` with mode `2770` (group-writable with setgid so descendants stay in the sandbox group) and `openclaw.json` with mode `660`, so the agent can read and write config directly. Under `shields up`, the directory is re-owned `root:root 755` and the config files are re-owned `root:root 444`, and `shields status` cross-checks both states to surface drift. - **Config integrity hash.** The image includes a SHA256 hash of `openclaw.json`. In the default mutable state, `.config-hash` is sandbox-owned and is not a tamper-proof trust anchor, so startup does not fail closed on that hash. When the hash is root-owned and read-only, startup enforces it and refuses to start if the hash does not match. - **Gateway token environment.** The gateway exports `OPENCLAW_GATEWAY_TOKEN` and writes it to `/tmp/nemoclaw-proxy-env.sh` for interactive sandbox sessions. Keep this in mind when deciding whether a workload should run with mutable config or an immutable config posture. | Aspect | Detail | |---|---| -| Default | The sandbox keeps `/sandbox/.openclaw` writable (`700 sandbox:sandbox`), sets `openclaw.json` to `600 sandbox:sandbox`, lets the agent manage state directly, and has the gateway place `OPENCLAW_GATEWAY_TOKEN` in `/tmp/nemoclaw-proxy-env.sh` for interactive shells. | +| Default | The sandbox keeps `/sandbox/.openclaw` writable (`2770 sandbox:sandbox`), sets `openclaw.json` to `660 sandbox:sandbox`, lets the agent manage state directly, and has the gateway place `OPENCLAW_GATEWAY_TOKEN` in `/tmp/nemoclaw-proxy-env.sh` for interactive shells. | | What you can change | Apply a reviewed host-side immutability workflow to lock config and state directories with DAC permissions and the immutable flag where available. | | Risk of default | A writable `.openclaw` directory lets the agent modify its own gateway config: disabling CORS or redirecting inference to an attacker-controlled endpoint. | | Recommendation | For always-on assistants handling sensitive workloads, lock config after initial setup. For development workflows, the writable default is appropriate. | diff --git a/src/lib/shields/verify-lock.test.ts b/src/lib/shields/verify-lock.test.ts index 8d19c11eb6..586edd564c 100644 --- a/src/lib/shields/verify-lock.test.ts +++ b/src/lib/shields/verify-lock.test.ts @@ -22,6 +22,8 @@ const target = { sensitiveFiles: ["/sandbox/.openclaw/.config-hash"], }; +const noopLegacyLayoutAsserter = (): void => {}; + type StatLookup = Record; function makeExec(perms: StatLookup): (cmd: string[]) => string { @@ -43,7 +45,10 @@ describe("verifyShieldsLockState", () => { "/sandbox/.openclaw": "755 root:root", }); - const result = verifyShieldsLockState("openclaw", target, { exec }); + const result = verifyShieldsLockState("openclaw", target, { + exec, + assertLegacyLayout: noopLegacyLayoutAsserter, + }); expect(result.ok).toBe(true); expect(result.issues).toEqual([]); @@ -57,7 +62,10 @@ describe("verifyShieldsLockState", () => { "/sandbox/.openclaw": "2770 sandbox:sandbox", }); - const result = verifyShieldsLockState("openclaw", target, { exec }); + const result = verifyShieldsLockState("openclaw", target, { + exec, + assertLegacyLayout: noopLegacyLayoutAsserter, + }); expect(result.ok).toBe(false); expect(result.issues).toEqual( @@ -90,7 +98,10 @@ describe("verifyShieldsLockState", () => { "/sandbox/.openclaw": "755 root:root", }); - const result = verifyShieldsLockState("openclaw", target, { exec }); + const result = verifyShieldsLockState("openclaw", target, { + exec, + assertLegacyLayout: noopLegacyLayoutAsserter, + }); expect(result.ok).toBe(false); expect(result.issues).toContain( @@ -107,7 +118,10 @@ describe("verifyShieldsLockState", () => { "/sandbox/.openclaw": "775 root:root", }); - const result = verifyShieldsLockState("openclaw", target, { exec }); + const result = verifyShieldsLockState("openclaw", target, { + exec, + assertLegacyLayout: noopLegacyLayoutAsserter, + }); expect(result.ok).toBe(false); expect(result.issues).toContain("dir mode=775 (expected 755)"); @@ -119,7 +133,10 @@ describe("verifyShieldsLockState", () => { throw new Error("Container not found"); }; - const result = verifyShieldsLockState("openclaw", target, { exec }); + const result = verifyShieldsLockState("openclaw", target, { + exec, + assertLegacyLayout: noopLegacyLayoutAsserter, + }); expect(result.ok).toBe(false); expect(result.issues.some((issue: string) => issue.includes("stat failed"))).toBe( @@ -144,12 +161,16 @@ describe("verifyShieldsLockState", () => { return ""; }; - const withoutChattrCheck = verifyShieldsLockState("openclaw", target, { exec }); + const withoutChattrCheck = verifyShieldsLockState("openclaw", target, { + exec, + assertLegacyLayout: noopLegacyLayoutAsserter, + }); expect(withoutChattrCheck.ok).toBe(true); const withChattrCheck = verifyShieldsLockState("openclaw", target, { exec, verifyChattr: true, + assertLegacyLayout: noopLegacyLayoutAsserter, }); expect(withChattrCheck.ok).toBe(false); expect(withChattrCheck.issues).toEqual( @@ -160,6 +181,34 @@ describe("verifyShieldsLockState", () => { ); }); + it("records lsattr exec failure as drift when verifyChattr is requested so a missing binary is not a silent pass", async () => { + const { verifyShieldsLockState } = await loadVerifier(); + const exec = (cmd: string[]): string => { + if (cmd[0] === "stat") { + if (cmd[cmd.length - 1] === "/sandbox/.openclaw") return "755 root:root"; + return "444 root:root"; + } + if (cmd[0] === "lsattr") { + throw new Error("lsattr: command not found"); + } + return ""; + }; + + const result = verifyShieldsLockState("openclaw", target, { + exec, + verifyChattr: true, + assertLegacyLayout: noopLegacyLayoutAsserter, + }); + + expect(result.ok).toBe(false); + expect(result.issues).toEqual( + expect.arrayContaining([ + "/sandbox/.openclaw/openclaw.json lsattr failed: lsattr: command not found", + "/sandbox/.openclaw/.config-hash lsattr failed: lsattr: command not found", + ]), + ); + }); + it("surfaces a legacy state layout violation when the asserter throws", async () => { const { verifyShieldsLockState } = await loadVerifier(); const exec = makeExec({ @@ -183,8 +232,19 @@ describe("verifyShieldsLockState", () => { it("rejects calls without an exec dependency so production paths cannot silently no-op", async () => { const { verifyShieldsLockState } = await loadVerifier(); - expect(() => verifyShieldsLockState("openclaw", target)).toThrow( - /requires options\.exec/, - ); + expect(() => + verifyShieldsLockState("openclaw", target, { + assertLegacyLayout: noopLegacyLayoutAsserter, + } as unknown as Parameters[2]), + ).toThrow(/requires options\.exec/); + }); + + it("rejects calls without an assertLegacyLayout dependency so the legacy-layout check cannot be skipped", async () => { + const { verifyShieldsLockState } = await loadVerifier(); + expect(() => + verifyShieldsLockState("openclaw", target, { + exec: () => "", + } as unknown as Parameters[2]), + ).toThrow(/requires options\.assertLegacyLayout/); }); }); diff --git a/src/lib/shields/verify-lock.ts b/src/lib/shields/verify-lock.ts index 79e3d90b42..e864f75ba3 100644 --- a/src/lib/shields/verify-lock.ts +++ b/src/lib/shields/verify-lock.ts @@ -6,8 +6,16 @@ // config directory, no legacy state layout, and (when the caller knows // chattr was applied) the immutable bit. Returns the list of mismatches // so callers can either fail the lock operation or surface drift after a -// host-root tamper. Stat/lsattr failures are folded into `issues` so the -// caller can decide whether to treat them as drift. +// host-root tamper. +// +// Failure handling: +// - `stat` failures are recorded as issues so the caller cannot mistake +// an unreachable sandbox for a clean lockdown. +// - `lsattr` failures are recorded as issues only when `verifyChattr` is +// true. Some images do not ship `lsattr`, so when chattr verification +// was not requested the binary's absence is treated as an unavailable +// check rather than a tamper signal. +// - `assertLegacyLayout` failures are recorded as issues. export type LockTarget = { configPath: string; @@ -17,8 +25,8 @@ export type LockTarget = { export type VerifyShieldsLockOptions = { verifyChattr?: boolean; - exec?: (cmd: string[]) => string; - assertLegacyLayout?: (sandboxName: string, configDir: string) => void; + exec: (cmd: string[]) => string; + assertLegacyLayout: (sandboxName: string, configDir: string) => void; }; export type VerifyShieldsLockResult = { @@ -30,21 +38,18 @@ const EXPECTED_FILE_MODE = "444"; const EXPECTED_DIR_MODE = "755"; const EXPECTED_OWNER = "root:root"; -function noopAssertLegacyLayout(_sandboxName: string, _configDir: string): void { - // Production callers replace this with the real legacy-layout assertion; - // when omitted, the verifier treats legacy-layout state as "no issue". -} - export function verifyShieldsLockState( sandboxName: string, target: LockTarget, - options: VerifyShieldsLockOptions = {}, + options: VerifyShieldsLockOptions, ): VerifyShieldsLockResult { - if (!options.exec) { + if (!options || typeof options.exec !== "function") { throw new Error("verifyShieldsLockState requires options.exec"); } - const exec = options.exec; - const assertLegacyLayout = options.assertLegacyLayout ?? noopAssertLegacyLayout; + if (typeof options.assertLegacyLayout !== "function") { + throw new Error("verifyShieldsLockState requires options.assertLegacyLayout"); + } + const { exec, assertLegacyLayout } = options; const issues: string[] = []; const filesToVerify = [target.configPath, ...(target.sensitiveFiles || [])]; @@ -82,8 +87,9 @@ export function verifyShieldsLockState( // First whitespace-delimited token is the flags field. const [flags] = attrs.trim().split(/\s+/, 1); if (!flags.includes("i")) issues.push(`${f} immutable bit not set`); - } catch { - // lsattr may not be available on all images — skip + } catch (err) { + const msg = err instanceof Error ? err.message : String(err); + issues.push(`${f} lsattr failed: ${msg}`); } } } From 337bd08f4ff54457f614527a023fcb563eba1a18 Mon Sep 17 00:00:00 2001 From: Tinson Lai Date: Wed, 27 May 2026 07:55:23 +0000 Subject: [PATCH 04/12] fix(shields): add tamper E2E, scope drift to DAC, dedup test helpers Signed-off-by: Tinson Lai --- docs/security/best-practices.mdx | 2 +- src/lib/shields/index.test.ts | 45 +++++++------------ src/lib/shields/index.ts | 8 ++++ test/e2e/test-shields-config.sh | 74 ++++++++++++++++++++++++++++++++ 4 files changed, 98 insertions(+), 31 deletions(-) diff --git a/docs/security/best-practices.mdx b/docs/security/best-practices.mdx index 69c4a823f4..45dc7bd0c2 100644 --- a/docs/security/best-practices.mdx +++ b/docs/security/best-practices.mdx @@ -202,7 +202,7 @@ Writable agent state such as plugins, skills, hooks, and workspace metadata live By default, this directory starts writable so the agent can manage its own config, install skills, and write to standard home-directory paths natively. For sensitive workloads, use a reviewed host-side immutability workflow after initial setup so config and writable state entry points cannot be changed by the sandbox user. -- **DAC permissions (default).** The sandbox user owns `/sandbox/.openclaw` with mode `2770` (group-writable with setgid so descendants stay in the sandbox group) and `openclaw.json` with mode `660`, so the agent can read and write config directly. Under `shields up`, the directory is re-owned `root:root 755` and the config files are re-owned `root:root 444`, and `shields status` cross-checks both states to surface drift. +- DAC permissions (default): the sandbox user owns `/sandbox/.openclaw` with mode `2770` (group-writable with setgid so descendants stay in the sandbox group) and `openclaw.json` with mode `660`, so the agent reads and writes config directly. `shields up` re-owns the directory `root:root 755` and the config files `root:root 444`, and `shields status` cross-checks both states to surface drift. - **Config integrity hash.** The image includes a SHA256 hash of `openclaw.json`. In the default mutable state, `.config-hash` is sandbox-owned and is not a tamper-proof trust anchor, so startup does not fail closed on that hash. When the hash is root-owned and read-only, startup enforces it and refuses to start if the hash does not match. - **Gateway token environment.** The gateway exports `OPENCLAW_GATEWAY_TOKEN` and writes it to `/tmp/nemoclaw-proxy-env.sh` for interactive sandbox sessions. Keep this in mind when deciding whether a workload should run with mutable config or an immutable config posture. diff --git a/src/lib/shields/index.test.ts b/src/lib/shields/index.test.ts index 4e0ea6ed63..a5ce1b7cbc 100644 --- a/src/lib/shields/index.test.ts +++ b/src/lib/shields/index.test.ts @@ -98,6 +98,21 @@ afterEach(() => { // Instead, test the logic by directly manipulating state files and // calling functions that read them at invocation time. +async function loadShieldsModule(): Promise { + const distModulePath = path.join( + process.cwd(), + "dist", + "lib", + "shields", + "index.js", + ); + return import(distModulePath); +} + +function stateDir(): string { + return path.join(tmpDir, ".nemoclaw", "state"); +} + describe("shields — unit logic", () => { describe("parseDuration (inline in shields.ts)", () => { // parseDuration is inlined in shields.ts. Test it via the ESM module. @@ -382,21 +397,6 @@ describe("shields — unit logic", () => { }); describe("NC-3112: status self-heals stale expired auto-restore markers", () => { - async function loadShieldsModule() { - const distModulePath = path.join( - process.cwd(), - "dist", - "lib", - "shields", - "index.js", - ); - return import(distModulePath); - } - - function stateDir(): string { - return path.join(tmpDir, ".nemoclaw", "state"); - } - function writeState( sandboxName: string, state: Record, @@ -660,21 +660,6 @@ describe("shields — unit logic", () => { // shieldsStatus: locked-state drift surface // ------------------------------------------------------------------- describe("shieldsStatus surfaces drift returned by the verifier", () => { - async function loadShieldsModule() { - const distModulePath = path.join( - process.cwd(), - "dist", - "lib", - "shields", - "index.js", - ); - return import(distModulePath); - } - - function stateDir(): string { - return path.join(tmpDir, ".nemoclaw", "state"); - } - function writeLockedState(sandboxName: string): void { fs.mkdirSync(stateDir(), { recursive: true }); fs.writeFileSync( diff --git a/src/lib/shields/index.ts b/src/lib/shields/index.ts index 6e7fc02885..0287e75665 100644 --- a/src/lib/shields/index.ts +++ b/src/lib/shields/index.ts @@ -1209,6 +1209,14 @@ function shieldsStatus( // Cross-check the sandbox filesystem so a host-root tamper that reverts // protected perms back to a sandbox-writable state is surfaced as drift // instead of reported as a clean lockdown. + // + // Scope: DAC only (mode + ownership + legacy-state-layout). The + // immutable bit is intentionally out of scope here because shields + // state does not persist whether `chattr +i` succeeded at lock time + // (`chattr` is best-effort because kubectl exec may lack + // CAP_LINUX_IMMUTABLE), so a missing `i` flag at status time cannot + // be distinguished from "never set in the first place". Verifying it + // would require schema migration; tracked as a follow-up. let driftIssues: string[] = []; try { const target = resolveConfig(sandboxName); diff --git a/test/e2e/test-shields-config.sh b/test/e2e/test-shields-config.sh index 076c63f92b..425c654166 100755 --- a/test/e2e/test-shields-config.sh +++ b/test/e2e/test-shields-config.sh @@ -313,6 +313,80 @@ else fail "shields status should show UP: ${STATUS_OUTPUT}" fi +if echo "$STATUS_OUTPUT" | grep -q "DRIFTED"; then + fail "clean lockdown should not report DRIFTED: ${STATUS_OUTPUT}" +else + pass "shields status does not report DRIFTED on a clean lockdown" +fi + +# ══════════════════════════════════════════════════════════════════ +# Phase 5b: host-root tamper — shields status surfaces drift (#4243) +# ══════════════════════════════════════════════════════════════════ +section "Phase 5b: shields status detects host-root tamper" + +CTR=$(docker ps --filter "name=openshell-${SANDBOX_NAME}" --format "{{.Names}}" | head -n 1) +if [ -z "${CTR}" ]; then + fail "Could not find sandbox container openshell-${SANDBOX_NAME} for tamper drill" +else + info "Tampering sandbox filesystem via host-root docker exec on ${CTR}" + CONFIG_DIR=$(dirname "${CONFIG_PATH}") + HASH_PATH="${CONFIG_DIR}/.config-hash" + docker exec --user root "${CTR}" sh -c " + chmod 2770 '${CONFIG_DIR}' && + chown sandbox:sandbox '${CONFIG_DIR}' && + chmod 660 '${CONFIG_PATH}' '${HASH_PATH}' && + chown sandbox:sandbox '${CONFIG_PATH}' '${HASH_PATH}' + " >/dev/null 2>&1 || true + + PERMS_TAMPERED=$(docker exec --user root "${CTR}" stat -c '%a %U:%G' "${CONFIG_PATH}" 2>/dev/null || true) + info "Config perms after tamper: ${PERMS_TAMPERED}" + + set +e + DRIFT_OUTPUT=$(nemoclaw "${SANDBOX_NAME}" shields status 2>&1) + DRIFT_EXIT=$? + set -e + echo "$DRIFT_OUTPUT" + + if [ "${DRIFT_EXIT}" = "2" ]; then + pass "shields status exits 2 when sandbox filesystem drifts from declared lockdown" + else + fail "shields status should exit 2 on drift, got ${DRIFT_EXIT}: ${DRIFT_OUTPUT}" + fi + + if echo "$DRIFT_OUTPUT" | grep -q "DRIFTED"; then + pass "shields status reports DRIFTED on host-root tamper" + else + fail "shields status output missing DRIFTED marker: ${DRIFT_OUTPUT}" + fi + + if echo "$DRIFT_OUTPUT" | grep -qE "mode=660 \(expected 444\)"; then + pass "drift listing includes file mode mismatch" + else + fail "drift listing should include file mode mismatch: ${DRIFT_OUTPUT}" + fi + + if echo "$DRIFT_OUTPUT" | grep -qE "owner=sandbox:sandbox \(expected root:root\)"; then + pass "drift listing includes ownership mismatch" + else + fail "drift listing should include ownership mismatch: ${DRIFT_OUTPUT}" + fi + + info "Restoring lockdown perms so Phase 6 can run shields down cleanly" + docker exec --user root "${CTR}" sh -c " + chmod 755 '${CONFIG_DIR}' && + chown root:root '${CONFIG_DIR}' && + chmod 444 '${CONFIG_PATH}' '${HASH_PATH}' && + chown root:root '${CONFIG_PATH}' '${HASH_PATH}' + " >/dev/null 2>&1 || true + + CLEAN_OUTPUT=$(nemoclaw "${SANDBOX_NAME}" shields status 2>&1) + if echo "$CLEAN_OUTPUT" | grep -q "Shields: UP (lockdown active)"; then + pass "shields status returns to clean UP after manual re-lock" + else + fail "shields status should return to clean UP after re-lock: ${CLEAN_OUTPUT}" + fi +fi + # ══════════════════════════════════════════════════════════════════ # Phase 6: shields down — config returns to writable # ══════════════════════════════════════════════════════════════════ From 6b067d2ff3f4f81a9941f3fc6c9fbc92bf0a287c Mon Sep 17 00:00:00 2001 From: Tinson Lai Date: Wed, 27 May 2026 09:23:55 +0000 Subject: [PATCH 05/12] fix(shields): guard E2E exit codes + scope drift wording to locked state Signed-off-by: Tinson Lai --- docs/security/best-practices.mdx | 2 +- src/lib/shields/index.ts | 4 +- test/e2e/test-shields-config.sh | 103 ++++++++++++++++++------------- 3 files changed, 64 insertions(+), 45 deletions(-) diff --git a/docs/security/best-practices.mdx b/docs/security/best-practices.mdx index 45dc7bd0c2..d7251320d3 100644 --- a/docs/security/best-practices.mdx +++ b/docs/security/best-practices.mdx @@ -202,7 +202,7 @@ Writable agent state such as plugins, skills, hooks, and workspace metadata live By default, this directory starts writable so the agent can manage its own config, install skills, and write to standard home-directory paths natively. For sensitive workloads, use a reviewed host-side immutability workflow after initial setup so config and writable state entry points cannot be changed by the sandbox user. -- DAC permissions (default): the sandbox user owns `/sandbox/.openclaw` with mode `2770` (group-writable with setgid so descendants stay in the sandbox group) and `openclaw.json` with mode `660`, so the agent reads and writes config directly. `shields up` re-owns the directory `root:root 755` and the config files `root:root 444`, and `shields status` cross-checks both states to surface drift. +- DAC permissions (default): the sandbox user owns `/sandbox/.openclaw` with mode `2770` (group-writable with setgid so descendants stay in the sandbox group) and `openclaw.json` with mode `660`, so the agent reads and writes config directly. `shields up` re-owns the directory `root:root 755` and the config files `root:root 444`, and `shields status` cross-checks the locked state against the sandbox filesystem and surfaces drift if a host-root tamper reverts those perms. - **Config integrity hash.** The image includes a SHA256 hash of `openclaw.json`. In the default mutable state, `.config-hash` is sandbox-owned and is not a tamper-proof trust anchor, so startup does not fail closed on that hash. When the hash is root-owned and read-only, startup enforces it and refuses to start if the hash does not match. - **Gateway token environment.** The gateway exports `OPENCLAW_GATEWAY_TOKEN` and writes it to `/tmp/nemoclaw-proxy-env.sh` for interactive sandbox sessions. Keep this in mind when deciding whether a workload should run with mutable config or an immutable config posture. diff --git a/src/lib/shields/index.ts b/src/lib/shields/index.ts index 0287e75665..ad486fa307 100644 --- a/src/lib/shields/index.ts +++ b/src/lib/shields/index.ts @@ -1215,8 +1215,8 @@ function shieldsStatus( // state does not persist whether `chattr +i` succeeded at lock time // (`chattr` is best-effort because kubectl exec may lack // CAP_LINUX_IMMUTABLE), so a missing `i` flag at status time cannot - // be distinguished from "never set in the first place". Verifying it - // would require schema migration; tracked as a follow-up. + // be distinguished from "never set in the first place". Verifying + // it requires persisted lock metadata. let driftIssues: string[] = []; try { const target = resolveConfig(sandboxName); diff --git a/test/e2e/test-shields-config.sh b/test/e2e/test-shields-config.sh index 425c654166..e39569d83b 100755 --- a/test/e2e/test-shields-config.sh +++ b/test/e2e/test-shields-config.sh @@ -320,7 +320,7 @@ else fi # ══════════════════════════════════════════════════════════════════ -# Phase 5b: host-root tamper — shields status surfaces drift (#4243) +# Phase 5b: host-root tamper — shields status surfaces drift # ══════════════════════════════════════════════════════════════════ section "Phase 5b: shields status detects host-root tamper" @@ -331,59 +331,78 @@ else info "Tampering sandbox filesystem via host-root docker exec on ${CTR}" CONFIG_DIR=$(dirname "${CONFIG_PATH}") HASH_PATH="${CONFIG_DIR}/.config-hash" + + set +e docker exec --user root "${CTR}" sh -c " chmod 2770 '${CONFIG_DIR}' && chown sandbox:sandbox '${CONFIG_DIR}' && chmod 660 '${CONFIG_PATH}' '${HASH_PATH}' && chown sandbox:sandbox '${CONFIG_PATH}' '${HASH_PATH}' - " >/dev/null 2>&1 || true - - PERMS_TAMPERED=$(docker exec --user root "${CTR}" stat -c '%a %U:%G' "${CONFIG_PATH}" 2>/dev/null || true) - info "Config perms after tamper: ${PERMS_TAMPERED}" - - set +e - DRIFT_OUTPUT=$(nemoclaw "${SANDBOX_NAME}" shields status 2>&1) - DRIFT_EXIT=$? + " >/dev/null 2>&1 + TAMPER_EXIT=$? set -e - echo "$DRIFT_OUTPUT" - if [ "${DRIFT_EXIT}" = "2" ]; then - pass "shields status exits 2 when sandbox filesystem drifts from declared lockdown" + if [ "${TAMPER_EXIT}" != "0" ]; then + fail "Could not apply tamper perms via docker exec (exit ${TAMPER_EXIT})" else - fail "shields status should exit 2 on drift, got ${DRIFT_EXIT}: ${DRIFT_OUTPUT}" - fi + PERMS_TAMPERED=$(docker exec --user root "${CTR}" stat -c '%a %U:%G' "${CONFIG_PATH}" 2>/dev/null || true) + info "Config perms after tamper: ${PERMS_TAMPERED}" + + set +e + DRIFT_OUTPUT=$(nemoclaw "${SANDBOX_NAME}" shields status 2>&1) + DRIFT_EXIT=$? + set -e + echo "$DRIFT_OUTPUT" + + if [ "${DRIFT_EXIT}" = "2" ]; then + pass "shields status exits 2 when sandbox filesystem drifts from declared lockdown" + else + fail "shields status should exit 2 on drift, got ${DRIFT_EXIT}: ${DRIFT_OUTPUT}" + fi - if echo "$DRIFT_OUTPUT" | grep -q "DRIFTED"; then - pass "shields status reports DRIFTED on host-root tamper" - else - fail "shields status output missing DRIFTED marker: ${DRIFT_OUTPUT}" - fi + if echo "$DRIFT_OUTPUT" | grep -q "DRIFTED"; then + pass "shields status reports DRIFTED on host-root tamper" + else + fail "shields status output missing DRIFTED marker: ${DRIFT_OUTPUT}" + fi - if echo "$DRIFT_OUTPUT" | grep -qE "mode=660 \(expected 444\)"; then - pass "drift listing includes file mode mismatch" - else - fail "drift listing should include file mode mismatch: ${DRIFT_OUTPUT}" - fi + if echo "$DRIFT_OUTPUT" | grep -qE "mode=660 \(expected 444\)"; then + pass "drift listing includes file mode mismatch" + else + fail "drift listing should include file mode mismatch: ${DRIFT_OUTPUT}" + fi - if echo "$DRIFT_OUTPUT" | grep -qE "owner=sandbox:sandbox \(expected root:root\)"; then - pass "drift listing includes ownership mismatch" - else - fail "drift listing should include ownership mismatch: ${DRIFT_OUTPUT}" - fi + if echo "$DRIFT_OUTPUT" | grep -qE "owner=sandbox:sandbox \(expected root:root\)"; then + pass "drift listing includes ownership mismatch" + else + fail "drift listing should include ownership mismatch: ${DRIFT_OUTPUT}" + fi - info "Restoring lockdown perms so Phase 6 can run shields down cleanly" - docker exec --user root "${CTR}" sh -c " - chmod 755 '${CONFIG_DIR}' && - chown root:root '${CONFIG_DIR}' && - chmod 444 '${CONFIG_PATH}' '${HASH_PATH}' && - chown root:root '${CONFIG_PATH}' '${HASH_PATH}' - " >/dev/null 2>&1 || true - - CLEAN_OUTPUT=$(nemoclaw "${SANDBOX_NAME}" shields status 2>&1) - if echo "$CLEAN_OUTPUT" | grep -q "Shields: UP (lockdown active)"; then - pass "shields status returns to clean UP after manual re-lock" - else - fail "shields status should return to clean UP after re-lock: ${CLEAN_OUTPUT}" + info "Restoring lockdown perms so Phase 6 can run shields down cleanly" + set +e + docker exec --user root "${CTR}" sh -c " + chmod 755 '${CONFIG_DIR}' && + chown root:root '${CONFIG_DIR}' && + chmod 444 '${CONFIG_PATH}' '${HASH_PATH}' && + chown root:root '${CONFIG_PATH}' '${HASH_PATH}' + " >/dev/null 2>&1 + RESTORE_EXIT=$? + set -e + + if [ "${RESTORE_EXIT}" != "0" ]; then + fail "Could not restore lockdown perms via docker exec (exit ${RESTORE_EXIT})" + else + set +e + CLEAN_OUTPUT=$(nemoclaw "${SANDBOX_NAME}" shields status 2>&1) + CLEAN_EXIT=$? + set -e + + if [ "${CLEAN_EXIT}" = "0" ] && echo "$CLEAN_OUTPUT" | grep -q "Shields: UP (lockdown active)"; then + pass "shields status returns to clean UP after manual re-lock" + else + fail "shields status should return to clean UP after re-lock (exit ${CLEAN_EXIT}): ${CLEAN_OUTPUT}" + fi + fi fi fi From 5829c9fcf83f9eab0944ec59f80a47af2ecb9eaa Mon Sep 17 00:00:00 2001 From: Tinson Lai Date: Wed, 27 May 2026 09:57:42 +0000 Subject: [PATCH 06/12] fix(shields): drop errexit toggles in E2E, move drift tests to focused file Signed-off-by: Tinson Lai --- src/lib/shields/index.test.ts | 109 --------------- src/lib/shields/shields-status.test.ts | 185 +++++++++++++++++++++++++ test/e2e/test-shields-config.sh | 12 +- 3 files changed, 187 insertions(+), 119 deletions(-) create mode 100644 src/lib/shields/shields-status.test.ts diff --git a/src/lib/shields/index.test.ts b/src/lib/shields/index.test.ts index a5ce1b7cbc..32f302c610 100644 --- a/src/lib/shields/index.test.ts +++ b/src/lib/shields/index.test.ts @@ -656,115 +656,6 @@ describe("shields — unit logic", () => { }); }); - // ------------------------------------------------------------------- - // shieldsStatus: locked-state drift surface - // ------------------------------------------------------------------- - describe("shieldsStatus surfaces drift returned by the verifier", () => { - function writeLockedState(sandboxName: string): void { - fs.mkdirSync(stateDir(), { recursive: true }); - fs.writeFileSync( - path.join(stateDir(), `shields-${sandboxName}.json`), - JSON.stringify( - { - shieldsDown: false, - updatedAt: new Date().toISOString(), - }, - null, - 2, - ), - { mode: 0o600 }, - ); - } - - it("prints DRIFTED with the issue list and exits 2 when the verifier reports drift", async () => { - const sandboxName = "openclaw"; - writeLockedState(sandboxName); - const driftIssues = [ - "/sandbox/.openclaw/openclaw.json mode=660 (expected 444)", - "/sandbox/.openclaw/openclaw.json owner=sandbox:sandbox (expected root:root)", - "dir mode=2770 (expected 755)", - "dir owner=sandbox:sandbox (expected root:root)", - ]; - const errorSpy = vi.spyOn(console, "error").mockImplementation(() => {}); - const exitSpy = vi - .spyOn(process, "exit") - .mockImplementation((code?: string | number | null) => { - throw new Error(`exit ${String(code)}`); - }); - - const { shieldsStatus } = await loadShieldsModule(); - expect(() => - shieldsStatus(sandboxName, true, { - verifyLockState: () => ({ ok: false, issues: driftIssues }), - resolveConfig: () => ({ - agentName: "openclaw", - configPath: "/sandbox/.openclaw/openclaw.json", - configDir: "/sandbox/.openclaw", - }), - }), - ).toThrow("exit 2"); - - expect(errorSpy).toHaveBeenCalledWith( - " Shields: UP (DRIFTED — declared locked but sandbox filesystem differs)", - ); - expect(errorSpy).toHaveBeenCalledWith(" Drift:"); - for (const issue of driftIssues) { - expect(errorSpy).toHaveBeenCalledWith(` - ${issue}`); - } - expect(errorSpy).toHaveBeenCalledWith( - ` Recovery: nemoclaw ${sandboxName} shields up # re-lock and re-verify`, - ); - expect(exitSpy).toHaveBeenCalledWith(2); - }); - - it("prints a clean locked status when the verifier reports no drift", async () => { - const sandboxName = "openclaw"; - writeLockedState(sandboxName); - const logSpy = vi.spyOn(console, "log").mockImplementation(() => {}); - const errorSpy = vi.spyOn(console, "error").mockImplementation(() => {}); - - const { shieldsStatus } = await loadShieldsModule(); - shieldsStatus(sandboxName, true, { - verifyLockState: () => ({ ok: true, issues: [] }), - resolveConfig: () => ({ - agentName: "openclaw", - configPath: "/sandbox/.openclaw/openclaw.json", - configDir: "/sandbox/.openclaw", - }), - }); - - expect(logSpy).toHaveBeenCalledWith(" Shields: UP (lockdown active)"); - expect(logSpy).toHaveBeenCalledWith(" Policy: restrictive"); - expect(errorSpy).not.toHaveBeenCalled(); - }); - - it("treats a resolveConfig throw as drift so the locked status cannot mask a setup gap", async () => { - const sandboxName = "openclaw"; - writeLockedState(sandboxName); - const errorSpy = vi.spyOn(console, "error").mockImplementation(() => {}); - const exitSpy = vi - .spyOn(process, "exit") - .mockImplementation((code?: string | number | null) => { - throw new Error(`exit ${String(code)}`); - }); - - const { shieldsStatus } = await loadShieldsModule(); - expect(() => - shieldsStatus(sandboxName, true, { - verifyLockState: () => ({ ok: true, issues: [] }), - resolveConfig: () => { - throw new Error("agent config not found"); - }, - }), - ).toThrow("exit 2"); - - const allErrors = errorSpy.mock.calls.map((args) => args[0]).join("\n"); - expect(allErrors).toContain( - "unable to resolve agent config target: agent config not found", - ); - expect(exitSpy).toHaveBeenCalledWith(2); - }); - }); }); // ------------------------------------------------------------------- diff --git a/src/lib/shields/shields-status.test.ts b/src/lib/shields/shields-status.test.ts new file mode 100644 index 0000000000..3777358e04 --- /dev/null +++ b/src/lib/shields/shields-status.test.ts @@ -0,0 +1,185 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +import { describe, it, expect, vi, beforeEach, afterEach } from "vitest"; +import fs from "node:fs"; +import path from "node:path"; +import os from "node:os"; + +vi.mock("../runner", () => ({ + run: vi.fn(() => ({ status: 0 })), + runCapture: vi.fn(() => ""), + validateName: vi.fn((name) => name), + shellQuote: vi.fn((s) => `'${s}'`), + redact: vi.fn((s) => s), + ROOT: "/mock/root", +})); + +vi.mock("../policy", () => ({ + buildPolicyGetCommand: vi.fn(() => ["openshell", "policy", "get"]), + buildPolicySetCommand: vi.fn(() => ["openshell", "policy", "set"]), + parseCurrentPolicy: vi.fn((raw) => raw || ""), + PERMISSIVE_POLICY_PATH: "/mock/permissive.yaml", + resolvePermissivePolicyPath: vi.fn(() => "/mock/permissive.yaml"), +})); + +vi.mock("../sandbox/config", () => ({ + resolveAgentConfig: vi.fn(() => ({ + agentName: "openclaw", + configPath: "/sandbox/.openclaw/openclaw.json", + configDir: "/sandbox/.openclaw", + format: "json", + configFile: "openclaw.json", + })), +})); + +vi.mock("../adapters/docker/exec", () => ({ + dockerExecFileSync: vi.fn(() => ""), +})); + +vi.mock("./audit", () => ({ + appendAuditEntry: vi.fn(), +})); + +vi.mock("node:child_process", () => ({ + execFileSync: vi.fn(() => ""), + spawnSync: vi.fn(() => ({ status: 0, stdout: "", stderr: "" })), + spawn: vi.fn(), +})); + +let tmpDir: string; + +beforeEach(() => { + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "shields-status-test-")); + vi.stubEnv("HOME", tmpDir); + vi.resetModules(); + vi.clearAllMocks(); +}); + +afterEach(() => { + vi.restoreAllMocks(); + vi.unstubAllEnvs(); + fs.rmSync(tmpDir, { recursive: true, force: true }); +}); + +async function loadShieldsModule(): Promise { + const distModulePath = path.join( + process.cwd(), + "dist", + "lib", + "shields", + "index.js", + ); + return import(distModulePath); +} + +function stateDir(): string { + return path.join(tmpDir, ".nemoclaw", "state"); +} + +function writeLockedState(sandboxName: string): void { + fs.mkdirSync(stateDir(), { recursive: true }); + fs.writeFileSync( + path.join(stateDir(), `shields-${sandboxName}.json`), + JSON.stringify( + { + shieldsDown: false, + updatedAt: new Date().toISOString(), + }, + null, + 2, + ), + { mode: 0o600 }, + ); +} + +describe("shieldsStatus surfaces drift returned by the verifier", () => { + it("prints DRIFTED with the issue list and exits 2 when the verifier reports drift", async () => { + const sandboxName = "openclaw"; + writeLockedState(sandboxName); + const driftIssues = [ + "/sandbox/.openclaw/openclaw.json mode=660 (expected 444)", + "/sandbox/.openclaw/openclaw.json owner=sandbox:sandbox (expected root:root)", + "dir mode=2770 (expected 755)", + "dir owner=sandbox:sandbox (expected root:root)", + ]; + const errorSpy = vi.spyOn(console, "error").mockImplementation(() => {}); + const exitSpy = vi + .spyOn(process, "exit") + .mockImplementation((code?: string | number | null) => { + throw new Error(`exit ${String(code)}`); + }); + + const { shieldsStatus } = await loadShieldsModule(); + expect(() => + shieldsStatus(sandboxName, true, { + verifyLockState: () => ({ ok: false, issues: driftIssues }), + resolveConfig: () => ({ + agentName: "openclaw", + configPath: "/sandbox/.openclaw/openclaw.json", + configDir: "/sandbox/.openclaw", + }), + }), + ).toThrow("exit 2"); + + expect(errorSpy).toHaveBeenCalledWith( + " Shields: UP (DRIFTED — declared locked but sandbox filesystem differs)", + ); + expect(errorSpy).toHaveBeenCalledWith(" Drift:"); + for (const issue of driftIssues) { + expect(errorSpy).toHaveBeenCalledWith(` - ${issue}`); + } + expect(errorSpy).toHaveBeenCalledWith( + ` Recovery: nemoclaw ${sandboxName} shields up # re-lock and re-verify`, + ); + expect(exitSpy).toHaveBeenCalledWith(2); + }); + + it("prints a clean locked status when the verifier reports no drift", async () => { + const sandboxName = "openclaw"; + writeLockedState(sandboxName); + const logSpy = vi.spyOn(console, "log").mockImplementation(() => {}); + const errorSpy = vi.spyOn(console, "error").mockImplementation(() => {}); + + const { shieldsStatus } = await loadShieldsModule(); + shieldsStatus(sandboxName, true, { + verifyLockState: () => ({ ok: true, issues: [] }), + resolveConfig: () => ({ + agentName: "openclaw", + configPath: "/sandbox/.openclaw/openclaw.json", + configDir: "/sandbox/.openclaw", + }), + }); + + expect(logSpy).toHaveBeenCalledWith(" Shields: UP (lockdown active)"); + expect(logSpy).toHaveBeenCalledWith(" Policy: restrictive"); + expect(errorSpy).not.toHaveBeenCalled(); + }); + + it("treats a resolveConfig throw as drift so the locked status cannot mask a setup gap", async () => { + const sandboxName = "openclaw"; + writeLockedState(sandboxName); + const errorSpy = vi.spyOn(console, "error").mockImplementation(() => {}); + const exitSpy = vi + .spyOn(process, "exit") + .mockImplementation((code?: string | number | null) => { + throw new Error(`exit ${String(code)}`); + }); + + const { shieldsStatus } = await loadShieldsModule(); + expect(() => + shieldsStatus(sandboxName, true, { + verifyLockState: () => ({ ok: true, issues: [] }), + resolveConfig: () => { + throw new Error("agent config not found"); + }, + }), + ).toThrow("exit 2"); + + const allErrors = errorSpy.mock.calls.map((args) => args[0]).join("\n"); + expect(allErrors).toContain( + "unable to resolve agent config target: agent config not found", + ); + expect(exitSpy).toHaveBeenCalledWith(2); + }); +}); diff --git a/test/e2e/test-shields-config.sh b/test/e2e/test-shields-config.sh index e39569d83b..43aac4da10 100755 --- a/test/e2e/test-shields-config.sh +++ b/test/e2e/test-shields-config.sh @@ -332,7 +332,6 @@ else CONFIG_DIR=$(dirname "${CONFIG_PATH}") HASH_PATH="${CONFIG_DIR}/.config-hash" - set +e docker exec --user root "${CTR}" sh -c " chmod 2770 '${CONFIG_DIR}' && chown sandbox:sandbox '${CONFIG_DIR}' && @@ -340,7 +339,6 @@ else chown sandbox:sandbox '${CONFIG_PATH}' '${HASH_PATH}' " >/dev/null 2>&1 TAMPER_EXIT=$? - set -e if [ "${TAMPER_EXIT}" != "0" ]; then fail "Could not apply tamper perms via docker exec (exit ${TAMPER_EXIT})" @@ -348,10 +346,8 @@ else PERMS_TAMPERED=$(docker exec --user root "${CTR}" stat -c '%a %U:%G' "${CONFIG_PATH}" 2>/dev/null || true) info "Config perms after tamper: ${PERMS_TAMPERED}" - set +e - DRIFT_OUTPUT=$(nemoclaw "${SANDBOX_NAME}" shields status 2>&1) + DRIFT_OUTPUT=$(nemoclaw "${SANDBOX_NAME}" shields status 2>&1) || true DRIFT_EXIT=$? - set -e echo "$DRIFT_OUTPUT" if [ "${DRIFT_EXIT}" = "2" ]; then @@ -379,7 +375,6 @@ else fi info "Restoring lockdown perms so Phase 6 can run shields down cleanly" - set +e docker exec --user root "${CTR}" sh -c " chmod 755 '${CONFIG_DIR}' && chown root:root '${CONFIG_DIR}' && @@ -387,15 +382,12 @@ else chown root:root '${CONFIG_PATH}' '${HASH_PATH}' " >/dev/null 2>&1 RESTORE_EXIT=$? - set -e if [ "${RESTORE_EXIT}" != "0" ]; then fail "Could not restore lockdown perms via docker exec (exit ${RESTORE_EXIT})" else - set +e - CLEAN_OUTPUT=$(nemoclaw "${SANDBOX_NAME}" shields status 2>&1) + CLEAN_OUTPUT=$(nemoclaw "${SANDBOX_NAME}" shields status 2>&1) || true CLEAN_EXIT=$? - set -e if [ "${CLEAN_EXIT}" = "0" ] && echo "$CLEAN_OUTPUT" | grep -q "Shields: UP (lockdown active)"; then pass "shields status returns to clean UP after manual re-lock" From 5333c7abbeff84d5eb1ca77fc553f833582d1ff6 Mon Sep 17 00:00:00 2001 From: Tinson Lai Date: Wed, 27 May 2026 10:15:21 +0000 Subject: [PATCH 07/12] fix(shields,e2e): capture true status exit + clear setgid on restore Signed-off-by: Tinson Lai --- test/e2e/test-shields-config.sh | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/test/e2e/test-shields-config.sh b/test/e2e/test-shields-config.sh index 43aac4da10..50994cad8d 100755 --- a/test/e2e/test-shields-config.sh +++ b/test/e2e/test-shields-config.sh @@ -346,7 +346,7 @@ else PERMS_TAMPERED=$(docker exec --user root "${CTR}" stat -c '%a %U:%G' "${CONFIG_PATH}" 2>/dev/null || true) info "Config perms after tamper: ${PERMS_TAMPERED}" - DRIFT_OUTPUT=$(nemoclaw "${SANDBOX_NAME}" shields status 2>&1) || true + DRIFT_OUTPUT=$(nemoclaw "${SANDBOX_NAME}" shields status 2>&1) DRIFT_EXIT=$? echo "$DRIFT_OUTPUT" @@ -375,7 +375,11 @@ else fi info "Restoring lockdown perms so Phase 6 can run shields down cleanly" + # BusyBox chmod with 3-digit octal preserves setgid, so clear it + # explicitly before applying the 755 dir mode to match what + # `shields up` produces (plain 755, not 2755). docker exec --user root "${CTR}" sh -c " + chmod g-s '${CONFIG_DIR}' && chmod 755 '${CONFIG_DIR}' && chown root:root '${CONFIG_DIR}' && chmod 444 '${CONFIG_PATH}' '${HASH_PATH}' && @@ -386,7 +390,7 @@ else if [ "${RESTORE_EXIT}" != "0" ]; then fail "Could not restore lockdown perms via docker exec (exit ${RESTORE_EXIT})" else - CLEAN_OUTPUT=$(nemoclaw "${SANDBOX_NAME}" shields status 2>&1) || true + CLEAN_OUTPUT=$(nemoclaw "${SANDBOX_NAME}" shields status 2>&1) CLEAN_EXIT=$? if [ "${CLEAN_EXIT}" = "0" ] && echo "$CLEAN_OUTPUT" | grep -q "Shields: UP (lockdown active)"; then From dd38ddb09e88f9485b90e54774ce578b9319903f Mon Sep 17 00:00:00 2001 From: Tinson Lai Date: Wed, 27 May 2026 11:14:40 +0000 Subject: [PATCH 08/12] revert: roll shields PR files back to 0c3b6a4fa state Signed-off-by: Tinson Lai --- docs/security/best-practices.mdx | 4 +- src/lib/shields/index.test.ts | 154 ++++++++++++++++++-- src/lib/shields/index.ts | 93 ++++++++++--- src/lib/shields/shields-status.test.ts | 185 ------------------------- src/lib/shields/verify-lock.test.ts | 78 ++--------- src/lib/shields/verify-lock.ts | 36 ++--- test/e2e/test-shields-config.sh | 89 ------------ 7 files changed, 235 insertions(+), 404 deletions(-) delete mode 100644 src/lib/shields/shields-status.test.ts diff --git a/docs/security/best-practices.mdx b/docs/security/best-practices.mdx index d7251320d3..dd2b75569c 100644 --- a/docs/security/best-practices.mdx +++ b/docs/security/best-practices.mdx @@ -202,13 +202,13 @@ Writable agent state such as plugins, skills, hooks, and workspace metadata live By default, this directory starts writable so the agent can manage its own config, install skills, and write to standard home-directory paths natively. For sensitive workloads, use a reviewed host-side immutability workflow after initial setup so config and writable state entry points cannot be changed by the sandbox user. -- DAC permissions (default): the sandbox user owns `/sandbox/.openclaw` with mode `2770` (group-writable with setgid so descendants stay in the sandbox group) and `openclaw.json` with mode `660`, so the agent reads and writes config directly. `shields up` re-owns the directory `root:root 755` and the config files `root:root 444`, and `shields status` cross-checks the locked state against the sandbox filesystem and surfaces drift if a host-root tamper reverts those perms. +- **DAC permissions (default).** The sandbox user owns `/sandbox/.openclaw` with mode `700` and `openclaw.json` with mode `600`, so the agent can read and write config directly. - **Config integrity hash.** The image includes a SHA256 hash of `openclaw.json`. In the default mutable state, `.config-hash` is sandbox-owned and is not a tamper-proof trust anchor, so startup does not fail closed on that hash. When the hash is root-owned and read-only, startup enforces it and refuses to start if the hash does not match. - **Gateway token environment.** The gateway exports `OPENCLAW_GATEWAY_TOKEN` and writes it to `/tmp/nemoclaw-proxy-env.sh` for interactive sandbox sessions. Keep this in mind when deciding whether a workload should run with mutable config or an immutable config posture. | Aspect | Detail | |---|---| -| Default | The sandbox keeps `/sandbox/.openclaw` writable (`2770 sandbox:sandbox`), sets `openclaw.json` to `660 sandbox:sandbox`, lets the agent manage state directly, and has the gateway place `OPENCLAW_GATEWAY_TOKEN` in `/tmp/nemoclaw-proxy-env.sh` for interactive shells. | +| Default | The sandbox keeps `/sandbox/.openclaw` writable (`700 sandbox:sandbox`), sets `openclaw.json` to `600 sandbox:sandbox`, lets the agent manage state directly, and has the gateway place `OPENCLAW_GATEWAY_TOKEN` in `/tmp/nemoclaw-proxy-env.sh` for interactive shells. | | What you can change | Apply a reviewed host-side immutability workflow to lock config and state directories with DAC permissions and the immutable flag where available. | | Risk of default | A writable `.openclaw` directory lets the agent modify its own gateway config: disabling CORS or redirecting inference to an attacker-controlled endpoint. | | Recommendation | For always-on assistants handling sensitive workloads, lock config after initial setup. For development workflows, the writable default is appropriate. | diff --git a/src/lib/shields/index.test.ts b/src/lib/shields/index.test.ts index 32f302c610..4e0ea6ed63 100644 --- a/src/lib/shields/index.test.ts +++ b/src/lib/shields/index.test.ts @@ -98,21 +98,6 @@ afterEach(() => { // Instead, test the logic by directly manipulating state files and // calling functions that read them at invocation time. -async function loadShieldsModule(): Promise { - const distModulePath = path.join( - process.cwd(), - "dist", - "lib", - "shields", - "index.js", - ); - return import(distModulePath); -} - -function stateDir(): string { - return path.join(tmpDir, ".nemoclaw", "state"); -} - describe("shields — unit logic", () => { describe("parseDuration (inline in shields.ts)", () => { // parseDuration is inlined in shields.ts. Test it via the ESM module. @@ -397,6 +382,21 @@ describe("shields — unit logic", () => { }); describe("NC-3112: status self-heals stale expired auto-restore markers", () => { + async function loadShieldsModule() { + const distModulePath = path.join( + process.cwd(), + "dist", + "lib", + "shields", + "index.js", + ); + return import(distModulePath); + } + + function stateDir(): string { + return path.join(tmpDir, ".nemoclaw", "state"); + } + function writeState( sandboxName: string, state: Record, @@ -656,6 +656,130 @@ describe("shields — unit logic", () => { }); }); + // ------------------------------------------------------------------- + // shieldsStatus: locked-state drift surface + // ------------------------------------------------------------------- + describe("shieldsStatus surfaces drift returned by the verifier", () => { + async function loadShieldsModule() { + const distModulePath = path.join( + process.cwd(), + "dist", + "lib", + "shields", + "index.js", + ); + return import(distModulePath); + } + + function stateDir(): string { + return path.join(tmpDir, ".nemoclaw", "state"); + } + + function writeLockedState(sandboxName: string): void { + fs.mkdirSync(stateDir(), { recursive: true }); + fs.writeFileSync( + path.join(stateDir(), `shields-${sandboxName}.json`), + JSON.stringify( + { + shieldsDown: false, + updatedAt: new Date().toISOString(), + }, + null, + 2, + ), + { mode: 0o600 }, + ); + } + + it("prints DRIFTED with the issue list and exits 2 when the verifier reports drift", async () => { + const sandboxName = "openclaw"; + writeLockedState(sandboxName); + const driftIssues = [ + "/sandbox/.openclaw/openclaw.json mode=660 (expected 444)", + "/sandbox/.openclaw/openclaw.json owner=sandbox:sandbox (expected root:root)", + "dir mode=2770 (expected 755)", + "dir owner=sandbox:sandbox (expected root:root)", + ]; + const errorSpy = vi.spyOn(console, "error").mockImplementation(() => {}); + const exitSpy = vi + .spyOn(process, "exit") + .mockImplementation((code?: string | number | null) => { + throw new Error(`exit ${String(code)}`); + }); + + const { shieldsStatus } = await loadShieldsModule(); + expect(() => + shieldsStatus(sandboxName, true, { + verifyLockState: () => ({ ok: false, issues: driftIssues }), + resolveConfig: () => ({ + agentName: "openclaw", + configPath: "/sandbox/.openclaw/openclaw.json", + configDir: "/sandbox/.openclaw", + }), + }), + ).toThrow("exit 2"); + + expect(errorSpy).toHaveBeenCalledWith( + " Shields: UP (DRIFTED — declared locked but sandbox filesystem differs)", + ); + expect(errorSpy).toHaveBeenCalledWith(" Drift:"); + for (const issue of driftIssues) { + expect(errorSpy).toHaveBeenCalledWith(` - ${issue}`); + } + expect(errorSpy).toHaveBeenCalledWith( + ` Recovery: nemoclaw ${sandboxName} shields up # re-lock and re-verify`, + ); + expect(exitSpy).toHaveBeenCalledWith(2); + }); + + it("prints a clean locked status when the verifier reports no drift", async () => { + const sandboxName = "openclaw"; + writeLockedState(sandboxName); + const logSpy = vi.spyOn(console, "log").mockImplementation(() => {}); + const errorSpy = vi.spyOn(console, "error").mockImplementation(() => {}); + + const { shieldsStatus } = await loadShieldsModule(); + shieldsStatus(sandboxName, true, { + verifyLockState: () => ({ ok: true, issues: [] }), + resolveConfig: () => ({ + agentName: "openclaw", + configPath: "/sandbox/.openclaw/openclaw.json", + configDir: "/sandbox/.openclaw", + }), + }); + + expect(logSpy).toHaveBeenCalledWith(" Shields: UP (lockdown active)"); + expect(logSpy).toHaveBeenCalledWith(" Policy: restrictive"); + expect(errorSpy).not.toHaveBeenCalled(); + }); + + it("treats a resolveConfig throw as drift so the locked status cannot mask a setup gap", async () => { + const sandboxName = "openclaw"; + writeLockedState(sandboxName); + const errorSpy = vi.spyOn(console, "error").mockImplementation(() => {}); + const exitSpy = vi + .spyOn(process, "exit") + .mockImplementation((code?: string | number | null) => { + throw new Error(`exit ${String(code)}`); + }); + + const { shieldsStatus } = await loadShieldsModule(); + expect(() => + shieldsStatus(sandboxName, true, { + verifyLockState: () => ({ ok: true, issues: [] }), + resolveConfig: () => { + throw new Error("agent config not found"); + }, + }), + ).toThrow("exit 2"); + + const allErrors = errorSpy.mock.calls.map((args) => args[0]).join("\n"); + expect(allErrors).toContain( + "unable to resolve agent config target: agent config not found", + ); + expect(exitSpy).toHaveBeenCalledWith(2); + }); + }); }); // ------------------------------------------------------------------- diff --git a/src/lib/shields/index.ts b/src/lib/shields/index.ts index ad486fa307..59a132fc1d 100644 --- a/src/lib/shields/index.ts +++ b/src/lib/shields/index.ts @@ -16,6 +16,10 @@ const { fork } = require("child_process"); const { randomBytes } = require("crypto"); const { run, runCapture, validateName } = require("../runner"); const { dockerExecFileSync } = require("../adapters/docker/exec"); +const { dockerCapture } = require("../adapters/docker/run"); +const registry = require("../state/registry") as { + getSandbox?: (name: string) => { openshellDriver?: string | null } | null; +}; const { buildPolicyGetCommand, buildPolicySetCommand, @@ -38,9 +42,6 @@ const { const { resolveNemoclawStateDir } = require("../state/paths"); const { appendAuditEntry } = require("./audit"); const { resolveAgentConfig } = require("../sandbox/config"); -const { - privilegedSandboxExecArgv, -}: typeof import("../sandbox/privileged-exec") = require("../sandbox/privileged-exec"); const { buildRuntimePermissivePolicy, }: typeof import("./permissive-runtime") = require("./permissive-runtime"); @@ -55,12 +56,66 @@ const STATE_DIR = resolveNemoclawStateDir(); // privileged sandbox exec — bypasses the sandbox's Landlock context // // openshell sandbox exec runs commands INSIDE the Landlock domain, so it -// can't modify read_only paths or change chattr flags. Privileged Docker exec -// starts a new root process that does NOT inherit the Landlock ruleset. -// OpenShell gateways expose sandboxes as Docker containers, so we exec into -// the sandbox container directly as root. +// can't modify read_only paths or change chattr flags. kubectl exec starts +// a new process in the pod that does NOT inherit the Landlock ruleset. +// On the legacy gateway we reach kubectl via the K3s container. On the +// Docker-driver gateway there is no K3s container, so we exec into the +// sandbox Docker container directly as root. // --------------------------------------------------------------------------- +const K3S_CONTAINER = "openshell-cluster-nemoclaw"; + +function resolveDockerDriverSandboxContainer( + sandboxName: string, +): string | null { + try { + if (registry.getSandbox?.(sandboxName)?.openshellDriver !== "docker") { + return null; + } + } catch { + return null; + } + const prefix = `openshell-${sandboxName}-`; + const exact = `openshell-${sandboxName}`; + const output = dockerCapture(["ps", "--format", "{{.Names}}"], { + ignoreError: true, + }); + return ( + output + .split("\n") + .map((line: string) => line.trim()) + .find((name: string) => name === exact || name.startsWith(prefix)) || null + ); +} + +function kubectlExecArgv(sandboxName: string, cmd: string[]): string[] { + return [ + "exec", + K3S_CONTAINER, + "kubectl", + "exec", + "-n", + "openshell", + sandboxName, + "-c", + "agent", + "--", + ...cmd, + ]; +} + +function privilegedSandboxExecArgv( + sandboxName: string, + cmd: string[], +): string[] { + const dockerDriverContainer = + resolveDockerDriverSandboxContainer(sandboxName); + if (dockerDriverContainer) { + return ["exec", "--user", "root", dockerDriverContainer, ...cmd]; + } + return kubectlExecArgv(sandboxName, cmd); +} + function privilegedSandboxExec(sandboxName: string, cmd: string[]): void { dockerExecFileSync(privilegedSandboxExecArgv(sandboxName, cmd), { stdio: ["ignore", "pipe", "pipe"], @@ -448,7 +503,7 @@ function assertNoLegacyStateLayout( // user and the gateway UID can write the mutable config tree. Hermes keeps its // tighter single-user layout. // -// Note on chattr: best-effort — it may silently fail if privileged exec +// Note on chattr: best-effort — it may silently fail if kubectl exec // lacks CAP_LINUX_IMMUTABLE or if the file was never immutable. That's fine: // the file becomes writable through the permissive policy (disables Landlock // read_only) + chown/chmod below. @@ -582,7 +637,7 @@ function unlockAgentConfig( // 2. UNIX permissions — 444 root:root (mandatory, verified here) // 3. chattr +i immutable bit — defense-in-depth (best-effort) // -// Layer 3 is best-effort because privileged exec may lack +// Layer 3 is best-effort because kubectl exec may lack // CAP_LINUX_IMMUTABLE. Layers 1+2 are sufficient. We still attempt it // in case the runtime environment supports it. // --------------------------------------------------------------------------- @@ -623,7 +678,7 @@ function lockAgentConfig( errors.push("chown root:root config dir"); } - // Best-effort: privileged exec may lack CAP_LINUX_IMMUTABLE. Track the + // Best-effort: kubectl exec may lack CAP_LINUX_IMMUTABLE. Track the // result so verification doesn't require something that was never there. let chattrSucceeded = true; for (const f of filesToLock) { @@ -705,7 +760,7 @@ function rollbackShieldsDown( } else { console.error(" Config remains unlocked — manual intervention required."); console.error( - ` Restore sandbox access, then run: nemoclaw ${sandboxName} shields up`, + ` Re-lock manually via kubectl exec, then run: nemoclaw ${sandboxName} shields up`, ); } } @@ -968,7 +1023,7 @@ function shieldsDown(sandboxName: string, opts: ShieldsDownOpts = {}): void { // 4. Start auto-restore timer (detached child process), unless skipped. // Pass the absolute restore time, not a relative timeout. Steps 1-2b - // can take minutes (policy apply + privileged chmod), so a relative timeout + // can take minutes (policy apply + kubectl chmod), so a relative timeout // passed at fork time would fire too early. if (!opts.skipTimer) { const restoreAt = new Date(Date.now() + timeoutSeconds * 1000); @@ -1098,13 +1153,13 @@ function shieldsUp(sandboxName: string, opts: { throwOnError?: boolean } = {}): " Config remains unlocked — manual intervention required.", ); console.error( - ` Restore sandbox access, then run: nemoclaw ${sandboxName} shields up`, + ` Re-lock manually via kubectl exec, then run: nemoclaw ${sandboxName} shields up`, ); return failShieldsCommand(activation.error ?? "unknown restore error", opts.throwOnError); } } else { // 2b. Lock config file to read-only. - // Uses direct privileged exec to bypass Landlock (same as shields down). + // Uses kubectl exec to bypass Landlock (same as shields down). // Each operation runs independently and the result is verified. // If verification fails, config remains unlocked — we do not lie about state. const target = resolveAgentConfig(sandboxName); @@ -1120,7 +1175,7 @@ function shieldsUp(sandboxName: string, opts: { throwOnError?: boolean } = {}): " Config remains unlocked — manual intervention required.", ); console.error( - ` Restore sandbox access, then run: nemoclaw ${sandboxName} shields up`, + ` Re-lock manually via kubectl exec, then run: nemoclaw ${sandboxName} shields up`, ); return failShieldsCommand(message, opts.throwOnError); } @@ -1209,14 +1264,6 @@ function shieldsStatus( // Cross-check the sandbox filesystem so a host-root tamper that reverts // protected perms back to a sandbox-writable state is surfaced as drift // instead of reported as a clean lockdown. - // - // Scope: DAC only (mode + ownership + legacy-state-layout). The - // immutable bit is intentionally out of scope here because shields - // state does not persist whether `chattr +i` succeeded at lock time - // (`chattr` is best-effort because kubectl exec may lack - // CAP_LINUX_IMMUTABLE), so a missing `i` flag at status time cannot - // be distinguished from "never set in the first place". Verifying - // it requires persisted lock metadata. let driftIssues: string[] = []; try { const target = resolveConfig(sandboxName); diff --git a/src/lib/shields/shields-status.test.ts b/src/lib/shields/shields-status.test.ts deleted file mode 100644 index 3777358e04..0000000000 --- a/src/lib/shields/shields-status.test.ts +++ /dev/null @@ -1,185 +0,0 @@ -// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// SPDX-License-Identifier: Apache-2.0 - -import { describe, it, expect, vi, beforeEach, afterEach } from "vitest"; -import fs from "node:fs"; -import path from "node:path"; -import os from "node:os"; - -vi.mock("../runner", () => ({ - run: vi.fn(() => ({ status: 0 })), - runCapture: vi.fn(() => ""), - validateName: vi.fn((name) => name), - shellQuote: vi.fn((s) => `'${s}'`), - redact: vi.fn((s) => s), - ROOT: "/mock/root", -})); - -vi.mock("../policy", () => ({ - buildPolicyGetCommand: vi.fn(() => ["openshell", "policy", "get"]), - buildPolicySetCommand: vi.fn(() => ["openshell", "policy", "set"]), - parseCurrentPolicy: vi.fn((raw) => raw || ""), - PERMISSIVE_POLICY_PATH: "/mock/permissive.yaml", - resolvePermissivePolicyPath: vi.fn(() => "/mock/permissive.yaml"), -})); - -vi.mock("../sandbox/config", () => ({ - resolveAgentConfig: vi.fn(() => ({ - agentName: "openclaw", - configPath: "/sandbox/.openclaw/openclaw.json", - configDir: "/sandbox/.openclaw", - format: "json", - configFile: "openclaw.json", - })), -})); - -vi.mock("../adapters/docker/exec", () => ({ - dockerExecFileSync: vi.fn(() => ""), -})); - -vi.mock("./audit", () => ({ - appendAuditEntry: vi.fn(), -})); - -vi.mock("node:child_process", () => ({ - execFileSync: vi.fn(() => ""), - spawnSync: vi.fn(() => ({ status: 0, stdout: "", stderr: "" })), - spawn: vi.fn(), -})); - -let tmpDir: string; - -beforeEach(() => { - tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "shields-status-test-")); - vi.stubEnv("HOME", tmpDir); - vi.resetModules(); - vi.clearAllMocks(); -}); - -afterEach(() => { - vi.restoreAllMocks(); - vi.unstubAllEnvs(); - fs.rmSync(tmpDir, { recursive: true, force: true }); -}); - -async function loadShieldsModule(): Promise { - const distModulePath = path.join( - process.cwd(), - "dist", - "lib", - "shields", - "index.js", - ); - return import(distModulePath); -} - -function stateDir(): string { - return path.join(tmpDir, ".nemoclaw", "state"); -} - -function writeLockedState(sandboxName: string): void { - fs.mkdirSync(stateDir(), { recursive: true }); - fs.writeFileSync( - path.join(stateDir(), `shields-${sandboxName}.json`), - JSON.stringify( - { - shieldsDown: false, - updatedAt: new Date().toISOString(), - }, - null, - 2, - ), - { mode: 0o600 }, - ); -} - -describe("shieldsStatus surfaces drift returned by the verifier", () => { - it("prints DRIFTED with the issue list and exits 2 when the verifier reports drift", async () => { - const sandboxName = "openclaw"; - writeLockedState(sandboxName); - const driftIssues = [ - "/sandbox/.openclaw/openclaw.json mode=660 (expected 444)", - "/sandbox/.openclaw/openclaw.json owner=sandbox:sandbox (expected root:root)", - "dir mode=2770 (expected 755)", - "dir owner=sandbox:sandbox (expected root:root)", - ]; - const errorSpy = vi.spyOn(console, "error").mockImplementation(() => {}); - const exitSpy = vi - .spyOn(process, "exit") - .mockImplementation((code?: string | number | null) => { - throw new Error(`exit ${String(code)}`); - }); - - const { shieldsStatus } = await loadShieldsModule(); - expect(() => - shieldsStatus(sandboxName, true, { - verifyLockState: () => ({ ok: false, issues: driftIssues }), - resolveConfig: () => ({ - agentName: "openclaw", - configPath: "/sandbox/.openclaw/openclaw.json", - configDir: "/sandbox/.openclaw", - }), - }), - ).toThrow("exit 2"); - - expect(errorSpy).toHaveBeenCalledWith( - " Shields: UP (DRIFTED — declared locked but sandbox filesystem differs)", - ); - expect(errorSpy).toHaveBeenCalledWith(" Drift:"); - for (const issue of driftIssues) { - expect(errorSpy).toHaveBeenCalledWith(` - ${issue}`); - } - expect(errorSpy).toHaveBeenCalledWith( - ` Recovery: nemoclaw ${sandboxName} shields up # re-lock and re-verify`, - ); - expect(exitSpy).toHaveBeenCalledWith(2); - }); - - it("prints a clean locked status when the verifier reports no drift", async () => { - const sandboxName = "openclaw"; - writeLockedState(sandboxName); - const logSpy = vi.spyOn(console, "log").mockImplementation(() => {}); - const errorSpy = vi.spyOn(console, "error").mockImplementation(() => {}); - - const { shieldsStatus } = await loadShieldsModule(); - shieldsStatus(sandboxName, true, { - verifyLockState: () => ({ ok: true, issues: [] }), - resolveConfig: () => ({ - agentName: "openclaw", - configPath: "/sandbox/.openclaw/openclaw.json", - configDir: "/sandbox/.openclaw", - }), - }); - - expect(logSpy).toHaveBeenCalledWith(" Shields: UP (lockdown active)"); - expect(logSpy).toHaveBeenCalledWith(" Policy: restrictive"); - expect(errorSpy).not.toHaveBeenCalled(); - }); - - it("treats a resolveConfig throw as drift so the locked status cannot mask a setup gap", async () => { - const sandboxName = "openclaw"; - writeLockedState(sandboxName); - const errorSpy = vi.spyOn(console, "error").mockImplementation(() => {}); - const exitSpy = vi - .spyOn(process, "exit") - .mockImplementation((code?: string | number | null) => { - throw new Error(`exit ${String(code)}`); - }); - - const { shieldsStatus } = await loadShieldsModule(); - expect(() => - shieldsStatus(sandboxName, true, { - verifyLockState: () => ({ ok: true, issues: [] }), - resolveConfig: () => { - throw new Error("agent config not found"); - }, - }), - ).toThrow("exit 2"); - - const allErrors = errorSpy.mock.calls.map((args) => args[0]).join("\n"); - expect(allErrors).toContain( - "unable to resolve agent config target: agent config not found", - ); - expect(exitSpy).toHaveBeenCalledWith(2); - }); -}); diff --git a/src/lib/shields/verify-lock.test.ts b/src/lib/shields/verify-lock.test.ts index 586edd564c..8d19c11eb6 100644 --- a/src/lib/shields/verify-lock.test.ts +++ b/src/lib/shields/verify-lock.test.ts @@ -22,8 +22,6 @@ const target = { sensitiveFiles: ["/sandbox/.openclaw/.config-hash"], }; -const noopLegacyLayoutAsserter = (): void => {}; - type StatLookup = Record; function makeExec(perms: StatLookup): (cmd: string[]) => string { @@ -45,10 +43,7 @@ describe("verifyShieldsLockState", () => { "/sandbox/.openclaw": "755 root:root", }); - const result = verifyShieldsLockState("openclaw", target, { - exec, - assertLegacyLayout: noopLegacyLayoutAsserter, - }); + const result = verifyShieldsLockState("openclaw", target, { exec }); expect(result.ok).toBe(true); expect(result.issues).toEqual([]); @@ -62,10 +57,7 @@ describe("verifyShieldsLockState", () => { "/sandbox/.openclaw": "2770 sandbox:sandbox", }); - const result = verifyShieldsLockState("openclaw", target, { - exec, - assertLegacyLayout: noopLegacyLayoutAsserter, - }); + const result = verifyShieldsLockState("openclaw", target, { exec }); expect(result.ok).toBe(false); expect(result.issues).toEqual( @@ -98,10 +90,7 @@ describe("verifyShieldsLockState", () => { "/sandbox/.openclaw": "755 root:root", }); - const result = verifyShieldsLockState("openclaw", target, { - exec, - assertLegacyLayout: noopLegacyLayoutAsserter, - }); + const result = verifyShieldsLockState("openclaw", target, { exec }); expect(result.ok).toBe(false); expect(result.issues).toContain( @@ -118,10 +107,7 @@ describe("verifyShieldsLockState", () => { "/sandbox/.openclaw": "775 root:root", }); - const result = verifyShieldsLockState("openclaw", target, { - exec, - assertLegacyLayout: noopLegacyLayoutAsserter, - }); + const result = verifyShieldsLockState("openclaw", target, { exec }); expect(result.ok).toBe(false); expect(result.issues).toContain("dir mode=775 (expected 755)"); @@ -133,10 +119,7 @@ describe("verifyShieldsLockState", () => { throw new Error("Container not found"); }; - const result = verifyShieldsLockState("openclaw", target, { - exec, - assertLegacyLayout: noopLegacyLayoutAsserter, - }); + const result = verifyShieldsLockState("openclaw", target, { exec }); expect(result.ok).toBe(false); expect(result.issues.some((issue: string) => issue.includes("stat failed"))).toBe( @@ -161,16 +144,12 @@ describe("verifyShieldsLockState", () => { return ""; }; - const withoutChattrCheck = verifyShieldsLockState("openclaw", target, { - exec, - assertLegacyLayout: noopLegacyLayoutAsserter, - }); + const withoutChattrCheck = verifyShieldsLockState("openclaw", target, { exec }); expect(withoutChattrCheck.ok).toBe(true); const withChattrCheck = verifyShieldsLockState("openclaw", target, { exec, verifyChattr: true, - assertLegacyLayout: noopLegacyLayoutAsserter, }); expect(withChattrCheck.ok).toBe(false); expect(withChattrCheck.issues).toEqual( @@ -181,34 +160,6 @@ describe("verifyShieldsLockState", () => { ); }); - it("records lsattr exec failure as drift when verifyChattr is requested so a missing binary is not a silent pass", async () => { - const { verifyShieldsLockState } = await loadVerifier(); - const exec = (cmd: string[]): string => { - if (cmd[0] === "stat") { - if (cmd[cmd.length - 1] === "/sandbox/.openclaw") return "755 root:root"; - return "444 root:root"; - } - if (cmd[0] === "lsattr") { - throw new Error("lsattr: command not found"); - } - return ""; - }; - - const result = verifyShieldsLockState("openclaw", target, { - exec, - verifyChattr: true, - assertLegacyLayout: noopLegacyLayoutAsserter, - }); - - expect(result.ok).toBe(false); - expect(result.issues).toEqual( - expect.arrayContaining([ - "/sandbox/.openclaw/openclaw.json lsattr failed: lsattr: command not found", - "/sandbox/.openclaw/.config-hash lsattr failed: lsattr: command not found", - ]), - ); - }); - it("surfaces a legacy state layout violation when the asserter throws", async () => { const { verifyShieldsLockState } = await loadVerifier(); const exec = makeExec({ @@ -232,19 +183,8 @@ describe("verifyShieldsLockState", () => { it("rejects calls without an exec dependency so production paths cannot silently no-op", async () => { const { verifyShieldsLockState } = await loadVerifier(); - expect(() => - verifyShieldsLockState("openclaw", target, { - assertLegacyLayout: noopLegacyLayoutAsserter, - } as unknown as Parameters[2]), - ).toThrow(/requires options\.exec/); - }); - - it("rejects calls without an assertLegacyLayout dependency so the legacy-layout check cannot be skipped", async () => { - const { verifyShieldsLockState } = await loadVerifier(); - expect(() => - verifyShieldsLockState("openclaw", target, { - exec: () => "", - } as unknown as Parameters[2]), - ).toThrow(/requires options\.assertLegacyLayout/); + expect(() => verifyShieldsLockState("openclaw", target)).toThrow( + /requires options\.exec/, + ); }); }); diff --git a/src/lib/shields/verify-lock.ts b/src/lib/shields/verify-lock.ts index e864f75ba3..79e3d90b42 100644 --- a/src/lib/shields/verify-lock.ts +++ b/src/lib/shields/verify-lock.ts @@ -6,16 +6,8 @@ // config directory, no legacy state layout, and (when the caller knows // chattr was applied) the immutable bit. Returns the list of mismatches // so callers can either fail the lock operation or surface drift after a -// host-root tamper. -// -// Failure handling: -// - `stat` failures are recorded as issues so the caller cannot mistake -// an unreachable sandbox for a clean lockdown. -// - `lsattr` failures are recorded as issues only when `verifyChattr` is -// true. Some images do not ship `lsattr`, so when chattr verification -// was not requested the binary's absence is treated as an unavailable -// check rather than a tamper signal. -// - `assertLegacyLayout` failures are recorded as issues. +// host-root tamper. Stat/lsattr failures are folded into `issues` so the +// caller can decide whether to treat them as drift. export type LockTarget = { configPath: string; @@ -25,8 +17,8 @@ export type LockTarget = { export type VerifyShieldsLockOptions = { verifyChattr?: boolean; - exec: (cmd: string[]) => string; - assertLegacyLayout: (sandboxName: string, configDir: string) => void; + exec?: (cmd: string[]) => string; + assertLegacyLayout?: (sandboxName: string, configDir: string) => void; }; export type VerifyShieldsLockResult = { @@ -38,18 +30,21 @@ const EXPECTED_FILE_MODE = "444"; const EXPECTED_DIR_MODE = "755"; const EXPECTED_OWNER = "root:root"; +function noopAssertLegacyLayout(_sandboxName: string, _configDir: string): void { + // Production callers replace this with the real legacy-layout assertion; + // when omitted, the verifier treats legacy-layout state as "no issue". +} + export function verifyShieldsLockState( sandboxName: string, target: LockTarget, - options: VerifyShieldsLockOptions, + options: VerifyShieldsLockOptions = {}, ): VerifyShieldsLockResult { - if (!options || typeof options.exec !== "function") { + if (!options.exec) { throw new Error("verifyShieldsLockState requires options.exec"); } - if (typeof options.assertLegacyLayout !== "function") { - throw new Error("verifyShieldsLockState requires options.assertLegacyLayout"); - } - const { exec, assertLegacyLayout } = options; + const exec = options.exec; + const assertLegacyLayout = options.assertLegacyLayout ?? noopAssertLegacyLayout; const issues: string[] = []; const filesToVerify = [target.configPath, ...(target.sensitiveFiles || [])]; @@ -87,9 +82,8 @@ export function verifyShieldsLockState( // First whitespace-delimited token is the flags field. const [flags] = attrs.trim().split(/\s+/, 1); if (!flags.includes("i")) issues.push(`${f} immutable bit not set`); - } catch (err) { - const msg = err instanceof Error ? err.message : String(err); - issues.push(`${f} lsattr failed: ${msg}`); + } catch { + // lsattr may not be available on all images — skip } } } diff --git a/test/e2e/test-shields-config.sh b/test/e2e/test-shields-config.sh index 50994cad8d..076c63f92b 100755 --- a/test/e2e/test-shields-config.sh +++ b/test/e2e/test-shields-config.sh @@ -313,95 +313,6 @@ else fail "shields status should show UP: ${STATUS_OUTPUT}" fi -if echo "$STATUS_OUTPUT" | grep -q "DRIFTED"; then - fail "clean lockdown should not report DRIFTED: ${STATUS_OUTPUT}" -else - pass "shields status does not report DRIFTED on a clean lockdown" -fi - -# ══════════════════════════════════════════════════════════════════ -# Phase 5b: host-root tamper — shields status surfaces drift -# ══════════════════════════════════════════════════════════════════ -section "Phase 5b: shields status detects host-root tamper" - -CTR=$(docker ps --filter "name=openshell-${SANDBOX_NAME}" --format "{{.Names}}" | head -n 1) -if [ -z "${CTR}" ]; then - fail "Could not find sandbox container openshell-${SANDBOX_NAME} for tamper drill" -else - info "Tampering sandbox filesystem via host-root docker exec on ${CTR}" - CONFIG_DIR=$(dirname "${CONFIG_PATH}") - HASH_PATH="${CONFIG_DIR}/.config-hash" - - docker exec --user root "${CTR}" sh -c " - chmod 2770 '${CONFIG_DIR}' && - chown sandbox:sandbox '${CONFIG_DIR}' && - chmod 660 '${CONFIG_PATH}' '${HASH_PATH}' && - chown sandbox:sandbox '${CONFIG_PATH}' '${HASH_PATH}' - " >/dev/null 2>&1 - TAMPER_EXIT=$? - - if [ "${TAMPER_EXIT}" != "0" ]; then - fail "Could not apply tamper perms via docker exec (exit ${TAMPER_EXIT})" - else - PERMS_TAMPERED=$(docker exec --user root "${CTR}" stat -c '%a %U:%G' "${CONFIG_PATH}" 2>/dev/null || true) - info "Config perms after tamper: ${PERMS_TAMPERED}" - - DRIFT_OUTPUT=$(nemoclaw "${SANDBOX_NAME}" shields status 2>&1) - DRIFT_EXIT=$? - echo "$DRIFT_OUTPUT" - - if [ "${DRIFT_EXIT}" = "2" ]; then - pass "shields status exits 2 when sandbox filesystem drifts from declared lockdown" - else - fail "shields status should exit 2 on drift, got ${DRIFT_EXIT}: ${DRIFT_OUTPUT}" - fi - - if echo "$DRIFT_OUTPUT" | grep -q "DRIFTED"; then - pass "shields status reports DRIFTED on host-root tamper" - else - fail "shields status output missing DRIFTED marker: ${DRIFT_OUTPUT}" - fi - - if echo "$DRIFT_OUTPUT" | grep -qE "mode=660 \(expected 444\)"; then - pass "drift listing includes file mode mismatch" - else - fail "drift listing should include file mode mismatch: ${DRIFT_OUTPUT}" - fi - - if echo "$DRIFT_OUTPUT" | grep -qE "owner=sandbox:sandbox \(expected root:root\)"; then - pass "drift listing includes ownership mismatch" - else - fail "drift listing should include ownership mismatch: ${DRIFT_OUTPUT}" - fi - - info "Restoring lockdown perms so Phase 6 can run shields down cleanly" - # BusyBox chmod with 3-digit octal preserves setgid, so clear it - # explicitly before applying the 755 dir mode to match what - # `shields up` produces (plain 755, not 2755). - docker exec --user root "${CTR}" sh -c " - chmod g-s '${CONFIG_DIR}' && - chmod 755 '${CONFIG_DIR}' && - chown root:root '${CONFIG_DIR}' && - chmod 444 '${CONFIG_PATH}' '${HASH_PATH}' && - chown root:root '${CONFIG_PATH}' '${HASH_PATH}' - " >/dev/null 2>&1 - RESTORE_EXIT=$? - - if [ "${RESTORE_EXIT}" != "0" ]; then - fail "Could not restore lockdown perms via docker exec (exit ${RESTORE_EXIT})" - else - CLEAN_OUTPUT=$(nemoclaw "${SANDBOX_NAME}" shields status 2>&1) - CLEAN_EXIT=$? - - if [ "${CLEAN_EXIT}" = "0" ] && echo "$CLEAN_OUTPUT" | grep -q "Shields: UP (lockdown active)"; then - pass "shields status returns to clean UP after manual re-lock" - else - fail "shields status should return to clean UP after re-lock (exit ${CLEAN_EXIT}): ${CLEAN_OUTPUT}" - fi - fi - fi -fi - # ══════════════════════════════════════════════════════════════════ # Phase 6: shields down — config returns to writable # ══════════════════════════════════════════════════════════════════ From 45a3956bd05a004bf1eb10acd004e6c4c8f191b9 Mon Sep 17 00:00:00 2001 From: Tinson Lai Date: Wed, 27 May 2026 11:42:53 +0000 Subject: [PATCH 09/12] fix(shields): central privileged-exec, drift re-lock, tighten verifier Signed-off-by: Tinson Lai --- docs/security/best-practices.mdx | 4 +- src/lib/shields/index.ts | 120 +++++++++++++--------------- src/lib/shields/verify-lock.test.ts | 78 +++++++++++++++--- src/lib/shields/verify-lock.ts | 36 +++++---- test/e2e/test-shields-config.sh | 89 +++++++++++++++++++++ 5 files changed, 236 insertions(+), 91 deletions(-) diff --git a/docs/security/best-practices.mdx b/docs/security/best-practices.mdx index dd2b75569c..acc70b6a4f 100644 --- a/docs/security/best-practices.mdx +++ b/docs/security/best-practices.mdx @@ -202,13 +202,13 @@ Writable agent state such as plugins, skills, hooks, and workspace metadata live By default, this directory starts writable so the agent can manage its own config, install skills, and write to standard home-directory paths natively. For sensitive workloads, use a reviewed host-side immutability workflow after initial setup so config and writable state entry points cannot be changed by the sandbox user. -- **DAC permissions (default).** The sandbox user owns `/sandbox/.openclaw` with mode `700` and `openclaw.json` with mode `600`, so the agent can read and write config directly. +- DAC permissions (default): the sandbox user owns `/sandbox/.openclaw` with mode `2770` (group-writable with setgid so descendants stay in the sandbox group) and `openclaw.json` with mode `660`, so the agent reads and writes config directly. `shields up` re-owns the directory `root:root 755` and the config files `root:root 444`, and `shields status` cross-checks the locked state against the sandbox filesystem and surfaces drift when a host-root tamper reverts those perms. - **Config integrity hash.** The image includes a SHA256 hash of `openclaw.json`. In the default mutable state, `.config-hash` is sandbox-owned and is not a tamper-proof trust anchor, so startup does not fail closed on that hash. When the hash is root-owned and read-only, startup enforces it and refuses to start if the hash does not match. - **Gateway token environment.** The gateway exports `OPENCLAW_GATEWAY_TOKEN` and writes it to `/tmp/nemoclaw-proxy-env.sh` for interactive sandbox sessions. Keep this in mind when deciding whether a workload should run with mutable config or an immutable config posture. | Aspect | Detail | |---|---| -| Default | The sandbox keeps `/sandbox/.openclaw` writable (`700 sandbox:sandbox`), sets `openclaw.json` to `600 sandbox:sandbox`, lets the agent manage state directly, and has the gateway place `OPENCLAW_GATEWAY_TOKEN` in `/tmp/nemoclaw-proxy-env.sh` for interactive shells. | +| Default | The sandbox keeps `/sandbox/.openclaw` writable (`2770 sandbox:sandbox`), sets `openclaw.json` to `660 sandbox:sandbox`, lets the agent manage state directly, and has the gateway place `OPENCLAW_GATEWAY_TOKEN` in `/tmp/nemoclaw-proxy-env.sh` for interactive shells. | | What you can change | Apply a reviewed host-side immutability workflow to lock config and state directories with DAC permissions and the immutable flag where available. | | Risk of default | A writable `.openclaw` directory lets the agent modify its own gateway config: disabling CORS or redirecting inference to an attacker-controlled endpoint. | | Recommendation | For always-on assistants handling sensitive workloads, lock config after initial setup. For development workflows, the writable default is appropriate. | diff --git a/src/lib/shields/index.ts b/src/lib/shields/index.ts index 59a132fc1d..04efd52bc2 100644 --- a/src/lib/shields/index.ts +++ b/src/lib/shields/index.ts @@ -16,10 +16,6 @@ const { fork } = require("child_process"); const { randomBytes } = require("crypto"); const { run, runCapture, validateName } = require("../runner"); const { dockerExecFileSync } = require("../adapters/docker/exec"); -const { dockerCapture } = require("../adapters/docker/run"); -const registry = require("../state/registry") as { - getSandbox?: (name: string) => { openshellDriver?: string | null } | null; -}; const { buildPolicyGetCommand, buildPolicySetCommand, @@ -42,6 +38,9 @@ const { const { resolveNemoclawStateDir } = require("../state/paths"); const { appendAuditEntry } = require("./audit"); const { resolveAgentConfig } = require("../sandbox/config"); +const { + privilegedSandboxExecArgv, +}: typeof import("../sandbox/privileged-exec") = require("../sandbox/privileged-exec"); const { buildRuntimePermissivePolicy, }: typeof import("./permissive-runtime") = require("./permissive-runtime"); @@ -53,69 +52,16 @@ const { const STATE_DIR = resolveNemoclawStateDir(); // --------------------------------------------------------------------------- -// privileged sandbox exec — bypasses the sandbox's Landlock context +// privileged sandbox exec — bypasses the sandbox's Landlock context. // -// openshell sandbox exec runs commands INSIDE the Landlock domain, so it -// can't modify read_only paths or change chattr flags. kubectl exec starts -// a new process in the pod that does NOT inherit the Landlock ruleset. -// On the legacy gateway we reach kubectl via the K3s container. On the -// Docker-driver gateway there is no K3s container, so we exec into the -// sandbox Docker container directly as root. +// `openshell sandbox exec` runs commands inside the Landlock domain, so it +// cannot modify read-only paths or change chattr flags. Privileged Docker +// exec starts a new root process that does NOT inherit the Landlock ruleset. +// Container resolution + registry-scoped owner matching live in +// `../sandbox/privileged-exec`; this file just adds the dockerExecFileSync +// invocation wrapper used by the lock/unlock helpers below. // --------------------------------------------------------------------------- -const K3S_CONTAINER = "openshell-cluster-nemoclaw"; - -function resolveDockerDriverSandboxContainer( - sandboxName: string, -): string | null { - try { - if (registry.getSandbox?.(sandboxName)?.openshellDriver !== "docker") { - return null; - } - } catch { - return null; - } - const prefix = `openshell-${sandboxName}-`; - const exact = `openshell-${sandboxName}`; - const output = dockerCapture(["ps", "--format", "{{.Names}}"], { - ignoreError: true, - }); - return ( - output - .split("\n") - .map((line: string) => line.trim()) - .find((name: string) => name === exact || name.startsWith(prefix)) || null - ); -} - -function kubectlExecArgv(sandboxName: string, cmd: string[]): string[] { - return [ - "exec", - K3S_CONTAINER, - "kubectl", - "exec", - "-n", - "openshell", - sandboxName, - "-c", - "agent", - "--", - ...cmd, - ]; -} - -function privilegedSandboxExecArgv( - sandboxName: string, - cmd: string[], -): string[] { - const dockerDriverContainer = - resolveDockerDriverSandboxContainer(sandboxName); - if (dockerDriverContainer) { - return ["exec", "--user", "root", dockerDriverContainer, ...cmd]; - } - return kubectlExecArgv(sandboxName, cmd); -} - function privilegedSandboxExec(sandboxName: string, cmd: string[]): void { dockerExecFileSync(privilegedSandboxExecArgv(sandboxName, cmd), { stdio: ["ignore", "pipe", "pipe"], @@ -1121,8 +1067,52 @@ function shieldsUp(sandboxName: string, opts: { throwOnError?: boolean } = {}): // shieldsDown === false means explicitly locked by a previous shields-up. // undefined (no state file) means fresh sandbox — mutable default, allow shields-up. if (state.shieldsDown === false) { + // Re-verify the sandbox filesystem matches the declared locked posture. + // If a host-root tamper has drifted the protected perms, fall through to + // re-apply the lock instead of bailing — `shields status` directs the + // operator here for exactly this remediation path. + const target = resolveAgentConfig(sandboxName); + const verification = verifyShieldsLockState(sandboxName, target, { + exec: (cmd: string[]) => privilegedSandboxExecCapture(sandboxName, cmd), + assertLegacyLayout: assertNoLegacyStateLayout, + }); + if (verification.ok) { + clearTimerMarker(sandboxName); + console.log(" Lockdown is already active."); + return; + } + + console.log( + " Lockdown was declared but the sandbox filesystem drifted; re-locking...", + ); + for (const issue of verification.issues) { + console.log(` - ${issue}`); + } + killTimer(sandboxName); + try { + lockAgentConfig(sandboxName, target); + } catch (err) { + const message = err instanceof Error ? err.message : String(err); + console.error(` ERROR: ${message}`); + console.error(" Config remains drifted — manual intervention required."); + return failShieldsCommand(message, opts.throwOnError); + } + saveShieldsState(sandboxName, { + shieldsDown: false, + shieldsDownAt: null, + shieldsDownTimeout: null, + shieldsDownReason: null, + shieldsDownPolicy: null, + }); clearTimerMarker(sandboxName); - console.log(" Lockdown is already active."); + appendAuditEntry({ + action: "shields_up", + sandbox: sandboxName, + timestamp: new Date().toISOString(), + restored_by: "operator", + reason: "drift remediation", + }); + console.log(` Lockdown re-applied for ${sandboxName} after drift.`); return; } diff --git a/src/lib/shields/verify-lock.test.ts b/src/lib/shields/verify-lock.test.ts index 8d19c11eb6..586edd564c 100644 --- a/src/lib/shields/verify-lock.test.ts +++ b/src/lib/shields/verify-lock.test.ts @@ -22,6 +22,8 @@ const target = { sensitiveFiles: ["/sandbox/.openclaw/.config-hash"], }; +const noopLegacyLayoutAsserter = (): void => {}; + type StatLookup = Record; function makeExec(perms: StatLookup): (cmd: string[]) => string { @@ -43,7 +45,10 @@ describe("verifyShieldsLockState", () => { "/sandbox/.openclaw": "755 root:root", }); - const result = verifyShieldsLockState("openclaw", target, { exec }); + const result = verifyShieldsLockState("openclaw", target, { + exec, + assertLegacyLayout: noopLegacyLayoutAsserter, + }); expect(result.ok).toBe(true); expect(result.issues).toEqual([]); @@ -57,7 +62,10 @@ describe("verifyShieldsLockState", () => { "/sandbox/.openclaw": "2770 sandbox:sandbox", }); - const result = verifyShieldsLockState("openclaw", target, { exec }); + const result = verifyShieldsLockState("openclaw", target, { + exec, + assertLegacyLayout: noopLegacyLayoutAsserter, + }); expect(result.ok).toBe(false); expect(result.issues).toEqual( @@ -90,7 +98,10 @@ describe("verifyShieldsLockState", () => { "/sandbox/.openclaw": "755 root:root", }); - const result = verifyShieldsLockState("openclaw", target, { exec }); + const result = verifyShieldsLockState("openclaw", target, { + exec, + assertLegacyLayout: noopLegacyLayoutAsserter, + }); expect(result.ok).toBe(false); expect(result.issues).toContain( @@ -107,7 +118,10 @@ describe("verifyShieldsLockState", () => { "/sandbox/.openclaw": "775 root:root", }); - const result = verifyShieldsLockState("openclaw", target, { exec }); + const result = verifyShieldsLockState("openclaw", target, { + exec, + assertLegacyLayout: noopLegacyLayoutAsserter, + }); expect(result.ok).toBe(false); expect(result.issues).toContain("dir mode=775 (expected 755)"); @@ -119,7 +133,10 @@ describe("verifyShieldsLockState", () => { throw new Error("Container not found"); }; - const result = verifyShieldsLockState("openclaw", target, { exec }); + const result = verifyShieldsLockState("openclaw", target, { + exec, + assertLegacyLayout: noopLegacyLayoutAsserter, + }); expect(result.ok).toBe(false); expect(result.issues.some((issue: string) => issue.includes("stat failed"))).toBe( @@ -144,12 +161,16 @@ describe("verifyShieldsLockState", () => { return ""; }; - const withoutChattrCheck = verifyShieldsLockState("openclaw", target, { exec }); + const withoutChattrCheck = verifyShieldsLockState("openclaw", target, { + exec, + assertLegacyLayout: noopLegacyLayoutAsserter, + }); expect(withoutChattrCheck.ok).toBe(true); const withChattrCheck = verifyShieldsLockState("openclaw", target, { exec, verifyChattr: true, + assertLegacyLayout: noopLegacyLayoutAsserter, }); expect(withChattrCheck.ok).toBe(false); expect(withChattrCheck.issues).toEqual( @@ -160,6 +181,34 @@ describe("verifyShieldsLockState", () => { ); }); + it("records lsattr exec failure as drift when verifyChattr is requested so a missing binary is not a silent pass", async () => { + const { verifyShieldsLockState } = await loadVerifier(); + const exec = (cmd: string[]): string => { + if (cmd[0] === "stat") { + if (cmd[cmd.length - 1] === "/sandbox/.openclaw") return "755 root:root"; + return "444 root:root"; + } + if (cmd[0] === "lsattr") { + throw new Error("lsattr: command not found"); + } + return ""; + }; + + const result = verifyShieldsLockState("openclaw", target, { + exec, + verifyChattr: true, + assertLegacyLayout: noopLegacyLayoutAsserter, + }); + + expect(result.ok).toBe(false); + expect(result.issues).toEqual( + expect.arrayContaining([ + "/sandbox/.openclaw/openclaw.json lsattr failed: lsattr: command not found", + "/sandbox/.openclaw/.config-hash lsattr failed: lsattr: command not found", + ]), + ); + }); + it("surfaces a legacy state layout violation when the asserter throws", async () => { const { verifyShieldsLockState } = await loadVerifier(); const exec = makeExec({ @@ -183,8 +232,19 @@ describe("verifyShieldsLockState", () => { it("rejects calls without an exec dependency so production paths cannot silently no-op", async () => { const { verifyShieldsLockState } = await loadVerifier(); - expect(() => verifyShieldsLockState("openclaw", target)).toThrow( - /requires options\.exec/, - ); + expect(() => + verifyShieldsLockState("openclaw", target, { + assertLegacyLayout: noopLegacyLayoutAsserter, + } as unknown as Parameters[2]), + ).toThrow(/requires options\.exec/); + }); + + it("rejects calls without an assertLegacyLayout dependency so the legacy-layout check cannot be skipped", async () => { + const { verifyShieldsLockState } = await loadVerifier(); + expect(() => + verifyShieldsLockState("openclaw", target, { + exec: () => "", + } as unknown as Parameters[2]), + ).toThrow(/requires options\.assertLegacyLayout/); }); }); diff --git a/src/lib/shields/verify-lock.ts b/src/lib/shields/verify-lock.ts index 79e3d90b42..a50e3857b9 100644 --- a/src/lib/shields/verify-lock.ts +++ b/src/lib/shields/verify-lock.ts @@ -6,8 +6,16 @@ // config directory, no legacy state layout, and (when the caller knows // chattr was applied) the immutable bit. Returns the list of mismatches // so callers can either fail the lock operation or surface drift after a -// host-root tamper. Stat/lsattr failures are folded into `issues` so the -// caller can decide whether to treat them as drift. +// host-root tamper. +// +// Failure handling: +// - `stat` failures are recorded as issues so an unreachable sandbox is +// never mistaken for a clean lockdown. +// - `lsattr` failures are recorded as issues only when `verifyChattr` is +// true. Some images do not ship `lsattr`, so without an explicit chattr +// request a missing binary is treated as an unavailable check rather +// than a tamper signal. +// - `assertLegacyLayout` failures are recorded as issues. export type LockTarget = { configPath: string; @@ -17,8 +25,8 @@ export type LockTarget = { export type VerifyShieldsLockOptions = { verifyChattr?: boolean; - exec?: (cmd: string[]) => string; - assertLegacyLayout?: (sandboxName: string, configDir: string) => void; + exec: (cmd: string[]) => string; + assertLegacyLayout: (sandboxName: string, configDir: string) => void; }; export type VerifyShieldsLockResult = { @@ -30,21 +38,18 @@ const EXPECTED_FILE_MODE = "444"; const EXPECTED_DIR_MODE = "755"; const EXPECTED_OWNER = "root:root"; -function noopAssertLegacyLayout(_sandboxName: string, _configDir: string): void { - // Production callers replace this with the real legacy-layout assertion; - // when omitted, the verifier treats legacy-layout state as "no issue". -} - export function verifyShieldsLockState( sandboxName: string, target: LockTarget, - options: VerifyShieldsLockOptions = {}, + options: VerifyShieldsLockOptions, ): VerifyShieldsLockResult { - if (!options.exec) { + if (!options || typeof options.exec !== "function") { throw new Error("verifyShieldsLockState requires options.exec"); } - const exec = options.exec; - const assertLegacyLayout = options.assertLegacyLayout ?? noopAssertLegacyLayout; + if (typeof options.assertLegacyLayout !== "function") { + throw new Error("verifyShieldsLockState requires options.assertLegacyLayout"); + } + const { exec, assertLegacyLayout } = options; const issues: string[] = []; const filesToVerify = [target.configPath, ...(target.sensitiveFiles || [])]; @@ -82,8 +87,9 @@ export function verifyShieldsLockState( // First whitespace-delimited token is the flags field. const [flags] = attrs.trim().split(/\s+/, 1); if (!flags.includes("i")) issues.push(`${f} immutable bit not set`); - } catch { - // lsattr may not be available on all images — skip + } catch (err) { + const msg = err instanceof Error ? err.message : String(err); + issues.push(`${f} lsattr failed: ${msg}`); } } } diff --git a/test/e2e/test-shields-config.sh b/test/e2e/test-shields-config.sh index 076c63f92b..ab239cfa39 100755 --- a/test/e2e/test-shields-config.sh +++ b/test/e2e/test-shields-config.sh @@ -313,6 +313,95 @@ else fail "shields status should show UP: ${STATUS_OUTPUT}" fi +if echo "$STATUS_OUTPUT" | grep -q "DRIFTED"; then + fail "clean lockdown should not report DRIFTED: ${STATUS_OUTPUT}" +else + pass "shields status does not report DRIFTED on a clean lockdown" +fi + +# ══════════════════════════════════════════════════════════════════ +# Phase 5b: host-root tamper — shields status surfaces drift, shields up re-locks +# ══════════════════════════════════════════════════════════════════ +section "Phase 5b: shields status detects host-root tamper" + +CTR=$(docker ps --filter "name=openshell-${SANDBOX_NAME}" --format "{{.Names}}" | head -n 1) +if [ -z "${CTR}" ]; then + fail "Could not find sandbox container openshell-${SANDBOX_NAME} for tamper drill" +else + info "Tampering sandbox filesystem via host-root docker exec on ${CTR}" + CONFIG_DIR=$(dirname "${CONFIG_PATH}") + HASH_PATH="${CONFIG_DIR}/.config-hash" + + docker exec --user root "${CTR}" sh -c " + chmod 2770 '${CONFIG_DIR}' && + chown sandbox:sandbox '${CONFIG_DIR}' && + chmod 660 '${CONFIG_PATH}' '${HASH_PATH}' && + chown sandbox:sandbox '${CONFIG_PATH}' '${HASH_PATH}' + " >/dev/null 2>&1 + TAMPER_EXIT=$? + + if [ "${TAMPER_EXIT}" != "0" ]; then + fail "Could not apply tamper perms via docker exec (exit ${TAMPER_EXIT})" + else + PERMS_TAMPERED=$(docker exec --user root "${CTR}" stat -c '%a %U:%G' "${CONFIG_PATH}" 2>/dev/null || true) + info "Config perms after tamper: ${PERMS_TAMPERED}" + + DRIFT_OUTPUT=$(nemoclaw "${SANDBOX_NAME}" shields status 2>&1) + DRIFT_EXIT=$? + echo "$DRIFT_OUTPUT" + + if [ "${DRIFT_EXIT}" = "2" ]; then + pass "shields status exits 2 when sandbox filesystem drifts from declared lockdown" + else + fail "shields status should exit 2 on drift, got ${DRIFT_EXIT}: ${DRIFT_OUTPUT}" + fi + + if echo "$DRIFT_OUTPUT" | grep -q "DRIFTED"; then + pass "shields status reports DRIFTED on host-root tamper" + else + fail "shields status output missing DRIFTED marker: ${DRIFT_OUTPUT}" + fi + + if echo "$DRIFT_OUTPUT" | grep -qE "mode=660 \(expected 444\)"; then + pass "drift listing includes file mode mismatch" + else + fail "drift listing should include file mode mismatch: ${DRIFT_OUTPUT}" + fi + + if echo "$DRIFT_OUTPUT" | grep -qE "owner=sandbox:sandbox \(expected root:root\)"; then + pass "drift listing includes ownership mismatch" + else + fail "drift listing should include ownership mismatch: ${DRIFT_OUTPUT}" + fi + + info "Re-locking via shields up to exercise the documented recovery path" + RELOCK_OUTPUT=$(nemoclaw "${SANDBOX_NAME}" shields up 2>&1) + RELOCK_EXIT=$? + echo "$RELOCK_OUTPUT" + + if [ "${RELOCK_EXIT}" = "0" ]; then + pass "shields up exits 0 after re-lock from drift" + else + fail "shields up should exit 0 after re-lock, got ${RELOCK_EXIT}: ${RELOCK_OUTPUT}" + fi + + if echo "$RELOCK_OUTPUT" | grep -q "Lockdown re-applied"; then + pass "shields up reports drift remediation" + else + fail "shields up should report drift remediation: ${RELOCK_OUTPUT}" + fi + + CLEAN_OUTPUT=$(nemoclaw "${SANDBOX_NAME}" shields status 2>&1) + CLEAN_EXIT=$? + + if [ "${CLEAN_EXIT}" = "0" ] && echo "$CLEAN_OUTPUT" | grep -q "Shields: UP (lockdown active)"; then + pass "shields status returns to clean UP after shields up re-lock" + else + fail "shields status should return to clean UP after re-lock (exit ${CLEAN_EXIT}): ${CLEAN_OUTPUT}" + fi + fi +fi + # ══════════════════════════════════════════════════════════════════ # Phase 6: shields down — config returns to writable # ══════════════════════════════════════════════════════════════════ From 08b1d68b7b049071d6d5ab86790e18c407053ee5 Mon Sep 17 00:00:00 2001 From: Tinson Lai Date: Wed, 27 May 2026 12:11:20 +0000 Subject: [PATCH 10/12] fix(shields): persist chattrApplied + drop kubectl from operator-facing text Signed-off-by: Tinson Lai --- src/lib/shields/index.ts | 68 +++++++++++++++++++++++++++++----------- src/lib/shields/timer.ts | 6 +++- 2 files changed, 54 insertions(+), 20 deletions(-) diff --git a/src/lib/shields/index.ts b/src/lib/shields/index.ts index 04efd52bc2..e7c03b435d 100644 --- a/src/lib/shields/index.ts +++ b/src/lib/shields/index.ts @@ -105,6 +105,11 @@ interface ShieldsState { shieldsDownReason?: string | null; shieldsDownPolicy?: string | null; shieldsPolicySnapshotPath?: string | null; + // Recorded by the last successful lock to indicate whether `chattr +i` + // took effect against the locked files. Best-effort because privileged + // exec may lack CAP_LINUX_IMMUTABLE on some runtimes, so status callers + // only verify the immutable bit when this flag is true. + shieldsChattrApplied?: boolean; updatedAt?: string; } @@ -291,6 +296,7 @@ function isShieldsState(value: unknown): value is ShieldsState { isOptionalNullableString(value.shieldsDownReason) && isOptionalNullableString(value.shieldsDownPolicy) && isOptionalNullableString(value.shieldsPolicySnapshotPath) && + isOptionalBoolean(value.shieldsChattrApplied) && isOptionalString(value.updatedAt) ); } @@ -449,8 +455,9 @@ function assertNoLegacyStateLayout( // user and the gateway UID can write the mutable config tree. Hermes keeps its // tighter single-user layout. // -// Note on chattr: best-effort — it may silently fail if kubectl exec -// lacks CAP_LINUX_IMMUTABLE or if the file was never immutable. That's fine: +// Note on chattr: best-effort — it may silently fail if the privileged +// sandbox exec channel lacks CAP_LINUX_IMMUTABLE or if the file was never +// immutable. That's fine: // the file becomes writable through the permissive policy (disables Landlock // read_only) + chown/chmod below. // --------------------------------------------------------------------------- @@ -583,15 +590,17 @@ function unlockAgentConfig( // 2. UNIX permissions — 444 root:root (mandatory, verified here) // 3. chattr +i immutable bit — defense-in-depth (best-effort) // -// Layer 3 is best-effort because kubectl exec may lack -// CAP_LINUX_IMMUTABLE. Layers 1+2 are sufficient. We still attempt it -// in case the runtime environment supports it. +// Layer 3 is best-effort because the privileged sandbox exec channel may +// lack CAP_LINUX_IMMUTABLE on some runtimes. Layers 1+2 are sufficient. +// We still attempt layer 3 in case the runtime environment supports it, +// and report back whether `chattr +i` actually took effect so callers can +// persist that signal and verify the immutable bit at status time. // --------------------------------------------------------------------------- function lockAgentConfig( sandboxName: string, target: AgentConfigTarget, -): void { +): { chattrApplied: boolean } { const errors: string[] = []; const filesToLock = [target.configPath, ...(target.sensitiveFiles || [])]; @@ -624,8 +633,9 @@ function lockAgentConfig( errors.push("chown root:root config dir"); } - // Best-effort: kubectl exec may lack CAP_LINUX_IMMUTABLE. Track the - // result so verification doesn't require something that was never there. + // Best-effort: the privileged sandbox exec channel may lack + // CAP_LINUX_IMMUTABLE. Track the result so verification doesn't require + // something that was never there, and so callers can persist the outcome. let chattrSucceeded = true; for (const f of filesToLock) { try { @@ -670,6 +680,8 @@ function lockAgentConfig( if (issues.length > 0) { throw new Error(`Config not locked: ${issues.join(", ")}`); } + + return { chattrApplied: chattrSucceeded }; } function rollbackShieldsDown( @@ -682,9 +694,11 @@ function rollbackShieldsDown( ignoreError: true, }); let rollbackLocked = false; + let rollbackChattrApplied = false; if (rollbackResult.status === 0) { try { - lockAgentConfig(sandboxName, target); + const { chattrApplied } = lockAgentConfig(sandboxName, target); + rollbackChattrApplied = chattrApplied; rollbackLocked = true; } catch { console.error( @@ -701,12 +715,13 @@ function rollbackShieldsDown( shieldsDownTimeout: null, shieldsDownReason: null, shieldsDownPolicy: null, + shieldsChattrApplied: rollbackChattrApplied, }); console.error(" Lockdown restored. Config was never left unguarded."); } else { console.error(" Config remains unlocked — manual intervention required."); console.error( - ` Re-lock manually via kubectl exec, then run: nemoclaw ${sandboxName} shields up`, + ` Re-lock manually via privileged sandbox exec, then run: nemoclaw ${sandboxName} shields up`, ); } } @@ -714,6 +729,7 @@ function rollbackShieldsDown( interface LockdownActivationResult { ok: boolean; error?: string; + chattrApplied?: boolean; } function activateLockdownFromSnapshot( @@ -738,13 +754,12 @@ function activateLockdownFromSnapshot( const target = resolveAgentConfig(sandboxName); try { - lockAgentConfig(sandboxName, target); + const { chattrApplied } = lockAgentConfig(sandboxName, target); + return { ok: true, chattrApplied }; } catch (error) { const message = error instanceof Error ? error.message : String(error); return { ok: false, error: message }; } - - return { ok: true }; } function recoverExpiredAutoRestoreInline( @@ -969,7 +984,7 @@ function shieldsDown(sandboxName: string, opts: ShieldsDownOpts = {}): void { // 4. Start auto-restore timer (detached child process), unless skipped. // Pass the absolute restore time, not a relative timeout. Steps 1-2b - // can take minutes (policy apply + kubectl chmod), so a relative timeout + // can take minutes (policy apply + privileged chmod), so a relative timeout // passed at fork time would fire too early. if (!opts.skipTimer) { const restoreAt = new Date(Date.now() + timeoutSeconds * 1000); @@ -1073,6 +1088,7 @@ function shieldsUp(sandboxName: string, opts: { throwOnError?: boolean } = {}): // operator here for exactly this remediation path. const target = resolveAgentConfig(sandboxName); const verification = verifyShieldsLockState(sandboxName, target, { + verifyChattr: state.shieldsChattrApplied === true, exec: (cmd: string[]) => privilegedSandboxExecCapture(sandboxName, cmd), assertLegacyLayout: assertNoLegacyStateLayout, }); @@ -1089,8 +1105,10 @@ function shieldsUp(sandboxName: string, opts: { throwOnError?: boolean } = {}): console.log(` - ${issue}`); } killTimer(sandboxName); + let driftChattrApplied = false; try { - lockAgentConfig(sandboxName, target); + const { chattrApplied } = lockAgentConfig(sandboxName, target); + driftChattrApplied = chattrApplied; } catch (err) { const message = err instanceof Error ? err.message : String(err); console.error(` ERROR: ${message}`); @@ -1103,6 +1121,7 @@ function shieldsUp(sandboxName: string, opts: { throwOnError?: boolean } = {}): shieldsDownTimeout: null, shieldsDownReason: null, shieldsDownPolicy: null, + shieldsChattrApplied: driftChattrApplied, }); clearTimerMarker(sandboxName); appendAuditEntry({ @@ -1134,6 +1153,7 @@ function shieldsUp(sandboxName: string, opts: { throwOnError?: boolean } = {}): ); return failShieldsCommand("Saved policy snapshot is missing", opts.throwOnError); } + let firstLockChattrApplied = false; if (snapshotPath) { console.log(" Restoring restrictive policy from snapshot..."); const activation = activateLockdownFromSnapshot(sandboxName, snapshotPath); @@ -1143,13 +1163,14 @@ function shieldsUp(sandboxName: string, opts: { throwOnError?: boolean } = {}): " Config remains unlocked — manual intervention required.", ); console.error( - ` Re-lock manually via kubectl exec, then run: nemoclaw ${sandboxName} shields up`, + ` Re-lock manually via privileged sandbox exec, then run: nemoclaw ${sandboxName} shields up`, ); return failShieldsCommand(activation.error ?? "unknown restore error", opts.throwOnError); } + firstLockChattrApplied = activation.chattrApplied === true; } else { // 2b. Lock config file to read-only. - // Uses kubectl exec to bypass Landlock (same as shields down). + // Uses privileged sandbox exec to bypass Landlock (same as shields down). // Each operation runs independently and the result is verified. // If verification fails, config remains unlocked — we do not lie about state. const target = resolveAgentConfig(sandboxName); @@ -1157,7 +1178,8 @@ function shieldsUp(sandboxName: string, opts: { throwOnError?: boolean } = {}): ` Locking ${target.agentName} config (${target.configPath})...`, ); try { - lockAgentConfig(sandboxName, target); + const { chattrApplied } = lockAgentConfig(sandboxName, target); + firstLockChattrApplied = chattrApplied; } catch (err) { const message = err instanceof Error ? err.message : String(err); console.error(` ERROR: ${message}`); @@ -1165,7 +1187,7 @@ function shieldsUp(sandboxName: string, opts: { throwOnError?: boolean } = {}): " Config remains unlocked — manual intervention required.", ); console.error( - ` Re-lock manually via kubectl exec, then run: nemoclaw ${sandboxName} shields up`, + ` Re-lock manually via privileged sandbox exec, then run: nemoclaw ${sandboxName} shields up`, ); return failShieldsCommand(message, opts.throwOnError); } @@ -1185,6 +1207,7 @@ function shieldsUp(sandboxName: string, opts: { throwOnError?: boolean } = {}): shieldsDownTimeout: null, shieldsDownReason: null, shieldsDownPolicy: null, + shieldsChattrApplied: firstLockChattrApplied, // Keep snapshotPath for forensics — don't clear it }); clearTimerMarker(sandboxName); @@ -1254,10 +1277,17 @@ function shieldsStatus( // Cross-check the sandbox filesystem so a host-root tamper that reverts // protected perms back to a sandbox-writable state is surfaced as drift // instead of reported as a clean lockdown. + // + // The immutable bit is only verified when shields state recorded that + // `chattr +i` actually took effect at lock time. The privileged exec + // channel may lack CAP_LINUX_IMMUTABLE on some runtimes, so without + // that signal a missing `i` flag cannot be distinguished from "never + // set in the first place" and would surface as a false-positive drift. let driftIssues: string[] = []; try { const target = resolveConfig(sandboxName); driftIssues = verify(sandboxName, target, { + verifyChattr: state.shieldsChattrApplied === true, exec: (cmd: string[]) => privilegedSandboxExecCapture(sandboxName, cmd), assertLegacyLayout: assertNoLegacyStateLayout, }).issues; diff --git a/src/lib/shields/timer.ts b/src/lib/shields/timer.ts index 42dec0b511..b5f4bb2c43 100644 --- a/src/lib/shields/timer.ts +++ b/src/lib/shields/timer.ts @@ -24,6 +24,7 @@ interface ShieldsStatePatch { shieldsDownTimeout?: number | null; shieldsDownReason?: string | null; shieldsDownPolicy?: string | null; + shieldsChattrApplied?: boolean; } interface TimerArgs { @@ -187,6 +188,7 @@ function runRestoreTimer(args: TimerArgs): void { // that interactive `shields up` uses. Fall back to the bare configPath/ // configDir from argv if resolution fails (e.g., registry unavailable). let lockVerified = true; + let autoLockChattrApplied = false; if (args.configPath) { let lockTarget: { agentName?: string; @@ -220,7 +222,8 @@ function runRestoreTimer(args: TimerArgs): void { } if (lockTarget) { try { - lockAgentConfig(args.sandboxName, lockTarget); + const { chattrApplied } = lockAgentConfig(args.sandboxName, lockTarget); + autoLockChattrApplied = chattrApplied; } catch (error: unknown) { lockVerified = false; appendAudit({ @@ -243,6 +246,7 @@ function runRestoreTimer(args: TimerArgs): void { shieldsDownTimeout: null, shieldsDownReason: null, shieldsDownPolicy: null, + shieldsChattrApplied: autoLockChattrApplied, }); appendAudit({ From 3a7e852cdab7ecfe61bdbfe5ff12c4c245497026 Mon Sep 17 00:00:00 2001 From: Tinson Lai Date: Wed, 27 May 2026 12:26:22 +0000 Subject: [PATCH 11/12] revert: roll shields PR files back to 0c3b6a4fa state Signed-off-by: Tinson Lai --- docs/security/best-practices.mdx | 4 +- src/lib/shields/index.ts | 186 +++++++++++++--------------- src/lib/shields/timer.ts | 6 +- src/lib/shields/verify-lock.test.ts | 78 ++---------- src/lib/shields/verify-lock.ts | 36 +++--- test/e2e/test-shields-config.sh | 89 ------------- 6 files changed, 110 insertions(+), 289 deletions(-) diff --git a/docs/security/best-practices.mdx b/docs/security/best-practices.mdx index acc70b6a4f..dd2b75569c 100644 --- a/docs/security/best-practices.mdx +++ b/docs/security/best-practices.mdx @@ -202,13 +202,13 @@ Writable agent state such as plugins, skills, hooks, and workspace metadata live By default, this directory starts writable so the agent can manage its own config, install skills, and write to standard home-directory paths natively. For sensitive workloads, use a reviewed host-side immutability workflow after initial setup so config and writable state entry points cannot be changed by the sandbox user. -- DAC permissions (default): the sandbox user owns `/sandbox/.openclaw` with mode `2770` (group-writable with setgid so descendants stay in the sandbox group) and `openclaw.json` with mode `660`, so the agent reads and writes config directly. `shields up` re-owns the directory `root:root 755` and the config files `root:root 444`, and `shields status` cross-checks the locked state against the sandbox filesystem and surfaces drift when a host-root tamper reverts those perms. +- **DAC permissions (default).** The sandbox user owns `/sandbox/.openclaw` with mode `700` and `openclaw.json` with mode `600`, so the agent can read and write config directly. - **Config integrity hash.** The image includes a SHA256 hash of `openclaw.json`. In the default mutable state, `.config-hash` is sandbox-owned and is not a tamper-proof trust anchor, so startup does not fail closed on that hash. When the hash is root-owned and read-only, startup enforces it and refuses to start if the hash does not match. - **Gateway token environment.** The gateway exports `OPENCLAW_GATEWAY_TOKEN` and writes it to `/tmp/nemoclaw-proxy-env.sh` for interactive sandbox sessions. Keep this in mind when deciding whether a workload should run with mutable config or an immutable config posture. | Aspect | Detail | |---|---| -| Default | The sandbox keeps `/sandbox/.openclaw` writable (`2770 sandbox:sandbox`), sets `openclaw.json` to `660 sandbox:sandbox`, lets the agent manage state directly, and has the gateway place `OPENCLAW_GATEWAY_TOKEN` in `/tmp/nemoclaw-proxy-env.sh` for interactive shells. | +| Default | The sandbox keeps `/sandbox/.openclaw` writable (`700 sandbox:sandbox`), sets `openclaw.json` to `600 sandbox:sandbox`, lets the agent manage state directly, and has the gateway place `OPENCLAW_GATEWAY_TOKEN` in `/tmp/nemoclaw-proxy-env.sh` for interactive shells. | | What you can change | Apply a reviewed host-side immutability workflow to lock config and state directories with DAC permissions and the immutable flag where available. | | Risk of default | A writable `.openclaw` directory lets the agent modify its own gateway config: disabling CORS or redirecting inference to an attacker-controlled endpoint. | | Recommendation | For always-on assistants handling sensitive workloads, lock config after initial setup. For development workflows, the writable default is appropriate. | diff --git a/src/lib/shields/index.ts b/src/lib/shields/index.ts index e7c03b435d..59a132fc1d 100644 --- a/src/lib/shields/index.ts +++ b/src/lib/shields/index.ts @@ -16,6 +16,10 @@ const { fork } = require("child_process"); const { randomBytes } = require("crypto"); const { run, runCapture, validateName } = require("../runner"); const { dockerExecFileSync } = require("../adapters/docker/exec"); +const { dockerCapture } = require("../adapters/docker/run"); +const registry = require("../state/registry") as { + getSandbox?: (name: string) => { openshellDriver?: string | null } | null; +}; const { buildPolicyGetCommand, buildPolicySetCommand, @@ -38,9 +42,6 @@ const { const { resolveNemoclawStateDir } = require("../state/paths"); const { appendAuditEntry } = require("./audit"); const { resolveAgentConfig } = require("../sandbox/config"); -const { - privilegedSandboxExecArgv, -}: typeof import("../sandbox/privileged-exec") = require("../sandbox/privileged-exec"); const { buildRuntimePermissivePolicy, }: typeof import("./permissive-runtime") = require("./permissive-runtime"); @@ -52,16 +53,69 @@ const { const STATE_DIR = resolveNemoclawStateDir(); // --------------------------------------------------------------------------- -// privileged sandbox exec — bypasses the sandbox's Landlock context. +// privileged sandbox exec — bypasses the sandbox's Landlock context // -// `openshell sandbox exec` runs commands inside the Landlock domain, so it -// cannot modify read-only paths or change chattr flags. Privileged Docker -// exec starts a new root process that does NOT inherit the Landlock ruleset. -// Container resolution + registry-scoped owner matching live in -// `../sandbox/privileged-exec`; this file just adds the dockerExecFileSync -// invocation wrapper used by the lock/unlock helpers below. +// openshell sandbox exec runs commands INSIDE the Landlock domain, so it +// can't modify read_only paths or change chattr flags. kubectl exec starts +// a new process in the pod that does NOT inherit the Landlock ruleset. +// On the legacy gateway we reach kubectl via the K3s container. On the +// Docker-driver gateway there is no K3s container, so we exec into the +// sandbox Docker container directly as root. // --------------------------------------------------------------------------- +const K3S_CONTAINER = "openshell-cluster-nemoclaw"; + +function resolveDockerDriverSandboxContainer( + sandboxName: string, +): string | null { + try { + if (registry.getSandbox?.(sandboxName)?.openshellDriver !== "docker") { + return null; + } + } catch { + return null; + } + const prefix = `openshell-${sandboxName}-`; + const exact = `openshell-${sandboxName}`; + const output = dockerCapture(["ps", "--format", "{{.Names}}"], { + ignoreError: true, + }); + return ( + output + .split("\n") + .map((line: string) => line.trim()) + .find((name: string) => name === exact || name.startsWith(prefix)) || null + ); +} + +function kubectlExecArgv(sandboxName: string, cmd: string[]): string[] { + return [ + "exec", + K3S_CONTAINER, + "kubectl", + "exec", + "-n", + "openshell", + sandboxName, + "-c", + "agent", + "--", + ...cmd, + ]; +} + +function privilegedSandboxExecArgv( + sandboxName: string, + cmd: string[], +): string[] { + const dockerDriverContainer = + resolveDockerDriverSandboxContainer(sandboxName); + if (dockerDriverContainer) { + return ["exec", "--user", "root", dockerDriverContainer, ...cmd]; + } + return kubectlExecArgv(sandboxName, cmd); +} + function privilegedSandboxExec(sandboxName: string, cmd: string[]): void { dockerExecFileSync(privilegedSandboxExecArgv(sandboxName, cmd), { stdio: ["ignore", "pipe", "pipe"], @@ -105,11 +159,6 @@ interface ShieldsState { shieldsDownReason?: string | null; shieldsDownPolicy?: string | null; shieldsPolicySnapshotPath?: string | null; - // Recorded by the last successful lock to indicate whether `chattr +i` - // took effect against the locked files. Best-effort because privileged - // exec may lack CAP_LINUX_IMMUTABLE on some runtimes, so status callers - // only verify the immutable bit when this flag is true. - shieldsChattrApplied?: boolean; updatedAt?: string; } @@ -296,7 +345,6 @@ function isShieldsState(value: unknown): value is ShieldsState { isOptionalNullableString(value.shieldsDownReason) && isOptionalNullableString(value.shieldsDownPolicy) && isOptionalNullableString(value.shieldsPolicySnapshotPath) && - isOptionalBoolean(value.shieldsChattrApplied) && isOptionalString(value.updatedAt) ); } @@ -455,9 +503,8 @@ function assertNoLegacyStateLayout( // user and the gateway UID can write the mutable config tree. Hermes keeps its // tighter single-user layout. // -// Note on chattr: best-effort — it may silently fail if the privileged -// sandbox exec channel lacks CAP_LINUX_IMMUTABLE or if the file was never -// immutable. That's fine: +// Note on chattr: best-effort — it may silently fail if kubectl exec +// lacks CAP_LINUX_IMMUTABLE or if the file was never immutable. That's fine: // the file becomes writable through the permissive policy (disables Landlock // read_only) + chown/chmod below. // --------------------------------------------------------------------------- @@ -590,17 +637,15 @@ function unlockAgentConfig( // 2. UNIX permissions — 444 root:root (mandatory, verified here) // 3. chattr +i immutable bit — defense-in-depth (best-effort) // -// Layer 3 is best-effort because the privileged sandbox exec channel may -// lack CAP_LINUX_IMMUTABLE on some runtimes. Layers 1+2 are sufficient. -// We still attempt layer 3 in case the runtime environment supports it, -// and report back whether `chattr +i` actually took effect so callers can -// persist that signal and verify the immutable bit at status time. +// Layer 3 is best-effort because kubectl exec may lack +// CAP_LINUX_IMMUTABLE. Layers 1+2 are sufficient. We still attempt it +// in case the runtime environment supports it. // --------------------------------------------------------------------------- function lockAgentConfig( sandboxName: string, target: AgentConfigTarget, -): { chattrApplied: boolean } { +): void { const errors: string[] = []; const filesToLock = [target.configPath, ...(target.sensitiveFiles || [])]; @@ -633,9 +678,8 @@ function lockAgentConfig( errors.push("chown root:root config dir"); } - // Best-effort: the privileged sandbox exec channel may lack - // CAP_LINUX_IMMUTABLE. Track the result so verification doesn't require - // something that was never there, and so callers can persist the outcome. + // Best-effort: kubectl exec may lack CAP_LINUX_IMMUTABLE. Track the + // result so verification doesn't require something that was never there. let chattrSucceeded = true; for (const f of filesToLock) { try { @@ -680,8 +724,6 @@ function lockAgentConfig( if (issues.length > 0) { throw new Error(`Config not locked: ${issues.join(", ")}`); } - - return { chattrApplied: chattrSucceeded }; } function rollbackShieldsDown( @@ -694,11 +736,9 @@ function rollbackShieldsDown( ignoreError: true, }); let rollbackLocked = false; - let rollbackChattrApplied = false; if (rollbackResult.status === 0) { try { - const { chattrApplied } = lockAgentConfig(sandboxName, target); - rollbackChattrApplied = chattrApplied; + lockAgentConfig(sandboxName, target); rollbackLocked = true; } catch { console.error( @@ -715,13 +755,12 @@ function rollbackShieldsDown( shieldsDownTimeout: null, shieldsDownReason: null, shieldsDownPolicy: null, - shieldsChattrApplied: rollbackChattrApplied, }); console.error(" Lockdown restored. Config was never left unguarded."); } else { console.error(" Config remains unlocked — manual intervention required."); console.error( - ` Re-lock manually via privileged sandbox exec, then run: nemoclaw ${sandboxName} shields up`, + ` Re-lock manually via kubectl exec, then run: nemoclaw ${sandboxName} shields up`, ); } } @@ -729,7 +768,6 @@ function rollbackShieldsDown( interface LockdownActivationResult { ok: boolean; error?: string; - chattrApplied?: boolean; } function activateLockdownFromSnapshot( @@ -754,12 +792,13 @@ function activateLockdownFromSnapshot( const target = resolveAgentConfig(sandboxName); try { - const { chattrApplied } = lockAgentConfig(sandboxName, target); - return { ok: true, chattrApplied }; + lockAgentConfig(sandboxName, target); } catch (error) { const message = error instanceof Error ? error.message : String(error); return { ok: false, error: message }; } + + return { ok: true }; } function recoverExpiredAutoRestoreInline( @@ -984,7 +1023,7 @@ function shieldsDown(sandboxName: string, opts: ShieldsDownOpts = {}): void { // 4. Start auto-restore timer (detached child process), unless skipped. // Pass the absolute restore time, not a relative timeout. Steps 1-2b - // can take minutes (policy apply + privileged chmod), so a relative timeout + // can take minutes (policy apply + kubectl chmod), so a relative timeout // passed at fork time would fire too early. if (!opts.skipTimer) { const restoreAt = new Date(Date.now() + timeoutSeconds * 1000); @@ -1082,56 +1121,8 @@ function shieldsUp(sandboxName: string, opts: { throwOnError?: boolean } = {}): // shieldsDown === false means explicitly locked by a previous shields-up. // undefined (no state file) means fresh sandbox — mutable default, allow shields-up. if (state.shieldsDown === false) { - // Re-verify the sandbox filesystem matches the declared locked posture. - // If a host-root tamper has drifted the protected perms, fall through to - // re-apply the lock instead of bailing — `shields status` directs the - // operator here for exactly this remediation path. - const target = resolveAgentConfig(sandboxName); - const verification = verifyShieldsLockState(sandboxName, target, { - verifyChattr: state.shieldsChattrApplied === true, - exec: (cmd: string[]) => privilegedSandboxExecCapture(sandboxName, cmd), - assertLegacyLayout: assertNoLegacyStateLayout, - }); - if (verification.ok) { - clearTimerMarker(sandboxName); - console.log(" Lockdown is already active."); - return; - } - - console.log( - " Lockdown was declared but the sandbox filesystem drifted; re-locking...", - ); - for (const issue of verification.issues) { - console.log(` - ${issue}`); - } - killTimer(sandboxName); - let driftChattrApplied = false; - try { - const { chattrApplied } = lockAgentConfig(sandboxName, target); - driftChattrApplied = chattrApplied; - } catch (err) { - const message = err instanceof Error ? err.message : String(err); - console.error(` ERROR: ${message}`); - console.error(" Config remains drifted — manual intervention required."); - return failShieldsCommand(message, opts.throwOnError); - } - saveShieldsState(sandboxName, { - shieldsDown: false, - shieldsDownAt: null, - shieldsDownTimeout: null, - shieldsDownReason: null, - shieldsDownPolicy: null, - shieldsChattrApplied: driftChattrApplied, - }); clearTimerMarker(sandboxName); - appendAuditEntry({ - action: "shields_up", - sandbox: sandboxName, - timestamp: new Date().toISOString(), - restored_by: "operator", - reason: "drift remediation", - }); - console.log(` Lockdown re-applied for ${sandboxName} after drift.`); + console.log(" Lockdown is already active."); return; } @@ -1153,7 +1144,6 @@ function shieldsUp(sandboxName: string, opts: { throwOnError?: boolean } = {}): ); return failShieldsCommand("Saved policy snapshot is missing", opts.throwOnError); } - let firstLockChattrApplied = false; if (snapshotPath) { console.log(" Restoring restrictive policy from snapshot..."); const activation = activateLockdownFromSnapshot(sandboxName, snapshotPath); @@ -1163,14 +1153,13 @@ function shieldsUp(sandboxName: string, opts: { throwOnError?: boolean } = {}): " Config remains unlocked — manual intervention required.", ); console.error( - ` Re-lock manually via privileged sandbox exec, then run: nemoclaw ${sandboxName} shields up`, + ` Re-lock manually via kubectl exec, then run: nemoclaw ${sandboxName} shields up`, ); return failShieldsCommand(activation.error ?? "unknown restore error", opts.throwOnError); } - firstLockChattrApplied = activation.chattrApplied === true; } else { // 2b. Lock config file to read-only. - // Uses privileged sandbox exec to bypass Landlock (same as shields down). + // Uses kubectl exec to bypass Landlock (same as shields down). // Each operation runs independently and the result is verified. // If verification fails, config remains unlocked — we do not lie about state. const target = resolveAgentConfig(sandboxName); @@ -1178,8 +1167,7 @@ function shieldsUp(sandboxName: string, opts: { throwOnError?: boolean } = {}): ` Locking ${target.agentName} config (${target.configPath})...`, ); try { - const { chattrApplied } = lockAgentConfig(sandboxName, target); - firstLockChattrApplied = chattrApplied; + lockAgentConfig(sandboxName, target); } catch (err) { const message = err instanceof Error ? err.message : String(err); console.error(` ERROR: ${message}`); @@ -1187,7 +1175,7 @@ function shieldsUp(sandboxName: string, opts: { throwOnError?: boolean } = {}): " Config remains unlocked — manual intervention required.", ); console.error( - ` Re-lock manually via privileged sandbox exec, then run: nemoclaw ${sandboxName} shields up`, + ` Re-lock manually via kubectl exec, then run: nemoclaw ${sandboxName} shields up`, ); return failShieldsCommand(message, opts.throwOnError); } @@ -1207,7 +1195,6 @@ function shieldsUp(sandboxName: string, opts: { throwOnError?: boolean } = {}): shieldsDownTimeout: null, shieldsDownReason: null, shieldsDownPolicy: null, - shieldsChattrApplied: firstLockChattrApplied, // Keep snapshotPath for forensics — don't clear it }); clearTimerMarker(sandboxName); @@ -1277,17 +1264,10 @@ function shieldsStatus( // Cross-check the sandbox filesystem so a host-root tamper that reverts // protected perms back to a sandbox-writable state is surfaced as drift // instead of reported as a clean lockdown. - // - // The immutable bit is only verified when shields state recorded that - // `chattr +i` actually took effect at lock time. The privileged exec - // channel may lack CAP_LINUX_IMMUTABLE on some runtimes, so without - // that signal a missing `i` flag cannot be distinguished from "never - // set in the first place" and would surface as a false-positive drift. let driftIssues: string[] = []; try { const target = resolveConfig(sandboxName); driftIssues = verify(sandboxName, target, { - verifyChattr: state.shieldsChattrApplied === true, exec: (cmd: string[]) => privilegedSandboxExecCapture(sandboxName, cmd), assertLegacyLayout: assertNoLegacyStateLayout, }).issues; diff --git a/src/lib/shields/timer.ts b/src/lib/shields/timer.ts index b5f4bb2c43..42dec0b511 100644 --- a/src/lib/shields/timer.ts +++ b/src/lib/shields/timer.ts @@ -24,7 +24,6 @@ interface ShieldsStatePatch { shieldsDownTimeout?: number | null; shieldsDownReason?: string | null; shieldsDownPolicy?: string | null; - shieldsChattrApplied?: boolean; } interface TimerArgs { @@ -188,7 +187,6 @@ function runRestoreTimer(args: TimerArgs): void { // that interactive `shields up` uses. Fall back to the bare configPath/ // configDir from argv if resolution fails (e.g., registry unavailable). let lockVerified = true; - let autoLockChattrApplied = false; if (args.configPath) { let lockTarget: { agentName?: string; @@ -222,8 +220,7 @@ function runRestoreTimer(args: TimerArgs): void { } if (lockTarget) { try { - const { chattrApplied } = lockAgentConfig(args.sandboxName, lockTarget); - autoLockChattrApplied = chattrApplied; + lockAgentConfig(args.sandboxName, lockTarget); } catch (error: unknown) { lockVerified = false; appendAudit({ @@ -246,7 +243,6 @@ function runRestoreTimer(args: TimerArgs): void { shieldsDownTimeout: null, shieldsDownReason: null, shieldsDownPolicy: null, - shieldsChattrApplied: autoLockChattrApplied, }); appendAudit({ diff --git a/src/lib/shields/verify-lock.test.ts b/src/lib/shields/verify-lock.test.ts index 586edd564c..8d19c11eb6 100644 --- a/src/lib/shields/verify-lock.test.ts +++ b/src/lib/shields/verify-lock.test.ts @@ -22,8 +22,6 @@ const target = { sensitiveFiles: ["/sandbox/.openclaw/.config-hash"], }; -const noopLegacyLayoutAsserter = (): void => {}; - type StatLookup = Record; function makeExec(perms: StatLookup): (cmd: string[]) => string { @@ -45,10 +43,7 @@ describe("verifyShieldsLockState", () => { "/sandbox/.openclaw": "755 root:root", }); - const result = verifyShieldsLockState("openclaw", target, { - exec, - assertLegacyLayout: noopLegacyLayoutAsserter, - }); + const result = verifyShieldsLockState("openclaw", target, { exec }); expect(result.ok).toBe(true); expect(result.issues).toEqual([]); @@ -62,10 +57,7 @@ describe("verifyShieldsLockState", () => { "/sandbox/.openclaw": "2770 sandbox:sandbox", }); - const result = verifyShieldsLockState("openclaw", target, { - exec, - assertLegacyLayout: noopLegacyLayoutAsserter, - }); + const result = verifyShieldsLockState("openclaw", target, { exec }); expect(result.ok).toBe(false); expect(result.issues).toEqual( @@ -98,10 +90,7 @@ describe("verifyShieldsLockState", () => { "/sandbox/.openclaw": "755 root:root", }); - const result = verifyShieldsLockState("openclaw", target, { - exec, - assertLegacyLayout: noopLegacyLayoutAsserter, - }); + const result = verifyShieldsLockState("openclaw", target, { exec }); expect(result.ok).toBe(false); expect(result.issues).toContain( @@ -118,10 +107,7 @@ describe("verifyShieldsLockState", () => { "/sandbox/.openclaw": "775 root:root", }); - const result = verifyShieldsLockState("openclaw", target, { - exec, - assertLegacyLayout: noopLegacyLayoutAsserter, - }); + const result = verifyShieldsLockState("openclaw", target, { exec }); expect(result.ok).toBe(false); expect(result.issues).toContain("dir mode=775 (expected 755)"); @@ -133,10 +119,7 @@ describe("verifyShieldsLockState", () => { throw new Error("Container not found"); }; - const result = verifyShieldsLockState("openclaw", target, { - exec, - assertLegacyLayout: noopLegacyLayoutAsserter, - }); + const result = verifyShieldsLockState("openclaw", target, { exec }); expect(result.ok).toBe(false); expect(result.issues.some((issue: string) => issue.includes("stat failed"))).toBe( @@ -161,16 +144,12 @@ describe("verifyShieldsLockState", () => { return ""; }; - const withoutChattrCheck = verifyShieldsLockState("openclaw", target, { - exec, - assertLegacyLayout: noopLegacyLayoutAsserter, - }); + const withoutChattrCheck = verifyShieldsLockState("openclaw", target, { exec }); expect(withoutChattrCheck.ok).toBe(true); const withChattrCheck = verifyShieldsLockState("openclaw", target, { exec, verifyChattr: true, - assertLegacyLayout: noopLegacyLayoutAsserter, }); expect(withChattrCheck.ok).toBe(false); expect(withChattrCheck.issues).toEqual( @@ -181,34 +160,6 @@ describe("verifyShieldsLockState", () => { ); }); - it("records lsattr exec failure as drift when verifyChattr is requested so a missing binary is not a silent pass", async () => { - const { verifyShieldsLockState } = await loadVerifier(); - const exec = (cmd: string[]): string => { - if (cmd[0] === "stat") { - if (cmd[cmd.length - 1] === "/sandbox/.openclaw") return "755 root:root"; - return "444 root:root"; - } - if (cmd[0] === "lsattr") { - throw new Error("lsattr: command not found"); - } - return ""; - }; - - const result = verifyShieldsLockState("openclaw", target, { - exec, - verifyChattr: true, - assertLegacyLayout: noopLegacyLayoutAsserter, - }); - - expect(result.ok).toBe(false); - expect(result.issues).toEqual( - expect.arrayContaining([ - "/sandbox/.openclaw/openclaw.json lsattr failed: lsattr: command not found", - "/sandbox/.openclaw/.config-hash lsattr failed: lsattr: command not found", - ]), - ); - }); - it("surfaces a legacy state layout violation when the asserter throws", async () => { const { verifyShieldsLockState } = await loadVerifier(); const exec = makeExec({ @@ -232,19 +183,8 @@ describe("verifyShieldsLockState", () => { it("rejects calls without an exec dependency so production paths cannot silently no-op", async () => { const { verifyShieldsLockState } = await loadVerifier(); - expect(() => - verifyShieldsLockState("openclaw", target, { - assertLegacyLayout: noopLegacyLayoutAsserter, - } as unknown as Parameters[2]), - ).toThrow(/requires options\.exec/); - }); - - it("rejects calls without an assertLegacyLayout dependency so the legacy-layout check cannot be skipped", async () => { - const { verifyShieldsLockState } = await loadVerifier(); - expect(() => - verifyShieldsLockState("openclaw", target, { - exec: () => "", - } as unknown as Parameters[2]), - ).toThrow(/requires options\.assertLegacyLayout/); + expect(() => verifyShieldsLockState("openclaw", target)).toThrow( + /requires options\.exec/, + ); }); }); diff --git a/src/lib/shields/verify-lock.ts b/src/lib/shields/verify-lock.ts index a50e3857b9..79e3d90b42 100644 --- a/src/lib/shields/verify-lock.ts +++ b/src/lib/shields/verify-lock.ts @@ -6,16 +6,8 @@ // config directory, no legacy state layout, and (when the caller knows // chattr was applied) the immutable bit. Returns the list of mismatches // so callers can either fail the lock operation or surface drift after a -// host-root tamper. -// -// Failure handling: -// - `stat` failures are recorded as issues so an unreachable sandbox is -// never mistaken for a clean lockdown. -// - `lsattr` failures are recorded as issues only when `verifyChattr` is -// true. Some images do not ship `lsattr`, so without an explicit chattr -// request a missing binary is treated as an unavailable check rather -// than a tamper signal. -// - `assertLegacyLayout` failures are recorded as issues. +// host-root tamper. Stat/lsattr failures are folded into `issues` so the +// caller can decide whether to treat them as drift. export type LockTarget = { configPath: string; @@ -25,8 +17,8 @@ export type LockTarget = { export type VerifyShieldsLockOptions = { verifyChattr?: boolean; - exec: (cmd: string[]) => string; - assertLegacyLayout: (sandboxName: string, configDir: string) => void; + exec?: (cmd: string[]) => string; + assertLegacyLayout?: (sandboxName: string, configDir: string) => void; }; export type VerifyShieldsLockResult = { @@ -38,18 +30,21 @@ const EXPECTED_FILE_MODE = "444"; const EXPECTED_DIR_MODE = "755"; const EXPECTED_OWNER = "root:root"; +function noopAssertLegacyLayout(_sandboxName: string, _configDir: string): void { + // Production callers replace this with the real legacy-layout assertion; + // when omitted, the verifier treats legacy-layout state as "no issue". +} + export function verifyShieldsLockState( sandboxName: string, target: LockTarget, - options: VerifyShieldsLockOptions, + options: VerifyShieldsLockOptions = {}, ): VerifyShieldsLockResult { - if (!options || typeof options.exec !== "function") { + if (!options.exec) { throw new Error("verifyShieldsLockState requires options.exec"); } - if (typeof options.assertLegacyLayout !== "function") { - throw new Error("verifyShieldsLockState requires options.assertLegacyLayout"); - } - const { exec, assertLegacyLayout } = options; + const exec = options.exec; + const assertLegacyLayout = options.assertLegacyLayout ?? noopAssertLegacyLayout; const issues: string[] = []; const filesToVerify = [target.configPath, ...(target.sensitiveFiles || [])]; @@ -87,9 +82,8 @@ export function verifyShieldsLockState( // First whitespace-delimited token is the flags field. const [flags] = attrs.trim().split(/\s+/, 1); if (!flags.includes("i")) issues.push(`${f} immutable bit not set`); - } catch (err) { - const msg = err instanceof Error ? err.message : String(err); - issues.push(`${f} lsattr failed: ${msg}`); + } catch { + // lsattr may not be available on all images — skip } } } diff --git a/test/e2e/test-shields-config.sh b/test/e2e/test-shields-config.sh index ab239cfa39..076c63f92b 100755 --- a/test/e2e/test-shields-config.sh +++ b/test/e2e/test-shields-config.sh @@ -313,95 +313,6 @@ else fail "shields status should show UP: ${STATUS_OUTPUT}" fi -if echo "$STATUS_OUTPUT" | grep -q "DRIFTED"; then - fail "clean lockdown should not report DRIFTED: ${STATUS_OUTPUT}" -else - pass "shields status does not report DRIFTED on a clean lockdown" -fi - -# ══════════════════════════════════════════════════════════════════ -# Phase 5b: host-root tamper — shields status surfaces drift, shields up re-locks -# ══════════════════════════════════════════════════════════════════ -section "Phase 5b: shields status detects host-root tamper" - -CTR=$(docker ps --filter "name=openshell-${SANDBOX_NAME}" --format "{{.Names}}" | head -n 1) -if [ -z "${CTR}" ]; then - fail "Could not find sandbox container openshell-${SANDBOX_NAME} for tamper drill" -else - info "Tampering sandbox filesystem via host-root docker exec on ${CTR}" - CONFIG_DIR=$(dirname "${CONFIG_PATH}") - HASH_PATH="${CONFIG_DIR}/.config-hash" - - docker exec --user root "${CTR}" sh -c " - chmod 2770 '${CONFIG_DIR}' && - chown sandbox:sandbox '${CONFIG_DIR}' && - chmod 660 '${CONFIG_PATH}' '${HASH_PATH}' && - chown sandbox:sandbox '${CONFIG_PATH}' '${HASH_PATH}' - " >/dev/null 2>&1 - TAMPER_EXIT=$? - - if [ "${TAMPER_EXIT}" != "0" ]; then - fail "Could not apply tamper perms via docker exec (exit ${TAMPER_EXIT})" - else - PERMS_TAMPERED=$(docker exec --user root "${CTR}" stat -c '%a %U:%G' "${CONFIG_PATH}" 2>/dev/null || true) - info "Config perms after tamper: ${PERMS_TAMPERED}" - - DRIFT_OUTPUT=$(nemoclaw "${SANDBOX_NAME}" shields status 2>&1) - DRIFT_EXIT=$? - echo "$DRIFT_OUTPUT" - - if [ "${DRIFT_EXIT}" = "2" ]; then - pass "shields status exits 2 when sandbox filesystem drifts from declared lockdown" - else - fail "shields status should exit 2 on drift, got ${DRIFT_EXIT}: ${DRIFT_OUTPUT}" - fi - - if echo "$DRIFT_OUTPUT" | grep -q "DRIFTED"; then - pass "shields status reports DRIFTED on host-root tamper" - else - fail "shields status output missing DRIFTED marker: ${DRIFT_OUTPUT}" - fi - - if echo "$DRIFT_OUTPUT" | grep -qE "mode=660 \(expected 444\)"; then - pass "drift listing includes file mode mismatch" - else - fail "drift listing should include file mode mismatch: ${DRIFT_OUTPUT}" - fi - - if echo "$DRIFT_OUTPUT" | grep -qE "owner=sandbox:sandbox \(expected root:root\)"; then - pass "drift listing includes ownership mismatch" - else - fail "drift listing should include ownership mismatch: ${DRIFT_OUTPUT}" - fi - - info "Re-locking via shields up to exercise the documented recovery path" - RELOCK_OUTPUT=$(nemoclaw "${SANDBOX_NAME}" shields up 2>&1) - RELOCK_EXIT=$? - echo "$RELOCK_OUTPUT" - - if [ "${RELOCK_EXIT}" = "0" ]; then - pass "shields up exits 0 after re-lock from drift" - else - fail "shields up should exit 0 after re-lock, got ${RELOCK_EXIT}: ${RELOCK_OUTPUT}" - fi - - if echo "$RELOCK_OUTPUT" | grep -q "Lockdown re-applied"; then - pass "shields up reports drift remediation" - else - fail "shields up should report drift remediation: ${RELOCK_OUTPUT}" - fi - - CLEAN_OUTPUT=$(nemoclaw "${SANDBOX_NAME}" shields status 2>&1) - CLEAN_EXIT=$? - - if [ "${CLEAN_EXIT}" = "0" ] && echo "$CLEAN_OUTPUT" | grep -q "Shields: UP (lockdown active)"; then - pass "shields status returns to clean UP after shields up re-lock" - else - fail "shields status should return to clean UP after re-lock (exit ${CLEAN_EXIT}): ${CLEAN_OUTPUT}" - fi - fi -fi - # ══════════════════════════════════════════════════════════════════ # Phase 6: shields down — config returns to writable # ══════════════════════════════════════════════════════════════════ From db1fabe013fc270c9c1595b8c1381e8a4def7a86 Mon Sep 17 00:00:00 2001 From: Tinson Lai Date: Wed, 27 May 2026 14:32:25 +0000 Subject: [PATCH 12/12] fix(shields): re-lock on drift, persist chattrApplied, use central privileged exec helper Signed-off-by: Tinson Lai --- docs/security/best-practices.mdx | 4 +- src/lib/shields/index.ts | 132 +++++++++++++--------------- src/lib/shields/verify-lock.test.ts | 8 +- src/lib/shields/verify-lock.ts | 6 +- 4 files changed, 72 insertions(+), 78 deletions(-) diff --git a/docs/security/best-practices.mdx b/docs/security/best-practices.mdx index dd2b75569c..74f0047a0d 100644 --- a/docs/security/best-practices.mdx +++ b/docs/security/best-practices.mdx @@ -202,13 +202,13 @@ Writable agent state such as plugins, skills, hooks, and workspace metadata live By default, this directory starts writable so the agent can manage its own config, install skills, and write to standard home-directory paths natively. For sensitive workloads, use a reviewed host-side immutability workflow after initial setup so config and writable state entry points cannot be changed by the sandbox user. -- **DAC permissions (default).** The sandbox user owns `/sandbox/.openclaw` with mode `700` and `openclaw.json` with mode `600`, so the agent can read and write config directly. +- **DAC permissions (default).** The sandbox user owns `/sandbox/.openclaw` with mode `2770` (setgid `sandbox:sandbox`) and `openclaw.json` with mode `660`, so the agent and its group can read and write config directly. `shields status` cross-checks the locked posture against the sandbox filesystem and reports drift when a host-root tamper reverts these perms. - **Config integrity hash.** The image includes a SHA256 hash of `openclaw.json`. In the default mutable state, `.config-hash` is sandbox-owned and is not a tamper-proof trust anchor, so startup does not fail closed on that hash. When the hash is root-owned and read-only, startup enforces it and refuses to start if the hash does not match. - **Gateway token environment.** The gateway exports `OPENCLAW_GATEWAY_TOKEN` and writes it to `/tmp/nemoclaw-proxy-env.sh` for interactive sandbox sessions. Keep this in mind when deciding whether a workload should run with mutable config or an immutable config posture. | Aspect | Detail | |---|---| -| Default | The sandbox keeps `/sandbox/.openclaw` writable (`700 sandbox:sandbox`), sets `openclaw.json` to `600 sandbox:sandbox`, lets the agent manage state directly, and has the gateway place `OPENCLAW_GATEWAY_TOKEN` in `/tmp/nemoclaw-proxy-env.sh` for interactive shells. | +| Default | The sandbox keeps `/sandbox/.openclaw` writable (`2770 sandbox:sandbox`), sets `openclaw.json` to `660 sandbox:sandbox`, lets the agent manage state directly, and has the gateway place `OPENCLAW_GATEWAY_TOKEN` in `/tmp/nemoclaw-proxy-env.sh` for interactive shells. | | What you can change | Apply a reviewed host-side immutability workflow to lock config and state directories with DAC permissions and the immutable flag where available. | | Risk of default | A writable `.openclaw` directory lets the agent modify its own gateway config: disabling CORS or redirecting inference to an attacker-controlled endpoint. | | Recommendation | For always-on assistants handling sensitive workloads, lock config after initial setup. For development workflows, the writable default is appropriate. | diff --git a/src/lib/shields/index.ts b/src/lib/shields/index.ts index 59a132fc1d..7c27a46506 100644 --- a/src/lib/shields/index.ts +++ b/src/lib/shields/index.ts @@ -16,10 +16,9 @@ const { fork } = require("child_process"); const { randomBytes } = require("crypto"); const { run, runCapture, validateName } = require("../runner"); const { dockerExecFileSync } = require("../adapters/docker/exec"); -const { dockerCapture } = require("../adapters/docker/run"); -const registry = require("../state/registry") as { - getSandbox?: (name: string) => { openshellDriver?: string | null } | null; -}; +const { + privilegedSandboxExecArgv, +}: typeof import("../sandbox/privileged-exec") = require("../sandbox/privileged-exec"); const { buildPolicyGetCommand, buildPolicySetCommand, @@ -56,66 +55,12 @@ const STATE_DIR = resolveNemoclawStateDir(); // privileged sandbox exec — bypasses the sandbox's Landlock context // // openshell sandbox exec runs commands INSIDE the Landlock domain, so it -// can't modify read_only paths or change chattr flags. kubectl exec starts -// a new process in the pod that does NOT inherit the Landlock ruleset. -// On the legacy gateway we reach kubectl via the K3s container. On the -// Docker-driver gateway there is no K3s container, so we exec into the -// sandbox Docker container directly as root. +// can't modify read_only paths or change chattr flags. We delegate the +// argv shape to the central registry-scoped helper in +// src/lib/sandbox/privileged-exec.ts, which fails closed when no matching +// sandbox container is running. // --------------------------------------------------------------------------- -const K3S_CONTAINER = "openshell-cluster-nemoclaw"; - -function resolveDockerDriverSandboxContainer( - sandboxName: string, -): string | null { - try { - if (registry.getSandbox?.(sandboxName)?.openshellDriver !== "docker") { - return null; - } - } catch { - return null; - } - const prefix = `openshell-${sandboxName}-`; - const exact = `openshell-${sandboxName}`; - const output = dockerCapture(["ps", "--format", "{{.Names}}"], { - ignoreError: true, - }); - return ( - output - .split("\n") - .map((line: string) => line.trim()) - .find((name: string) => name === exact || name.startsWith(prefix)) || null - ); -} - -function kubectlExecArgv(sandboxName: string, cmd: string[]): string[] { - return [ - "exec", - K3S_CONTAINER, - "kubectl", - "exec", - "-n", - "openshell", - sandboxName, - "-c", - "agent", - "--", - ...cmd, - ]; -} - -function privilegedSandboxExecArgv( - sandboxName: string, - cmd: string[], -): string[] { - const dockerDriverContainer = - resolveDockerDriverSandboxContainer(sandboxName); - if (dockerDriverContainer) { - return ["exec", "--user", "root", dockerDriverContainer, ...cmd]; - } - return kubectlExecArgv(sandboxName, cmd); -} - function privilegedSandboxExec(sandboxName: string, cmd: string[]): void { dockerExecFileSync(privilegedSandboxExecArgv(sandboxName, cmd), { stdio: ["ignore", "pipe", "pipe"], @@ -159,6 +104,7 @@ interface ShieldsState { shieldsDownReason?: string | null; shieldsDownPolicy?: string | null; shieldsPolicySnapshotPath?: string | null; + chattrApplied?: boolean; updatedAt?: string; } @@ -345,6 +291,7 @@ function isShieldsState(value: unknown): value is ShieldsState { isOptionalNullableString(value.shieldsDownReason) && isOptionalNullableString(value.shieldsDownPolicy) && isOptionalNullableString(value.shieldsPolicySnapshotPath) && + isOptionalBoolean(value.chattrApplied) && isOptionalString(value.updatedAt) ); } @@ -645,7 +592,7 @@ function unlockAgentConfig( function lockAgentConfig( sandboxName: string, target: AgentConfigTarget, -): void { +): { chattrApplied: boolean } { const errors: string[] = []; const filesToLock = [target.configPath, ...(target.sensitiveFiles || [])]; @@ -724,6 +671,8 @@ function lockAgentConfig( if (issues.length > 0) { throw new Error(`Config not locked: ${issues.join(", ")}`); } + + return { chattrApplied: chattrSucceeded }; } function rollbackShieldsDown( @@ -735,11 +684,11 @@ function rollbackShieldsDown( const rollbackResult = run(buildPolicySetCommand(snapshotPath, sandboxName), { ignoreError: true, }); - let rollbackLocked = false; + let rollbackChattrApplied: boolean | null = null; if (rollbackResult.status === 0) { try { - lockAgentConfig(sandboxName, target); - rollbackLocked = true; + const lockResult = lockAgentConfig(sandboxName, target); + rollbackChattrApplied = lockResult.chattrApplied; } catch { console.error( " Warning: Rollback re-lock could not be verified. Check config manually.", @@ -748,13 +697,14 @@ function rollbackShieldsDown( } else { console.error(" Warning: Policy restore failed during rollback."); } - if (rollbackLocked) { + if (rollbackChattrApplied !== null) { saveShieldsState(sandboxName, { shieldsDown: false, shieldsDownAt: null, shieldsDownTimeout: null, shieldsDownReason: null, shieldsDownPolicy: null, + chattrApplied: rollbackChattrApplied, }); console.error(" Lockdown restored. Config was never left unguarded."); } else { @@ -1121,8 +1071,47 @@ function shieldsUp(sandboxName: string, opts: { throwOnError?: boolean } = {}): // shieldsDown === false means explicitly locked by a previous shields-up. // undefined (no state file) means fresh sandbox — mutable default, allow shields-up. if (state.shieldsDown === false) { + // Verify the sandbox filesystem still matches the locked posture. If a + // host-root tamper has reverted protected perms, re-apply the lock so + // the recovery hint surfaced by `shields status` actually works. + const target = resolveAgentConfig(sandboxName); + const { issues } = verifyShieldsLockState(sandboxName, target, { + verifyChattr: state.chattrApplied === true, + exec: (cmd: string[]) => privilegedSandboxExecCapture(sandboxName, cmd), + assertLegacyLayout: assertNoLegacyStateLayout, + }); + if (issues.length === 0) { + clearTimerMarker(sandboxName); + console.log(" Lockdown is already active."); + return; + } + console.log( + ` Lockdown drifted — re-applying lock for ${sandboxName}...`, + ); + let lockResult: { chattrApplied: boolean }; + try { + lockResult = lockAgentConfig(sandboxName, target); + } catch (err) { + const message = err instanceof Error ? err.message : String(err); + console.error(` ERROR: ${message}`); + console.error( + " Config remains drifted — manual intervention required.", + ); + return failShieldsCommand(message, opts.throwOnError); + } + saveShieldsState(sandboxName, { + shieldsDown: false, + chattrApplied: lockResult.chattrApplied, + }); clearTimerMarker(sandboxName); - console.log(" Lockdown is already active."); + appendAuditEntry({ + action: "shields_up", + sandbox: sandboxName, + timestamp: new Date().toISOString(), + restored_by: "operator", + reason: "drift remediation", + }); + console.log(` Lockdown re-applied for ${sandboxName}`); return; } @@ -1166,8 +1155,9 @@ function shieldsUp(sandboxName: string, opts: { throwOnError?: boolean } = {}): console.log( ` Locking ${target.agentName} config (${target.configPath})...`, ); + let lockResult: { chattrApplied: boolean }; try { - lockAgentConfig(sandboxName, target); + lockResult = lockAgentConfig(sandboxName, target); } catch (err) { const message = err instanceof Error ? err.message : String(err); console.error(` ERROR: ${message}`); @@ -1179,6 +1169,7 @@ function shieldsUp(sandboxName: string, opts: { throwOnError?: boolean } = {}): ); return failShieldsCommand(message, opts.throwOnError); } + saveShieldsState(sandboxName, { chattrApplied: lockResult.chattrApplied }); } // 3. Calculate duration @@ -1195,7 +1186,7 @@ function shieldsUp(sandboxName: string, opts: { throwOnError?: boolean } = {}): shieldsDownTimeout: null, shieldsDownReason: null, shieldsDownPolicy: null, - // Keep snapshotPath for forensics — don't clear it + // Keep snapshotPath + chattrApplied for forensics / drift re-verify }); clearTimerMarker(sandboxName); @@ -1268,6 +1259,7 @@ function shieldsStatus( try { const target = resolveConfig(sandboxName); driftIssues = verify(sandboxName, target, { + verifyChattr: state.chattrApplied === true, exec: (cmd: string[]) => privilegedSandboxExecCapture(sandboxName, cmd), assertLegacyLayout: assertNoLegacyStateLayout, }).issues; diff --git a/src/lib/shields/verify-lock.test.ts b/src/lib/shields/verify-lock.test.ts index 8d19c11eb6..d40093689c 100644 --- a/src/lib/shields/verify-lock.test.ts +++ b/src/lib/shields/verify-lock.test.ts @@ -183,8 +183,10 @@ describe("verifyShieldsLockState", () => { it("rejects calls without an exec dependency so production paths cannot silently no-op", async () => { const { verifyShieldsLockState } = await loadVerifier(); - expect(() => verifyShieldsLockState("openclaw", target)).toThrow( - /requires options\.exec/, - ); + const call = verifyShieldsLockState as unknown as ( + name: string, + lockTarget: unknown, + ) => unknown; + expect(() => call("openclaw", target)).toThrow(/requires options\.exec/); }); }); diff --git a/src/lib/shields/verify-lock.ts b/src/lib/shields/verify-lock.ts index 79e3d90b42..8f1fee5ee5 100644 --- a/src/lib/shields/verify-lock.ts +++ b/src/lib/shields/verify-lock.ts @@ -17,7 +17,7 @@ export type LockTarget = { export type VerifyShieldsLockOptions = { verifyChattr?: boolean; - exec?: (cmd: string[]) => string; + exec: (cmd: string[]) => string; assertLegacyLayout?: (sandboxName: string, configDir: string) => void; }; @@ -38,9 +38,9 @@ function noopAssertLegacyLayout(_sandboxName: string, _configDir: string): void export function verifyShieldsLockState( sandboxName: string, target: LockTarget, - options: VerifyShieldsLockOptions = {}, + options: VerifyShieldsLockOptions, ): VerifyShieldsLockResult { - if (!options.exec) { + if (!options || !options.exec) { throw new Error("verifyShieldsLockState requires options.exec"); } const exec = options.exec;