From 1ef364e6574a9298b239e75e6523d7966b6f3bf9 Mon Sep 17 00:00:00 2001 From: Tinson Lai Date: Sun, 24 May 2026 07:47:29 +0000 Subject: [PATCH 01/13] fix(shields,state): preserve gateway access under shields-up + tolerate root-owned subdirs in audit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit NemoClaw #4065: shields-up locked HIGH_RISK_STATE_DIRS as root:root, which stripped sandbox-group ownership from descendants and broke OpenClaw plugin discovery (`extensions//` became unreachable to the gateway, which is only granted sandbox-group access). It also left `agents/main/` non-writable to the sandbox user, so the OpenClaw TUI's lazy mkdir of `agents/main/sessions/` failed with EACCES on first launch under lockdown. Switch the state-dir lock to `root:sandbox` (top-level configDir is still `root:root`) so the gateway keeps `r-x` via the sandbox group on descendants stripped to 2750 by `chmod -R go-w`, and restore `agents/*/sessions` to `sandbox:sandbox 2770` after the main lock loop so the agent keeps writing session metadata under lockdown. NemoClaw #4059: pre-backup audit joined per-dir `find` invocations with `&&`, so a single permission-denied subdir made the whole chain exit 1 and the rebuild treated every state dir as failed. Join with `;` and wrap each `find` with `|| true` — the audit's real signal is its stdout (symlink / hardlink / special-file rows); exit codes from perm-denied root-owned subdirs are noise. Signed-off-by: Tinson Lai --- src/lib/shields/index.ts | 83 ++++++++++-- src/lib/state/sandbox.ts | 12 +- test/repro-2681-group-writable.test.ts | 2 +- ...epro-4065-shields-up-runtime-perms.test.ts | 122 +++++++++++++++++ test/snapshot.test.ts | 125 ++++++++++++++++++ 5 files changed, 330 insertions(+), 14 deletions(-) create mode 100644 test/repro-4065-shields-up-runtime-perms.test.ts diff --git a/src/lib/shields/index.ts b/src/lib/shields/index.ts index 051b27a430..196c33fbec 100644 --- a/src/lib/shields/index.ts +++ b/src/lib/shields/index.ts @@ -337,11 +337,20 @@ function isShieldsState(value: unknown): value is ShieldsState { // --------------------------------------------------------------------------- // NC-2227-05: State directories locked by shields-up. // -// During shields-up, these must be locked (root:root 755) so the sandbox -// user cannot create new entries or modify existing ones. This covers both -// executable state (skills, hooks, cron jobs, extensions, plugins, agent -// definitions) and writable agent state entry points such as workspace and -// memory, so a stale symlink bridge cannot bypass the lockdown. +// During shields-up, these must be locked so the sandbox user cannot create +// new entries or modify existing ones. This covers both executable state +// (skills, hooks, cron jobs, extensions, plugins, agent definitions) and +// writable agent state entry points such as workspace and memory, so a stale +// symlink bridge cannot bypass the lockdown. +// +// Lock ownership is `root:sandbox` (not `root:root`): the OpenClaw gateway +// runs as `gateway`, which is a member of the sandbox group via +// `Dockerfile.base`. Owning these dirs as `root:sandbox` preserves the +// sandbox group's `r-x` access to descendant files (after `chmod -R go-w`), +// so plugin discovery (NemoClaw #4065) can still scan `extensions//` +// while the sandbox user is denied write through the stripped group/other +// write bits. The top-level config dir is still owned by `root:root` +// (lockAgentConfig) — only the high-risk state subtrees switch. // // The list is a superset: directories that don't exist in a given agent's // config dir are silently skipped. @@ -363,10 +372,18 @@ const HIGH_RISK_STATE_DIRS = [ "telegram", ]; +// Runtime-data subpaths the agent must keep writing to under shields-up. +// Each entry is a shell glob relative to the agent config dir; after the +// main lock loop the matching directories are restored to +// `sandbox:sandbox 2770` so they remain writable inside an otherwise-locked +// tree (NemoClaw #4065). +const WRITABLE_RUNTIME_SUBPATHS = ["agents/*/sessions"]; + function applyStateDirLockMode( sandboxName: string, configDir: string, owner: string, + isLocking: boolean, ): void { // Locking (shields-up) strips group + world write. Unlocking (shields-down) // restores the same group-readable/writable + o-rwx mutable-default contract @@ -376,7 +393,6 @@ function applyStateDirLockMode( // The unlock variant uses `g+rwX,o-rwx` because a prior lock can strip group // access from descendants. Without re-adding group read/write explicitly, // shields-down would leave nested files readable/writable only by owner. - const isLocking = owner === "root:root"; const recursiveMode = isLocking ? "go-w" : "g+rwX,o-rwx"; const dirMode = isLocking ? "755" : "2770"; @@ -443,6 +459,45 @@ done } catch { // Best effort; verification below catches the primary config lock. } + + if (isLocking) { + restoreWritableRuntimeSubpaths(sandboxName, configDir); + } +} + +function restoreWritableRuntimeSubpaths( + sandboxName: string, + configDir: string, +): void { + try { + privilegedSandboxExec(sandboxName, [ + "sh", + "-c", + ` +set -u +config_dir="$1" +shift +for pattern in "$@"; do + for entry in "$config_dir"/$pattern; do + case "$entry" in + *"*"*) continue ;; + esac + parent="$(dirname "$entry")" + [ -d "$parent" ] || continue + mkdir -p "$entry" 2>/dev/null || continue + chown -R sandbox:sandbox "$entry" 2>/dev/null || true + chmod 2770 "$entry" 2>/dev/null || true + chmod -R g+rwX,o-rwx "$entry" 2>/dev/null || true + done +done +`, + "sh", + configDir, + ...WRITABLE_RUNTIME_SUBPATHS, + ]); + } catch { + // Best effort; sandbox can recreate sessions/ in shields-down if missing. + } } function legacyDataDirFor(configDir: string): string { @@ -550,7 +605,12 @@ function unlockAgentConfig( // NC-2227-05: Restore sandbox ownership on locked state directories. // Use chown -R to restore the full tree (files within may have been // locked to root:root by a prior shields-up). - applyStateDirLockMode(sandboxName, target.configDir, "sandbox:sandbox"); + applyStateDirLockMode( + sandboxName, + target.configDir, + "sandbox:sandbox", + false, + ); if (errors.length > 0) { console.error( @@ -682,10 +742,11 @@ function lockAgentConfig( } } - // NC-2227-05: Lock state directories. Root-own the directory and set 755 so - // the sandbox user can read/execute but cannot create new entries or modify - // existing ones. - applyStateDirLockMode(sandboxName, target.configDir, "root:root"); + // NC-2227-05: Lock state directories. Owned by `root:sandbox` so the + // gateway (in sandbox group) can still read plugin/agent code while the + // sandbox user is denied write through `chmod -R go-w`. See + // HIGH_RISK_STATE_DIRS doc above. Top-level configDir stays root:root. + applyStateDirLockMode(sandboxName, target.configDir, "root:sandbox", true); // OpenClaw's mutable-default config root is setgid (#2681). Clear setgid // after descendant locking so shields-up verifies the root config dir as diff --git a/src/lib/state/sandbox.ts b/src/lib/state/sandbox.ts index e996b4cd9c..521c1f400d 100644 --- a/src/lib/state/sandbox.ts +++ b/src/lib/state/sandbox.ts @@ -1105,12 +1105,20 @@ export function backupSandboxState(sandboxName: string, options: BackupOptions = // empty for non-symlinks but always present, so the field count is // stable. Tab separator assumes state-dir paths don't contain tabs, // matching the wider convention in this file. + // Per-dir `find` invocations are joined with `;` (not `&&`) and each + // is tolerant of its own exit code via `|| true`. The base image bakes + // a few state subdirs as root-owned (e.g. `extensions/`, + // `agents/`) and `find` walking those from the sandbox-user SSH + // session exits 1 on permission denied. The audit's real signal is + // stdout (the printf-emitted symlink/hardlink/special-file rows); + // letting one perm-denied subdir abort the whole chain blocks legitimate + // rebuilds on a fresh sandbox (NemoClaw #4059). const auditCmd = existingDirs .map( (d) => - `find ${shellQuote(`${dir}/${d}`)} \\( -type l -o \\( -type f -a -links +1 \\) -o \\( ! -type f -a ! -type d \\) \\) -printf "%y\\t%p\\t%l\\n" 2>/dev/null`, + `{ find ${shellQuote(`${dir}/${d}`)} \\( -type l -o \\( -type f -a -links +1 \\) -o \\( ! -type f -a ! -type d \\) \\) -printf "%y\\t%p\\t%l\\n" 2>/dev/null || true; }`, ) - .join(" && "); + .join("; "); _log(`Pre-backup audit: checking for symlinks, hard links, and special files`); const auditResult = spawnSync("ssh", [...sshArgs(configFile, sandboxName), auditCmd], { encoding: "utf-8", diff --git a/test/repro-2681-group-writable.test.ts b/test/repro-2681-group-writable.test.ts index 88f6bbcfd4..c11931daff 100644 --- a/test/repro-2681-group-writable.test.ts +++ b/test/repro-2681-group-writable.test.ts @@ -216,7 +216,7 @@ process.stdout.write(JSON.stringify(calls)); command[0] === "sh" && command[1] === "-c" && command.includes("/sandbox/.openclaw") && - command.includes("root:root") && + command.includes("root:sandbox") && command.includes("go-w") && command.includes("755"), ); diff --git a/test/repro-4065-shields-up-runtime-perms.test.ts b/test/repro-4065-shields-up-runtime-perms.test.ts new file mode 100644 index 0000000000..d2301938f2 --- /dev/null +++ b/test/repro-4065-shields-up-runtime-perms.test.ts @@ -0,0 +1,122 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +/** + * Regression coverage for NemoClaw #4065. + * + * Pre-fix: shields-up `chown -R root:root` on state dirs left descendants + * unreadable to the sandbox-group gateway (plugin discovery emitted "plugin + * not found"), and `agents/main/sessions/` could not be created by OpenClaw + * TUI at runtime. + * + * Post-fix: state dirs are owned by `root:sandbox` (sandbox group preserves + * `r-x` to descendants), and `agents/*\/sessions` is restored to + * `sandbox:sandbox 2770` after the main lock loop so the agent keeps + * writing session metadata under lockdown. + */ + +import { spawnSync } from "node:child_process"; +import { describe, expect, it } from "vitest"; + +function runLockAgentConfigProbe(): string[][] { + const probe = spawnSync( + process.execPath, + [ + "-e", + String.raw` +const Module = require("node:module"); +const originalLoad = Module._load; +const calls = []; +Module._load = function patchedLoad(request, parent, isMain) { + if (request === "../adapters/docker/exec") { + return { + dockerExecFileSync(args) { + const separator = args.indexOf("--"); + const command = separator >= 0 ? args.slice(separator + 1) : args; + calls.push(command); + if (command[0] === "stat" && command[1] === "-c") { + return command.at(-1) === "/sandbox/.openclaw" + ? "755 root:root\n" + : "444 root:root\n"; + } + if (command[0] === "lsattr") { + return "----i----------------- " + command.at(-1) + "\n"; + } + return ""; + }, + }; + } + return originalLoad.call(this, request, parent, isMain); +}; +const { lockAgentConfig } = require("./dist/lib/shields/index.js"); +lockAgentConfig("sandbox-pod", { + agentName: "openclaw", + configPath: "/sandbox/.openclaw/openclaw.json", + configDir: "/sandbox/.openclaw", + sensitiveFiles: ["/sandbox/.openclaw/.config-hash"], +}); +process.stdout.write(JSON.stringify(calls)); +`, + ], + { encoding: "utf-8", timeout: 5000 }, + ); + expect(probe.status).toBe(0); + return JSON.parse(probe.stdout) as string[][]; +} + +describe("Issue #4065 — shields-up preserves sandbox-group access + runtime sessions writable", () => { + it("locks each high-risk state dir to root:sandbox so the gateway keeps r-x via the sandbox group", () => { + const commands = runLockAgentConfigProbe(); + + const stateDirChowns = commands.filter( + (command) => + command[0] === "chown" && + command[1] === "-R" && + typeof command[3] === "string" && + command[3].startsWith("/sandbox/.openclaw/"), + ); + + expect(stateDirChowns.length).toBeGreaterThan(0); + for (const command of stateDirChowns) { + expect(command[2]).toBe("root:sandbox"); + } + // Plugin discovery (#4065): extensions/ must be locked as root:sandbox so + // openclaw can scan extensions// via the sandbox group; root:root + // would block opendir from descendants stripped to 2750 by `chmod -R go-w`. + expect(stateDirChowns.map((command) => command[3])).toContain( + "/sandbox/.openclaw/extensions", + ); + expect(stateDirChowns.map((command) => command[3])).toContain( + "/sandbox/.openclaw/agents", + ); + }); + + it("keeps the top-level config dir owned by root:root (lock contract unchanged)", () => { + const commands = runLockAgentConfigProbe(); + expect(commands).toContainEqual([ + "chown", + "root:root", + "/sandbox/.openclaw", + ]); + expect(commands).toContainEqual([ + "chown", + "root:root", + "/sandbox/.openclaw/openclaw.json", + ]); + }); + + it("restores agents/*/sessions to sandbox:sandbox 2770 after the main lock loop", () => { + const commands = runLockAgentConfigProbe(); + const restoreShell = commands.find( + (command) => + command[0] === "sh" && + command[1] === "-c" && + command.includes("/sandbox/.openclaw") && + command.includes("agents/*/sessions") && + typeof command[2] === "string" && + command[2].includes("chown -R sandbox:sandbox") && + command[2].includes("chmod 2770"), + ); + expect(restoreShell).toBeDefined(); + }); +}); diff --git a/test/snapshot.test.ts b/test/snapshot.test.ts index 4c1f2c7fd1..8bcc48a086 100644 --- a/test/snapshot.test.ts +++ b/test/snapshot.test.ts @@ -1044,6 +1044,131 @@ process.exit(0); fs.rmSync(fixture, { recursive: true, force: true }); } }); + + // Regression for NemoClaw #4059: when `find` walks a state dir that + // contains a root-owned subdir (e.g. base-image `extensions/` or + // `agents/`), the sandbox-user SSH session emits "Permission denied" + // on those subdirs and `find` exits non-zero. The pre-fix audit joined + // each per-dir `find` with `&&`, so a single non-zero exit aborted the + // whole audit and rebuild treated every state dir as failed. The audit's + // real signal is its stdout (symlink / hardlink / special-file rows); + // exit codes from permission-denied subdirs are noise. + it("treats audit-find exit 1 with empty stdout as a successful audit (NemoClaw #4059)", () => { + const fixture = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-audit-perm-denied-")); + const oldPath = process.env.PATH; + const oldOpenshell = process.env.NEMOCLAW_OPENSHELL_BIN; + try { + const binDir = path.join(fixture, "bin"); + const openclawDir = path.join(fixture, "sandbox-root", ".openclaw"); + const existingDirs = ["agents", "extensions", "workspace"]; + fs.mkdirSync(binDir, { recursive: true }); + for (const d of existingDirs) fs.mkdirSync(path.join(openclawDir, d), { recursive: true }); + + const openshell = writeFakeOpenshell(binDir); + writeExecutable( + path.join(binDir, "ssh"), + `#!/usr/bin/env node +const { spawnSync } = require("node:child_process"); +const fs = require("node:fs"); +const cmd = process.argv[process.argv.length - 1] || ""; +const existingDirs = ${JSON.stringify(existingDirs)}; +if (cmd.includes("[ -d ")) { + process.stdout.write(existingDirs.join("\\n") + "\\n"); + process.exit(0); +} +if (cmd.includes("find ")) { + // No symlinks / hardlinks / special files, but pretend one of the per-dir + // \`find\` invocations hit a root-owned subdir and exited 1. With the + // post-#4059 audit shape (\`{ find … || true; }\` joined by \`;\`), the + // overall SSH exit code is 0 and stdout is empty. + process.exit(0); +} +if (cmd.includes("tar -cf -")) { + const r = spawnSync("tar", ["-cf", "-", "-C", ${JSON.stringify(openclawDir)}, ...existingDirs], { + stdio: ["ignore", "pipe", "pipe"], + }); + if (r.stdout) fs.writeSync(1, r.stdout); + process.exit(r.status || 0); +} +process.exit(0); +`, + ); + + writeOpenClawRegistry("alpha"); + process.env.NEMOCLAW_OPENSHELL_BIN = openshell; + process.env.PATH = `${binDir}:${oldPath || ""}`; + + const backup = sandboxState.backupSandboxState("alpha"); + expect(backup.success).toBe(true); + expect(backup.error).toBeUndefined(); + expect(backup.backedUpDirs).toEqual(existingDirs); + } finally { + if (oldOpenshell === undefined) { + delete process.env.NEMOCLAW_OPENSHELL_BIN; + } else { + process.env.NEMOCLAW_OPENSHELL_BIN = oldOpenshell; + } + process.env.PATH = oldPath; + fs.rmSync(fixture, { recursive: true, force: true }); + } + }); + + // Complement to the test above: even when per-dir `find` exits non-zero on + // root-owned subdirs, real violations from other dirs in the chain must + // still surface. With `;`-joining each find still runs; stdout from the + // dirs the SSH user can traverse remains intact. + it("still rejects violations from readable dirs even if a sibling find exits non-zero", () => { + const fixture = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-audit-mixed-perm-")); + const oldPath = process.env.PATH; + const oldOpenshell = process.env.NEMOCLAW_OPENSHELL_BIN; + try { + const binDir = path.join(fixture, "bin"); + const openclawDir = path.join(fixture, "sandbox-root", ".openclaw"); + const existingDirs = ["agents", "workspace"]; + fs.mkdirSync(binDir, { recursive: true }); + for (const d of existingDirs) fs.mkdirSync(path.join(openclawDir, d), { recursive: true }); + + // `agents` simulates perm-denied (no rows emitted); `workspace` emits + // a non-whitelisted symlink that the audit must still catch. + const auditLines = [ + "l\t/sandbox/.openclaw/workspace/leak\t../openclaw.json", + ].join("\n"); + + const openshell = writeFakeOpenshell(binDir); + writeExecutable( + path.join(binDir, "ssh"), + `#!/usr/bin/env node +const cmd = process.argv[process.argv.length - 1] || ""; +const existingDirs = ${JSON.stringify(existingDirs)}; +if (cmd.includes("[ -d ")) { + process.stdout.write(existingDirs.join("\\n") + "\\n"); + process.exit(0); +} +if (cmd.includes("find ")) { + process.stdout.write(${JSON.stringify(auditLines)} + "\\n"); + process.exit(0); +} +process.exit(0); +`, + ); + + writeOpenClawRegistry("alpha"); + process.env.NEMOCLAW_OPENSHELL_BIN = openshell; + process.env.PATH = `${binDir}:${oldPath || ""}`; + + const backup = sandboxState.backupSandboxState("alpha"); + expect(backup.success).toBe(false); + expect(backup.error).toMatch(/workspace\/leak/); + } finally { + if (oldOpenshell === undefined) { + delete process.env.NEMOCLAW_OPENSHELL_BIN; + } else { + process.env.NEMOCLAW_OPENSHELL_BIN = oldOpenshell; + } + process.env.PATH = oldPath; + fs.rmSync(fixture, { recursive: true, force: true }); + } + }); }); describe("Hermes durable state files", () => { From 15d095f4845aef820a1b3661855c531758f15fc1 Mon Sep 17 00:00:00 2001 From: Tinson Lai Date: Sun, 24 May 2026 08:11:55 +0000 Subject: [PATCH 02/13] refactor(shields,state): drop issue refs from comments + rename test file MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Address review: scrub `#4065` / `#4059` mentions from production code comments and test docstrings, and rename the new shields-up regression test from `repro-4065-…` to `shields-up-runtime-perms.test.ts` so the filename describes behaviour rather than an issue number. Signed-off-by: Tinson Lai --- src/lib/shields/index.ts | 8 ++++---- src/lib/state/sandbox.ts | 2 +- ...st.ts => shields-up-runtime-perms.test.ts} | 19 +------------------ test/snapshot.test.ts | 18 +----------------- 4 files changed, 7 insertions(+), 40 deletions(-) rename test/{repro-4065-shields-up-runtime-perms.test.ts => shields-up-runtime-perms.test.ts} (78%) diff --git a/src/lib/shields/index.ts b/src/lib/shields/index.ts index 196c33fbec..5c71581da4 100644 --- a/src/lib/shields/index.ts +++ b/src/lib/shields/index.ts @@ -347,9 +347,9 @@ function isShieldsState(value: unknown): value is ShieldsState { // runs as `gateway`, which is a member of the sandbox group via // `Dockerfile.base`. Owning these dirs as `root:sandbox` preserves the // sandbox group's `r-x` access to descendant files (after `chmod -R go-w`), -// so plugin discovery (NemoClaw #4065) can still scan `extensions//` -// while the sandbox user is denied write through the stripped group/other -// write bits. The top-level config dir is still owned by `root:root` +// so plugin discovery can still scan `extensions//` while the +// sandbox user is denied write through the stripped group/other write +// bits. The top-level config dir is still owned by `root:root` // (lockAgentConfig) — only the high-risk state subtrees switch. // // The list is a superset: directories that don't exist in a given agent's @@ -376,7 +376,7 @@ const HIGH_RISK_STATE_DIRS = [ // Each entry is a shell glob relative to the agent config dir; after the // main lock loop the matching directories are restored to // `sandbox:sandbox 2770` so they remain writable inside an otherwise-locked -// tree (NemoClaw #4065). +// tree. const WRITABLE_RUNTIME_SUBPATHS = ["agents/*/sessions"]; function applyStateDirLockMode( diff --git a/src/lib/state/sandbox.ts b/src/lib/state/sandbox.ts index 521c1f400d..9906e4b416 100644 --- a/src/lib/state/sandbox.ts +++ b/src/lib/state/sandbox.ts @@ -1112,7 +1112,7 @@ export function backupSandboxState(sandboxName: string, options: BackupOptions = // session exits 1 on permission denied. The audit's real signal is // stdout (the printf-emitted symlink/hardlink/special-file rows); // letting one perm-denied subdir abort the whole chain blocks legitimate - // rebuilds on a fresh sandbox (NemoClaw #4059). + // rebuilds. const auditCmd = existingDirs .map( (d) => diff --git a/test/repro-4065-shields-up-runtime-perms.test.ts b/test/shields-up-runtime-perms.test.ts similarity index 78% rename from test/repro-4065-shields-up-runtime-perms.test.ts rename to test/shields-up-runtime-perms.test.ts index d2301938f2..dcc32e0f32 100644 --- a/test/repro-4065-shields-up-runtime-perms.test.ts +++ b/test/shields-up-runtime-perms.test.ts @@ -1,20 +1,6 @@ // SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. // SPDX-License-Identifier: Apache-2.0 -/** - * Regression coverage for NemoClaw #4065. - * - * Pre-fix: shields-up `chown -R root:root` on state dirs left descendants - * unreadable to the sandbox-group gateway (plugin discovery emitted "plugin - * not found"), and `agents/main/sessions/` could not be created by OpenClaw - * TUI at runtime. - * - * Post-fix: state dirs are owned by `root:sandbox` (sandbox group preserves - * `r-x` to descendants), and `agents/*\/sessions` is restored to - * `sandbox:sandbox 2770` after the main lock loop so the agent keeps - * writing session metadata under lockdown. - */ - import { spawnSync } from "node:child_process"; import { describe, expect, it } from "vitest"; @@ -64,7 +50,7 @@ process.stdout.write(JSON.stringify(calls)); return JSON.parse(probe.stdout) as string[][]; } -describe("Issue #4065 — shields-up preserves sandbox-group access + runtime sessions writable", () => { +describe("shields-up state-dir lock preserves sandbox-group access + runtime sessions writable", () => { it("locks each high-risk state dir to root:sandbox so the gateway keeps r-x via the sandbox group", () => { const commands = runLockAgentConfigProbe(); @@ -80,9 +66,6 @@ describe("Issue #4065 — shields-up preserves sandbox-group access + runtime se for (const command of stateDirChowns) { expect(command[2]).toBe("root:sandbox"); } - // Plugin discovery (#4065): extensions/ must be locked as root:sandbox so - // openclaw can scan extensions// via the sandbox group; root:root - // would block opendir from descendants stripped to 2750 by `chmod -R go-w`. expect(stateDirChowns.map((command) => command[3])).toContain( "/sandbox/.openclaw/extensions", ); diff --git a/test/snapshot.test.ts b/test/snapshot.test.ts index 8bcc48a086..17b557124b 100644 --- a/test/snapshot.test.ts +++ b/test/snapshot.test.ts @@ -1045,15 +1045,7 @@ process.exit(0); } }); - // Regression for NemoClaw #4059: when `find` walks a state dir that - // contains a root-owned subdir (e.g. base-image `extensions/` or - // `agents/`), the sandbox-user SSH session emits "Permission denied" - // on those subdirs and `find` exits non-zero. The pre-fix audit joined - // each per-dir `find` with `&&`, so a single non-zero exit aborted the - // whole audit and rebuild treated every state dir as failed. The audit's - // real signal is its stdout (symlink / hardlink / special-file rows); - // exit codes from permission-denied subdirs are noise. - it("treats audit-find exit 1 with empty stdout as a successful audit (NemoClaw #4059)", () => { + it("treats audit-find exit 1 with empty stdout as a successful audit", () => { const fixture = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-audit-perm-denied-")); const oldPath = process.env.PATH; const oldOpenshell = process.env.NEMOCLAW_OPENSHELL_BIN; @@ -1077,10 +1069,6 @@ if (cmd.includes("[ -d ")) { process.exit(0); } if (cmd.includes("find ")) { - // No symlinks / hardlinks / special files, but pretend one of the per-dir - // \`find\` invocations hit a root-owned subdir and exited 1. With the - // post-#4059 audit shape (\`{ find … || true; }\` joined by \`;\`), the - // overall SSH exit code is 0 and stdout is empty. process.exit(0); } if (cmd.includes("tar -cf -")) { @@ -1113,10 +1101,6 @@ process.exit(0); } }); - // Complement to the test above: even when per-dir `find` exits non-zero on - // root-owned subdirs, real violations from other dirs in the chain must - // still surface. With `;`-joining each find still runs; stdout from the - // dirs the SSH user can traverse remains intact. it("still rejects violations from readable dirs even if a sibling find exits non-zero", () => { const fixture = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-audit-mixed-perm-")); const oldPath = process.env.PATH; From 31ce5693e17db64ad95ce41d75623cfc296c3682 Mon Sep 17 00:00:00 2001 From: Tinson Lai Date: Sun, 24 May 2026 08:34:46 +0000 Subject: [PATCH 03/13] fix(shields): create agents//sessions on lock + tighten audit-find test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Address review: 1. `restoreWritableRuntimeSubpaths` expanded the full pattern `agents/*/sessions` as a single glob. On a fresh sandbox where `sessions` does not exist yet, the glob has no matches and the shell leaves the literal pattern, which the `*"*"*` guard then drops — so `sessions/` was never created and the post-lockdown TUI mkdir still failed with EACCES. Split each pattern into a parent glob (expanded against the existing tree) plus a leaf to create, so the helper always mkdir's the missing leaf inside every existing parent. 2. The two pre-backup audit tests stubbed the SSH fake as always-exit-0 on `find`, so the `|| true` tolerance wrapper was not actually exercised. Make the fake exit non-zero with a Permission-denied stderr unless the audit cmd includes `|| true`, so the tests fail loudly if the wrapper is dropped. 3. New behavioural test runs the actual restore-helper script body against a real filesystem fixture and asserts that `agents/main/sessions` is created when only `agents/main` exists beforehand. Signed-off-by: Tinson Lai --- src/lib/shields/index.ts | 23 +++++++++++----- test/shields-up-runtime-perms.test.ts | 38 +++++++++++++++++++++++++++ test/snapshot.test.ts | 18 +++++++++++++ 3 files changed, 72 insertions(+), 7 deletions(-) diff --git a/src/lib/shields/index.ts b/src/lib/shields/index.ts index 5c71581da4..e36bf4f183 100644 --- a/src/lib/shields/index.ts +++ b/src/lib/shields/index.ts @@ -478,16 +478,25 @@ set -u config_dir="$1" shift for pattern in "$@"; do - for entry in "$config_dir"/$pattern; do - case "$entry" in + case "$pattern" in + */*) prefix="\${pattern%/*}"; leaf="\${pattern##*/}" ;; + *) prefix=""; leaf="$pattern" ;; + esac + if [ -n "$prefix" ]; then + set -- "$config_dir"/$prefix + else + set -- "$config_dir" + fi + for parent in "$@"; do + case "$parent" in *"*"*) continue ;; esac - parent="$(dirname "$entry")" [ -d "$parent" ] || continue - mkdir -p "$entry" 2>/dev/null || continue - chown -R sandbox:sandbox "$entry" 2>/dev/null || true - chmod 2770 "$entry" 2>/dev/null || true - chmod -R g+rwX,o-rwx "$entry" 2>/dev/null || true + target="$parent/$leaf" + mkdir -p "$target" 2>/dev/null || continue + chown -R sandbox:sandbox "$target" 2>/dev/null || true + chmod 2770 "$target" 2>/dev/null || true + chmod -R g+rwX,o-rwx "$target" 2>/dev/null || true done done `, diff --git a/test/shields-up-runtime-perms.test.ts b/test/shields-up-runtime-perms.test.ts index dcc32e0f32..c94f43d379 100644 --- a/test/shields-up-runtime-perms.test.ts +++ b/test/shields-up-runtime-perms.test.ts @@ -1,6 +1,9 @@ // SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. // SPDX-License-Identifier: Apache-2.0 +import fs from "node:fs"; +import os from "node:os"; +import path from "node:path"; import { spawnSync } from "node:child_process"; import { describe, expect, it } from "vitest"; @@ -102,4 +105,39 @@ describe("shields-up state-dir lock preserves sandbox-group access + runtime ses ); expect(restoreShell).toBeDefined(); }); + + // Behavioral check against a real filesystem fixture: the restore script + // must mkdir `agents//sessions` even when the leaf does not yet exist + // (fresh sandbox, never-run TUI). The pre-fix script's case-`*` guard + // skipped the literal pattern when the glob matched nothing, leaving + // `sessions/` uncreated and the post-lockdown TUI mkdir blocked. + it("creates agents//sessions under a fresh agent dir that has no sessions yet", () => { + const fixture = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-shields-runtime-")); + const configDir = path.join(fixture, ".openclaw"); + const agentDir = path.join(configDir, "agents", "main"); + fs.mkdirSync(agentDir, { recursive: true }); + + const restoreShell = runLockAgentConfigProbe().find( + (command) => + command[0] === "sh" && + command[1] === "-c" && + command.includes("agents/*/sessions"), + ); + if (!restoreShell) { + throw new Error("restore-writable-runtime-subpaths shell command not found"); + } + const script = restoreShell[2]; + const patterns = restoreShell.slice(4); + + const result = spawnSync( + "bash", + ["-c", `${script}\n`, "sh", configDir, ...patterns], + { encoding: "utf-8", timeout: 5000 }, + ); + expect(result.status).toBe(0); + expect(fs.existsSync(path.join(agentDir, "sessions"))).toBe(true); + expect(fs.statSync(path.join(agentDir, "sessions")).isDirectory()).toBe(true); + + fs.rmSync(fixture, { recursive: true, force: true }); + }); }); diff --git a/test/snapshot.test.ts b/test/snapshot.test.ts index 17b557124b..f1cb0c6f15 100644 --- a/test/snapshot.test.ts +++ b/test/snapshot.test.ts @@ -1069,6 +1069,15 @@ if (cmd.includes("[ -d ")) { process.exit(0); } if (cmd.includes("find ")) { + // Simulate a permission-denied subdir: when the audit cmd lacks the + // \`|| true\` tolerance wrapper (pre-fix shape), exit non-zero so the + // caller treats it as audit failure. The post-fix shape wraps each + // \`find\` with \`|| true\` and joins with \`;\`, so the audit cmd as a + // whole exits 0 even though a remote \`find\` would have exited 1. + if (!cmd.includes("|| true")) { + process.stderr.write("find: '/sandbox/.openclaw/extensions/nemoclaw': Permission denied\\n"); + process.exit(1); + } process.exit(0); } if (cmd.includes("tar -cf -")) { @@ -1129,6 +1138,15 @@ if (cmd.includes("[ -d ")) { process.exit(0); } if (cmd.includes("find ")) { + // Match real-shell behaviour: without the \`|| true\` tolerance wrapper + // the perm-denied sibling \`find\` would have aborted the chain. The + // post-fix audit cmd still emits the violation stdout because \`;\` + // joins each per-dir block so the readable sibling's output is + // preserved. + if (!cmd.includes("|| true")) { + process.stderr.write("find: '/sandbox/.openclaw/agents/main': Permission denied\\n"); + process.exit(1); + } process.stdout.write(${JSON.stringify(auditLines)} + "\\n"); process.exit(0); } From 8608886b21b09df026e49326d798f54742b81072 Mon Sep 17 00:00:00 2001 From: Tinson Lai Date: Sun, 24 May 2026 09:05:09 +0000 Subject: [PATCH 04/13] fix(shields): drop symlinked parents in runtime-subpath restore + fix test argv slice MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Address review: 1. The privileged restore helper called `[ -d "$parent" ]` before `mkdir`/`chown`/`chmod`, but `[ -d ]` follows symlinks. A pre-lockdown agent that swapped `agents/` for a symlink to an arbitrary host path could redirect the post-lock `mkdir -p ".../sessions"` and `chown -R sandbox:sandbox` through that link and rewrite ownership on any directory the privileged exec context can reach. Drop the parent (and the target leaf) when either is a symlink, before any mutation. 2. The behavioural test extracted patterns with `slice(4)`, which kept the captured `configDir` in the argv passed to bash — so the helper ran with `configDir` listed twice and the test argv diverged from the real `privilegedSandboxExec` call shape. Use `slice(5)` so only the patterns are forwarded. 3. New behavioural test asserts the symlink guard: when `agents/` is a symlink to a sibling host directory, the helper must not create `sessions/` under either the link target or the link itself. Also reword one comment to avoid contested terminology. Signed-off-by: Tinson Lai --- src/lib/shields/index.ts | 2 ++ test/shields-up-runtime-perms.test.ts | 43 ++++++++++++++++++++++++++- test/snapshot.test.ts | 3 +- 3 files changed, 46 insertions(+), 2 deletions(-) diff --git a/src/lib/shields/index.ts b/src/lib/shields/index.ts index e36bf4f183..51cb7d746d 100644 --- a/src/lib/shields/index.ts +++ b/src/lib/shields/index.ts @@ -491,8 +491,10 @@ for pattern in "$@"; do case "$parent" in *"*"*) continue ;; esac + [ -L "$parent" ] && continue [ -d "$parent" ] || continue target="$parent/$leaf" + [ -L "$target" ] && continue mkdir -p "$target" 2>/dev/null || continue chown -R sandbox:sandbox "$target" 2>/dev/null || true chmod 2770 "$target" 2>/dev/null || true diff --git a/test/shields-up-runtime-perms.test.ts b/test/shields-up-runtime-perms.test.ts index c94f43d379..32d554d72e 100644 --- a/test/shields-up-runtime-perms.test.ts +++ b/test/shields-up-runtime-perms.test.ts @@ -127,7 +127,10 @@ describe("shields-up state-dir lock preserves sandbox-group access + runtime ses throw new Error("restore-writable-runtime-subpaths shell command not found"); } const script = restoreShell[2]; - const patterns = restoreShell.slice(4); + // Captured argv: ["sh", "-c", script, "sh", "/sandbox/.openclaw", ...patterns]. + // Drop everything up to and including the probed configDir so the fixture + // configDir is passed exactly once when re-running the script body. + const patterns = restoreShell.slice(5); const result = spawnSync( "bash", @@ -140,4 +143,42 @@ describe("shields-up state-dir lock preserves sandbox-group access + runtime ses fs.rmSync(fixture, { recursive: true, force: true }); }); + + // Defence in depth: if a malicious agent points `agents/` at /etc or + // any other host path before shields-up runs, the privileged restore + // helper must not mkdir/chown/chmod through that symlink. The script + // must drop symlinked parents (and symlinked targets) before touching + // them. + it("refuses to follow a symlinked agents/ parent during the runtime-subpath restore", () => { + const fixture = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-shields-symlink-")); + const configDir = path.join(fixture, ".openclaw"); + const agentsRoot = path.join(configDir, "agents"); + const hostTarget = path.join(fixture, "host-target"); + fs.mkdirSync(agentsRoot, { recursive: true }); + fs.mkdirSync(hostTarget, { recursive: true }); + fs.symlinkSync(hostTarget, path.join(agentsRoot, "main")); + + const restoreShell = runLockAgentConfigProbe().find( + (command) => + command[0] === "sh" && + command[1] === "-c" && + command.includes("agents/*/sessions"), + ); + if (!restoreShell) { + throw new Error("restore-writable-runtime-subpaths shell command not found"); + } + const script = restoreShell[2]; + const patterns = restoreShell.slice(5); + + const result = spawnSync( + "bash", + ["-c", `${script}\n`, "sh", configDir, ...patterns], + { encoding: "utf-8", timeout: 5000 }, + ); + expect(result.status).toBe(0); + expect(fs.existsSync(path.join(hostTarget, "sessions"))).toBe(false); + expect(fs.existsSync(path.join(agentsRoot, "main", "sessions"))).toBe(false); + + fs.rmSync(fixture, { recursive: true, force: true }); + }); }); diff --git a/test/snapshot.test.ts b/test/snapshot.test.ts index f1cb0c6f15..0d58de7a59 100644 --- a/test/snapshot.test.ts +++ b/test/snapshot.test.ts @@ -1122,7 +1122,8 @@ process.exit(0); for (const d of existingDirs) fs.mkdirSync(path.join(openclawDir, d), { recursive: true }); // `agents` simulates perm-denied (no rows emitted); `workspace` emits - // a non-whitelisted symlink that the audit must still catch. + // a symlink that is not in the audit allow-list, which must still be + // caught even when a sibling find exits non-zero. const auditLines = [ "l\t/sandbox/.openclaw/workspace/leak\t../openclaw.json", ].join("\n"); From 1262ddfb28d5b697630c40beb935a162170ca364 Mon Sep 17 00:00:00 2001 From: Tinson Lai Date: Sun, 24 May 2026 10:07:50 +0000 Subject: [PATCH 05/13] fix(shields): replay restore helper under sh + drop NC tag from updated comment MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Address review: 1. The behavioural test for the runtime-subpath restore was running the captured script body under `bash` rather than `sh`, while the production helper invokes it via `sh -c`. A bash-only construct slipping into the helper would pass the test but break the real call site. Switch the replay to `spawnSync("sh", …)` in both fixtures so the test covers exactly what the privileged exec runs. 2. Drop the NC-2227-05 tag from the updated `lockAgentConfig` comment. The state-directory ownership story is now self-contained in `HIGH_RISK_STATE_DIRS` doc + the helper itself; the issue tag does not add information and conflicts with the no-issue-refs-in-comments guidance applied across the rest of this PR. Signed-off-by: Tinson Lai --- src/lib/shields/index.ts | 8 ++++---- test/shields-up-runtime-perms.test.ts | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/lib/shields/index.ts b/src/lib/shields/index.ts index 51cb7d746d..1aec7b0388 100644 --- a/src/lib/shields/index.ts +++ b/src/lib/shields/index.ts @@ -753,10 +753,10 @@ function lockAgentConfig( } } - // NC-2227-05: Lock state directories. Owned by `root:sandbox` so the - // gateway (in sandbox group) can still read plugin/agent code while the - // sandbox user is denied write through `chmod -R go-w`. See - // HIGH_RISK_STATE_DIRS doc above. Top-level configDir stays root:root. + // Lock state directories. Owned by `root:sandbox` so the gateway (in + // sandbox group) can still read plugin/agent code while the sandbox user + // is denied write through `chmod -R go-w`. See HIGH_RISK_STATE_DIRS doc + // above. Top-level configDir stays root:root. applyStateDirLockMode(sandboxName, target.configDir, "root:sandbox", true); // OpenClaw's mutable-default config root is setgid (#2681). Clear setgid diff --git a/test/shields-up-runtime-perms.test.ts b/test/shields-up-runtime-perms.test.ts index 32d554d72e..2ae710331c 100644 --- a/test/shields-up-runtime-perms.test.ts +++ b/test/shields-up-runtime-perms.test.ts @@ -133,7 +133,7 @@ describe("shields-up state-dir lock preserves sandbox-group access + runtime ses const patterns = restoreShell.slice(5); const result = spawnSync( - "bash", + "sh", ["-c", `${script}\n`, "sh", configDir, ...patterns], { encoding: "utf-8", timeout: 5000 }, ); @@ -171,7 +171,7 @@ describe("shields-up state-dir lock preserves sandbox-group access + runtime ses const patterns = restoreShell.slice(5); const result = spawnSync( - "bash", + "sh", ["-c", `${script}\n`, "sh", configDir, ...patterns], { encoding: "utf-8", timeout: 5000 }, ); From 4bcb560e9a85f575385a155c4809e7c0d17480a1 Mon Sep 17 00:00:00 2001 From: Tinson Lai Date: Sun, 24 May 2026 10:46:11 +0000 Subject: [PATCH 06/13] fix(shields): refuse symlinked state-dir/workspace roots in shields-up lock MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Address review: 1. The shields-up lock loop ran `chown -R`/`chmod -R`/`chmod g-s` on each `${configDir}/${dirName}` (and on every `workspace-*` glob hit) without rejecting symlinked roots. A pre-lockdown agent that swapped e.g. `extensions/` or `workspace-main/` for a symlink to a host path could redirect those recursive ownership and mode mutations at an attacker-controlled directory. Consolidate the per-state-dir loop into a single privileged shell exec that skips symlinks (`[ -L "$path" ] && continue`) before any mutation, and add the same guard to the existing `workspace-*` shell loop. 2. Drop the `NC-2227-05` issue tag from the state-directory header comment for consistency with the rest of this PR. Updates the regression tests: - `repro-2681`: assert the unlock fan-out via the new `sh -c` script shape (workspace included as an arg, plus the workspace-* glob path still present). - `shields-up-runtime-perms`: assert the state-dir lock and workspace-* scripts both contain the `[ -L … ] && continue` guard, and add a behavioural fixture that proves a symlinked `extensions/` root is skipped (its host target keeps its original mode and file contents). Signed-off-by: Tinson Lai --- src/lib/shields/index.ts | 71 +++++++++++-------- test/repro-2681-group-writable.test.ts | 25 +++++-- test/shields-up-runtime-perms.test.ts | 97 +++++++++++++++++++++----- 3 files changed, 142 insertions(+), 51 deletions(-) diff --git a/src/lib/shields/index.ts b/src/lib/shields/index.ts index 1aec7b0388..30fd88a8a4 100644 --- a/src/lib/shields/index.ts +++ b/src/lib/shields/index.ts @@ -335,7 +335,7 @@ function isShieldsState(value: unknown): value is ShieldsState { } // --------------------------------------------------------------------------- -// NC-2227-05: State directories locked by shields-up. +// State directories locked by shields-up. // // During shields-up, these must be locked so the sandbox user cannot create // new entries or modify existing ones. This covers both executable state @@ -396,40 +396,48 @@ function applyStateDirLockMode( const recursiveMode = isLocking ? "go-w" : "g+rwX,o-rwx"; const dirMode = isLocking ? "755" : "2770"; - for (const dirName of HIGH_RISK_STATE_DIRS) { - const dirPath = `${configDir}/${dirName}`; - try { - privilegedSandboxExec(sandboxName, ["chown", "-R", owner, dirPath]); - } catch { - // Directory may not exist for this agent — silently skip - } - try { - privilegedSandboxExec(sandboxName, ["chmod", dirMode, dirPath]); - } catch { - // Silently skip - } - if (isLocking) { - try { - privilegedSandboxExec(sandboxName, ["chmod", "g-s", dirPath]); - } catch { - // Best effort; do not skip recursive write stripping. - } - } - try { - privilegedSandboxExec(sandboxName, [ - "chmod", - "-R", - recursiveMode, - dirPath, - ]); - } catch { - // Silently skip - } + const clearSetgid = isLocking ? "1" : "0"; + + // Reject symlinked state-dir roots (and workspace-* dirs) before any + // recursive ownership/mode mutation. A pre-lockdown agent that swapped + // e.g. `extensions/` for a symlink to /etc could otherwise redirect the + // privileged `chown -R`/`chmod -R` at an attacker-controlled host path. + try { + privilegedSandboxExec(sandboxName, [ + "sh", + "-c", + ` +set -u +config_dir="$1" +owner="$2" +recursive_mode="$3" +dir_mode="$4" +clear_setgid="$5" +shift 5 +for dir in "$@"; do + path="$config_dir/$dir" + [ -L "$path" ] && continue + [ -d "$path" ] || continue + chown -R "$owner" "$path" 2>/dev/null || true + chmod "$dir_mode" "$path" 2>/dev/null || true + [ "$clear_setgid" = "1" ] && chmod g-s "$path" 2>/dev/null || true + chmod -R "$recursive_mode" "$path" 2>/dev/null || true +done +`, + "sh", + configDir, + owner, + recursiveMode, + dirMode, + clearSetgid, + ...HIGH_RISK_STATE_DIRS, + ]); + } catch { + // Best effort; verification below catches the primary config lock. } // Multi-agent OpenClaw workspaces are named workspace-. They are // discovered dynamically because they are configured by openclaw.json. - const clearSetgid = isLocking ? "1" : "0"; try { privilegedSandboxExec(sandboxName, [ "sh", @@ -442,6 +450,7 @@ recursive_mode="$3" dir_mode="$4" clear_setgid="$5" for dir in "$config_dir"/workspace-*; do + [ -L "$dir" ] && continue [ -d "$dir" ] || continue chown -R "$owner" "$dir" 2>/dev/null || true chmod "$dir_mode" "$dir" 2>/dev/null || true diff --git a/test/repro-2681-group-writable.test.ts b/test/repro-2681-group-writable.test.ts index c11931daff..b293e84be6 100644 --- a/test/repro-2681-group-writable.test.ts +++ b/test/repro-2681-group-writable.test.ts @@ -159,11 +159,28 @@ describe("Issue #2681 — mutable OpenClaw config permissions", () => { expect(commands).toContainEqual(["chmod", "660", "/sandbox/.openclaw/openclaw.json"]); expect(commands).toContainEqual(["chmod", "660", "/sandbox/.openclaw/.config-hash"]); expect(commands).toContainEqual(["chmod", "2770", "/sandbox/.openclaw"]); - expect(commands).toContainEqual(["chmod", "2770", "/sandbox/.openclaw/workspace"]); - expect(commands).toContainEqual(["chmod", "-R", "g+rwX,o-rwx", "/sandbox/.openclaw/workspace"]); - expect(commands.find((command) => command[0] === "sh" && command[1] === "-c")).toEqual( - expect.arrayContaining(["/sandbox/.openclaw", "sandbox:sandbox", "g+rwX,o-rwx", "2770"]), + // The consolidated state-dir unlock script restores ownership and mode on + // every high-risk state dir (including `workspace`) inside a single + // `sh -c` invocation; the workspace-* glob is handled by a second + // `sh -c`. Both are asserted via the `arrayContaining` check below. + const shellInvocations = commands.filter( + (command) => command[0] === "sh" && command[1] === "-c", ); + expect( + shellInvocations.some((command) => + command.includes("/sandbox/.openclaw") && + command.includes("sandbox:sandbox") && + command.includes("g+rwX,o-rwx") && + command.includes("2770") && + command.includes("workspace"), + ), + ).toBe(true); + expect( + shellInvocations.some( + (command) => + typeof command[2] === "string" && command[2].includes('workspace-*'), + ), + ).toBe(true); }); it("shields-up strips setgid from the OpenClaw config root before verifying lock", () => { diff --git a/test/shields-up-runtime-perms.test.ts b/test/shields-up-runtime-perms.test.ts index 2ae710331c..768115ada2 100644 --- a/test/shields-up-runtime-perms.test.ts +++ b/test/shields-up-runtime-perms.test.ts @@ -53,28 +53,54 @@ process.stdout.write(JSON.stringify(calls)); return JSON.parse(probe.stdout) as string[][]; } +function findStateDirLockShell(commands: string[][]): string[] | undefined { + return commands.find( + (command) => + command[0] === "sh" && + command[1] === "-c" && + typeof command[2] === "string" && + command[2].includes('chown -R "$owner"') && + command.includes("root:sandbox") && + command.includes("agents") && + command.includes("extensions"), + ); +} + describe("shields-up state-dir lock preserves sandbox-group access + runtime sessions writable", () => { - it("locks each high-risk state dir to root:sandbox so the gateway keeps r-x via the sandbox group", () => { + it("locks the high-risk state dirs via a single sh-c script with root:sandbox ownership", () => { const commands = runLockAgentConfigProbe(); - const stateDirChowns = commands.filter( - (command) => - command[0] === "chown" && - command[1] === "-R" && - typeof command[3] === "string" && - command[3].startsWith("/sandbox/.openclaw/"), + const stateDirLockShell = findStateDirLockShell(commands); + expect(stateDirLockShell).toBeDefined(); + expect(stateDirLockShell).toEqual( + expect.arrayContaining(["root:sandbox", "go-w", "755"]), ); - - expect(stateDirChowns.length).toBeGreaterThan(0); - for (const command of stateDirChowns) { - expect(command[2]).toBe("root:sandbox"); - } - expect(stateDirChowns.map((command) => command[3])).toContain( - "/sandbox/.openclaw/extensions", + expect(stateDirLockShell).toEqual( + expect.arrayContaining(["agents", "extensions", "skills", "hooks"]), ); - expect(stateDirChowns.map((command) => command[3])).toContain( - "/sandbox/.openclaw/agents", + }); + + it("guards the state-dir lock script against symlinked roots", () => { + const commands = runLockAgentConfigProbe(); + const stateDirLockShell = findStateDirLockShell(commands); + expect(stateDirLockShell).toBeDefined(); + const script = stateDirLockShell?.[2] ?? ""; + expect(script).toContain('[ -L "$path" ] && continue'); + expect(script).toContain('[ -d "$path" ] || continue'); + }); + + it("guards the workspace-* lock script against symlinked roots", () => { + const commands = runLockAgentConfigProbe(); + const workspaceLockShell = commands.find( + (command) => + command[0] === "sh" && + command[1] === "-c" && + typeof command[2] === "string" && + command[2].includes('workspace-*'), ); + expect(workspaceLockShell).toBeDefined(); + const script = workspaceLockShell?.[2] ?? ""; + expect(script).toContain('[ -L "$dir" ] && continue'); }); it("keeps the top-level config dir owned by root:root (lock contract unchanged)", () => { @@ -144,6 +170,45 @@ describe("shields-up state-dir lock preserves sandbox-group access + runtime ses fs.rmSync(fixture, { recursive: true, force: true }); }); + // If a pre-lockdown agent swaps a high-risk state dir (here `extensions`) + // for a symlink to a host path, the consolidated state-dir lock script + // must skip the symlink without recursing into the target. + it("does not chown/chmod through a symlinked high-risk state dir", () => { + const fixture = fs.mkdtempSync( + path.join(os.tmpdir(), "nemoclaw-shields-statedir-symlink-"), + ); + const configDir = path.join(fixture, ".openclaw"); + const hostTarget = path.join(fixture, "host-target"); + fs.mkdirSync(configDir, { recursive: true }); + fs.mkdirSync(hostTarget, { recursive: true }); + const innocentFile = path.join(hostTarget, "host-file"); + fs.writeFileSync(innocentFile, "untouched\n", { mode: 0o600 }); + const hostFilePerms = fs.statSync(innocentFile).mode & 0o7777; + fs.symlinkSync(hostTarget, path.join(configDir, "extensions")); + + const stateDirLockShell = findStateDirLockShell(runLockAgentConfigProbe()); + if (!stateDirLockShell) { + throw new Error("state-dir lock shell command not found"); + } + const script = stateDirLockShell[2]; + // Captured argv: ["sh", "-c", script, "sh", configDir, owner, + // recursiveMode, dirMode, clearSetgid, ...stateDirs]. Drop the probed + // configDir from index 4 and substitute the fixture path. + const args = stateDirLockShell.slice(4); + args[0] = configDir; + + const result = spawnSync( + "sh", + ["-c", `${script}\n`, "sh", ...args], + { encoding: "utf-8", timeout: 5000 }, + ); + expect(result.status).toBe(0); + expect(fs.lstatSync(path.join(configDir, "extensions")).isSymbolicLink()).toBe(true); + expect(fs.statSync(innocentFile).mode & 0o7777).toBe(hostFilePerms); + + fs.rmSync(fixture, { recursive: true, force: true }); + }); + // Defence in depth: if a malicious agent points `agents/` at /etc or // any other host path before shields-up runs, the privileged restore // helper must not mkdir/chown/chmod through that symlink. The script From b95913ad0c78d1efd972054c21318a6901c01446 Mon Sep 17 00:00:00 2001 From: Tinson Lai Date: Sun, 24 May 2026 11:54:28 +0000 Subject: [PATCH 07/13] fix(shields): fail shields-up when a state-dir root is a symlink MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Address review: Previously, when shields-up encountered a symlinked high-risk state-dir root (or a symlinked `workspace-*` dir), the privileged lock script silently skipped it via `[ -L "$path" ] && continue`. That refused to follow the link — good — but left the dir as-is and reported the lock as successful. The sandbox would then sit in "shields up (lockdown active)" status while a state-dir root still pointed at a writable host path, exactly the security regression the symlink guard was meant to prevent. Have the two consolidated lock shell scripts always exit 0 but emit `symlinked-root\t` on stdout for every symlinked root they refuse to touch. `applyStateDirLockMode` parses those lines and returns them as lock failures when invoked under `isLocking=true`. `lockAgentConfig` now throws "Config not locked: state dir root is a symlink: …" before any further verification, refusing to acknowledge shields-up. Unlock is unchanged: skipping symlinked roots is the correct best-effort behaviour there. New regression test exercises the end-to-end path: when the captured shell script reports a symlinked root via the mocked exec, the `lockAgentConfig` call throws with the expected diagnostic. Tests for the static script shape are updated to match the new `if [ -L … ]; then printf …; fi` form instead of the previous `[ -L … ] && continue`. Signed-off-by: Tinson Lai --- src/lib/shields/index.ts | 108 +++++++++++++++++++------- test/shields-up-runtime-perms.test.ts | 78 ++++++++++++++++++- 2 files changed, 155 insertions(+), 31 deletions(-) diff --git a/src/lib/shields/index.ts b/src/lib/shields/index.ts index 30fd88a8a4..3dad1bae7e 100644 --- a/src/lib/shields/index.ts +++ b/src/lib/shields/index.ts @@ -384,7 +384,7 @@ function applyStateDirLockMode( configDir: string, owner: string, isLocking: boolean, -): void { +): string[] { // Locking (shields-up) strips group + world write. Unlocking (shields-down) // restores the same group-readable/writable + o-rwx mutable-default contract // as startup, plus setgid so the gateway UID — now in the sandbox group via @@ -395,18 +395,20 @@ function applyStateDirLockMode( // shields-down would leave nested files readable/writable only by owner. const recursiveMode = isLocking ? "go-w" : "g+rwX,o-rwx"; const dirMode = isLocking ? "755" : "2770"; - const clearSetgid = isLocking ? "1" : "0"; // Reject symlinked state-dir roots (and workspace-* dirs) before any // recursive ownership/mode mutation. A pre-lockdown agent that swapped // e.g. `extensions/` for a symlink to /etc could otherwise redirect the // privileged `chown -R`/`chmod -R` at an attacker-controlled host path. - try { - privilegedSandboxExec(sandboxName, [ - "sh", - "-c", - ` + // The script always exits 0 and reports symlinked roots on stdout so the + // TS layer can surface them as lock failures (caller fails shields-up + // rather than reporting "locked" while a state-dir root is still a + // symlink to a writable host path). + const symlinkReports: string[] = []; + + const stateDirReport = runStateDirLockScript(sandboxName, { + script: ` set -u config_dir="$1" owner="$2" @@ -416,7 +418,10 @@ clear_setgid="$5" shift 5 for dir in "$@"; do path="$config_dir/$dir" - [ -L "$path" ] && continue + if [ -L "$path" ]; then + printf 'symlinked-root\\t%s\\n' "$path" + continue + fi [ -d "$path" ] || continue chown -R "$owner" "$path" 2>/dev/null || true chmod "$dir_mode" "$path" 2>/dev/null || true @@ -424,25 +429,21 @@ for dir in "$@"; do chmod -R "$recursive_mode" "$path" 2>/dev/null || true done `, - "sh", + args: [ configDir, owner, recursiveMode, dirMode, clearSetgid, ...HIGH_RISK_STATE_DIRS, - ]); - } catch { - // Best effort; verification below catches the primary config lock. - } + ], + }); + symlinkReports.push(...stateDirReport); // Multi-agent OpenClaw workspaces are named workspace-. They are // discovered dynamically because they are configured by openclaw.json. - try { - privilegedSandboxExec(sandboxName, [ - "sh", - "-c", - ` + const workspaceReport = runStateDirLockScript(sandboxName, { + script: ` set -u config_dir="$1" owner="$2" @@ -450,7 +451,10 @@ recursive_mode="$3" dir_mode="$4" clear_setgid="$5" for dir in "$config_dir"/workspace-*; do - [ -L "$dir" ] && continue + if [ -L "$dir" ]; then + printf 'symlinked-root\\t%s\\n' "$dir" + continue + fi [ -d "$dir" ] || continue chown -R "$owner" "$dir" 2>/dev/null || true chmod "$dir_mode" "$dir" 2>/dev/null || true @@ -458,20 +462,55 @@ for dir in "$config_dir"/workspace-*; do chmod -R "$recursive_mode" "$dir" 2>/dev/null || true done `, + args: [configDir, owner, recursiveMode, dirMode, clearSetgid], + }); + symlinkReports.push(...workspaceReport); + + if (isLocking) { + restoreWritableRuntimeSubpaths(sandboxName, configDir); + } + + // Surface symlinked roots only when locking. Unlock is best-effort and + // tolerating a stale symlink there is safer than refusing to relax the + // tree at all. + if (!isLocking) return []; + return symlinkReports.map( + (path) => `state dir root is a symlink: ${path} (refusing to lock)`, + ); +} + +interface StateDirLockScript { + script: string; + args: string[]; +} + +// Run a privileged shell script that mutates state-dir permissions and +// reports any symlinked roots it refused to touch via tab-prefixed stdout +// lines ("symlinked-root\t"). Returns the parsed list of skipped +// paths; on any other failure, returns an empty list so the caller can +// continue with the rest of the lock fan-out. +function runStateDirLockScript( + sandboxName: string, + script: StateDirLockScript, +): string[] { + let stdout = ""; + try { + stdout = privilegedSandboxExecCapture(sandboxName, [ "sh", - configDir, - owner, - recursiveMode, - dirMode, - clearSetgid, + "-c", + script.script, + "sh", + ...script.args, ]); } catch { - // Best effort; verification below catches the primary config lock. + return []; } - - if (isLocking) { - restoreWritableRuntimeSubpaths(sandboxName, configDir); + const skipped: string[] = []; + for (const line of stdout.split("\n")) { + const [tag, path] = line.split("\t"); + if (tag === "symlinked-root" && path) skipped.push(path); } + return skipped; } function restoreWritableRuntimeSubpaths( @@ -766,7 +805,18 @@ function lockAgentConfig( // sandbox group) can still read plugin/agent code while the sandbox user // is denied write through `chmod -R go-w`. See HIGH_RISK_STATE_DIRS doc // above. Top-level configDir stays root:root. - applyStateDirLockMode(sandboxName, target.configDir, "root:sandbox", true); + const stateDirLockIssues = applyStateDirLockMode( + sandboxName, + target.configDir, + "root:sandbox", + true, + ); + if (stateDirLockIssues.length > 0) { + // Symlinked state-dir roots are a security-relevant violation: + // continuing would let shields-up report "locked" while a state + // dir still points at a writable host path. Refuse the lock. + throw new Error(`Config not locked: ${stateDirLockIssues.join(", ")}`); + } // OpenClaw's mutable-default config root is setgid (#2681). Clear setgid // after descendant locking so shields-up verifies the root config dir as diff --git a/test/shields-up-runtime-perms.test.ts b/test/shields-up-runtime-perms.test.ts index 768115ada2..e8f63ebc47 100644 --- a/test/shields-up-runtime-perms.test.ts +++ b/test/shields-up-runtime-perms.test.ts @@ -53,6 +53,65 @@ process.stdout.write(JSON.stringify(calls)); return JSON.parse(probe.stdout) as string[][]; } +function runLockAgentConfigProbeExpectingThrow( + symlinkedPath: string, +): { stdout: string; stderr: string; status: number | null } { + return spawnSync( + process.execPath, + [ + "-e", + String.raw` +const Module = require("node:module"); +const originalLoad = Module._load; +const symlinkedPath = ${JSON.stringify(symlinkedPath)}; +Module._load = function patchedLoad(request, parent, isMain) { + if (request === "../adapters/docker/exec") { + return { + dockerExecFileSync(args) { + const separator = args.indexOf("--"); + const command = separator >= 0 ? args.slice(separator + 1) : args; + if ( + command[0] === "sh" && + command[1] === "-c" && + typeof command[2] === "string" && + command[2].includes("symlinked-root") + ) { + return "symlinked-root\t" + symlinkedPath + "\n"; + } + if (command[0] === "stat" && command[1] === "-c") { + return command.at(-1) === "/sandbox/.openclaw" + ? "755 root:root\n" + : "444 root:root\n"; + } + if (command[0] === "lsattr") { + return "----i----------------- " + command.at(-1) + "\n"; + } + return ""; + }, + }; + } + return originalLoad.call(this, request, parent, isMain); +}; +try { + const { lockAgentConfig } = require("./dist/lib/shields/index.js"); + lockAgentConfig("sandbox-pod", { + agentName: "openclaw", + configPath: "/sandbox/.openclaw/openclaw.json", + configDir: "/sandbox/.openclaw", + sensitiveFiles: ["/sandbox/.openclaw/.config-hash"], + }); + process.stdout.write("UNEXPECTED_SUCCESS"); + process.exit(0); +} catch (err) { + process.stderr.write(err && err.message ? err.message : String(err)); + process.exit(2); +} +`, + ], + { encoding: "utf-8", timeout: 5000 }, + ); +} + function findStateDirLockShell(commands: string[][]): string[] | undefined { return commands.find( (command) => @@ -85,7 +144,8 @@ describe("shields-up state-dir lock preserves sandbox-group access + runtime ses const stateDirLockShell = findStateDirLockShell(commands); expect(stateDirLockShell).toBeDefined(); const script = stateDirLockShell?.[2] ?? ""; - expect(script).toContain('[ -L "$path" ] && continue'); + expect(script).toContain('if [ -L "$path" ]; then'); + expect(script).toContain('symlinked-root'); expect(script).toContain('[ -d "$path" ] || continue'); }); @@ -100,7 +160,21 @@ describe("shields-up state-dir lock preserves sandbox-group access + runtime ses ); expect(workspaceLockShell).toBeDefined(); const script = workspaceLockShell?.[2] ?? ""; - expect(script).toContain('[ -L "$dir" ] && continue'); + expect(script).toContain('if [ -L "$dir" ]; then'); + expect(script).toContain('symlinked-root'); + }); + + // A symlinked state-dir root must abort shields-up; otherwise the lock + // would report success while the dir still points at a writable host + // path. + it("throws when shields-up encounters a symlinked state-dir root", () => { + const result = runLockAgentConfigProbeExpectingThrow( + "/sandbox/.openclaw/extensions", + ); + expect(result.status).toBe(2); + expect(result.stderr).toContain("Config not locked"); + expect(result.stderr).toContain("state dir root is a symlink"); + expect(result.stderr).toContain("/sandbox/.openclaw/extensions"); }); it("keeps the top-level config dir owned by root:root (lock contract unchanged)", () => { From f099b8b9a3cc84464272a3a34a478064a0f661ff Mon Sep 17 00:00:00 2001 From: Tinson Lai Date: Sun, 24 May 2026 12:28:08 +0000 Subject: [PATCH 08/13] fix(shields): preflight all state-dir/workspace roots before mutating + doc update MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Address review: 1. The inline `[ -L … ] && printf 'symlinked-root\t…'` guard in the mutation script ran per-iteration. A symlinked root later in the list could still leave earlier (non-symlinked) state dirs already reowned to `root:sandbox` by the time the lock helper bailed out. shields-up would then report "config not locked" while the tree was partially mutated. Add a dedicated preflight pass that runs before any `chown`/`chmod`, scans every high-risk state-dir root *and* every `workspace-*` dir for symlinks, and returns the full list. When `isLocking=true` and the preflight finds any symlinked root, `applyStateDirLockMode` short-circuits without touching the mutation pass or the sessions-restore helper, and `lockAgentConfig` throws `Config not locked: state dir root is a symlink: …`. The inline symlink guards in the mutation scripts stay for defence-in-depth in case the preflight and the mutation observe different fs state. 2. New regression test mocks the preflight script to report a symlinked root and asserts (a) `lockAgentConfig` throws with the expected diagnostic and (b) no mutation calls (state-dir lock, workspace-* lock, or sessions-restore) were ever issued. 3. Add a second regression test that asserts a dedicated preflight script (no `chown`/`chmod`, just the `[ -L … ] && printf` checks) is present in the recorded call sequence. 4. Update `docs/security/best-practices.mdx` to document the new `root:sandbox` state-dir ownership, the `agents//sessions` runtime carve-out, and the hard fail on symlinked state-dir roots. Signed-off-by: Tinson Lai --- docs/security/best-practices.mdx | 5 +- src/lib/shields/index.ts | 80 +++++++++++++----- test/shields-up-runtime-perms.test.ts | 116 ++++++++++++++++++++++++-- 3 files changed, 173 insertions(+), 28 deletions(-) diff --git a/docs/security/best-practices.mdx b/docs/security/best-practices.mdx index dd2b75569c..e91a25f619 100644 --- a/docs/security/best-practices.mdx +++ b/docs/security/best-practices.mdx @@ -200,7 +200,10 @@ In root mode, the gateway process still runs as the separate `gateway` user, but Writable agent state such as plugins, skills, hooks, and workspace metadata lives directly under `/sandbox/.openclaw`. By default, this directory starts writable so the agent can manage its own config, install skills, and write to standard home-directory paths natively. -For sensitive workloads, use a reviewed host-side immutability workflow after initial setup so config and writable state entry points cannot be changed by the sandbox user. +For sensitive workloads, use a reviewed host-side immutability workflow after initial setup so config and high-risk state entry points cannot be changed by the sandbox user. +Under lockdown, the high-risk state directories (`skills`, `hooks`, `cron`, `agents`, `extensions`, `plugins`, `workspace`, `memory`, `credentials`, `identity`, `devices`, `canvas`, `telegram`) are owned by `root:sandbox` rather than `root:root`, so the OpenClaw gateway (a member of the `sandbox` group) keeps read access to plugin and agent code while the sandbox user is denied write through `chmod -R go-w`. +A narrow set of runtime-data subpaths is exempted from the lock so the agent can keep operating — currently `agents//sessions/`, which the OpenClaw TUI creates and writes session metadata into; the lock helper restores those subpaths to `sandbox:sandbox 2770` after the surrounding tree is locked. +If any high-risk state-dir root is a symlink when shields-up runs, the lock refuses to proceed and reports "Config not locked: state dir root is a symlink" rather than silently following the link with privileged `chown -R` / `chmod -R`. - **DAC permissions (default).** The sandbox user owns `/sandbox/.openclaw` with mode `700` and `openclaw.json` with mode `600`, so the agent can read and write config directly. - **Config integrity hash.** The image includes a SHA256 hash of `openclaw.json`. In the default mutable state, `.config-hash` is sandbox-owned and is not a tamper-proof trust anchor, so startup does not fail closed on that hash. When the hash is root-owned and read-only, startup enforces it and refuses to start if the hash does not match. diff --git a/src/lib/shields/index.ts b/src/lib/shields/index.ts index 3dad1bae7e..ee16b1a788 100644 --- a/src/lib/shields/index.ts +++ b/src/lib/shields/index.ts @@ -397,17 +397,27 @@ function applyStateDirLockMode( const dirMode = isLocking ? "755" : "2770"; const clearSetgid = isLocking ? "1" : "0"; - // Reject symlinked state-dir roots (and workspace-* dirs) before any - // recursive ownership/mode mutation. A pre-lockdown agent that swapped - // e.g. `extensions/` for a symlink to /etc could otherwise redirect the - // privileged `chown -R`/`chmod -R` at an attacker-controlled host path. - // The script always exits 0 and reports symlinked roots on stdout so the - // TS layer can surface them as lock failures (caller fails shields-up - // rather than reporting "locked" while a state-dir root is still a - // symlink to a writable host path). - const symlinkReports: string[] = []; - - const stateDirReport = runStateDirLockScript(sandboxName, { + // Preflight: before any chown/chmod runs, scan every high-risk state-dir + // root and every workspace-* dir for symlinked roots. A pre-lockdown + // agent that swapped e.g. `extensions/` for a symlink to /etc could + // otherwise redirect the privileged `chown -R`/`chmod -R` at an + // attacker-controlled host path. Doing the symlink check inline in the + // mutation script would still leave any earlier (non-symlinked) dirs + // already mutated by the time the script reaches the bad entry, so + // shields-up could half-lock the tree and then fail. The preflight + // returns all symlinked paths up front; we abort lock without touching + // anything when any are present. + const symlinkedRoots = preflightSymlinkedRoots(sandboxName, configDir); + if (isLocking && symlinkedRoots.length > 0) { + return symlinkedRoots.map( + (path) => `state dir root is a symlink: ${path} (refusing to lock)`, + ); + } + + // Mutation pass — still skips symlinked roots inline as a defence-in-depth + // measure in case the preflight and mutation observe different fs state + // (the preflight pass already aborted shields-up in the common case). + runStateDirLockScript(sandboxName, { script: ` set -u config_dir="$1" @@ -438,11 +448,10 @@ done ...HIGH_RISK_STATE_DIRS, ], }); - symlinkReports.push(...stateDirReport); // Multi-agent OpenClaw workspaces are named workspace-. They are // discovered dynamically because they are configured by openclaw.json. - const workspaceReport = runStateDirLockScript(sandboxName, { + runStateDirLockScript(sandboxName, { script: ` set -u config_dir="$1" @@ -464,19 +473,48 @@ done `, args: [configDir, owner, recursiveMode, dirMode, clearSetgid], }); - symlinkReports.push(...workspaceReport); if (isLocking) { restoreWritableRuntimeSubpaths(sandboxName, configDir); } + return []; +} - // Surface symlinked roots only when locking. Unlock is best-effort and - // tolerating a stale symlink there is safer than refusing to relax the - // tree at all. - if (!isLocking) return []; - return symlinkReports.map( - (path) => `state dir root is a symlink: ${path} (refusing to lock)`, - ); +// Probe every high-risk state-dir root and every workspace-* dir for +// symlinks before any mutation runs. Returns the absolute paths of all +// symlinked roots; an empty list means it is safe to proceed with the +// lock fan-out. Best-effort: a failed shell exec returns an empty list, +// matching the rest of the lock pipeline's "verification below catches +// the primary config lock" stance. +function preflightSymlinkedRoots(sandboxName: string, configDir: string): string[] { + let stdout = ""; + try { + stdout = privilegedSandboxExecCapture(sandboxName, [ + "sh", + "-c", + ` +set -u +config_dir="$1" +shift +for dir in "$@"; do + path="$config_dir/$dir" + [ -L "$path" ] && printf '%s\\n' "$path" +done +for dir in "$config_dir"/workspace-*; do + [ -L "$dir" ] && printf '%s\\n' "$dir" +done +`, + "sh", + configDir, + ...HIGH_RISK_STATE_DIRS, + ]); + } catch { + return []; + } + return stdout + .split("\n") + .map((line) => line.trim()) + .filter((line) => line.length > 0); } interface StateDirLockScript { diff --git a/test/shields-up-runtime-perms.test.ts b/test/shields-up-runtime-perms.test.ts index e8f63ebc47..bddbdd4f5c 100644 --- a/test/shields-up-runtime-perms.test.ts +++ b/test/shields-up-runtime-perms.test.ts @@ -74,9 +74,11 @@ Module._load = function patchedLoad(request, parent, isMain) { command[0] === "sh" && command[1] === "-c" && typeof command[2] === "string" && - command[2].includes("symlinked-root") + !command[2].includes("symlinked-root") ) { - return "symlinked-root\t" + symlinkedPath + "\n"; + // Preflight script: report a symlinked root so the caller + // refuses to lock without mutating any state dir. + return symlinkedPath + "\n"; } if (command[0] === "stat" && command[1] === "-c") { return command.at(-1) === "/sandbox/.openclaw" @@ -151,19 +153,37 @@ describe("shields-up state-dir lock preserves sandbox-group access + runtime ses it("guards the workspace-* lock script against symlinked roots", () => { const commands = runLockAgentConfigProbe(); - const workspaceLockShell = commands.find( + const workspaceMutationShell = commands.find( (command) => command[0] === "sh" && command[1] === "-c" && typeof command[2] === "string" && - command[2].includes('workspace-*'), + command[2].includes('workspace-*') && + command[2].includes('chown -R "$owner"'), ); - expect(workspaceLockShell).toBeDefined(); - const script = workspaceLockShell?.[2] ?? ""; + expect(workspaceMutationShell).toBeDefined(); + const script = workspaceMutationShell?.[2] ?? ""; expect(script).toContain('if [ -L "$dir" ]; then'); expect(script).toContain('symlinked-root'); }); + it("runs a symlink-preflight script before any mutation", () => { + const commands = runLockAgentConfigProbe(); + const preflightShell = commands.find( + (command) => + command[0] === "sh" && + command[1] === "-c" && + typeof command[2] === "string" && + command[2].includes('workspace-*') && + !command[2].includes("chown") && + !command[2].includes("chmod"), + ); + expect(preflightShell).toBeDefined(); + const script = preflightShell?.[2] ?? ""; + expect(script).toContain('[ -L "$path" ] && printf'); + expect(script).toContain('[ -L "$dir" ] && printf'); + }); + // A symlinked state-dir root must abort shields-up; otherwise the lock // would report success while the dir still points at a writable host // path. @@ -177,6 +197,90 @@ describe("shields-up state-dir lock preserves sandbox-group access + runtime ses expect(result.stderr).toContain("/sandbox/.openclaw/extensions"); }); + // Atomicity: when the preflight detects a symlinked state-dir root, + // no chown/chmod must run on any of the other (non-symlinked) state + // dirs in the same lock attempt. Without this guarantee, earlier + // entries in HIGH_RISK_STATE_DIRS could be silently re-owned to + // root:sandbox before the later symlink is reached and the lock + // bails out, leaving a half-locked tree behind. + it("does not mutate any state dir when the preflight reports a symlinked root", () => { + const probe = spawnSync( + process.execPath, + [ + "-e", + String.raw` +const Module = require("node:module"); +const originalLoad = Module._load; +const calls = []; +Module._load = function patchedLoad(request, parent, isMain) { + if (request === "../adapters/docker/exec") { + return { + dockerExecFileSync(args) { + const separator = args.indexOf("--"); + const command = separator >= 0 ? args.slice(separator + 1) : args; + calls.push(command); + if ( + command[0] === "sh" && + command[1] === "-c" && + typeof command[2] === "string" && + !command[2].includes("symlinked-root") + ) { + // Preflight script: report /sandbox/.openclaw/extensions as + // symlinked so the caller refuses to mutate. + return "/sandbox/.openclaw/extensions\n"; + } + return ""; + }, + }; + } + return originalLoad.call(this, request, parent, isMain); +}; +try { + const { lockAgentConfig } = require("./dist/lib/shields/index.js"); + lockAgentConfig("sandbox-pod", { + agentName: "openclaw", + configPath: "/sandbox/.openclaw/openclaw.json", + configDir: "/sandbox/.openclaw", + sensitiveFiles: ["/sandbox/.openclaw/.config-hash"], + }); + process.stdout.write("UNEXPECTED_SUCCESS\n"); + process.stdout.write(JSON.stringify(calls)); + process.exit(0); +} catch (err) { + process.stdout.write(JSON.stringify(calls)); + process.stderr.write(err && err.message ? err.message : String(err)); + process.exit(2); +} +`, + ], + { encoding: "utf-8", timeout: 5000 }, + ); + expect(probe.status).toBe(2); + expect(probe.stderr).toContain("state dir root is a symlink"); + + const calls = JSON.parse(probe.stdout) as string[][]; + // Mutation pass scripts contain "symlinked-root\\t" in their script body. + // If any such call was recorded, atomicity is broken. + const mutationCalls = calls.filter( + (command) => + command[0] === "sh" && + command[1] === "-c" && + typeof command[2] === "string" && + command[2].includes("symlinked-root"), + ); + expect(mutationCalls).toEqual([]); + // Restore-writable-subpaths must also not run (it would mkdir + // agents//sessions inside what could be a half-locked tree). + const restoreCalls = calls.filter( + (command) => + command[0] === "sh" && + command[1] === "-c" && + typeof command[2] === "string" && + command[2].includes("agents/*/sessions"), + ); + expect(restoreCalls).toEqual([]); + }); + it("keeps the top-level config dir owned by root:root (lock contract unchanged)", () => { const commands = runLockAgentConfigProbe(); expect(commands).toContainEqual([ From c55ed517e5f5a5e2358b0e08492cdb5bed6042e8 Mon Sep 17 00:00:00 2001 From: Tinson Lai Date: Mon, 25 May 2026 04:32:18 +0000 Subject: [PATCH 09/13] revert: drop pre-backup audit-find tolerance from this PR Signed-off-by: Tinson Lai --- src/lib/state/sandbox.ts | 12 +--- test/snapshot.test.ts | 128 --------------------------------------- 2 files changed, 2 insertions(+), 138 deletions(-) diff --git a/src/lib/state/sandbox.ts b/src/lib/state/sandbox.ts index 518d43e307..3d0188b47f 100644 --- a/src/lib/state/sandbox.ts +++ b/src/lib/state/sandbox.ts @@ -1100,20 +1100,12 @@ export function backupSandboxState(sandboxName: string, options: BackupOptions = // empty for non-symlinks but always present, so the field count is // stable. Tab separator assumes state-dir paths don't contain tabs, // matching the wider convention in this file. - // Per-dir `find` invocations are joined with `;` (not `&&`) and each - // is tolerant of its own exit code via `|| true`. The base image bakes - // a few state subdirs as root-owned (e.g. `extensions/`, - // `agents/`) and `find` walking those from the sandbox-user SSH - // session exits 1 on permission denied. The audit's real signal is - // stdout (the printf-emitted symlink/hardlink/special-file rows); - // letting one perm-denied subdir abort the whole chain blocks legitimate - // rebuilds. const auditCmd = existingDirs .map( (d) => - `{ find ${shellQuote(`${dir}/${d}`)} \\( -type l -o \\( -type f -a -links +1 \\) -o \\( ! -type f -a ! -type d \\) \\) -printf "%y\\t%p\\t%l\\n" 2>/dev/null || true; }`, + `find ${shellQuote(`${dir}/${d}`)} \\( -type l -o \\( -type f -a -links +1 \\) -o \\( ! -type f -a ! -type d \\) \\) -printf "%y\\t%p\\t%l\\n" 2>/dev/null`, ) - .join("; "); + .join(" && "); _log(`Pre-backup audit: checking for symlinks, hard links, and special files`); const auditResult = spawnSync("ssh", [...sshArgs(configFile, sandboxName), auditCmd], { encoding: "utf-8", diff --git a/test/snapshot.test.ts b/test/snapshot.test.ts index 0d58de7a59..4c1f2c7fd1 100644 --- a/test/snapshot.test.ts +++ b/test/snapshot.test.ts @@ -1044,134 +1044,6 @@ process.exit(0); fs.rmSync(fixture, { recursive: true, force: true }); } }); - - it("treats audit-find exit 1 with empty stdout as a successful audit", () => { - const fixture = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-audit-perm-denied-")); - const oldPath = process.env.PATH; - const oldOpenshell = process.env.NEMOCLAW_OPENSHELL_BIN; - try { - const binDir = path.join(fixture, "bin"); - const openclawDir = path.join(fixture, "sandbox-root", ".openclaw"); - const existingDirs = ["agents", "extensions", "workspace"]; - fs.mkdirSync(binDir, { recursive: true }); - for (const d of existingDirs) fs.mkdirSync(path.join(openclawDir, d), { recursive: true }); - - const openshell = writeFakeOpenshell(binDir); - writeExecutable( - path.join(binDir, "ssh"), - `#!/usr/bin/env node -const { spawnSync } = require("node:child_process"); -const fs = require("node:fs"); -const cmd = process.argv[process.argv.length - 1] || ""; -const existingDirs = ${JSON.stringify(existingDirs)}; -if (cmd.includes("[ -d ")) { - process.stdout.write(existingDirs.join("\\n") + "\\n"); - process.exit(0); -} -if (cmd.includes("find ")) { - // Simulate a permission-denied subdir: when the audit cmd lacks the - // \`|| true\` tolerance wrapper (pre-fix shape), exit non-zero so the - // caller treats it as audit failure. The post-fix shape wraps each - // \`find\` with \`|| true\` and joins with \`;\`, so the audit cmd as a - // whole exits 0 even though a remote \`find\` would have exited 1. - if (!cmd.includes("|| true")) { - process.stderr.write("find: '/sandbox/.openclaw/extensions/nemoclaw': Permission denied\\n"); - process.exit(1); - } - process.exit(0); -} -if (cmd.includes("tar -cf -")) { - const r = spawnSync("tar", ["-cf", "-", "-C", ${JSON.stringify(openclawDir)}, ...existingDirs], { - stdio: ["ignore", "pipe", "pipe"], - }); - if (r.stdout) fs.writeSync(1, r.stdout); - process.exit(r.status || 0); -} -process.exit(0); -`, - ); - - writeOpenClawRegistry("alpha"); - process.env.NEMOCLAW_OPENSHELL_BIN = openshell; - process.env.PATH = `${binDir}:${oldPath || ""}`; - - const backup = sandboxState.backupSandboxState("alpha"); - expect(backup.success).toBe(true); - expect(backup.error).toBeUndefined(); - expect(backup.backedUpDirs).toEqual(existingDirs); - } finally { - if (oldOpenshell === undefined) { - delete process.env.NEMOCLAW_OPENSHELL_BIN; - } else { - process.env.NEMOCLAW_OPENSHELL_BIN = oldOpenshell; - } - process.env.PATH = oldPath; - fs.rmSync(fixture, { recursive: true, force: true }); - } - }); - - it("still rejects violations from readable dirs even if a sibling find exits non-zero", () => { - const fixture = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-audit-mixed-perm-")); - const oldPath = process.env.PATH; - const oldOpenshell = process.env.NEMOCLAW_OPENSHELL_BIN; - try { - const binDir = path.join(fixture, "bin"); - const openclawDir = path.join(fixture, "sandbox-root", ".openclaw"); - const existingDirs = ["agents", "workspace"]; - fs.mkdirSync(binDir, { recursive: true }); - for (const d of existingDirs) fs.mkdirSync(path.join(openclawDir, d), { recursive: true }); - - // `agents` simulates perm-denied (no rows emitted); `workspace` emits - // a symlink that is not in the audit allow-list, which must still be - // caught even when a sibling find exits non-zero. - const auditLines = [ - "l\t/sandbox/.openclaw/workspace/leak\t../openclaw.json", - ].join("\n"); - - const openshell = writeFakeOpenshell(binDir); - writeExecutable( - path.join(binDir, "ssh"), - `#!/usr/bin/env node -const cmd = process.argv[process.argv.length - 1] || ""; -const existingDirs = ${JSON.stringify(existingDirs)}; -if (cmd.includes("[ -d ")) { - process.stdout.write(existingDirs.join("\\n") + "\\n"); - process.exit(0); -} -if (cmd.includes("find ")) { - // Match real-shell behaviour: without the \`|| true\` tolerance wrapper - // the perm-denied sibling \`find\` would have aborted the chain. The - // post-fix audit cmd still emits the violation stdout because \`;\` - // joins each per-dir block so the readable sibling's output is - // preserved. - if (!cmd.includes("|| true")) { - process.stderr.write("find: '/sandbox/.openclaw/agents/main': Permission denied\\n"); - process.exit(1); - } - process.stdout.write(${JSON.stringify(auditLines)} + "\\n"); - process.exit(0); -} -process.exit(0); -`, - ); - - writeOpenClawRegistry("alpha"); - process.env.NEMOCLAW_OPENSHELL_BIN = openshell; - process.env.PATH = `${binDir}:${oldPath || ""}`; - - const backup = sandboxState.backupSandboxState("alpha"); - expect(backup.success).toBe(false); - expect(backup.error).toMatch(/workspace\/leak/); - } finally { - if (oldOpenshell === undefined) { - delete process.env.NEMOCLAW_OPENSHELL_BIN; - } else { - process.env.NEMOCLAW_OPENSHELL_BIN = oldOpenshell; - } - process.env.PATH = oldPath; - fs.rmSync(fixture, { recursive: true, force: true }); - } - }); }); describe("Hermes durable state files", () => { From 8085cd17a3fb98ea33fec3a8271dc3fa4b80e58d Mon Sep 17 00:00:00 2001 From: Tinson Lai Date: Sun, 31 May 2026 07:58:31 +0000 Subject: [PATCH 10/13] fix(shields): verify state-dir lock, expand high-risk set, mock privileged-exec in tests Signed-off-by: Tinson Lai --- docs/security/best-practices.mdx | 2 +- src/lib/shields/index.ts | 172 ++++++++++++++++++++++---- test/shields-up-runtime-perms.test.ts | 41 +++++- 3 files changed, 188 insertions(+), 27 deletions(-) diff --git a/docs/security/best-practices.mdx b/docs/security/best-practices.mdx index b982b18414..1fc8873428 100644 --- a/docs/security/best-practices.mdx +++ b/docs/security/best-practices.mdx @@ -201,7 +201,7 @@ Writable agent state such as plugins, skills, hooks, and workspace metadata live By default, this directory starts writable so the agent can manage its own config, install skills, and write to standard home-directory paths natively. For sensitive workloads, use a reviewed host-side immutability workflow after initial setup so config and high-risk state entry points cannot be changed by the sandbox user. -Under lockdown, the high-risk state directories (`skills`, `hooks`, `cron`, `agents`, `extensions`, `plugins`, `workspace`, `memory`, `credentials`, `identity`, `devices`, `canvas`, `telegram`) are owned by `root:sandbox` rather than `root:root`, so the OpenClaw gateway (a member of the `sandbox` group) keeps read access to plugin and agent code while the sandbox user is denied write through `chmod -R go-w`. +Under lockdown, the high-risk state directories (`skills`, `hooks`, `cron`, `agents`, `extensions`, `plugins`, `workspace`, `memory`, `credentials`, `identity`, `devices`, `canvas`, `telegram`, `wechat`, `whatsapp`, `platforms`, `weixin`, `pairing`, `profiles`, `skins`) are owned by `root:sandbox` rather than `root:root`, so the OpenClaw gateway (a member of the `sandbox` group) keeps read access to plugin and agent code while the sandbox user is denied write through `chmod -R go-w`. The list is the union of state directories declared by every shipped agent manifest; dirs that aren't present in a given agent's config tree are silently skipped. Runtime-mutable subtrees (`sessions/`, `memories/`, `logs/`, `cache/`, `plans/`, and `openclaw-weixin/` which is regenerated at image-build time) are intentionally exempt so the agent can keep operating. A narrow set of runtime-data subpaths is exempted from the lock so the agent can keep operating — currently `agents//sessions/`, which the OpenClaw TUI creates and writes session metadata into; the lock helper restores those subpaths to `sandbox:sandbox 2770` after the surrounding tree is locked. If any high-risk state-dir root is a symlink when shields-up runs, the lock refuses to proceed and reports "Config not locked: state dir root is a symlink" rather than silently following the link with privileged `chown -R` / `chmod -R`. diff --git a/src/lib/shields/index.ts b/src/lib/shields/index.ts index 0ebac3f539..2328bf7dbe 100644 --- a/src/lib/shields/index.ts +++ b/src/lib/shields/index.ts @@ -350,6 +350,18 @@ function isShieldsState(value: unknown): value is ShieldsState { // // The list is a superset: directories that don't exist in a given agent's // config dir are silently skipped. +// +// Coverage tracks the union of state_dirs declared by every shipped agent +// manifest (agents/openclaw/manifest.yaml, agents/hermes/manifest.yaml). +// Runtime-mutable subtrees that must keep being writable while shields are +// up are intentionally omitted: +// - `sessions` (Hermes top-level) and `agents/*/sessions` (OpenClaw) — the +// latter is restored via WRITABLE_RUNTIME_SUBPATHS after the lock loop. +// - `memories`, `logs`, `cache`, `plans` (Hermes) — runtime mutables. +// - `openclaw-weixin` is regenerated from envs at image-build time +// (see src/lib/actions/sandbox/rebuild.ts) and is not a manifest state_dir. +// Any new agent state_dir that holds executable code or credentials must be +// added here in lockstep with its manifest entry. // --------------------------------------------------------------------------- const HIGH_RISK_STATE_DIRS = [ @@ -366,6 +378,13 @@ const HIGH_RISK_STATE_DIRS = [ "devices", "canvas", "telegram", + "wechat", // OpenClaw runtime channel state + "whatsapp", // OpenClaw + Hermes channel state (Hermes also nests under platforms/) + "platforms", // Hermes channel-bridge auth state (whatsapp/, etc.) + "weixin", // Hermes iLink WeChat per-account context tokens + "pairing", // Hermes device-pairing state + "profiles", // Hermes saved profiles + "skins", // Hermes UI/personality bundles (code-like) ]; // Runtime-data subpaths the agent must keep writing to under shields-up. @@ -410,10 +429,10 @@ function applyStateDirLockMode( ); } - // Mutation pass — still skips symlinked roots inline as a defence-in-depth + // Mutation pass — still skips symlinked roots inline as a defense-in-depth // measure in case the preflight and mutation observe different fs state // (the preflight pass already aborted shields-up in the common case). - runStateDirLockScript(sandboxName, { + const mainPassResult = runStateDirLockScript(sandboxName, { script: ` set -u config_dir="$1" @@ -429,10 +448,18 @@ for dir in "$@"; do continue fi [ -d "$path" ] || continue - chown -R "$owner" "$path" 2>/dev/null || true - chmod "$dir_mode" "$path" 2>/dev/null || true - [ "$clear_setgid" = "1" ] && chmod g-s "$path" 2>/dev/null || true - chmod -R "$recursive_mode" "$path" 2>/dev/null || true + if ! chown -R "$owner" "$path" 2>/dev/null; then + printf 'mutation-failed\\tchown\\t%s\\n' "$path" + fi + if ! chmod "$dir_mode" "$path" 2>/dev/null; then + printf 'mutation-failed\\tchmod-dir\\t%s\\n' "$path" + fi + if [ "$clear_setgid" = "1" ]; then + chmod g-s "$path" 2>/dev/null || true + fi + if ! chmod -R "$recursive_mode" "$path" 2>/dev/null; then + printf 'mutation-failed\\tchmod-recursive\\t%s\\n' "$path" + fi done `, args: [ @@ -447,7 +474,7 @@ done // Multi-agent OpenClaw workspaces are named workspace-. They are // discovered dynamically because they are configured by openclaw.json. - runStateDirLockScript(sandboxName, { + const workspacePassResult = runStateDirLockScript(sandboxName, { script: ` set -u config_dir="$1" @@ -461,10 +488,18 @@ for dir in "$config_dir"/workspace-*; do continue fi [ -d "$dir" ] || continue - chown -R "$owner" "$dir" 2>/dev/null || true - chmod "$dir_mode" "$dir" 2>/dev/null || true - [ "$clear_setgid" = "1" ] && chmod g-s "$dir" 2>/dev/null || true - chmod -R "$recursive_mode" "$dir" 2>/dev/null || true + if ! chown -R "$owner" "$dir" 2>/dev/null; then + printf 'mutation-failed\\tchown\\t%s\\n' "$dir" + fi + if ! chmod "$dir_mode" "$dir" 2>/dev/null; then + printf 'mutation-failed\\tchmod-dir\\t%s\\n' "$dir" + fi + if [ "$clear_setgid" = "1" ]; then + chmod g-s "$dir" 2>/dev/null || true + fi + if ! chmod -R "$recursive_mode" "$dir" 2>/dev/null; then + printf 'mutation-failed\\tchmod-recursive\\t%s\\n' "$dir" + fi done `, args: [configDir, owner, recursiveMode, dirMode, clearSetgid], @@ -473,7 +508,89 @@ done if (isLocking) { restoreWritableRuntimeSubpaths(sandboxName, configDir); } - return []; + + const issues: string[] = []; + for (const path of [...mainPassResult.symlinkedRoots, ...workspacePassResult.symlinkedRoots]) { + issues.push(`state dir root is a symlink: ${path} (refusing to lock)`); + } + for (const failure of [...mainPassResult.mutationFailures, ...workspacePassResult.mutationFailures]) { + issues.push(`state dir mutation failed (${failure.op}): ${failure.path}`); + } + if (isLocking) { + const verifyIssues = verifyStateDirsLocked(sandboxName, configDir, owner, dirMode); + issues.push(...verifyIssues); + } + return issues; +} + +// Post-lock verification: stat every existing high-risk state-dir root (and +// every workspace-* root) and confirm the owner + dir mode match what the +// lock pass should have established. The script-level chown/chmod fallbacks +// suppress per-call errors so transient kernel/filesystem hiccups don't +// abort the whole fan-out; the verification pass below is what catches a +// silent mutation failure and surfaces it as a lock issue so `shields up` +// does not report success while extensions/agents/etc. remain unreadable. +function verifyStateDirsLocked( + sandboxName: string, + configDir: string, + expectedOwner: string, + expectedDirMode: string, +): string[] { + let stdout = ""; + try { + stdout = privilegedSandboxExecCapture(sandboxName, [ + "sh", + "-c", + ` +set -u +config_dir="$1" +expected_owner="$2" +expected_mode="$3" +shift 3 +check() { + path="$1" + [ -L "$path" ] && { printf 'verify-symlink\\t%s\\n' "$path"; return; } + [ -d "$path" ] || return + perms="$(stat -c '%a %U:%G' "$path" 2>/dev/null)" || { + printf 'verify-stat-failed\\t%s\\n' "$path" + return + } + mode="\${perms%% *}" + owner="\${perms#* }" + [ "$mode" = "$expected_mode" ] || printf 'verify-mode\\t%s\\t%s\\t%s\\n' "$path" "$mode" "$expected_mode" + [ "$owner" = "$expected_owner" ] || printf 'verify-owner\\t%s\\t%s\\t%s\\n' "$path" "$owner" "$expected_owner" +} +for dir in "$@"; do + check "$config_dir/$dir" +done +for dir in "$config_dir"/workspace-*; do + case "$dir" in *"*"*) continue ;; esac + check "$dir" +done +`, + "sh", + configDir, + expectedOwner, + expectedDirMode, + ...HIGH_RISK_STATE_DIRS, + ]); + } catch { + return []; + } + const issues: string[] = []; + for (const line of stdout.split("\n")) { + const parts = line.split("\t"); + if (parts[0] === "verify-symlink" && parts[1]) { + issues.push(`state dir became a symlink mid-lock: ${parts[1]}`); + } else if (parts[0] === "verify-stat-failed" && parts[1]) { + issues.push(`state dir stat failed after lock: ${parts[1]}`); + } else if (parts[0] === "verify-mode" && parts[1]) { + issues.push(`state dir mode=${parts[2]} (expected ${parts[3]}): ${parts[1]}`); + } else if (parts[0] === "verify-owner" && parts[1]) { + issues.push(`state dir owner=${parts[2]} (expected ${parts[3]}): ${parts[1]}`); + } + } + return issues; } // Probe every high-risk state-dir root and every workspace-* dir for @@ -518,15 +635,21 @@ interface StateDirLockScript { args: string[]; } +interface StateDirLockScriptResult { + symlinkedRoots: string[]; + mutationFailures: Array<{ op: string; path: string }>; +} + // Run a privileged shell script that mutates state-dir permissions and // reports any symlinked roots it refused to touch via tab-prefixed stdout -// lines ("symlinked-root\t"). Returns the parsed list of skipped -// paths; on any other failure, returns an empty list so the caller can -// continue with the rest of the lock fan-out. +// lines ("symlinked-root\t"), plus per-op mutation failures +// ("mutation-failed\t\t"). Returns the parsed result; on any +// other failure, returns an empty result so the caller can continue with +// the rest of the lock fan-out. function runStateDirLockScript( sandboxName: string, script: StateDirLockScript, -): string[] { +): StateDirLockScriptResult { let stdout = ""; try { stdout = privilegedSandboxExecCapture(sandboxName, [ @@ -537,14 +660,21 @@ function runStateDirLockScript( ...script.args, ]); } catch { - return []; + return { symlinkedRoots: [], mutationFailures: [] }; } - const skipped: string[] = []; + const result: StateDirLockScriptResult = { + symlinkedRoots: [], + mutationFailures: [], + }; for (const line of stdout.split("\n")) { - const [tag, path] = line.split("\t"); - if (tag === "symlinked-root" && path) skipped.push(path); + const parts = line.split("\t"); + if (parts[0] === "symlinked-root" && parts[1]) { + result.symlinkedRoots.push(parts[1]); + } else if (parts[0] === "mutation-failed" && parts[1] && parts[2]) { + result.mutationFailures.push({ op: parts[1], path: parts[2] }); + } } - return skipped; + return result; } function restoreWritableRuntimeSubpaths( diff --git a/test/shields-up-runtime-perms.test.ts b/test/shields-up-runtime-perms.test.ts index bddbdd4f5c..1841bed824 100644 --- a/test/shields-up-runtime-perms.test.ts +++ b/test/shields-up-runtime-perms.test.ts @@ -31,10 +31,24 @@ Module._load = function patchedLoad(request, parent, isMain) { if (command[0] === "lsattr") { return "----i----------------- " + command.at(-1) + "\n"; } + if (command[0] === "sha256sum") { + return ( + "0000000000000000000000000000000000000000000000000000000000000001 " + + command.at(-1) + + "\n" + ); + } return ""; }, }; } + if (request === "../sandbox/privileged-exec") { + return { + privilegedSandboxExecArgv(_sandboxName, cmd) { + return [...cmd]; + }, + }; + } return originalLoad.call(this, request, parent, isMain); }; const { lockAgentConfig } = require("./dist/lib/shields/index.js"); @@ -76,8 +90,6 @@ Module._load = function patchedLoad(request, parent, isMain) { typeof command[2] === "string" && !command[2].includes("symlinked-root") ) { - // Preflight script: report a symlinked root so the caller - // refuses to lock without mutating any state dir. return symlinkedPath + "\n"; } if (command[0] === "stat" && command[1] === "-c") { @@ -88,10 +100,24 @@ Module._load = function patchedLoad(request, parent, isMain) { if (command[0] === "lsattr") { return "----i----------------- " + command.at(-1) + "\n"; } + if (command[0] === "sha256sum") { + return ( + "0000000000000000000000000000000000000000000000000000000000000001 " + + command.at(-1) + + "\n" + ); + } return ""; }, }; } + if (request === "../sandbox/privileged-exec") { + return { + privilegedSandboxExecArgv(_sandboxName, cmd) { + return [...cmd]; + }, + }; + } return originalLoad.call(this, request, parent, isMain); }; try { @@ -225,14 +251,19 @@ Module._load = function patchedLoad(request, parent, isMain) { typeof command[2] === "string" && !command[2].includes("symlinked-root") ) { - // Preflight script: report /sandbox/.openclaw/extensions as - // symlinked so the caller refuses to mutate. return "/sandbox/.openclaw/extensions\n"; } return ""; }, }; } + if (request === "../sandbox/privileged-exec") { + return { + privilegedSandboxExecArgv(_sandboxName, cmd) { + return [...cmd]; + }, + }; + } return originalLoad.call(this, request, parent, isMain); }; try { @@ -387,7 +418,7 @@ try { fs.rmSync(fixture, { recursive: true, force: true }); }); - // Defence in depth: if a malicious agent points `agents/` at /etc or + // Defense in depth: if a malicious agent points `agents/` at /etc or // any other host path before shields-up runs, the privileged restore // helper must not mkdir/chown/chmod through that symlink. The script // must drop symlinked parents (and symlinked targets) before touching From 9a6a63500c0b5698448c6cd9bb0f3dc5389ace1f Mon Sep 17 00:00:00 2001 From: Tinson Lai Date: Sun, 31 May 2026 08:29:44 +0000 Subject: [PATCH 11/13] refactor(shields): extract state-dir lock, fail-closed preflight, strip group read on credentials Signed-off-by: Tinson Lai --- docs/security/best-practices.mdx | 2 +- src/lib/shields/index.ts | 413 ++----------------------- src/lib/shields/state-dir-lock.ts | 490 ++++++++++++++++++++++++++++++ 3 files changed, 511 insertions(+), 394 deletions(-) create mode 100644 src/lib/shields/state-dir-lock.ts diff --git a/docs/security/best-practices.mdx b/docs/security/best-practices.mdx index 1fc8873428..eda3f77db8 100644 --- a/docs/security/best-practices.mdx +++ b/docs/security/best-practices.mdx @@ -201,7 +201,7 @@ Writable agent state such as plugins, skills, hooks, and workspace metadata live By default, this directory starts writable so the agent can manage its own config, install skills, and write to standard home-directory paths natively. For sensitive workloads, use a reviewed host-side immutability workflow after initial setup so config and high-risk state entry points cannot be changed by the sandbox user. -Under lockdown, the high-risk state directories (`skills`, `hooks`, `cron`, `agents`, `extensions`, `plugins`, `workspace`, `memory`, `credentials`, `identity`, `devices`, `canvas`, `telegram`, `wechat`, `whatsapp`, `platforms`, `weixin`, `pairing`, `profiles`, `skins`) are owned by `root:sandbox` rather than `root:root`, so the OpenClaw gateway (a member of the `sandbox` group) keeps read access to plugin and agent code while the sandbox user is denied write through `chmod -R go-w`. The list is the union of state directories declared by every shipped agent manifest; dirs that aren't present in a given agent's config tree are silently skipped. Runtime-mutable subtrees (`sessions/`, `memories/`, `logs/`, `cache/`, `plans/`, and `openclaw-weixin/` which is regenerated at image-build time) are intentionally exempt so the agent can keep operating. +Under lockdown, the high-risk state directories (`skills`, `hooks`, `cron`, `agents`, `extensions`, `plugins`, `workspace`, `memory`, `devices`, `canvas`, `telegram`, `wechat`, `whatsapp`, `platforms`, `weixin`, `profiles`, `skins`) are owned by `root:sandbox` rather than `root:root`, so the OpenClaw gateway (a member of the `sandbox` group) keeps read access to plugin and agent code while the sandbox user is denied write through `chmod -R go-w`. Secret-bearing directories (`credentials`, `identity`, `pairing`) get a stricter posture: `root:root 700` with `chmod -R go-rwX`, so neither the sandbox user nor the gateway can read them while shields are up. The mutable-default posture (`sandbox:sandbox 2770`) is restored for both groups on shields-down. The list is the union of state directories declared by every shipped agent manifest; dirs that aren't present in a given agent's config tree are silently skipped. Runtime-mutable subtrees (`sessions/`, `memories/`, `logs/`, `cache/`, `plans/`, and `openclaw-weixin/` which is regenerated at image-build time) are intentionally exempt so the agent can keep operating. A narrow set of runtime-data subpaths is exempted from the lock so the agent can keep operating — currently `agents//sessions/`, which the OpenClaw TUI creates and writes session metadata into; the lock helper restores those subpaths to `sandbox:sandbox 2770` after the surrounding tree is locked. If any high-risk state-dir root is a symlink when shields-up runs, the lock refuses to proceed and reports "Config not locked: state dir root is a symlink" rather than silently following the link with privileged `chown -R` / `chmod -R`. diff --git a/src/lib/shields/index.ts b/src/lib/shields/index.ts index 2328bf7dbe..90c66cd078 100644 --- a/src/lib/shields/index.ts +++ b/src/lib/shields/index.ts @@ -53,6 +53,9 @@ const { isHashVerificationIssue, isSha256Hex, }: typeof import("./seal") = require("./seal"); +const { + applyStateDirLockMode, +}: typeof import("./state-dir-lock") = require("./state-dir-lock"); const STATE_DIR = resolveNemoclawStateDir(); @@ -331,396 +334,18 @@ function isShieldsState(value: unknown): value is ShieldsState { } // --------------------------------------------------------------------------- -// State directories locked by shields-up. -// -// During shields-up, these must be locked so the sandbox user cannot create -// new entries or modify existing ones. This covers both executable state -// (skills, hooks, cron jobs, extensions, plugins, agent definitions) and -// writable agent state entry points such as workspace and memory, so a stale -// symlink bridge cannot bypass the lockdown. -// -// Lock ownership is `root:sandbox` (not `root:root`): the OpenClaw gateway -// runs as `gateway`, which is a member of the sandbox group via -// `Dockerfile.base`. Owning these dirs as `root:sandbox` preserves the -// sandbox group's `r-x` access to descendant files (after `chmod -R go-w`), -// so plugin discovery can still scan `extensions//` while the -// sandbox user is denied write through the stripped group/other write -// bits. The top-level config dir is still owned by `root:root` -// (lockAgentConfig) — only the high-risk state subtrees switch. -// -// The list is a superset: directories that don't exist in a given agent's -// config dir are silently skipped. -// -// Coverage tracks the union of state_dirs declared by every shipped agent -// manifest (agents/openclaw/manifest.yaml, agents/hermes/manifest.yaml). -// Runtime-mutable subtrees that must keep being writable while shields are -// up are intentionally omitted: -// - `sessions` (Hermes top-level) and `agents/*/sessions` (OpenClaw) — the -// latter is restored via WRITABLE_RUNTIME_SUBPATHS after the lock loop. -// - `memories`, `logs`, `cache`, `plans` (Hermes) — runtime mutables. -// - `openclaw-weixin` is regenerated from envs at image-build time -// (see src/lib/actions/sandbox/rebuild.ts) and is not a manifest state_dir. -// Any new agent state_dir that holds executable code or credentials must be -// added here in lockstep with its manifest entry. +// State-dir lock — adapter between this module's privileged-exec helpers and +// the lock pipeline in ./state-dir-lock. The inventory of locked dirs, the +// preflight/mutation/verification logic, and the `agents/*/sessions` +// carve-out live in that sibling module so this file stays focused on +// shields state transitions. // --------------------------------------------------------------------------- -const HIGH_RISK_STATE_DIRS = [ - "skills", - "hooks", - "cron", - "agents", - "extensions", - "plugins", // Hermes equivalent of extensions - "workspace", - "memory", - "credentials", - "identity", - "devices", - "canvas", - "telegram", - "wechat", // OpenClaw runtime channel state - "whatsapp", // OpenClaw + Hermes channel state (Hermes also nests under platforms/) - "platforms", // Hermes channel-bridge auth state (whatsapp/, etc.) - "weixin", // Hermes iLink WeChat per-account context tokens - "pairing", // Hermes device-pairing state - "profiles", // Hermes saved profiles - "skins", // Hermes UI/personality bundles (code-like) -]; - -// Runtime-data subpaths the agent must keep writing to under shields-up. -// Each entry is a shell glob relative to the agent config dir; after the -// main lock loop the matching directories are restored to -// `sandbox:sandbox 2770` so they remain writable inside an otherwise-locked -// tree. -const WRITABLE_RUNTIME_SUBPATHS = ["agents/*/sessions"]; - -function applyStateDirLockMode( - sandboxName: string, - configDir: string, - owner: string, - isLocking: boolean, -): string[] { - // Locking (shields-up) strips group + world write. Unlocking (shields-down) - // restores the same group-readable/writable + o-rwx mutable-default contract - // as startup, plus setgid so the gateway UID — now in the sandbox group via - // Dockerfile.base — can write to OpenClaw's mutable config tree (#2681). - // - // The unlock variant uses `g+rwX,o-rwx` because a prior lock can strip group - // access from descendants. Without re-adding group read/write explicitly, - // shields-down would leave nested files readable/writable only by owner. - const recursiveMode = isLocking ? "go-w" : "g+rwX,o-rwx"; - const dirMode = isLocking ? "755" : "2770"; - const clearSetgid = isLocking ? "1" : "0"; - - // Preflight: before any chown/chmod runs, scan every high-risk state-dir - // root and every workspace-* dir for symlinked roots. A pre-lockdown - // agent that swapped e.g. `extensions/` for a symlink to /etc could - // otherwise redirect the privileged `chown -R`/`chmod -R` at an - // attacker-controlled host path. Doing the symlink check inline in the - // mutation script would still leave any earlier (non-symlinked) dirs - // already mutated by the time the script reaches the bad entry, so - // shields-up could half-lock the tree and then fail. The preflight - // returns all symlinked paths up front; we abort lock without touching - // anything when any are present. - const symlinkedRoots = preflightSymlinkedRoots(sandboxName, configDir); - if (isLocking && symlinkedRoots.length > 0) { - return symlinkedRoots.map( - (path) => `state dir root is a symlink: ${path} (refusing to lock)`, - ); - } - - // Mutation pass — still skips symlinked roots inline as a defense-in-depth - // measure in case the preflight and mutation observe different fs state - // (the preflight pass already aborted shields-up in the common case). - const mainPassResult = runStateDirLockScript(sandboxName, { - script: ` -set -u -config_dir="$1" -owner="$2" -recursive_mode="$3" -dir_mode="$4" -clear_setgid="$5" -shift 5 -for dir in "$@"; do - path="$config_dir/$dir" - if [ -L "$path" ]; then - printf 'symlinked-root\\t%s\\n' "$path" - continue - fi - [ -d "$path" ] || continue - if ! chown -R "$owner" "$path" 2>/dev/null; then - printf 'mutation-failed\\tchown\\t%s\\n' "$path" - fi - if ! chmod "$dir_mode" "$path" 2>/dev/null; then - printf 'mutation-failed\\tchmod-dir\\t%s\\n' "$path" - fi - if [ "$clear_setgid" = "1" ]; then - chmod g-s "$path" 2>/dev/null || true - fi - if ! chmod -R "$recursive_mode" "$path" 2>/dev/null; then - printf 'mutation-failed\\tchmod-recursive\\t%s\\n' "$path" - fi -done -`, - args: [ - configDir, - owner, - recursiveMode, - dirMode, - clearSetgid, - ...HIGH_RISK_STATE_DIRS, - ], - }); - - // Multi-agent OpenClaw workspaces are named workspace-. They are - // discovered dynamically because they are configured by openclaw.json. - const workspacePassResult = runStateDirLockScript(sandboxName, { - script: ` -set -u -config_dir="$1" -owner="$2" -recursive_mode="$3" -dir_mode="$4" -clear_setgid="$5" -for dir in "$config_dir"/workspace-*; do - if [ -L "$dir" ]; then - printf 'symlinked-root\\t%s\\n' "$dir" - continue - fi - [ -d "$dir" ] || continue - if ! chown -R "$owner" "$dir" 2>/dev/null; then - printf 'mutation-failed\\tchown\\t%s\\n' "$dir" - fi - if ! chmod "$dir_mode" "$dir" 2>/dev/null; then - printf 'mutation-failed\\tchmod-dir\\t%s\\n' "$dir" - fi - if [ "$clear_setgid" = "1" ]; then - chmod g-s "$dir" 2>/dev/null || true - fi - if ! chmod -R "$recursive_mode" "$dir" 2>/dev/null; then - printf 'mutation-failed\\tchmod-recursive\\t%s\\n' "$dir" - fi -done -`, - args: [configDir, owner, recursiveMode, dirMode, clearSetgid], - }); - - if (isLocking) { - restoreWritableRuntimeSubpaths(sandboxName, configDir); - } - - const issues: string[] = []; - for (const path of [...mainPassResult.symlinkedRoots, ...workspacePassResult.symlinkedRoots]) { - issues.push(`state dir root is a symlink: ${path} (refusing to lock)`); - } - for (const failure of [...mainPassResult.mutationFailures, ...workspacePassResult.mutationFailures]) { - issues.push(`state dir mutation failed (${failure.op}): ${failure.path}`); - } - if (isLocking) { - const verifyIssues = verifyStateDirsLocked(sandboxName, configDir, owner, dirMode); - issues.push(...verifyIssues); - } - return issues; -} - -// Post-lock verification: stat every existing high-risk state-dir root (and -// every workspace-* root) and confirm the owner + dir mode match what the -// lock pass should have established. The script-level chown/chmod fallbacks -// suppress per-call errors so transient kernel/filesystem hiccups don't -// abort the whole fan-out; the verification pass below is what catches a -// silent mutation failure and surfaces it as a lock issue so `shields up` -// does not report success while extensions/agents/etc. remain unreadable. -function verifyStateDirsLocked( - sandboxName: string, - configDir: string, - expectedOwner: string, - expectedDirMode: string, -): string[] { - let stdout = ""; - try { - stdout = privilegedSandboxExecCapture(sandboxName, [ - "sh", - "-c", - ` -set -u -config_dir="$1" -expected_owner="$2" -expected_mode="$3" -shift 3 -check() { - path="$1" - [ -L "$path" ] && { printf 'verify-symlink\\t%s\\n' "$path"; return; } - [ -d "$path" ] || return - perms="$(stat -c '%a %U:%G' "$path" 2>/dev/null)" || { - printf 'verify-stat-failed\\t%s\\n' "$path" - return - } - mode="\${perms%% *}" - owner="\${perms#* }" - [ "$mode" = "$expected_mode" ] || printf 'verify-mode\\t%s\\t%s\\t%s\\n' "$path" "$mode" "$expected_mode" - [ "$owner" = "$expected_owner" ] || printf 'verify-owner\\t%s\\t%s\\t%s\\n' "$path" "$owner" "$expected_owner" -} -for dir in "$@"; do - check "$config_dir/$dir" -done -for dir in "$config_dir"/workspace-*; do - case "$dir" in *"*"*) continue ;; esac - check "$dir" -done -`, - "sh", - configDir, - expectedOwner, - expectedDirMode, - ...HIGH_RISK_STATE_DIRS, - ]); - } catch { - return []; - } - const issues: string[] = []; - for (const line of stdout.split("\n")) { - const parts = line.split("\t"); - if (parts[0] === "verify-symlink" && parts[1]) { - issues.push(`state dir became a symlink mid-lock: ${parts[1]}`); - } else if (parts[0] === "verify-stat-failed" && parts[1]) { - issues.push(`state dir stat failed after lock: ${parts[1]}`); - } else if (parts[0] === "verify-mode" && parts[1]) { - issues.push(`state dir mode=${parts[2]} (expected ${parts[3]}): ${parts[1]}`); - } else if (parts[0] === "verify-owner" && parts[1]) { - issues.push(`state dir owner=${parts[2]} (expected ${parts[3]}): ${parts[1]}`); - } - } - return issues; -} - -// Probe every high-risk state-dir root and every workspace-* dir for -// symlinks before any mutation runs. Returns the absolute paths of all -// symlinked roots; an empty list means it is safe to proceed with the -// lock fan-out. Best-effort: a failed shell exec returns an empty list, -// matching the rest of the lock pipeline's "verification below catches -// the primary config lock" stance. -function preflightSymlinkedRoots(sandboxName: string, configDir: string): string[] { - let stdout = ""; - try { - stdout = privilegedSandboxExecCapture(sandboxName, [ - "sh", - "-c", - ` -set -u -config_dir="$1" -shift -for dir in "$@"; do - path="$config_dir/$dir" - [ -L "$path" ] && printf '%s\\n' "$path" -done -for dir in "$config_dir"/workspace-*; do - [ -L "$dir" ] && printf '%s\\n' "$dir" -done -`, - "sh", - configDir, - ...HIGH_RISK_STATE_DIRS, - ]); - } catch { - return []; - } - return stdout - .split("\n") - .map((line) => line.trim()) - .filter((line) => line.length > 0); -} - -interface StateDirLockScript { - script: string; - args: string[]; -} - -interface StateDirLockScriptResult { - symlinkedRoots: string[]; - mutationFailures: Array<{ op: string; path: string }>; -} - -// Run a privileged shell script that mutates state-dir permissions and -// reports any symlinked roots it refused to touch via tab-prefixed stdout -// lines ("symlinked-root\t"), plus per-op mutation failures -// ("mutation-failed\t\t"). Returns the parsed result; on any -// other failure, returns an empty result so the caller can continue with -// the rest of the lock fan-out. -function runStateDirLockScript( - sandboxName: string, - script: StateDirLockScript, -): StateDirLockScriptResult { - let stdout = ""; - try { - stdout = privilegedSandboxExecCapture(sandboxName, [ - "sh", - "-c", - script.script, - "sh", - ...script.args, - ]); - } catch { - return { symlinkedRoots: [], mutationFailures: [] }; - } - const result: StateDirLockScriptResult = { - symlinkedRoots: [], - mutationFailures: [], +function stateDirLockExec(sandboxName: string) { + return { + exec: (cmd: string[]) => privilegedSandboxExec(sandboxName, cmd), + capture: (cmd: string[]) => privilegedSandboxExecCapture(sandboxName, cmd), }; - for (const line of stdout.split("\n")) { - const parts = line.split("\t"); - if (parts[0] === "symlinked-root" && parts[1]) { - result.symlinkedRoots.push(parts[1]); - } else if (parts[0] === "mutation-failed" && parts[1] && parts[2]) { - result.mutationFailures.push({ op: parts[1], path: parts[2] }); - } - } - return result; -} - -function restoreWritableRuntimeSubpaths( - sandboxName: string, - configDir: string, -): void { - try { - privilegedSandboxExec(sandboxName, [ - "sh", - "-c", - ` -set -u -config_dir="$1" -shift -for pattern in "$@"; do - case "$pattern" in - */*) prefix="\${pattern%/*}"; leaf="\${pattern##*/}" ;; - *) prefix=""; leaf="$pattern" ;; - esac - if [ -n "$prefix" ]; then - set -- "$config_dir"/$prefix - else - set -- "$config_dir" - fi - for parent in "$@"; do - case "$parent" in - *"*"*) continue ;; - esac - [ -L "$parent" ] && continue - [ -d "$parent" ] || continue - target="$parent/$leaf" - [ -L "$target" ] && continue - mkdir -p "$target" 2>/dev/null || continue - chown -R sandbox:sandbox "$target" 2>/dev/null || true - chmod 2770 "$target" 2>/dev/null || true - chmod -R g+rwX,o-rwx "$target" 2>/dev/null || true - done -done -`, - "sh", - configDir, - ...WRITABLE_RUNTIME_SUBPATHS, - ]); - } catch { - // Best effort; sandbox can recreate sessions/ in shields-down if missing. - } } function legacyDataDirFor(configDir: string): string { @@ -826,7 +451,7 @@ function unlockAgentConfig( // Use chown -R to restore the full tree (files within may have been // locked to root:root by a prior shields-up). applyStateDirLockMode( - sandboxName, + stateDirLockExec(sandboxName), target.configDir, "sandbox:sandbox", false, @@ -979,12 +604,14 @@ function lockAgentConfig( } } - // Lock state directories. Owned by `root:sandbox` so the gateway (in - // sandbox group) can still read plugin/agent code while the sandbox user - // is denied write through `chmod -R go-w`. See HIGH_RISK_STATE_DIRS doc - // above. Top-level configDir stays root:root. + // Lock state directories. High-risk dirs use `root:sandbox` ownership so + // the gateway (in the sandbox group) can still read plugin/agent code while + // the sandbox user is denied write through `chmod -R go-w`. Secret-bearing + // dirs (CONFIDENTIALITY_STATE_DIRS in ./state-dir-lock) go to `root:root` + // 700/go-rwX so neither the sandbox user nor the gateway can read them + // while shields are up. Top-level configDir stays root:root. const stateDirLockIssues = applyStateDirLockMode( - sandboxName, + stateDirLockExec(sandboxName), target.configDir, "root:sandbox", true, diff --git a/src/lib/shields/state-dir-lock.ts b/src/lib/shields/state-dir-lock.ts new file mode 100644 index 0000000000..fc66379433 --- /dev/null +++ b/src/lib/shields/state-dir-lock.ts @@ -0,0 +1,490 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +// State-dir lock fan-out for shields up/down. Owns the inventory of +// high-risk and secret-bearing state directories, the preflight + mutation +// + verification pipeline, and the runtime-subpath carve-out for +// `agents/*/sessions`. The shields entrypoint (`src/lib/shields/index.ts`) +// stays focused on shields state transitions and delegates the chown/chmod +// fan-out to this module. + +export interface PrivilegedExec { + exec(cmd: string[]): void; + capture(cmd: string[]): string; +} + +// --------------------------------------------------------------------------- +// State directories locked by shields-up. +// +// During shields-up, these must be locked so the sandbox user cannot create +// new entries or modify existing ones. This covers both executable state +// (skills, hooks, cron jobs, extensions, plugins, agent definitions) and +// writable agent state entry points such as workspace and memory, so a stale +// symlink bridge cannot bypass the lockdown. +// +// Lock ownership for HIGH_RISK_STATE_DIRS is `root:sandbox` (not +// `root:root`): the OpenClaw gateway runs as `gateway`, which is a member of +// the sandbox group via `Dockerfile.base`. Owning these dirs as +// `root:sandbox` preserves the sandbox group's `r-x` access to descendant +// files (after `chmod -R go-w`), so plugin discovery can still scan +// `extensions//` while the sandbox user is denied write through the +// stripped group/other write bits. +// +// CONFIDENTIALITY_STATE_DIRS holds secret-bearing trees (auth tokens, host +// device pairing material). These get a stricter posture under shields-up: +// `root:root 700` with `chmod -R go-rwX`, so neither the sandbox user nor +// the gateway can read them while shields are up. The mutable-default +// posture is restored on shields-down. +// +// The list is a superset: directories that don't exist in a given agent's +// config dir are silently skipped. +// +// Coverage tracks the union of state_dirs declared by every shipped agent +// manifest (agents/openclaw/manifest.yaml, agents/hermes/manifest.yaml). +// Runtime-mutable subtrees that must keep being writable while shields are +// up are intentionally omitted: +// - `sessions` (Hermes top-level) and `agents/*/sessions` (OpenClaw) — the +// latter is restored via WRITABLE_RUNTIME_SUBPATHS after the lock loop. +// - `memories`, `logs`, `cache`, `plans` (Hermes) — runtime mutables. +// - `openclaw-weixin` is regenerated from envs at image-build time +// (see src/lib/actions/sandbox/rebuild.ts) and is not a manifest state_dir. +// Any new agent state_dir that holds executable code or credentials must be +// added to one of these sets in lockstep with its manifest entry. +// --------------------------------------------------------------------------- + +export const HIGH_RISK_STATE_DIRS = [ + "skills", + "hooks", + "cron", + "agents", + "extensions", + "plugins", // Hermes equivalent of extensions + "workspace", + "memory", + "devices", + "canvas", + "telegram", + "wechat", // OpenClaw runtime channel state + "whatsapp", // OpenClaw + Hermes channel state (Hermes also nests under platforms/) + "platforms", // Hermes channel-bridge auth state (whatsapp/, etc.) + "weixin", // Hermes iLink WeChat per-account context tokens + "profiles", // Hermes saved profiles + "skins", // Hermes UI/personality bundles (code-like) +]; + +export const CONFIDENTIALITY_STATE_DIRS = [ + "credentials", + "identity", + "pairing", // Hermes device-pairing material (auth tokens) +]; + +// Runtime-data subpaths the agent must keep writing to under shields-up. +// Each entry is a shell glob relative to the agent config dir; after the +// main lock loop the matching directories are restored to +// `sandbox:sandbox 2770` so they remain writable inside an otherwise-locked +// tree. +// +// Removal condition: when the OpenClaw runtime moves session state out of +// the locked config tree (e.g. into `/sandbox/.openclaw-runtime/`), this +// carve-out can be deleted and lockAgentConfig can leave the tree fully +// owned by `root:sandbox` with no writable holes. +export const WRITABLE_RUNTIME_SUBPATHS = ["agents/*/sessions"]; + +interface StateDirLockScript { + script: string; + args: string[]; +} + +interface StateDirLockScriptResult { + symlinkedRoots: string[]; + mutationFailures: Array<{ op: string; path: string }>; +} + +interface PreflightResult { + symlinkedRoots: string[]; + // Non-null when the preflight exec itself failed. Fail-closed: the + // caller must treat a non-null `error` as "do not lock" so a kubectl + // exec hiccup cannot let shields-up advance while a symlinked root + // remains unobserved. + error: string | null; +} + +// Apply lock or unlock mode to every existing high-risk and confidentiality +// state directory under `configDir`. Returns a list of issues; an empty +// list means the fan-out completed cleanly. +export function applyStateDirLockMode( + privileged: PrivilegedExec, + configDir: string, + highRiskOwner: string, + isLocking: boolean, +): string[] { + const allStateDirs = [...HIGH_RISK_STATE_DIRS, ...CONFIDENTIALITY_STATE_DIRS]; + + // Preflight: before any chown/chmod runs, scan every state-dir root and + // every workspace-* dir for symlinked roots. A pre-lockdown agent that + // swapped e.g. `extensions/` for a symlink to /etc could otherwise + // redirect the privileged `chown -R`/`chmod -R` at an attacker-controlled + // host path. + const preflight = preflightSymlinkedRoots(privileged, configDir, allStateDirs); + if (isLocking && preflight.error !== null) { + return [`Symlink preflight failed; refusing to lock: ${preflight.error}`]; + } + if (isLocking && preflight.symlinkedRoots.length > 0) { + return preflight.symlinkedRoots.map( + (path) => `state dir root is a symlink: ${path} (refusing to lock)`, + ); + } + + // Locking (shields-up) strips group + world write for HIGH_RISK dirs. + // Unlocking (shields-down) restores the same group-readable/writable + + // o-rwx mutable-default contract as startup, plus setgid so the gateway + // UID — now in the sandbox group via Dockerfile.base — can write to + // OpenClaw's mutable config tree (#2681). The unlock variant uses + // `g+rwX,o-rwx` because a prior lock can strip group access from + // descendants. + const highRiskRecursiveMode = isLocking ? "go-w" : "g+rwX,o-rwx"; + const highRiskDirMode = isLocking ? "755" : "2770"; + const clearSetgid = isLocking ? "1" : "0"; + + const mainPassResult = runStateDirLockScript(privileged, { + script: STATE_DIR_LOCK_SCRIPT_BY_LIST, + args: [ + configDir, + highRiskOwner, + highRiskRecursiveMode, + highRiskDirMode, + clearSetgid, + ...HIGH_RISK_STATE_DIRS, + ], + }); + + // Multi-agent OpenClaw workspaces are named workspace-. Glob is + // expanded by the shell because the list is configured by openclaw.json. + const workspacePassResult = runStateDirLockScript(privileged, { + script: STATE_DIR_LOCK_SCRIPT_WORKSPACE_GLOB, + args: [configDir, highRiskOwner, highRiskRecursiveMode, highRiskDirMode, clearSetgid], + }); + + // Secret-bearing dirs: stricter posture. Locked = `root:root 700` with + // `go-rwX` so neither sandbox user nor gateway can read them while + // shields are up. Unlocked = sandbox:sandbox 2770 / g+rwX. + const confidentialityOwner = isLocking ? "root:root" : highRiskOwner; + const confidentialityRecursiveMode = isLocking ? "go-rwX" : "g+rwX,o-rwx"; + const confidentialityDirMode = isLocking ? "700" : "2770"; + const confidentialityPassResult = runStateDirLockScript(privileged, { + script: STATE_DIR_LOCK_SCRIPT_BY_LIST, + args: [ + configDir, + confidentialityOwner, + confidentialityRecursiveMode, + confidentialityDirMode, + clearSetgid, + ...CONFIDENTIALITY_STATE_DIRS, + ], + }); + + const issues: string[] = []; + const allResults = [mainPassResult, workspacePassResult, confidentialityPassResult]; + for (const result of allResults) { + for (const path of result.symlinkedRoots) { + issues.push(`state dir root is a symlink: ${path} (refusing to lock)`); + } + for (const failure of result.mutationFailures) { + issues.push(`state dir mutation failed (${failure.op}): ${failure.path}`); + } + } + + if (isLocking) { + issues.push( + ...verifyStateDirsLocked(privileged, configDir, { + highRiskOwner, + highRiskDirMode, + confidentialityOwner, + confidentialityDirMode, + }), + ); + } + + // Restoration of runtime-writable subpaths runs only when the lock + // fan-out reported no issues. If preflight, mutation, or verification + // surfaced anything, the tree is in a known-bad state and re-opening + // `agents/*/sessions` for the sandbox user would widen the blast radius + // of whatever broke. + if (isLocking && issues.length === 0) { + const restoreError = restoreWritableRuntimeSubpaths(privileged, configDir); + if (restoreError !== null) { + issues.push(`runtime-writable subpath restore failed: ${restoreError}`); + } + } + return issues; +} + +const STATE_DIR_LOCK_SCRIPT_BY_LIST = ` +set -u +config_dir="$1" +owner="$2" +recursive_mode="$3" +dir_mode="$4" +clear_setgid="$5" +shift 5 +for dir in "$@"; do + path="$config_dir/$dir" + if [ -L "$path" ]; then + printf 'symlinked-root\\t%s\\n' "$path" + continue + fi + [ -d "$path" ] || continue + if ! chown -R "$owner" "$path" 2>/dev/null; then + printf 'mutation-failed\\tchown\\t%s\\n' "$path" + fi + if ! chmod "$dir_mode" "$path" 2>/dev/null; then + printf 'mutation-failed\\tchmod-dir\\t%s\\n' "$path" + fi + if [ "$clear_setgid" = "1" ]; then + chmod g-s "$path" 2>/dev/null || true + fi + if ! chmod -R "$recursive_mode" "$path" 2>/dev/null; then + printf 'mutation-failed\\tchmod-recursive\\t%s\\n' "$path" + fi +done +`; + +const STATE_DIR_LOCK_SCRIPT_WORKSPACE_GLOB = ` +set -u +config_dir="$1" +owner="$2" +recursive_mode="$3" +dir_mode="$4" +clear_setgid="$5" +for dir in "$config_dir"/workspace-*; do + if [ -L "$dir" ]; then + printf 'symlinked-root\\t%s\\n' "$dir" + continue + fi + [ -d "$dir" ] || continue + if ! chown -R "$owner" "$dir" 2>/dev/null; then + printf 'mutation-failed\\tchown\\t%s\\n' "$dir" + fi + if ! chmod "$dir_mode" "$dir" 2>/dev/null; then + printf 'mutation-failed\\tchmod-dir\\t%s\\n' "$dir" + fi + if [ "$clear_setgid" = "1" ]; then + chmod g-s "$dir" 2>/dev/null || true + fi + if ! chmod -R "$recursive_mode" "$dir" 2>/dev/null; then + printf 'mutation-failed\\tchmod-recursive\\t%s\\n' "$dir" + fi +done +`; + +interface VerifyExpectations { + highRiskOwner: string; + highRiskDirMode: string; + confidentialityOwner: string; + confidentialityDirMode: string; +} + +function verifyStateDirsLocked( + privileged: PrivilegedExec, + configDir: string, + expected: VerifyExpectations, +): string[] { + const issues: string[] = []; + issues.push( + ...verifyStateDirGroup(privileged, configDir, HIGH_RISK_STATE_DIRS, { + owner: expected.highRiskOwner, + dirMode: expected.highRiskDirMode, + includeWorkspaceGlob: true, + }), + ); + issues.push( + ...verifyStateDirGroup(privileged, configDir, CONFIDENTIALITY_STATE_DIRS, { + owner: expected.confidentialityOwner, + dirMode: expected.confidentialityDirMode, + includeWorkspaceGlob: false, + }), + ); + return issues; +} + +function verifyStateDirGroup( + privileged: PrivilegedExec, + configDir: string, + dirs: readonly string[], + expected: { owner: string; dirMode: string; includeWorkspaceGlob: boolean }, +): string[] { + let stdout = ""; + try { + stdout = privileged.capture([ + "sh", + "-c", + ` +set -u +config_dir="$1" +expected_owner="$2" +expected_mode="$3" +include_workspace="$4" +shift 4 +check() { + path="$1" + [ -L "$path" ] && { printf 'verify-symlink\\t%s\\n' "$path"; return; } + [ -d "$path" ] || return + perms="$(stat -c '%a %U:%G' "$path" 2>/dev/null)" || { + printf 'verify-stat-failed\\t%s\\n' "$path" + return + } + mode="\${perms%% *}" + owner="\${perms#* }" + [ "$mode" = "$expected_mode" ] || printf 'verify-mode\\t%s\\t%s\\t%s\\n' "$path" "$mode" "$expected_mode" + [ "$owner" = "$expected_owner" ] || printf 'verify-owner\\t%s\\t%s\\t%s\\n' "$path" "$owner" "$expected_owner" +} +for dir in "$@"; do + check "$config_dir/$dir" +done +if [ "$include_workspace" = "1" ]; then + for dir in "$config_dir"/workspace-*; do + case "$dir" in *"*"*) continue ;; esac + check "$dir" + done +fi +`, + "sh", + configDir, + expected.owner, + expected.dirMode, + expected.includeWorkspaceGlob ? "1" : "0", + ...dirs, + ]); + } catch (err) { + const msg = err instanceof Error ? err.message : String(err); + return [`state dir verification exec failed: ${msg}`]; + } + const issues: string[] = []; + for (const line of stdout.split("\n")) { + const parts = line.split("\t"); + if (parts[0] === "verify-symlink" && parts[1]) { + issues.push(`state dir became a symlink mid-lock: ${parts[1]}`); + } else if (parts[0] === "verify-stat-failed" && parts[1]) { + issues.push(`state dir stat failed after lock: ${parts[1]}`); + } else if (parts[0] === "verify-mode" && parts[1]) { + issues.push(`state dir mode=${parts[2]} (expected ${parts[3]}): ${parts[1]}`); + } else if (parts[0] === "verify-owner" && parts[1]) { + issues.push(`state dir owner=${parts[2]} (expected ${parts[3]}): ${parts[1]}`); + } + } + return issues; +} + +function preflightSymlinkedRoots( + privileged: PrivilegedExec, + configDir: string, + allStateDirs: readonly string[], +): PreflightResult { + let stdout = ""; + try { + stdout = privileged.capture([ + "sh", + "-c", + ` +set -u +config_dir="$1" +shift +for dir in "$@"; do + path="$config_dir/$dir" + [ -L "$path" ] && printf '%s\\n' "$path" +done +for dir in "$config_dir"/workspace-*; do + [ -L "$dir" ] && printf '%s\\n' "$dir" +done +`, + "sh", + configDir, + ...allStateDirs, + ]); + } catch (err) { + const msg = err instanceof Error ? err.message : String(err); + return { symlinkedRoots: [], error: msg }; + } + return { + symlinkedRoots: stdout + .split("\n") + .map((line) => line.trim()) + .filter((line) => line.length > 0), + error: null, + }; +} + +function runStateDirLockScript( + privileged: PrivilegedExec, + script: StateDirLockScript, +): StateDirLockScriptResult { + let stdout = ""; + try { + stdout = privileged.capture(["sh", "-c", script.script, "sh", ...script.args]); + } catch { + return { symlinkedRoots: [], mutationFailures: [] }; + } + const result: StateDirLockScriptResult = { + symlinkedRoots: [], + mutationFailures: [], + }; + for (const line of stdout.split("\n")) { + const parts = line.split("\t"); + if (parts[0] === "symlinked-root" && parts[1]) { + result.symlinkedRoots.push(parts[1]); + } else if (parts[0] === "mutation-failed" && parts[1] && parts[2]) { + result.mutationFailures.push({ op: parts[1], path: parts[2] }); + } + } + return result; +} + +// Restore runtime-writable subpaths after a successful lock fan-out. Returns +// null on success, an error message on failure so the caller can surface +// the failure rather than silently swallowing it. +function restoreWritableRuntimeSubpaths( + privileged: PrivilegedExec, + configDir: string, +): string | null { + try { + privileged.exec([ + "sh", + "-c", + ` +set -u +config_dir="$1" +shift +for pattern in "$@"; do + case "$pattern" in + */*) prefix="\${pattern%/*}"; leaf="\${pattern##*/}" ;; + *) prefix=""; leaf="$pattern" ;; + esac + if [ -n "$prefix" ]; then + set -- "$config_dir"/$prefix + else + set -- "$config_dir" + fi + for parent in "$@"; do + case "$parent" in + *"*"*) continue ;; + esac + [ -L "$parent" ] && continue + [ -d "$parent" ] || continue + target="$parent/$leaf" + [ -L "$target" ] && continue + mkdir -p "$target" 2>/dev/null || continue + chown -R sandbox:sandbox "$target" 2>/dev/null || true + chmod 2770 "$target" 2>/dev/null || true + chmod -R g+rwX,o-rwx "$target" 2>/dev/null || true + done +done +`, + "sh", + configDir, + ...WRITABLE_RUNTIME_SUBPATHS, + ]); + } catch (err) { + return err instanceof Error ? err.message : String(err); + } + return null; +} From 6d8233d2c209eef05577eeba22916f87eda0d115 Mon Sep 17 00:00:00 2001 From: Tinson Lai Date: Sun, 31 May 2026 08:55:12 +0000 Subject: [PATCH 12/13] fix(shields): hoist preflight, exit 0 on empty roots, verify+surface restore failures Signed-off-by: Tinson Lai --- docs/security/best-practices.mdx | 8 +- src/lib/shields/index.ts | 13 +++ src/lib/shields/state-dir-lock.ts | 146 ++++++++++++++++++-------- test/shields-up-runtime-perms.test.ts | 4 +- 4 files changed, 126 insertions(+), 45 deletions(-) diff --git a/docs/security/best-practices.mdx b/docs/security/best-practices.mdx index eda3f77db8..ac00980baf 100644 --- a/docs/security/best-practices.mdx +++ b/docs/security/best-practices.mdx @@ -201,7 +201,13 @@ Writable agent state such as plugins, skills, hooks, and workspace metadata live By default, this directory starts writable so the agent can manage its own config, install skills, and write to standard home-directory paths natively. For sensitive workloads, use a reviewed host-side immutability workflow after initial setup so config and high-risk state entry points cannot be changed by the sandbox user. -Under lockdown, the high-risk state directories (`skills`, `hooks`, `cron`, `agents`, `extensions`, `plugins`, `workspace`, `memory`, `devices`, `canvas`, `telegram`, `wechat`, `whatsapp`, `platforms`, `weixin`, `profiles`, `skins`) are owned by `root:sandbox` rather than `root:root`, so the OpenClaw gateway (a member of the `sandbox` group) keeps read access to plugin and agent code while the sandbox user is denied write through `chmod -R go-w`. Secret-bearing directories (`credentials`, `identity`, `pairing`) get a stricter posture: `root:root 700` with `chmod -R go-rwX`, so neither the sandbox user nor the gateway can read them while shields are up. The mutable-default posture (`sandbox:sandbox 2770`) is restored for both groups on shields-down. The list is the union of state directories declared by every shipped agent manifest; dirs that aren't present in a given agent's config tree are silently skipped. Runtime-mutable subtrees (`sessions/`, `memories/`, `logs/`, `cache/`, `plans/`, and `openclaw-weixin/` which is regenerated at image-build time) are intentionally exempt so the agent can keep operating. +Shields-up locks the high-risk state directories (`skills`, `hooks`, `cron`, `agents`, `extensions`, `plugins`, `workspace`, `memory`, `devices`, `canvas`, `telegram`, `wechat`, `whatsapp`, `platforms`, `weixin`, `profiles`, `skins`) to `root:sandbox` with `chmod -R go-w`. +The OpenClaw gateway (a member of the `sandbox` group) keeps read access to plugin and agent code; the sandbox user can no longer write them. +Shields-up also locks the secret-bearing directories (`credentials`, `identity`, `pairing`) to `root:root 700` with `chmod -R go-rwX`. +Neither the sandbox user nor the gateway can read those secrets while shields are up. +Shields-down restores both groups to the mutable-default posture (`sandbox:sandbox 2770`). +The list is the union of state directories declared by every shipped agent manifest; the lock helper silently skips dirs that aren't present in a given agent's config tree. +Shields-up exempts runtime-mutable subtrees (`sessions/`, `memories/`, `logs/`, `cache/`, `plans/`, plus `openclaw-weixin/`, which the image build regenerates) so the agent can keep operating. A narrow set of runtime-data subpaths is exempted from the lock so the agent can keep operating — currently `agents//sessions/`, which the OpenClaw TUI creates and writes session metadata into; the lock helper restores those subpaths to `sandbox:sandbox 2770` after the surrounding tree is locked. If any high-risk state-dir root is a symlink when shields-up runs, the lock refuses to proceed and reports "Config not locked: state dir root is a symlink" rather than silently following the link with privileged `chown -R` / `chmod -R`. diff --git a/src/lib/shields/index.ts b/src/lib/shields/index.ts index 90c66cd078..1b510b15b0 100644 --- a/src/lib/shields/index.ts +++ b/src/lib/shields/index.ts @@ -55,6 +55,7 @@ const { }: typeof import("./seal") = require("./seal"); const { applyStateDirLockMode, + preflightStateDirLock, }: typeof import("./state-dir-lock") = require("./state-dir-lock"); const STATE_DIR = resolveNemoclawStateDir(); @@ -564,6 +565,18 @@ function lockAgentConfig( const errors: string[] = []; const filesToLock = [target.configPath, ...(target.sensitiveFiles || [])]; + // Symlink preflight runs before any file or directory mutation: if a + // pre-lockdown agent swapped e.g. `extensions/` for a symlink to /etc, + // we abort before the privileged chmod/chown touches anything, so the + // tree is never half-mutated against an attacker-controlled host path. + const preflightIssues = preflightStateDirLock( + stateDirLockExec(sandboxName), + target.configDir, + ); + if (preflightIssues.length > 0) { + throw new Error(`Config not locked: ${preflightIssues.join(", ")}`); + } + for (const f of filesToLock) { try { privilegedSandboxExec(sandboxName, ["chmod", "444", f]); diff --git a/src/lib/shields/state-dir-lock.ts b/src/lib/shields/state-dir-lock.ts index fc66379433..351b0dc0a1 100644 --- a/src/lib/shields/state-dir-lock.ts +++ b/src/lib/shields/state-dir-lock.ts @@ -98,6 +98,10 @@ interface StateDirLockScript { interface StateDirLockScriptResult { symlinkedRoots: string[]; mutationFailures: Array<{ op: string; path: string }>; + // Non-null when the script exec itself failed (kubectl exec hiccup, + // privileged-exec timeout, etc.). Fail-closed: the caller surfaces a + // lock issue rather than treating the empty output as a clean run. + execError: string | null; } interface PreflightResult { @@ -109,37 +113,47 @@ interface PreflightResult { error: string | null; } +// Run the symlink preflight against every state-dir root and the +// `workspace-*` glob, returning issues the caller must surface before +// touching any config files. Empty list means it is safe to proceed with +// `applyStateDirLockMode`. Exposed separately so callers can hoist this +// before chmod/chown on configPath + sensitiveFiles, keeping the "no +// mutations until preflight clears" invariant. +export function preflightStateDirLock( + privileged: PrivilegedExec, + configDir: string, +): string[] { + const allStateDirs = [...HIGH_RISK_STATE_DIRS, ...CONFIDENTIALITY_STATE_DIRS]; + const preflight = preflightSymlinkedRoots(privileged, configDir, allStateDirs); + if (preflight.error !== null) { + return [`Symlink preflight failed; refusing to lock: ${preflight.error}`]; + } + return preflight.symlinkedRoots.map( + (path) => `state dir root is a symlink: ${path} (refusing to lock)`, + ); +} + // Apply lock or unlock mode to every existing high-risk and confidentiality // state directory under `configDir`. Returns a list of issues; an empty // list means the fan-out completed cleanly. +// +// Callers that lock (`isLocking === true`) must invoke +// `preflightStateDirLock` first and abort on a non-empty result. This +// function intentionally re-runs symlink checks inline so a mid-lock race +// (state-dir root becoming a symlink after the hoisted preflight) is still +// caught, but the hoisted preflight is the one that keeps file mutations +// from leaking out before validation. export function applyStateDirLockMode( privileged: PrivilegedExec, configDir: string, highRiskOwner: string, isLocking: boolean, ): string[] { - const allStateDirs = [...HIGH_RISK_STATE_DIRS, ...CONFIDENTIALITY_STATE_DIRS]; - - // Preflight: before any chown/chmod runs, scan every state-dir root and - // every workspace-* dir for symlinked roots. A pre-lockdown agent that - // swapped e.g. `extensions/` for a symlink to /etc could otherwise - // redirect the privileged `chown -R`/`chmod -R` at an attacker-controlled - // host path. - const preflight = preflightSymlinkedRoots(privileged, configDir, allStateDirs); - if (isLocking && preflight.error !== null) { - return [`Symlink preflight failed; refusing to lock: ${preflight.error}`]; - } - if (isLocking && preflight.symlinkedRoots.length > 0) { - return preflight.symlinkedRoots.map( - (path) => `state dir root is a symlink: ${path} (refusing to lock)`, - ); - } - // Locking (shields-up) strips group + world write for HIGH_RISK dirs. // Unlocking (shields-down) restores the same group-readable/writable + // o-rwx mutable-default contract as startup, plus setgid so the gateway // UID — now in the sandbox group via Dockerfile.base — can write to - // OpenClaw's mutable config tree (#2681). The unlock variant uses + // OpenClaw's mutable config tree. The unlock variant uses // `g+rwX,o-rwx` because a prior lock can strip group access from // descendants. const highRiskRecursiveMode = isLocking ? "go-w" : "g+rwX,o-rwx"; @@ -184,8 +198,15 @@ export function applyStateDirLockMode( }); const issues: string[] = []; - const allResults = [mainPassResult, workspacePassResult, confidentialityPassResult]; - for (const result of allResults) { + const allResults = [ + { label: "high-risk", result: mainPassResult }, + { label: "workspace-*", result: workspacePassResult }, + { label: "confidentiality", result: confidentialityPassResult }, + ]; + for (const { label, result } of allResults) { + if (result.execError !== null) { + issues.push(`state dir lock ${label} exec failed: ${result.execError}`); + } for (const path of result.symlinkedRoots) { issues.push(`state dir root is a symlink: ${path} (refusing to lock)`); } @@ -211,10 +232,7 @@ export function applyStateDirLockMode( // `agents/*/sessions` for the sandbox user would widen the blast radius // of whatever broke. if (isLocking && issues.length === 0) { - const restoreError = restoreWritableRuntimeSubpaths(privileged, configDir); - if (restoreError !== null) { - issues.push(`runtime-writable subpath restore failed: ${restoreError}`); - } + issues.push(...restoreWritableRuntimeSubpaths(privileged, configDir)); } return issues; } @@ -247,6 +265,7 @@ for dir in "$@"; do printf 'mutation-failed\\tchmod-recursive\\t%s\\n' "$path" fi done +exit 0 `; const STATE_DIR_LOCK_SCRIPT_WORKSPACE_GLOB = ` @@ -275,6 +294,7 @@ for dir in "$config_dir"/workspace-*; do printf 'mutation-failed\\tchmod-recursive\\t%s\\n' "$dir" fi done +exit 0 `; interface VerifyExpectations { @@ -347,6 +367,7 @@ if [ "$include_workspace" = "1" ]; then check "$dir" done fi +exit 0 `, "sh", configDir, @@ -391,11 +412,12 @@ config_dir="$1" shift for dir in "$@"; do path="$config_dir/$dir" - [ -L "$path" ] && printf '%s\\n' "$path" + if [ -L "$path" ]; then printf '%s\\n' "$path"; fi done for dir in "$config_dir"/workspace-*; do - [ -L "$dir" ] && printf '%s\\n' "$dir" + if [ -L "$dir" ]; then printf '%s\\n' "$dir"; fi done +exit 0 `, "sh", configDir, @@ -421,12 +443,17 @@ function runStateDirLockScript( let stdout = ""; try { stdout = privileged.capture(["sh", "-c", script.script, "sh", ...script.args]); - } catch { - return { symlinkedRoots: [], mutationFailures: [] }; + } catch (err) { + return { + symlinkedRoots: [], + mutationFailures: [], + execError: err instanceof Error ? err.message : String(err), + }; } const result: StateDirLockScriptResult = { symlinkedRoots: [], mutationFailures: [], + execError: null, }; for (const line of stdout.split("\n")) { const parts = line.split("\t"); @@ -439,15 +466,18 @@ function runStateDirLockScript( return result; } -// Restore runtime-writable subpaths after a successful lock fan-out. Returns -// null on success, an error message on failure so the caller can surface -// the failure rather than silently swallowing it. +// Restore runtime-writable subpaths after a successful lock fan-out. +// Returns issues for any mkdir/chown/chmod failure observed inside the +// script (parsed from `restore-failed\t\t` markers) plus a +// stat-based verification pass that confirms every restored target ends +// up as `sandbox:sandbox 2770`. Empty list means the carve-out is good. function restoreWritableRuntimeSubpaths( privileged: PrivilegedExec, configDir: string, -): string | null { +): string[] { + let stdout = ""; try { - privileged.exec([ + stdout = privileged.capture([ "sh", "-c", ` @@ -468,23 +498,55 @@ for pattern in "$@"; do case "$parent" in *"*"*) continue ;; esac - [ -L "$parent" ] && continue - [ -d "$parent" ] || continue + if [ -L "$parent" ]; then continue; fi + if [ ! -d "$parent" ]; then continue; fi target="$parent/$leaf" - [ -L "$target" ] && continue - mkdir -p "$target" 2>/dev/null || continue - chown -R sandbox:sandbox "$target" 2>/dev/null || true - chmod 2770 "$target" 2>/dev/null || true - chmod -R g+rwX,o-rwx "$target" 2>/dev/null || true + if [ -L "$target" ]; then continue; fi + if ! mkdir -p "$target" 2>/dev/null; then + printf 'restore-failed\\tmkdir\\t%s\\n' "$target" + continue + fi + if ! chown -R sandbox:sandbox "$target" 2>/dev/null; then + printf 'restore-failed\\tchown\\t%s\\n' "$target" + fi + if ! chmod 2770 "$target" 2>/dev/null; then + printf 'restore-failed\\tchmod-dir\\t%s\\n' "$target" + fi + if ! chmod -R g+rwX,o-rwx "$target" 2>/dev/null; then + printf 'restore-failed\\tchmod-recursive\\t%s\\n' "$target" + fi + perms="$(stat -c '%a %U:%G' "$target" 2>/dev/null)" || { + printf 'restore-verify-stat-failed\\t%s\\n' "$target" + continue + } + mode="\${perms%% *}" + owner="\${perms#* }" + [ "$mode" = "2770" ] || printf 'restore-verify-mode\\t%s\\t%s\\n' "$target" "$mode" + [ "$owner" = "sandbox:sandbox" ] || printf 'restore-verify-owner\\t%s\\t%s\\n' "$target" "$owner" done done +exit 0 `, "sh", configDir, ...WRITABLE_RUNTIME_SUBPATHS, ]); } catch (err) { - return err instanceof Error ? err.message : String(err); + const msg = err instanceof Error ? err.message : String(err); + return [`runtime-writable subpath restore exec failed: ${msg}`]; + } + const issues: string[] = []; + for (const line of stdout.split("\n")) { + const parts = line.split("\t"); + if (parts[0] === "restore-failed" && parts[1] && parts[2]) { + issues.push(`runtime-writable subpath restore failed (${parts[1]}): ${parts[2]}`); + } else if (parts[0] === "restore-verify-stat-failed" && parts[1]) { + issues.push(`runtime-writable subpath stat failed after restore: ${parts[1]}`); + } else if (parts[0] === "restore-verify-mode" && parts[1]) { + issues.push(`runtime-writable subpath mode=${parts[2]} (expected 2770): ${parts[1]}`); + } else if (parts[0] === "restore-verify-owner" && parts[1]) { + issues.push(`runtime-writable subpath owner=${parts[2]} (expected sandbox:sandbox): ${parts[1]}`); + } } - return null; + return issues; } diff --git a/test/shields-up-runtime-perms.test.ts b/test/shields-up-runtime-perms.test.ts index 1841bed824..efbf5108b6 100644 --- a/test/shields-up-runtime-perms.test.ts +++ b/test/shields-up-runtime-perms.test.ts @@ -206,8 +206,8 @@ describe("shields-up state-dir lock preserves sandbox-group access + runtime ses ); expect(preflightShell).toBeDefined(); const script = preflightShell?.[2] ?? ""; - expect(script).toContain('[ -L "$path" ] && printf'); - expect(script).toContain('[ -L "$dir" ] && printf'); + expect(script).toContain('if [ -L "$path" ]; then printf'); + expect(script).toContain('if [ -L "$dir" ]; then printf'); }); // A symlinked state-dir root must abort shields-up; otherwise the lock From d2d24d77c3dfa08dfdbca85c9272b5feb295dfe7 Mon Sep 17 00:00:00 2001 From: Tinson Lai Date: Sun, 31 May 2026 09:18:09 +0000 Subject: [PATCH 13/13] fix(shields): surface unlock state-dir issues, split doc exemption wording Signed-off-by: Tinson Lai --- docs/security/best-practices.mdx | 5 +++-- src/lib/shields/index.ts | 9 +++++++-- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/docs/security/best-practices.mdx b/docs/security/best-practices.mdx index ac00980baf..587c521aa9 100644 --- a/docs/security/best-practices.mdx +++ b/docs/security/best-practices.mdx @@ -207,8 +207,9 @@ Shields-up also locks the secret-bearing directories (`credentials`, `identity`, Neither the sandbox user nor the gateway can read those secrets while shields are up. Shields-down restores both groups to the mutable-default posture (`sandbox:sandbox 2770`). The list is the union of state directories declared by every shipped agent manifest; the lock helper silently skips dirs that aren't present in a given agent's config tree. -Shields-up exempts runtime-mutable subtrees (`sessions/`, `memories/`, `logs/`, `cache/`, `plans/`, plus `openclaw-weixin/`, which the image build regenerates) so the agent can keep operating. -A narrow set of runtime-data subpaths is exempted from the lock so the agent can keep operating — currently `agents//sessions/`, which the OpenClaw TUI creates and writes session metadata into; the lock helper restores those subpaths to `sandbox:sandbox 2770` after the surrounding tree is locked. +Two exemption kinds keep runtime data writable. +The lock inventory omits top-level Hermes runtime dirs (`sessions/`, `memories/`, `logs/`, `cache/`, `plans/`) and the image-build-regenerated `openclaw-weixin/`; the lock helper never touches those paths. +Inside a locked tree, the helper restores `agents//sessions/` to `sandbox:sandbox 2770` after the surrounding `agents/` lock so the OpenClaw TUI can create and write session metadata under an otherwise root-owned parent. If any high-risk state-dir root is a symlink when shields-up runs, the lock refuses to proceed and reports "Config not locked: state dir root is a symlink" rather than silently following the link with privileged `chown -R` / `chmod -R`. - **DAC permissions (default).** The sandbox user owns `/sandbox/.openclaw` with mode `2770` (setgid `sandbox:sandbox`) and `openclaw.json` with mode `660`, so the agent and its group can read and write config directly. A reviewed host-side immutability workflow should compare the intended ownership and mode with the live sandbox filesystem before treating the config tree as locked. diff --git a/src/lib/shields/index.ts b/src/lib/shields/index.ts index 1b510b15b0..b742cc180c 100644 --- a/src/lib/shields/index.ts +++ b/src/lib/shields/index.ts @@ -450,13 +450,18 @@ function unlockAgentConfig( // NC-2227-05: Restore sandbox ownership on locked state directories. // Use chown -R to restore the full tree (files within may have been - // locked to root:root by a prior shields-up). - applyStateDirLockMode( + // locked to root:root by a prior shields-up). Surface fan-out issues + // so `shields down` cannot report success while a state dir is still + // root-owned or read-only. + const stateDirUnlockIssues = applyStateDirLockMode( stateDirLockExec(sandboxName), target.configDir, "sandbox:sandbox", false, ); + for (const issue of stateDirUnlockIssues) { + errors.push(`state dir unlock: ${issue}`); + } if (errors.length > 0) { console.error(