diff --git a/docs/security/best-practices.mdx b/docs/security/best-practices.mdx index 66f8a6ef2e..587c521aa9 100644 --- a/docs/security/best-practices.mdx +++ b/docs/security/best-practices.mdx @@ -200,7 +200,17 @@ In root mode, the gateway process still runs as the separate `gateway` user, but Writable agent state such as plugins, skills, hooks, and workspace metadata lives directly under `/sandbox/.openclaw`. By default, this directory starts writable so the agent can manage its own config, install skills, and write to standard home-directory paths natively. -For sensitive workloads, use a reviewed host-side immutability workflow after initial setup so config and writable state entry points cannot be changed by the sandbox user. +For sensitive workloads, use a reviewed host-side immutability workflow after initial setup so config and high-risk state entry points cannot be changed by the sandbox user. +Shields-up locks the high-risk state directories (`skills`, `hooks`, `cron`, `agents`, `extensions`, `plugins`, `workspace`, `memory`, `devices`, `canvas`, `telegram`, `wechat`, `whatsapp`, `platforms`, `weixin`, `profiles`, `skins`) to `root:sandbox` with `chmod -R go-w`. +The OpenClaw gateway (a member of the `sandbox` group) keeps read access to plugin and agent code; the sandbox user can no longer write them. +Shields-up also locks the secret-bearing directories (`credentials`, `identity`, `pairing`) to `root:root 700` with `chmod -R go-rwX`. +Neither the sandbox user nor the gateway can read those secrets while shields are up. +Shields-down restores both groups to the mutable-default posture (`sandbox:sandbox 2770`). +The list is the union of state directories declared by every shipped agent manifest; the lock helper silently skips dirs that aren't present in a given agent's config tree. +Two exemption kinds keep runtime data writable. +The lock inventory omits top-level Hermes runtime dirs (`sessions/`, `memories/`, `logs/`, `cache/`, `plans/`) and the image-build-regenerated `openclaw-weixin/`; the lock helper never touches those paths. +Inside a locked tree, the helper restores `agents//sessions/` to `sandbox:sandbox 2770` after the surrounding `agents/` lock so the OpenClaw TUI can create and write session metadata under an otherwise root-owned parent. +If any high-risk state-dir root is a symlink when shields-up runs, the lock refuses to proceed and reports "Config not locked: state dir root is a symlink" rather than silently following the link with privileged `chown -R` / `chmod -R`. - **DAC permissions (default).** The sandbox user owns `/sandbox/.openclaw` with mode `2770` (setgid `sandbox:sandbox`) and `openclaw.json` with mode `660`, so the agent and its group can read and write config directly. A reviewed host-side immutability workflow should compare the intended ownership and mode with the live sandbox filesystem before treating the config tree as locked. - **Config integrity hash.** The image includes a SHA256 hash of `openclaw.json`. In the default mutable state, `.config-hash` is sandbox-owned and is not a tamper-proof trust anchor, so startup does not fail closed on that hash. When the hash is root-owned and read-only, startup enforces it and refuses to start if the hash does not match. diff --git a/src/lib/shields/index.ts b/src/lib/shields/index.ts index 86f195cf6d..b742cc180c 100644 --- a/src/lib/shields/index.ts +++ b/src/lib/shields/index.ts @@ -53,6 +53,10 @@ const { isHashVerificationIssue, isSha256Hex, }: typeof import("./seal") = require("./seal"); +const { + applyStateDirLockMode, + preflightStateDirLock, +}: typeof import("./state-dir-lock") = require("./state-dir-lock"); const STATE_DIR = resolveNemoclawStateDir(); @@ -331,114 +335,18 @@ function isShieldsState(value: unknown): value is ShieldsState { } // --------------------------------------------------------------------------- -// NC-2227-05: State directories locked by shields-up. -// -// During shields-up, these must be locked (root:root 755) so the sandbox -// user cannot create new entries or modify existing ones. This covers both -// executable state (skills, hooks, cron jobs, extensions, plugins, agent -// definitions) and writable agent state entry points such as workspace and -// memory, so a stale symlink bridge cannot bypass the lockdown. -// -// The list is a superset: directories that don't exist in a given agent's -// config dir are silently skipped. +// State-dir lock — adapter between this module's privileged-exec helpers and +// the lock pipeline in ./state-dir-lock. The inventory of locked dirs, the +// preflight/mutation/verification logic, and the `agents/*/sessions` +// carve-out live in that sibling module so this file stays focused on +// shields state transitions. // --------------------------------------------------------------------------- -const HIGH_RISK_STATE_DIRS = [ - "skills", - "hooks", - "cron", - "agents", - "extensions", - "plugins", // Hermes equivalent of extensions - "workspace", - "memory", - "credentials", - "identity", - "devices", - "canvas", - "telegram", -]; - -function applyStateDirLockMode( - sandboxName: string, - configDir: string, - owner: string, -): void { - // Locking (shields-up) strips group + world write. Unlocking (shields-down) - // restores the same group-readable/writable + o-rwx mutable-default contract - // as startup, plus setgid so the gateway UID — now in the sandbox group via - // Dockerfile.base — can write to OpenClaw's mutable config tree (#2681). - // - // The unlock variant uses `g+rwX,o-rwx` because a prior lock can strip group - // access from descendants. Without re-adding group read/write explicitly, - // shields-down would leave nested files readable/writable only by owner. - const isLocking = owner === "root:root"; - const recursiveMode = isLocking ? "go-w" : "g+rwX,o-rwx"; - const dirMode = isLocking ? "755" : "2770"; - - for (const dirName of HIGH_RISK_STATE_DIRS) { - const dirPath = `${configDir}/${dirName}`; - try { - privilegedSandboxExec(sandboxName, ["chown", "-R", owner, dirPath]); - } catch { - // Directory may not exist for this agent — silently skip - } - try { - privilegedSandboxExec(sandboxName, ["chmod", dirMode, dirPath]); - } catch { - // Silently skip - } - if (isLocking) { - try { - privilegedSandboxExec(sandboxName, ["chmod", "g-s", dirPath]); - } catch { - // Best effort; do not skip recursive write stripping. - } - } - try { - privilegedSandboxExec(sandboxName, [ - "chmod", - "-R", - recursiveMode, - dirPath, - ]); - } catch { - // Silently skip - } - } - - // Multi-agent OpenClaw workspaces are named workspace-. They are - // discovered dynamically because they are configured by openclaw.json. - const clearSetgid = isLocking ? "1" : "0"; - try { - privilegedSandboxExec(sandboxName, [ - "sh", - "-c", - ` -set -u -config_dir="$1" -owner="$2" -recursive_mode="$3" -dir_mode="$4" -clear_setgid="$5" -for dir in "$config_dir"/workspace-*; do - [ -d "$dir" ] || continue - chown -R "$owner" "$dir" 2>/dev/null || true - chmod "$dir_mode" "$dir" 2>/dev/null || true - [ "$clear_setgid" = "1" ] && chmod g-s "$dir" 2>/dev/null || true - chmod -R "$recursive_mode" "$dir" 2>/dev/null || true -done -`, - "sh", - configDir, - owner, - recursiveMode, - dirMode, - clearSetgid, - ]); - } catch { - // Best effort; verification below catches the primary config lock. - } +function stateDirLockExec(sandboxName: string) { + return { + exec: (cmd: string[]) => privilegedSandboxExec(sandboxName, cmd), + capture: (cmd: string[]) => privilegedSandboxExecCapture(sandboxName, cmd), + }; } function legacyDataDirFor(configDir: string): string { @@ -542,8 +450,18 @@ function unlockAgentConfig( // NC-2227-05: Restore sandbox ownership on locked state directories. // Use chown -R to restore the full tree (files within may have been - // locked to root:root by a prior shields-up). - applyStateDirLockMode(sandboxName, target.configDir, "sandbox:sandbox"); + // locked to root:root by a prior shields-up). Surface fan-out issues + // so `shields down` cannot report success while a state dir is still + // root-owned or read-only. + const stateDirUnlockIssues = applyStateDirLockMode( + stateDirLockExec(sandboxName), + target.configDir, + "sandbox:sandbox", + false, + ); + for (const issue of stateDirUnlockIssues) { + errors.push(`state dir unlock: ${issue}`); + } if (errors.length > 0) { console.error( @@ -652,6 +570,18 @@ function lockAgentConfig( const errors: string[] = []; const filesToLock = [target.configPath, ...(target.sensitiveFiles || [])]; + // Symlink preflight runs before any file or directory mutation: if a + // pre-lockdown agent swapped e.g. `extensions/` for a symlink to /etc, + // we abort before the privileged chmod/chown touches anything, so the + // tree is never half-mutated against an attacker-controlled host path. + const preflightIssues = preflightStateDirLock( + stateDirLockExec(sandboxName), + target.configDir, + ); + if (preflightIssues.length > 0) { + throw new Error(`Config not locked: ${preflightIssues.join(", ")}`); + } + for (const f of filesToLock) { try { privilegedSandboxExec(sandboxName, ["chmod", "444", f]); @@ -692,10 +622,24 @@ function lockAgentConfig( } } - // NC-2227-05: Lock state directories. Root-own the directory and set 755 so - // the sandbox user can read/execute but cannot create new entries or modify - // existing ones. - applyStateDirLockMode(sandboxName, target.configDir, "root:root"); + // Lock state directories. High-risk dirs use `root:sandbox` ownership so + // the gateway (in the sandbox group) can still read plugin/agent code while + // the sandbox user is denied write through `chmod -R go-w`. Secret-bearing + // dirs (CONFIDENTIALITY_STATE_DIRS in ./state-dir-lock) go to `root:root` + // 700/go-rwX so neither the sandbox user nor the gateway can read them + // while shields are up. Top-level configDir stays root:root. + const stateDirLockIssues = applyStateDirLockMode( + stateDirLockExec(sandboxName), + target.configDir, + "root:sandbox", + true, + ); + if (stateDirLockIssues.length > 0) { + // Symlinked state-dir roots are a security-relevant violation: + // continuing would let shields-up report "locked" while a state + // dir still points at a writable host path. Refuse the lock. + throw new Error(`Config not locked: ${stateDirLockIssues.join(", ")}`); + } // OpenClaw's mutable-default config root is setgid (#2681). Clear setgid // after descendant locking so shields-up verifies the root config dir as diff --git a/src/lib/shields/state-dir-lock.ts b/src/lib/shields/state-dir-lock.ts new file mode 100644 index 0000000000..351b0dc0a1 --- /dev/null +++ b/src/lib/shields/state-dir-lock.ts @@ -0,0 +1,552 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +// State-dir lock fan-out for shields up/down. Owns the inventory of +// high-risk and secret-bearing state directories, the preflight + mutation +// + verification pipeline, and the runtime-subpath carve-out for +// `agents/*/sessions`. The shields entrypoint (`src/lib/shields/index.ts`) +// stays focused on shields state transitions and delegates the chown/chmod +// fan-out to this module. + +export interface PrivilegedExec { + exec(cmd: string[]): void; + capture(cmd: string[]): string; +} + +// --------------------------------------------------------------------------- +// State directories locked by shields-up. +// +// During shields-up, these must be locked so the sandbox user cannot create +// new entries or modify existing ones. This covers both executable state +// (skills, hooks, cron jobs, extensions, plugins, agent definitions) and +// writable agent state entry points such as workspace and memory, so a stale +// symlink bridge cannot bypass the lockdown. +// +// Lock ownership for HIGH_RISK_STATE_DIRS is `root:sandbox` (not +// `root:root`): the OpenClaw gateway runs as `gateway`, which is a member of +// the sandbox group via `Dockerfile.base`. Owning these dirs as +// `root:sandbox` preserves the sandbox group's `r-x` access to descendant +// files (after `chmod -R go-w`), so plugin discovery can still scan +// `extensions//` while the sandbox user is denied write through the +// stripped group/other write bits. +// +// CONFIDENTIALITY_STATE_DIRS holds secret-bearing trees (auth tokens, host +// device pairing material). These get a stricter posture under shields-up: +// `root:root 700` with `chmod -R go-rwX`, so neither the sandbox user nor +// the gateway can read them while shields are up. The mutable-default +// posture is restored on shields-down. +// +// The list is a superset: directories that don't exist in a given agent's +// config dir are silently skipped. +// +// Coverage tracks the union of state_dirs declared by every shipped agent +// manifest (agents/openclaw/manifest.yaml, agents/hermes/manifest.yaml). +// Runtime-mutable subtrees that must keep being writable while shields are +// up are intentionally omitted: +// - `sessions` (Hermes top-level) and `agents/*/sessions` (OpenClaw) — the +// latter is restored via WRITABLE_RUNTIME_SUBPATHS after the lock loop. +// - `memories`, `logs`, `cache`, `plans` (Hermes) — runtime mutables. +// - `openclaw-weixin` is regenerated from envs at image-build time +// (see src/lib/actions/sandbox/rebuild.ts) and is not a manifest state_dir. +// Any new agent state_dir that holds executable code or credentials must be +// added to one of these sets in lockstep with its manifest entry. +// --------------------------------------------------------------------------- + +export const HIGH_RISK_STATE_DIRS = [ + "skills", + "hooks", + "cron", + "agents", + "extensions", + "plugins", // Hermes equivalent of extensions + "workspace", + "memory", + "devices", + "canvas", + "telegram", + "wechat", // OpenClaw runtime channel state + "whatsapp", // OpenClaw + Hermes channel state (Hermes also nests under platforms/) + "platforms", // Hermes channel-bridge auth state (whatsapp/, etc.) + "weixin", // Hermes iLink WeChat per-account context tokens + "profiles", // Hermes saved profiles + "skins", // Hermes UI/personality bundles (code-like) +]; + +export const CONFIDENTIALITY_STATE_DIRS = [ + "credentials", + "identity", + "pairing", // Hermes device-pairing material (auth tokens) +]; + +// Runtime-data subpaths the agent must keep writing to under shields-up. +// Each entry is a shell glob relative to the agent config dir; after the +// main lock loop the matching directories are restored to +// `sandbox:sandbox 2770` so they remain writable inside an otherwise-locked +// tree. +// +// Removal condition: when the OpenClaw runtime moves session state out of +// the locked config tree (e.g. into `/sandbox/.openclaw-runtime/`), this +// carve-out can be deleted and lockAgentConfig can leave the tree fully +// owned by `root:sandbox` with no writable holes. +export const WRITABLE_RUNTIME_SUBPATHS = ["agents/*/sessions"]; + +interface StateDirLockScript { + script: string; + args: string[]; +} + +interface StateDirLockScriptResult { + symlinkedRoots: string[]; + mutationFailures: Array<{ op: string; path: string }>; + // Non-null when the script exec itself failed (kubectl exec hiccup, + // privileged-exec timeout, etc.). Fail-closed: the caller surfaces a + // lock issue rather than treating the empty output as a clean run. + execError: string | null; +} + +interface PreflightResult { + symlinkedRoots: string[]; + // Non-null when the preflight exec itself failed. Fail-closed: the + // caller must treat a non-null `error` as "do not lock" so a kubectl + // exec hiccup cannot let shields-up advance while a symlinked root + // remains unobserved. + error: string | null; +} + +// Run the symlink preflight against every state-dir root and the +// `workspace-*` glob, returning issues the caller must surface before +// touching any config files. Empty list means it is safe to proceed with +// `applyStateDirLockMode`. Exposed separately so callers can hoist this +// before chmod/chown on configPath + sensitiveFiles, keeping the "no +// mutations until preflight clears" invariant. +export function preflightStateDirLock( + privileged: PrivilegedExec, + configDir: string, +): string[] { + const allStateDirs = [...HIGH_RISK_STATE_DIRS, ...CONFIDENTIALITY_STATE_DIRS]; + const preflight = preflightSymlinkedRoots(privileged, configDir, allStateDirs); + if (preflight.error !== null) { + return [`Symlink preflight failed; refusing to lock: ${preflight.error}`]; + } + return preflight.symlinkedRoots.map( + (path) => `state dir root is a symlink: ${path} (refusing to lock)`, + ); +} + +// Apply lock or unlock mode to every existing high-risk and confidentiality +// state directory under `configDir`. Returns a list of issues; an empty +// list means the fan-out completed cleanly. +// +// Callers that lock (`isLocking === true`) must invoke +// `preflightStateDirLock` first and abort on a non-empty result. This +// function intentionally re-runs symlink checks inline so a mid-lock race +// (state-dir root becoming a symlink after the hoisted preflight) is still +// caught, but the hoisted preflight is the one that keeps file mutations +// from leaking out before validation. +export function applyStateDirLockMode( + privileged: PrivilegedExec, + configDir: string, + highRiskOwner: string, + isLocking: boolean, +): string[] { + // Locking (shields-up) strips group + world write for HIGH_RISK dirs. + // Unlocking (shields-down) restores the same group-readable/writable + + // o-rwx mutable-default contract as startup, plus setgid so the gateway + // UID — now in the sandbox group via Dockerfile.base — can write to + // OpenClaw's mutable config tree. The unlock variant uses + // `g+rwX,o-rwx` because a prior lock can strip group access from + // descendants. + const highRiskRecursiveMode = isLocking ? "go-w" : "g+rwX,o-rwx"; + const highRiskDirMode = isLocking ? "755" : "2770"; + const clearSetgid = isLocking ? "1" : "0"; + + const mainPassResult = runStateDirLockScript(privileged, { + script: STATE_DIR_LOCK_SCRIPT_BY_LIST, + args: [ + configDir, + highRiskOwner, + highRiskRecursiveMode, + highRiskDirMode, + clearSetgid, + ...HIGH_RISK_STATE_DIRS, + ], + }); + + // Multi-agent OpenClaw workspaces are named workspace-. Glob is + // expanded by the shell because the list is configured by openclaw.json. + const workspacePassResult = runStateDirLockScript(privileged, { + script: STATE_DIR_LOCK_SCRIPT_WORKSPACE_GLOB, + args: [configDir, highRiskOwner, highRiskRecursiveMode, highRiskDirMode, clearSetgid], + }); + + // Secret-bearing dirs: stricter posture. Locked = `root:root 700` with + // `go-rwX` so neither sandbox user nor gateway can read them while + // shields are up. Unlocked = sandbox:sandbox 2770 / g+rwX. + const confidentialityOwner = isLocking ? "root:root" : highRiskOwner; + const confidentialityRecursiveMode = isLocking ? "go-rwX" : "g+rwX,o-rwx"; + const confidentialityDirMode = isLocking ? "700" : "2770"; + const confidentialityPassResult = runStateDirLockScript(privileged, { + script: STATE_DIR_LOCK_SCRIPT_BY_LIST, + args: [ + configDir, + confidentialityOwner, + confidentialityRecursiveMode, + confidentialityDirMode, + clearSetgid, + ...CONFIDENTIALITY_STATE_DIRS, + ], + }); + + const issues: string[] = []; + const allResults = [ + { label: "high-risk", result: mainPassResult }, + { label: "workspace-*", result: workspacePassResult }, + { label: "confidentiality", result: confidentialityPassResult }, + ]; + for (const { label, result } of allResults) { + if (result.execError !== null) { + issues.push(`state dir lock ${label} exec failed: ${result.execError}`); + } + for (const path of result.symlinkedRoots) { + issues.push(`state dir root is a symlink: ${path} (refusing to lock)`); + } + for (const failure of result.mutationFailures) { + issues.push(`state dir mutation failed (${failure.op}): ${failure.path}`); + } + } + + if (isLocking) { + issues.push( + ...verifyStateDirsLocked(privileged, configDir, { + highRiskOwner, + highRiskDirMode, + confidentialityOwner, + confidentialityDirMode, + }), + ); + } + + // Restoration of runtime-writable subpaths runs only when the lock + // fan-out reported no issues. If preflight, mutation, or verification + // surfaced anything, the tree is in a known-bad state and re-opening + // `agents/*/sessions` for the sandbox user would widen the blast radius + // of whatever broke. + if (isLocking && issues.length === 0) { + issues.push(...restoreWritableRuntimeSubpaths(privileged, configDir)); + } + return issues; +} + +const STATE_DIR_LOCK_SCRIPT_BY_LIST = ` +set -u +config_dir="$1" +owner="$2" +recursive_mode="$3" +dir_mode="$4" +clear_setgid="$5" +shift 5 +for dir in "$@"; do + path="$config_dir/$dir" + if [ -L "$path" ]; then + printf 'symlinked-root\\t%s\\n' "$path" + continue + fi + [ -d "$path" ] || continue + if ! chown -R "$owner" "$path" 2>/dev/null; then + printf 'mutation-failed\\tchown\\t%s\\n' "$path" + fi + if ! chmod "$dir_mode" "$path" 2>/dev/null; then + printf 'mutation-failed\\tchmod-dir\\t%s\\n' "$path" + fi + if [ "$clear_setgid" = "1" ]; then + chmod g-s "$path" 2>/dev/null || true + fi + if ! chmod -R "$recursive_mode" "$path" 2>/dev/null; then + printf 'mutation-failed\\tchmod-recursive\\t%s\\n' "$path" + fi +done +exit 0 +`; + +const STATE_DIR_LOCK_SCRIPT_WORKSPACE_GLOB = ` +set -u +config_dir="$1" +owner="$2" +recursive_mode="$3" +dir_mode="$4" +clear_setgid="$5" +for dir in "$config_dir"/workspace-*; do + if [ -L "$dir" ]; then + printf 'symlinked-root\\t%s\\n' "$dir" + continue + fi + [ -d "$dir" ] || continue + if ! chown -R "$owner" "$dir" 2>/dev/null; then + printf 'mutation-failed\\tchown\\t%s\\n' "$dir" + fi + if ! chmod "$dir_mode" "$dir" 2>/dev/null; then + printf 'mutation-failed\\tchmod-dir\\t%s\\n' "$dir" + fi + if [ "$clear_setgid" = "1" ]; then + chmod g-s "$dir" 2>/dev/null || true + fi + if ! chmod -R "$recursive_mode" "$dir" 2>/dev/null; then + printf 'mutation-failed\\tchmod-recursive\\t%s\\n' "$dir" + fi +done +exit 0 +`; + +interface VerifyExpectations { + highRiskOwner: string; + highRiskDirMode: string; + confidentialityOwner: string; + confidentialityDirMode: string; +} + +function verifyStateDirsLocked( + privileged: PrivilegedExec, + configDir: string, + expected: VerifyExpectations, +): string[] { + const issues: string[] = []; + issues.push( + ...verifyStateDirGroup(privileged, configDir, HIGH_RISK_STATE_DIRS, { + owner: expected.highRiskOwner, + dirMode: expected.highRiskDirMode, + includeWorkspaceGlob: true, + }), + ); + issues.push( + ...verifyStateDirGroup(privileged, configDir, CONFIDENTIALITY_STATE_DIRS, { + owner: expected.confidentialityOwner, + dirMode: expected.confidentialityDirMode, + includeWorkspaceGlob: false, + }), + ); + return issues; +} + +function verifyStateDirGroup( + privileged: PrivilegedExec, + configDir: string, + dirs: readonly string[], + expected: { owner: string; dirMode: string; includeWorkspaceGlob: boolean }, +): string[] { + let stdout = ""; + try { + stdout = privileged.capture([ + "sh", + "-c", + ` +set -u +config_dir="$1" +expected_owner="$2" +expected_mode="$3" +include_workspace="$4" +shift 4 +check() { + path="$1" + [ -L "$path" ] && { printf 'verify-symlink\\t%s\\n' "$path"; return; } + [ -d "$path" ] || return + perms="$(stat -c '%a %U:%G' "$path" 2>/dev/null)" || { + printf 'verify-stat-failed\\t%s\\n' "$path" + return + } + mode="\${perms%% *}" + owner="\${perms#* }" + [ "$mode" = "$expected_mode" ] || printf 'verify-mode\\t%s\\t%s\\t%s\\n' "$path" "$mode" "$expected_mode" + [ "$owner" = "$expected_owner" ] || printf 'verify-owner\\t%s\\t%s\\t%s\\n' "$path" "$owner" "$expected_owner" +} +for dir in "$@"; do + check "$config_dir/$dir" +done +if [ "$include_workspace" = "1" ]; then + for dir in "$config_dir"/workspace-*; do + case "$dir" in *"*"*) continue ;; esac + check "$dir" + done +fi +exit 0 +`, + "sh", + configDir, + expected.owner, + expected.dirMode, + expected.includeWorkspaceGlob ? "1" : "0", + ...dirs, + ]); + } catch (err) { + const msg = err instanceof Error ? err.message : String(err); + return [`state dir verification exec failed: ${msg}`]; + } + const issues: string[] = []; + for (const line of stdout.split("\n")) { + const parts = line.split("\t"); + if (parts[0] === "verify-symlink" && parts[1]) { + issues.push(`state dir became a symlink mid-lock: ${parts[1]}`); + } else if (parts[0] === "verify-stat-failed" && parts[1]) { + issues.push(`state dir stat failed after lock: ${parts[1]}`); + } else if (parts[0] === "verify-mode" && parts[1]) { + issues.push(`state dir mode=${parts[2]} (expected ${parts[3]}): ${parts[1]}`); + } else if (parts[0] === "verify-owner" && parts[1]) { + issues.push(`state dir owner=${parts[2]} (expected ${parts[3]}): ${parts[1]}`); + } + } + return issues; +} + +function preflightSymlinkedRoots( + privileged: PrivilegedExec, + configDir: string, + allStateDirs: readonly string[], +): PreflightResult { + let stdout = ""; + try { + stdout = privileged.capture([ + "sh", + "-c", + ` +set -u +config_dir="$1" +shift +for dir in "$@"; do + path="$config_dir/$dir" + if [ -L "$path" ]; then printf '%s\\n' "$path"; fi +done +for dir in "$config_dir"/workspace-*; do + if [ -L "$dir" ]; then printf '%s\\n' "$dir"; fi +done +exit 0 +`, + "sh", + configDir, + ...allStateDirs, + ]); + } catch (err) { + const msg = err instanceof Error ? err.message : String(err); + return { symlinkedRoots: [], error: msg }; + } + return { + symlinkedRoots: stdout + .split("\n") + .map((line) => line.trim()) + .filter((line) => line.length > 0), + error: null, + }; +} + +function runStateDirLockScript( + privileged: PrivilegedExec, + script: StateDirLockScript, +): StateDirLockScriptResult { + let stdout = ""; + try { + stdout = privileged.capture(["sh", "-c", script.script, "sh", ...script.args]); + } catch (err) { + return { + symlinkedRoots: [], + mutationFailures: [], + execError: err instanceof Error ? err.message : String(err), + }; + } + const result: StateDirLockScriptResult = { + symlinkedRoots: [], + mutationFailures: [], + execError: null, + }; + for (const line of stdout.split("\n")) { + const parts = line.split("\t"); + if (parts[0] === "symlinked-root" && parts[1]) { + result.symlinkedRoots.push(parts[1]); + } else if (parts[0] === "mutation-failed" && parts[1] && parts[2]) { + result.mutationFailures.push({ op: parts[1], path: parts[2] }); + } + } + return result; +} + +// Restore runtime-writable subpaths after a successful lock fan-out. +// Returns issues for any mkdir/chown/chmod failure observed inside the +// script (parsed from `restore-failed\t\t` markers) plus a +// stat-based verification pass that confirms every restored target ends +// up as `sandbox:sandbox 2770`. Empty list means the carve-out is good. +function restoreWritableRuntimeSubpaths( + privileged: PrivilegedExec, + configDir: string, +): string[] { + let stdout = ""; + try { + stdout = privileged.capture([ + "sh", + "-c", + ` +set -u +config_dir="$1" +shift +for pattern in "$@"; do + case "$pattern" in + */*) prefix="\${pattern%/*}"; leaf="\${pattern##*/}" ;; + *) prefix=""; leaf="$pattern" ;; + esac + if [ -n "$prefix" ]; then + set -- "$config_dir"/$prefix + else + set -- "$config_dir" + fi + for parent in "$@"; do + case "$parent" in + *"*"*) continue ;; + esac + if [ -L "$parent" ]; then continue; fi + if [ ! -d "$parent" ]; then continue; fi + target="$parent/$leaf" + if [ -L "$target" ]; then continue; fi + if ! mkdir -p "$target" 2>/dev/null; then + printf 'restore-failed\\tmkdir\\t%s\\n' "$target" + continue + fi + if ! chown -R sandbox:sandbox "$target" 2>/dev/null; then + printf 'restore-failed\\tchown\\t%s\\n' "$target" + fi + if ! chmod 2770 "$target" 2>/dev/null; then + printf 'restore-failed\\tchmod-dir\\t%s\\n' "$target" + fi + if ! chmod -R g+rwX,o-rwx "$target" 2>/dev/null; then + printf 'restore-failed\\tchmod-recursive\\t%s\\n' "$target" + fi + perms="$(stat -c '%a %U:%G' "$target" 2>/dev/null)" || { + printf 'restore-verify-stat-failed\\t%s\\n' "$target" + continue + } + mode="\${perms%% *}" + owner="\${perms#* }" + [ "$mode" = "2770" ] || printf 'restore-verify-mode\\t%s\\t%s\\n' "$target" "$mode" + [ "$owner" = "sandbox:sandbox" ] || printf 'restore-verify-owner\\t%s\\t%s\\n' "$target" "$owner" + done +done +exit 0 +`, + "sh", + configDir, + ...WRITABLE_RUNTIME_SUBPATHS, + ]); + } catch (err) { + const msg = err instanceof Error ? err.message : String(err); + return [`runtime-writable subpath restore exec failed: ${msg}`]; + } + const issues: string[] = []; + for (const line of stdout.split("\n")) { + const parts = line.split("\t"); + if (parts[0] === "restore-failed" && parts[1] && parts[2]) { + issues.push(`runtime-writable subpath restore failed (${parts[1]}): ${parts[2]}`); + } else if (parts[0] === "restore-verify-stat-failed" && parts[1]) { + issues.push(`runtime-writable subpath stat failed after restore: ${parts[1]}`); + } else if (parts[0] === "restore-verify-mode" && parts[1]) { + issues.push(`runtime-writable subpath mode=${parts[2]} (expected 2770): ${parts[1]}`); + } else if (parts[0] === "restore-verify-owner" && parts[1]) { + issues.push(`runtime-writable subpath owner=${parts[2]} (expected sandbox:sandbox): ${parts[1]}`); + } + } + return issues; +} diff --git a/test/repro-2681-group-writable.test.ts b/test/repro-2681-group-writable.test.ts index b476a1da4a..f1d451b1ef 100644 --- a/test/repro-2681-group-writable.test.ts +++ b/test/repro-2681-group-writable.test.ts @@ -175,11 +175,28 @@ describe("mutable agent config permissions", () => { expect(commands).toContainEqual(["chmod", "660", "/sandbox/.openclaw/openclaw.json"]); expect(commands).toContainEqual(["chmod", "660", "/sandbox/.openclaw/.config-hash"]); expect(commands).toContainEqual(["chmod", "2770", "/sandbox/.openclaw"]); - expect(commands).toContainEqual(["chmod", "2770", "/sandbox/.openclaw/workspace"]); - expect(commands).toContainEqual(["chmod", "-R", "g+rwX,o-rwx", "/sandbox/.openclaw/workspace"]); - expect(commands.find((command) => command[0] === "sh" && command[1] === "-c")).toEqual( - expect.arrayContaining(["/sandbox/.openclaw", "sandbox:sandbox", "g+rwX,o-rwx", "2770"]), + // The consolidated state-dir unlock script restores ownership and mode on + // every high-risk state dir (including `workspace`) inside a single + // `sh -c` invocation; the workspace-* glob is handled by a second + // `sh -c`. Both are asserted via the `arrayContaining` check below. + const shellInvocations = commands.filter( + (command) => command[0] === "sh" && command[1] === "-c", ); + expect( + shellInvocations.some((command) => + command.includes("/sandbox/.openclaw") && + command.includes("sandbox:sandbox") && + command.includes("g+rwX,o-rwx") && + command.includes("2770") && + command.includes("workspace"), + ), + ).toBe(true); + expect( + shellInvocations.some( + (command) => + typeof command[2] === "string" && command[2].includes('workspace-*'), + ), + ).toBe(true); }); it("shields-down restores Hermes sticky group-writable config root without group-writable config files", () => { @@ -275,7 +292,7 @@ process.stdout.write(JSON.stringify(calls)); command[0] === "sh" && command[1] === "-c" && command.includes("/sandbox/.openclaw") && - command.includes("root:root") && + command.includes("root:sandbox") && command.includes("go-w") && command.includes("755"), ); diff --git a/test/shields-up-runtime-perms.test.ts b/test/shields-up-runtime-perms.test.ts new file mode 100644 index 0000000000..efbf5108b6 --- /dev/null +++ b/test/shields-up-runtime-perms.test.ts @@ -0,0 +1,458 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +import fs from "node:fs"; +import os from "node:os"; +import path from "node:path"; +import { spawnSync } from "node:child_process"; +import { describe, expect, it } from "vitest"; + +function runLockAgentConfigProbe(): string[][] { + const probe = spawnSync( + process.execPath, + [ + "-e", + String.raw` +const Module = require("node:module"); +const originalLoad = Module._load; +const calls = []; +Module._load = function patchedLoad(request, parent, isMain) { + if (request === "../adapters/docker/exec") { + return { + dockerExecFileSync(args) { + const separator = args.indexOf("--"); + const command = separator >= 0 ? args.slice(separator + 1) : args; + calls.push(command); + if (command[0] === "stat" && command[1] === "-c") { + return command.at(-1) === "/sandbox/.openclaw" + ? "755 root:root\n" + : "444 root:root\n"; + } + if (command[0] === "lsattr") { + return "----i----------------- " + command.at(-1) + "\n"; + } + if (command[0] === "sha256sum") { + return ( + "0000000000000000000000000000000000000000000000000000000000000001 " + + command.at(-1) + + "\n" + ); + } + return ""; + }, + }; + } + if (request === "../sandbox/privileged-exec") { + return { + privilegedSandboxExecArgv(_sandboxName, cmd) { + return [...cmd]; + }, + }; + } + return originalLoad.call(this, request, parent, isMain); +}; +const { lockAgentConfig } = require("./dist/lib/shields/index.js"); +lockAgentConfig("sandbox-pod", { + agentName: "openclaw", + configPath: "/sandbox/.openclaw/openclaw.json", + configDir: "/sandbox/.openclaw", + sensitiveFiles: ["/sandbox/.openclaw/.config-hash"], +}); +process.stdout.write(JSON.stringify(calls)); +`, + ], + { encoding: "utf-8", timeout: 5000 }, + ); + expect(probe.status).toBe(0); + return JSON.parse(probe.stdout) as string[][]; +} + +function runLockAgentConfigProbeExpectingThrow( + symlinkedPath: string, +): { stdout: string; stderr: string; status: number | null } { + return spawnSync( + process.execPath, + [ + "-e", + String.raw` +const Module = require("node:module"); +const originalLoad = Module._load; +const symlinkedPath = ${JSON.stringify(symlinkedPath)}; +Module._load = function patchedLoad(request, parent, isMain) { + if (request === "../adapters/docker/exec") { + return { + dockerExecFileSync(args) { + const separator = args.indexOf("--"); + const command = separator >= 0 ? args.slice(separator + 1) : args; + if ( + command[0] === "sh" && + command[1] === "-c" && + typeof command[2] === "string" && + !command[2].includes("symlinked-root") + ) { + return symlinkedPath + "\n"; + } + if (command[0] === "stat" && command[1] === "-c") { + return command.at(-1) === "/sandbox/.openclaw" + ? "755 root:root\n" + : "444 root:root\n"; + } + if (command[0] === "lsattr") { + return "----i----------------- " + command.at(-1) + "\n"; + } + if (command[0] === "sha256sum") { + return ( + "0000000000000000000000000000000000000000000000000000000000000001 " + + command.at(-1) + + "\n" + ); + } + return ""; + }, + }; + } + if (request === "../sandbox/privileged-exec") { + return { + privilegedSandboxExecArgv(_sandboxName, cmd) { + return [...cmd]; + }, + }; + } + return originalLoad.call(this, request, parent, isMain); +}; +try { + const { lockAgentConfig } = require("./dist/lib/shields/index.js"); + lockAgentConfig("sandbox-pod", { + agentName: "openclaw", + configPath: "/sandbox/.openclaw/openclaw.json", + configDir: "/sandbox/.openclaw", + sensitiveFiles: ["/sandbox/.openclaw/.config-hash"], + }); + process.stdout.write("UNEXPECTED_SUCCESS"); + process.exit(0); +} catch (err) { + process.stderr.write(err && err.message ? err.message : String(err)); + process.exit(2); +} +`, + ], + { encoding: "utf-8", timeout: 5000 }, + ); +} + +function findStateDirLockShell(commands: string[][]): string[] | undefined { + return commands.find( + (command) => + command[0] === "sh" && + command[1] === "-c" && + typeof command[2] === "string" && + command[2].includes('chown -R "$owner"') && + command.includes("root:sandbox") && + command.includes("agents") && + command.includes("extensions"), + ); +} + +describe("shields-up state-dir lock preserves sandbox-group access + runtime sessions writable", () => { + it("locks the high-risk state dirs via a single sh-c script with root:sandbox ownership", () => { + const commands = runLockAgentConfigProbe(); + + const stateDirLockShell = findStateDirLockShell(commands); + expect(stateDirLockShell).toBeDefined(); + expect(stateDirLockShell).toEqual( + expect.arrayContaining(["root:sandbox", "go-w", "755"]), + ); + expect(stateDirLockShell).toEqual( + expect.arrayContaining(["agents", "extensions", "skills", "hooks"]), + ); + }); + + it("guards the state-dir lock script against symlinked roots", () => { + const commands = runLockAgentConfigProbe(); + const stateDirLockShell = findStateDirLockShell(commands); + expect(stateDirLockShell).toBeDefined(); + const script = stateDirLockShell?.[2] ?? ""; + expect(script).toContain('if [ -L "$path" ]; then'); + expect(script).toContain('symlinked-root'); + expect(script).toContain('[ -d "$path" ] || continue'); + }); + + it("guards the workspace-* lock script against symlinked roots", () => { + const commands = runLockAgentConfigProbe(); + const workspaceMutationShell = commands.find( + (command) => + command[0] === "sh" && + command[1] === "-c" && + typeof command[2] === "string" && + command[2].includes('workspace-*') && + command[2].includes('chown -R "$owner"'), + ); + expect(workspaceMutationShell).toBeDefined(); + const script = workspaceMutationShell?.[2] ?? ""; + expect(script).toContain('if [ -L "$dir" ]; then'); + expect(script).toContain('symlinked-root'); + }); + + it("runs a symlink-preflight script before any mutation", () => { + const commands = runLockAgentConfigProbe(); + const preflightShell = commands.find( + (command) => + command[0] === "sh" && + command[1] === "-c" && + typeof command[2] === "string" && + command[2].includes('workspace-*') && + !command[2].includes("chown") && + !command[2].includes("chmod"), + ); + expect(preflightShell).toBeDefined(); + const script = preflightShell?.[2] ?? ""; + expect(script).toContain('if [ -L "$path" ]; then printf'); + expect(script).toContain('if [ -L "$dir" ]; then printf'); + }); + + // A symlinked state-dir root must abort shields-up; otherwise the lock + // would report success while the dir still points at a writable host + // path. + it("throws when shields-up encounters a symlinked state-dir root", () => { + const result = runLockAgentConfigProbeExpectingThrow( + "/sandbox/.openclaw/extensions", + ); + expect(result.status).toBe(2); + expect(result.stderr).toContain("Config not locked"); + expect(result.stderr).toContain("state dir root is a symlink"); + expect(result.stderr).toContain("/sandbox/.openclaw/extensions"); + }); + + // Atomicity: when the preflight detects a symlinked state-dir root, + // no chown/chmod must run on any of the other (non-symlinked) state + // dirs in the same lock attempt. Without this guarantee, earlier + // entries in HIGH_RISK_STATE_DIRS could be silently re-owned to + // root:sandbox before the later symlink is reached and the lock + // bails out, leaving a half-locked tree behind. + it("does not mutate any state dir when the preflight reports a symlinked root", () => { + const probe = spawnSync( + process.execPath, + [ + "-e", + String.raw` +const Module = require("node:module"); +const originalLoad = Module._load; +const calls = []; +Module._load = function patchedLoad(request, parent, isMain) { + if (request === "../adapters/docker/exec") { + return { + dockerExecFileSync(args) { + const separator = args.indexOf("--"); + const command = separator >= 0 ? args.slice(separator + 1) : args; + calls.push(command); + if ( + command[0] === "sh" && + command[1] === "-c" && + typeof command[2] === "string" && + !command[2].includes("symlinked-root") + ) { + return "/sandbox/.openclaw/extensions\n"; + } + return ""; + }, + }; + } + if (request === "../sandbox/privileged-exec") { + return { + privilegedSandboxExecArgv(_sandboxName, cmd) { + return [...cmd]; + }, + }; + } + return originalLoad.call(this, request, parent, isMain); +}; +try { + const { lockAgentConfig } = require("./dist/lib/shields/index.js"); + lockAgentConfig("sandbox-pod", { + agentName: "openclaw", + configPath: "/sandbox/.openclaw/openclaw.json", + configDir: "/sandbox/.openclaw", + sensitiveFiles: ["/sandbox/.openclaw/.config-hash"], + }); + process.stdout.write("UNEXPECTED_SUCCESS\n"); + process.stdout.write(JSON.stringify(calls)); + process.exit(0); +} catch (err) { + process.stdout.write(JSON.stringify(calls)); + process.stderr.write(err && err.message ? err.message : String(err)); + process.exit(2); +} +`, + ], + { encoding: "utf-8", timeout: 5000 }, + ); + expect(probe.status).toBe(2); + expect(probe.stderr).toContain("state dir root is a symlink"); + + const calls = JSON.parse(probe.stdout) as string[][]; + // Mutation pass scripts contain "symlinked-root\\t" in their script body. + // If any such call was recorded, atomicity is broken. + const mutationCalls = calls.filter( + (command) => + command[0] === "sh" && + command[1] === "-c" && + typeof command[2] === "string" && + command[2].includes("symlinked-root"), + ); + expect(mutationCalls).toEqual([]); + // Restore-writable-subpaths must also not run (it would mkdir + // agents//sessions inside what could be a half-locked tree). + const restoreCalls = calls.filter( + (command) => + command[0] === "sh" && + command[1] === "-c" && + typeof command[2] === "string" && + command[2].includes("agents/*/sessions"), + ); + expect(restoreCalls).toEqual([]); + }); + + it("keeps the top-level config dir owned by root:root (lock contract unchanged)", () => { + const commands = runLockAgentConfigProbe(); + expect(commands).toContainEqual([ + "chown", + "root:root", + "/sandbox/.openclaw", + ]); + expect(commands).toContainEqual([ + "chown", + "root:root", + "/sandbox/.openclaw/openclaw.json", + ]); + }); + + it("restores agents/*/sessions to sandbox:sandbox 2770 after the main lock loop", () => { + const commands = runLockAgentConfigProbe(); + const restoreShell = commands.find( + (command) => + command[0] === "sh" && + command[1] === "-c" && + command.includes("/sandbox/.openclaw") && + command.includes("agents/*/sessions") && + typeof command[2] === "string" && + command[2].includes("chown -R sandbox:sandbox") && + command[2].includes("chmod 2770"), + ); + expect(restoreShell).toBeDefined(); + }); + + // Behavioral check against a real filesystem fixture: the restore script + // must mkdir `agents//sessions` even when the leaf does not yet exist + // (fresh sandbox, never-run TUI). The pre-fix script's case-`*` guard + // skipped the literal pattern when the glob matched nothing, leaving + // `sessions/` uncreated and the post-lockdown TUI mkdir blocked. + it("creates agents//sessions under a fresh agent dir that has no sessions yet", () => { + const fixture = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-shields-runtime-")); + const configDir = path.join(fixture, ".openclaw"); + const agentDir = path.join(configDir, "agents", "main"); + fs.mkdirSync(agentDir, { recursive: true }); + + const restoreShell = runLockAgentConfigProbe().find( + (command) => + command[0] === "sh" && + command[1] === "-c" && + command.includes("agents/*/sessions"), + ); + if (!restoreShell) { + throw new Error("restore-writable-runtime-subpaths shell command not found"); + } + const script = restoreShell[2]; + // Captured argv: ["sh", "-c", script, "sh", "/sandbox/.openclaw", ...patterns]. + // Drop everything up to and including the probed configDir so the fixture + // configDir is passed exactly once when re-running the script body. + const patterns = restoreShell.slice(5); + + const result = spawnSync( + "sh", + ["-c", `${script}\n`, "sh", configDir, ...patterns], + { encoding: "utf-8", timeout: 5000 }, + ); + expect(result.status).toBe(0); + expect(fs.existsSync(path.join(agentDir, "sessions"))).toBe(true); + expect(fs.statSync(path.join(agentDir, "sessions")).isDirectory()).toBe(true); + + fs.rmSync(fixture, { recursive: true, force: true }); + }); + + // If a pre-lockdown agent swaps a high-risk state dir (here `extensions`) + // for a symlink to a host path, the consolidated state-dir lock script + // must skip the symlink without recursing into the target. + it("does not chown/chmod through a symlinked high-risk state dir", () => { + const fixture = fs.mkdtempSync( + path.join(os.tmpdir(), "nemoclaw-shields-statedir-symlink-"), + ); + const configDir = path.join(fixture, ".openclaw"); + const hostTarget = path.join(fixture, "host-target"); + fs.mkdirSync(configDir, { recursive: true }); + fs.mkdirSync(hostTarget, { recursive: true }); + const innocentFile = path.join(hostTarget, "host-file"); + fs.writeFileSync(innocentFile, "untouched\n", { mode: 0o600 }); + const hostFilePerms = fs.statSync(innocentFile).mode & 0o7777; + fs.symlinkSync(hostTarget, path.join(configDir, "extensions")); + + const stateDirLockShell = findStateDirLockShell(runLockAgentConfigProbe()); + if (!stateDirLockShell) { + throw new Error("state-dir lock shell command not found"); + } + const script = stateDirLockShell[2]; + // Captured argv: ["sh", "-c", script, "sh", configDir, owner, + // recursiveMode, dirMode, clearSetgid, ...stateDirs]. Drop the probed + // configDir from index 4 and substitute the fixture path. + const args = stateDirLockShell.slice(4); + args[0] = configDir; + + const result = spawnSync( + "sh", + ["-c", `${script}\n`, "sh", ...args], + { encoding: "utf-8", timeout: 5000 }, + ); + expect(result.status).toBe(0); + expect(fs.lstatSync(path.join(configDir, "extensions")).isSymbolicLink()).toBe(true); + expect(fs.statSync(innocentFile).mode & 0o7777).toBe(hostFilePerms); + + fs.rmSync(fixture, { recursive: true, force: true }); + }); + + // Defense in depth: if a malicious agent points `agents/` at /etc or + // any other host path before shields-up runs, the privileged restore + // helper must not mkdir/chown/chmod through that symlink. The script + // must drop symlinked parents (and symlinked targets) before touching + // them. + it("refuses to follow a symlinked agents/ parent during the runtime-subpath restore", () => { + const fixture = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-shields-symlink-")); + const configDir = path.join(fixture, ".openclaw"); + const agentsRoot = path.join(configDir, "agents"); + const hostTarget = path.join(fixture, "host-target"); + fs.mkdirSync(agentsRoot, { recursive: true }); + fs.mkdirSync(hostTarget, { recursive: true }); + fs.symlinkSync(hostTarget, path.join(agentsRoot, "main")); + + const restoreShell = runLockAgentConfigProbe().find( + (command) => + command[0] === "sh" && + command[1] === "-c" && + command.includes("agents/*/sessions"), + ); + if (!restoreShell) { + throw new Error("restore-writable-runtime-subpaths shell command not found"); + } + const script = restoreShell[2]; + const patterns = restoreShell.slice(5); + + const result = spawnSync( + "sh", + ["-c", `${script}\n`, "sh", configDir, ...patterns], + { encoding: "utf-8", timeout: 5000 }, + ); + expect(result.status).toBe(0); + expect(fs.existsSync(path.join(hostTarget, "sessions"))).toBe(false); + expect(fs.existsSync(path.join(agentsRoot, "main", "sessions"))).toBe(false); + + fs.rmSync(fixture, { recursive: true, force: true }); + }); +});