From 5c9e76167f8d5058d0e0570fa0344be9aead15ad Mon Sep 17 00:00:00 2001 From: Megan Foster Date: Tue, 9 Jun 2026 21:01:50 +0800 Subject: [PATCH 1/2] Harden safety, executor, and CI; dedup shell quoting MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Five targeted hardening fixes from the code review, no behavioral rewrites: - ci: add GitHub Actions workflow (typecheck, lint, build, test on Node 18/20/22). The suite is fully offline, so no API keys/credits are used. - executor: wire a per-step wall-clock timeout into the setup path so a hung command (stuck install, stdin prompt, unreachable registry) can no longer block forever. Generous per-type defaults, overridable via SETUPR_STEP_TIMEOUT_MS; timeouts surface a clear terminated message. - safety: remove dead-code ternary (force ? confirm : confirm) — high risk now plainly always confirms and --force cannot bypass it. Normalize forceCanSkipConfirmation to `risk === "medium"` in both safety.ts and engine.ts so the two agree. - safety: tighten secret detection to match inline KEY=value credential assignments and recognizable secret value literals instead of bare words, removing false-positive confirmations (e.g. `npm i next-auth`). - refactor: extract the duplicated shellQuote helper into src/util/shell.ts and import it in security, verification, and product. - docs: document the safety threat model in SECURITY.md (best-effort denylist + best-effort redaction, not a sandbox). Adds 9 regression tests (186 total). typecheck, lint, build, and the full suite pass. Co-Authored-By: Claude Opus 4.8 --- .github/workflows/ci.yml | 46 +++++++++++++++++++++++++++++++++ SECURITY.md | 32 +++++++++++++++++++++++ src/agent/safety.ts | 18 +++++++++---- src/commands/plain/product.ts | 5 +--- src/core/engine.ts | 4 ++- src/executor/index.ts | 35 ++++++++++++++++++++++--- src/security/index.ts | 5 +--- src/util/shell.ts | 8 ++++++ src/verification/index.ts | 5 +--- tests/agent-runtime.test.ts | 32 +++++++++++++++++++++++ tests/util-shell.test.ts | 48 +++++++++++++++++++++++++++++++++++ 11 files changed, 217 insertions(+), 21 deletions(-) create mode 100644 .github/workflows/ci.yml create mode 100644 src/util/shell.ts create mode 100644 tests/util-shell.test.ts diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..5940293 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,46 @@ +name: CI + +on: + push: + branches: ["**"] + pull_request: + branches: ["**"] + +# Cancel superseded runs on the same ref to save CI minutes. +concurrency: + group: ci-${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + build-and-test: + name: Build & test (Node ${{ matrix.node }}) + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + node: ["18", "20", "22"] + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Set up Node ${{ matrix.node }} + uses: actions/setup-node@v4 + with: + node-version: ${{ matrix.node }} + cache: npm + + - name: Install dependencies + run: npm ci + + - name: Typecheck + run: npm run typecheck + + - name: Lint + run: npm run lint + + - name: Build + run: npm run build + + # The test suite is fully offline (no live AI provider calls), so no API keys are needed. + - name: Test + run: npm test diff --git a/SECURITY.md b/SECURITY.md index 27c3319..a3e1ae3 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -16,3 +16,35 @@ Please do not open a public issue for a suspected vulnerability. Email the maint ## Secret Handling Never commit API keys, Telegram tokens, OAuth credentials, database files, `.env` files, or local user exports. Use `.env.example` for placeholders only. + +Secrets are redacted on a best-effort basis (`redactText`/`redactObject` in `src/core/engine.ts`) before commands, history, and logs are persisted. This targets common token shapes and `NAME=value` credential assignments — it is not a guarantee that no secret will ever reach a log file. + +## Threat Model + +Setupr runs real shell commands on your machine with your full user privileges. The command-safety +layer (`src/agent/safety.ts`) is a **best-effort, defense-in-depth guard, not a sandbox.** It +classifies each planned command and decides whether to allow, confirm, or block it: + +- **Block (cannot be bypassed by `--force`)** — clearly destructive or hostile patterns such as + `rm -rf /` (and `~`, `*`, `$HOME` variants), `sudo`, `chmod 777`, `chown -R`, and + `curl … | sh`/`| bash` pipe-to-shell installs. +- **Confirm** — high/medium-risk commands (dependency installs, commands that delete files or reset + git state, commands that embed a secret value or `KEY=value` credential assignment). `--force` + may proceed past a **medium**-risk confirmation using safe defaults, but **never** past a + high-risk or blocked one. +- **Allow** — everything else. + +What this layer is **not**: + +- **Not a security boundary.** Anything you could run in your shell, a command run through Setupr + can run. +- **The block list is a denylist, and denylists are inherently incomplete.** It catches common, + obvious footguns; an obfuscated or equivalent destructive command can get past it. Treat the + guard as a seatbelt, not a vault door. +- **Trust the project and its AI-generated plan.** When AI planning is enabled, setup steps may be + proposed by a model based on the project's contents. Review the plan — especially before using + `--force` — the way you would review a shell script you downloaded. + +Recommendations: run Setupr only against projects you trust, read the pre-execution plan before +confirming, be deliberate with `--force`, and keep real secrets in `.env` files rather than inline +in commands. diff --git a/src/agent/safety.ts b/src/agent/safety.ts index 3531b50..418ee77 100644 --- a/src/agent/safety.ts +++ b/src/agent/safety.ts @@ -10,7 +10,11 @@ export interface SafetyEvaluation { forceCanSkipConfirmation: boolean; } -const SECRET_PATTERN = /(API[_-]?KEY|TOKEN|SECRET|PASSWORD|PRIVATE[_-]?KEY|CREDENTIAL|AUTH)/i; +// Flag only commands that actually embed a secret: an inline `NAME=value` assignment to a +// credential-shaped variable, or a recognizable secret value literal. Matching bare words like +// "auth" or "token" produced false positives on legitimate commands (e.g. `npm i next-auth`). +const SECRET_ASSIGNMENT_PATTERN = /\b(?:API[_-]?KEY|ACCESS[_-]?KEY|AUTH[_-]?TOKEN|TOKEN|SECRET|PASSWORD|PASSWD|PRIVATE[_-]?KEY|CREDENTIALS?)\s*=\s*[^\s'"]{6,}/i; +const SECRET_VALUE_PATTERN = /\b(?:sk-ant-[A-Za-z0-9-]{16,}|sk-[A-Za-z0-9]{16,}|ghp_[A-Za-z0-9]{16,}|gho_[A-Za-z0-9]{16,}|github_pat_[A-Za-z0-9_]{20,}|AIza[A-Za-z0-9_-]{16,}|xox[baprs]-[A-Za-z0-9-]{10,})\b|-----BEGIN[A-Z ]+PRIVATE KEY-----/; const SHELL_META_PATTERN = /(;|&&|\|\||`|\$\(|>\s*\/|rm\s+-rf\s+(?:\/|\*|~|\$HOME))/; const DESTRUCTIVE_PATTERN = /\b(rm|del|rmdir|trash|git\s+reset|git\s+clean|docker\s+system\s+prune)\b/i; const INSTALL_PATTERN = /\b(npm|pnpm|yarn|bun|pip|poetry|cargo|go)\b.*\b(install|add|get|download|build)\b/i; @@ -20,9 +24,9 @@ export function evaluateCommandSafety(command: string, options: { force?: boolea let risk: SafetyRisk = "none"; let decision: SafetyDecision = "allow"; - if (SECRET_PATTERN.test(command)) { + if (SECRET_ASSIGNMENT_PATTERN.test(command) || SECRET_VALUE_PATTERN.test(command)) { risk = maxRisk(risk, "high"); - reasons.push("The command text appears to include a secret-looking token or variable name."); + reasons.push("The command appears to embed a secret value or credential assignment in plaintext."); } if (SHELL_META_PATTERN.test(command)) { @@ -51,14 +55,18 @@ export function evaluateCommandSafety(command: string, options: { force?: boolea } if (risk === "critical") decision = "block"; - else if (risk === "high") decision = options.force ? "confirm" : "confirm"; + // High risk always requires confirmation; --force cannot bypass it. + else if (risk === "high") decision = "confirm"; + // Medium risk confirms by default, but --force may proceed with safe defaults. else if (risk === "medium") decision = options.force ? "allow" : "confirm"; return { decision, risk, reasons, - forceCanSkipConfirmation: risk === "low" || risk === "medium", + // --force only skips a confirmation it would otherwise raise (medium risk). Low/none have no + // confirmation to skip; high/critical can never be skipped by force. + forceCanSkipConfirmation: risk === "medium", }; } diff --git a/src/commands/plain/product.ts b/src/commands/plain/product.ts index 51081f6..d9622e9 100644 --- a/src/commands/plain/product.ts +++ b/src/commands/plain/product.ts @@ -8,6 +8,7 @@ import { createSetuprError, printPlainError } from "../../errors/index.js"; import { scanProject } from "../../scanner/index.js"; import { collectContext } from "../../context/collector.js"; import { collectDashboardStatus } from "../../status/collector.js"; +import { shellQuote } from "../../util/shell.js"; interface ProductFlags { args?: string[]; @@ -231,7 +232,3 @@ function parseGitHubRepo(remote: string): string | null { const https = remote.match(/github\.com\/([^/]+\/[^/.]+)(?:\.git)?$/); return https?.[1] || null; } - -function shellQuote(value: string): string { - return `'${value.replace(/'/g, "'\\''")}'`; -} diff --git a/src/core/engine.ts b/src/core/engine.ts index fd3ff6e..9406c0e 100644 --- a/src/core/engine.ts +++ b/src/core/engine.ts @@ -144,7 +144,9 @@ export class ProjectEngine { decision: risk === "high" || risk === "medium" ? "confirm" : "allow", risk: risk === "none" ? "none" : risk, reasons, - forceCanSkipConfirmation: risk !== "high", + // Consistent with agent/safety.ts: --force only skips the medium-risk confirmation; + // high risk always confirms. + forceCanSkipConfirmation: risk === "medium", }; } diff --git a/src/executor/index.ts b/src/executor/index.ts index 7b5afe0..97218be 100644 --- a/src/executor/index.ts +++ b/src/executor/index.ts @@ -17,6 +17,24 @@ export interface ExecutionResult { duration: number; } +// Per-step wall-clock limits so a hung command (stuck install, prompt waiting on stdin, +// unreachable registry) can never block the setup run forever. Override with the +// SETUPR_STEP_TIMEOUT_MS env var (applies to every command step). +const DEFAULT_STEP_TIMEOUT_MS: Record = { + runtime: 600_000, // installing/switching runtimes can be slow + deps: 600_000, // package installs on large projects + script: 600_000, // build/postinstall scripts + config: 120_000, + env: 120_000, + verify: 120_000, +}; + +export function stepTimeoutMs(step: SetupStep): number { + const override = Number(process.env.SETUPR_STEP_TIMEOUT_MS); + if (Number.isFinite(override) && override > 0) return override; + return DEFAULT_STEP_TIMEOUT_MS[step.type] ?? 300_000; +} + export async function executeStep( step: SetupStep, cwd: string, @@ -64,6 +82,8 @@ export async function executeStep( store.getState().addLog({ content: step.command, type: "command" }); + const timeoutMs = stepTimeoutMs(step); + try { const result = await runCommand(step.command, cwd, (line) => { store.getState().addLog({ content: line, type: "progress" }); @@ -71,7 +91,7 @@ export async function executeStep( role: "system", content: `[${step.label}] ${line}`, }); - }); + }, undefined, { timeoutMs }); const duration = Date.now() - start; const durationStr = duration < 1000 ? `${duration}ms` : `${(duration / 1000).toFixed(1)}s`; @@ -81,19 +101,28 @@ export async function executeStep( store.getState().addLog({ content: `✓ ${step.label} — OK (${durationStr})`, type: "success" }); return { success: true, output: result.stdout, duration }; } else { + if (result.timedOut) { + store.getState().addLog({ + content: `✗ ${step.label} — timed out after ${Math.round(timeoutMs / 1000)}s and was terminated`, + type: "error", + }); + } + const stderr = result.timedOut + ? `Command timed out after ${Math.round(timeoutMs / 1000)}s and was terminated.\n${result.stderr}`.trim() + : result.stderr; const psetupError = classifyCommandFailure({ command: step.command, cwd, exitCode: result.exitCode, stdout: result.stdout, - stderr: result.stderr, + stderr, stepLabel: step.label, stepType: step.type, }); store.getState().updateStep(step.id, { status: "failed", error: psetupError.title }); store.getState().addLog({ content: `✗ ${step.label} — ${errorSummary(psetupError)}`, type: "error" }); store.getState().addMessage({ role: "system", content: errorSummary(psetupError) }); - return { success: false, output: result.stdout, error: result.stderr, psetupError, duration }; + return { success: false, output: result.stdout, error: stderr, psetupError, duration }; } } catch (err) { const duration = Date.now() - start; diff --git a/src/security/index.ts b/src/security/index.ts index 90636b4..d3ca10c 100644 --- a/src/security/index.ts +++ b/src/security/index.ts @@ -5,6 +5,7 @@ import { runCommand } from "../executor/index.js"; import { parseEnvKeys, parseEnvPairs } from "../env/index.js"; import { scanProject } from "../scanner/index.js"; import { appendHistoryEvent, readProjectJson, writeProjectJson } from "../state/project.js"; +import { shellQuote } from "../util/shell.js"; export type SecuritySeverity = "info" | "low" | "medium" | "high" | "critical"; export type SecurityCategory = "deps" | "secrets" | "env" | "docker" | "ci" | "code" | "routes" | "auth" | "headers" | "config"; @@ -422,7 +423,3 @@ function score(findings: SecurityFinding[]): number { function redact(text: string): string { return text.replace(/[A-Za-z0-9_/-]{16,}/g, "****").slice(0, 180); } - -function shellQuote(value: string): string { - return `'${value.replace(/'/g, "'\\''")}'`; -} diff --git a/src/util/shell.ts b/src/util/shell.ts new file mode 100644 index 0000000..4f431ca --- /dev/null +++ b/src/util/shell.ts @@ -0,0 +1,8 @@ +/** + * POSIX-safe single-quote escaping for interpolating untrusted values into shell commands. + * Wraps the value in single quotes and escapes any embedded single quotes using the + * `'\''` idiom, so the result is safe to splice into a `sh -c` style command line. + */ +export function shellQuote(value: string): string { + return `'${value.replace(/'/g, "'\\''")}'`; +} diff --git a/src/verification/index.ts b/src/verification/index.ts index b4e07bd..5929429 100644 --- a/src/verification/index.ts +++ b/src/verification/index.ts @@ -4,6 +4,7 @@ import { basename, dirname, extname, join } from "path"; import { runCommand } from "../executor/index.js"; import { scanProject, type ScanResult } from "../scanner/index.js"; import { appendHistoryEvent, readProjectJson, writeProjectJson } from "../state/project.js"; +import { shellQuote } from "../util/shell.js"; export type VerificationStatus = "pass" | "warn" | "fail" | "skip"; @@ -442,10 +443,6 @@ function dedupe(values: string[]): string[] { return [...new Set(values)]; } -function shellQuote(value: string): string { - return `'${value.replace(/'/g, "'\\''")}'`; -} - function safeName(file: string): string { return basename(file, extname(file)).replace(/[^A-Za-z0-9_]/g, "_"); } diff --git a/tests/agent-runtime.test.ts b/tests/agent-runtime.test.ts index ca16cbc..a28bd86 100644 --- a/tests/agent-runtime.test.ts +++ b/tests/agent-runtime.test.ts @@ -155,6 +155,38 @@ describe("agent runtime", () => { expect(evaluateCommandSafety("curl https://example.com/install.sh | sh").decision).toBe("block"); }); + it("does not flag bare credential words in package names as secrets", () => { + // Previously the secret check matched any occurrence of "auth"/"token"/"secret", + // raising false-positive high-risk confirmations on benign installs. + const next = evaluateCommandSafety("npm install next-auth"); + expect(next.risk).toBe("low"); + expect(next.reasons.join(" ")).not.toMatch(/secret value/i); + + const token = evaluateCommandSafety("npm install passport-token"); + expect(token.reasons.join(" ")).not.toMatch(/secret value/i); + }); + + it("flags inline secret assignments and recognizable secret values", () => { + const assignment = evaluateCommandSafety("API_KEY=sk-livetokenvalue1234 node deploy.js"); + expect(assignment.risk).toBe("high"); + expect(assignment.decision).toBe("confirm"); + + const literal = evaluateCommandSafety("echo ghp_abcdefghijklmnop1234567890"); + expect(literal.reasons.join(" ")).toMatch(/secret value/i); + }); + + it("never lets --force skip a high-risk confirmation", () => { + const high = evaluateCommandSafety("git reset --hard HEAD~3", { force: true }); + expect(high.risk).toBe("high"); + expect(high.decision).toBe("confirm"); + expect(high.forceCanSkipConfirmation).toBe(false); + + const medium = evaluateCommandSafety("mkdir build && cd build", { force: true }); + expect(medium.risk).toBe("medium"); + expect(medium.decision).toBe("allow"); + expect(medium.forceCanSkipConfirmation).toBe(true); + }); + it("saves and restores agent workflow checkpoints", async () => { await saveAgentWorkflowCheckpoint(tempDir, { cwd: tempDir, diff --git a/tests/util-shell.test.ts b/tests/util-shell.test.ts new file mode 100644 index 0000000..abdaeb4 --- /dev/null +++ b/tests/util-shell.test.ts @@ -0,0 +1,48 @@ +import { afterEach, describe, expect, it } from "vitest"; +import { shellQuote } from "../src/util/shell.js"; +import { stepTimeoutMs } from "../src/executor/index.js"; +import type { SetupStep } from "../src/ai/planner.js"; + +describe("shellQuote", () => { + it("wraps plain values in single quotes", () => { + expect(shellQuote("lodash")).toBe("'lodash'"); + }); + + it("escapes embedded single quotes safely", () => { + expect(shellQuote("it's")).toBe("'it'\\''s'"); + }); + + it("neutralizes shell metacharacters by quoting", () => { + expect(shellQuote("a; rm -rf /")).toBe("'a; rm -rf /'"); + expect(shellQuote("$(whoami)")).toBe("'$(whoami)'"); + }); +}); + +describe("stepTimeoutMs", () => { + const step = (type: SetupStep["type"]): SetupStep => ({ + id: "s1", + label: "step", + type, + command: "echo hi", + status: "pending", + }); + + afterEach(() => { + delete process.env.SETUPR_STEP_TIMEOUT_MS; + }); + + it("uses generous defaults for slow step types", () => { + expect(stepTimeoutMs(step("deps"))).toBe(600_000); + expect(stepTimeoutMs(step("verify"))).toBe(120_000); + }); + + it("honors a positive SETUPR_STEP_TIMEOUT_MS override", () => { + process.env.SETUPR_STEP_TIMEOUT_MS = "5000"; + expect(stepTimeoutMs(step("deps"))).toBe(5000); + }); + + it("ignores an invalid override and falls back to the default", () => { + process.env.SETUPR_STEP_TIMEOUT_MS = "not-a-number"; + expect(stepTimeoutMs(step("deps"))).toBe(600_000); + }); +}); From 077dba77c910db3b7d6809f8017d1cb7d07554a2 Mon Sep 17 00:00:00 2001 From: Megan Foster Date: Tue, 9 Jun 2026 21:04:33 +0800 Subject: [PATCH 2/2] ci: run on Node 20/22 (vitest 4 toolchain needs Node 20+) The vitest 4 / rolldown test toolchain imports `styleText` from node:util, which only exists on Node 20+, so the test step fails on Node 18. The published CLI bundle still targets Node 18; CI builds and tests on the versions the dev toolchain supports. Co-Authored-By: Claude Opus 4.8 --- .github/workflows/ci.yml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 5940293..1382c46 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -18,7 +18,10 @@ jobs: strategy: fail-fast: false matrix: - node: ["18", "20", "22"] + # The dev/test toolchain (vitest 4 via rolldown) requires Node 20+ + # (it imports `styleText` from node:util, added in Node 20). The published + # CLI bundle still targets Node 18, but CI builds and tests on 20/22. + node: ["20", "22"] steps: - name: Checkout uses: actions/checkout@v4