From 60c205d4fb56d43b7ae77873de64d043bbc13373 Mon Sep 17 00:00:00 2001 From: casanel-net <263300303+casanel-net@users.noreply.github.com> Date: Sat, 23 May 2026 19:44:13 +0900 Subject: [PATCH] fix(security): refuse to follow symlinks that escape the project root in MCP source-returning reads MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Both MCP source-returning handlers re-read each matched file via fs.readFileSync(absPath) with only a logical path-prefix check via validatePathWithinRoot. An attacker-planted in-tree symlink (e.g. evil.ts -> ../../.env) at a logically-in-tree path passes that check; the read follows the link and the project-external file's contents reach the agent. Same CWE-59 class as the #280 session-marker fix. - src/context/index.ts extractNodeCode — codegraph_context source emission - src/mcp/tools.ts handleExplore source loop — codegraph_explore emission The repo already had isPathWithinRootReal (src/utils.ts:132) that does the realpath comparison; this patch just calls it from both read sites and logs a debug breadcrumb when an escape is refused. Two regression tests added under a new describe block in __tests__/security.test.ts, one per handler. The sentinel lives in a function body (not a signature) so the assertion isolates the re-read leak from any index-time metadata exposure. Tests skip on platforms where the user can't create symlinks, matching the existing session-marker symlink tests. --- __tests__/security.test.ts | 128 +++++++++++++++++++++++++++++++++++++ src/context/index.ts | 12 +++- src/mcp/tools.ts | 9 ++- 3 files changed, 145 insertions(+), 4 deletions(-) diff --git a/__tests__/security.test.ts b/__tests__/security.test.ts index 75ac8432..83681e53 100644 --- a/__tests__/security.test.ts +++ b/__tests__/security.test.ts @@ -642,3 +642,131 @@ describe('Session marker symlink resistance', () => { } }); }); + +describe('codegraph_context source-emission symlink escape resistance', () => { + // The MCP source-returning loop in src/mcp/tools.ts re-reads each matched + // file via fs.readFileSync(absPath) and ships the bytes back to the agent. + // If an attacker can plant an in-tree symlink whose name passes isSourceFile + // (e.g. evil.ts) but whose realpath escapes the project root, the read + // follows the link and the project-external file's contents reach the agent. + // validatePathWithinRoot is a logical path-prefix check only and lets such a + // path through; isPathWithinRootReal (already present in src/utils.ts) does + // the realpath comparison that closes the escape. Same CWE-59 class as the + // session-marker write-side fix. + let projectDir: string; + let outsideDir: string; + + beforeEach(() => { + projectDir = createTempDir(); + outsideDir = createTempDir(); + }); + + afterEach(() => { + cleanupTempDir(projectDir); + cleanupTempDir(outsideDir); + }); + + it('does not re-read an in-tree symlink whose target escapes the project root when emitting source', async () => { + // Threat model: the escape target is a real .ts file the indexer can + // parse, so it ends up in the DB just like any in-tree file. The bug we + // guard against is the source-emission *re-read* — the read-time + // `fs.readFileSync(absPath)` follows the symlink and surfaces the real + // target's body to the agent. We use a sentinel in the function BODY (not + // the export name / signature), so the value cannot leak via the + // index-time symbol-signature metadata path — only via a re-read of the + // file's contents on the codegraph_context call. + const sentinelSymbol = 'EscapedSentinel_42_DoNotLeak'; + const sentinelValue = 'leaked-only-via-re-read-zzz'; + const secretFile = path.join(outsideDir, 'secret.ts'); + fs.writeFileSync( + secretFile, + `export function ${sentinelSymbol}(): string {\n return '${sentinelValue}';\n}\n` + ); + + // Legit in-tree file so the project has at least one real source entry. + fs.writeFileSync(path.join(projectDir, 'a.ts'), 'export const x = 1;\n'); + + // Plant the escape symlink at a logically-in-tree path. The walker still + // adds it (escape-target rejection at the walker layer is a separate + // concern outside this PR's scope), so the path ends up in the DB and is + // a live candidate for source emission on the next codegraph_context call. + try { + fs.symlinkSync(secretFile, path.join(projectDir, 'evil.ts'), 'file'); + } catch { + // Skip on platforms where the user can't create symlinks (Windows + // without dev mode + admin) — the escape we're guarding against doesn't + // apply when symlinks aren't creatable. + return; + } + + const cg = await CodeGraph.init(projectDir); + await cg.indexAll(); + + const handler = new ToolHandler(cg); + let resultText: string; + try { + // Task chosen to surface the sentinel symbol — the search query lands + // on the symlinked file, and (pre-fix) the source-emission re-read would + // then ship the escape target's function body back to the agent. + const result = await handler.execute('codegraph_context', { task: sentinelSymbol }); + resultText = result.content.map((c) => c.text).join('\n'); + } finally { + cg.close(); + } + + // Positive control: the symbol/file must still be referenced in the + // response (otherwise the negative assertion below could trivially pass + // because the search missed entirely, the budget dropped the node, etc). + expect(resultText).toContain(sentinelSymbol); + expect(resultText).toContain('evil.ts'); + + // Core assertion: post-fix path silently skips re-read for paths whose + // realpath escapes — the agent must not see the secret file's body bytes. + expect(resultText).not.toContain(sentinelValue); + }); + + it('does not re-read an in-tree symlink that escapes the project root in codegraph_explore source emission', async () => { + // Same threat as the codegraph_context test above, but exercised through + // codegraph_explore's separate source-emission loop in src/mcp/tools.ts. + // Both read sites share the same logical-only `validatePathWithinRoot` + // pattern and were fixed by the same `isPathWithinRootReal` addition. + const sentinelSymbol = 'EscapedSentinelExplore_42_DoNotLeak'; + const sentinelValue = 'leaked-only-via-explore-zzz'; + const secretFile = path.join(outsideDir, 'secret.ts'); + fs.writeFileSync( + secretFile, + `export function ${sentinelSymbol}(): string {\n return '${sentinelValue}';\n}\n` + ); + + fs.writeFileSync(path.join(projectDir, 'a.ts'), 'export const x = 1;\n'); + + try { + fs.symlinkSync(secretFile, path.join(projectDir, 'evil.ts'), 'file'); + } catch { + return; + } + + const cg = await CodeGraph.init(projectDir); + await cg.indexAll(); + + const handler = new ToolHandler(cg); + let resultText: string; + try { + const result = await handler.execute('codegraph_explore', { query: sentinelSymbol }); + resultText = result.content.map((c) => c.text).join('\n'); + } finally { + cg.close(); + } + + // Positive control: the symlinked file was indexed and matched — otherwise + // the body-leak assertion below could pass for an unrelated reason + // (search miss, budget drop, etc.). The header / found-count strings come + // straight from the explore handler before any source emission. + expect(resultText).toContain(sentinelSymbol); + expect(resultText).toMatch(/Found \d+ symbols? across \d+ files?/); + + // Core assertion: post-fix path silently skips re-read for paths whose + // realpath escapes — the agent must not see the secret file's body bytes. + expect(resultText).not.toContain(sentinelValue); + }); +}); diff --git a/src/context/index.ts b/src/context/index.ts index 7298cd41..e8292049 100644 --- a/src/context/index.ts +++ b/src/context/index.ts @@ -24,7 +24,7 @@ import { QueryBuilder } from '../db/queries'; import { GraphTraverser } from '../graph'; import { formatContextAsMarkdown, formatContextAsJson } from './formatter'; import { logDebug } from '../errors'; -import { validatePathWithinRoot } from '../utils'; +import { isPathWithinRootReal, validatePathWithinRoot } from '../utils'; import { isTestFile, extractSearchTerms, scorePathRelevance, getStemVariants } from '../search/query-utils'; /** @@ -933,7 +933,15 @@ export class ContextBuilder { private async extractNodeCode(node: Node): Promise { const filePath = validatePathWithinRoot(this.projectRoot, node.filePath); - if (!filePath || !fs.existsSync(filePath)) { + // The logical `validatePathWithinRoot` check lets a symlinked + // `node.filePath` whose real target escapes the project pass; the + // subsequent `readFileSync` would then ship the external file's bytes back + // to the agent. `isPathWithinRootReal` does the realpath comparison. + // Same CWE-59 class as the #280 session-marker fix (different mechanic). + if (!filePath || !fs.existsSync(filePath) || !isPathWithinRootReal(filePath, this.projectRoot)) { + if (filePath) { + logDebug('Skipping source emission for path that escapes project root', { filePath, projectRoot: this.projectRoot }); + } return null; } diff --git a/src/mcp/tools.ts b/src/mcp/tools.ts index 16df373d..f954afe9 100644 --- a/src/mcp/tools.ts +++ b/src/mcp/tools.ts @@ -16,7 +16,7 @@ import { readFileSync, writeSync, } from 'fs'; -import { clamp, validatePathWithinRoot, validateProjectPath } from '../utils'; +import { clamp, isPathWithinRootReal, validatePathWithinRoot, validateProjectPath } from '../utils'; import { tmpdir } from 'os'; import { join } from 'path'; @@ -1110,7 +1110,12 @@ export class ToolHandler { if (totalChars > budget.maxOutputChars * 0.9) break; const absPath = validatePathWithinRoot(projectRoot, filePath); - if (!absPath || !existsSync(absPath)) continue; + // The logical `validatePathWithinRoot` check lets a symlinked `filePath` + // whose real target escapes the project pass; the subsequent + // `readFileSync` would then ship the external file's bytes back to the + // agent. `isPathWithinRootReal` does the realpath comparison. + // Same CWE-59 class as the #280 session-marker fix (different mechanic). + if (!absPath || !existsSync(absPath) || !isPathWithinRootReal(absPath, projectRoot)) continue; let fileContent: string; try {