From 998c5ad7fd057ca3eb087fff99fe6ac01f592383 Mon Sep 17 00:00:00 2001 From: Jason Ginchereau Date: Mon, 18 May 2026 09:56:29 -1000 Subject: [PATCH 01/10] feat: optional path validation during cache restore --- .../cache/__tests__/listAndValidate.test.ts | 387 +++++++ packages/cache/__tests__/options.test.ts | 7 +- .../cache/__tests__/pathValidation.test.ts | 963 ++++++++++++++++++ packages/cache/__tests__/restoreCache.test.ts | 15 +- .../cache/__tests__/restoreCacheV2.test.ts | 15 +- .../cache/__tests__/tarPathValidation.test.ts | 349 +++++++ .../cache/docs/path-validation-test-plan.md | 199 ++++ packages/cache/package-lock.json | 70 +- packages/cache/package.json | 9 +- packages/cache/src/cache.ts | 41 +- .../cache/src/internal/cacheIntegrityError.ts | 36 + .../cache/src/internal/listAndValidate.ts | 208 ++++ packages/cache/src/internal/pathValidation.ts | 522 ++++++++++ packages/cache/src/internal/tar.ts | 105 +- packages/cache/src/options.ts | 38 +- 15 files changed, 2948 insertions(+), 16 deletions(-) create mode 100644 packages/cache/__tests__/listAndValidate.test.ts create mode 100644 packages/cache/__tests__/pathValidation.test.ts create mode 100644 packages/cache/__tests__/tarPathValidation.test.ts create mode 100644 packages/cache/docs/path-validation-test-plan.md create mode 100644 packages/cache/src/internal/cacheIntegrityError.ts create mode 100644 packages/cache/src/internal/listAndValidate.ts create mode 100644 packages/cache/src/internal/pathValidation.ts diff --git a/packages/cache/__tests__/listAndValidate.test.ts b/packages/cache/__tests__/listAndValidate.test.ts new file mode 100644 index 0000000000..33410f0444 --- /dev/null +++ b/packages/cache/__tests__/listAndValidate.test.ts @@ -0,0 +1,387 @@ +import {mkdirSync, mkdtempSync, writeFileSync, rmSync} from 'fs' +import * as os from 'os' +import * as path from 'path' +import * as zlib from 'zlib' +import {execSync} from 'child_process' +import {Header} from 'tar' +import {CompressionMethod} from '../src/internal/constants' +import {listAndValidate} from '../src/internal/listAndValidate' + +/** + * Real-archive integration tests for listAndValidate. These build small tar + * archives in-memory using `tar.Header`, write them to disk, and run them + * through the same parser the production code uses. No mocks. + */ + +interface TestEntry { + path: string + type: 'File' | 'Directory' | 'SymbolicLink' | 'Link' | 'CharacterDevice' + linkpath?: string + body?: Buffer +} + +function buildTarArchive(entries: TestEntry[]): Buffer { + const blocks: Buffer[] = [] + for (const entry of entries) { + const body = entry.body ?? Buffer.alloc(0) + const header = new Header({ + path: entry.path, + mode: 0o644, + uid: 0, + gid: 0, + size: body.length, + mtime: new Date(0), + type: entry.type, + linkpath: entry.linkpath, + uname: 'root', + gname: 'root' + }) + const headerBuf = Buffer.alloc(512) + header.encode(headerBuf, 0) + blocks.push(headerBuf) + if (body.length > 0) { + blocks.push(body) + const pad = (512 - (body.length % 512)) % 512 + if (pad > 0) blocks.push(Buffer.alloc(pad)) + } + } + // Two zero blocks mark end of archive. + blocks.push(Buffer.alloc(1024)) + return Buffer.concat(blocks) +} + +const TEST_ROOT = mkdtempSync(path.join(os.tmpdir(), 'cache-listAndValidate-')) + +/** + * Detect whether the `zstd` binary is on PATH at module load. We use a + * conditional `describe.skip` (rather than per-test early-returns) so that + * Jest reports skipped tests as `skipped` in its summary — silently + * `return`-ing from a test reports it as `passed`, which masks coverage gaps + * on machines where zstd is missing. + */ +const ZSTD_AVAILABLE = ((): boolean => { + try { + execSync(process.platform === 'win32' ? 'where zstd' : 'which zstd', { + stdio: 'ignore' + }) + return true + } catch { + return false + } +})() +const describeZstd = ZSTD_AVAILABLE ? describe : describe.skip + +function workspace(): string { + return path.join(TEST_ROOT, 'workspace') +} + +function writeArchive(name: string, data: Buffer): string { + mkdirSync(TEST_ROOT, {recursive: true}) + const fullPath = path.join(TEST_ROOT, name) + writeFileSync(fullPath, data) + return fullPath +} + +beforeAll(() => { + mkdirSync(workspace(), {recursive: true}) +}) + +afterAll(() => { + try { + rmSync(TEST_ROOT, {recursive: true, force: true}) + } catch { + // best-effort cleanup + } +}) + +describe('listAndValidate (real archives)', () => { + describe('uncompressed tar', () => { + test('clean archive: zero violations', async () => { + const archive = buildTarArchive([ + {path: 'cache/file1.txt', type: 'File', body: Buffer.from('hello')}, + {path: 'cache/sub/file2.txt', type: 'File', body: Buffer.from('world')}, + {path: 'cache/sub/', type: 'Directory'} + ]) + const archivePath = writeArchive('clean.tar', archive) + // Inject a fake gzip header by re-compressing? No — we want to test + // the uncompressed path. listAndValidate doesn't have a "raw" code + // path; it always assumes compression based on `compressionMethod`. + // To test uncompressed bytes we run it through gzip and pass Gzip. + const gzipped = zlib.gzipSync(archive) + writeFileSync(archivePath, gzipped) + + const violations = await listAndValidate( + archivePath, + CompressionMethod.Gzip, + [path.join(workspace(), 'cache')], + workspace() + ) + expect(violations).toEqual([]) + }) + }) + + describe('gzip-compressed tar', () => { + test('clean archive: zero violations', async () => { + const archive = buildTarArchive([ + {path: 'cache/file.txt', type: 'File', body: Buffer.from('hi')} + ]) + const archivePath = writeArchive( + 'clean.tar.gz', + zlib.gzipSync(archive) + ) + const violations = await listAndValidate( + archivePath, + CompressionMethod.Gzip, + [path.join(workspace(), 'cache')], + workspace() + ) + expect(violations).toEqual([]) + }) + + test('classic ../../../etc/passwd traversal: one violation', async () => { + const archive = buildTarArchive([ + {path: 'cache/legit.txt', type: 'File', body: Buffer.from('ok')}, + { + path: '../../../etc/passwd', + type: 'File', + body: Buffer.from('pwned') + } + ]) + const archivePath = writeArchive( + 'traversal.tar.gz', + zlib.gzipSync(archive) + ) + const violations = await listAndValidate( + archivePath, + CompressionMethod.Gzip, + [path.join(workspace(), 'cache')], + workspace() + ) + expect(violations).toHaveLength(1) + expect(violations[0].path).toBe('../../../etc/passwd') + expect(violations[0].entryType).toBe('File') + }) + + test('absolute path entry: one violation', async () => { + const absPath = + process.platform === 'win32' + ? 'C:/Windows/System32/evil.dll' + : '/etc/cron.d/evil' + const archive = buildTarArchive([ + {path: absPath, type: 'File', body: Buffer.from('x')} + ]) + const archivePath = writeArchive('abs.tar.gz', zlib.gzipSync(archive)) + const violations = await listAndValidate( + archivePath, + CompressionMethod.Gzip, + [path.join(workspace(), 'cache')], + workspace() + ) + expect(violations).toHaveLength(1) + expect(violations[0].path).toBe(absPath) + }) + + test('symlink with absolute target: one violation', async () => { + const archive = buildTarArchive([ + { + path: 'cache/link', + type: 'SymbolicLink', + linkpath: '/etc/passwd' + } + ]) + const archivePath = writeArchive( + 'symlink-abs.tar.gz', + zlib.gzipSync(archive) + ) + const violations = await listAndValidate( + archivePath, + CompressionMethod.Gzip, + [path.join(workspace(), 'cache')], + workspace() + ) + expect(violations).toHaveLength(1) + expect(violations[0].entryType).toBe('SymbolicLink') + expect(violations[0].linkpath).toBe('/etc/passwd') + }) + + test('symlink target traversing out of allowed roots: one violation', async () => { + const archive = buildTarArchive([ + { + path: 'cache/link', + type: 'SymbolicLink', + linkpath: '../../../etc/passwd' + } + ]) + const archivePath = writeArchive( + 'symlink-traverse.tar.gz', + zlib.gzipSync(archive) + ) + const violations = await listAndValidate( + archivePath, + CompressionMethod.Gzip, + [path.join(workspace(), 'cache')], + workspace() + ) + expect(violations).toHaveLength(1) + }) + + test('hardlink to ../etc/passwd: one violation', async () => { + const archive = buildTarArchive([ + { + path: 'cache/link', + type: 'Link', + linkpath: '../../../etc/passwd' + } + ]) + const archivePath = writeArchive( + 'hardlink-traverse.tar.gz', + zlib.gzipSync(archive) + ) + const violations = await listAndValidate( + archivePath, + CompressionMethod.Gzip, + [path.join(workspace(), 'cache')], + workspace() + ) + expect(violations).toHaveLength(1) + expect(violations[0].entryType).toBe('Link') + }) + + test('mixed clean and malicious entries: only the bad ones are reported', async () => { + const archive = buildTarArchive([ + {path: 'cache/a.txt', type: 'File', body: Buffer.from('1')}, + {path: '../escape.txt', type: 'File', body: Buffer.from('2')}, + {path: 'cache/sub/b.txt', type: 'File', body: Buffer.from('3')}, + { + path: 'cache/link', + type: 'SymbolicLink', + linkpath: '/tmp/x' + }, + {path: 'cache/sub/c.txt', type: 'File', body: Buffer.from('4')} + ]) + const archivePath = writeArchive( + 'mixed.tar.gz', + zlib.gzipSync(archive) + ) + const violations = await listAndValidate( + archivePath, + CompressionMethod.Gzip, + [path.join(workspace(), 'cache')], + workspace() + ) + const paths = violations.map(v => v.path).sort() + expect(paths).toEqual(['../escape.txt', 'cache/link']) + }) + + test('character device entry is rejected', async () => { + const archive = buildTarArchive([ + {path: 'cache/dev', type: 'CharacterDevice'} + ]) + const archivePath = writeArchive( + 'chardev.tar.gz', + zlib.gzipSync(archive) + ) + const violations = await listAndValidate( + archivePath, + CompressionMethod.Gzip, + [path.join(workspace(), 'cache')], + workspace() + ) + expect(violations).toHaveLength(1) + expect(violations[0].entryType).toBe('CharacterDevice') + }) + + test('archive with a single small entry: zero violations', async () => { + const archive = buildTarArchive([ + {path: 'cache/tiny.txt', type: 'File', body: Buffer.from('1')} + ]) + const archivePath = writeArchive( + 'single.tar.gz', + zlib.gzipSync(archive) + ) + const violations = await listAndValidate( + archivePath, + CompressionMethod.Gzip, + [path.join(workspace(), 'cache')], + workspace() + ) + expect(violations).toEqual([]) + }) + + test('corrupted / non-tar bytes: throws Error', async () => { + const archivePath = writeArchive( + 'corrupt.tar.gz', + zlib.gzipSync(Buffer.from('this is not a tar archive at all')) + ) + await expect( + listAndValidate( + archivePath, + CompressionMethod.Gzip, + [path.join(workspace(), 'cache')], + workspace() + ) + ).rejects.toThrow() + }) + }) + + describeZstd('zstd-compressed tar', () => { + test('clean archive (Zstd with --long): zero violations', async () => { + const archive = buildTarArchive([ + {path: 'cache/x.bin', type: 'File', body: Buffer.from('hello')} + ]) + // Compress via the zstd binary with --long=30 to mirror the real + // cache-creation pipeline. + const archivePath = path.join(TEST_ROOT, 'clean.tar.zst') + mkdirSync(TEST_ROOT, {recursive: true}) + writeFileSync(`${archivePath}.raw`, archive) + execSync( + `zstd --long=30 --force -o "${archivePath}" "${archivePath}.raw"`, + {stdio: 'ignore'} + ) + const violations = await listAndValidate( + archivePath, + CompressionMethod.Zstd, + [path.join(workspace(), 'cache')], + workspace() + ) + expect(violations).toEqual([]) + }) + + test('traversal in zstd archive: one violation', async () => { + const archive = buildTarArchive([ + {path: '../escape.txt', type: 'File', body: Buffer.from('x')} + ]) + const archivePath = path.join(TEST_ROOT, 'evil.tar.zst') + writeFileSync(`${archivePath}.raw`, archive) + execSync( + `zstd --long=30 --force -o "${archivePath}" "${archivePath}.raw"`, + {stdio: 'ignore'} + ) + const violations = await listAndValidate( + archivePath, + CompressionMethod.Zstd, + [path.join(workspace(), 'cache')], + workspace() + ) + expect(violations).toHaveLength(1) + }) + + test('ZstdWithoutLong compression method works', async () => { + const archive = buildTarArchive([ + {path: 'cache/y.bin', type: 'File', body: Buffer.from('z')} + ]) + const archivePath = path.join(TEST_ROOT, 'clean.short.tar.zst') + writeFileSync(`${archivePath}.raw`, archive) + execSync(`zstd --force -o "${archivePath}" "${archivePath}.raw"`, { + stdio: 'ignore' + }) + const violations = await listAndValidate( + archivePath, + CompressionMethod.ZstdWithoutLong, + [path.join(workspace(), 'cache')], + workspace() + ) + expect(violations).toEqual([]) + }) + }) +}) diff --git a/packages/cache/__tests__/options.test.ts b/packages/cache/__tests__/options.test.ts index b4c5a1f170..9b4ee5bf53 100644 --- a/packages/cache/__tests__/options.test.ts +++ b/packages/cache/__tests__/options.test.ts @@ -11,6 +11,7 @@ const downloadConcurrency = 8 const timeoutInMs = 30000 const segmentTimeoutInMs = 600000 const lookupOnly = false +const pathValidation = 'off' test('getDownloadOptions sets defaults', async () => { const actualOptions = getDownloadOptions() @@ -21,7 +22,8 @@ test('getDownloadOptions sets defaults', async () => { downloadConcurrency, timeoutInMs, segmentTimeoutInMs, - lookupOnly + lookupOnly, + pathValidation }) }) @@ -32,7 +34,8 @@ test('getDownloadOptions overrides all settings', async () => { downloadConcurrency: 14, timeoutInMs: 20000, segmentTimeoutInMs: 3600000, - lookupOnly: true + lookupOnly: true, + pathValidation: 'error' } const actualOptions = getDownloadOptions(expectedOptions) diff --git a/packages/cache/__tests__/pathValidation.test.ts b/packages/cache/__tests__/pathValidation.test.ts new file mode 100644 index 0000000000..860c7b27b0 --- /dev/null +++ b/packages/cache/__tests__/pathValidation.test.ts @@ -0,0 +1,963 @@ +import * as os from 'os' +import * as path from 'path' +import { + PathValidationViolation, + deriveAllowedRoots, + formatViolationSummary, + validateEntry +} from '../src/internal/pathValidation' + +const IS_WINDOWS = process.platform === 'win32' +const CASE_INSENSITIVE = process.platform === 'win32' || process.platform === 'darwin' + +describe('deriveAllowedRoots', () => { + const cwd = IS_WINDOWS ? 'C:\\workspace' : '/workspace' + + describe('glob-prefix stripping', () => { + test('literal absolute path is returned unchanged', () => { + const input = IS_WINDOWS ? 'C:\\foo\\bar' : '/foo/bar' + const expected = IS_WINDOWS ? 'C:\\foo\\bar' : '/foo/bar' + expect(deriveAllowedRoots([input], cwd)).toEqual([expected]) + }) + + test('star suffix strips trailing glob segment', () => { + const input = IS_WINDOWS ? 'C:\\foo\\bar\\*' : '/foo/bar/*' + const expected = IS_WINDOWS ? 'C:\\foo\\bar' : '/foo/bar' + expect(deriveAllowedRoots([input], cwd)).toEqual([expected]) + }) + + test('question-mark glob strips its segment', () => { + const input = IS_WINDOWS ? 'C:\\foo\\bar\\?c' : '/foo/bar/?c' + const expected = IS_WINDOWS ? 'C:\\foo\\bar' : '/foo/bar' + expect(deriveAllowedRoots([input], cwd)).toEqual([expected]) + }) + + test('character class strips its segment', () => { + const input = IS_WINDOWS ? 'C:\\foo\\[bc]\\d' : '/foo/[bc]/d' + const expected = IS_WINDOWS ? 'C:\\foo' : '/foo' + expect(deriveAllowedRoots([input], cwd)).toEqual([expected]) + }) + + test('brace expansion strips its segment', () => { + const input = IS_WINDOWS ? 'C:\\foo\\{x,y}\\d' : '/foo/{x,y}/d' + const expected = IS_WINDOWS ? 'C:\\foo' : '/foo' + expect(deriveAllowedRoots([input], cwd)).toEqual([expected]) + }) + + test('** in the middle strips at the **', () => { + const input = IS_WINDOWS ? 'C:\\foo\\**\\d' : '/foo/**/d' + const expected = IS_WINDOWS ? 'C:\\foo' : '/foo' + expect(deriveAllowedRoots([input], cwd)).toEqual([expected]) + }) + + test('leading ** falls back to extraction CWD', () => { + const expected = IS_WINDOWS ? 'C:\\workspace' : '/workspace' + expect(deriveAllowedRoots(['**/node_modules'], cwd)).toEqual([expected]) + }) + + test('single * falls back to extraction CWD', () => { + const expected = IS_WINDOWS ? 'C:\\workspace' : '/workspace' + expect(deriveAllowedRoots(['*'], cwd)).toEqual([expected]) + }) + }) + + describe('negation handling', () => { + test('negation pattern (! prefix) is dropped from allowed roots', () => { + const input = IS_WINDOWS + ? ['!C:\\foo\\secret'] + : ['!/foo/secret'] + expect(deriveAllowedRoots(input, cwd)).toEqual([]) + }) + + test('negation does not subtract from a sibling allowed root', () => { + const allowed = IS_WINDOWS ? 'C:\\foo' : '/foo' + const negated = IS_WINDOWS ? '!C:\\foo\\secret' : '!/foo/secret' + expect(deriveAllowedRoots([allowed, negated], cwd)).toEqual([allowed]) + }) + }) + + describe('path expansion', () => { + test('~ expands to home directory', () => { + expect(deriveAllowedRoots(['~'], cwd)).toEqual([os.homedir()]) + }) + + test('~/x expands to home/x', () => { + const expected = path.join(os.homedir(), '.cache') + expect(deriveAllowedRoots(['~/.cache'], cwd)).toEqual([expected]) + }) + + test('${VAR} expands an environment variable', () => { + const original = process.env['CACHE_TEST_ROOT'] + process.env['CACHE_TEST_ROOT'] = IS_WINDOWS ? 'C:\\envroot' : '/envroot' + try { + const expected = IS_WINDOWS ? 'C:\\envroot\\sub' : '/envroot/sub' + expect( + deriveAllowedRoots(['${CACHE_TEST_ROOT}/sub'], cwd) + ).toEqual([expected]) + } finally { + if (original === undefined) delete process.env['CACHE_TEST_ROOT'] + else process.env['CACHE_TEST_ROOT'] = original + } + }) + + test('$VAR style expands an environment variable', () => { + const original = process.env['CACHE_TEST_ROOT'] + process.env['CACHE_TEST_ROOT'] = IS_WINDOWS ? 'C:\\envroot' : '/envroot' + try { + const expected = IS_WINDOWS ? 'C:\\envroot\\sub' : '/envroot/sub' + expect(deriveAllowedRoots(['$CACHE_TEST_ROOT/sub'], cwd)).toEqual([ + expected + ]) + } finally { + if (original === undefined) delete process.env['CACHE_TEST_ROOT'] + else process.env['CACHE_TEST_ROOT'] = original + } + }) + + test('%VAR% Windows-style expands an environment variable', () => { + const original = process.env['CACHE_TEST_WIN_ROOT'] + process.env['CACHE_TEST_WIN_ROOT'] = IS_WINDOWS ? 'C:\\winroot' : '/winroot' + try { + const expected = IS_WINDOWS ? 'C:\\winroot\\sub' : '/winroot/sub' + expect( + deriveAllowedRoots(['%CACHE_TEST_WIN_ROOT%/sub'], cwd) + ).toEqual([expected]) + } finally { + if (original === undefined) delete process.env['CACHE_TEST_WIN_ROOT'] + else process.env['CACHE_TEST_WIN_ROOT'] = original + } + }) + + test('unknown env var expands to empty string', () => { + delete process.env['DEFINITELY_NOT_SET_VAR_XYZ123'] + // After expansion: "/sub", which is absolute on POSIX, so it stays /sub. + // On Windows it becomes a relative path resolved against cwd. + const result = deriveAllowedRoots( + ['${DEFINITELY_NOT_SET_VAR_XYZ123}/sub'], + cwd + ) + expect(result).toHaveLength(1) + // Just ensure no crash and produces a deterministic value. + expect(typeof result[0]).toBe('string') + }) + + test('env value containing glob characters is preserved verbatim, not truncated', () => { + // Regression test: an earlier implementation expanded env vars before + // detecting glob characters, so an env value that happened to contain + // a `*` or `{` would truncate the prefix mid-path and silently broaden + // the allowed root. Glob detection must run on the pre-expansion text. + const original = process.env['CACHE_TEST_ROOT'] + process.env['CACHE_TEST_ROOT'] = IS_WINDOWS + ? 'C:\\envroot*odd' + : '/envroot*odd' + try { + const expected = IS_WINDOWS + ? 'C:\\envroot*odd\\sub' + : '/envroot*odd/sub' + expect( + deriveAllowedRoots(['${CACHE_TEST_ROOT}/sub'], cwd) + ).toEqual([expected]) + } finally { + if (original === undefined) delete process.env['CACHE_TEST_ROOT'] + else process.env['CACHE_TEST_ROOT'] = original + } + }) + }) + + describe('normalization', () => { + test('trailing slash is normalized away', () => { + const a = IS_WINDOWS ? 'C:\\foo\\bar' : '/foo/bar' + const b = IS_WINDOWS ? 'C:\\foo\\bar\\' : '/foo/bar/' + expect(deriveAllowedRoots([a], cwd)).toEqual(deriveAllowedRoots([b], cwd)) + }) + + test('relative paths resolve against CWD', () => { + const expected = IS_WINDOWS + ? 'C:\\workspace\\node_modules' + : '/workspace/node_modules' + expect(deriveAllowedRoots(['node_modules'], cwd)).toEqual([expected]) + }) + + test('. resolves to CWD', () => { + const expected = IS_WINDOWS ? 'C:\\workspace' : '/workspace' + expect(deriveAllowedRoots(['.'], cwd)).toEqual([expected]) + }) + + test('whitespace-only path is dropped', () => { + expect(deriveAllowedRoots([' '], cwd)).toEqual([]) + }) + + test('empty string path is dropped', () => { + expect(deriveAllowedRoots([''], cwd)).toEqual([]) + }) + + test('input with embedded ./ segments normalizes', () => { + const input = IS_WINDOWS ? 'C:\\foo\\.\\bar' : '/foo/./bar' + const expected = IS_WINDOWS ? 'C:\\foo\\bar' : '/foo/bar' + expect(deriveAllowedRoots([input], cwd)).toEqual([expected]) + }) + + test('input with embedded ../ segments normalizes', () => { + const input = IS_WINDOWS ? 'C:\\foo\\bar\\..\\baz' : '/foo/bar/../baz' + const expected = IS_WINDOWS ? 'C:\\foo\\baz' : '/foo/baz' + expect(deriveAllowedRoots([input], cwd)).toEqual([expected]) + }) + }) + + describe('deduplication and subsumption', () => { + test('identical roots are deduplicated', () => { + const a = IS_WINDOWS ? 'C:\\foo' : '/foo' + expect(deriveAllowedRoots([a, a], cwd)).toEqual([a]) + }) + + test('child root is subsumed by parent', () => { + const parent = IS_WINDOWS ? 'C:\\foo' : '/foo' + const child = IS_WINDOWS ? 'C:\\foo\\bar' : '/foo/bar' + expect(deriveAllowedRoots([parent, child], cwd)).toEqual([parent]) + }) + + test('child first then parent still results in just parent', () => { + const parent = IS_WINDOWS ? 'C:\\foo' : '/foo' + const child = IS_WINDOWS ? 'C:\\foo\\bar' : '/foo/bar' + expect(deriveAllowedRoots([child, parent], cwd)).toEqual([parent]) + }) + + test('sibling prefix collision does NOT subsume', () => { + // /aa is NOT a child of /a — must be kept as a separate root. + const a = IS_WINDOWS ? 'C:\\a' : '/a' + const aa = IS_WINDOWS ? 'C:\\aa' : '/aa' + const result = deriveAllowedRoots([a, aa], cwd) + expect(result).toContain(a) + expect(result).toContain(aa) + expect(result).toHaveLength(2) + }) + + test('completely disjoint roots are both kept', () => { + const a = IS_WINDOWS ? 'C:\\foo' : '/foo' + const b = IS_WINDOWS ? 'C:\\bar' : '/bar' + const result = deriveAllowedRoots([a, b], cwd) + expect(result).toContain(a) + expect(result).toContain(b) + expect(result).toHaveLength(2) + }) + }) + + if (CASE_INSENSITIVE) { + test('case-insensitive FS: roots that differ only in case dedupe', () => { + const lower = IS_WINDOWS ? 'C:\\foo\\bar' : '/foo/bar' + const upper = IS_WINDOWS ? 'C:\\FOO\\bar' : '/FOO/bar' + const result = deriveAllowedRoots([lower, upper], cwd) + // Either form may win, but only one root should survive. + expect(result).toHaveLength(1) + }) + } +}) + +describe('validateEntry', () => { + const cwd = IS_WINDOWS ? 'C:\\workspace' : '/workspace' + const allowedRoots = [ + IS_WINDOWS ? 'C:\\workspace\\node_modules' : '/workspace/node_modules', + IS_WINDOWS ? 'C:\\workspace\\.cache' : '/workspace/.cache' + ] + + describe('legitimate entries (must pass)', () => { + test('regular file under root', () => { + const r = validateEntry( + 'node_modules/foo.js', + undefined, + 'File', + allowedRoots, + cwd + ) + expect(r.ok).toBe(true) + }) + + test('deeply nested file', () => { + const r = validateEntry( + 'node_modules/' + Array(50).fill('sub').join('/') + '/foo.js', + undefined, + 'File', + allowedRoots, + cwd + ) + expect(r.ok).toBe(true) + }) + + test('entry with leading ./', () => { + const r = validateEntry( + './node_modules/foo.js', + undefined, + 'File', + allowedRoots, + cwd + ) + expect(r.ok).toBe(true) + }) + + test('entry with non-escaping .. segment', () => { + const r = validateEntry( + 'node_modules/sub/../foo.js', + undefined, + 'File', + allowedRoots, + cwd + ) + expect(r.ok).toBe(true) + }) + + test('entry with double slash normalizes ok', () => { + const r = validateEntry( + 'node_modules//foo.js', + undefined, + 'File', + allowedRoots, + cwd + ) + expect(r.ok).toBe(true) + }) + + test('directory entry (trailing slash)', () => { + const r = validateEntry( + 'node_modules/', + undefined, + 'Directory', + allowedRoots, + cwd + ) + expect(r.ok).toBe(true) + }) + + test('filename with spaces', () => { + const r = validateEntry( + 'node_modules/My File.js', + undefined, + 'File', + allowedRoots, + cwd + ) + expect(r.ok).toBe(true) + }) + + test('Unicode filename', () => { + const r = validateEntry( + 'node_modules/节点/файл.js', + undefined, + 'File', + allowedRoots, + cwd + ) + expect(r.ok).toBe(true) + }) + + test('hidden file (.git/HEAD)', () => { + const r = validateEntry( + 'node_modules/.git/HEAD', + undefined, + 'File', + allowedRoots, + cwd + ) + expect(r.ok).toBe(true) + }) + + test('filename containing .. as substring (not segment)', () => { + for (const name of ['..hidden', 'file..txt', 'a..b/c']) { + const r = validateEntry( + 'node_modules/' + name, + undefined, + 'File', + allowedRoots, + cwd + ) + expect(r.ok).toBe(true) + } + }) + + test('symlink within same root', () => { + const r = validateEntry( + 'node_modules/.bin/cmd', + '../foo/bin/cmd', + 'SymbolicLink', + allowedRoots, + cwd + ) + expect(r.ok).toBe(true) + }) + + test('symlink crossing into a different allowed root', () => { + const r = validateEntry( + 'node_modules/link', + // From /workspace/node_modules/, "../.cache/x" → /workspace/.cache/x (an allowed root) + '../.cache/x', + 'SymbolicLink', + allowedRoots, + cwd + ) + expect(r.ok).toBe(true) + }) + + test('hardlink within same root', () => { + const r = validateEntry( + 'node_modules/dup.js', + // Hardlink target is relative to extraction CWD + 'node_modules/orig.js', + 'Link', + allowedRoots, + cwd + ) + expect(r.ok).toBe(true) + }) + }) + + describe('path traversal attacks (must reject)', () => { + test('classic ../../etc/passwd', () => { + const r = validateEntry( + '../../../etc/passwd', + undefined, + 'File', + allowedRoots, + cwd + ) + expect(r.ok).toBe(false) + if (!r.ok) expect(r.code).toBe('OUTSIDE_ROOTS') + }) + + test('inside-then-out traversal', () => { + const r = validateEntry( + 'node_modules/../../etc/passwd', + undefined, + 'File', + allowedRoots, + cwd + ) + expect(r.ok).toBe(false) + if (!r.ok) expect(r.code).toBe('OUTSIDE_ROOTS') + }) + + test('hidden in middle', () => { + const r = validateEntry( + 'node_modules/sub/../../../etc/passwd', + undefined, + 'File', + allowedRoots, + cwd + ) + expect(r.ok).toBe(false) + if (!r.ok) expect(r.code).toBe('OUTSIDE_ROOTS') + }) + + test('multiple slashes around .. still rejected', () => { + const r = validateEntry( + 'node_modules/..//../etc/passwd', + undefined, + 'File', + allowedRoots, + cwd + ) + expect(r.ok).toBe(false) + if (!r.ok) expect(r.code).toBe('OUTSIDE_ROOTS') + }) + + test('trailing .. inside root is allowed (does not escape)', () => { + const r = validateEntry( + 'node_modules/sub/..', + undefined, + 'Directory', + allowedRoots, + cwd + ) + expect(r.ok).toBe(true) + }) + }) + + describe('absolute / UNC paths (must reject)', () => { + test('POSIX absolute path', () => { + const r = validateEntry( + '/etc/passwd', + undefined, + 'File', + allowedRoots, + cwd + ) + expect(r.ok).toBe(false) + if (!r.ok) expect(r.code).toBe('ABSOLUTE_PATH') + }) + + test('POSIX root', () => { + const r = validateEntry('/', undefined, 'Directory', allowedRoots, cwd) + expect(r.ok).toBe(false) + }) + + test('Windows absolute path with backslash', () => { + const r = validateEntry( + 'C:\\Windows\\System32\\drivers\\etc\\hosts', + undefined, + 'File', + allowedRoots, + cwd + ) + expect(r.ok).toBe(false) + if (!r.ok) expect(r.code).toBe('ABSOLUTE_PATH') + }) + + test('Windows absolute path with forward slash', () => { + const r = validateEntry( + 'C:/Windows/System32/drivers/etc/hosts', + undefined, + 'File', + allowedRoots, + cwd + ) + expect(r.ok).toBe(false) + if (!r.ok) expect(r.code).toBe('ABSOLUTE_PATH') + }) + + test('Windows drive-relative (no slash)', () => { + const r = validateEntry( + 'C:foo', + undefined, + 'File', + allowedRoots, + cwd + ) + expect(r.ok).toBe(false) + if (!r.ok) expect(r.code).toBe('ABSOLUTE_PATH') + }) + + test('UNC path with backslashes', () => { + const r = validateEntry( + '\\\\attacker\\share\\payload', + undefined, + 'File', + allowedRoots, + cwd + ) + expect(r.ok).toBe(false) + if (!r.ok) expect(r.code).toBe('UNC_PATH') + }) + + test('UNC path with forward slashes', () => { + const r = validateEntry( + '//attacker/share/payload', + undefined, + 'File', + allowedRoots, + cwd + ) + expect(r.ok).toBe(false) + if (!r.ok) expect(r.code).toBe('UNC_PATH') + }) + + test('UNC long-path prefix', () => { + const r = validateEntry( + '\\\\?\\C:\\foo', + undefined, + 'File', + allowedRoots, + cwd + ) + expect(r.ok).toBe(false) + if (!r.ok) expect(r.code).toBe('UNC_PATH') + }) + }) + + describe('NUL byte attacks (must reject)', () => { + test('NUL in path', () => { + const r = validateEntry( + 'node_modules/safe\0/../etc/passwd', + undefined, + 'File', + allowedRoots, + cwd + ) + expect(r.ok).toBe(false) + if (!r.ok) expect(r.code).toBe('NUL_BYTE') + }) + + test('NUL in symlink target', () => { + const r = validateEntry( + 'node_modules/link', + 'safe\0/etc/passwd', + 'SymbolicLink', + allowedRoots, + cwd + ) + expect(r.ok).toBe(false) + if (!r.ok) expect(r.code).toBe('NUL_BYTE') + }) + }) + + describe('symlink/hardlink attacks (must reject)', () => { + test('symlink with absolute target', () => { + const r = validateEntry( + 'node_modules/link', + '/etc/passwd', + 'SymbolicLink', + allowedRoots, + cwd + ) + expect(r.ok).toBe(false) + if (!r.ok) expect(r.code).toBe('LINK_OUTSIDE_ROOTS') + }) + + test('symlink with .. traversal', () => { + const r = validateEntry( + 'node_modules/link', + '../../../etc/passwd', + 'SymbolicLink', + allowedRoots, + cwd + ) + expect(r.ok).toBe(false) + if (!r.ok) expect(r.code).toBe('LINK_OUTSIDE_ROOTS') + }) + + test('hardlink with absolute target', () => { + const r = validateEntry( + 'node_modules/dup', + '/etc/passwd', + 'Link', + allowedRoots, + cwd + ) + expect(r.ok).toBe(false) + if (!r.ok) expect(r.code).toBe('LINK_OUTSIDE_ROOTS') + }) + + test('hardlink with .. traversal', () => { + const r = validateEntry( + 'node_modules/dup', + '../../etc/passwd', + 'Link', + allowedRoots, + cwd + ) + expect(r.ok).toBe(false) + if (!r.ok) expect(r.code).toBe('LINK_OUTSIDE_ROOTS') + }) + + test('symlink-then-write-through-link attack: symlink target outside roots is rejected', () => { + // This is the critical "symlink-then-write" attack: + // Entry 1: node_modules/x → /etc (a symlink target outside allowed roots) + // Entry 2: node_modules/x/payload (which extracts THROUGH entry 1's link + // and lands at /etc/payload) + // We must reject Entry 1 because its target is outside the allowed roots. + // Entry 2's nominal path looks safe, so we cannot rely on per-entry path + // validation alone — we must catch the symlink target. + const r = validateEntry( + 'node_modules/x', + '/etc', + 'SymbolicLink', + allowedRoots, + cwd + ) + expect(r.ok).toBe(false) + if (!r.ok) expect(r.code).toBe('LINK_OUTSIDE_ROOTS') + }) + + test('symlink with empty target is allowed (filtered as undefined linkpath)', () => { + // Empty string for linkpath is treated as "no link target to validate". + const r = validateEntry( + 'node_modules/x', + '', + 'SymbolicLink', + allowedRoots, + cwd + ) + expect(r.ok).toBe(true) + }) + + test('self-referential symlink (link to .)', () => { + const r = validateEntry( + 'node_modules/x', + '.', + 'SymbolicLink', + allowedRoots, + cwd + ) + // "." resolves to the entry's directory which is under the root. + expect(r.ok).toBe(true) + }) + }) + + describe('unsupported entry types (must reject)', () => { + test('CharacterDevice is rejected', () => { + const r = validateEntry( + 'node_modules/safe', + undefined, + 'CharacterDevice', + allowedRoots, + cwd + ) + expect(r.ok).toBe(false) + if (!r.ok) expect(r.code).toBe('UNSUPPORTED_TYPE') + }) + + test('BlockDevice is rejected', () => { + const r = validateEntry( + 'node_modules/safe', + undefined, + 'BlockDevice', + allowedRoots, + cwd + ) + expect(r.ok).toBe(false) + if (!r.ok) expect(r.code).toBe('UNSUPPORTED_TYPE') + }) + + test('FIFO is rejected', () => { + const r = validateEntry( + 'node_modules/safe', + undefined, + 'FIFO', + allowedRoots, + cwd + ) + expect(r.ok).toBe(false) + if (!r.ok) expect(r.code).toBe('UNSUPPORTED_TYPE') + }) + + test('ContiguousFile is rejected', () => { + const r = validateEntry( + 'node_modules/safe', + undefined, + 'ContiguousFile', + allowedRoots, + cwd + ) + expect(r.ok).toBe(false) + if (!r.ok) expect(r.code).toBe('UNSUPPORTED_TYPE') + }) + + test('unknown / future entry type is rejected by the allow-list', () => { + // node-tar can surface an unknown typeflag byte as a non-standard + // string. The allow-list approach guarantees we reject it rather than + // silently passing it through to the extractor. + const r = validateEntry( + 'node_modules/safe', + undefined, + 'SomeFutureType', + allowedRoots, + cwd + ) + expect(r.ok).toBe(false) + if (!r.ok) expect(r.code).toBe('UNSUPPORTED_TYPE') + }) + + test('arbitrary typeflag-byte string is rejected', () => { + // Simulate an attacker-supplied non-standard typeflag like '\u0001' + // surfacing as a literal string from the tar parser. + const r = validateEntry( + 'node_modules/safe', + undefined, + '\u0001', + allowedRoots, + cwd + ) + expect(r.ok).toBe(false) + if (!r.ok) expect(r.code).toBe('UNSUPPORTED_TYPE') + }) + + test('empty entry type string is rejected', () => { + const r = validateEntry( + 'node_modules/safe', + undefined, + '', + allowedRoots, + cwd + ) + expect(r.ok).toBe(false) + if (!r.ok) expect(r.code).toBe('UNSUPPORTED_TYPE') + }) + }) + + describe('allow-listed entry types (must accept)', () => { + test.each(['File', 'OldFile', 'Directory'])( + '%s is accepted for an in-root path', + entryType => { + const r = validateEntry( + 'node_modules/safe', + undefined, + entryType, + allowedRoots, + cwd + ) + expect(r.ok).toBe(true) + } + ) + + test('SymbolicLink is accepted for an in-root target', () => { + const r = validateEntry( + 'node_modules/link', + 'real', + 'SymbolicLink', + allowedRoots, + cwd + ) + expect(r.ok).toBe(true) + }) + + test('Link (hardlink) is accepted for an in-root target', () => { + const r = validateEntry( + 'node_modules/hardlink', + 'node_modules/real', + 'Link', + allowedRoots, + cwd + ) + expect(r.ok).toBe(true) + }) + }) + + describe('header-only entry types (must accept without validation)', () => { + test('GlobalExtendedHeader is accepted regardless of path content', () => { + // PAX global headers don't materialize as a file; their "path" is metadata. + const r = validateEntry( + '/anything/at/all', + undefined, + 'GlobalExtendedHeader', + allowedRoots, + cwd + ) + expect(r.ok).toBe(true) + }) + + test('ExtendedHeader is accepted', () => { + const r = validateEntry( + 'PaxHeader/path-doesnt-matter', + undefined, + 'ExtendedHeader', + allowedRoots, + cwd + ) + expect(r.ok).toBe(true) + }) + + test('NextFileHasLongPath is accepted', () => { + const r = validateEntry( + '@LongLink', + undefined, + 'NextFileHasLongPath', + allowedRoots, + cwd + ) + expect(r.ok).toBe(true) + }) + + test('NextFileHasLongLinkpath is accepted', () => { + const r = validateEntry( + '@LongLink', + undefined, + 'NextFileHasLongLinkpath', + allowedRoots, + cwd + ) + expect(r.ok).toBe(true) + }) + }) + + describe('control characters in filenames (legal but unusual)', () => { + test('embedded newline is treated literally as part of the segment', () => { + // node-tar reports the full filename including the embedded newline. + // It's a single segment under node_modules — must be accepted. + const r = validateEntry( + 'node_modules/file\nwith newline', + undefined, + 'File', + allowedRoots, + cwd + ) + expect(r.ok).toBe(true) + }) + + test('embedded tab is accepted', () => { + const r = validateEntry( + 'node_modules/file\twith tab', + undefined, + 'File', + allowedRoots, + cwd + ) + expect(r.ok).toBe(true) + }) + }) + + describe('case sensitivity', () => { + if (CASE_INSENSITIVE) { + test('on Windows/macOS, NODE_MODULES matches node_modules', () => { + const r = validateEntry( + 'NODE_MODULES/foo.js', + undefined, + 'File', + allowedRoots, + cwd + ) + expect(r.ok).toBe(true) + }) + } else { + test('on Linux, NODE_MODULES does NOT match node_modules', () => { + const r = validateEntry( + 'NODE_MODULES/foo.js', + undefined, + 'File', + allowedRoots, + cwd + ) + expect(r.ok).toBe(false) + if (!r.ok) expect(r.code).toBe('OUTSIDE_ROOTS') + }) + } + }) + + describe('sibling-prefix non-collision', () => { + test('an entry under /foo-extra is NOT accepted by allowed-root /foo', () => { + const roots = [IS_WINDOWS ? 'C:\\workspace\\foo' : '/workspace/foo'] + const r = validateEntry( + '../foo-extra/payload', + undefined, + 'File', + roots, + IS_WINDOWS ? 'C:\\workspace\\foo' : '/workspace/foo' + ) + expect(r.ok).toBe(false) + if (!r.ok) expect(r.code).toBe('OUTSIDE_ROOTS') + }) + }) +}) + +describe('formatViolationSummary', () => { + function v(reason: string): PathValidationViolation { + return { + path: 'x', + resolved: '', + entryType: 'File', + code: 'OUTSIDE_ROOTS', + reason + } + } + + test('empty list produces empty string', () => { + expect(formatViolationSummary([])).toBe('') + }) + + test('shows up to maxShown items verbatim', () => { + const out = formatViolationSummary( + [v('a'), v('b'), v('c')], + 5 + ) + expect(out).toContain(' - a') + expect(out).toContain(' - b') + expect(out).toContain(' - c') + expect(out).not.toContain('more') + }) + + test('truncates excess items with summary line', () => { + const items = ['a', 'b', 'c', 'd', 'e', 'f', 'g'].map(v) + const out = formatViolationSummary(items, 3) + expect(out).toContain(' - a') + expect(out).toContain(' - b') + expect(out).toContain(' - c') + expect(out).toContain('and 4 more') + expect(out).not.toContain(' - d') + }) +}) diff --git a/packages/cache/__tests__/restoreCache.test.ts b/packages/cache/__tests__/restoreCache.test.ts index 4b5c6f865f..3a2a56c895 100644 --- a/packages/cache/__tests__/restoreCache.test.ts +++ b/packages/cache/__tests__/restoreCache.test.ts @@ -166,7 +166,10 @@ test('restore with gzip compressed cache found', async () => { expect(getArchiveFileSizeInBytesMock).toHaveBeenCalledWith(archivePath) expect(extractTarMock).toHaveBeenCalledTimes(1) - expect(extractTarMock).toHaveBeenCalledWith(archivePath, compression) + expect(extractTarMock).toHaveBeenCalledWith(archivePath, compression, { + declaredPaths: paths, + pathValidation: undefined + }) expect(unlinkFileMock).toHaveBeenCalledTimes(1) expect(unlinkFileMock).toHaveBeenCalledWith(archivePath) @@ -227,7 +230,10 @@ test('restore with zstd compressed cache found', async () => { expect(infoMock).toHaveBeenCalledWith(`Cache Size: ~60 MB (62915000 B)`) expect(extractTarMock).toHaveBeenCalledTimes(1) - expect(extractTarMock).toHaveBeenCalledWith(archivePath, compression) + expect(extractTarMock).toHaveBeenCalledWith(archivePath, compression, { + declaredPaths: paths, + pathValidation: undefined + }) expect(getCompressionMock).toHaveBeenCalledTimes(1) }) @@ -285,7 +291,10 @@ test('restore with cache found for restore key', async () => { expect(infoMock).toHaveBeenCalledWith(`Cache Size: ~0 MB (142 B)`) expect(extractTarMock).toHaveBeenCalledTimes(1) - expect(extractTarMock).toHaveBeenCalledWith(archivePath, compression) + expect(extractTarMock).toHaveBeenCalledWith(archivePath, compression, { + declaredPaths: paths, + pathValidation: undefined + }) expect(getCompressionMock).toHaveBeenCalledTimes(1) }) diff --git a/packages/cache/__tests__/restoreCacheV2.test.ts b/packages/cache/__tests__/restoreCacheV2.test.ts index 421069db9d..5b8b657a0a 100644 --- a/packages/cache/__tests__/restoreCacheV2.test.ts +++ b/packages/cache/__tests__/restoreCacheV2.test.ts @@ -210,7 +210,10 @@ test('restore with gzip compressed cache found', async () => { expect(logInfoMock).toHaveBeenCalledWith(`Cache Size: ~0 MB (142 B)`) expect(extractTarMock).toHaveBeenCalledTimes(1) - expect(extractTarMock).toHaveBeenCalledWith(archivePath, compressionMethod) + expect(extractTarMock).toHaveBeenCalledWith(archivePath, compressionMethod, { + declaredPaths: paths, + pathValidation: undefined + }) expect(unlinkFileMock).toHaveBeenCalledTimes(1) expect(unlinkFileMock).toHaveBeenCalledWith(archivePath) @@ -287,7 +290,10 @@ test('restore with zstd compressed cache found', async () => { expect(logInfoMock).toHaveBeenCalledWith(`Cache Size: ~60 MB (62915000 B)`) expect(extractTarMock).toHaveBeenCalledTimes(1) - expect(extractTarMock).toHaveBeenCalledWith(archivePath, compressionMethod) + expect(extractTarMock).toHaveBeenCalledWith(archivePath, compressionMethod, { + declaredPaths: paths, + pathValidation: undefined + }) expect(unlinkFileMock).toHaveBeenCalledTimes(1) expect(unlinkFileMock).toHaveBeenCalledWith(archivePath) @@ -367,7 +373,10 @@ test('restore with cache found for restore key', async () => { expect(logInfoMock).toHaveBeenCalledWith(`Cache Size: ~0 MB (142 B)`) expect(extractTarMock).toHaveBeenCalledTimes(1) - expect(extractTarMock).toHaveBeenCalledWith(archivePath, compressionMethod) + expect(extractTarMock).toHaveBeenCalledWith(archivePath, compressionMethod, { + declaredPaths: paths, + pathValidation: undefined + }) expect(unlinkFileMock).toHaveBeenCalledTimes(1) expect(unlinkFileMock).toHaveBeenCalledWith(archivePath) diff --git a/packages/cache/__tests__/tarPathValidation.test.ts b/packages/cache/__tests__/tarPathValidation.test.ts new file mode 100644 index 0000000000..d2f186d9da --- /dev/null +++ b/packages/cache/__tests__/tarPathValidation.test.ts @@ -0,0 +1,349 @@ +import * as exec from '@actions/exec' +import * as core from '@actions/core' +import * as io from '@actions/io' +import * as path from 'path' +import {CompressionMethod} from '../src/internal/constants' +import * as tar from '../src/internal/tar' +import {CacheIntegrityError} from '../src/internal/cacheIntegrityError' +import * as listAndValidate from '../src/internal/listAndValidate' + +jest.mock('@actions/exec') +jest.mock('@actions/io') +jest.mock('../src/internal/listAndValidate') + +function getTempDir(): string { + return path.join(__dirname, '_temp', 'tarPathValidation') +} + +beforeAll(async () => { + jest.spyOn(io, 'which').mockImplementation(async tool => tool) + process.env['GITHUB_WORKSPACE'] = process.cwd() + await jest.requireActual('@actions/io').rmRF(getTempDir()) +}) + +beforeEach(() => { + jest.restoreAllMocks() + jest.spyOn(io, 'which').mockImplementation(async tool => tool) +}) + +afterAll(async () => { + delete process.env['GITHUB_WORKSPACE'] + await jest.requireActual('@actions/io').rmRF(getTempDir()) +}) + +const archive = 'cache.tar.gz' + +describe('extractTar path validation integration', () => { + describe("mode 'off' (default)", () => { + test('does not call listAndValidate when no options passed', async () => { + const listMock = jest + .spyOn(listAndValidate, 'listAndValidate') + .mockResolvedValue([]) + const execMock = jest.spyOn(exec, 'exec').mockResolvedValue(0) + + await tar.extractTar(archive, CompressionMethod.Gzip) + + expect(listMock).not.toHaveBeenCalled() + expect(execMock).toHaveBeenCalledTimes(1) // system tar still runs + }) + + test("explicit pathValidation='off' skips validation", async () => { + const listMock = jest + .spyOn(listAndValidate, 'listAndValidate') + .mockResolvedValue([]) + const execMock = jest.spyOn(exec, 'exec').mockResolvedValue(0) + + await tar.extractTar(archive, CompressionMethod.Gzip, { + declaredPaths: ['cache/**'], + pathValidation: 'off' + }) + + expect(listMock).not.toHaveBeenCalled() + expect(execMock).toHaveBeenCalledTimes(1) + }) + }) + + describe("mode 'warn'", () => { + test('clean archive: no warnings emitted, extraction proceeds', async () => { + const listMock = jest + .spyOn(listAndValidate, 'listAndValidate') + .mockResolvedValue([]) + const warnSpy = jest.spyOn(core, 'warning').mockImplementation() + const execMock = jest.spyOn(exec, 'exec').mockResolvedValue(0) + + await tar.extractTar(archive, CompressionMethod.Gzip, { + declaredPaths: ['cache/**'], + pathValidation: 'warn' + }) + + expect(listMock).toHaveBeenCalledTimes(1) + expect(warnSpy).not.toHaveBeenCalled() + expect(execMock).toHaveBeenCalledTimes(1) + }) + + test('violations present: exactly one warning, debug per violation, extraction still proceeds', async () => { + jest.spyOn(listAndValidate, 'listAndValidate').mockResolvedValue([ + { + path: '../escape.txt', + resolved: '', + entryType: 'File', + code: 'OUTSIDE_ROOTS', + reason: 'escapes allowed roots' + }, + { + path: 'cache/link', + linkpath: '/etc/passwd', + resolved: '', + entryType: 'SymbolicLink', + code: 'LINK_OUTSIDE_ROOTS', + reason: 'symlink target outside allowed roots' + } + ]) + const warnSpy = jest.spyOn(core, 'warning').mockImplementation() + const debugSpy = jest.spyOn(core, 'debug').mockImplementation() + const execMock = jest.spyOn(exec, 'exec').mockResolvedValue(0) + + await tar.extractTar(archive, CompressionMethod.Gzip, { + declaredPaths: ['cache/**'], + pathValidation: 'warn' + }) + + // Exactly one summary warning + expect(warnSpy).toHaveBeenCalledTimes(1) + expect(warnSpy.mock.calls[0][0]).toMatch(/2 entries/) + expect(warnSpy.mock.calls[0][0]).not.toMatch(/failed integrity/) + // One debug entry per violation + expect(debugSpy).toHaveBeenCalledTimes(2) + expect(debugSpy.mock.calls[0][0]).toMatch(/path-validation/) + // Extraction still happens + expect(execMock).toHaveBeenCalledTimes(1) + }) + + test('single violation: warning text uses singular wording', async () => { + jest.spyOn(listAndValidate, 'listAndValidate').mockResolvedValue([ + { + path: '../boom.txt', + resolved: '', + entryType: 'File', + code: 'OUTSIDE_ROOTS', + reason: 'escapes' + } + ]) + const warnSpy = jest.spyOn(core, 'warning').mockImplementation() + jest.spyOn(core, 'debug').mockImplementation() + jest.spyOn(exec, 'exec').mockResolvedValue(0) + + await tar.extractTar(archive, CompressionMethod.Gzip, { + declaredPaths: ['cache/**'], + pathValidation: 'warn' + }) + + expect(warnSpy.mock.calls[0][0]).toMatch(/1 entry/) + }) + }) + + describe("mode 'error'", () => { + test('violations present: throws CacheIntegrityError, system tar NEVER invoked', async () => { + jest.spyOn(listAndValidate, 'listAndValidate').mockResolvedValue([ + { + path: '../etc/passwd', + resolved: '', + entryType: 'File', + code: 'OUTSIDE_ROOTS', + reason: 'escapes allowed roots' + } + ]) + jest.spyOn(core, 'warning').mockImplementation() + jest.spyOn(core, 'debug').mockImplementation() + const execMock = jest.spyOn(exec, 'exec').mockResolvedValue(0) + const mkdirMock = jest.spyOn(io, 'mkdirP').mockResolvedValue() + + await expect( + tar.extractTar(archive, CompressionMethod.Gzip, { + declaredPaths: ['cache/**'], + pathValidation: 'error' + }) + ).rejects.toThrow(CacheIntegrityError) + + // The critical security assertion: no extraction directory was created + // and system tar was never invoked. + expect(execMock).not.toHaveBeenCalled() + expect(mkdirMock).not.toHaveBeenCalled() + }) + + test('thrown error has code PATH_VIOLATION and exposes violations', async () => { + const violations = [ + { + path: '../boom.txt', + resolved: '', + entryType: 'File' as const, + code: 'OUTSIDE_ROOTS' as const, + reason: 'escapes' + } + ] + jest + .spyOn(listAndValidate, 'listAndValidate') + .mockResolvedValue(violations) + jest.spyOn(core, 'warning').mockImplementation() + jest.spyOn(core, 'debug').mockImplementation() + + try { + await tar.extractTar(archive, CompressionMethod.Gzip, { + declaredPaths: ['cache/**'], + pathValidation: 'error' + }) + fail('expected CacheIntegrityError') + } catch (err) { + expect(err).toBeInstanceOf(CacheIntegrityError) + const e = err as CacheIntegrityError + expect(e.code).toBe('PATH_VIOLATION') + expect(e.violations).toEqual(violations) + expect(e.message).toMatch(/Refusing to extract/) + } + }) + + test('clean archive: no throw, extraction proceeds normally', async () => { + jest + .spyOn(listAndValidate, 'listAndValidate') + .mockResolvedValue([]) + const warnSpy = jest.spyOn(core, 'warning').mockImplementation() + const execMock = jest.spyOn(exec, 'exec').mockResolvedValue(0) + + await tar.extractTar(archive, CompressionMethod.Gzip, { + declaredPaths: ['cache/**'], + pathValidation: 'error' + }) + + expect(warnSpy).not.toHaveBeenCalled() + expect(execMock).toHaveBeenCalledTimes(1) + }) + + test('listAndValidate throws parse error: wrapped as CacheIntegrityError(PARSE_ERROR), no extraction', async () => { + jest + .spyOn(listAndValidate, 'listAndValidate') + .mockRejectedValue(new Error('tar parse error (TAR_BAD_ARCHIVE): bad')) + const execMock = jest.spyOn(exec, 'exec').mockResolvedValue(0) + const mkdirMock = jest.spyOn(io, 'mkdirP').mockResolvedValue() + + try { + await tar.extractTar(archive, CompressionMethod.Gzip, { + declaredPaths: ['cache/**'], + pathValidation: 'error' + }) + fail('expected throw') + } catch (err) { + expect(err).toBeInstanceOf(CacheIntegrityError) + expect((err as CacheIntegrityError).code).toBe('PARSE_ERROR') + expect((err as CacheIntegrityError).message).toMatch( + /tar parse error/ + ) + } + expect(execMock).not.toHaveBeenCalled() + expect(mkdirMock).not.toHaveBeenCalled() + }) + + test("listAndValidate parse failure in 'warn' mode: logs warning, skips validation, extraction proceeds", async () => { + jest + .spyOn(listAndValidate, 'listAndValidate') + .mockRejectedValue(new Error('bad bytes')) + const execMock = jest.spyOn(exec, 'exec').mockResolvedValue(0) + const warnSpy = jest.spyOn(core, 'warning').mockImplementation() + + // In 'warn' mode a parse failure is non-fatal: the validator's tar + // parser is stricter than the system `tar` that performs the actual + // extraction, so the archive may still extract cleanly. We log a + // warning and proceed. + await expect( + tar.extractTar(archive, CompressionMethod.Gzip, { + declaredPaths: ['cache/**'], + pathValidation: 'warn' + }) + ).resolves.toBeUndefined() + expect(warnSpy).toHaveBeenCalledTimes(1) + expect(warnSpy.mock.calls[0][0]).toMatch(/integrity check failed/) + expect(warnSpy.mock.calls[0][0]).toMatch(/bad bytes/) + expect(execMock).toHaveBeenCalledTimes(1) + }) + }) + + describe('allowed-roots derivation', () => { + test('declaredPaths is forwarded to listAndValidate', async () => { + const listMock = jest + .spyOn(listAndValidate, 'listAndValidate') + .mockResolvedValue([]) + jest.spyOn(exec, 'exec').mockResolvedValue(0) + + await tar.extractTar(archive, CompressionMethod.Gzip, { + declaredPaths: ['build/', 'node_modules/'], + pathValidation: 'warn' + }) + + expect(listMock).toHaveBeenCalledTimes(1) + const [, , allowedRoots] = listMock.mock.calls[0] + // Both declared roots should appear after resolution + expect(allowedRoots.length).toBeGreaterThanOrEqual(2) + }) + + test('missing declaredPaths falls back to workspace as the sole allowed root', async () => { + const listMock = jest + .spyOn(listAndValidate, 'listAndValidate') + .mockResolvedValue([]) + jest.spyOn(exec, 'exec').mockResolvedValue(0) + + await tar.extractTar(archive, CompressionMethod.Gzip, { + pathValidation: 'warn' + }) + + expect(listMock).toHaveBeenCalledTimes(1) + const [, , allowedRoots, extractCwd] = listMock.mock.calls[0] + // Empty declaredPaths array → fail-safe fallback to extractCwd + expect(allowedRoots).toEqual([extractCwd]) + }) + }) + + describe('compression method forwarding', () => { + test('Gzip', async () => { + const listMock = jest + .spyOn(listAndValidate, 'listAndValidate') + .mockResolvedValue([]) + jest.spyOn(exec, 'exec').mockResolvedValue(0) + + await tar.extractTar(archive, CompressionMethod.Gzip, { + declaredPaths: ['cache/**'], + pathValidation: 'warn' + }) + + expect(listMock.mock.calls[0][1]).toBe(CompressionMethod.Gzip) + }) + + test('Zstd', async () => { + const listMock = jest + .spyOn(listAndValidate, 'listAndValidate') + .mockResolvedValue([]) + jest.spyOn(exec, 'exec').mockResolvedValue(0) + + await tar.extractTar(archive, CompressionMethod.Zstd, { + declaredPaths: ['cache/**'], + pathValidation: 'warn' + }) + + expect(listMock.mock.calls[0][1]).toBe(CompressionMethod.Zstd) + }) + + test('ZstdWithoutLong', async () => { + const listMock = jest + .spyOn(listAndValidate, 'listAndValidate') + .mockResolvedValue([]) + jest.spyOn(exec, 'exec').mockResolvedValue(0) + + await tar.extractTar(archive, CompressionMethod.ZstdWithoutLong, { + declaredPaths: ['cache/**'], + pathValidation: 'warn' + }) + + expect(listMock.mock.calls[0][1]).toBe( + CompressionMethod.ZstdWithoutLong + ) + }) + }) +}) diff --git a/packages/cache/docs/path-validation-test-plan.md b/packages/cache/docs/path-validation-test-plan.md new file mode 100644 index 0000000000..1b9aba6f55 --- /dev/null +++ b/packages/cache/docs/path-validation-test-plan.md @@ -0,0 +1,199 @@ +# Path Validation Test Plan — `@actions/cache` + +This document describes the test coverage for the client-side cache-archive path +validation feature introduced in `@actions/cache` v6.1.0. + +## Feature summary + +`extractTar()` now accepts a third argument: + +```ts +extractTar(archivePath, compressionMethod, { + declaredPaths?: string[], + pathValidation?: 'off' | 'warn' | 'error' +}) +``` + +When `pathValidation !== 'off'`, the archive is streamed through `node-tar`'s +`Parser` (no extraction) and every entry's path / linkpath is checked against +the set of allowed roots derived from `declaredPaths` (or, if that list is +empty, the GitHub Actions workspace as a single fail-safe root). Violations are +collected; in `'error'` mode a `CacheIntegrityError` is thrown **before** system +tar is invoked, so no bytes are ever written to the workspace. + +`restoreCacheV1` and `restoreCacheV2` forward the caller-supplied +`pathValidation` mode and the declared `paths` array to `extractTar`. They also +re-throw `CacheIntegrityError` instances unchanged, so callers can distinguish +integrity failures from ordinary cache-miss/network errors. + +## Files under test + +| Source | Tests | +|---|---| +| [`src/internal/pathValidation.ts`](../src/internal/pathValidation.ts) | [`__tests__/pathValidation.test.ts`](../__tests__/pathValidation.test.ts) | +| [`src/internal/listAndValidate.ts`](../src/internal/listAndValidate.ts) | [`__tests__/listAndValidate.test.ts`](../__tests__/listAndValidate.test.ts) | +| [`src/internal/tar.ts`](../src/internal/tar.ts) (integration into `extractTar`) | [`__tests__/tarPathValidation.test.ts`](../__tests__/tarPathValidation.test.ts) | +| [`src/internal/cacheIntegrityError.ts`](../src/internal/cacheIntegrityError.ts) | covered indirectly via the integration tests | +| [`src/options.ts`](../src/options.ts) (new `pathValidation` field) | [`__tests__/options.test.ts`](../__tests__/options.test.ts) | +| [`src/cache.ts`](../src/cache.ts) (forwarding + error re-throw) | [`__tests__/restoreCache.test.ts`](../__tests__/restoreCache.test.ts), [`__tests__/restoreCacheV2.test.ts`](../__tests__/restoreCacheV2.test.ts) | + +## Unit tests — pure logic (`pathValidation.test.ts`, 86 cases) + +These exercise the platform-agnostic validation logic with no filesystem or +network. The suite is split into two groups. + +### `deriveAllowedRoots` + +Verifies the longest-non-glob-prefix derivation, normalization, deduplication +and platform-specific behavior: + +- **Glob-prefix stripping**: `cache/**`, `*.log`, `?abc`, `[abc]`, `{a,b}`, `!neg` +- **Path expansion**: + - `~` and `~/...` → home directory + - `$VAR`, `${VAR}`, `%VAR%` → environment variables (with unknown vars expanding to empty) + - Mixed expansion-before-glob (`${VAR}` is not misread as a brace glob) +- **Normalization**: leading `./`, embedded `./`, embedded `../` +- **Negations**: any pattern starting with `!` is dropped +- **Edge inputs**: `.`, empty string, whitespace-only, undefined entries +- **Deduplication**: + - Identical roots collapsed + - Child roots subsumed by parents (`/a/b` dropped when `/a` is also present) + - Sibling-prefix non-collision (`/aa` is NOT subsumed by `/a`) +- **Case sensitivity**: case-insensitive on Windows/macOS, case-sensitive on Linux + +### `validateEntry` — accept + +- Plain files under allowed roots (`cache/file.txt`) +- Nested files (`cache/sub1/sub2/.../file.txt`) +- Files with leading `./` +- Files containing `..` as a substring within a segment (`..hidden`, `foo..bar`) +- Files in any of multiple allowed roots +- Unicode filenames, files with spaces, hidden files (`.git/HEAD`) +- Trailing `..` segments that don't escape the root +- Symlinks where the resolved target stays inside an allowed root +- Symlinks crossing from one allowed root into another allowed root +- Hardlinks within the same root +- Empty linkpath (treated as "no target to validate") + +### `validateEntry` — reject (security) + +- **Path traversal**: classic `../../etc/passwd`, inside-then-out (`cache/../../etc/x`), + hidden in middle, multi-slash separators (`cache//../../x`) +- **Absolute paths**: POSIX `/etc/x`, root `/`, Windows `C:\...`, Windows forward-slash + `C:/...`, Windows drive-relative `C:foo`, UNC `\\server\share`, UNC forward-slash, + UNC long-path prefix `\\?\C:\...` +- **NUL byte attacks**: NUL in path, NUL in symlink target +- **Symlink attacks**: + - Absolute symlink target + - Symlink target with `..` traversal + - **Symlink-then-write-through-link** (the critical TOCTOU-style attack: + archive declares `cache/link → /tmp/evil` followed by `cache/link/file`) + - Self-referential symlink to `.` +- **Hardlink attacks**: absolute target, `..` traversal +- **Unsupported entry types**: `CharacterDevice`, `BlockDevice`, `FIFO` +- **Header-only types accepted unconditionally** (these carry no path content): + `GlobalExtendedHeader`, `ExtendedHeader`, `NextFileHasLongPath`, `NextFileHasLongLinkpath` + +### `formatViolationSummary` + +- Empty list → empty string +- Shows up to N items verbatim +- Truncates the tail with a `(... and N more)` summary line + +## Integration tests — real archives (`listAndValidate.test.ts`, 14 cases) + +These build small tar archives in memory using `tar.Header`, write them to disk, +and run them through the production parser. They cover the gzip and zstd +codepaths and use the system `zstd` binary (matching production behavior). +Skipped on hosts without `zstd` installed. + +### Gzip-compressed + +- Clean multi-entry archive → 0 violations +- Single tiny-file archive → 0 violations +- Classic `../../../etc/passwd` traversal → 1 violation, correct path/type +- Absolute file path (`/etc/cron.d/evil` or `C:/Windows/...`) → 1 violation +- Symlink with absolute target → 1 violation, correct linkpath captured +- Symlink with traversing target → 1 violation +- Hardlink with traversing target → 1 violation +- Mixed clean + malicious entries → only bad ones reported +- Character-device entry → 1 violation +- Corrupted / non-tar bytes → throws `Error` (caller wraps as `PARSE_ERROR`) + +### Zstd-compressed (long & short window) + +- Clean archive compressed with `zstd --long=30` → 0 violations +- Traversal in zstd archive → 1 violation +- `ZstdWithoutLong` compression method also works + +## Integration tests — mocked downstream (`tarPathValidation.test.ts`, 15 cases) + +These mock `listAndValidate` so the test can deterministically inject "violation +lists" and observe `extractTar`'s reaction. They mock `@actions/exec`, +`@actions/io` and `@actions/core` to assert what does (and does not) get called. + +### `pathValidation: 'off'` (default) + +- No `options` argument → validator never called, system tar runs normally +- Explicit `'off'` → validator never called, system tar runs + +### `pathValidation: 'warn'` + +- Clean archive → no warning emitted, extraction proceeds +- Violations present → **exactly one** `core.warning` summary, one `core.debug` + per violation, system tar **still runs** +- Single violation → warning uses singular wording (`1 entry`) + +### `pathValidation: 'error'` + +- Violations present → throws `CacheIntegrityError`, **system tar is never + invoked, `mkdirP` is never called** (no extraction directory is created) +- Thrown error has `code === 'PATH_VIOLATION'` and exposes the violations array +- Clean archive → no warning, no throw, extraction proceeds +- `listAndValidate` throws → wrapped as `CacheIntegrityError(PARSE_ERROR)`, + system tar not invoked +- Parse failure in `'warn'` mode → still wrapped as `CacheIntegrityError(PARSE_ERROR)` + and surfaced (we can't trust the archive when we can't even read it) + +### Plumbing + +- `declaredPaths` is forwarded to `listAndValidate` +- Empty/missing `declaredPaths` → fall back to `[workingDirectory]` +- All three compression methods (`Gzip`, `Zstd`, `ZstdWithoutLong`) forward correctly + +## Regression coverage + +These pre-existing tests were updated to account for the new third argument to +`extractTar` but otherwise exercise the same behavior as before: + +- `restoreCache.test.ts` — V1 restore plumbs `paths` and `pathValidation` +- `restoreCacheV2.test.ts` — V2 restore plumbs `paths` and `pathValidation` +- `options.test.ts` — `getDownloadOptions` defaults `pathValidation: 'off'` + +## What is intentionally NOT tested at this layer + +- **End-to-end behaviour with real cache backend** — covered by the + `actions/cache` action's E2E workflow (matrix across ubuntu, macos, windows × + off/warn/error). +- **Action-level input parsing** (`strict-paths`, `fail-on-cache-invalid`) — + lives in the action repo's `actionUtils` + `restoreImpl` tests. +- **Symlink resolution against the live filesystem** — by design. We validate + the *declared* paths from the archive header, not what they would resolve to + on the running host. Live-fs resolution would re-introduce the TOCTOU window + this feature exists to close. + +## Running the tests + +```sh +# from the toolkit repo root +npx jest --testTimeout 70000 packages/cache + +# just the new path-validation suites +npx jest --testTimeout 70000 \ + packages/cache/__tests__/pathValidation.test.ts \ + packages/cache/__tests__/listAndValidate.test.ts \ + packages/cache/__tests__/tarPathValidation.test.ts +``` + +Total path-validation test cases: **115** (86 unit + 14 real-archive + 15 mocked). +Combined with the regression updates, the cache package runs **237 tests** total. diff --git a/packages/cache/package-lock.json b/packages/cache/package-lock.json index 92530faf35..58eaae5c88 100644 --- a/packages/cache/package-lock.json +++ b/packages/cache/package-lock.json @@ -17,7 +17,8 @@ "@azure/core-rest-pipeline": "^1.23.0", "@azure/storage-blob": "^12.31.0", "@protobuf-ts/runtime-rpc": "^2.11.1", - "semver": "^7.7.4" + "semver": "^7.7.4", + "tar": "^7.5.15" }, "devDependencies": { "@protobuf-ts/plugin": "^2.11.1", @@ -306,6 +307,18 @@ "node": ">=14.17" } }, + "node_modules/@isaacs/fs-minipass": { + "version": "4.0.1", + "resolved": "https://registry.npmjs.org/@isaacs/fs-minipass/-/fs-minipass-4.0.1.tgz", + "integrity": "sha512-wgm9Ehl2jpeqP3zw/7mo3kRHFp5MEDhqAdwy1fTGkHAwnkGOVsgpvQhL8B5n1qlb01jV3n/bI0ZfZp5lWA1k4w==", + "license": "ISC", + "dependencies": { + "minipass": "^7.0.4" + }, + "engines": { + "node": ">=18.0.0" + } + }, "node_modules/@nodable/entities": { "version": "2.1.0", "resolved": "https://registry.npmjs.org/@nodable/entities/-/entities-2.1.0.tgz", @@ -445,6 +458,15 @@ "concat-map": "0.0.1" } }, + "node_modules/chownr": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/chownr/-/chownr-3.0.0.tgz", + "integrity": "sha512-+IxzY9BZOQd/XuYPRmrvEVjF/nqj5kgT4kEq7VofrDoM1MxoRjEWkrCC3EtLi59TVawxTAn+orJwFQcrqEN1+g==", + "license": "BlueOak-1.0.0", + "engines": { + "node": ">=18" + } + }, "node_modules/concat-map": { "version": "0.0.1", "resolved": "https://registry.npmjs.org/concat-map/-/concat-map-0.0.1.tgz", @@ -552,6 +574,27 @@ "node": "*" } }, + "node_modules/minipass": { + "version": "7.1.3", + "resolved": "https://registry.npmjs.org/minipass/-/minipass-7.1.3.tgz", + "integrity": "sha512-tEBHqDnIoM/1rXME1zgka9g6Q2lcoCkxHLuc7ODJ5BxbP5d4c2Z5cGgtXAku59200Cx7diuHTOYfSBD8n6mm8A==", + "license": "BlueOak-1.0.0", + "engines": { + "node": ">=16 || 14 >=14.17" + } + }, + "node_modules/minizlib": { + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/minizlib/-/minizlib-3.1.0.tgz", + "integrity": "sha512-KZxYo1BUkWD2TVFLr0MQoM8vUUigWD3LlD83a/75BqC+4qE0Hb1Vo5v1FgcfaNXvfXzr+5EhQ6ing/CaBijTlw==", + "license": "MIT", + "dependencies": { + "minipass": "^7.1.2" + }, + "engines": { + "node": ">= 18" + } + }, "node_modules/ms": { "version": "2.1.3", "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz", @@ -597,6 +640,22 @@ ], "license": "MIT" }, + "node_modules/tar": { + "version": "7.5.15", + "resolved": "https://registry.npmjs.org/tar/-/tar-7.5.15.tgz", + "integrity": "sha512-dzGK0boVlC4W5QFuQN1EFSl3bIDYsk7Tj40U6eIBnK2k/8ml7TZ5agbI5j5+qnoVcAA+rNtBml8SEiLxZpNqRQ==", + "license": "BlueOak-1.0.0", + "dependencies": { + "@isaacs/fs-minipass": "^4.0.0", + "chownr": "^3.0.0", + "minipass": "^7.1.2", + "minizlib": "^3.1.0", + "yallist": "^5.0.0" + }, + "engines": { + "node": ">=18" + } + }, "node_modules/tslib": { "version": "2.8.1", "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.8.1.tgz", @@ -656,6 +715,15 @@ "engines": { "node": ">=16.0.0" } + }, + "node_modules/yallist": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/yallist/-/yallist-5.0.0.tgz", + "integrity": "sha512-YgvUTfwqyc7UXVMrB+SImsVYSmTS8X/tSrtdNZMImM+n7+QTriRXyXim0mBrTXNeqzVF0KWGgHPeiyViFFrNDw==", + "license": "BlueOak-1.0.0", + "engines": { + "node": ">=18" + } } } } diff --git a/packages/cache/package.json b/packages/cache/package.json index 209212de77..c579505869 100644 --- a/packages/cache/package.json +++ b/packages/cache/package.json @@ -15,7 +15,8 @@ "exports": { ".": { "types": "./lib/cache.d.ts", - "import": "./lib/cache.js" + "import": "./lib/cache.js", + "default": "./lib/cache.js" } }, "directories": { @@ -42,6 +43,9 @@ "bugs": { "url": "https://github.com/actions/toolkit/issues" }, + "engines": { + "node": ">=20.0.0" + }, "dependencies": { "@actions/core": "^3.0.1", "@actions/exec": "^3.0.0", @@ -51,7 +55,8 @@ "@azure/core-rest-pipeline": "^1.23.0", "@azure/storage-blob": "^12.31.0", "@protobuf-ts/runtime-rpc": "^2.11.1", - "semver": "^7.7.4" + "semver": "^7.7.4", + "tar": "^7.5.15" }, "devDependencies": { "@protobuf-ts/plugin": "^2.11.1", diff --git a/packages/cache/src/cache.ts b/packages/cache/src/cache.ts index c53404049b..e4a0f7ac97 100644 --- a/packages/cache/src/cache.ts +++ b/packages/cache/src/cache.ts @@ -6,6 +6,7 @@ import * as cacheTwirpClient from './internal/shared/cacheTwirpClient.js' import {getCacheServiceVersion, isGhes} from './internal/config.js' import {DownloadOptions, UploadOptions} from './options.js' import {createTar, extractTar, listTar} from './internal/tar.js' +import {CacheIntegrityError} from './internal/cacheIntegrityError.js' import { CreateCacheEntryRequest, FinalizeCacheEntryUploadRequest, @@ -15,6 +16,14 @@ import { import {HttpClientError} from '@actions/http-client' export type {DownloadOptions, UploadOptions} +export {CacheIntegrityError} +export type { + CacheIntegrityErrorCode +} from './internal/cacheIntegrityError.js' +export type { + PathValidationMode, + PathValidationViolation +} from './internal/pathValidation.js' export class ValidationError extends Error { constructor(message: string) { super(message) @@ -121,6 +130,10 @@ export function isFeatureAvailable(): boolean { * @param downloadOptions cache download options * @param enableCrossOsArchive an optional boolean enabled to restore on windows any cache created on any platform * @returns string returns the key for the cache hit, otherwise returns undefined + * @throws {ValidationError} If paths is empty, any key is longer than 512 characters or contains a comma, + * or more than 10 keys are provided. + * @throws {CacheIntegrityError} If options.pathValidation is 'error' and a parse error or path violation + * is detected in the cache archive before extraction. */ export async function restoreCache( paths: string[], @@ -164,6 +177,10 @@ export async function restoreCache( * @param options cache download options * @param enableCrossOsArchive an optional boolean enabled to restore on Windows any cache created on any platform * @returns string returns the key for the cache hit, otherwise returns undefined + * @throws {ValidationError} If paths is empty, any key is longer than 512 characters or contains a comma, + * or more than 10 keys are provided. + * @throws {CacheIntegrityError} If options.pathValidation is 'error' and a parse error or path violation + * is detected in the cache archive before extraction. */ async function restoreCacheV1( paths: string[], @@ -229,7 +246,10 @@ async function restoreCacheV1( )} MB (${archiveFileSize} B)` ) - await extractTar(archivePath, compressionMethod) + await extractTar(archivePath, compressionMethod, { + declaredPaths: paths, + pathValidation: options?.pathValidation + }) core.info('Cache restored successfully') return cacheEntry.cacheKey @@ -237,6 +257,11 @@ async function restoreCacheV1( const typedError = error as Error if (typedError.name === ValidationError.name) { throw error + } else if (typedError instanceof CacheIntegrityError) { + // Integrity errors are surfaced to the caller (e.g. the cache action) + // so it can choose to fail the step rather than silently treating the + // restore as a cache miss. + throw error } else { // warn on cache restore failure and continue build // Log server errors (5xx) as errors, all other errors as warnings @@ -271,6 +296,10 @@ async function restoreCacheV1( * @param downloadOptions cache download options * @param enableCrossOsArchive an optional boolean enabled to restore on windows any cache created on any platform * @returns string returns the key for the cache hit, otherwise returns undefined + * @throws {ValidationError} If paths is empty, any key is longer than 512 characters or contains a comma, + * or more than 10 keys are provided. + * @throws {CacheIntegrityError} If options.pathValidation is 'error' and a parse error or path violation + * is detected in the cache archive before extraction. */ async function restoreCacheV2( paths: string[], @@ -361,7 +390,10 @@ async function restoreCacheV2( await listTar(archivePath, compressionMethod) } - await extractTar(archivePath, compressionMethod) + await extractTar(archivePath, compressionMethod, { + declaredPaths: paths, + pathValidation: options?.pathValidation + }) core.info('Cache restored successfully') return response.matchedKey @@ -369,6 +401,11 @@ async function restoreCacheV2( const typedError = error as Error if (typedError.name === ValidationError.name) { throw error + } else if (typedError instanceof CacheIntegrityError) { + // Integrity errors are surfaced to the caller (e.g. the cache action) + // so it can choose to fail the step rather than silently treating the + // restore as a cache miss. + throw error } else { // Supress all non-validation cache related errors because caching should be optional // Log server errors (5xx) as errors, all other errors as warnings diff --git a/packages/cache/src/internal/cacheIntegrityError.ts b/packages/cache/src/internal/cacheIntegrityError.ts new file mode 100644 index 0000000000..0b8482269a --- /dev/null +++ b/packages/cache/src/internal/cacheIntegrityError.ts @@ -0,0 +1,36 @@ +import {PathValidationViolation} from './pathValidation.js' + +/** + * Reason codes for cache integrity failures. Useful for callers (e.g. the + * `actions/cache` action) that want to distinguish between recoverable + * conditions and unambiguous corruption. + */ +export type CacheIntegrityErrorCode = + | 'PARSE_ERROR' // archive could not be opened or parsed + | 'PATH_VIOLATION' // path validation rejected one or more entries + | 'CHECKSUM_MISMATCH' // reserved for future checksum-based integrity checks + +/** + * Thrown when a downloaded cache archive fails an integrity check. Today + * this covers parse errors and path-validation failures (when the caller + * has opted into `pathValidation: 'error'`). + * + * Callers can `instanceof`-check this class to distinguish integrity + * failures from transient transport errors. + */ +export class CacheIntegrityError extends Error { + readonly code: CacheIntegrityErrorCode + readonly violations?: PathValidationViolation[] + + constructor( + code: CacheIntegrityErrorCode, + message: string, + violations?: PathValidationViolation[] + ) { + super(message) + this.name = 'CacheIntegrityError' + this.code = code + this.violations = violations + Object.setPrototypeOf(this, CacheIntegrityError.prototype) + } +} diff --git a/packages/cache/src/internal/listAndValidate.ts b/packages/cache/src/internal/listAndValidate.ts new file mode 100644 index 0000000000..e820039576 --- /dev/null +++ b/packages/cache/src/internal/listAndValidate.ts @@ -0,0 +1,208 @@ +import {spawn} from 'child_process' +import {createReadStream} from 'fs' +import {Readable} from 'stream' +import {pipeline} from 'stream/promises' +import {Parser, ReadEntry} from 'tar' +import {CompressionMethod} from './constants.js' +import { + PathValidationViolation, + prepareAllowedRoots, + validateEntry +} from './pathValidation.js' + +/** + * Stream the entries of a (possibly compressed) tar archive and validate each + * against the allowed roots. Does NOT extract any files — entries are read + * for header inspection only. Returns the list of violations (empty if the + * archive is clean). + * + * Throws an Error if the archive cannot be parsed (corrupt header, + * decompression failure, truncated stream, etc.). The caller is responsible + * for translating that into a CacheIntegrityError with code `PARSE_ERROR`. + */ +export async function listAndValidate( + archivePath: string, + compressionMethod: CompressionMethod, + allowedRoots: string[], + extractCwd: string +): Promise { + const violations: PathValidationViolation[] = [] + + // Precompute the normalized / case-folded form of each allowed root once, + // so the per-entry containment check is a handful of string compares + // rather than an O(roots) re-normalization on every entry. + const preparedRoots = prepareAllowedRoots(allowedRoots) + + // Captured parse error (if any). We don't throw synchronously from inside + // the parser's onwarn callback because doing so leaves the parser's + // internal stream in a half-broken state that hangs the surrounding + // pipeline. Instead we record the first critical warning and throw it + // after the pipeline completes. + let firstParseError: Error | undefined + + const recordParseError = (code: string, message: string): void => { + if (firstParseError) return + firstParseError = new Error(`tar parse error (${code}): ${message}`) + } + + // For gzip we let node-tar handle decompression internally (its built-in + // gzip support is mature). For zstd we spawn the system `zstd` binary so + // we get the same `--long=30` window-size handling as the existing + // extract codepath in tar.ts and avoid relying on Node's experimental + // zstd support. + const useNativeGzip = compressionMethod === CompressionMethod.Gzip + const parser = new Parser({ + gzip: useNativeGzip, + // Disable strict mode so recoverable warnings (e.g. unknown extended + // headers) don't abort parsing. Real corruption is surfaced explicitly + // via the captured error below. + strict: false, + // Treat structural problems (bad archive, bad header, bad chksum) as + // hard parse errors — silently ignoring them would let a corrupt + // archive sail through validation. We DO NOT throw on softer warnings + // (extended headers, unknown PAX keys, etc.). + onwarn: (code, message) => { + if ( + code === 'TAR_BAD_ARCHIVE' || + code === 'TAR_ENTRY_INVALID' || + code === 'TAR_ENTRY_ERROR' || + code === 'TAR_ABORT' + ) { + recordParseError(code, message) + } + }, + onReadEntry: (entry: ReadEntry) => { + try { + const result = validateEntry( + entry.path, + entry.linkpath || undefined, + entry.type, + preparedRoots, + extractCwd + ) + if (!result.ok) { + violations.push({ + path: entry.path, + linkpath: entry.linkpath || undefined, + resolved: result.resolved, + entryType: entry.type, + code: result.code, + reason: result.reason + }) + } + } finally { + // Drain the entry so the parser advances. Without this the stream + // stalls waiting for the consumer to read the entry body. + entry.resume() + } + } + }) + + await streamArchiveTo(archivePath, compressionMethod, parser) + + if (firstParseError) { + throw firstParseError + } + return violations +} + +async function streamArchiveTo( + archivePath: string, + compressionMethod: CompressionMethod, + destination: NodeJS.WritableStream +): Promise { + const fileStream = createReadStream(archivePath) + + if (compressionMethod === CompressionMethod.Gzip) { + // node-tar's Parser was constructed with `gzip: true`, so it handles + // decompression internally — just pipe the raw file stream in. + await pipeline(fileStream, destination) + return + } + + // zstd-compressed archive. node-tar does not natively decompress zstd, so + // we shell out to the `zstd` binary the same way tar.ts does for the + // existing extract codepath. This adds one extra decompression vs. the + // existing extract step (which runs its own zstd), but only on archives + // where path validation is enabled. + const zstdArgs: string[] = ['-d', '-c'] + if (compressionMethod === CompressionMethod.Zstd) { + zstdArgs.unshift('--long=30') + } + + const zstd = spawn('zstd', zstdArgs, { + stdio: ['pipe', 'pipe', 'pipe'] + }) + + // Cap stderr capture so a chatty/malicious zstd invocation can't grow + // memory without bound. 64 KiB is plenty for any realistic error message. + const STDERR_CAP_BYTES = 64 * 1024 + let zstdStderr = '' + let stderrBytes = 0 + let stderrTruncated = false + zstd.stderr.on('data', (chunk: Buffer) => { + if (stderrBytes >= STDERR_CAP_BYTES) { + stderrTruncated = true + return + } + const remaining = STDERR_CAP_BYTES - stderrBytes + if (chunk.length > remaining) { + zstdStderr += chunk.subarray(0, remaining).toString() + stderrBytes += remaining + stderrTruncated = true + } else { + zstdStderr += chunk.toString() + stderrBytes += chunk.length + } + }) + + const zstdExited = new Promise((resolve, reject) => { + zstd.on('error', reject) + zstd.on('exit', (code, signal) => { + if (code === 0) { + resolve() + } else { + // A SIGTERM here means we killed the child ourselves during cleanup + // after an upstream failure — surface a clearer message in that + // case rather than the bare exit code. + const cause = + signal !== null + ? `terminated by signal ${signal}` + : `exited with code ${code}` + const tail = stderrTruncated ? ' (stderr truncated)' : '' + reject( + new Error( + `zstd ${cause}${ + zstdStderr ? `: ${zstdStderr.trim()}` : '' + }${tail}` + ) + ) + } + }) + }) + + const stdin = zstd.stdin as unknown as NodeJS.WritableStream + const stdout = zstd.stdout as unknown as Readable + + // Run both sides of the pipeline concurrently, plus wait for the zstd + // process itself to exit cleanly. If decompression produces non-tar bytes + // the destination parser will reject; if zstd exits non-zero the exit + // promise will reject. Either way we surface the error. + const inPromise = pipeline(fileStream, stdin) + const outPromise = pipeline(stdout, destination) + // Suppress unhandled-rejection warnings on the individual promises. The + // first rejection is propagated via the Promise.all below; any later + // rejection (e.g. zstd reporting a non-zero exit after the parser + // already errored) would otherwise crash the process. + inPromise.catch(() => undefined) + outPromise.catch(() => undefined) + zstdExited.catch(() => undefined) + + try { + await Promise.all([inPromise, outPromise, zstdExited]) + } finally { + if (zstd.exitCode === null && zstd.signalCode === null && !zstd.killed) { + zstd.kill('SIGTERM') + } + } +} diff --git a/packages/cache/src/internal/pathValidation.ts b/packages/cache/src/internal/pathValidation.ts new file mode 100644 index 0000000000..8cdcadd92a --- /dev/null +++ b/packages/cache/src/internal/pathValidation.ts @@ -0,0 +1,522 @@ +import * as os from 'os' +import * as path from 'path' + +/** + * The validation mode controlling how off-root archive entries are handled + * during cache restore. + */ +export type PathValidationMode = 'off' | 'warn' | 'error' + +/** + * Describes a single archive entry that fails validation. + */ +export interface PathValidationViolation { + /** Path of the offending archive entry, as reported by the tar parser. */ + path: string + /** Link target for symlink/hardlink entries (undefined for regular files). */ + linkpath?: string + /** Resolved absolute path that triggered the violation. */ + resolved: string + /** Tar entry type (File, Directory, SymbolicLink, Link, etc.). */ + entryType: string + /** Machine-readable reason code. */ + code: + | 'ABSOLUTE_PATH' + | 'UNC_PATH' + | 'NUL_BYTE' + | 'OUTSIDE_ROOTS' + | 'LINK_OUTSIDE_ROOTS' + | 'UNSUPPORTED_TYPE' + /** Human-readable description of the violation. */ + reason: string +} + +/** + * Result of validating a single archive entry. + */ +export type ValidationResult = + | {ok: true} + | { + ok: false + code: PathValidationViolation['code'] + resolved: string + reason: string + } + +/** + * Pre-computed form of an allowed-roots list, suitable for repeated + * containment checks against many archive entries. Produced by + * {@link prepareAllowedRoots}. + * + * Holds the case-sensitivity flag and per-root normalized + comparison + * strings, so {@link validateEntry} can normalize each child path once and + * compare against pre-baked strings instead of re-normalizing/lower-casing + * the parent on every entry. + */ +export interface PreparedAllowedRoots { + readonly caseInsensitive: boolean + readonly roots: ReadonlyArray +} + +interface PreparedRoot { + /** Normalized parent path, kept verbatim for diagnostics. */ + readonly normParent: string + /** Normalized parent in comparison form (lowercased iff case-insensitive). */ + readonly normParentCmp: string + /** Normalized parent with trailing separator, in comparison form. */ + readonly parentWithSepCmp: string +} + +/** + * Tar entry types that produce a real file/directory/link on disk and are + * considered safe to extract from a cache archive. Anything outside this set + * (other than header-only metadata entries below) is rejected by the + * validator. + * + * This is intentionally an allow-list rather than a block-list: the cache + * format only ever needs files, directories, and links, and a paranoid + * security check should reject unknown entry types by default rather than + * silently passing through anything node-tar happens to surface from a + * non-standard typeflag byte. + */ +const ALLOWED_ENTRY_TYPES = new Set([ + 'File', + 'OldFile', // legacy ustar regular-file typeflag + 'Directory', + 'SymbolicLink', + 'Link' // hardlink — common in node_modules caches via npm +]) + +/** + * Tar entry types that describe metadata, not actual file content. These are + * absorbed by the parser into the next real entry and do not produce a file on + * disk, so we skip path validation for them. + */ +const HEADER_ONLY_ENTRY_TYPES = new Set([ + 'NextFileHasLongPath', + 'NextFileHasLongLinkpath', + 'GlobalExtendedHeader', + 'ExtendedHeader', + 'OldGnuLongPath', + 'OldGnuLongLink' +]) + +/** + * Characters that signal a glob portion of a declared cache path. Anything to + * the right of the first segment containing one of these is stripped when + * deriving the longest non-glob prefix. + */ +const GLOB_CHAR_REGEX = /[*?[\]{}!]/ + +/** + * Returns the working directory used for cache extraction. Mirrors the value + * used by `tar.ts` so validation matches the actual extraction CWD. + */ +export function getWorkingDirectory(): string { + return process.env['GITHUB_WORKSPACE'] ?? process.cwd() +} + +/** + * Derive the set of allowed extraction roots from the user-declared cache + * `paths` input. + * + * For each declared path: + * - The longest leading prefix that contains no glob metacharacters is taken. + * - `~` and `~/...` are expanded to the user's home directory. + * - `$VAR`, `${VAR}` (POSIX) and `%VAR%` (Windows) environment references + * are expanded. + * - Patterns starting with `!` (glob negation) are dropped — negations narrow + * what gets cached, not what extraction is allowed to write. + * - The result is resolved to an absolute path against `extractCwd` (or + * `getWorkingDirectory()` if not provided). + * + * The final list is deduplicated and parents subsume their children — if + * `/a` is allowed, `/a/b` is dropped from the list. + * + * No filesystem canonicalization (`realpath`) is performed: we honor what + * the user wrote literally. This preserves the user's intent if they + * deliberately included a directory that happens to be a symlink. + */ +export function deriveAllowedRoots( + declaredPaths: string[], + extractCwd: string = getWorkingDirectory() +): string[] { + const roots: string[] = [] + for (const raw of declaredPaths) { + if (raw === undefined || raw === null) continue + const trimmed = raw.trim() + if (trimmed === '') continue + if (trimmed.startsWith('!')) continue + const root = deriveRoot(trimmed, extractCwd) + if (root !== undefined) roots.push(root) + } + return collapseRoots(roots) +} + +function deriveRoot(declaredPath: string, extractCwd: string): string { + // Tilde expansion is structural (only valid at the very start) and cannot + // introduce glob characters, so apply it up-front. + let raw = declaredPath + if (raw === '~') { + raw = os.homedir() + } else if (raw.startsWith('~/') || raw.startsWith('~\\')) { + raw = path.join(os.homedir(), raw.slice(2)) + } + + // Identify the longest leading run of segments that contain no glob + // metacharacters BEFORE expanding env-var references. Doing so handles + // two distinct concerns: + // 1. `${VAR}` reference syntax contains `{` and `}`, which would + // otherwise match the brace-glob class and discard the entire + // reference from the prefix. + // 2. If an env value happens to contain a glob character (rare, but + // possible on attacker-influenced env), expanding before checking + // would shorten the prefix mid-path and silently broaden the + // allowed root. By checking the raw text and only expanding the + // kept prefix afterwards, env-derived characters are preserved + // verbatim in the resulting root rather than truncating it. + // Split on either separator so glob detection works for both styles; + // a leading empty segment from absolute POSIX paths (e.g. "/a/b" → + // ["", "a", "b"]) is preserved so the rejoin below produces "/a/b". + const segments = raw.split(/[\\/]/) + const nonGlobSegments: string[] = [] + for (const seg of segments) { + if (segmentHasGlob(seg)) break + nonGlobSegments.push(seg) + } + + // Expand env vars only on the kept prefix. + const prefix = expandEnvVars(nonGlobSegments.join('/')) + + // If the pattern starts with a glob metachar (e.g. "**/foo"), the prefix + // is empty — fall back to the extraction CWD. + if (prefix === '' || prefix === '.') { + return path.resolve(extractCwd) + } + + // Resolve relative to the extraction CWD; this produces an absolute path. + return path.resolve(extractCwd, prefix) +} + +/** + * True if `seg` contains a glob metacharacter that isn't part of an + * env-var reference. Strips `${VAR}`, `$VAR`, and `%VAR%` first so the + * curly braces in `${VAR}` aren't misread as a brace-glob. + */ +function segmentHasGlob(seg: string): boolean { + const stripped = seg + .replace(/\$\{[^}]+\}/g, '') + .replace(/\$[A-Za-z_][A-Za-z0-9_]*/g, '') + .replace(/%[^%]+%/g, '') + return GLOB_CHAR_REGEX.test(stripped) +} + +function expandEnvVars(input: string): string { + let result = input + // ${VAR} + result = result.replace(/\$\{([^}]+)\}/g, (_, name) => process.env[name] ?? '') + // $VAR (POSIX-style identifier) + result = result.replace( + /\$([A-Za-z_][A-Za-z0-9_]*)/g, + (_, name) => process.env[name] ?? '' + ) + // %VAR% (Windows-style) + result = result.replace(/%([^%]+)%/g, (_, name) => process.env[name] ?? '') + return result +} + +/** + * Deduplicate roots and drop any root that is contained within another root + * already in the set. Comparison is segment-aware so `/aa` is NOT considered + * a child of `/a`. + */ +function collapseRoots(roots: string[]): string[] { + const normalized: string[] = [] + for (const r of roots) { + const n = path.normalize(r) + if (!normalized.some(existing => pathsEqual(existing, n))) { + normalized.push(n) + } + } + // Sort so shorter (potentially-parent) paths come first. + normalized.sort((a, b) => a.length - b.length) + const result: string[] = [] + for (const candidate of normalized) { + if (!result.some(parent => isUnderRoot(candidate, parent))) { + result.push(candidate) + } + } + return result +} + +/** + * Validate a single archive entry against the allowed roots. + * + * For symlinks/hardlinks the link target is also validated: + * - For symlinks, the target is resolved relative to the entry's directory + * (matching POSIX symlink semantics). + * - For hardlinks, the target is resolved relative to the extraction CWD + * (matching tar's hardlink-target semantics). + * + * The validator never touches the filesystem; it operates purely on path + * strings. Allowed roots are not realpath'd — the caller is expected to + * derive them via {@link deriveAllowedRoots} which honors the user's + * declared paths literally. + */ +export function validateEntry( + entryPath: string, + linkPath: string | undefined, + entryType: string, + allowedRoots: string[] | PreparedAllowedRoots, + extractCwd: string = getWorkingDirectory() +): ValidationResult { + // Accept either a raw string[] (kept for callers / unit tests) or a + // pre-computed PreparedAllowedRoots produced by prepareAllowedRoots. The + // hot path (listAndValidate) always passes the prepared form. + const prepared = Array.isArray(allowedRoots) + ? prepareAllowedRoots(allowedRoots) + : allowedRoots + // Skip header-only entries — they describe the next real entry. + if (HEADER_ONLY_ENTRY_TYPES.has(entryType)) { + return {ok: true} + } + + // Compute the resolved entry path up-front so every failure branch can + // report it. `path.resolve` is pure string manipulation and is safe to + // call even for inputs that we will subsequently reject (NUL bytes, + // UNC paths, absolute paths). For absolute inputs the cwd component is + // discarded by path.resolve, which is the behavior we want — the + // reported `resolved` is the absolute location the entry would write to. + const resolvedEntry = resolveEntry(entryPath, extractCwd) + + // Reject anything that is not on the allow-list of known-safe entry types. + // This rejects device/FIFO/sparse entries, and also rejects any future or + // attacker-supplied typeflag byte that node-tar surfaces as an unknown + // string (since a cache archive should never legitimately contain one). + if (!ALLOWED_ENTRY_TYPES.has(entryType)) { + return { + ok: false, + code: 'UNSUPPORTED_TYPE', + resolved: resolvedEntry, + reason: `unsupported tar entry type: ${entryType}` + } + } + + // Reject NUL bytes anywhere in the path. + if (entryPath.includes('\0')) { + return { + ok: false, + code: 'NUL_BYTE', + resolved: resolvedEntry, + reason: 'NUL byte in entry path' + } + } + + // Reject UNC paths. Check the original string before separator normalization + // because UNC is identified by leading `\\` or `//`. + if ( + entryPath.startsWith('\\\\') || + entryPath.startsWith('//') || + /^[\\/]{2}\?[\\/]/.test(entryPath) + ) { + return { + ok: false, + code: 'UNC_PATH', + resolved: resolvedEntry, + reason: `UNC path not allowed: ${entryPath}` + } + } + + // Reject Windows-style absolute paths and drive-relative paths (`C:foo`). + // These can be present in archives created on Windows even when extracted + // on POSIX, so we reject them on every platform. + if (/^[a-zA-Z]:/.test(entryPath)) { + return { + ok: false, + code: 'ABSOLUTE_PATH', + resolved: resolvedEntry, + reason: `absolute or drive-relative path not allowed: ${entryPath}` + } + } + + // Reject POSIX absolute paths. + if (entryPath.startsWith('/') || entryPath.startsWith('\\')) { + return { + ok: false, + code: 'ABSOLUTE_PATH', + resolved: resolvedEntry, + reason: `absolute path not allowed: ${entryPath}` + } + } + + if (!isUnderAnyPreparedRoot(resolvedEntry, prepared)) { + return { + ok: false, + code: 'OUTSIDE_ROOTS', + resolved: resolvedEntry, + reason: `path resolves outside allowed roots: ${entryPath} -> ${resolvedEntry}` + } + } + + // Validate link targets for symlinks and hardlinks. + if ( + linkPath !== undefined && + linkPath !== '' && + (entryType === 'SymbolicLink' || entryType === 'Link') + ) { + let resolvedLink: string + if (entryType === 'SymbolicLink') { + // Symlink targets are resolved relative to the link's containing + // directory at extraction time. We mirror that here so a symlink + // pointing outside the allowed roots is rejected. + resolvedLink = path.resolve(path.dirname(resolvedEntry), linkPath) + } else { + // Hardlink targets are resolved relative to the extraction CWD. + resolvedLink = path.resolve(extractCwd, linkPath) + } + if (linkPath.includes('\0')) { + return { + ok: false, + code: 'NUL_BYTE', + resolved: resolvedLink, + reason: 'NUL byte in link target' + } + } + if (!isUnderAnyPreparedRoot(resolvedLink, prepared)) { + return { + ok: false, + code: 'LINK_OUTSIDE_ROOTS', + resolved: resolvedLink, + reason: `link target resolves outside allowed roots: ${linkPath} -> ${resolvedLink}` + } + } + } + + return {ok: true} +} + +function resolveEntry(entryPath: string, extractCwd: string): string { + // Tar paths are POSIX-style. Convert to native separators so path.resolve + // produces the right thing on Windows. + const native = entryPath.split(/[\\/]/).join(path.sep) + return path.resolve(extractCwd, native) +} + +function isUnderAnyRoot(resolved: string, roots: string[]): boolean { + for (const root of roots) { + if (isUnderRoot(resolved, root)) return true + } + return false +} + +/** + * Precompute a {@link PreparedAllowedRoots} from a raw roots array. Call + * once per `listAndValidate` invocation and reuse for every entry. + * + * Pulling these constants out of the inner loop is the difference between + * O(entries × roots) `path.normalize`/`toLowerCase` calls and O(entries + + * roots) calls — for large caches the per-entry validation reduces to a + * single normalize + (optional) lowercase plus N pre-baked string compares. + */ +export function prepareAllowedRoots(roots: string[]): PreparedAllowedRoots { + const caseInsensitive = caseInsensitiveFs() + const prepared: PreparedRoot[] = roots.map(root => { + const normParent = path.normalize(root) + const parentWithSep = normParent.endsWith(path.sep) + ? normParent + : normParent + path.sep + return { + normParent, + normParentCmp: caseInsensitive ? normParent.toLowerCase() : normParent, + parentWithSepCmp: caseInsensitive + ? parentWithSep.toLowerCase() + : parentWithSep + } + }) + return {caseInsensitive, roots: prepared} +} + +function isUnderAnyPreparedRoot( + resolved: string, + prepared: PreparedAllowedRoots +): boolean { + const normChild = path.normalize(resolved) + const childCmp = prepared.caseInsensitive + ? normChild.toLowerCase() + : normChild + for (const root of prepared.roots) { + if (childCmp === root.normParentCmp) return true + if (childCmp.startsWith(root.parentWithSepCmp)) return true + } + return false +} + +function isUnderRoot(child: string, parent: string): boolean { + const normChild = path.normalize(child) + const normParent = path.normalize(parent) + if (pathsEqual(normChild, normParent)) return true + const parentWithSep = normParent.endsWith(path.sep) + ? normParent + : normParent + path.sep + if (caseInsensitiveFs()) { + return normChild.toLowerCase().startsWith(parentWithSep.toLowerCase()) + } + return normChild.startsWith(parentWithSep) +} + +function pathsEqual(a: string, b: string): boolean { + return caseInsensitiveFs() ? a.toLowerCase() === b.toLowerCase() : a === b +} + +/** + * Conservative platform-level guess at filesystem case-sensitivity. Used by + * {@link isUnderRoot} and {@link pathsEqual} when comparing allowed roots + * against resolved entry paths. + * + * - Windows: NTFS is case-insensitive by default (per-directory case + * sensitivity via `fsutil` is opt-in and rare). + * - macOS: APFS and HFS+ default to case-insensitive; the case-sensitive + * APFS variant is selectable at format time but rarely used outside of + * developer setups. + * - Other platforms (Linux/BSD/etc.) are assumed case-sensitive. + * + * On a case-sensitive volume mounted under a case-insensitive platform + * (e.g. case-sensitive APFS on macOS) this guess errs toward treating the + * fs as case-insensitive, which **loosens** the comparison: an entry like + * `cache/Foo` could be considered to be under the allowed root + * `/workspace/cache/foo` even though the underlying fs would create them + * as distinct paths. The direction of the error is fewer violations + * reported, not more, so this is a soundness loosening rather than a + * security-relevant miss — the worst outcome is letting an entry through + * that the actual extraction would then write to a different path than + * the allowed root suggests. + * + * Probing the workspace with `fs.realpathSync.native` would be more + * accurate but adds I/O on every restore and complicates the otherwise + * pure path-string validator, so this platform-level heuristic is + * preferred for now. Revisit if real-world reports show false negatives + * on case-sensitive macOS/Windows volumes. + */ +function caseInsensitiveFs(): boolean { + return process.platform === 'win32' || process.platform === 'darwin' +} + +/** + * Format a violation list for human consumption in a single warning message. + * Truncates after `maxShown` entries and appends a count of the remainder so + * the warning stays a reasonable size. + */ +export function formatViolationSummary( + violations: PathValidationViolation[], + maxShown = 5 +): string { + const total = violations.length + if (total === 0) return '' + const shown = violations.slice(0, maxShown) + const lines = shown.map(v => ` - ${v.reason}`) + const remainder = total - shown.length + if (remainder > 0) { + lines.push(` - ...and ${remainder} more (see debug log for full list)`) + } + return lines.join('\n') +} diff --git a/packages/cache/src/internal/tar.ts b/packages/cache/src/internal/tar.ts index 73c126c7c2..5425dfc14d 100644 --- a/packages/cache/src/internal/tar.ts +++ b/packages/cache/src/internal/tar.ts @@ -1,4 +1,5 @@ import {exec} from '@actions/exec' +import * as core from '@actions/core' import * as io from '@actions/io' import {existsSync, writeFileSync} from 'fs' import * as path from 'path' @@ -11,6 +12,14 @@ import { TarFilename, ManifestFilename } from './constants.js' +import {CacheIntegrityError} from './cacheIntegrityError.js' +import {listAndValidate} from './listAndValidate.js' +import { + PathValidationMode, + PathValidationViolation, + deriveAllowedRoots, + formatViolationSummary +} from './pathValidation.js' const IS_WINDOWS = process.platform === 'win32' @@ -272,15 +281,107 @@ export async function listTar( // Extract a tar export async function extractTar( archivePath: string, - compressionMethod: CompressionMethod + compressionMethod: CompressionMethod, + options?: { + declaredPaths?: string[] + pathValidation?: PathValidationMode + } ): Promise { - // Create directory to extract tar into const workingDirectory = getWorkingDirectory() + const pathValidation: PathValidationMode = options?.pathValidation ?? 'off' + + // Run path validation BEFORE creating the extraction directory or invoking + // system tar. In 'error' mode, a CacheIntegrityError thrown here means no + // bytes are ever written to the workspace. In 'warn' mode, violations are + // logged and extraction proceeds. + if (pathValidation !== 'off') { + const declaredPaths = options?.declaredPaths ?? [] + let allowedRoots = deriveAllowedRoots(declaredPaths, workingDirectory) + // Fail-safe: if the caller didn't supply any declared paths (or all + // supplied paths were empty/negations), fall back to the workspace + // root as the sole allowed root. This still catches archives that try + // to escape the workspace via `..` or absolute paths. + if (allowedRoots.length === 0) { + allowedRoots = [workingDirectory] + } + let violations: PathValidationViolation[] | undefined + try { + violations = await listAndValidate( + archivePath, + compressionMethod, + allowedRoots, + workingDirectory + ) + } catch (error) { + // Parse / decompression failure encountered while validating. The + // validator's tar parser is stricter than the system `tar` that + // performs the actual extraction, so an archive can fail validation + // here yet still extract cleanly. Honor the caller's mode: + // - 'error': hard-fail; do not extract. + // - 'warn': log a warning, skip validation, and let system tar + // have a go. This preserves the legacy behavior where a + // quirky-but-extractable archive doesn't break the build + // just because the user opted into validation. + const message = `Cache archive integrity check failed: ${ + (error as Error).message + }` + if (pathValidation === 'error') { + throw new CacheIntegrityError('PARSE_ERROR', message) + } + core.warning( + `${message}\nSkipping path validation and proceeding with extraction because pathValidation is 'warn'.` + ) + } + if (violations && violations.length > 0) { + reportViolations(violations, pathValidation) + if (pathValidation === 'error') { + throw new CacheIntegrityError( + 'PATH_VIOLATION', + `Cache archive contains ${violations.length} entr${ + violations.length === 1 ? 'y' : 'ies' + } that resolve outside the declared cache paths. ` + + `Refusing to extract because pathValidation is 'error'.`, + violations + ) + } + } + } + + // Create directory to extract tar into await io.mkdirP(workingDirectory) const commands = await getCommands(compressionMethod, 'extract', archivePath) await execCommands(commands) } +function reportViolations( + violations: PathValidationViolation[], + mode: PathValidationMode +): void { + const header = + mode === 'error' + ? `Cache archive failed integrity check (${violations.length} violation${ + violations.length === 1 ? '' : 's' + }).` + : `Cache archive contains ${violations.length} entr${ + violations.length === 1 ? 'y' : 'ies' + } that resolve outside the declared cache paths.` + // One-line warning so the Actions log surfaces a single attention-grabbing + // entry. The truncated, human-readable list goes to `core.info` so users see + // it at default verbosity without us emitting multi-line warnings (which + // some log surfaces collapse). Full per-violation details still go to + // `core.debug` for triage of large archives. + core.warning(header) + core.info(formatViolationSummary(violations)) + for (const v of violations) { + core.debug( + `path-validation: code=${v.code} type=${v.entryType} path=${v.path}` + + (v.linkpath ? ` linkpath=${v.linkpath}` : '') + + ` resolved=${v.resolved}` + + ` reason=${v.reason}` + ) + } +} + // Create a tar export async function createTar( archiveFolder: string, diff --git a/packages/cache/src/options.ts b/packages/cache/src/options.ts index 3e4063f279..cabf5d0b8a 100644 --- a/packages/cache/src/options.ts +++ b/packages/cache/src/options.ts @@ -80,6 +80,32 @@ export interface DownloadOptions { * @default false */ lookupOnly?: boolean + + /** + * Whether to validate that every entry in the downloaded cache archive + * resolves to a path under one of the declared cache `paths` before + * extraction. Symlink and hardlink targets are also validated. + * + * - 'off' (default): no validation, identical to legacy behavior. + * - 'warn': validate before extraction; log a single aggregated + * `core.warning()` summary (with full per-entry detail in + * `core.debug()`) if violations are found, then extract anyway. + * - 'error': validate before extraction; throw `CacheIntegrityError` + * (with `code: 'PATH_VIOLATION'`) on any violation. Extraction + * does not run when this throws. + * + * Parse / decompression failures encountered while validating are + * handled per-mode: + * - 'warn': logged via `core.warning()`; validation is skipped and + * extraction proceeds (the system `tar` invoked for the actual + * extraction is more lenient than the validator and may still + * succeed). + * - 'error': surfaced as `CacheIntegrityError` with `code: 'PARSE_ERROR'` + * and extraction does not run. + * + * @default 'off' + */ + pathValidation?: 'off' | 'warn' | 'error' } /** @@ -147,7 +173,8 @@ export function getDownloadOptions(copy?: DownloadOptions): DownloadOptions { downloadConcurrency: 8, timeoutInMs: 30000, segmentTimeoutInMs: 600000, - lookupOnly: false + lookupOnly: false, + pathValidation: 'off' } if (copy) { @@ -174,6 +201,14 @@ export function getDownloadOptions(copy?: DownloadOptions): DownloadOptions { if (typeof copy.lookupOnly === 'boolean') { result.lookupOnly = copy.lookupOnly } + + if ( + copy.pathValidation === 'off' || + copy.pathValidation === 'warn' || + copy.pathValidation === 'error' + ) { + result.pathValidation = copy.pathValidation + } } const segmentDownloadTimeoutMins = process.env['SEGMENT_DOWNLOAD_TIMEOUT_MINS'] @@ -193,6 +228,7 @@ export function getDownloadOptions(copy?: DownloadOptions): DownloadOptions { ) core.debug(`Segment download timeout (ms): ${result.segmentTimeoutInMs}`) core.debug(`Lookup only: ${result.lookupOnly}`) + core.debug(`Path validation: ${result.pathValidation}`) return result } From 62e4d31b227971f6e1f146294dcec3f75d5a56da Mon Sep 17 00:00:00 2001 From: Jason Ginchereau Date: Wed, 20 May 2026 09:41:37 -1000 Subject: [PATCH 02/10] Update test plan doc regarding warn behavior Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com> --- packages/cache/docs/path-validation-test-plan.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/cache/docs/path-validation-test-plan.md b/packages/cache/docs/path-validation-test-plan.md index 1b9aba6f55..619e8fcd0d 100644 --- a/packages/cache/docs/path-validation-test-plan.md +++ b/packages/cache/docs/path-validation-test-plan.md @@ -152,8 +152,8 @@ lists" and observe `extractTar`'s reaction. They mock `@actions/exec`, - Clean archive → no warning, no throw, extraction proceeds - `listAndValidate` throws → wrapped as `CacheIntegrityError(PARSE_ERROR)`, system tar not invoked -- Parse failure in `'warn'` mode → still wrapped as `CacheIntegrityError(PARSE_ERROR)` - and surfaced (we can't trust the archive when we can't even read it) +- Parse failure in `'warn'` mode → warning is logged, validation is skipped, + and extraction still proceeds ### Plumbing From d14cd0e72247228d4cdde01a409a1f7723e42308 Mon Sep 17 00:00:00 2001 From: Jason Ginchereau Date: Wed, 20 May 2026 11:16:30 -1000 Subject: [PATCH 03/10] Suppress polynomial-redos warning --- packages/cache/src/internal/pathValidation.ts | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/packages/cache/src/internal/pathValidation.ts b/packages/cache/src/internal/pathValidation.ts index 8cdcadd92a..3cfc8ef108 100644 --- a/packages/cache/src/internal/pathValidation.ts +++ b/packages/cache/src/internal/pathValidation.ts @@ -202,26 +202,34 @@ function deriveRoot(declaredPath: string, extractCwd: string): string { * True if `seg` contains a glob metacharacter that isn't part of an * env-var reference. Strips `${VAR}`, `$VAR`, and `%VAR%` first so the * curly braces in `${VAR}` aren't misread as a brace-glob. + * + * The patterns `\$\{[^}]+\}` and `%[^%]+%` are O(n²) on pathological + * input (e.g. a long run of `${` with no closing `}`). That is not a + * security concern here: `seg` originates from the calling workflow's + * own declared cache `paths:`, so a hostile value would only DoS the + * same workflow that supplied it — it doesn't cross a trust boundary. + * The `lgtm` comments below suppress CodeQL's `js/polynomial-redos` + * alert on that basis. */ function segmentHasGlob(seg: string): boolean { const stripped = seg - .replace(/\$\{[^}]+\}/g, '') + .replace(/\$\{[^}]+\}/g, '') // lgtm[js/polynomial-redos] .replace(/\$[A-Za-z_][A-Za-z0-9_]*/g, '') - .replace(/%[^%]+%/g, '') + .replace(/%[^%]+%/g, '') // lgtm[js/polynomial-redos] return GLOB_CHAR_REGEX.test(stripped) } function expandEnvVars(input: string): string { let result = input - // ${VAR} - result = result.replace(/\$\{([^}]+)\}/g, (_, name) => process.env[name] ?? '') + // ${VAR} — see segmentHasGlob for the polynomial-redos rationale. + result = result.replace(/\$\{([^}]+)\}/g, (_, name) => process.env[name] ?? '') // lgtm[js/polynomial-redos] // $VAR (POSIX-style identifier) result = result.replace( /\$([A-Za-z_][A-Za-z0-9_]*)/g, (_, name) => process.env[name] ?? '' ) // %VAR% (Windows-style) - result = result.replace(/%([^%]+)%/g, (_, name) => process.env[name] ?? '') + result = result.replace(/%([^%]+)%/g, (_, name) => process.env[name] ?? '') // lgtm[js/polynomial-redos] return result } From 59a9650ce00095996c62000464277ca35a7dc893 Mon Sep 17 00:00:00 2001 From: Jason Ginchereau Date: Wed, 20 May 2026 11:33:37 -1000 Subject: [PATCH 04/10] Address copilot review feedback --- .../cache/__tests__/pathValidation.test.ts | 144 +++++++++++++++++- .../cache/docs/path-validation-test-plan.md | 25 +-- packages/cache/src/internal/pathValidation.ts | 142 +++++++++++------ 3 files changed, 246 insertions(+), 65 deletions(-) diff --git a/packages/cache/__tests__/pathValidation.test.ts b/packages/cache/__tests__/pathValidation.test.ts index 860c7b27b0..f8b57cad1b 100644 --- a/packages/cache/__tests__/pathValidation.test.ts +++ b/packages/cache/__tests__/pathValidation.test.ts @@ -74,6 +74,22 @@ describe('deriveAllowedRoots', () => { const negated = IS_WINDOWS ? '!C:\\foo\\secret' : '!/foo/secret' expect(deriveAllowedRoots([allowed, negated], cwd)).toEqual([allowed]) }) + + test('non-leading ! is treated as a literal path character, not a glob', () => { + // Regression: an earlier implementation included `!` in the glob + // metachar set, so a declared path like `cache/my!dir/data` would + // truncate to `cache/`, silently broadening the allowed root. + // `@actions/glob`/minimatch treats `!` literally except as a leading + // negation (handled by the case above), so non-leading `!` must be + // preserved verbatim in the derived root. + const input = IS_WINDOWS + ? 'C:\\cache\\my!dir\\data' + : '/cache/my!dir/data' + const expected = IS_WINDOWS + ? 'C:\\cache\\my!dir\\data' + : '/cache/my!dir/data' + expect(deriveAllowedRoots([input], cwd)).toEqual([expected]) + }) }) describe('path expansion', () => { @@ -588,7 +604,7 @@ describe('validateEntry', () => { }) describe('symlink/hardlink attacks (must reject)', () => { - test('symlink with absolute target', () => { + test('symlink with POSIX absolute target', () => { const r = validateEntry( 'node_modules/link', '/etc/passwd', @@ -597,7 +613,72 @@ describe('validateEntry', () => { cwd ) expect(r.ok).toBe(false) - if (!r.ok) expect(r.code).toBe('LINK_OUTSIDE_ROOTS') + // Syntactic rejection fires before the containment check. + if (!r.ok) expect(r.code).toBe('ABSOLUTE_PATH') + }) + + test('symlink with Windows absolute target', () => { + const r = validateEntry( + 'node_modules/link', + 'C:\\Windows\\System32\\drivers\\etc\\hosts', + 'SymbolicLink', + allowedRoots, + cwd + ) + expect(r.ok).toBe(false) + if (!r.ok) expect(r.code).toBe('ABSOLUTE_PATH') + }) + + test('symlink with Windows drive-relative target', () => { + // `C:foo` resolves against the per-drive working directory on Windows, + // not the symlink's containing directory — so even if it happens to + // land under an allowed root after `path.resolve`, its extract-time + // semantics are different. Reject the syntactic form outright. + const r = validateEntry( + 'node_modules/link', + 'C:foo', + 'SymbolicLink', + allowedRoots, + cwd + ) + expect(r.ok).toBe(false) + if (!r.ok) expect(r.code).toBe('ABSOLUTE_PATH') + }) + + test('symlink with UNC target', () => { + const r = validateEntry( + 'node_modules/link', + '\\\\attacker\\share\\payload', + 'SymbolicLink', + allowedRoots, + cwd + ) + expect(r.ok).toBe(false) + if (!r.ok) expect(r.code).toBe('UNC_PATH') + }) + + test('symlink with UNC long-path prefix target', () => { + const r = validateEntry( + 'node_modules/link', + '\\\\?\\C:\\foo', + 'SymbolicLink', + allowedRoots, + cwd + ) + expect(r.ok).toBe(false) + if (!r.ok) expect(r.code).toBe('UNC_PATH') + }) + + test('symlink with forward-slash UNC target', () => { + const r = validateEntry( + 'node_modules/link', + '//attacker/share/payload', + 'SymbolicLink', + allowedRoots, + cwd + ) + expect(r.ok).toBe(false) + if (!r.ok) expect(r.code).toBe('UNC_PATH') }) test('symlink with .. traversal', () => { @@ -612,7 +693,7 @@ describe('validateEntry', () => { if (!r.ok) expect(r.code).toBe('LINK_OUTSIDE_ROOTS') }) - test('hardlink with absolute target', () => { + test('hardlink with POSIX absolute target', () => { const r = validateEntry( 'node_modules/dup', '/etc/passwd', @@ -621,7 +702,31 @@ describe('validateEntry', () => { cwd ) expect(r.ok).toBe(false) - if (!r.ok) expect(r.code).toBe('LINK_OUTSIDE_ROOTS') + if (!r.ok) expect(r.code).toBe('ABSOLUTE_PATH') + }) + + test('hardlink with Windows absolute target', () => { + const r = validateEntry( + 'node_modules/dup', + 'C:\\Windows\\System32\\drivers\\etc\\hosts', + 'Link', + allowedRoots, + cwd + ) + expect(r.ok).toBe(false) + if (!r.ok) expect(r.code).toBe('ABSOLUTE_PATH') + }) + + test('hardlink with UNC target', () => { + const r = validateEntry( + 'node_modules/dup', + '\\\\attacker\\share\\payload', + 'Link', + allowedRoots, + cwd + ) + expect(r.ok).toBe(false) + if (!r.ok) expect(r.code).toBe('UNC_PATH') }) test('hardlink with .. traversal', () => { @@ -641,9 +746,11 @@ describe('validateEntry', () => { // Entry 1: node_modules/x → /etc (a symlink target outside allowed roots) // Entry 2: node_modules/x/payload (which extracts THROUGH entry 1's link // and lands at /etc/payload) - // We must reject Entry 1 because its target is outside the allowed roots. - // Entry 2's nominal path looks safe, so we cannot rely on per-entry path - // validation alone — we must catch the symlink target. + // We must reject Entry 1. The absolute syntactic form `/etc` is now + // rejected up-front (ABSOLUTE_PATH) before the containment check, which + // is strictly stronger than the previous LINK_OUTSIDE_ROOTS reject + // (the entry never gets the chance to be "absolute but happens to + // resolve under a root"). const r = validateEntry( 'node_modules/x', '/etc', @@ -652,7 +759,28 @@ describe('validateEntry', () => { cwd ) expect(r.ok).toBe(false) - if (!r.ok) expect(r.code).toBe('LINK_OUTSIDE_ROOTS') + if (!r.ok) expect(r.code).toBe('ABSOLUTE_PATH') + }) + + test('symlink with absolute target that nominally lands under an allowed root is still rejected', () => { + // Before the syntactic fix, a symlink whose absolute target happened + // to resolve under an allowed root (e.g. POSIX `/workspace/.cache/x` + // when /workspace/.cache is allowed) would pass validation. With the + // syntactic reject, the absolute form alone is enough to fail — we + // don't trust that extract-time resolution will match what + // `path.resolve` says at validation time. + const absoluteUnderRoot = IS_WINDOWS + ? 'C:\\workspace\\.cache\\x' + : '/workspace/.cache/x' + const r = validateEntry( + 'node_modules/link', + absoluteUnderRoot, + 'SymbolicLink', + allowedRoots, + cwd + ) + expect(r.ok).toBe(false) + if (!r.ok) expect(r.code).toBe('ABSOLUTE_PATH') }) test('symlink with empty target is allowed (filtered as undefined linkpath)', () => { diff --git a/packages/cache/docs/path-validation-test-plan.md b/packages/cache/docs/path-validation-test-plan.md index 619e8fcd0d..601d9c54a3 100644 --- a/packages/cache/docs/path-validation-test-plan.md +++ b/packages/cache/docs/path-validation-test-plan.md @@ -37,7 +37,7 @@ integrity failures from ordinary cache-miss/network errors. | [`src/options.ts`](../src/options.ts) (new `pathValidation` field) | [`__tests__/options.test.ts`](../__tests__/options.test.ts) | | [`src/cache.ts`](../src/cache.ts) (forwarding + error re-throw) | [`__tests__/restoreCache.test.ts`](../__tests__/restoreCache.test.ts), [`__tests__/restoreCacheV2.test.ts`](../__tests__/restoreCacheV2.test.ts) | -## Unit tests — pure logic (`pathValidation.test.ts`, 86 cases) +## Unit tests — pure logic (`pathValidation.test.ts`) These exercise the platform-agnostic validation logic with no filesystem or network. The suite is split into two groups. @@ -84,12 +84,22 @@ and platform-specific behavior: UNC long-path prefix `\\?\C:\...` - **NUL byte attacks**: NUL in path, NUL in symlink target - **Symlink attacks**: - - Absolute symlink target - - Symlink target with `..` traversal + - **Syntactic link-target rejects** (these fire before the containment check, + on the same allow-list as entry-path syntax): POSIX absolute (`/etc/passwd`), + Windows absolute (`C:\Windows\…`), Windows drive-relative (`C:foo` — has + per-drive-CWD semantics on Windows), backslash UNC (`\\srv\share\x`), + forward-slash UNC (`//srv/share/x`), UNC long-path prefix (`\\?\C:\…`) + - **Absolute target that nominally lands under an allowed root is still + rejected** — defense-in-depth regression: `path.resolve` agreeing with + the allowed root at validation time isn't trusted, because Windows + extract-time resolution semantics can differ + - Symlink target with `..` traversal (rejected as `LINK_OUTSIDE_ROOTS`) - **Symlink-then-write-through-link** (the critical TOCTOU-style attack: archive declares `cache/link → /tmp/evil` followed by `cache/link/file`) - Self-referential symlink to `.` -- **Hardlink attacks**: absolute target, `..` traversal +- **Hardlink attacks**: + - Syntactic link-target rejects: POSIX absolute, Windows absolute, UNC + - `..` traversal (rejected as `LINK_OUTSIDE_ROOTS`) - **Unsupported entry types**: `CharacterDevice`, `BlockDevice`, `FIFO` - **Header-only types accepted unconditionally** (these carry no path content): `GlobalExtendedHeader`, `ExtendedHeader`, `NextFileHasLongPath`, `NextFileHasLongLinkpath` @@ -100,7 +110,7 @@ and platform-specific behavior: - Shows up to N items verbatim - Truncates the tail with a `(... and N more)` summary line -## Integration tests — real archives (`listAndValidate.test.ts`, 14 cases) +## Integration tests — real archives (`listAndValidate.test.ts`) These build small tar archives in memory using `tar.Header`, write them to disk, and run them through the production parser. They cover the gzip and zstd @@ -126,7 +136,7 @@ Skipped on hosts without `zstd` installed. - Traversal in zstd archive → 1 violation - `ZstdWithoutLong` compression method also works -## Integration tests — mocked downstream (`tarPathValidation.test.ts`, 15 cases) +## Integration tests — mocked downstream (`tarPathValidation.test.ts`) These mock `listAndValidate` so the test can deterministically inject "violation lists" and observe `extractTar`'s reaction. They mock `@actions/exec`, @@ -194,6 +204,3 @@ npx jest --testTimeout 70000 \ packages/cache/__tests__/listAndValidate.test.ts \ packages/cache/__tests__/tarPathValidation.test.ts ``` - -Total path-validation test cases: **115** (86 unit + 14 real-archive + 15 mocked). -Combined with the regression updates, the cache package runs **237 tests** total. diff --git a/packages/cache/src/internal/pathValidation.ts b/packages/cache/src/internal/pathValidation.ts index 3cfc8ef108..3caa670799 100644 --- a/packages/cache/src/internal/pathValidation.ts +++ b/packages/cache/src/internal/pathValidation.ts @@ -105,8 +105,16 @@ const HEADER_ONLY_ENTRY_TYPES = new Set([ * Characters that signal a glob portion of a declared cache path. Anything to * the right of the first segment containing one of these is stripped when * deriving the longest non-glob prefix. + * + * `!` is intentionally NOT included here: + * - As a leading character it indicates pattern negation, which is handled + * separately by `deriveAllowedRoots` (whole pattern is dropped). + * - In any other position it has no special meaning to `@actions/glob` + * (minimatch is invoked with extglobs disabled), so treating it as a + * metachar would truncate prefixes for legitimate paths containing `!` + * (e.g. `cache/my!dir/data`), silently broadening the allowed root. */ -const GLOB_CHAR_REGEX = /[*?[\]{}!]/ +const GLOB_CHAR_REGEX = /[*?[\]{}]/ /** * Returns the working directory used for cache extraction. Mirrors the value @@ -199,9 +207,10 @@ function deriveRoot(declaredPath: string, extractCwd: string): string { } /** - * True if `seg` contains a glob metacharacter that isn't part of an - * env-var reference. Strips `${VAR}`, `$VAR`, and `%VAR%` first so the - * curly braces in `${VAR}` aren't misread as a brace-glob. + * True if `seg` contains a glob metacharacter (per {@link GLOB_CHAR_REGEX}) + * that isn't part of an env-var reference. Strips `${VAR}`, `$VAR`, and + * `%VAR%` first so the curly braces in `${VAR}` aren't misread as a + * brace-glob. * * The patterns `\$\{[^}]+\}` and `%[^%]+%` are O(n²) on pathological * input (e.g. a long run of `${` with no closing `}`). That is not a @@ -310,50 +319,15 @@ export function validateEntry( } } - // Reject NUL bytes anywhere in the path. - if (entryPath.includes('\0')) { - return { - ok: false, - code: 'NUL_BYTE', - resolved: resolvedEntry, - reason: 'NUL byte in entry path' - } - } - - // Reject UNC paths. Check the original string before separator normalization - // because UNC is identified by leading `\\` or `//`. - if ( - entryPath.startsWith('\\\\') || - entryPath.startsWith('//') || - /^[\\/]{2}\?[\\/]/.test(entryPath) - ) { - return { - ok: false, - code: 'UNC_PATH', - resolved: resolvedEntry, - reason: `UNC path not allowed: ${entryPath}` - } - } - - // Reject Windows-style absolute paths and drive-relative paths (`C:foo`). - // These can be present in archives created on Windows even when extracted - // on POSIX, so we reject them on every platform. - if (/^[a-zA-Z]:/.test(entryPath)) { - return { - ok: false, - code: 'ABSOLUTE_PATH', - resolved: resolvedEntry, - reason: `absolute or drive-relative path not allowed: ${entryPath}` - } - } - - // Reject POSIX absolute paths. - if (entryPath.startsWith('/') || entryPath.startsWith('\\')) { + // Apply syntactic rejections (NUL byte, UNC, Windows absolute / + // drive-relative, POSIX absolute) to the entry path itself. + const entrySyntaxError = checkPathSyntax(entryPath, 'entry path') + if (entrySyntaxError) { return { ok: false, - code: 'ABSOLUTE_PATH', + code: entrySyntaxError.code, resolved: resolvedEntry, - reason: `absolute path not allowed: ${entryPath}` + reason: entrySyntaxError.reason } } @@ -382,12 +356,21 @@ export function validateEntry( // Hardlink targets are resolved relative to the extraction CWD. resolvedLink = path.resolve(extractCwd, linkPath) } - if (linkPath.includes('\0')) { + // Apply the same syntactic rejections to the link target. Even if a + // special-form link target happens to land under an allowed root after + // `path.resolve`, its semantics at actual extraction time can differ + // — most notably on Windows, where `C:foo` resolves against the + // per-drive working directory rather than the entry's directory, and + // UNC targets bypass the extraction root entirely. Rejecting these + // syntactic forms outright is defense-in-depth on top of the + // containment check below. + const linkSyntaxError = checkPathSyntax(linkPath, 'link target') + if (linkSyntaxError) { return { ok: false, - code: 'NUL_BYTE', + code: linkSyntaxError.code, resolved: resolvedLink, - reason: 'NUL byte in link target' + reason: linkSyntaxError.reason } } if (!isUnderAnyPreparedRoot(resolvedLink, prepared)) { @@ -403,6 +386,69 @@ export function validateEntry( return {ok: true} } +/** + * Categorization of a syntactically-unsafe path string. Returned by + * {@link checkPathSyntax} when a path/link target fails one of the + * pre-resolution syntactic checks. + */ +interface PathSyntaxError { + code: 'NUL_BYTE' | 'UNC_PATH' | 'ABSOLUTE_PATH' + reason: string +} + +/** + * Reject paths whose syntactic form is unsafe regardless of where they + * nominally resolve: NUL bytes, UNC paths (`\\server\share`, `//srv/x`, + * `\\?\…`), Windows absolute / drive-relative paths (`C:\…`, `C:foo`), + * and POSIX absolute paths (`/foo`, `\foo`). + * + * Applied to BOTH archive entry paths and link targets. A link target + * with a special-form path can mean something completely different at + * extract time than its `path.resolve(...)` output suggests at + * validation time — for example, on Windows `C:foo` resolves against + * the per-drive working directory rather than the link's containing + * directory — so we reject these forms outright even if they would + * happen to land under an allowed root. + * + * `kind` is woven into the reason string so the violation message tells + * the user whether the rejected path was an entry path or a link target. + */ +function checkPathSyntax( + p: string, + kind: 'entry path' | 'link target' +): PathSyntaxError | undefined { + // Reject NUL bytes anywhere in the path. + if (p.includes('\0')) { + return {code: 'NUL_BYTE', reason: `NUL byte in ${kind}`} + } + // Reject UNC paths. Check the original string before any separator + // normalization because UNC is identified by leading `\\` or `//`. + if ( + p.startsWith('\\\\') || + p.startsWith('//') || + /^[\\/]{2}\?[\\/]/.test(p) + ) { + return {code: 'UNC_PATH', reason: `UNC ${kind} not allowed: ${p}`} + } + // Reject Windows-style absolute paths and drive-relative paths (`C:foo`). + // These can be present in archives created on Windows even when extracted + // on POSIX, so we reject them on every platform. + if (/^[a-zA-Z]:/.test(p)) { + return { + code: 'ABSOLUTE_PATH', + reason: `absolute or drive-relative ${kind} not allowed: ${p}` + } + } + // Reject POSIX absolute paths. + if (p.startsWith('/') || p.startsWith('\\')) { + return { + code: 'ABSOLUTE_PATH', + reason: `absolute ${kind} not allowed: ${p}` + } + } + return undefined +} + function resolveEntry(entryPath: string, extractCwd: string): string { // Tar paths are POSIX-style. Convert to native separators so path.resolve // produces the right thing on Windows. From cc4da7ddff333ea7818159e2323e2c98690eeaa2 Mon Sep 17 00:00:00 2001 From: Jason Ginchereau Date: Wed, 20 May 2026 11:47:09 -1000 Subject: [PATCH 05/10] Fix lint issues --- .../cache/__tests__/listAndValidate.test.ts | 20 ++------ .../cache/__tests__/pathValidation.test.ts | 46 ++++++++----------- .../cache/__tests__/tarPathValidation.test.ts | 12 ++--- packages/cache/src/cache.ts | 4 +- .../cache/src/internal/listAndValidate.ts | 4 +- packages/cache/src/internal/pathValidation.ts | 14 ++---- packages/cache/src/internal/tar.ts | 7 ++- 7 files changed, 36 insertions(+), 71 deletions(-) diff --git a/packages/cache/__tests__/listAndValidate.test.ts b/packages/cache/__tests__/listAndValidate.test.ts index 33410f0444..743ad6375b 100644 --- a/packages/cache/__tests__/listAndValidate.test.ts +++ b/packages/cache/__tests__/listAndValidate.test.ts @@ -125,10 +125,7 @@ describe('listAndValidate (real archives)', () => { const archive = buildTarArchive([ {path: 'cache/file.txt', type: 'File', body: Buffer.from('hi')} ]) - const archivePath = writeArchive( - 'clean.tar.gz', - zlib.gzipSync(archive) - ) + const archivePath = writeArchive('clean.tar.gz', zlib.gzipSync(archive)) const violations = await listAndValidate( archivePath, CompressionMethod.Gzip, @@ -259,10 +256,7 @@ describe('listAndValidate (real archives)', () => { }, {path: 'cache/sub/c.txt', type: 'File', body: Buffer.from('4')} ]) - const archivePath = writeArchive( - 'mixed.tar.gz', - zlib.gzipSync(archive) - ) + const archivePath = writeArchive('mixed.tar.gz', zlib.gzipSync(archive)) const violations = await listAndValidate( archivePath, CompressionMethod.Gzip, @@ -277,10 +271,7 @@ describe('listAndValidate (real archives)', () => { const archive = buildTarArchive([ {path: 'cache/dev', type: 'CharacterDevice'} ]) - const archivePath = writeArchive( - 'chardev.tar.gz', - zlib.gzipSync(archive) - ) + const archivePath = writeArchive('chardev.tar.gz', zlib.gzipSync(archive)) const violations = await listAndValidate( archivePath, CompressionMethod.Gzip, @@ -295,10 +286,7 @@ describe('listAndValidate (real archives)', () => { const archive = buildTarArchive([ {path: 'cache/tiny.txt', type: 'File', body: Buffer.from('1')} ]) - const archivePath = writeArchive( - 'single.tar.gz', - zlib.gzipSync(archive) - ) + const archivePath = writeArchive('single.tar.gz', zlib.gzipSync(archive)) const violations = await listAndValidate( archivePath, CompressionMethod.Gzip, diff --git a/packages/cache/__tests__/pathValidation.test.ts b/packages/cache/__tests__/pathValidation.test.ts index f8b57cad1b..43463226f9 100644 --- a/packages/cache/__tests__/pathValidation.test.ts +++ b/packages/cache/__tests__/pathValidation.test.ts @@ -8,7 +8,8 @@ import { } from '../src/internal/pathValidation' const IS_WINDOWS = process.platform === 'win32' -const CASE_INSENSITIVE = process.platform === 'win32' || process.platform === 'darwin' +const CASE_INSENSITIVE = + process.platform === 'win32' || process.platform === 'darwin' describe('deriveAllowedRoots', () => { const cwd = IS_WINDOWS ? 'C:\\workspace' : '/workspace' @@ -63,9 +64,7 @@ describe('deriveAllowedRoots', () => { describe('negation handling', () => { test('negation pattern (! prefix) is dropped from allowed roots', () => { - const input = IS_WINDOWS - ? ['!C:\\foo\\secret'] - : ['!/foo/secret'] + const input = IS_WINDOWS ? ['!C:\\foo\\secret'] : ['!/foo/secret'] expect(deriveAllowedRoots(input, cwd)).toEqual([]) }) @@ -107,9 +106,9 @@ describe('deriveAllowedRoots', () => { process.env['CACHE_TEST_ROOT'] = IS_WINDOWS ? 'C:\\envroot' : '/envroot' try { const expected = IS_WINDOWS ? 'C:\\envroot\\sub' : '/envroot/sub' - expect( - deriveAllowedRoots(['${CACHE_TEST_ROOT}/sub'], cwd) - ).toEqual([expected]) + expect(deriveAllowedRoots(['${CACHE_TEST_ROOT}/sub'], cwd)).toEqual([ + expected + ]) } finally { if (original === undefined) delete process.env['CACHE_TEST_ROOT'] else process.env['CACHE_TEST_ROOT'] = original @@ -132,12 +131,14 @@ describe('deriveAllowedRoots', () => { test('%VAR% Windows-style expands an environment variable', () => { const original = process.env['CACHE_TEST_WIN_ROOT'] - process.env['CACHE_TEST_WIN_ROOT'] = IS_WINDOWS ? 'C:\\winroot' : '/winroot' + process.env['CACHE_TEST_WIN_ROOT'] = IS_WINDOWS + ? 'C:\\winroot' + : '/winroot' try { const expected = IS_WINDOWS ? 'C:\\winroot\\sub' : '/winroot/sub' - expect( - deriveAllowedRoots(['%CACHE_TEST_WIN_ROOT%/sub'], cwd) - ).toEqual([expected]) + expect(deriveAllowedRoots(['%CACHE_TEST_WIN_ROOT%/sub'], cwd)).toEqual([ + expected + ]) } finally { if (original === undefined) delete process.env['CACHE_TEST_WIN_ROOT'] else process.env['CACHE_TEST_WIN_ROOT'] = original @@ -170,9 +171,9 @@ describe('deriveAllowedRoots', () => { const expected = IS_WINDOWS ? 'C:\\envroot*odd\\sub' : '/envroot*odd/sub' - expect( - deriveAllowedRoots(['${CACHE_TEST_ROOT}/sub'], cwd) - ).toEqual([expected]) + expect(deriveAllowedRoots(['${CACHE_TEST_ROOT}/sub'], cwd)).toEqual([ + expected + ]) } finally { if (original === undefined) delete process.env['CACHE_TEST_ROOT'] else process.env['CACHE_TEST_ROOT'] = original @@ -290,7 +291,7 @@ describe('validateEntry', () => { test('deeply nested file', () => { const r = validateEntry( - 'node_modules/' + Array(50).fill('sub').join('/') + '/foo.js', + `node_modules/${Array(50).fill('sub').join('/')}/foo.js`, undefined, 'File', allowedRoots, @@ -379,7 +380,7 @@ describe('validateEntry', () => { test('filename containing .. as substring (not segment)', () => { for (const name of ['..hidden', 'file..txt', 'a..b/c']) { const r = validateEntry( - 'node_modules/' + name, + `node_modules/${name}`, undefined, 'File', allowedRoots, @@ -529,13 +530,7 @@ describe('validateEntry', () => { }) test('Windows drive-relative (no slash)', () => { - const r = validateEntry( - 'C:foo', - undefined, - 'File', - allowedRoots, - cwd - ) + const r = validateEntry('C:foo', undefined, 'File', allowedRoots, cwd) expect(r.ok).toBe(false) if (!r.ok) expect(r.code).toBe('ABSOLUTE_PATH') }) @@ -1069,10 +1064,7 @@ describe('formatViolationSummary', () => { }) test('shows up to maxShown items verbatim', () => { - const out = formatViolationSummary( - [v('a'), v('b'), v('c')], - 5 - ) + const out = formatViolationSummary([v('a'), v('b'), v('c')], 5) expect(out).toContain(' - a') expect(out).toContain(' - b') expect(out).toContain(' - c') diff --git a/packages/cache/__tests__/tarPathValidation.test.ts b/packages/cache/__tests__/tarPathValidation.test.ts index d2f186d9da..3fad47d297 100644 --- a/packages/cache/__tests__/tarPathValidation.test.ts +++ b/packages/cache/__tests__/tarPathValidation.test.ts @@ -203,9 +203,7 @@ describe('extractTar path validation integration', () => { }) test('clean archive: no throw, extraction proceeds normally', async () => { - jest - .spyOn(listAndValidate, 'listAndValidate') - .mockResolvedValue([]) + jest.spyOn(listAndValidate, 'listAndValidate').mockResolvedValue([]) const warnSpy = jest.spyOn(core, 'warning').mockImplementation() const execMock = jest.spyOn(exec, 'exec').mockResolvedValue(0) @@ -234,9 +232,7 @@ describe('extractTar path validation integration', () => { } catch (err) { expect(err).toBeInstanceOf(CacheIntegrityError) expect((err as CacheIntegrityError).code).toBe('PARSE_ERROR') - expect((err as CacheIntegrityError).message).toMatch( - /tar parse error/ - ) + expect((err as CacheIntegrityError).message).toMatch(/tar parse error/) } expect(execMock).not.toHaveBeenCalled() expect(mkdirMock).not.toHaveBeenCalled() @@ -341,9 +337,7 @@ describe('extractTar path validation integration', () => { pathValidation: 'warn' }) - expect(listMock.mock.calls[0][1]).toBe( - CompressionMethod.ZstdWithoutLong - ) + expect(listMock.mock.calls[0][1]).toBe(CompressionMethod.ZstdWithoutLong) }) }) }) diff --git a/packages/cache/src/cache.ts b/packages/cache/src/cache.ts index e4a0f7ac97..307392a2e7 100644 --- a/packages/cache/src/cache.ts +++ b/packages/cache/src/cache.ts @@ -17,9 +17,7 @@ import {HttpClientError} from '@actions/http-client' export type {DownloadOptions, UploadOptions} export {CacheIntegrityError} -export type { - CacheIntegrityErrorCode -} from './internal/cacheIntegrityError.js' +export type {CacheIntegrityErrorCode} from './internal/cacheIntegrityError.js' export type { PathValidationMode, PathValidationViolation diff --git a/packages/cache/src/internal/listAndValidate.ts b/packages/cache/src/internal/listAndValidate.ts index e820039576..da3b9ecc2b 100644 --- a/packages/cache/src/internal/listAndValidate.ts +++ b/packages/cache/src/internal/listAndValidate.ts @@ -172,9 +172,7 @@ async function streamArchiveTo( const tail = stderrTruncated ? ' (stderr truncated)' : '' reject( new Error( - `zstd ${cause}${ - zstdStderr ? `: ${zstdStderr.trim()}` : '' - }${tail}` + `zstd ${cause}${zstdStderr ? `: ${zstdStderr.trim()}` : ''}${tail}` ) ) } diff --git a/packages/cache/src/internal/pathValidation.ts b/packages/cache/src/internal/pathValidation.ts index 3caa670799..c5702eca1b 100644 --- a/packages/cache/src/internal/pathValidation.ts +++ b/packages/cache/src/internal/pathValidation.ts @@ -55,7 +55,7 @@ export type ValidationResult = */ export interface PreparedAllowedRoots { readonly caseInsensitive: boolean - readonly roots: ReadonlyArray + readonly roots: readonly PreparedRoot[] } interface PreparedRoot { @@ -231,7 +231,10 @@ function segmentHasGlob(seg: string): boolean { function expandEnvVars(input: string): string { let result = input // ${VAR} — see segmentHasGlob for the polynomial-redos rationale. - result = result.replace(/\$\{([^}]+)\}/g, (_, name) => process.env[name] ?? '') // lgtm[js/polynomial-redos] + result = result.replace( + /\$\{([^}]+)\}/g, + (_, name) => process.env[name] ?? '' + ) // lgtm[js/polynomial-redos] // $VAR (POSIX-style identifier) result = result.replace( /\$([A-Za-z_][A-Za-z0-9_]*)/g, @@ -456,13 +459,6 @@ function resolveEntry(entryPath: string, extractCwd: string): string { return path.resolve(extractCwd, native) } -function isUnderAnyRoot(resolved: string, roots: string[]): boolean { - for (const root of roots) { - if (isUnderRoot(resolved, root)) return true - } - return false -} - /** * Precompute a {@link PreparedAllowedRoots} from a raw roots array. Call * once per `listAndValidate` invocation and reuse for every entry. diff --git a/packages/cache/src/internal/tar.ts b/packages/cache/src/internal/tar.ts index 5425dfc14d..33cd29f13e 100644 --- a/packages/cache/src/internal/tar.ts +++ b/packages/cache/src/internal/tar.ts @@ -374,10 +374,9 @@ function reportViolations( core.info(formatViolationSummary(violations)) for (const v of violations) { core.debug( - `path-validation: code=${v.code} type=${v.entryType} path=${v.path}` + - (v.linkpath ? ` linkpath=${v.linkpath}` : '') + - ` resolved=${v.resolved}` + - ` reason=${v.reason}` + `path-validation: code=${v.code} type=${v.entryType} path=${v.path}${ + v.linkpath ? ` linkpath=${v.linkpath}` : '' + } resolved=${v.resolved} reason=${v.reason}` ) } } From 6cdcf724e25dabf4a68da28751b429d7fc52e330 Mon Sep 17 00:00:00 2001 From: Jason Ginchereau Date: Wed, 20 May 2026 11:51:22 -1000 Subject: [PATCH 06/10] Update dependencies to resolve audit issues --- packages/cache/package-lock.json | 3 +++ 1 file changed, 3 insertions(+) diff --git a/packages/cache/package-lock.json b/packages/cache/package-lock.json index 58eaae5c88..36e4a00a77 100644 --- a/packages/cache/package-lock.json +++ b/packages/cache/package-lock.json @@ -25,6 +25,9 @@ "@types/node": "^25.6.0", "@types/semver": "^7.7.1", "typescript": "^5.9.3" + }, + "engines": { + "node": ">=20.0.0" } }, "node_modules/@actions/core": { From b2734aba362766e0fc64bfbbcba345ac71bbfd9f Mon Sep 17 00:00:00 2001 From: Jason Ginchereau Date: Wed, 20 May 2026 11:58:27 -1000 Subject: [PATCH 07/10] Remove suppressions that don't work --- packages/cache/src/internal/pathValidation.ts | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/packages/cache/src/internal/pathValidation.ts b/packages/cache/src/internal/pathValidation.ts index c5702eca1b..e268de43e9 100644 --- a/packages/cache/src/internal/pathValidation.ts +++ b/packages/cache/src/internal/pathValidation.ts @@ -217,14 +217,14 @@ function deriveRoot(declaredPath: string, extractCwd: string): string { * security concern here: `seg` originates from the calling workflow's * own declared cache `paths:`, so a hostile value would only DoS the * same workflow that supplied it — it doesn't cross a trust boundary. - * The `lgtm` comments below suppress CodeQL's `js/polynomial-redos` - * alert on that basis. + * CodeQL flags this as `js/polynomial-redos`; the alert should be + * dismissed in the Security tab with this comment as the rationale. */ function segmentHasGlob(seg: string): boolean { const stripped = seg - .replace(/\$\{[^}]+\}/g, '') // lgtm[js/polynomial-redos] + .replace(/\$\{[^}]+\}/g, '') .replace(/\$[A-Za-z_][A-Za-z0-9_]*/g, '') - .replace(/%[^%]+%/g, '') // lgtm[js/polynomial-redos] + .replace(/%[^%]+%/g, '') return GLOB_CHAR_REGEX.test(stripped) } @@ -234,14 +234,14 @@ function expandEnvVars(input: string): string { result = result.replace( /\$\{([^}]+)\}/g, (_, name) => process.env[name] ?? '' - ) // lgtm[js/polynomial-redos] + ) // $VAR (POSIX-style identifier) result = result.replace( /\$([A-Za-z_][A-Za-z0-9_]*)/g, (_, name) => process.env[name] ?? '' ) // %VAR% (Windows-style) - result = result.replace(/%([^%]+)%/g, (_, name) => process.env[name] ?? '') // lgtm[js/polynomial-redos] + result = result.replace(/%([^%]+)%/g, (_, name) => process.env[name] ?? '') return result } From 6e8379ea7a2a89b5dfe5a19a8aa5dd609c4e4480 Mon Sep 17 00:00:00 2001 From: Jason Ginchereau Date: Fri, 19 Jun 2026 09:17:41 -1000 Subject: [PATCH 08/10] Implement PAX header validation and path safety checks in tar extraction - Added a new test suite for parser-differential bypass detection in tar archives. - Introduced length-correct PAX extended-header re-parsing to catch discrepancies between node-tar and system tar. - Enhanced the listAndValidate function to return approved names alongside violations for better extraction control. - Implemented checks for unsafe characters and glob metacharacters in entry paths. - Updated the tar extraction logic to utilize an allow-list for approved entries when path validation is in error mode. - Added utility functions for writing temporary allow-list files for system tar extraction. --- .../cache/__tests__/listAndValidate.test.ts | 12 +- packages/cache/__tests__/pax-reparse.test.ts | 255 +++++++++++ .../cache/__tests__/tarPathValidation.test.ts | 148 +++++-- .../tarPathValidationAttacks.test.ts | 403 ++++++++++++++++++ .../cache/src/internal/listAndValidate.ts | 183 +++++++- packages/cache/src/internal/pathValidation.ts | 5 + packages/cache/src/internal/pax-reparse.ts | 271 ++++++++++++ packages/cache/src/internal/tar.ts | 102 ++++- 8 files changed, 1332 insertions(+), 47 deletions(-) create mode 100644 packages/cache/__tests__/pax-reparse.test.ts create mode 100644 packages/cache/__tests__/tarPathValidationAttacks.test.ts create mode 100644 packages/cache/src/internal/pax-reparse.ts diff --git a/packages/cache/__tests__/listAndValidate.test.ts b/packages/cache/__tests__/listAndValidate.test.ts index 743ad6375b..dcdda54a0b 100644 --- a/packages/cache/__tests__/listAndValidate.test.ts +++ b/packages/cache/__tests__/listAndValidate.test.ts @@ -5,7 +5,17 @@ import * as zlib from 'zlib' import {execSync} from 'child_process' import {Header} from 'tar' import {CompressionMethod} from '../src/internal/constants' -import {listAndValidate} from '../src/internal/listAndValidate' +import {listAndValidate as listAndValidateImpl} from '../src/internal/listAndValidate' + +/** + * Thin wrapper returning only the violations array, so the many existing + * assertions below can keep treating the result as a flat list. New tests that + * need the `approvedNames` allow-list call `listAndValidateImpl` directly. + */ +const listAndValidate = async ( + ...args: Parameters +): Promise>['violations']> => + (await listAndValidateImpl(...args)).violations /** * Real-archive integration tests for listAndValidate. These build small tar diff --git a/packages/cache/__tests__/pax-reparse.test.ts b/packages/cache/__tests__/pax-reparse.test.ts new file mode 100644 index 0000000000..4e052841a2 --- /dev/null +++ b/packages/cache/__tests__/pax-reparse.test.ts @@ -0,0 +1,255 @@ +import { + parsePaxLengthCorrect, + crossCheckMetaBodies, + PAX_KNOWN_KEYS +} from '../src/internal/pax-reparse' + +/** Build a length-correct PAX record string for `=` content. */ +function rec(content: string): string { + const base = 1 + Buffer.byteLength(content) + 1 // space + content + LF + let len = base + String(base).length + if (String(len).length !== String(base).length) { + len = base + String(len).length + } + return `${len} ${content}\n` +} + +describe('parsePaxLengthCorrect', () => { + test('single well-formed record', () => { + const buf = Buffer.from('17 path=safe.txt\n', 'ascii') + const {records, ok} = parsePaxLengthCorrect(buf) + expect(ok).toBe(true) + expect(records['path'].toString('utf8')).toBe('safe.txt') + }) + + test('newline-in-value: the embedded fake record is swallowed by comment', () => { + // This is the F2 PAX body. A naive split('\n') parser (node-tar) ends up + // with path=safe.txt; the length-correct parser must yield the real + // (malicious) path and treat the rest as the comment value. + const buf = Buffer.from( + '42 path=../../../../../../tmp/zip_slip_F2\n30 comment=x\n17 path=safe.txt\n', + 'ascii' + ) + const {records, ok} = parsePaxLengthCorrect(buf) + expect(ok).toBe(true) + expect(records['path'].toString('utf8')).toBe( + '../../../../../../tmp/zip_slip_F2' + ) + expect(records['comment'].toString('utf8')).toBe('x\n17 path=safe.txt') + // Crucially NOT safe.txt. + expect(records['path'].toString('utf8')).not.toBe('safe.txt') + }) + + test('empty buffer parses to empty record set', () => { + const {records, ok} = parsePaxLengthCorrect(Buffer.alloc(0)) + expect(ok).toBe(true) + expect(Object.keys(records)).toHaveLength(0) + }) + + test('truncated record: not ok', () => { + // length says 42 but the buffer is shorter + const buf = Buffer.from('42 path=too-short\n', 'ascii') + const {ok} = parsePaxLengthCorrect(buf) + expect(ok).toBe(false) + }) + + test('non-numeric length prefix: not ok', () => { + const buf = Buffer.from('xx path=foo\n', 'ascii') + const {ok} = parsePaxLengthCorrect(buf) + expect(ok).toBe(false) + }) + + test('missing trailing newline: not ok', () => { + // "16 path=safe.txt" is 16 bytes but the last byte is 't', not '\n' + const buf = Buffer.from('16 path=safe.txt', 'ascii') + const {ok} = parsePaxLengthCorrect(buf) + expect(ok).toBe(false) + }) + + test('length spans past buffer end: not ok', () => { + const buf = Buffer.from('99 path=x\n', 'ascii') + const {ok} = parsePaxLengthCorrect(buf) + expect(ok).toBe(false) + }) + + test('record without "=" : not ok', () => { + const buf = Buffer.from('10 nokeyval\n', 'ascii') + const {ok} = parsePaxLengthCorrect(buf) + expect(ok).toBe(false) + }) + + test('record with correct length+LF but no "=" : not ok', () => { + // "5 ab\n" is exactly 5 bytes and ends in LF, but has no '='. This + // exercises the missing-separator branch (distinct from a length/LF + // mismatch). + const buf = Buffer.from('5 ab\n', 'ascii') + const {ok} = parsePaxLengthCorrect(buf) + expect(ok).toBe(false) + }) + + test('value may itself contain "=" — only the first one is the separator', () => { + const buf = Buffer.from(rec('comment=a=b=c'), 'ascii') + const {records, ok} = parsePaxLengthCorrect(buf) + expect(ok).toBe(true) + expect(records['comment'].toString('utf8')).toBe('a=b=c') + }) + + test('zero-length prefix is rejected', () => { + const buf = Buffer.from('0 path=x\n', 'ascii') + const {ok} = parsePaxLengthCorrect(buf) + expect(ok).toBe(false) + }) + + test('high-bit value bytes are preserved as raw Buffer', () => { + const value = Buffer.from([0xc3, 0x28, 0xff]) // invalid utf8 on purpose + const inner = Buffer.concat([Buffer.from('path=', 'ascii'), value]) + // record = " " + inner + "\n" + const base = 1 + inner.length + 1 + let len = base + String(base).length + if (String(len).length !== String(base).length) + len = base + String(len).length + const buf = Buffer.concat([ + Buffer.from(`${len} `, 'ascii'), + inner, + Buffer.from('\n', 'ascii') + ]) + const {records, ok} = parsePaxLengthCorrect(buf) + expect(ok).toBe(true) + expect(Buffer.compare(records['path'], value)).toBe(0) + }) + + test('last write wins for repeated keys', () => { + const buf = Buffer.from('14 path=a.txt\n14 path=b.txt\n', 'ascii') + const {records, ok} = parsePaxLengthCorrect(buf) + expect(ok).toBe(true) + expect(records['path'].toString('utf8')).toBe('b.txt') + }) +}) + +describe('crossCheckMetaBodies', () => { + test('clean PAX path matching node-tar: no violations', () => { + const buf = Buffer.from('17 path=safe.txt\n', 'ascii') + const v = crossCheckMetaBodies([buf], 'safe.txt', undefined) + expect(v).toEqual([]) + }) + + test('F2 path desync: node-tar resolved safe.txt, length-correct disagrees', () => { + const buf = Buffer.from( + '42 path=../../../../../../tmp/zip_slip_F2\n30 comment=x\n17 path=safe.txt\n', + 'ascii' + ) + const v = crossCheckMetaBodies([buf], 'safe.txt', undefined) + expect(v.map(x => x.code)).toContain('PAX_DESYNC') + }) + + test('F2-linkpath desync: node-tar resolved safe/target, length-correct disagrees', () => { + const buf = Buffer.from( + '34 linkpath=../../../../../../tmp\n37 comment=x\n24 linkpath=safe/target\n', + 'ascii' + ) + const v = crossCheckMetaBodies([buf], 'cache/link', 'safe/target') + expect(v.map(x => x.code)).toContain('PAX_DESYNC') + }) + + test('unknown PAX key is rejected', () => { + const content = 'EVIL.placement=1' + const base = 1 + content.length + 1 + let len = base + String(base).length + if (String(len).length !== String(base).length) + len = base + String(len).length + const buf = Buffer.from(`${len} ${content}\n`, 'ascii') + const v = crossCheckMetaBodies([buf], 'cache/x', undefined) + expect(v.map(x => x.code)).toContain('PAX_UNKNOWN_KEY') + }) + + test('known SCHILY/GNU/LIBARCHIVE prefixed keys are accepted', () => { + const records = [ + 'SCHILY.xattr.user.foo=bar', + 'GNU.sparse.realsize=1024', + 'LIBARCHIVE.creationtime=1700000000' + ] + const body = records + .map(content => { + const base = 1 + content.length + 1 + let len = base + String(base).length + if (String(len).length !== String(base).length) + len = base + String(len).length + return `${len} ${content}\n` + }) + .join('') + const v = crossCheckMetaBodies( + [Buffer.from(body, 'ascii')], + 'cache/x', + undefined + ) + expect(v).toEqual([]) + }) + + test('GNU long-name raw body matching entry path: no violation', () => { + // A GNU LongName body is a raw NUL-terminated path with no length prefix. + const raw = Buffer.concat([ + Buffer.from('cache/a/very/long/name.txt', 'ascii'), + Buffer.from([0, 0]) + ]) + const v = crossCheckMetaBodies( + [raw], + 'cache/a/very/long/name.txt', + undefined + ) + expect(v).toEqual([]) + }) + + test('long-name body that matches neither path nor linkpath is flagged', () => { + const raw = Buffer.from('something-unaccountable', 'ascii') + const v = crossCheckMetaBodies([raw], 'cache/x', undefined) + expect(v.map(x => x.code)).toContain('PAX_PARSE_FAIL') + }) + + test('no meta bodies: no violations', () => { + expect(crossCheckMetaBodies([], 'cache/x', undefined)).toEqual([]) + }) + + test('PAX setting both path and linkpath in agreement: no violations', () => { + const body = Buffer.from( + rec('path=cache/link') + rec('linkpath=cache/target'), + 'ascii' + ) + const v = crossCheckMetaBodies([body], 'cache/link', 'cache/target') + expect(v).toEqual([]) + }) + + test('directory path with trailing slash compares equal (no false desync)', () => { + // node-tar may resolve a directory entry without the trailing slash that + // the PAX record carries; normalizeForCompare must treat them as equal. + const body = Buffer.from(rec('path=cache/dir/'), 'ascii') + const v = crossCheckMetaBodies([body], 'cache/dir', undefined) + expect(v).toEqual([]) + }) + + test('multiple PAX bodies merge with last-write-wins before comparison', () => { + const bodies = [ + Buffer.from(rec('path=cache/first'), 'ascii'), + Buffer.from(rec('path=cache/second'), 'ascii') + ] + // node-tar's resolved path is the last write; agreement => no violation. + expect(crossCheckMetaBodies(bodies, 'cache/second', undefined)).toEqual([]) + // Disagreement with the merged (last) value => desync. + expect( + crossCheckMetaBodies(bodies, 'cache/first', undefined).map(x => x.code) + ).toContain('PAX_DESYNC') + }) + + test('GNU long-name raw body matching the link target (not the path)', () => { + const raw = Buffer.concat([ + Buffer.from('cache/sub/target', 'ascii'), + Buffer.from([0]) + ]) + const v = crossCheckMetaBodies([raw], 'cache/link', 'cache/sub/target') + expect(v).toEqual([]) + }) + + test('PAX_KNOWN_KEYS includes path and linkpath', () => { + expect(PAX_KNOWN_KEYS.has('path')).toBe(true) + expect(PAX_KNOWN_KEYS.has('linkpath')).toBe(true) + }) +}) diff --git a/packages/cache/__tests__/tarPathValidation.test.ts b/packages/cache/__tests__/tarPathValidation.test.ts index 3fad47d297..32bdb678ee 100644 --- a/packages/cache/__tests__/tarPathValidation.test.ts +++ b/packages/cache/__tests__/tarPathValidation.test.ts @@ -1,6 +1,7 @@ import * as exec from '@actions/exec' import * as core from '@actions/core' import * as io from '@actions/io' +import * as fs from 'fs' import * as path from 'path' import {CompressionMethod} from '../src/internal/constants' import * as tar from '../src/internal/tar' @@ -82,23 +83,26 @@ describe('extractTar path validation integration', () => { }) test('violations present: exactly one warning, debug per violation, extraction still proceeds', async () => { - jest.spyOn(listAndValidate, 'listAndValidate').mockResolvedValue([ - { - path: '../escape.txt', - resolved: '', - entryType: 'File', - code: 'OUTSIDE_ROOTS', - reason: 'escapes allowed roots' - }, - { - path: 'cache/link', - linkpath: '/etc/passwd', - resolved: '', - entryType: 'SymbolicLink', - code: 'LINK_OUTSIDE_ROOTS', - reason: 'symlink target outside allowed roots' - } - ]) + jest.spyOn(listAndValidate, 'listAndValidate').mockResolvedValue({ + approvedNames: [], + violations: [ + { + path: '../escape.txt', + resolved: '', + entryType: 'File', + code: 'OUTSIDE_ROOTS', + reason: 'escapes allowed roots' + }, + { + path: 'cache/link', + linkpath: '/etc/passwd', + resolved: '', + entryType: 'SymbolicLink', + code: 'LINK_OUTSIDE_ROOTS', + reason: 'symlink target outside allowed roots' + } + ] + }) const warnSpy = jest.spyOn(core, 'warning').mockImplementation() const debugSpy = jest.spyOn(core, 'debug').mockImplementation() const execMock = jest.spyOn(exec, 'exec').mockResolvedValue(0) @@ -120,15 +124,18 @@ describe('extractTar path validation integration', () => { }) test('single violation: warning text uses singular wording', async () => { - jest.spyOn(listAndValidate, 'listAndValidate').mockResolvedValue([ - { - path: '../boom.txt', - resolved: '', - entryType: 'File', - code: 'OUTSIDE_ROOTS', - reason: 'escapes' - } - ]) + jest.spyOn(listAndValidate, 'listAndValidate').mockResolvedValue({ + approvedNames: [], + violations: [ + { + path: '../boom.txt', + resolved: '', + entryType: 'File', + code: 'OUTSIDE_ROOTS', + reason: 'escapes' + } + ] + }) const warnSpy = jest.spyOn(core, 'warning').mockImplementation() jest.spyOn(core, 'debug').mockImplementation() jest.spyOn(exec, 'exec').mockResolvedValue(0) @@ -144,15 +151,18 @@ describe('extractTar path validation integration', () => { describe("mode 'error'", () => { test('violations present: throws CacheIntegrityError, system tar NEVER invoked', async () => { - jest.spyOn(listAndValidate, 'listAndValidate').mockResolvedValue([ - { - path: '../etc/passwd', - resolved: '', - entryType: 'File', - code: 'OUTSIDE_ROOTS', - reason: 'escapes allowed roots' - } - ]) + jest.spyOn(listAndValidate, 'listAndValidate').mockResolvedValue({ + approvedNames: [], + violations: [ + { + path: '../etc/passwd', + resolved: '', + entryType: 'File', + code: 'OUTSIDE_ROOTS', + reason: 'escapes allowed roots' + } + ] + }) jest.spyOn(core, 'warning').mockImplementation() jest.spyOn(core, 'debug').mockImplementation() const execMock = jest.spyOn(exec, 'exec').mockResolvedValue(0) @@ -183,7 +193,7 @@ describe('extractTar path validation integration', () => { ] jest .spyOn(listAndValidate, 'listAndValidate') - .mockResolvedValue(violations) + .mockResolvedValue({violations, approvedNames: []}) jest.spyOn(core, 'warning').mockImplementation() jest.spyOn(core, 'debug').mockImplementation() @@ -340,4 +350,70 @@ describe('extractTar path validation integration', () => { expect(listMock.mock.calls[0][1]).toBe(CompressionMethod.ZstdWithoutLong) }) }) + + describe("mode 'error' extraction allow-list", () => { + test('clean archive: extraction restricted to approved members via --null/-T', async () => { + jest.spyOn(listAndValidate, 'listAndValidate').mockResolvedValue({ + violations: [], + approvedNames: ['cache/a.txt', 'cache/b.txt'] + }) + jest.spyOn(io, 'mkdirP').mockResolvedValue() + + let command = '' + let listPath: string | undefined + let listContents: Buffer | undefined + jest.spyOn(exec, 'exec').mockImplementation(async cmd => { + command = cmd + const m = /-T "([^"]+)"/.exec(command) + if (m) { + listPath = m[1] + listContents = fs.readFileSync(listPath) + } + return 0 + }) + + await tar.extractTar(archive, CompressionMethod.Gzip, { + declaredPaths: ['cache/**'], + pathValidation: 'error' + }) + + // Allow-list flags are present and `--null` precedes `-T`. + expect(command).toContain('--null') + expect(command).toContain('--no-recursion') + expect(command).toMatch(/--null .*-T "[^"]+"/) + // GNU tar (the tool detected under the io.which mock on non-Windows) + // additionally receives the wildcard-hardening flags. + if (process.platform !== 'win32') { + expect(command).toContain('--no-wildcards') + expect(command).toContain('--anchored') + } + // The list is NUL-separated and contains exactly the approved names. + expect(listContents?.toString('utf8')).toBe('cache/a.txt\0cache/b.txt\0') + // The temporary allow-list file is cleaned up after extraction. + expect(listPath).toBeDefined() + expect(fs.existsSync(listPath as string)).toBe(false) + }) + + test('warn mode does not restrict extraction (no -T) even when clean', async () => { + jest.spyOn(listAndValidate, 'listAndValidate').mockResolvedValue({ + violations: [], + approvedNames: ['cache/a.txt'] + }) + jest.spyOn(io, 'mkdirP').mockResolvedValue() + + let command = '' + jest.spyOn(exec, 'exec').mockImplementation(async cmd => { + command = cmd + return 0 + }) + + await tar.extractTar(archive, CompressionMethod.Gzip, { + declaredPaths: ['cache/**'], + pathValidation: 'warn' + }) + + expect(command).not.toContain('-T "') + expect(command).not.toContain('--no-recursion') + }) + }) }) diff --git a/packages/cache/__tests__/tarPathValidationAttacks.test.ts b/packages/cache/__tests__/tarPathValidationAttacks.test.ts new file mode 100644 index 0000000000..d363be9367 --- /dev/null +++ b/packages/cache/__tests__/tarPathValidationAttacks.test.ts @@ -0,0 +1,403 @@ +import { + mkdtempSync, + writeFileSync, + rmSync, + mkdirSync, + existsSync, + readFileSync +} from 'fs' +import * as os from 'os' +import * as path from 'path' +import {gzipSync} from 'zlib' +import {execSync} from 'child_process' +import {CompressionMethod} from '../src/internal/constants' +import {listAndValidate} from '../src/internal/listAndValidate' +import {extractTar} from '../src/internal/tar' +import {CacheIntegrityError} from '../src/internal/cacheIntegrityError' + +/** + * Parser-differential bypass regression tests. These build the F1 / F2 / + * F2-linkpath / F3 / F5 PoC archives from the security analysis as raw tar + * bytes (so we can craft malicious PAX bodies and typeflags that node-tar's + * Header encoder would never produce) and assert the validator now refuses + * each one. See docs/zip-slip-* for the analysis. + */ + +// --------------------------------------------------------------------------- +// Raw tar construction +// --------------------------------------------------------------------------- + +const BLOCK = 512 + +function octal(n: number, len: number): Buffer { + return Buffer.from(`${n.toString(8).padStart(len - 1, '0')}\0`, 'ascii') +} + +function put( + buf: Buffer, + offset: number, + data: string | Buffer, + max: number +): void { + const b = Buffer.isBuffer(data) ? data : Buffer.from(data, 'ascii') + b.copy(buf, offset, 0, Math.min(b.length, max)) +} + +function header(opts: { + name?: string + mode?: number + size?: number + typeflag?: string + linkname?: string +}): Buffer { + const { + name = '', + mode = 0o644, + size = 0, + typeflag = '0', + linkname = '' + } = opts + const h = Buffer.alloc(BLOCK) + put(h, 0, name, 100) + octal(mode, 8).copy(h, 100) + octal(0, 8).copy(h, 108) + octal(0, 8).copy(h, 116) + octal(size, 12).copy(h, 124) + octal(0, 12).copy(h, 136) + h.fill(0x20, 148, 156) // chksum field = spaces while summing + put(h, 156, typeflag, 1) + put(h, 157, linkname, 100) + put(h, 257, 'ustar\0', 6) + put(h, 263, '00', 2) + let sum = 0 + for (let i = 0; i < BLOCK; i++) sum += h[i] + put(h, 148, `${sum.toString(8).padStart(6, '0')}\0 `, 8) + return h +} + +function pad(buf: Buffer): Buffer { + const p = (BLOCK - (buf.length % BLOCK)) % BLOCK + return p > 0 ? Buffer.concat([buf, Buffer.alloc(p)]) : buf +} + +function fileEntry(name: string, contents: string, typeflag = '0'): Buffer { + const body = Buffer.from(contents, 'ascii') + return Buffer.concat([header({name, size: body.length, typeflag}), pad(body)]) +} + +function dirEntry(name: string): Buffer { + return header({ + name: name.endsWith('/') ? name : `${name}/`, + mode: 0o755, + typeflag: '5' + }) +} + +function paxEntry(body: Buffer): Buffer { + return Buffer.concat([ + header({name: 'PaxHeader', size: body.length, typeflag: 'x'}), + pad(body) + ]) +} + +function end(): Buffer { + return Buffer.alloc(BLOCK * 2) +} + +/** Build a single length-correct PAX record (`" =\n"`). */ +function paxRecord(content: string): string { + const base = 1 + Buffer.byteLength(content) + 1 + let len = base + String(base).length + if (String(len).length !== String(base).length) { + len = base + String(len).length + } + return `${len} ${content}\n` +} + +// --------------------------------------------------------------------------- +// PoC archives +// --------------------------------------------------------------------------- + +// F1 — unknown typeflag byte ('Z') is emitted by node-tar as an ignoredEntry. +const F1 = Buffer.concat([ + fileEntry('cache/safe.txt', 'ok'), + fileEntry('../../../../../../tmp/zip_slip_F1', 'F1 pwned', 'Z'), + end() +]) + +// F2 — PAX `path=` newline differential. +const F2 = Buffer.concat([ + paxEntry( + Buffer.from( + '42 path=../../../../../../tmp/zip_slip_F2\n30 comment=x\n17 path=safe.txt\n', + 'ascii' + ) + ), + fileEntry('cache/safe.txt', 'F2 pwned'), + end() +]) + +// F2-linkpath — same differential, applied to a symlink's `linkpath=`. +const F2L = Buffer.concat([ + paxEntry( + Buffer.from( + '34 linkpath=../../../../../../tmp\n37 comment=x\n24 linkpath=safe/target\n', + 'ascii' + ) + ), + header({name: 'cache/link', typeflag: '2', linkname: 'safe/target'}), + end() +]) + +// F3 — oversized PAX header (> 1 MiB) is dropped by node-tar's +// maxMetaEntrySize and would otherwise let a `path=` override slip through. +const F3 = Buffer.concat([ + paxEntry( + Buffer.concat([ + Buffer.from(`1048600 comment=${'A'.repeat(1048600 - 17)}\n`, 'ascii'), + Buffer.from('42 path=../../../../../../tmp/zip_slip_F3\n', 'ascii') + ]) + ), + fileEntry('cache/safe.txt', 'F3 pwned'), + end() +]) + +// F5 — sparse typeflag 'S' is mapped but ignored by node-tar's ReadEntry. +const F5 = Buffer.concat([ + fileEntry('cache/decoy.txt', 'ok'), + fileEntry('../../../../../../tmp/zip_slip_F5', '', 'S'), + end() +]) + +// --------------------------------------------------------------------------- +// Test harness +// --------------------------------------------------------------------------- + +const ROOT = mkdtempSync(path.join(os.tmpdir(), 'cache-attacks-')) + +function workspace(): string { + return path.join(ROOT, 'workspace') +} + +function writeGz(name: string, archive: Buffer): string { + mkdirSync(ROOT, {recursive: true}) + const p = path.join(ROOT, name) + writeFileSync(p, gzipSync(archive)) + return p +} + +async function validate( + archive: Buffer, + name: string +): Promise<{violations: string[]; approvedNames: string[]}> { + const p = writeGz(name, archive) + const result = await listAndValidate( + p, + CompressionMethod.Gzip, + [path.join(workspace(), 'cache')], + workspace() + ) + return { + violations: result.violations.map(v => v.code), + approvedNames: result.approvedNames + } +} + +const TAR_AVAILABLE = ((): boolean => { + try { + execSync(process.platform === 'win32' ? 'where tar' : 'which tar', { + stdio: 'ignore' + }) + return true + } catch { + return false + } +})() +const describeTar = TAR_AVAILABLE ? describe : describe.skip + +beforeAll(() => { + mkdirSync(workspace(), {recursive: true}) +}) + +afterAll(() => { + try { + rmSync(ROOT, {recursive: true, force: true}) + } catch { + // best-effort + } +}) + +describe('listAndValidate: parser-differential bypass detection', () => { + test('F1: unknown typeflag is rejected as UNSUPPORTED_TYPE', async () => { + const {violations, approvedNames} = await validate(F1, 'f1.tar.gz') + expect(violations).toContain('UNSUPPORTED_TYPE') + // The escaping entry must NOT be approved for extraction. + expect(approvedNames).not.toContain('../../../../../../tmp/zip_slip_F1') + }) + + test('F2: PAX path newline differential is rejected as PAX_DESYNC', async () => { + const {violations, approvedNames} = await validate(F2, 'f2.tar.gz') + expect(violations).toContain('PAX_DESYNC') + expect(approvedNames).toEqual([]) + }) + + test('F2-linkpath: PAX linkpath newline differential is rejected as PAX_DESYNC', async () => { + const {violations} = await validate(F2L, 'f2l.tar.gz') + expect(violations).toContain('PAX_DESYNC') + }) + + test('F3: oversized PAX header is rejected as UNSUPPORTED_TYPE', async () => { + const {violations} = await validate(F3, 'f3.tar.gz') + expect(violations).toContain('UNSUPPORTED_TYPE') + }) + + test('F5: sparse typeflag is rejected as UNSUPPORTED_TYPE', async () => { + const {violations, approvedNames} = await validate(F5, 'f5.tar.gz') + expect(violations).toContain('UNSUPPORTED_TYPE') + expect(approvedNames).not.toContain('../../../../../../tmp/zip_slip_F5') + }) + + test('glob metacharacter in entry path is rejected as GLOB_METACHAR', async () => { + const archive = Buffer.concat([fileEntry('cache/[id].js', 'x'), end()]) + const {violations} = await validate(archive, 'glob.tar.gz') + expect(violations).toContain('GLOB_METACHAR') + }) + + test('clean archive: approvedNames lists every concrete entry, no violations', async () => { + const archive = Buffer.concat([ + dirEntry('cache/'), + fileEntry('cache/file.txt', 'hi'), + dirEntry('cache/sub/'), + fileEntry('cache/sub/deep.txt', 'deep'), + end() + ]) + const {violations, approvedNames} = await validate(archive, 'clean.tar.gz') + expect(violations).toEqual([]) + expect(approvedNames).toEqual([ + 'cache/', + 'cache/file.txt', + 'cache/sub/', + 'cache/sub/deep.txt' + ]) + }) + + test('newline in entry path is rejected as UNSAFE_CHAR', async () => { + const archive = Buffer.concat([fileEntry('cache/a\nb.txt', 'x'), end()]) + const {violations} = await validate(archive, 'newline.tar.gz') + expect(violations).toContain('UNSAFE_CHAR') + }) + + test('NUL byte in a symlink target (via PAX) is rejected as UNSAFE_CHAR', async () => { + const archive = Buffer.concat([ + paxEntry(Buffer.from(paxRecord('linkpath=cache/sub/t\0'), 'ascii')), + header({name: 'cache/link', typeflag: '2', linkname: 'cache/sub/t'}), + end() + ]) + const {violations} = await validate(archive, 'nul-link.tar.gz') + expect(violations).toContain('UNSAFE_CHAR') + }) + + test('legitimate long path via PAX: no violations, approved by its PAX path', async () => { + const longName = `cache/${'d/'.repeat(60)}file.txt` + const archive = Buffer.concat([ + paxEntry(Buffer.from(paxRecord(`path=${longName}`), 'ascii')), + // ustar name is a short placeholder; the PAX `path` overrides it. + fileEntry('cache/placeholder', 'x'), + end() + ]) + const {violations, approvedNames} = await validate( + archive, + 'longpath.tar.gz' + ) + expect(violations).toEqual([]) + expect(approvedNames).toContain(longName) + }) + + test('unknown PAX key is rejected as PAX_UNKNOWN_KEY', async () => { + const archive = Buffer.concat([ + paxEntry(Buffer.from(paxRecord('EVIL.placement=1'), 'ascii')), + fileEntry('cache/x', 'y'), + end() + ]) + const {violations} = await validate(archive, 'unknown-key.tar.gz') + expect(violations).toContain('PAX_UNKNOWN_KEY') + }) + + test('flood of extended headers is rejected (pending-meta cap)', async () => { + const metas: Buffer[] = [] + for (let i = 0; i < 70; i++) { + metas.push(paxEntry(Buffer.from(paxRecord('comment=x'), 'ascii'))) + } + const archive = Buffer.concat([...metas, fileEntry('cache/x', 'y'), end()]) + const p = writeGz('meta-flood.tar.gz', archive) + await expect( + listAndValidate( + p, + CompressionMethod.Gzip, + [path.join(workspace(), 'cache')], + workspace() + ) + ).rejects.toThrow() + }) +}) + +describeTar('extractTar end-to-end with system tar allow-list', () => { + let savedWorkspace: string | undefined + + beforeEach(() => { + savedWorkspace = process.env['GITHUB_WORKSPACE'] + }) + + afterEach(() => { + if (savedWorkspace === undefined) { + delete process.env['GITHUB_WORKSPACE'] + } else { + process.env['GITHUB_WORKSPACE'] = savedWorkspace + } + }) + + test('error mode, clean archive: every approved member is extracted', async () => { + const dest = mkdtempSync(path.join(ROOT, 'extract-clean-')) + process.env['GITHUB_WORKSPACE'] = dest + const archive = Buffer.concat([ + dirEntry('cache/'), + fileEntry('cache/file.txt', 'hello'), + dirEntry('cache/sub/'), + fileEntry('cache/sub/deep.txt', 'deep'), + end() + ]) + const archivePath = path.join(dest, 'clean.tar.gz') + mkdirSync(dest, {recursive: true}) + writeFileSync(archivePath, gzipSync(archive)) + + await extractTar(archivePath, CompressionMethod.Gzip, { + declaredPaths: ['cache/**'], + pathValidation: 'error' + }) + + expect(existsSync(path.join(dest, 'cache', 'file.txt'))).toBe(true) + expect(readFileSync(path.join(dest, 'cache', 'file.txt'), 'utf8')).toBe( + 'hello' + ) + expect(existsSync(path.join(dest, 'cache', 'sub', 'deep.txt'))).toBe(true) + }) + + test('error mode, F2 archive: throws and writes nothing to the workspace', async () => { + const dest = mkdtempSync(path.join(ROOT, 'extract-f2-')) + process.env['GITHUB_WORKSPACE'] = dest + const archivePath = path.join(dest, 'f2.tar.gz') + mkdirSync(dest, {recursive: true}) + writeFileSync(archivePath, gzipSync(F2)) + + await expect( + extractTar(archivePath, CompressionMethod.Gzip, { + declaredPaths: ['cache/**'], + pathValidation: 'error' + }) + ).rejects.toThrow(CacheIntegrityError) + + // No member was extracted anywhere under the workspace. + expect(existsSync(path.join(dest, 'cache'))).toBe(false) + expect(existsSync(path.join(dest, 'safe.txt'))).toBe(false) + }) +}) diff --git a/packages/cache/src/internal/listAndValidate.ts b/packages/cache/src/internal/listAndValidate.ts index da3b9ecc2b..40a0e06354 100644 --- a/packages/cache/src/internal/listAndValidate.ts +++ b/packages/cache/src/internal/listAndValidate.ts @@ -9,12 +9,48 @@ import { prepareAllowedRoots, validateEntry } from './pathValidation.js' +import {crossCheckMetaBodies} from './pax-reparse.js' + +/** + * Result of streaming and validating a tar archive. + */ +export interface ListAndValidateResult { + /** Entries that failed validation. Empty iff the archive is clean. */ + violations: PathValidationViolation[] + /** + * The `entry.path` of every entry that passed validation, in archive order. + * Used to build the NUL-separated allow-list (`-T`) handed to system `tar` + * so extraction is restricted to members the validator approved — by the + * exact name node-tar derived from the same bytes. Only meaningful when + * `violations` is empty (otherwise extraction is blocked or unrestricted). + */ + approvedNames: string[] +} + +/** + * Upper bound on the size of a PAX / GNU extended-header body the validator is + * willing to re-parse. node-tar's default is 1 MiB; real extended headers are + * a few hundred bytes at most. Anything larger is emitted as an + * `ignoredEntry` (see the handler below) and recorded as a violation rather + * than silently dropped — this is the F3 (oversized-PAX) defence, made + * explicit instead of relying on node-tar's internal default. + */ +const META_REJECT_BYTES = 1024 * 1024 + +/** + * Maximum number of pending extended-header (meta) bodies to retain while + * waiting for the concrete entry they apply to. A legitimate entry is + * preceded by at most a handful of meta headers; an archive that streams an + * unbounded run of meta headers with no concrete entry is malformed and must + * not be allowed to grow memory without bound. + */ +const MAX_PENDING_META = 64 /** * Stream the entries of a (possibly compressed) tar archive and validate each * against the allowed roots. Does NOT extract any files — entries are read * for header inspection only. Returns the list of violations (empty if the - * archive is clean). + * archive is clean) and the names approved for extraction. * * Throws an Error if the archive cannot be parsed (corrupt header, * decompression failure, truncated stream, etc.). The caller is responsible @@ -25,8 +61,9 @@ export async function listAndValidate( compressionMethod: CompressionMethod, allowedRoots: string[], extractCwd: string -): Promise { +): Promise { const violations: PathValidationViolation[] = [] + const approvedNames: string[] = [] // Precompute the normalized / case-folded form of each allowed root once, // so the per-entry containment check is a handful of string compares @@ -45,6 +82,20 @@ export async function listAndValidate( firstParseError = new Error(`tar parse error (${code}): ${message}`) } + // Raw bodies of the extended-header (meta) entries seen since the last + // concrete entry. node-tar emits a `'meta'` event carrying the decoded + // body string of each PAX / GNU long-name header *before* it emits the + // concrete entry the header applies to. We re-parse these length-correctly + // (see pax-reparse.ts) and cross-check against node-tar's resolved + // path/linkpath to catch the F2 / F2-linkpath PAX newline differential. + let pendingMeta: Buffer[] = [] + + const consumePendingMeta = (): Buffer[] => { + const bodies = pendingMeta + pendingMeta = [] + return bodies + } + // For gzip we let node-tar handle decompression internally (its built-in // gzip support is mature). For zstd we spawn the system `zstd` binary so // we get the same `--long=30` window-size handling as the existing @@ -57,6 +108,10 @@ export async function listAndValidate( // headers) don't abort parsing. Real corruption is surfaced explicitly // via the captured error below. strict: false, + // Cap extended-header bodies explicitly so an oversized PAX header is + // turned into a recorded `ignoredEntry` violation rather than relying on + // node-tar's internal default (F3 defence). + maxMetaEntrySize: META_REJECT_BYTES, // Treat structural problems (bad archive, bad header, bad chksum) as // hard parse errors — silently ignoring them would let a corrupt // archive sail through validation. We DO NOT throw on softer warnings @@ -73,6 +128,32 @@ export async function listAndValidate( }, onReadEntry: (entry: ReadEntry) => { try { + const metaBodies = consumePendingMeta() + + // Cross-check any PAX / long-name headers that preceded this entry + // against node-tar's resolved view. A disagreement means node-tar + // mis-parsed the header relative to what GNU/BSD tar will extract. + for (const pax of crossCheckMetaBodies( + metaBodies, + entry.path, + entry.linkpath || undefined + )) { + violations.push({ + path: entry.path, + linkpath: entry.linkpath || undefined, + resolved: entry.path, + entryType: entry.type, + code: pax.code, + reason: pax.reason + }) + } + + // Reject characters that would corrupt the extraction allow-list or + // be reinterpreted by system tar's `-T` matching (glob metacharacters + // on bsdtar's fnmatch path). These have no legitimate use in a cache + // entry path. + const charViolation = checkUnsafeChars(entry.path, entry.linkpath) + const result = validateEntry( entry.path, entry.linkpath || undefined, @@ -90,6 +171,25 @@ export async function listAndValidate( reason: result.reason }) } + + if (charViolation) { + violations.push({ + path: entry.path, + linkpath: entry.linkpath || undefined, + resolved: entry.path, + entryType: entry.type, + code: charViolation.code, + reason: charViolation.reason + }) + } + + // Only entries that passed every check are eligible for the + // extraction allow-list. (When any violation exists the allow-list is + // unused — extraction is either blocked in 'error' mode or runs + // unrestricted in 'warn' mode — so this is belt-and-braces.) + if (result.ok && !charViolation) { + approvedNames.push(entry.path) + } } finally { // Drain the entry so the parser advances. Without this the stream // stalls waiting for the consumer to read the entry body. @@ -98,12 +198,89 @@ export async function listAndValidate( } }) + // Entries node-tar refuses to classify (unknown typeflag bytes, mapped + // typeflags it doesn't extract, and oversized meta headers) are emitted on + // `'ignoredEntry'` rather than `'entry'`, so they never reach onReadEntry. + // A cache archive should never legitimately contain one, and system tar may + // still extract them — so we fail closed and record each as a violation. + parser.on('ignoredEntry', (entry: ReadEntry) => { + // The meta header(s) that preceded an ignored entry belong to it; discard + // them so they aren't mis-associated with a later concrete entry. + consumePendingMeta() + violations.push({ + path: entry.path, + linkpath: entry.linkpath || undefined, + resolved: entry.path, + entryType: entry.type, + code: 'UNSUPPORTED_TYPE', + reason: `parser ignored entry of type ${entry.type}` + }) + }) + + // Capture the raw body of each extended-header (meta) entry. node-tar + // decodes the body to a string before emitting it; we re-encode to bytes so + // the length-correct PAX re-parser operates on the same view node-tar used. + parser.on('meta', (metaBody: string) => { + if (pendingMeta.length >= MAX_PENDING_META) { + recordParseError( + 'TAR_ENTRY_INVALID', + 'too many consecutive extended headers' + ) + return + } + pendingMeta.push(Buffer.from(metaBody, 'utf8')) + }) + await streamArchiveTo(archivePath, compressionMethod, parser) if (firstParseError) { throw firstParseError } - return violations + return {violations, approvedNames} +} + +/** + * Reject characters in an entry path (or link target) that have no legitimate + * place in a cache archive and that would either corrupt the extraction + * allow-list or be reinterpreted by system tar's `-T` member matching: + * + * - NUL / newline in the entry path — would split or terminate a list entry. + * - glob metacharacters (`* ? [ ]`) in the entry path — bsdtar matches `-T` + * names with `fnmatch()`, so an unescaped metacharacter could match (and + * extract) members other than the one approved. GNU tar's `--no-wildcards` + * also covers this, but rejecting unconditionally keeps the behaviour + * identical across tar implementations. + * - NUL in a link target — same list-corruption concern. + */ +function checkUnsafeChars( + entryPath: string, + linkPath: string | undefined +): {code: 'UNSAFE_CHAR' | 'GLOB_METACHAR'; reason: string} | undefined { + if (entryPath.includes('\0') || entryPath.includes('\n')) { + return { + code: 'UNSAFE_CHAR', + reason: `entry path contains an unsafe control character: ${JSON.stringify( + entryPath + )}` + } + } + if (/[*?[\]]/.test(entryPath)) { + return { + code: 'GLOB_METACHAR', + reason: `entry path contains a glob metacharacter: ${JSON.stringify( + entryPath + )}` + } + } + if (linkPath !== undefined && linkPath.includes('\0')) { + return { + code: 'UNSAFE_CHAR', + reason: `link target contains an unsafe control character: ${JSON.stringify( + linkPath + )}` + } + } + return undefined } async function streamArchiveTo( diff --git a/packages/cache/src/internal/pathValidation.ts b/packages/cache/src/internal/pathValidation.ts index e268de43e9..ab2d531f12 100644 --- a/packages/cache/src/internal/pathValidation.ts +++ b/packages/cache/src/internal/pathValidation.ts @@ -27,6 +27,11 @@ export interface PathValidationViolation { | 'OUTSIDE_ROOTS' | 'LINK_OUTSIDE_ROOTS' | 'UNSUPPORTED_TYPE' + | 'PAX_DESYNC' + | 'PAX_PARSE_FAIL' + | 'PAX_UNKNOWN_KEY' + | 'UNSAFE_CHAR' + | 'GLOB_METACHAR' /** Human-readable description of the violation. */ reason: string } diff --git a/packages/cache/src/internal/pax-reparse.ts b/packages/cache/src/internal/pax-reparse.ts new file mode 100644 index 0000000000..44f87e4c5e --- /dev/null +++ b/packages/cache/src/internal/pax-reparse.ts @@ -0,0 +1,271 @@ +/** + * Length-correct PAX extended-header re-parser. + * + * node-tar v7 parses PAX extended-header bodies with a naive `split('\n')` + * (see `pax.js`'s `parseKV`, whose own source carries an `XXX Values with \n + * in them will fail this` comment). A PAX record is actually a length-prefixed + * sequence: + * + * " =\n" + * + * where `` is the total byte length of the record including + * the digits, the space, the `=`, the value, and the trailing `\n`. Values may + * legally contain `\n`. GNU tar and libarchive (bsdtar / Windows `tar.exe`) + * both parse by consuming exactly `` bytes, so a value + * containing an embedded `"\n path=\n"` desynchronises node-tar + * from every real extractor — the F2 / F2-linkpath parser-differential + * bypasses. + * + * This module re-parses each captured PAX body the way every real extractor + * does (length-prefixed, byte-accurate) and cross-checks the result against + * node-tar's view of the entry. Any disagreement on `path` or `linkpath` is a + * `PAX_DESYNC` violation; a body node-tar treated as PAX that this parser + * cannot fully account for is a `PAX_PARSE_FAIL`; and any PAX key not on the + * known allow-list is a `PAX_UNKNOWN_KEY` (fail-closed against future + * placement-affecting extensions). + * + * Everything here is pure (no I/O, no node-tar dependency) so it can be unit + * tested in isolation. + */ + +/** Machine-readable reason codes produced by the PAX cross-check. */ +export type PaxReparseCode = 'PAX_DESYNC' | 'PAX_PARSE_FAIL' | 'PAX_UNKNOWN_KEY' + +/** A single disagreement surfaced by {@link crossCheckMetaBodies}. */ +export interface PaxReparseViolation { + code: PaxReparseCode + reason: string +} + +/** + * PAX keys that are known-legitimate and do not influence where extracted + * bytes land (or, for `path` / `linkpath`, are explicitly cross-checked). + * Anything outside this set — and the prefix set below — is rejected so a + * future placement-affecting PAX extension cannot silently slip past the + * validator. + * + * Sourced from POSIX.1-2017 §3.4.3 plus the GNU tar (`xheader.c`) and + * libarchive (`archive_read_format_tar.c`) extension vocabularies. + */ +export const PAX_KNOWN_KEYS: ReadonlySet = new Set([ + // POSIX.1-2017 §3.4.3 + 'atime', + 'ctime', + 'mtime', + 'charset', + 'comment', + 'gid', + 'gname', + 'hdrcharset', + 'linkpath', + 'path', + 'size', + 'uid', + 'uname', + // SCHILY / star vocabulary that node-tar itself emits and consumes + 'dev', + 'ino', + 'nlink', + 'mode' +]) + +/** + * Dotted-namespace PAX key prefixes that are known-legitimate. Matched as + * prefixes so e.g. `SCHILY.xattr.user.foo`, `GNU.sparse.realsize`, and + * `LIBARCHIVE.creationtime` are all accepted without enumerating every + * possible suffix. + */ +export const PAX_KNOWN_PREFIXES: readonly string[] = [ + 'SCHILY.', + 'GNU.', + 'LIBARCHIVE.', + 'POSIX.' +] + +function isKnownPaxKey(key: string): boolean { + if (PAX_KNOWN_KEYS.has(key)) return true + return PAX_KNOWN_PREFIXES.some(prefix => key.startsWith(prefix)) +} + +/** Result of a length-correct parse of a single PAX extended-header body. */ +export interface PaxParseResult { + /** Decoded records, last-write-wins per key (POSIX-correct). Values kept as + * raw bytes so non-ASCII / high-bit data is preserved until comparison. */ + records: Record + /** True iff every byte of the body was accounted for by length-prefixed + * records. False signals a body this parser could not fully account for. */ + ok: boolean +} + +const DIGIT_0 = 0x30 +const DIGIT_9 = 0x39 +const SPACE = 0x20 +const EQUALS = 0x3d +const LF = 0x0a + +/** + * Parse a PAX extended-header body using strict, byte-accurate, + * length-prefixed records — the same algorithm GNU tar and libarchive use. + * + * Returns `{ok: false}` (with whatever records were parsed up to the failure) + * the moment a record is not fully length-accountable: a non-digit length, a + * length that overruns the buffer, a missing trailing `\n`, or a record with + * no `=`. + */ +export function parsePaxLengthCorrect(buf: Buffer): PaxParseResult { + const records: Record = {} + let i = 0 + while (i < buf.length) { + // — a run of ASCII digits terminated by a single space. + let j = i + while (j < buf.length && buf[j] >= DIGIT_0 && buf[j] <= DIGIT_9) j++ + if (j === i || j >= buf.length || buf[j] !== SPACE) { + return {records, ok: false} + } + const len = parseInt(buf.subarray(i, j).toString('ascii'), 10) + // Length must be positive and the whole record must fit in the buffer. + if (!Number.isFinite(len) || len <= 0 || i + len > buf.length) { + return {records, ok: false} + } + // The record must end with a newline at exactly the length boundary. + if (buf[i + len - 1] !== LF) { + return {records, ok: false} + } + // " =\n" — the first '=' after the space separates + // key from value. The value runs to just before the trailing newline and + // may itself contain '=' and '\n'. + const eq = buf.indexOf(EQUALS, j + 1) + if (eq < 0 || eq >= i + len - 1) { + return {records, ok: false} + } + const key = buf.subarray(j + 1, eq).toString('utf8') + const value = buf.subarray(eq + 1, i + len - 1) // raw bytes, no trailing LF + records[key] = value // last write wins + i += len + } + return {records, ok: i === buf.length} +} + +/** + * Normalise a path for cross-parser comparison: collapse `\` to `/` (matching + * node-tar's `normalizeWindowsPath` on Windows; a no-op for typical POSIX + * paths) and drop a single trailing `/` so directory entries compare equal + * regardless of how each side renders them. Pure string manipulation. + */ +function normalizeForCompare(p: string): string { + let s = p.replace(/\\/g, '/') + if (s.length > 1 && s.endsWith('/')) { + s = s.slice(0, -1) + } + return s +} + +function nulTrim(buf: Buffer): Buffer { + const nul = buf.indexOf(0) + return nul === -1 ? buf : buf.subarray(0, nul) +} + +/** + * Cross-check the captured extended-header (meta) bodies that preceded a + * concrete entry against node-tar's resolved view of that entry. + * + * `metaBodies` are the raw bodies of every `ExtendedHeader` / + * `GlobalExtendedHeader` / GNU `LongName` / `LongLink` meta entry emitted + * since the previous concrete entry, in order. `entryPath` / `entryLinkpath` + * are node-tar's final (post-PAX, post-long-name) values for the entry. + * + * The check is sound without ever invoking the system `tar`: this re-parser + * computes the same `path` / `linkpath` every real extractor would, so a + * disagreement with node-tar's value is exactly the signal that node-tar + * mis-parsed the header. + */ +export function crossCheckMetaBodies( + metaBodies: Buffer[], + entryPath: string, + entryLinkpath: string | undefined +): PaxReparseViolation[] { + const violations: PaxReparseViolation[] = [] + let sawPax = false + // Merged length-correct view of path / linkpath across all PAX bodies + // (last write wins), matching POSIX precedence. + let lcPath: Buffer | undefined + let lcLinkpath: Buffer | undefined + + for (const body of metaBodies) { + if (body.length === 0) continue + const parsed = parsePaxLengthCorrect(body) + const keys = Object.keys(parsed.records) + + if (parsed.ok && keys.length > 0) { + // A well-formed PAX extended header. + sawPax = true + for (const key of keys) { + if (!isKnownPaxKey(key)) { + violations.push({ + code: 'PAX_UNKNOWN_KEY', + reason: `unknown PAX key '${key}' in extended header` + }) + } + } + if (parsed.records['path'] !== undefined) { + lcPath = parsed.records['path'] + } + if (parsed.records['linkpath'] !== undefined) { + lcLinkpath = parsed.records['linkpath'] + } + } else { + // Not a length-accountable PAX body. This is either a GNU `LongName` / + // `LongLink` body (raw NUL-terminated path, no length prefix) or a + // malformed PAX header crafted to desync node-tar. A legitimate GNU + // long-name body's NUL-trimmed bytes must equal exactly the path or + // linkpath node-tar resolved for the entry; anything else is a parser + // disagreement we refuse to extract. + const raw = nulTrim(body).toString('utf8') + const rawCmp = normalizeForCompare(raw) + const matchesPath = rawCmp === normalizeForCompare(entryPath) + const matchesLink = + entryLinkpath !== undefined && + rawCmp === normalizeForCompare(entryLinkpath) + if (!matchesPath && !matchesLink) { + violations.push({ + code: 'PAX_PARSE_FAIL', + reason: + 'extended header body is not length-accountable and does not ' + + "match the entry's resolved path or linkpath" + }) + } + } + } + + if (sawPax) { + if (lcPath !== undefined) { + const lc = normalizeForCompare(lcPath.toString('utf8')) + if (lc !== normalizeForCompare(entryPath)) { + violations.push({ + code: 'PAX_DESYNC', + reason: `PAX path disagreement: node-tar resolved ${JSON.stringify( + entryPath + )} but length-correct parse yields ${JSON.stringify( + lcPath.toString('utf8') + )}` + }) + } + } + if (lcLinkpath !== undefined) { + const lc = normalizeForCompare(lcLinkpath.toString('utf8')) + const ntLink = entryLinkpath ?? '' + if (lc !== normalizeForCompare(ntLink)) { + violations.push({ + code: 'PAX_DESYNC', + reason: `PAX linkpath disagreement: node-tar resolved ${JSON.stringify( + entryLinkpath + )} but length-correct parse yields ${JSON.stringify( + lcLinkpath.toString('utf8') + )}` + }) + } + } + } + + return violations +} diff --git a/packages/cache/src/internal/tar.ts b/packages/cache/src/internal/tar.ts index 33cd29f13e..14999489cf 100644 --- a/packages/cache/src/internal/tar.ts +++ b/packages/cache/src/internal/tar.ts @@ -1,7 +1,9 @@ import {exec} from '@actions/exec' import * as core from '@actions/core' import * as io from '@actions/io' -import {existsSync, writeFileSync} from 'fs' +import {existsSync, writeFileSync, unlinkSync} from 'fs' +import * as os from 'os' +import * as crypto from 'crypto' import * as path from 'path' import * as utils from './cacheUtils.js' import {ArchiveTool} from './contracts.js' @@ -64,7 +66,8 @@ async function getTarArgs( tarPath: ArchiveTool, compressionMethod: CompressionMethod, type: string, - archivePath = '' + archivePath = '', + allowListPath = '' ): Promise { const args = [`"${tarPath.path}"`] const cacheFileName = utils.getCacheFileName(compressionMethod) @@ -106,6 +109,34 @@ async function getTarArgs( '-C', workingDirectory.replace(new RegExp(`\\${path.sep}`, 'g'), '/') ) + // When an extraction allow-list is supplied (pathValidation: 'error' + // with a clean archive), restrict extraction to exactly the members the + // validator approved, matched by the names node-tar derived from the + // same bytes. This makes the extraction parser's view of any + // path-channel parser differential a no-op: a member system tar would + // place at a different path than node-tar simply isn't on the list. + // + // `--null` MUST precede `-T` on GNU tar (otherwise the list is read + // newline-delimited with stray NULs). `--no-recursion` stops an + // approved directory from implicitly pulling in unapproved children. + // The `--no-wildcards*` / `--anchored` flags are GNU-only defence in + // depth; bsdtar lacks them but glob metacharacters are already rejected + // during validation, so its `fnmatch()`-based `-T` matching degrades to + // exact-name matching. + if (allowListPath) { + args.push('--null', '--no-recursion') + if (tarPath.type === ArchiveToolType.GNU) { + args.push( + '--no-wildcards', + '--no-wildcards-match-slash', + '--anchored' + ) + } + args.push( + '-T', + `"${allowListPath.replace(new RegExp(`\\${path.sep}`, 'g'), '/')}"` + ) + } break case 'list': args.push( @@ -137,7 +168,8 @@ async function getTarArgs( async function getCommands( compressionMethod: CompressionMethod, type: string, - archivePath = '' + archivePath = '', + allowListPath = '' ): Promise { let args @@ -146,7 +178,8 @@ async function getCommands( tarPath, compressionMethod, type, - archivePath + archivePath, + allowListPath ) const compressionArgs = type !== 'create' @@ -290,6 +323,11 @@ export async function extractTar( const workingDirectory = getWorkingDirectory() const pathValidation: PathValidationMode = options?.pathValidation ?? 'off' + // Names approved for extraction by the validator. When pathValidation is + // 'error' and the archive is clean, system tar is restricted to exactly + // these members via a NUL-separated `-T` allow-list (see below). + let approvedNames: string[] | undefined + // Run path validation BEFORE creating the extraction directory or invoking // system tar. In 'error' mode, a CacheIntegrityError thrown here means no // bytes are ever written to the workspace. In 'warn' mode, violations are @@ -306,12 +344,14 @@ export async function extractTar( } let violations: PathValidationViolation[] | undefined try { - violations = await listAndValidate( + const result = await listAndValidate( archivePath, compressionMethod, allowedRoots, workingDirectory ) + violations = result.violations + approvedNames = result.approvedNames } catch (error) { // Parse / decompression failure encountered while validating. The // validator's tar parser is stricter than the system `tar` that @@ -344,13 +384,61 @@ export async function extractTar( violations ) } + // In 'warn' mode a violation means we must NOT restrict extraction to + // the (possibly incomplete) approved list — fall back to extracting + // everything, matching legacy behavior. + approvedNames = undefined } } // Create directory to extract tar into await io.mkdirP(workingDirectory) - const commands = await getCommands(compressionMethod, 'extract', archivePath) - await execCommands(commands) + + // In 'error' mode with a clean archive, write the approved member names to a + // NUL-separated allow-list and restrict system tar to exactly those members. + // This closes path-channel parser differentials: a member tar would extract + // to an escaped path is not on the list, so it is never extracted. + let allowListPath = '' + if (pathValidation === 'error' && approvedNames !== undefined) { + allowListPath = writeAllowList(approvedNames) + } + + try { + const commands = await getCommands( + compressionMethod, + 'extract', + archivePath, + allowListPath + ) + await execCommands(commands) + } finally { + if (allowListPath) { + try { + unlinkSync(allowListPath) + } catch { + // best-effort cleanup of the temporary allow-list file + } + } + } +} + +/** + * Write the approved member names to a temporary NUL-separated file suitable + * for `tar --null -T`. Returns the absolute path to the written file. The + * caller is responsible for unlinking it. + */ +function writeAllowList(approvedNames: string[]): string { + const allowListPath = path.join( + os.tmpdir(), + `cache-allow-${process.pid}-${Date.now()}-${crypto + .randomBytes(4) + .toString('hex')}.lst` + ) + const payload = Buffer.concat( + approvedNames.flatMap(name => [Buffer.from(name, 'utf8'), Buffer.from([0])]) + ) + writeFileSync(allowListPath, payload, {mode: 0o600}) + return allowListPath } function reportViolations( From bb827dbfd466120ed1b4adb4b47194e05a63a2fe Mon Sep 17 00:00:00 2001 From: Jason Ginchereau Date: Fri, 19 Jun 2026 09:34:38 -1000 Subject: [PATCH 09/10] Bump minor version, update test plan --- packages/cache/RELEASES.md | 4 + .../cache/docs/path-validation-test-plan.md | 112 ++++++++++++++++-- packages/cache/package-lock.json | 4 +- packages/cache/package.json | 2 +- 4 files changed, 110 insertions(+), 12 deletions(-) diff --git a/packages/cache/RELEASES.md b/packages/cache/RELEASES.md index cc9e33a202..e1d9a4a087 100644 --- a/packages/cache/RELEASES.md +++ b/packages/cache/RELEASES.md @@ -1,5 +1,9 @@ # @actions/cache Releases +## 6.2.0 + +- Add opt-in cache-archive path validation during restore via `DownloadOptions.pathValidation` (`'off' | 'warn' | 'error'`). When enabled, downloaded archives are streamed through an in-process `node-tar` validator before extraction; entries (and link targets) that escape the declared cache `paths` are reported as warnings or rejected with a `CacheIntegrityError`. In `'error'` mode on a clean archive, system `tar` extraction is additionally restricted to the validator-approved members, and parser-differential defenses (length-correct PAX re-parsing, unsafe-character / glob-metacharacter / unknown-PAX-key rejection) guard against listing-vs-extraction bypasses. + ## 6.1.0 - Handle cache write error due to read-only token: detect the `cache write denied:` prefix on cache reservation failures and surface it as a `core.warning` (without failing the run). diff --git a/packages/cache/docs/path-validation-test-plan.md b/packages/cache/docs/path-validation-test-plan.md index 601d9c54a3..9f6ea95b71 100644 --- a/packages/cache/docs/path-validation-test-plan.md +++ b/packages/cache/docs/path-validation-test-plan.md @@ -1,7 +1,7 @@ # Path Validation Test Plan — `@actions/cache` This document describes the test coverage for the client-side cache-archive path -validation feature introduced in `@actions/cache` v6.1.0. +validation feature introduced in `@actions/cache` v6.2.0. ## Feature summary @@ -17,9 +17,22 @@ extractTar(archivePath, compressionMethod, { When `pathValidation !== 'off'`, the archive is streamed through `node-tar`'s `Parser` (no extraction) and every entry's path / linkpath is checked against the set of allowed roots derived from `declaredPaths` (or, if that list is -empty, the GitHub Actions workspace as a single fail-safe root). Violations are -collected; in `'error'` mode a `CacheIntegrityError` is thrown **before** system -tar is invoked, so no bytes are ever written to the workspace. +empty, the GitHub Actions workspace as a single fail-safe root). Beyond simple +containment, the validator also defends against parser differentials between +`node-tar` (used for listing) and system `tar` (used for extraction): +length-correct PAX extended-header re-parsing, unknown-PAX-key rejection, and +unsafe-character / glob-metacharacter rejection. `listAndValidate` returns both +the collected `violations` and the `approvedNames` (the exact member names +`node-tar` derived from the archive bytes). + +Violations are collected; in `'error'` mode a `CacheIntegrityError` is thrown +**before** system tar is invoked, so no bytes are ever written to the workspace. +In `'error'` mode on a **clean** archive, extraction is additionally restricted +to exactly the `approvedNames` via a NUL-separated `tar --null --no-recursion +-T` allow-list, so a member that system `tar` would place at a different path +than `node-tar` computed is never extracted. `'warn'` mode never uses the +allow-list — on any violation it falls back to extracting everything (legacy +behavior). `restoreCacheV1` and `restoreCacheV2` forward the caller-supplied `pathValidation` mode and the declared `paths` array to `extractTar`. They also @@ -31,8 +44,9 @@ integrity failures from ordinary cache-miss/network errors. | Source | Tests | |---|---| | [`src/internal/pathValidation.ts`](../src/internal/pathValidation.ts) | [`__tests__/pathValidation.test.ts`](../__tests__/pathValidation.test.ts) | -| [`src/internal/listAndValidate.ts`](../src/internal/listAndValidate.ts) | [`__tests__/listAndValidate.test.ts`](../__tests__/listAndValidate.test.ts) | -| [`src/internal/tar.ts`](../src/internal/tar.ts) (integration into `extractTar`) | [`__tests__/tarPathValidation.test.ts`](../__tests__/tarPathValidation.test.ts) | +| [`src/internal/pax-reparse.ts`](../src/internal/pax-reparse.ts) | [`__tests__/pax-reparse.test.ts`](../__tests__/pax-reparse.test.ts) | +| [`src/internal/listAndValidate.ts`](../src/internal/listAndValidate.ts) | [`__tests__/listAndValidate.test.ts`](../__tests__/listAndValidate.test.ts), [`__tests__/tarPathValidationAttacks.test.ts`](../__tests__/tarPathValidationAttacks.test.ts) | +| [`src/internal/tar.ts`](../src/internal/tar.ts) (integration into `extractTar`) | [`__tests__/tarPathValidation.test.ts`](../__tests__/tarPathValidation.test.ts), [`__tests__/tarPathValidationAttacks.test.ts`](../__tests__/tarPathValidationAttacks.test.ts) | | [`src/internal/cacheIntegrityError.ts`](../src/internal/cacheIntegrityError.ts) | covered indirectly via the integration tests | | [`src/options.ts`](../src/options.ts) (new `pathValidation` field) | [`__tests__/options.test.ts`](../__tests__/options.test.ts) | | [`src/cache.ts`](../src/cache.ts) (forwarding + error re-throw) | [`__tests__/restoreCache.test.ts`](../__tests__/restoreCache.test.ts), [`__tests__/restoreCacheV2.test.ts`](../__tests__/restoreCacheV2.test.ts) | @@ -110,6 +124,74 @@ and platform-specific behavior: - Shows up to N items verbatim - Truncates the tail with a `(... and N more)` summary line +## Unit tests — PAX re-parser (`pax-reparse.test.ts`) + +These exercise the length-correct PAX extended-header re-parser in isolation +(pure logic, no archives). `node-tar` parses PAX bodies with a naive +`split('\n')`, which desynchronises from GNU tar / libarchive on values that +contain embedded newlines. This module re-parses each body byte-accurately and +cross-checks the result against `node-tar`'s view of the entry. + +### `parsePaxLengthCorrect` + +- Single well-formed `" =\n"` record +- **Newline-in-value**: an embedded fake `path=` record is swallowed by the + enclosing record's length, rather than parsed as its own record (the core + parser-differential) +- Empty buffer → empty record set +- `ok: false` for: truncated record, non-numeric length prefix, missing + trailing newline, length that spans past the buffer end, record without `=`, + record with correct length + LF but no `=`, zero-length prefix +- Value may itself contain `=` — only the first one is the separator +- High-bit value bytes preserved as a raw `Buffer` +- Last write wins for repeated keys + +### `crossCheckMetaBodies` + +- Clean PAX `path` matching `node-tar` → no violations +- **F2 path desync** → `PAX_DESYNC` (node-tar resolved a safe name, the + length-correct parse disagrees) +- **F2-linkpath desync** → `PAX_DESYNC` +- Unknown PAX key → `PAX_UNKNOWN_KEY` +- Known `SCHILY.` / `GNU.` / `LIBARCHIVE.` prefixed keys accepted +- GNU long-name raw body matching the entry path (or the link target) → no + violation; a body matching neither → flagged +- No meta bodies → no violations +- PAX setting both `path` and `linkpath` in agreement → no violations +- Directory path with trailing slash compares equal (no false desync) +- Multiple PAX bodies merge with last-write-wins before comparison +- `PAX_KNOWN_KEYS` includes `path` and `linkpath` + +## Integration tests — parser-differential attacks (`tarPathValidationAttacks.test.ts`) + +These build the F1 / F2 / F2-linkpath / F3 / F5 proof-of-concept archives from +the security analysis (see `docs/zip-slip-*`) as **raw tar bytes**, so they can +craft malicious PAX bodies and typeflags that `node-tar`'s `Header` encoder +would never emit. They assert the validator refuses each one, and they include +real-`tar` end-to-end extraction assertions (using a scratch dir via +`mkdtempSync`). + +### Bypass detection (via `listAndValidate`) + +- **F1**: unknown typeflag → `UNSUPPORTED_TYPE` +- **F2**: PAX `path` newline differential → `PAX_DESYNC` +- **F2-linkpath**: PAX `linkpath` newline differential → `PAX_DESYNC` +- **F3**: oversized PAX header → `UNSUPPORTED_TYPE` +- **F5**: sparse typeflag → `UNSUPPORTED_TYPE` +- Glob metacharacter in entry path → `GLOB_METACHAR` +- Newline in entry path → `UNSAFE_CHAR` +- NUL byte in a symlink target (delivered via PAX) → `UNSAFE_CHAR` +- Unknown PAX key → `PAX_UNKNOWN_KEY` +- Flood of extended headers → rejected by the pending-meta cap +- Clean archive → `approvedNames` lists every concrete entry, no violations +- Legitimate long path via PAX → no violations, approved by its PAX path + +### End-to-end extraction (real `tar`) + +- `'error'` mode, clean archive → every approved member is extracted +- `'error'` mode, F2 archive → throws `CacheIntegrityError` and writes nothing + to the workspace + ## Integration tests — real archives (`listAndValidate.test.ts`) These build small tar archives in memory using `tar.Header`, write them to disk, @@ -139,8 +221,9 @@ Skipped on hosts without `zstd` installed. ## Integration tests — mocked downstream (`tarPathValidation.test.ts`) These mock `listAndValidate` so the test can deterministically inject "violation -lists" and observe `extractTar`'s reaction. They mock `@actions/exec`, -`@actions/io` and `@actions/core` to assert what does (and does not) get called. +lists" and observe `extractTar`'s reaction. The mock returns the production +shape `{violations, approvedNames}`. They mock `@actions/exec`, `@actions/io` +and `@actions/core` to assert what does (and does not) get called. ### `pathValidation: 'off'` (default) @@ -165,6 +248,15 @@ lists" and observe `extractTar`'s reaction. They mock `@actions/exec`, - Parse failure in `'warn'` mode → warning is logged, validation is skipped, and extraction still proceeds +### `pathValidation: 'error'` extraction allow-list + +- Clean archive → extraction is restricted to the approved members: the command + contains `--null` immediately before `-T ""`, the `-T` file holds the + NUL-separated `approvedNames`, and the temporary allow-list file is cleaned up + after extraction +- `'warn'` mode does **not** restrict extraction (no `-T`) even on a clean + archive + ### Plumbing - `declaredPaths` is forwarded to `listAndValidate` @@ -201,6 +293,8 @@ npx jest --testTimeout 70000 packages/cache # just the new path-validation suites npx jest --testTimeout 70000 \ packages/cache/__tests__/pathValidation.test.ts \ + packages/cache/__tests__/pax-reparse.test.ts \ packages/cache/__tests__/listAndValidate.test.ts \ - packages/cache/__tests__/tarPathValidation.test.ts + packages/cache/__tests__/tarPathValidation.test.ts \ + packages/cache/__tests__/tarPathValidationAttacks.test.ts ``` diff --git a/packages/cache/package-lock.json b/packages/cache/package-lock.json index 36e4a00a77..dc1491f6be 100644 --- a/packages/cache/package-lock.json +++ b/packages/cache/package-lock.json @@ -1,12 +1,12 @@ { "name": "@actions/cache", - "version": "6.1.0", + "version": "6.2.0", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "@actions/cache", - "version": "6.1.0", + "version": "6.2.0", "license": "MIT", "dependencies": { "@actions/core": "^3.0.1", diff --git a/packages/cache/package.json b/packages/cache/package.json index c579505869..950b4f58b6 100644 --- a/packages/cache/package.json +++ b/packages/cache/package.json @@ -1,6 +1,6 @@ { "name": "@actions/cache", - "version": "6.1.0", + "version": "6.2.0", "description": "Actions cache lib", "keywords": [ "github", From a41517f52ac2be9c6b69dcd5283b2e423d83c628 Mon Sep 17 00:00:00 2001 From: Jason Ginchereau Date: Fri, 19 Jun 2026 15:35:16 -1000 Subject: [PATCH 10/10] Improve path validations and test coverage --- .../cache/__tests__/pathValidation.test.ts | 12 +- packages/cache/__tests__/pax-reparse.test.ts | 41 +++- .../tarPathValidationAttacks.test.ts | 178 ++++++++++++++---- .../cache/docs/path-validation-test-plan.md | 55 ++++-- .../cache/src/internal/listAndValidate.ts | 152 ++++++--------- packages/cache/src/internal/pathValidation.ts | 34 +++- packages/cache/src/internal/pax-reparse.ts | 34 +++- packages/cache/src/internal/tar.ts | 34 +++- 8 files changed, 358 insertions(+), 182 deletions(-) diff --git a/packages/cache/__tests__/pathValidation.test.ts b/packages/cache/__tests__/pathValidation.test.ts index 43463226f9..199798641b 100644 --- a/packages/cache/__tests__/pathValidation.test.ts +++ b/packages/cache/__tests__/pathValidation.test.ts @@ -979,10 +979,11 @@ describe('validateEntry', () => { }) }) - describe('control characters in filenames (legal but unusual)', () => { - test('embedded newline is treated literally as part of the segment', () => { - // node-tar reports the full filename including the embedded newline. - // It's a single segment under node_modules — must be accepted. + describe('control characters in filenames', () => { + test('embedded newline is rejected as an unsafe control character', () => { + // A newline in a member name would corrupt a newline-delimited tar file + // list and enables log / line injection, so it is rejected even though + // it is a single segment under node_modules. const r = validateEntry( 'node_modules/file\nwith newline', undefined, @@ -990,7 +991,8 @@ describe('validateEntry', () => { allowedRoots, cwd ) - expect(r.ok).toBe(true) + expect(r.ok).toBe(false) + if (!r.ok) expect(r.code).toBe('UNSAFE_CHAR') }) test('embedded tab is accepted', () => { diff --git a/packages/cache/__tests__/pax-reparse.test.ts b/packages/cache/__tests__/pax-reparse.test.ts index 4e052841a2..2d5263cff0 100644 --- a/packages/cache/__tests__/pax-reparse.test.ts +++ b/packages/cache/__tests__/pax-reparse.test.ts @@ -23,17 +23,17 @@ describe('parsePaxLengthCorrect', () => { }) test('newline-in-value: the embedded fake record is swallowed by comment', () => { - // This is the F2 PAX body. A naive split('\n') parser (node-tar) ends up - // with path=safe.txt; the length-correct parser must yield the real + // An embedded-newline PAX body. A naive split('\n') parser (node-tar) ends + // up with path=safe.txt; the length-correct parser must yield the real // (malicious) path and treat the rest as the comment value. const buf = Buffer.from( - '42 path=../../../../../../tmp/zip_slip_F2\n30 comment=x\n17 path=safe.txt\n', + '42 path=../../../../../../tmp/escaped_pax\n30 comment=x\n17 path=safe.txt\n', 'ascii' ) const {records, ok} = parsePaxLengthCorrect(buf) expect(ok).toBe(true) expect(records['path'].toString('utf8')).toBe( - '../../../../../../tmp/zip_slip_F2' + '../../../../../../tmp/escaped_pax' ) expect(records['comment'].toString('utf8')).toBe('x\n17 path=safe.txt') // Crucially NOT safe.txt. @@ -133,16 +133,16 @@ describe('crossCheckMetaBodies', () => { expect(v).toEqual([]) }) - test('F2 path desync: node-tar resolved safe.txt, length-correct disagrees', () => { + test('PAX path desync: node-tar resolved safe.txt, length-correct disagrees', () => { const buf = Buffer.from( - '42 path=../../../../../../tmp/zip_slip_F2\n30 comment=x\n17 path=safe.txt\n', + '42 path=../../../../../../tmp/escaped_pax\n30 comment=x\n17 path=safe.txt\n', 'ascii' ) const v = crossCheckMetaBodies([buf], 'safe.txt', undefined) expect(v.map(x => x.code)).toContain('PAX_DESYNC') }) - test('F2-linkpath desync: node-tar resolved safe/target, length-correct disagrees', () => { + test('PAX linkpath desync: node-tar resolved safe/target, length-correct disagrees', () => { const buf = Buffer.from( '34 linkpath=../../../../../../tmp\n37 comment=x\n24 linkpath=safe/target\n', 'ascii' @@ -162,10 +162,9 @@ describe('crossCheckMetaBodies', () => { expect(v.map(x => x.code)).toContain('PAX_UNKNOWN_KEY') }) - test('known SCHILY/GNU/LIBARCHIVE prefixed keys are accepted', () => { + test('known SCHILY/LIBARCHIVE prefixed keys are accepted', () => { const records = [ 'SCHILY.xattr.user.foo=bar', - 'GNU.sparse.realsize=1024', 'LIBARCHIVE.creationtime=1700000000' ] const body = records @@ -185,6 +184,30 @@ describe('crossCheckMetaBodies', () => { expect(v).toEqual([]) }) + test('GNU.sparse.* keys are rejected as PAX_UNSUPPORTED_KEY', () => { + // node-tar v7 does not process GNU sparse keys, so it surfaces the entry + // under its header path while system tar would reconstruct the file at + // `GNU.sparse.name` (here pointing outside the cache roots). The path / + // linkpath cross-check cannot see this, so the sparse namespace must be + // rejected outright even though it is under the broadly-allowed `GNU.` + // prefix. + const body = Buffer.from( + rec('GNU.sparse.major=1') + + rec('GNU.sparse.minor=0') + + rec('GNU.sparse.name=../../../../tmp/evil') + + rec('GNU.sparse.realsize=1024'), + 'ascii' + ) + const v = crossCheckMetaBodies( + [body], + 'cache/GNUSparseFile.0/decoy', + undefined + ) + expect(v.map(x => x.code)).toContain('PAX_UNSUPPORTED_KEY') + // It must NOT also be misreported as merely an unknown key. + expect(v.map(x => x.code)).not.toContain('PAX_UNKNOWN_KEY') + }) + test('GNU long-name raw body matching entry path: no violation', () => { // A GNU LongName body is a raw NUL-terminated path with no length prefix. const raw = Buffer.concat([ diff --git a/packages/cache/__tests__/tarPathValidationAttacks.test.ts b/packages/cache/__tests__/tarPathValidationAttacks.test.ts index d363be9367..08a9c20b59 100644 --- a/packages/cache/__tests__/tarPathValidationAttacks.test.ts +++ b/packages/cache/__tests__/tarPathValidationAttacks.test.ts @@ -16,11 +16,13 @@ import {extractTar} from '../src/internal/tar' import {CacheIntegrityError} from '../src/internal/cacheIntegrityError' /** - * Parser-differential bypass regression tests. These build the F1 / F2 / - * F2-linkpath / F3 / F5 PoC archives from the security analysis as raw tar - * bytes (so we can craft malicious PAX bodies and typeflags that node-tar's - * Header encoder would never produce) and assert the validator now refuses - * each one. See docs/zip-slip-* for the analysis. + * Parser-differential bypass regression tests. Each case builds a malicious + * archive as raw tar bytes (so we can craft PAX bodies and typeflags that + * node-tar's Header encoder would never produce) designed to make node-tar's + * in-process listing disagree with the path the system `tar` extractor would + * write to, then asserts the validator refuses it. The vectors covered are: an + * unknown typeflag byte, a PAX `path=` / `linkpath=` record with an embedded + * newline, an oversized PAX header, and a GNU sparse typeflag. */ // --------------------------------------------------------------------------- @@ -115,30 +117,33 @@ function paxRecord(content: string): string { } // --------------------------------------------------------------------------- -// PoC archives +// Malicious archives // --------------------------------------------------------------------------- -// F1 — unknown typeflag byte ('Z') is emitted by node-tar as an ignoredEntry. -const F1 = Buffer.concat([ +// Unknown typeflag byte ('Z') is emitted by node-tar as an ignoredEntry, but +// system tar would extract it as a regular file. +const unknownTypeflagArchive = Buffer.concat([ fileEntry('cache/safe.txt', 'ok'), - fileEntry('../../../../../../tmp/zip_slip_F1', 'F1 pwned', 'Z'), + fileEntry('../../../../../../tmp/escaped_unknown_type', 'pwned', 'Z'), end() ]) -// F2 — PAX `path=` newline differential. -const F2 = Buffer.concat([ +// PAX `path=` record whose value carries an embedded newline. node-tar's naive +// `split('\n')` parse resolves the trailing `path=safe.txt`, while a +// length-correct parse (matching system tar) resolves the escaping path. +const paxPathNewlineArchive = Buffer.concat([ paxEntry( Buffer.from( - '42 path=../../../../../../tmp/zip_slip_F2\n30 comment=x\n17 path=safe.txt\n', + '42 path=../../../../../../tmp/escaped_pax\n30 comment=x\n17 path=safe.txt\n', 'ascii' ) ), - fileEntry('cache/safe.txt', 'F2 pwned'), + fileEntry('cache/safe.txt', 'pwned'), end() ]) -// F2-linkpath — same differential, applied to a symlink's `linkpath=`. -const F2L = Buffer.concat([ +// The same embedded-newline differential applied to a symlink's `linkpath=`. +const paxLinkpathNewlineArchive = Buffer.concat([ paxEntry( Buffer.from( '34 linkpath=../../../../../../tmp\n37 comment=x\n24 linkpath=safe/target\n', @@ -149,23 +154,24 @@ const F2L = Buffer.concat([ end() ]) -// F3 — oversized PAX header (> 1 MiB) is dropped by node-tar's -// maxMetaEntrySize and would otherwise let a `path=` override slip through. -const F3 = Buffer.concat([ +// Oversized PAX header (> 1 MiB) is dropped by node-tar's maxMetaEntrySize and +// would otherwise let the `path=` override slip through unseen. +const oversizedPaxHeaderArchive = Buffer.concat([ paxEntry( Buffer.concat([ Buffer.from(`1048600 comment=${'A'.repeat(1048600 - 17)}\n`, 'ascii'), - Buffer.from('42 path=../../../../../../tmp/zip_slip_F3\n', 'ascii') + Buffer.from('42 path=../../../../../../tmp/escaped_big\n', 'ascii') ]) ), - fileEntry('cache/safe.txt', 'F3 pwned'), + fileEntry('cache/safe.txt', 'pwned'), end() ]) -// F5 — sparse typeflag 'S' is mapped but ignored by node-tar's ReadEntry. -const F5 = Buffer.concat([ +// GNU sparse typeflag 'S' is mapped but ignored by node-tar's ReadEntry, while +// system tar would extract it. +const sparseTypeflagArchive = Buffer.concat([ fileEntry('cache/decoy.txt', 'ok'), - fileEntry('../../../../../../tmp/zip_slip_F5', '', 'S'), + fileEntry('../../../../../../tmp/escaped_sparse', '', 'S'), end() ]) @@ -228,33 +234,50 @@ afterAll(() => { }) describe('listAndValidate: parser-differential bypass detection', () => { - test('F1: unknown typeflag is rejected as UNSUPPORTED_TYPE', async () => { - const {violations, approvedNames} = await validate(F1, 'f1.tar.gz') + test('unknown typeflag is rejected as UNSUPPORTED_TYPE', async () => { + const {violations, approvedNames} = await validate( + unknownTypeflagArchive, + 'unknown-typeflag.tar.gz' + ) expect(violations).toContain('UNSUPPORTED_TYPE') // The escaping entry must NOT be approved for extraction. - expect(approvedNames).not.toContain('../../../../../../tmp/zip_slip_F1') + expect(approvedNames).not.toContain( + '../../../../../../tmp/escaped_unknown_type' + ) }) - test('F2: PAX path newline differential is rejected as PAX_DESYNC', async () => { - const {violations, approvedNames} = await validate(F2, 'f2.tar.gz') + test('PAX path newline differential is rejected as PAX_DESYNC', async () => { + const {violations, approvedNames} = await validate( + paxPathNewlineArchive, + 'pax-path-newline.tar.gz' + ) expect(violations).toContain('PAX_DESYNC') expect(approvedNames).toEqual([]) }) - test('F2-linkpath: PAX linkpath newline differential is rejected as PAX_DESYNC', async () => { - const {violations} = await validate(F2L, 'f2l.tar.gz') + test('PAX linkpath newline differential is rejected as PAX_DESYNC', async () => { + const {violations} = await validate( + paxLinkpathNewlineArchive, + 'pax-linkpath-newline.tar.gz' + ) expect(violations).toContain('PAX_DESYNC') }) - test('F3: oversized PAX header is rejected as UNSUPPORTED_TYPE', async () => { - const {violations} = await validate(F3, 'f3.tar.gz') + test('oversized PAX header is rejected as UNSUPPORTED_TYPE', async () => { + const {violations} = await validate( + oversizedPaxHeaderArchive, + 'oversized-pax.tar.gz' + ) expect(violations).toContain('UNSUPPORTED_TYPE') }) - test('F5: sparse typeflag is rejected as UNSUPPORTED_TYPE', async () => { - const {violations, approvedNames} = await validate(F5, 'f5.tar.gz') + test('sparse typeflag is rejected as UNSUPPORTED_TYPE', async () => { + const {violations, approvedNames} = await validate( + sparseTypeflagArchive, + 'sparse-typeflag.tar.gz' + ) expect(violations).toContain('UNSUPPORTED_TYPE') - expect(approvedNames).not.toContain('../../../../../../tmp/zip_slip_F5') + expect(approvedNames).not.toContain('../../../../../../tmp/escaped_sparse') }) test('glob metacharacter in entry path is rejected as GLOB_METACHAR', async () => { @@ -287,14 +310,14 @@ describe('listAndValidate: parser-differential bypass detection', () => { expect(violations).toContain('UNSAFE_CHAR') }) - test('NUL byte in a symlink target (via PAX) is rejected as UNSAFE_CHAR', async () => { + test('NUL byte in a symlink target (via PAX) is rejected as NUL_BYTE', async () => { const archive = Buffer.concat([ paxEntry(Buffer.from(paxRecord('linkpath=cache/sub/t\0'), 'ascii')), header({name: 'cache/link', typeflag: '2', linkname: 'cache/sub/t'}), end() ]) const {violations} = await validate(archive, 'nul-link.tar.gz') - expect(violations).toContain('UNSAFE_CHAR') + expect(violations).toContain('NUL_BYTE') }) test('legitimate long path via PAX: no violations, approved by its PAX path', async () => { @@ -323,6 +346,28 @@ describe('listAndValidate: parser-differential bypass detection', () => { expect(violations).toContain('PAX_UNKNOWN_KEY') }) + test('PAX sparse (GNU.sparse.name) is rejected as PAX_UNSUPPORTED_KEY', async () => { + // node-tar v7 ignores GNU sparse keys, so without this rejection the entry + // would be approved under its benign header path while system tar would + // reconstruct the file at GNU.sparse.name (here outside the cache roots). + const sparseName = '../../../../../../tmp/escaped_sparse_pax' + const body = Buffer.from( + paxRecord('GNU.sparse.major=1') + + paxRecord('GNU.sparse.minor=0') + + paxRecord(`GNU.sparse.name=${sparseName}`) + + paxRecord('GNU.sparse.realsize=4'), + 'ascii' + ) + const archive = Buffer.concat([ + paxEntry(body), + fileEntry('cache/GNUSparseFile.0/decoy', 'data'), + end() + ]) + const {violations, approvedNames} = await validate(archive, 'sparse.tar.gz') + expect(violations).toContain('PAX_UNSUPPORTED_KEY') + expect(approvedNames).not.toContain(sparseName) + }) + test('flood of extended headers is rejected (pending-meta cap)', async () => { const metas: Buffer[] = [] for (let i = 0; i < 70; i++) { @@ -382,12 +427,12 @@ describeTar('extractTar end-to-end with system tar allow-list', () => { expect(existsSync(path.join(dest, 'cache', 'sub', 'deep.txt'))).toBe(true) }) - test('error mode, F2 archive: throws and writes nothing to the workspace', async () => { - const dest = mkdtempSync(path.join(ROOT, 'extract-f2-')) + test('error mode, PAX path newline archive: throws and writes nothing to the workspace', async () => { + const dest = mkdtempSync(path.join(ROOT, 'extract-pax-path-newline-')) process.env['GITHUB_WORKSPACE'] = dest - const archivePath = path.join(dest, 'f2.tar.gz') + const archivePath = path.join(dest, 'pax-path-newline.tar.gz') mkdirSync(dest, {recursive: true}) - writeFileSync(archivePath, gzipSync(F2)) + writeFileSync(archivePath, gzipSync(paxPathNewlineArchive)) await expect( extractTar(archivePath, CompressionMethod.Gzip, { @@ -400,4 +445,55 @@ describeTar('extractTar end-to-end with system tar allow-list', () => { expect(existsSync(path.join(dest, 'cache'))).toBe(false) expect(existsSync(path.join(dest, 'safe.txt'))).toBe(false) }) + + test('error mode: a leading ./ entry still extracts (allow-list name matches)', async () => { + // node-tar surfaces the entry as `./cache/dotslash.txt`; the `-T` allow + // list must use the canonical `cache/dotslash.txt` so the member is not + // silently skipped. Verifies the canonicalMemberName normalization under + // whichever system tar is present (GNU on Linux CI, BSD on macOS). + const dest = mkdtempSync(path.join(ROOT, 'extract-dotslash-')) + process.env['GITHUB_WORKSPACE'] = dest + const archive = Buffer.concat([ + dirEntry('cache/'), + fileEntry('./cache/dotslash.txt', 'dot'), + end() + ]) + const archivePath = path.join(dest, 'dotslash.tar.gz') + mkdirSync(dest, {recursive: true}) + writeFileSync(archivePath, gzipSync(archive)) + + await extractTar(archivePath, CompressionMethod.Gzip, { + declaredPaths: ['cache/**'], + pathValidation: 'error' + }) + + expect(existsSync(path.join(dest, 'cache', 'dotslash.txt'))).toBe(true) + expect(readFileSync(path.join(dest, 'cache', 'dotslash.txt'), 'utf8')).toBe( + 'dot' + ) + }) + + test('error mode: a long path via PAX is extracted, not dropped by the allow-list', async () => { + // > 100 bytes, so the name travels via a PAX `path=` record (and a GNU + // long-name on creation). Exercises long-name matching in the `-T` list so + // a legitimate long path is not silently skipped during extraction. + const dest = mkdtempSync(path.join(ROOT, 'extract-long-')) + process.env['GITHUB_WORKSPACE'] = dest + const longRel = `cache/${'x'.repeat(110)}.txt` + const archive = Buffer.concat([ + paxEntry(Buffer.from(paxRecord(`path=${longRel}`), 'ascii')), + fileEntry('cache/placeholder', 'L'), + end() + ]) + const archivePath = path.join(dest, 'long.tar.gz') + mkdirSync(dest, {recursive: true}) + writeFileSync(archivePath, gzipSync(archive)) + + await extractTar(archivePath, CompressionMethod.Gzip, { + declaredPaths: ['cache/**'], + pathValidation: 'error' + }) + + expect(existsSync(path.join(dest, longRel))).toBe(true) + }) }) diff --git a/packages/cache/docs/path-validation-test-plan.md b/packages/cache/docs/path-validation-test-plan.md index 9f6ea95b71..6151bad5b1 100644 --- a/packages/cache/docs/path-validation-test-plan.md +++ b/packages/cache/docs/path-validation-test-plan.md @@ -97,6 +97,12 @@ and platform-specific behavior: `C:/...`, Windows drive-relative `C:foo`, UNC `\\server\share`, UNC forward-slash, UNC long-path prefix `\\?\C:\...` - **NUL byte attacks**: NUL in path, NUL in symlink target +- **Unsafe control characters**: embedded newline in an entry path or link + target → `UNSAFE_CHAR` (would corrupt a newline-delimited tar file list and + enables log injection). A tab is still accepted (legal, non-corrupting). +- **Glob metacharacters** (`* ? [ ]`) in an entry path → `GLOB_METACHAR` + (entry paths populate the system-tar `-T` allow-list, which bsdtar matches + with `fnmatch()`; link targets are exempt since they are not written there) - **Symlink attacks**: - **Syntactic link-target rejects** (these fire before the containment check, on the same allow-list as entry-path syntax): POSIX absolute (`/etc/passwd`), @@ -149,11 +155,17 @@ cross-checks the result against `node-tar`'s view of the entry. ### `crossCheckMetaBodies` - Clean PAX `path` matching `node-tar` → no violations -- **F2 path desync** → `PAX_DESYNC` (node-tar resolved a safe name, the +- **PAX path desync** → `PAX_DESYNC` (node-tar resolved a safe name, the length-correct parse disagrees) -- **F2-linkpath desync** → `PAX_DESYNC` +- **PAX linkpath desync** → `PAX_DESYNC` - Unknown PAX key → `PAX_UNKNOWN_KEY` -- Known `SCHILY.` / `GNU.` / `LIBARCHIVE.` prefixed keys accepted +- **`GNU.sparse.*` keys → `PAX_UNSUPPORTED_KEY`** — node-tar v7 ignores GNU + sparse keys, so it surfaces the entry under its header `path` while system + tar would reconstruct the file at `GNU.sparse.name` (an attacker-controlled, + potentially escaping location). A cache archive never legitimately contains + sparse members, so the namespace is rejected outright even though it falls + under the broadly-allowed `GNU.` prefix. +- Known `SCHILY.` / `LIBARCHIVE.` prefixed keys accepted - GNU long-name raw body matching the entry path (or the link target) → no violation; a body matching neither → flagged - No meta bodies → no violations @@ -164,23 +176,25 @@ cross-checks the result against `node-tar`'s view of the entry. ## Integration tests — parser-differential attacks (`tarPathValidationAttacks.test.ts`) -These build the F1 / F2 / F2-linkpath / F3 / F5 proof-of-concept archives from -the security analysis (see `docs/zip-slip-*`) as **raw tar bytes**, so they can -craft malicious PAX bodies and typeflags that `node-tar`'s `Header` encoder -would never emit. They assert the validator refuses each one, and they include -real-`tar` end-to-end extraction assertions (using a scratch dir via -`mkdtempSync`). +These build malicious archives as **raw tar bytes**, so they can craft PAX +bodies and typeflags that `node-tar`'s `Header` encoder would never emit. Each +is designed to make node-tar's in-process listing disagree with the path the +system `tar` extractor would write to. They assert the validator refuses each +one, and they include real-`tar` end-to-end extraction assertions (using a +scratch dir via `mkdtempSync`). ### Bypass detection (via `listAndValidate`) -- **F1**: unknown typeflag → `UNSUPPORTED_TYPE` -- **F2**: PAX `path` newline differential → `PAX_DESYNC` -- **F2-linkpath**: PAX `linkpath` newline differential → `PAX_DESYNC` -- **F3**: oversized PAX header → `UNSUPPORTED_TYPE` -- **F5**: sparse typeflag → `UNSUPPORTED_TYPE` +- Unknown typeflag → `UNSUPPORTED_TYPE` +- PAX `path` newline differential → `PAX_DESYNC` +- PAX `linkpath` newline differential → `PAX_DESYNC` +- Oversized PAX header → `UNSUPPORTED_TYPE` +- GNU sparse typeflag → `UNSUPPORTED_TYPE` +- **PAX sparse** (`GNU.sparse.name` with a regular typeflag) → `PAX_UNSUPPORTED_KEY`, + and the escaping sparse name is not approved - Glob metacharacter in entry path → `GLOB_METACHAR` - Newline in entry path → `UNSAFE_CHAR` -- NUL byte in a symlink target (delivered via PAX) → `UNSAFE_CHAR` +- NUL byte in a symlink target (delivered via PAX) → `NUL_BYTE` - Unknown PAX key → `PAX_UNKNOWN_KEY` - Flood of extended headers → rejected by the pending-meta cap - Clean archive → `approvedNames` lists every concrete entry, no violations @@ -188,9 +202,16 @@ real-`tar` end-to-end extraction assertions (using a scratch dir via ### End-to-end extraction (real `tar`) +Run against whichever system `tar` is present, so CI exercises GNU tar on Linux +and bsdtar on macOS (and `tar.exe` on Windows): + - `'error'` mode, clean archive → every approved member is extracted -- `'error'` mode, F2 archive → throws `CacheIntegrityError` and writes nothing - to the workspace +- `'error'` mode, PAX path newline archive → throws `CacheIntegrityError` and + writes nothing to the workspace +- `'error'` mode, leading `./` entry → still extracted (the `-T` allow-list + uses the canonical `cache/f` name, so the member is not silently skipped) +- `'error'` mode, long path (> 100 bytes) delivered via PAX → extracted, not + dropped by the allow-list (exercises long-name matching in `-T`) ## Integration tests — real archives (`listAndValidate.test.ts`) diff --git a/packages/cache/src/internal/listAndValidate.ts b/packages/cache/src/internal/listAndValidate.ts index 40a0e06354..718304f283 100644 --- a/packages/cache/src/internal/listAndValidate.ts +++ b/packages/cache/src/internal/listAndValidate.ts @@ -29,11 +29,15 @@ export interface ListAndValidateResult { /** * Upper bound on the size of a PAX / GNU extended-header body the validator is - * willing to re-parse. node-tar's default is 1 MiB; real extended headers are - * a few hundred bytes at most. Anything larger is emitted as an + * willing to re-parse, passed to node-tar as `maxMetaEntrySize`. Real extended + * headers are a few hundred bytes at most; anything larger is emitted as an * `ignoredEntry` (see the handler below) and recorded as a violation rather - * than silently dropped — this is the F3 (oversized-PAX) defence, made - * explicit instead of relying on node-tar's internal default. + * than silently dropped — the oversized-extended-header defence. + * + * This deliberately matches node-tar's current 1 MiB default. Pinning it here + * (rather than leaving the option unset) keeps the oversized-header threshold + * under our control even if a future node-tar release changes its internal + * default. */ const META_REJECT_BYTES = 1024 * 1024 @@ -87,7 +91,9 @@ export async function listAndValidate( // body string of each PAX / GNU long-name header *before* it emits the // concrete entry the header applies to. We re-parse these length-correctly // (see pax-reparse.ts) and cross-check against node-tar's resolved - // path/linkpath to catch the F2 / F2-linkpath PAX newline differential. + // path / linkpath to catch PAX newline differentials, where an embedded + // newline in a PAX value makes node-tar resolve a different path than the + // system tar that performs the extraction. let pendingMeta: Buffer[] = [] const consumePendingMeta = (): Buffer[] => { @@ -96,6 +102,25 @@ export async function listAndValidate( return bodies } + // Record a single validation failure. Every call site shares the same shape + // — path / linkpath / type come from the ReadEntry; only the code, reason and + // resolved location vary — so funnel them through one helper. + const addViolation = ( + entry: ReadEntry, + code: PathValidationViolation['code'], + reason: string, + resolved: string + ): void => { + violations.push({ + path: entry.path, + linkpath: entry.linkpath || undefined, + resolved, + entryType: entry.type, + code, + reason + }) + } + // For gzip we let node-tar handle decompression internally (its built-in // gzip support is mature). For zstd we spawn the system `zstd` binary so // we get the same `--long=30` window-size handling as the existing @@ -109,8 +134,8 @@ export async function listAndValidate( // via the captured error below. strict: false, // Cap extended-header bodies explicitly so an oversized PAX header is - // turned into a recorded `ignoredEntry` violation rather than relying on - // node-tar's internal default (F3 defence). + // turned into a recorded `ignoredEntry` violation and the oversized-header + // threshold stays under our control (see META_REJECT_BYTES). maxMetaEntrySize: META_REJECT_BYTES, // Treat structural problems (bad archive, bad header, bad chksum) as // hard parse errors — silently ignoring them would let a corrupt @@ -129,6 +154,7 @@ export async function listAndValidate( onReadEntry: (entry: ReadEntry) => { try { const metaBodies = consumePendingMeta() + let approved = true // Cross-check any PAX / long-name headers that preceded this entry // against node-tar's resolved view. A disagreement means node-tar @@ -138,22 +164,16 @@ export async function listAndValidate( entry.path, entry.linkpath || undefined )) { - violations.push({ - path: entry.path, - linkpath: entry.linkpath || undefined, - resolved: entry.path, - entryType: entry.type, - code: pax.code, - reason: pax.reason - }) + addViolation(entry, pax.code, pax.reason, entry.path) + approved = false } - // Reject characters that would corrupt the extraction allow-list or - // be reinterpreted by system tar's `-T` matching (glob metacharacters - // on bsdtar's fnmatch path). These have no legitimate use in a cache - // entry path. - const charViolation = checkUnsafeChars(entry.path, entry.linkpath) - + // validateEntry performs every per-string syntactic check (NUL and + // other control characters, glob metacharacters, UNC / absolute / + // drive-relative forms) on BOTH the entry path and any link target, + // plus the allowed-root containment check. Keeping all of those checks + // in one place (pathValidation) is what makes entry paths and link + // targets treated consistently. const result = validateEntry( entry.path, entry.linkpath || undefined, @@ -162,32 +182,16 @@ export async function listAndValidate( extractCwd ) if (!result.ok) { - violations.push({ - path: entry.path, - linkpath: entry.linkpath || undefined, - resolved: result.resolved, - entryType: entry.type, - code: result.code, - reason: result.reason - }) - } - - if (charViolation) { - violations.push({ - path: entry.path, - linkpath: entry.linkpath || undefined, - resolved: entry.path, - entryType: entry.type, - code: charViolation.code, - reason: charViolation.reason - }) + addViolation(entry, result.code, result.reason, result.resolved) + approved = false } - // Only entries that passed every check are eligible for the - // extraction allow-list. (When any violation exists the allow-list is - // unused — extraction is either blocked in 'error' mode or runs - // unrestricted in 'warn' mode — so this is belt-and-braces.) - if (result.ok && !charViolation) { + // Only entries that passed every check (PAX cross-check and path + // validation) are eligible for the extraction allow-list. When any + // violation exists the allow-list is unused anyway — extraction is + // either blocked in 'error' mode or runs unrestricted in 'warn' mode — + // but tracking approval per entry keeps the contract simple. + if (approved) { approvedNames.push(entry.path) } } finally { @@ -207,14 +211,12 @@ export async function listAndValidate( // The meta header(s) that preceded an ignored entry belong to it; discard // them so they aren't mis-associated with a later concrete entry. consumePendingMeta() - violations.push({ - path: entry.path, - linkpath: entry.linkpath || undefined, - resolved: entry.path, - entryType: entry.type, - code: 'UNSUPPORTED_TYPE', - reason: `parser ignored entry of type ${entry.type}` - }) + addViolation( + entry, + 'UNSUPPORTED_TYPE', + `parser ignored entry of type ${entry.type}`, + entry.path + ) }) // Capture the raw body of each extended-header (meta) entry. node-tar @@ -239,50 +241,6 @@ export async function listAndValidate( return {violations, approvedNames} } -/** - * Reject characters in an entry path (or link target) that have no legitimate - * place in a cache archive and that would either corrupt the extraction - * allow-list or be reinterpreted by system tar's `-T` member matching: - * - * - NUL / newline in the entry path — would split or terminate a list entry. - * - glob metacharacters (`* ? [ ]`) in the entry path — bsdtar matches `-T` - * names with `fnmatch()`, so an unescaped metacharacter could match (and - * extract) members other than the one approved. GNU tar's `--no-wildcards` - * also covers this, but rejecting unconditionally keeps the behaviour - * identical across tar implementations. - * - NUL in a link target — same list-corruption concern. - */ -function checkUnsafeChars( - entryPath: string, - linkPath: string | undefined -): {code: 'UNSAFE_CHAR' | 'GLOB_METACHAR'; reason: string} | undefined { - if (entryPath.includes('\0') || entryPath.includes('\n')) { - return { - code: 'UNSAFE_CHAR', - reason: `entry path contains an unsafe control character: ${JSON.stringify( - entryPath - )}` - } - } - if (/[*?[\]]/.test(entryPath)) { - return { - code: 'GLOB_METACHAR', - reason: `entry path contains a glob metacharacter: ${JSON.stringify( - entryPath - )}` - } - } - if (linkPath !== undefined && linkPath.includes('\0')) { - return { - code: 'UNSAFE_CHAR', - reason: `link target contains an unsafe control character: ${JSON.stringify( - linkPath - )}` - } - } - return undefined -} - async function streamArchiveTo( archivePath: string, compressionMethod: CompressionMethod, diff --git a/packages/cache/src/internal/pathValidation.ts b/packages/cache/src/internal/pathValidation.ts index ab2d531f12..3369cf6398 100644 --- a/packages/cache/src/internal/pathValidation.ts +++ b/packages/cache/src/internal/pathValidation.ts @@ -30,6 +30,7 @@ export interface PathValidationViolation { | 'PAX_DESYNC' | 'PAX_PARSE_FAIL' | 'PAX_UNKNOWN_KEY' + | 'PAX_UNSUPPORTED_KEY' | 'UNSAFE_CHAR' | 'GLOB_METACHAR' /** Human-readable description of the violation. */ @@ -400,7 +401,12 @@ export function validateEntry( * pre-resolution syntactic checks. */ interface PathSyntaxError { - code: 'NUL_BYTE' | 'UNC_PATH' | 'ABSOLUTE_PATH' + code: + | 'NUL_BYTE' + | 'UNSAFE_CHAR' + | 'UNC_PATH' + | 'ABSOLUTE_PATH' + | 'GLOB_METACHAR' reason: string } @@ -429,6 +435,19 @@ function checkPathSyntax( if (p.includes('\0')) { return {code: 'NUL_BYTE', reason: `NUL byte in ${kind}`} } + // Reject newline characters. They have no legitimate place in a cache entry + // path or link target, would corrupt a newline-delimited tar file list, and + // enable log / line injection in the violation output. NUL is handled above + // with its own, more specific code. Applied to both entry paths and link + // targets so the two are treated consistently. + if (p.includes('\n')) { + return { + code: 'UNSAFE_CHAR', + reason: `unsafe control character (newline) in ${kind}: ${JSON.stringify( + p + )}` + } + } // Reject UNC paths. Check the original string before any separator // normalization because UNC is identified by leading `\\` or `//`. if ( @@ -454,6 +473,19 @@ function checkPathSyntax( reason: `absolute ${kind} not allowed: ${p}` } } + // Reject glob metacharacters, but only in entry paths. Approved entry paths + // are written to the system-tar `-T` extraction allow-list, and bsdtar + // matches those names with fnmatch(), so an unescaped `*`, `?`, `[` or `]` + // could select (and extract) members other than the one approved. GNU tar + // is additionally run with --no-wildcards, but rejecting here keeps the + // behavior identical across tar implementations. Link targets are not + // written to the allow-list, so they are exempt. + if (kind === 'entry path' && /[*?[\]]/.test(p)) { + return { + code: 'GLOB_METACHAR', + reason: `glob metacharacter in ${kind}: ${JSON.stringify(p)}` + } + } return undefined } diff --git a/packages/cache/src/internal/pax-reparse.ts b/packages/cache/src/internal/pax-reparse.ts index 44f87e4c5e..129043e795 100644 --- a/packages/cache/src/internal/pax-reparse.ts +++ b/packages/cache/src/internal/pax-reparse.ts @@ -13,8 +13,7 @@ * legally contain `\n`. GNU tar and libarchive (bsdtar / Windows `tar.exe`) * both parse by consuming exactly `` bytes, so a value * containing an embedded `"\n path=\n"` desynchronises node-tar - * from every real extractor — the F2 / F2-linkpath parser-differential - * bypasses. + * from every real extractor — a path / linkpath parser-differential bypass. * * This module re-parses each captured PAX body the way every real extractor * does (length-prefixed, byte-accurate) and cross-checks the result against @@ -29,7 +28,11 @@ */ /** Machine-readable reason codes produced by the PAX cross-check. */ -export type PaxReparseCode = 'PAX_DESYNC' | 'PAX_PARSE_FAIL' | 'PAX_UNKNOWN_KEY' +export type PaxReparseCode = + | 'PAX_DESYNC' + | 'PAX_PARSE_FAIL' + | 'PAX_UNKNOWN_KEY' + | 'PAX_UNSUPPORTED_KEY' /** A single disagreement surfaced by {@link crossCheckMetaBodies}. */ export interface PaxReparseViolation { @@ -87,6 +90,24 @@ function isKnownPaxKey(key: string): boolean { return PAX_KNOWN_PREFIXES.some(prefix => key.startsWith(prefix)) } +/** + * PAX key prefixes that system tar (GNU tar / libarchive) acts on to place or + * reconstruct file content but that node-tar v7 does NOT process. node-tar + * surfaces such an entry under its header `path`, while system tar would write + * it elsewhere — GNU sparse files are reconstructed at `GNU.sparse.name`, which + * an attacker can point outside the cache roots. That is a listing-vs-extraction + * parser differential the `path` / `linkpath` cross-check cannot see (the key is + * neither `path` nor `linkpath`). A cache archive never legitimately contains + * sparse members, so any key in this namespace is rejected outright. This is + * checked before {@link isKnownPaxKey} so it takes precedence over the broad + * `GNU.` allow-list prefix. + */ +const PAX_REJECTED_PREFIXES: readonly string[] = ['GNU.sparse.'] + +function isRejectedPaxKey(key: string): boolean { + return PAX_REJECTED_PREFIXES.some(prefix => key.startsWith(prefix)) +} + /** Result of a length-correct parse of a single PAX extended-header body. */ export interface PaxParseResult { /** Decoded records, last-write-wins per key (POSIX-correct). Values kept as @@ -200,7 +221,12 @@ export function crossCheckMetaBodies( // A well-formed PAX extended header. sawPax = true for (const key of keys) { - if (!isKnownPaxKey(key)) { + if (isRejectedPaxKey(key)) { + violations.push({ + code: 'PAX_UNSUPPORTED_KEY', + reason: `unsupported placement-affecting PAX key '${key}' in extended header (node-tar ignores it; system tar would act on it)` + }) + } else if (!isKnownPaxKey(key)) { violations.push({ code: 'PAX_UNKNOWN_KEY', reason: `unknown PAX key '${key}' in extended header` diff --git a/packages/cache/src/internal/tar.ts b/packages/cache/src/internal/tar.ts index 14999489cf..93c83f3868 100644 --- a/packages/cache/src/internal/tar.ts +++ b/packages/cache/src/internal/tar.ts @@ -20,7 +20,8 @@ import { PathValidationMode, PathValidationViolation, deriveAllowedRoots, - formatViolationSummary + formatViolationSummary, + getWorkingDirectory } from './pathValidation.js' const IS_WINDOWS = process.platform === 'win32' @@ -203,10 +204,6 @@ async function getCommands( return [args.join(' ')] } -function getWorkingDirectory(): string { - return process.env['GITHUB_WORKSPACE'] ?? process.cwd() -} - // Common function for extractTar and listTar to get the compression method async function getDecompressionProgram( tarPath: ArchiveTool, @@ -431,16 +428,37 @@ function writeAllowList(approvedNames: string[]): string { const allowListPath = path.join( os.tmpdir(), `cache-allow-${process.pid}-${Date.now()}-${crypto - .randomBytes(4) + .randomBytes(8) .toString('hex')}.lst` ) const payload = Buffer.concat( - approvedNames.flatMap(name => [Buffer.from(name, 'utf8'), Buffer.from([0])]) + approvedNames.flatMap(name => [ + Buffer.from(canonicalMemberName(name), 'utf8'), + Buffer.from([0]) + ]) ) - writeFileSync(allowListPath, payload, {mode: 0o600}) + // `flag: 'wx'` (O_CREAT | O_EXCL | O_WRONLY) makes the open fail if the path + // already exists, so a file or symlink pre-planted at the (randomized) temp + // path on a shared/self-hosted runner cannot redirect or capture the write. + // mode 0o600 keeps the list readable only by the current user. Both options + // behave consistently on Windows, macOS and Linux. + writeFileSync(allowListPath, payload, {mode: 0o600, flag: 'wx'}) return allowListPath } +/** + * Canonicalize an approved entry name to the form system `tar` matches against + * its archive members when reading the `-T` allow-list. node-tar already + * converts backslashes to forward slashes; here we additionally strip any + * leading `./` (both GNU tar and bsdtar normalize member names this way) so an + * entry node-tar surfaced as `./cache/f` still matches the member `cache/f` + * and is not silently skipped during extraction. A trailing slash on a + * directory entry is preserved because tar matches directories with it. + */ +function canonicalMemberName(name: string): string { + return name.replace(/^(?:\.\/)+/, '') +} + function reportViolations( violations: PathValidationViolation[], mode: PathValidationMode