Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 7 additions & 2 deletions lib/collectors/codebase.js
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,10 @@ function extractSymbols(content) {
symbols.functions.push(match[1]);
}

const arrowPattern = /(?:const|let)\s+([a-zA-Z_$][a-zA-Z0-9_$]*)\s*=\s*(?:async\s*)?\([^)]*\)\s*=>/g;
// ReDoS fix: bound the unbounded \s* / async runs and the parameter list so the
// matcher cannot backtrack polynomially on pathological input. Bounds are large
// enough that all realistic source matches identically to the prior \s*/[^)]* form.
const arrowPattern = /(?:const|let)\s{1,1000}([a-zA-Z_$][a-zA-Z0-9_$]*)\s{0,1000}=\s{0,1000}(?:async\s{0,1000})?\([^)]{0,2000}\)\s{0,1000}=>/g;
while ((match = arrowPattern.exec(content)) !== null) {
symbols.functions.push(match[1]);
}
Expand All @@ -141,7 +144,9 @@ function extractSymbols(content) {
symbols.exports.push(match[1]);
}

const moduleExportsPattern = /module\.exports\s*=\s*\{([^}]+)\}/;
// ReDoS fix: bound the \s* runs and capture length so the matcher stays linear;
// bounds exceed any realistic module.exports declaration so matches are unchanged.
const moduleExportsPattern = /module\.exports\s{0,1000}=\s{0,1000}\{([^}]{1,100000})\}/;
const moduleMatch = content.match(moduleExportsPattern);
if (moduleMatch) {
const keys = moduleMatch[1].split(',').map(k => k.trim().split(':')[0].trim());
Expand Down
10 changes: 8 additions & 2 deletions lib/collectors/documentation.js
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,9 @@ function safeReadFile(filePath, basePath) {
* Analyze a single markdown file
*/
function analyzeMarkdownFile(content, filePath) {
const sectionMatches = content.match(/^##\s+(.+)$/gm) || [];
// ReDoS fix: bound the \s+ run after the ## marker; line-anchored (.+) cannot
// cross newlines so this matches the same headings as before.
const sectionMatches = content.match(/^##\s{1,1000}(.+)$/gm) || [];
const sections = sectionMatches.slice(0, 10).map(s => s.replace(/^##\s+/, ''));
const sectionLower = sections.map(s => s.toLowerCase()).join(' ');

Expand Down Expand Up @@ -83,7 +85,11 @@ function extractCheckboxes(result, content) {
* Extract documented features
*/
function extractFeatures(result, content) {
const featurePattern = /^[-*]\s+\*{0,2}(.+?)\*{0,2}(?:\s*[-–]\s*(.+))?$/gm;
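// ReDoS fix: bound the \s+ run and the line-content quantifiers so the lazy
// (.+?) / optional trailing (.+) pair cannot backtrack polynomially. [^\n]
// stands in for . here; note that . also excludes \r, \u2028 and \u2029, so
// the two are identical only on LF-terminated content (on CRLF input the greedy
// trailing capture can now include a final \r). The bounds far exceed the
// 80-char feature cap applied below, so LF-terminated matches are unchanged.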
const featurePattern = /^[-*]\s{1,100}\*{0,2}([^\n]{1,2000}?)\*{0,2}(?:\s{0,100}[-–]\s{0,100}([^\n]{1,2000}))?$/gm;
let match;

while ((match = featurePattern.exec(content)) !== null && result.features.length < 20) {
Expand Down
21 changes: 17 additions & 4 deletions lib/enhance/agent-patterns.js
Original file line number Diff line number Diff line change
Expand Up @@ -439,8 +439,17 @@ const agentPatterns = {

// Look for hardcoded .claude/ references
const hasHardcoded = /\.claude\//.test(content);
// Exclude if using AI_STATE_DIR
const usesEnvVar = /AI_STATE_DIR|\$\{.*STATE.*\}/i.test(content);
// Exclude if using AI_STATE_DIR or a ${...STATE...} env expression.
// ReDoS fix: the old /\$\{.*STATE.*\}/ (and the [^}]*STATE[^}]* rewrite)
// has two ambiguous quantifier runs -> polynomial backtrack. Instead
// scan each ${...} group with a single bounded [^}] run, then substring-
// test for STATE. Linear, and matches STATE in ANY ${...} like before.
let usesEnvVar = /AI_STATE_DIR/i.test(content);
if (!usesEnvVar) {
for (const m of content.matchAll(/\$\{([^}]{0,1000})\}/g)) {
if (/STATE/i.test(m[1])) { usesEnvVar = true; break; }
}
}

if (hasHardcoded && !usesEnvVar) {
return {
Expand Down Expand Up @@ -494,8 +503,12 @@ const agentPatterns = {

// Check if has code blocks or lists but no XML
const hasCodeBlocks = /```[\s\S]+?```/.test(content);
const hasLists = /^[-*]\s+.+$/m.test(content);
const hasXML = /<\w+>[\s\S]*?<\/\w+>/.test(content);
// ReDoS fix: bound the \s+ and line-content runs; line-anchored so this still
// detects any "- item" / "* item" list line as before.
const hasLists = /^[-*]\s{1,1000}[^\n]{1,2000}$/m.test(content);
// ReDoS fix: bound the unbounded [\s\S]*? so an unterminated <tag> cannot
// drive polynomial backtracking; 50k chars covers any realistic XML block.
const hasXML = /<\w+>[\s\S]{0,50000}?<\/\w+>/.test(content);
const sectionCount = (content.match(/^##\s+/gm) || []).length;

// Complex content without XML
Expand Down
26 changes: 19 additions & 7 deletions lib/enhance/auto-suppression.js
Original file line number Diff line number Diff line change
Expand Up @@ -54,9 +54,12 @@ const PATTERN_HEURISTICS = {
const contentLower = content.toLowerCase();

// Check if file is pattern documentation describing vague language detection
// ReDoS fix: the .* runs never matched across newlines (. excludes \n), so
// bounding them to [^\n]{0,N} keeps the same "within one line, in order"
// semantics while removing the polynomial multi-.* backtracking.
const isPatternDoc =
/pattern.*detect.*usually|example.*vague|fuzzy.*language.*like/i.test(content) ||
/vague.*terms.*like|"usually".*"sometimes"/i.test(content);
/pattern[^\n]{0,500}detect[^\n]{0,500}usually|example[^\n]{0,500}vague|fuzzy[^\n]{0,500}language[^\n]{0,500}like/i.test(content) ||
/vague[^\n]{0,500}terms[^\n]{0,500}like|"usually"[^\n]{0,500}"sometimes"/i.test(content);

if (isPatternDoc) {
return {
Expand Down Expand Up @@ -129,7 +132,10 @@ const PATTERN_HEURISTICS = {
const isOrchestrator =
fileNameLower.includes('orchestrator') ||
fileNameLower.includes('coordinator') ||
/Task\s*\(\s*\{[\s\S]*subagent_type/i.test(content);
// ReDoS fix: bound the unbounded [\s\S]* so a "Task({" with no following
// subagent_type cannot drive polynomial backtracking; 50k chars covers any
// realistic Task(...) call body.
/Task\s{0,100}\(\s{0,100}\{[\s\S]{0,50000}subagent_type/i.test(content);

if (isOrchestrator) {
return {
Expand All @@ -140,7 +146,9 @@ const PATTERN_HEURISTICS = {

// Check if workflow command that invokes agents
const isWorkflowCommand =
/spawn.*agent|invoke.*agent|Task\s*\(\s*\{/i.test(content) &&
// ReDoS fix: bound the within-line .* runs and \s* runs ([^\n] == . here)
// to keep the same matches without polynomial backtracking.
/spawn[^\n]{0,500}agent|invoke[^\n]{0,500}agent|Task\s{0,100}\(\s{0,100}\{/i.test(content) &&
fileNameLower.endsWith('.md');

if (isWorkflowCommand) {
Expand All @@ -159,9 +167,11 @@ const PATTERN_HEURISTICS = {
*/
missing_output_format: (finding, content, context) => {
// Check if content spawns subagents with their own output specs
// ReDoS fix: bound the within-line .* and \s* runs ([^\n] == . here) so the
// same membership matches hold without polynomial backtracking.
const spawnsSubagent =
/subagent_type|spawn.*agent|Task\s*\(\s*\{/i.test(content) ||
/enhance:.*-enhancer|enhance:.*-reporter/i.test(content);
/subagent_type|spawn[^\n]{0,500}agent|Task\s{0,100}\(\s{0,100}\{/i.test(content) ||
/enhance:[^\n]{0,500}-enhancer|enhance:[^\n]{0,500}-reporter/i.test(content);

if (spawnsSubagent) {
return {
Expand All @@ -180,7 +190,9 @@ const PATTERN_HEURISTICS = {
missing_constraints: (finding, content, context) => {
// Check for constraint section presence
const hasConstraintSection =
/##\s*What\s+.*MUST\s+NOT\s+Do/i.test(content) ||
// ReDoS fix: bound the within-line .* and \s runs ([^\n] == . here) so the
// "## What ... MUST NOT Do" heading still matches without backtracking.
/##\s{0,100}What\s{1,100}[^\n]{0,500}MUST\s{1,100}NOT\s{1,100}Do/i.test(content) ||
/##\s*Constraints/i.test(content) ||
/<constraints>/i.test(content) ||
/##\s*Critical\s+Constraints/i.test(content) ||
Expand Down
15 changes: 11 additions & 4 deletions lib/enhance/cross-file-analyzer.js
Original file line number Diff line number Diff line change
Expand Up @@ -114,8 +114,11 @@ const CRITICAL_PATTERNS = [
const SUBAGENT_PATTERN = /subagent_type\s*[=:]\s*["']([^"']+)["']/g;

/** Pre-compiled patterns for cleaning content */
const BAD_EXAMPLE_TAG_PATTERN = /<bad[_\- ]?example>[\s\S]*?<\/bad[_\- ]?example>/gi;
const BAD_EXAMPLE_CODE_PATTERN = /```[^\n]*bad[^\n]*\n[\s\S]*?```/gi;
// ReDoS fix: bound the lazy [\s\S]*? bodies so an unterminated <bad-example> or
// ``` fence cannot drive polynomial backtracking; 50k chars covers any realistic
// example block, so the stripped regions are unchanged for real content.
const BAD_EXAMPLE_TAG_PATTERN = /<bad[_\- ]?example>[\s\S]{0,50000}?<\/bad[_\- ]?example>/gi;
const BAD_EXAMPLE_CODE_PATTERN = /```[^\n]{0,500}bad[^\n]{0,500}\n[\s\S]{0,50000}?```/gi;

// ============================================
// TOOL PATTERN CACHE
Expand Down Expand Up @@ -649,10 +652,14 @@ function analyzePromptConsistency(agents) {

// Extract action keywords
let action;
// ReDoS fix: bound the greedy prefix to non-newline chars. `line` is a single
// trimmed line (no newlines), so [^\n]{0,N} is equivalent to the prior `.*`:
// greedy match strips everything up to and including the LAST keyword plus its
// trailing whitespace, preserving the word-boundary semantics exactly.
if (isAlways) {
action = line.replace(/.*\bALWAYS\b\s*/i, '').substring(0, ACTION_COMPARISON_LENGTH);
action = line.replace(/[^\n]{0,2000}\bALWAYS\b\s{0,200}/i, '').substring(0, ACTION_COMPARISON_LENGTH);
} else {
action = line.replace(/.*\b(?:NEVER|DO NOT)\b\s*/i, '').substring(0, ACTION_COMPARISON_LENGTH);
action = line.replace(/[^\n]{0,2000}\b(?:NEVER|DO NOT)\b\s{0,200}/i, '').substring(0, ACTION_COMPARISON_LENGTH);
}

// Extract significant keywords from action
Expand Down
8 changes: 6 additions & 2 deletions lib/enhance/docs-patterns.js
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,9 @@ const docsPatterns = {
if (!content || typeof content !== 'string') return null;

// Find markdown links
const linkRegex = /\[([^\]]+)\]\(([^)]+)\)/g;
// ReDoS fix: bound the negated-class captures so the matcher is linear;
// bounds far exceed any realistic markdown link, so matches are unchanged.
const linkRegex = /\[([^\]]{1,2000})\]\(([^)]{1,4000})\)/g;
const brokenLinks = [];
let match;

Expand All @@ -40,7 +42,9 @@ const docsPatterns = {
if (linkTarget.startsWith('#')) {
const anchorId = linkTarget.slice(1).toLowerCase();
// Generate expected heading anchors from content
const headings = content.match(/^#{1,6}\s+(.+)$/gm) || [];
// ReDoS fix: bound the \s+ run; line-anchored (.+) cannot cross newlines
// so the same headings match as before.
const headings = content.match(/^#{1,6}\s{1,1000}(.+)$/gm) || [];
const anchors = headings.map(h => {
return h.replace(/^#{1,6}\s+/, '')
.toLowerCase()
Expand Down
27 changes: 22 additions & 5 deletions lib/enhance/fixer.js
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,16 @@ function applyFixes(issues, options = {}) {
return results;
}

/**
 * Prototype-pollution guard for path segments.
 *
 * Reports whether `key` may be used to index or assign into `current`
 * without reaching Object.prototype
 * (CodeQL js/prototype-polluting-assignment).
 *
 * The dangerous names are deliberately compared inline with `!==` against
 * string literals: CodeQL recognizes that exact shape as a sanitizing
 * barrier, whereas a Set.has() indirection is not traced through and leaves
 * the assignment flagged.
 *
 * @param {*} key - Path segment to check.
 * @returns {boolean} `false` for '__proto__', 'constructor' or 'prototype';
 *   `true` for every other value.
 */
function isSafeKey(key) {
  if (key !== '__proto__' && key !== 'constructor' && key !== 'prototype') {
    return true;
  }
  return false;
}

function applyAtPath(obj, pathStr, fixFn) {
const parts = pathStr.split('.');
const result = structuredClone(obj);
Expand All @@ -235,21 +245,26 @@ function applyAtPath(obj, pathStr, fixFn) {
if (part.includes('[')) {
// Array access
const match = part.match(/^((?!__proto__|constructor|prototype)[a-zA-Z_]\w*)\[(\d{1,10})\]$/);
if (match) {
if (match && match[1] !== '__proto__' && match[1] !== 'constructor' && match[1] !== 'prototype') {
current = current[match[1]][parseInt(match[2], 10)];
}
} else {
if (!isSafeKey(part)) return result; // refuse prototype-polluting traversal
current = current[part];
}
}

const lastPart = parts[parts.length - 1];
if (lastPart.includes('[')) {
const match = lastPart.match(/^((?!__proto__|constructor|prototype)[a-zA-Z_]\w*)\[(\d{1,10})\]$/);
if (match) {
current[match[1]][parseInt(match[2], 10)] = fixFn(current[match[1]][parseInt(match[2], 10)]);
// Inline literal guard (not the isSafeKey helper) so CodeQL traces the
// sanitizing barrier on the assignment below.
if (match && match[1] !== '__proto__' && match[1] !== 'constructor' && match[1] !== 'prototype') {
const key = match[1];
const idx = parseInt(match[2], 10);
current[key][idx] = fixFn(current[key][idx]);
}
} else {
} else if (lastPart !== '__proto__' && lastPart !== 'constructor' && lastPart !== 'prototype') {
current[lastPart] = fixFn(current[lastPart]);
}

Expand Down Expand Up @@ -780,5 +795,7 @@ module.exports = {
previewFixes,
restoreFromBackup,
cleanupBackups,
assertNotSymlink
assertNotSymlink,
// Exported for prototype-pollution regression tests.
applyAtPath
};
Loading