From a625100e16ec812499215c9382c05da480d00c3f Mon Sep 17 00:00:00 2001 From: agent-core-bot Date: Fri, 29 May 2026 11:15:55 +0000 Subject: [PATCH] chore: sync core lib and CLAUDE.md from agent-core --- lib/collectors/codebase.js | 9 +++++++-- lib/collectors/documentation.js | 10 ++++++++-- lib/enhance/agent-patterns.js | 21 +++++++++++++++++---- lib/enhance/auto-suppression.js | 26 +++++++++++++++++++------- lib/enhance/cross-file-analyzer.js | 15 +++++++++++---- lib/enhance/docs-patterns.js | 8 ++++++-- lib/enhance/fixer.js | 23 +++++++++++++++++++---- 7 files changed, 87 insertions(+), 25 deletions(-) diff --git a/lib/collectors/codebase.js b/lib/collectors/codebase.js index 8c3ddef..1a8c7a1 100644 --- a/lib/collectors/codebase.js +++ b/lib/collectors/codebase.js @@ -126,7 +126,10 @@ function extractSymbols(content) { symbols.functions.push(match[1]); } - const arrowPattern = /(?:const|let)\s+([a-zA-Z_$][a-zA-Z0-9_$]*)\s*=\s*(?:async\s*)?\([^)]*\)\s*=>/g; + // ReDoS fix: bound the unbounded \s* / async runs and the parameter list so the + // matcher cannot backtrack polynomially on pathological input. Bounds are large + // enough that all realistic source matches identically to the prior \s*/[^)]* form. + const arrowPattern = /(?:const|let)\s{1,1000}([a-zA-Z_$][a-zA-Z0-9_$]*)\s{0,1000}=\s{0,1000}(?:async\s{0,1000})?\([^)]{0,2000}\)\s{0,1000}=>/g; while ((match = arrowPattern.exec(content)) !== null) { symbols.functions.push(match[1]); } @@ -141,7 +144,9 @@ function extractSymbols(content) { symbols.exports.push(match[1]); } - const moduleExportsPattern = /module\.exports\s*=\s*\{([^}]+)\}/; + // ReDoS fix: bound the \s* runs and capture length so the matcher stays linear; + // bounds exceed any realistic module.exports declaration so matches are unchanged. 
+ const moduleExportsPattern = /module\.exports\s{0,1000}=\s{0,1000}\{([^}]{1,100000})\}/; const moduleMatch = content.match(moduleExportsPattern); if (moduleMatch) { const keys = moduleMatch[1].split(',').map(k => k.trim().split(':')[0].trim()); diff --git a/lib/collectors/documentation.js b/lib/collectors/documentation.js index ee8b8b4..41a54cd 100644 --- a/lib/collectors/documentation.js +++ b/lib/collectors/documentation.js @@ -50,7 +50,9 @@ function safeReadFile(filePath, basePath) { * Analyze a single markdown file */ function analyzeMarkdownFile(content, filePath) { - const sectionMatches = content.match(/^##\s+(.+)$/gm) || []; + // ReDoS fix: bound the \s+ run after the ## marker; line-anchored (.+) cannot + // cross newlines so this matches the same headings as before. + const sectionMatches = content.match(/^##\s{1,1000}(.+)$/gm) || []; const sections = sectionMatches.slice(0, 10).map(s => s.replace(/^##\s+/, '')); const sectionLower = sections.map(s => s.toLowerCase()).join(' '); @@ -83,7 +85,11 @@ function extractCheckboxes(result, content) { * Extract documented features */ function extractFeatures(result, content) { - const featurePattern = /^[-*]\s+\*{0,2}(.+?)\*{0,2}(?:\s*[-–]\s*(.+))?$/gm; + // ReDoS fix: bound the \s+ run and the line-content quantifiers so the lazy + // (.+?) / optional trailing (.+) pair cannot backtrack polynomially. [^\n] stands in + // for . here; note that . also excludes \r, \u2028 and \u2029, so on CRLF input the trailing + // capture can include a \r. The bounds far exceed the 80-char feature cap applied below. 
+ const featurePattern = /^[-*]\s{1,100}\*{0,2}([^\n]{1,2000}?)\*{0,2}(?:\s{0,100}[-–]\s{0,100}([^\n]{1,2000}))?$/gm; let match; while ((match = featurePattern.exec(content)) !== null && result.features.length < 20) { diff --git a/lib/enhance/agent-patterns.js b/lib/enhance/agent-patterns.js index d7c5479..ca5000f 100644 --- a/lib/enhance/agent-patterns.js +++ b/lib/enhance/agent-patterns.js @@ -439,8 +439,17 @@ const agentPatterns = { // Look for hardcoded .claude/ references const hasHardcoded = /\.claude\//.test(content); - // Exclude if using AI_STATE_DIR - const usesEnvVar = /AI_STATE_DIR|\$\{.*STATE.*\}/i.test(content); + // Exclude if using AI_STATE_DIR or a ${...STATE...} env expression. + // ReDoS fix: the old /\$\{.*STATE.*\}/ (and the [^}]*STATE[^}]* rewrite) + // has two ambiguous quantifier runs -> polynomial backtrack. Instead + // scan each ${...} group with a single bounded [^}] run, then substring- + // test for STATE. Linear, and matches STATE in ANY ${...} like before. + let usesEnvVar = /AI_STATE_DIR/i.test(content); + if (!usesEnvVar) { + for (const m of content.matchAll(/\$\{([^}]{0,1000})\}/g)) { + if (/STATE/i.test(m[1])) { usesEnvVar = true; break; } + } + } if (hasHardcoded && !usesEnvVar) { return { @@ -494,8 +503,12 @@ const agentPatterns = { // Check if has code blocks or lists but no XML const hasCodeBlocks = /```[\s\S]+?```/.test(content); - const hasLists = /^[-*]\s+.+$/m.test(content); - const hasXML = /<\w+>[\s\S]*?<\/\w+>/.test(content); + // ReDoS fix: bound the \s+ and line-content runs; line-anchored so this still + // detects any "- item" / "* item" list line as before. + const hasLists = /^[-*]\s{1,1000}[^\n]{1,2000}$/m.test(content); + // ReDoS fix: bound the unbounded [\s\S]*? so an unterminated opening tag cannot + // drive polynomial backtracking; 50k chars covers any realistic XML block. 
+ const hasXML = /<\w+>[\s\S]{0,50000}?<\/\w+>/.test(content); const sectionCount = (content.match(/^##\s+/gm) || []).length; // Complex content without XML diff --git a/lib/enhance/auto-suppression.js b/lib/enhance/auto-suppression.js index 4880aa2..911a433 100644 --- a/lib/enhance/auto-suppression.js +++ b/lib/enhance/auto-suppression.js @@ -54,9 +54,12 @@ const PATTERN_HEURISTICS = { const contentLower = content.toLowerCase(); // Check if file is pattern documentation describing vague language detection + // ReDoS fix: the .* runs never matched across newlines (. excludes \n), so + // bounding them to [^\n]{0,N} keeps the same "within one line, in order" + // semantics while removing the polynomial multi-.* backtracking. const isPatternDoc = - /pattern.*detect.*usually|example.*vague|fuzzy.*language.*like/i.test(content) || - /vague.*terms.*like|"usually".*"sometimes"/i.test(content); + /pattern[^\n]{0,500}detect[^\n]{0,500}usually|example[^\n]{0,500}vague|fuzzy[^\n]{0,500}language[^\n]{0,500}like/i.test(content) || + /vague[^\n]{0,500}terms[^\n]{0,500}like|"usually"[^\n]{0,500}"sometimes"/i.test(content); if (isPatternDoc) { return { @@ -129,7 +132,10 @@ const PATTERN_HEURISTICS = { const isOrchestrator = fileNameLower.includes('orchestrator') || fileNameLower.includes('coordinator') || - /Task\s*\(\s*\{[\s\S]*subagent_type/i.test(content); + // ReDoS fix: bound the unbounded [\s\S]* so a "Task({" with no following + // subagent_type cannot drive polynomial backtracking; 50k chars covers any + // realistic Task(...) call body. + /Task\s{0,100}\(\s{0,100}\{[\s\S]{0,50000}subagent_type/i.test(content); if (isOrchestrator) { return { @@ -140,7 +146,9 @@ const PATTERN_HEURISTICS = { // Check if workflow command that invokes agents const isWorkflowCommand = - /spawn.*agent|invoke.*agent|Task\s*\(\s*\{/i.test(content) && + // ReDoS fix: bound the within-line .* runs and \s* runs ([^\n] == . here) + // to keep the same matches without polynomial backtracking. 
+ /spawn[^\n]{0,500}agent|invoke[^\n]{0,500}agent|Task\s{0,100}\(\s{0,100}\{/i.test(content) && fileNameLower.endsWith('.md'); if (isWorkflowCommand) { @@ -159,9 +167,11 @@ const PATTERN_HEURISTICS = { */ missing_output_format: (finding, content, context) => { // Check if content spawns subagents with their own output specs + // ReDoS fix: bound the within-line .* and \s* runs ([^\n] == . here) so the + // same membership matches hold without polynomial backtracking. const spawnsSubagent = - /subagent_type|spawn.*agent|Task\s*\(\s*\{/i.test(content) || - /enhance:.*-enhancer|enhance:.*-reporter/i.test(content); + /subagent_type|spawn[^\n]{0,500}agent|Task\s{0,100}\(\s{0,100}\{/i.test(content) || + /enhance:[^\n]{0,500}-enhancer|enhance:[^\n]{0,500}-reporter/i.test(content); if (spawnsSubagent) { return { @@ -180,7 +190,9 @@ const PATTERN_HEURISTICS = { missing_constraints: (finding, content, context) => { // Check for constraint section presence const hasConstraintSection = - /##\s*What\s+.*MUST\s+NOT\s+Do/i.test(content) || + // ReDoS fix: bound the within-line .* and \s runs ([^\n] == . here) so the + // "## What ... MUST NOT Do" heading still matches without backtracking. + /##\s{0,100}What\s{1,100}[^\n]{0,500}MUST\s{1,100}NOT\s{1,100}Do/i.test(content) || /##\s*Constraints/i.test(content) || //i.test(content) || /##\s*Critical\s+Constraints/i.test(content) || diff --git a/lib/enhance/cross-file-analyzer.js b/lib/enhance/cross-file-analyzer.js index 61483b3..eea0de1 100644 --- a/lib/enhance/cross-file-analyzer.js +++ b/lib/enhance/cross-file-analyzer.js @@ -114,8 +114,11 @@ const CRITICAL_PATTERNS = [ const SUBAGENT_PATTERN = /subagent_type\s*[=:]\s*["']([^"']+)["']/g; /** Pre-compiled patterns for cleaning content */ -const BAD_EXAMPLE_TAG_PATTERN = /[\s\S]*?<\/bad[_\- ]?example>/gi; -const BAD_EXAMPLE_CODE_PATTERN = /```[^\n]*bad[^\n]*\n[\s\S]*?```/gi; +// ReDoS fix: bound the lazy [\s\S]*? 
bodies so an unterminated bad-example tag or +// ``` fence cannot drive polynomial backtracking; 50k chars covers any realistic +// example block, so the stripped regions are unchanged for real content. +const BAD_EXAMPLE_TAG_PATTERN = /[\s\S]{0,50000}?<\/bad[_\- ]?example>/gi; +const BAD_EXAMPLE_CODE_PATTERN = /```[^\n]{0,500}bad[^\n]{0,500}\n[\s\S]{0,50000}?```/gi; // ============================================ // TOOL PATTERN CACHE @@ -649,10 +652,14 @@ function analyzePromptConsistency(agents) { // Extract action keywords let action; + // ReDoS fix: bound the greedy prefix to non-newline chars. `line` is a single + // trimmed line (no newlines), so [^\n]{0,N} is equivalent to the prior `.*`: + // greedy match strips everything up to and including the LAST keyword plus its + // trailing whitespace, preserving the word-boundary semantics exactly. if (isAlways) { - action = line.replace(/.*\bALWAYS\b\s*/i, '').substring(0, ACTION_COMPARISON_LENGTH); + action = line.replace(/[^\n]{0,2000}\bALWAYS\b\s{0,200}/i, '').substring(0, ACTION_COMPARISON_LENGTH); } else { - action = line.replace(/.*\b(?:NEVER|DO NOT)\b\s*/i, '').substring(0, ACTION_COMPARISON_LENGTH); + action = line.replace(/[^\n]{0,2000}\b(?:NEVER|DO NOT)\b\s{0,200}/i, '').substring(0, ACTION_COMPARISON_LENGTH); } // Extract significant keywords from action diff --git a/lib/enhance/docs-patterns.js b/lib/enhance/docs-patterns.js index 384bc01..60f2f01 100644 --- a/lib/enhance/docs-patterns.js +++ b/lib/enhance/docs-patterns.js @@ -24,7 +24,9 @@ const docsPatterns = { if (!content || typeof content !== 'string') return null; // Find markdown links - const linkRegex = /\[([^\]]+)\]\(([^)]+)\)/g; + // ReDoS fix: bound the negated-class captures so the matcher is linear; + // bounds far exceed any realistic markdown link, so matches are unchanged. 
+ const linkRegex = /\[([^\]]{1,2000})\]\(([^)]{1,4000})\)/g; const brokenLinks = []; let match; @@ -40,7 +42,9 @@ const docsPatterns = { if (linkTarget.startsWith('#')) { const anchorId = linkTarget.slice(1).toLowerCase(); // Generate expected heading anchors from content - const headings = content.match(/^#{1,6}\s+(.+)$/gm) || []; + // ReDoS fix: bound the \s+ run; line-anchored (.+) cannot cross newlines + // so the same headings match as before. + const headings = content.match(/^#{1,6}\s{1,1000}(.+)$/gm) || []; const anchors = headings.map(h => { return h.replace(/^#{1,6}\s+/, '') .toLowerCase() diff --git a/lib/enhance/fixer.js b/lib/enhance/fixer.js index 2057dde..329329d 100644 --- a/lib/enhance/fixer.js +++ b/lib/enhance/fixer.js @@ -225,6 +225,15 @@ function applyFixes(issues, options = {}) { return results; } +// Prototype-pollution guard: reject path segments that would reach +// Object.prototype. The bracket-access branches already exclude these via a +// negative lookahead; plain `.foo` segments need the same guard before they +// index/assign into `current` (CodeQL js/prototype-polluting-assignment). 
+const UNSAFE_KEYS = new Set(['__proto__', 'constructor', 'prototype']); +function isSafeKey(key) { + return !UNSAFE_KEYS.has(key); +} + function applyAtPath(obj, pathStr, fixFn) { const parts = pathStr.split('.'); const result = structuredClone(obj); @@ -235,10 +244,11 @@ function applyAtPath(obj, pathStr, fixFn) { if (part.includes('[')) { // Array access const match = part.match(/^((?!__proto__|constructor|prototype)[a-zA-Z_]\w*)\[(\d{1,10})\]$/); - if (match) { + if (match && isSafeKey(match[1])) { current = current[match[1]][parseInt(match[2], 10)]; } } else { + if (!isSafeKey(part)) return result; // refuse prototype-polluting traversal current = current[part]; } } @@ -246,10 +256,13 @@ function applyAtPath(obj, pathStr, fixFn) { const lastPart = parts[parts.length - 1]; if (lastPart.includes('[')) { const match = lastPart.match(/^((?!__proto__|constructor|prototype)[a-zA-Z_]\w*)\[(\d{1,10})\]$/); - if (match) { - current[match[1]][parseInt(match[2], 10)] = fixFn(current[match[1]][parseInt(match[2], 10)]); + if (match && isSafeKey(match[1])) { + const key = match[1]; + const idx = parseInt(match[2], 10); + current[key][idx] = fixFn(current[key][idx]); } } else { + if (!isSafeKey(lastPart)) return result; // refuse prototype-polluting assignment current[lastPart] = fixFn(current[lastPart]); } @@ -780,5 +793,7 @@ module.exports = { previewFixes, restoreFromBackup, cleanupBackups, - assertNotSymlink + assertNotSymlink, + // Exported for prototype-pollution regression tests. + applyAtPath };