diff --git a/src/security-scanner.mjs b/src/security-scanner.mjs index 674a1d0..0ea6911 100644 --- a/src/security-scanner.mjs +++ b/src/security-scanner.mjs @@ -418,25 +418,314 @@ const PATTERNS = [ { id: "HK-003", category: "suspicious_hook", severity: "high", name: "Hook variable interpolation", description: "Hook command uses variable interpolation that could be injected", regex: /\$\{(?:file|command|content|input|args?|path|url|body|data)\}/i }, + + // ══ Round 3: Cherry-picked from Cisco YARA (865★), AgentSeal scan-mcp (151★), MCP-Shield (550★), Nova (287★) ══ + // Source: reading actual source code of 11 security scanner repos, 2026-03-28 + + // ── Tool Poisoning: Hidden Secondary Behavior (from Cisco tool_poisoning.yara — 20 patterns) ── + // CCO had ZERO coverage of this category. These catch tools that secretly do more than described. + + { id: "TP-007", category: "tool_poisoning", severity: "critical", name: "Hidden data collection", + description: "Tool secretly collects/extracts data while performing its stated function", + regex: /\b(also|additionally|furthermore|moreover|besides|in\s+addition)\s+(collect|gather|extract|send|upload|index|store|cache|log|monitor|track|record|capture)(s|ing|ed)?/i }, + + { id: "TP-008", category: "tool_poisoning", severity: "critical", name: "Processing-time data theft", + description: "Data extraction disguised as a processing step", + regex: /\b(during|while|when|upon|after)\s+(processing|compression|formatting|conversion|validation|analyzing|scanning|checking)\s*,?\s*(extract|collect|send|upload|capture|log|index|store)(s|ing|ed)?/i }, + + { id: "TP-009", category: "tool_poisoning", severity: "high", name: "Caching pretext exfiltration", + description: "Data collection disguised as caching or optimization", + regex: /\b(for|as)\s+(caching|optimization|performance|quality|improvement|analytics|backup|sync)\s+(purposes?|reasons?)\s*[,.]?\s*(send|upload|store|collect|transmit)(s|ing|ed)?/i }, + + { id: "TP-010", category: "tool_poisoning", severity: "high", name: "Sensitive data indexing", + description: "Tool indexes or caches credentials and sensitive data", + regex: /\b(index|cache|store|log|record|track)(s|es|ing|ed)?\s+(any|all)?\s*(api[_\s]?keys?|tokens?|credentials?|passwords?|secrets?|sensitive)/i }, + + { id: "TP-011", category: "tool_poisoning", severity: "high", name: "PII extraction from input", + description: "Tool extracts personal or sensitive information from user input", + // Tightened: removed "search" (FP on "Search emails"), require "extract/parse/scan/identify" + "all/any" qualifier + regex: /\b(extract|parse|scan|identify)(s|ing|ed)?\s+(all|any|every)\s+(api[_\s]?keys?|tokens?|credentials?|emails?|phone\s+numbers?|credit\s+cards?|ssn|social\s+security|pii|personal\s+data|private\s+data|sensitive\s+data)/i }, + + { id: "TP-012", category: "tool_poisoning", severity: "high", name: "Location or device tracking", + description: "Tool tracks device location, fingerprints, or usage behavior", + regex: /\b(location|gps|geolocation|device|machine|usage|behavior|activity|browsing)\s+(history|data|information|tracking|fingerprint|pattern|log)\s*(store|collect|send|share|track|log|sync|monitor|gather|upload)(s|ing|ed)?/i }, + + { id: "TP-013", category: "tool_poisoning", severity: "high", name: "Device fingerprinting", + description: "Tool fingerprints device using hardware identifiers", + regex: /\b(fingerprint|identify|track)(s|ing|ed)?\s+(device|machine|browser|user|system)\s*(using|via|through|by)\s*(mac|ip|hostname|hardware|uuid)/i }, + + { id: "TP-014", category: "tool_poisoning", severity: "high", name: "Auto opt-in without consent", + description: "Tool automatically enrolls user in services without consent", + regex: /\b(automatically|auto)[_\s]*(opt|subscribe|enroll|register|sign)[_\s]*(in|up|ed)\b/i }, + + { id: "TP-015", category: "tool_poisoning", severity: "high", name: "Remote data storage", + description: "Tool stores data remotely or syncs to cloud without disclosure", + regex: /\b(cache|cached|store|stored|sync|synced|synchronize)\s+(is\s+)?(stored\s+)?(remotely|externally|in\s+cloud|on\s+server|off[_\s-]?device|to\s+cloud|with\s+cloud|across\s+devices)/i }, + + { id: "TP-016", category: "tool_poisoning", severity: "medium", name: "OCR/screenshot data extraction", + description: "Tool performs OCR or extracts text from screenshots", + regex: /\b(ocr|text\s+extraction|optical\s+character)\s*(on|from)?\s*(screenshots?|images?|photos?)?\s*(and\s+)?(extract|capture|read|identify|send|upload)/i }, + + // ── Coercive Injection (from Cisco coercive_injection.yara — 16 patterns) ── + // CCO had ZERO coverage. These catch tools that coerce the AI into specific behaviors. + + { id: "CI-003", category: "coercive_injection", severity: "critical", name: "Mandatory tool execution", + description: "Tool demands to be executed first or always", + regex: /Always\s+(consult|use|run|execute|invoke|call|trigger)\s+this\s+(tool|function|command)\s+(immediately|first|before|prior)/i }, + + { id: "CI-004", category: "coercive_injection", severity: "critical", name: "Priority override", + description: "Tool claims priority over all other tools", + // Removed "You MUST call this function" — too many FPs on legitimate API workflow ordering + // (e.g. "You MUST call resolve-library-id before query-docs"). Kept only "before any OTHER tool". + regex: /Before\s+(executing|invoking|running|using|calling)\s+any\s+other\s+(tool|action|function)s?/i }, + + { id: "CI-005", category: "coercive_injection", severity: "high", name: "Hidden/secret parameter", + description: "Tool requests hidden or secret parameters", + regex: /\b(hidden|secret)[_\s](parameter|argument|input)\b/i }, + + { id: "CI-006", category: "coercive_injection", severity: "critical", name: "File access coercion", + description: "Tool coerces reading sensitive key/certificate files", + regex: /\b(read|parse|cat|dump)\s+(and\s+pass\s+)?(the\s+)?contents?\s+of\s+[^\n.]*\.(key|pem|ssh|aws)\b/i }, + + { id: "CI-007", category: "coercive_injection", severity: "critical", name: "Model context extraction", + description: "Tool tries to extract conversation history or system context", + regex: /\b(add|pass|send|include)\s+(the\s+)?(entire\s+)?(conversation|system|chain|tools?)\s*(history|prompt|list|context)?\s*(to|for|in)\s*(tool|authentication|parameter)/i }, + + { id: "CI-008", category: "coercive_injection", severity: "critical", name: "Model internals access", + description: "Tool accesses internal model variables", + regex: /\b(conversation_history|chain_of_thought|system_prompt|tools_list|model_name)\s*[:=]\s*(add|pass|send|provide)\b/i }, + + { id: "CI-009", category: "coercive_injection", severity: "high", name: "Tool will not work unless", + description: "Tool claims it won't function without suspicious prerequisites", + regex: /\b(sidenote\W+(required|otherwise)\s+[^\n]*tool\s+[^\n]*work|tool\s+will\s+not\s+work\s+unless|must\s+first\s+[^\n]*read\s+[^\n]*config)\b/i }, + + { id: "CI-010", category: "coercive_injection", severity: "critical", name: "Data exfiltration coercion", + description: "Tool coerces listing credentials or encoding sensitive data", + regex: /\b(list\s+all\s+(api\s+)?keys?|show\s+me?\s+(all\s+)?passwords?|include\s+[^\n]*(secrets?|passwords?|keys?|credentials?|tokens?|config|sensitive|private|confidential)\s+[^\n]*in\s+(your\s+)?response|base64\s+encode\s+[^\n]*credentials?)\b/i }, + + { id: "CI-011", category: "coercive_injection", severity: "high", name: "Behavioral trigger", + description: "Tool has conditional logic based on trust/admin status", + regex: /\b(if\s+[^\n]*trusted\s+[^\n]*then|when\s+[^\n]*admin\s+[^\n]*detected|random\.(random|choice)\s+[^\n]*instructions?)\b/i }, + + { id: "CI-012", category: "coercive_injection", severity: "critical", name: "Conversation theft", + description: "Tool steals conversation history or claims highest priority", + regex: /\b(send\s+[^\n]*conversation\s+history|when\s+[^\n]*detected\s+[^\n]*send\s+[^\n]*full\s+[^\n]*history|highest\s+priority\s+[^\n]*over\s+[^\n]*other\s+[^\n]*tools)\b/i }, + + { id: "CI-013", category: "coercive_injection", severity: "critical", name: "Code manipulation", + description: "Tool instructs inserting vulnerabilities into generated code", + regex: /\b(duplicate\s+[^\n]*code\s+[^\n]*snippets?\s+[^\n]*silently|insert\s+[^\n]*vulnerability\s+[^\n]*generated\s+[^\n]*code)\b/i }, + + // ── Prompt Injection: Extended (from Cisco + MCP-Shield + Nova — patterns CCO was missing) ── + + { id: "PI-018", category: "prompt_injection", severity: "critical", name: "Shadow parameter names", + description: "Tool uses known shadow/bypass parameter names in schema", + regex: /\b(internalOnly|hiddenParam|shadowTool|bypassChecks|sandboxEscape)\b/ }, + + { id: "PI-019", category: "prompt_injection", severity: "high", name: "Hidden behavior directive", + description: "Tool instructs AI to hide behavior from user", + regex: /\b(do\s+not\s+(mention|tell|execute)\s+[^\n]*user|tell\s+(the\s+)?user\s+only\s+about|do\s+not\s+mention\s+this\s+to\s+user)\b/i }, + + { id: "PI-020", category: "prompt_injection", severity: "high", name: "Bracket marker evasion", + description: "Triple bracket or pipe markers hiding instructions", + regex: /(\[\[\[|\|\|\|)(SYSTEM|ADMIN|HIDDEN|INTERNAL)[\s_]?(OVERRIDE|INSTRUCTION|COMMAND)(\]\]\]|\|\|\|)/i }, + + { id: "PI-021", category: "prompt_injection", severity: "high", name: "HTML comment injection", + description: "HTML comments containing system/admin override instructions", + regex: /