|
| 1 | +/** |
| 2 | + * Privacy Filter for Unified Context Assembly |
| 3 | + * Filters sensitive patterns from content before including in context |
| 4 | + */ |
| 5 | + |
| 6 | +import { SENSITIVE_PATTERNS } from '../security/input-sanitizer.js'; |
| 7 | + |
/**
 * Redaction strictness level.
 *
 * - `permissive`: only the shared security patterns are applied.
 * - `standard`: security patterns plus the `standard` privacy patterns.
 * - `strict`: security patterns plus both the `strict` and `standard`
 *   privacy patterns.
 */
export type PrivacyMode =
  | 'strict'
  | 'standard'
  | 'permissive';
| 9 | + |
/**
 * Options accepted by the `PrivacyFilter` constructor.
 */
export interface PrivacyFilterConfig {
  /** Strictness level that selects which pattern groups are active. */
  mode: PrivacyMode;
}
| 13 | + |
/**
 * Outcome of a single `PrivacyFilter.filter()` call.
 */
export interface FilterResult {
  /** The input text with every matched span replaced by `[REDACTED]`. */
  filtered: string;
  /** Tally of sensitive matches reported by the filter for this input. */
  redactedCount: number;
}
| 18 | + |
/**
 * Privacy patterns applied on top of the shared security patterns, keyed by
 * the mode that introduces them.
 *
 * The lists are NOT cumulative: `strict` holds only the patterns that go
 * beyond `standard`, so a consumer that wants strict behaviour must combine
 * both lists itself. Every pattern carries the `g` flag so that all
 * occurrences in a string are matched, which also means each RegExp object
 * carries `lastIndex` state between `test()`/`exec()` calls.
 */
const PRIVACY_PATTERNS: Record<PrivacyMode, RegExp[]> = {
  // Permissive: only critical secrets (no extra privacy patterns)
  permissive: [],

  // Standard: secrets + basic PII
  standard: [
    // Email addresses.
    // The TLD class is [A-Za-z]; a `|` inside a character class is a literal
    // pipe, not alternation, and would let "user@example.||x" match.
    /\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b/g,
    // Phone numbers (various formats)
    /\b(?:\+?1[-.\s]?)?(?:\(?\d{3}\)?[-.\s]?)?\d{3}[-.\s]?\d{4}\b/g,
    // SSN-like patterns
    /\b\d{3}[-.\s]?\d{2}[-.\s]?\d{4}\b/g,
    // IP addresses (v4)
    /\b(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\b/g,
  ],

  // Strict: additional PII, meant to be used together with `standard`
  strict: [
    // Credit card-like numbers (13-19 digits, optionally separated)
    /\b(?:\d{4}[-.\s]?){3,4}\d{1,4}\b/g,
    // Date of birth patterns (month/day/year with - or / separators)
    /\b(?:0?[1-9]|1[0-2])[-/](?:0?[1-9]|[12][0-9]|3[01])[-/](?:19|20)\d{2}\b/g,
    // AWS-style keys
    /\b(?:AKIA|ABIA|ACCA|ASIA)[A-Z0-9]{16}\b/g,
    // Private key blocks (BEGIN marker through the matching END marker)
    /-----BEGIN (?:RSA |EC |DSA |OPENSSH )?PRIVATE KEY-----[\s\S]*?-----END (?:RSA |EC |DSA |OPENSSH )?PRIVATE KEY-----/g,
    // JWT tokens
    /\beyJ[A-Za-z0-9_-]*\.eyJ[A-Za-z0-9_-]*\.[A-Za-z0-9_-]*/g,
    // URLs with embedded credentials (scheme://user:password@host...)
    /(?:https?|ftp):\/\/[^\s:@]+:[^\s:@]+@[^\s]+/gi,
    // MAC addresses
    /\b(?:[0-9A-Fa-f]{2}[:-]){5}[0-9A-Fa-f]{2}\b/g,
    // UUID-like patterns that might be sensitive
    /\b[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}\b/gi,
  ],
};
| 59 | + |
/**
 * Redacts sensitive substrings from text according to a privacy mode.
 *
 * The active pattern list always starts with the shared `SENSITIVE_PATTERNS`
 * and is extended with mode-specific entries from `PRIVACY_PATTERNS`.
 * Patterns are applied in list order, each one operating on the output of the
 * previous one.
 */
export class PrivacyFilter {
  private config: PrivacyFilterConfig;
  private patterns: RegExp[];

  /**
   * @param config Filter options; the object is copied, so later calls to
   *   `setMode()` never modify the caller's object.
   */
  constructor(config: PrivacyFilterConfig) {
    this.config = { ...config };
    this.patterns = this.buildPatternList();
  }

  /**
   * Build the complete list of patterns for the current privacy mode.
   *
   * @returns Private copies of the selected patterns, in application order:
   *   security patterns first, then (for `strict`) the strict group followed
   *   by the standard group, or (for `standard`) the standard group only.
   */
  private buildPatternList(): RegExp[] {
    // Always include security patterns (API keys, tokens, etc.)
    const selected: RegExp[] = [...SENSITIVE_PATTERNS];

    switch (this.config.mode) {
      case 'strict':
        selected.push(...PRIVACY_PATTERNS.strict, ...PRIVACY_PATTERNS.standard);
        break;
      case 'standard':
        selected.push(...PRIVACY_PATTERNS.standard);
        break;
      case 'permissive':
        // Only security patterns (already added above)
        break;
    }

    // Clone every pattern: global/sticky RegExp objects carry `lastIndex`
    // state, and the source arrays are module-level values shared with other
    // code. Working on copies keeps that state private to this instance.
    return selected.map((pattern) => new RegExp(pattern.source, pattern.flags));
  }

  /**
   * Replace every sensitive match in `content` with `[REDACTED]`.
   *
   * @param content The content to filter
   * @returns The filtered text and the number of replacements actually made.
   *   Empty input yields `{ filtered: '', redactedCount: 0 }`.
   */
  filter(content: string): FilterResult {
    if (!content) {
      return { filtered: '', redactedCount: 0 };
    }

    let filtered = content;
    let redactedCount = 0;

    for (const pattern of this.patterns) {
      // Reset regex state so sticky/global patterns start from the beginning
      pattern.lastIndex = 0;

      // Count inside the replacer so the tally reflects real replacements:
      // a span already redacted by an earlier pattern is not counted again,
      // and capture groups of non-global patterns do not inflate the number.
      filtered = filtered.replace(pattern, () => {
        redactedCount += 1;
        return '[REDACTED]';
      });
    }

    return { filtered, redactedCount };
  }

  /**
   * Check if content contains sensitive data without modifying it.
   *
   * @param content The content to check
   * @returns True if any active pattern matches; false for empty input
   */
  containsSensitive(content: string): boolean {
    if (!content) return false;

    return this.patterns.some((pattern) => {
      // `test()` on a global pattern resumes from `lastIndex`; start at 0
      pattern.lastIndex = 0;
      return pattern.test(content);
    });
  }

  /**
   * Get the current privacy mode.
   */
  getMode(): PrivacyMode {
    return this.config.mode;
  }

  /**
   * Update the privacy mode and rebuild the active pattern list.
   *
   * @param mode The new privacy mode
   */
  setMode(mode: PrivacyMode): void {
    this.config = { ...this.config, mode };
    this.patterns = this.buildPatternList();
  }

  /**
   * Get the count of active patterns (security + mode-specific).
   */
  getPatternCount(): number {
    return this.patterns.length;
  }
}
| 168 | + |
/**
 * Convenience factory for a `PrivacyFilter`.
 *
 * @param mode Privacy mode for the new filter; defaults to `'standard'`
 * @returns A filter configured with the given mode
 */
export function createPrivacyFilter(mode: PrivacyMode = 'standard'): PrivacyFilter {
  const config: PrivacyFilterConfig = { mode };
  return new PrivacyFilter(config);
}
0 commit comments