@@ -226,15 +226,156 @@ class HTMLSmugglingBlocker {
226226
227227 setupObserver ( ) {
228228 const observer = new MutationObserver ( ( mutations ) => {
229- if ( mutations . some ( mutation => mutation . addedNodes . length > 0 ) ) {
230- this . analyzeContent ( ) ;
229+ if ( this . isUrlWhitelisted ) {
230+ return ;
231+ }
232+
233+ let shouldAnalyze = false ;
234+ const nodesToAnalyze = [ ] ;
235+
236+ for ( const mutation of mutations ) {
237+ if ( mutation . addedNodes . length > 0 ) {
238+ for ( const node of mutation . addedNodes ) {
239+ if ( node instanceof HTMLElement ) {
240+ nodesToAnalyze . push ( node ) ;
241+ shouldAnalyze = true ;
242+ }
243+ }
244+ }
245+
246+ if ( mutation . type === 'attributes' &&
247+ [ 'src' , 'href' , 'download' , 'data-*' ] . some ( attr =>
248+ mutation . attributeName === attr ||
249+ mutation . attributeName ?. startsWith ( 'data-' ) ) ) {
250+ if ( mutation . target instanceof HTMLElement ) {
251+ nodesToAnalyze . push ( mutation . target ) ;
252+ shouldAnalyze = true ;
253+ }
254+ }
255+ }
256+
257+ if ( shouldAnalyze ) {
258+ this . analyzeNodes ( nodesToAnalyze ) ;
231259 }
232260 } ) ;
233261
234262 observer . observe ( document . documentElement , {
235263 childList : true ,
236- subtree : true
264+ subtree : true ,
265+ attributes : true ,
266+ attributeFilter : [ 'src' , 'href' , 'download' , 'data-*' ]
237267 } ) ;
268+
269+ if ( ! this . isUrlWhitelisted ) {
270+ this . analyzeContent ( ) ;
271+ }
272+ }
273+
274+ analyzeNodes ( nodes ) {
275+ if ( this . isUrlWhitelisted || nodes . length === 0 ) {
276+ return ;
277+ }
278+
279+ for ( const node of nodes ) {
280+ if ( node . textContent && node . textContent . length < 50 ) {
281+ continue ;
282+ }
283+
284+ const htmlContent = node . outerHTML ;
285+
286+ const cacheKey = this . getCacheKey ( htmlContent ) ;
287+ const cachedResult = this . cache . get ( cacheKey ) ;
288+
289+ if ( cachedResult ) {
290+ this . metrics . cacheHits ++ ;
291+ if ( cachedResult . score >= this . threshold ) {
292+ this . handleSuspiciousNode ( node , cachedResult . detectedPatterns ) ;
293+ }
294+ continue ;
295+ }
296+
297+ this . metrics . cacheMisses ++ ;
298+
299+ const patternResult = this . analyzeWithPatterns ( htmlContent ) ;
300+ let mlResult = null ;
301+
302+ if ( htmlContent . length > 1000 || patternResult . score > 1 ) {
303+ mlResult = this . mlEnabled ? mlDetector . detect ( htmlContent ) : null ;
304+ }
305+
306+ const isSuspicious = patternResult . isSuspicious || ( mlResult ?. isSmuggling || false ) ;
307+
308+ if ( isSuspicious ) {
309+ this . handleSuspiciousNode ( node , patternResult . detectedPatterns ) ;
310+
311+ setTimeout ( ( ) => {
312+ if ( this . blocked ) {
313+ mlDetector . learn ( htmlContent , true ) ;
314+ }
315+ } , this . feedbackDelay ) ;
316+ } else {
317+
318+ if ( htmlContent . length > 1000 ) {
319+ mlDetector . learn ( htmlContent , false ) ;
320+ }
321+ }
322+ }
323+ }
324+
325+ handleSuspiciousNode ( node , detectedPatterns ) {
326+ if ( this . isUrlWhitelisted ) {
327+ return ;
328+ }
329+
330+ if ( node . tagName === 'SCRIPT' && ! node . src ) {
331+
332+ if ( this . isSuspiciousScript ( node . textContent ) ) {
333+ this . removeElement ( node ) ;
334+ this . blocked = true ;
335+ }
336+ } else if ( node . tagName === 'A' && node . hasAttribute ( 'download' ) &&
337+ ( node . href . startsWith ( 'data:' ) || node . href . startsWith ( 'blob:' ) ) ) {
338+
339+ this . removeElement ( node ) ;
340+ this . blocked = true ;
341+ } else if ( node . tagName === 'EMBED' ) {
342+
343+ this . removeElement ( node ) ;
344+ this . blocked = true ;
345+ } else if ( node . tagName === 'SVG' && node . querySelector ( 'script' ) ) {
346+
347+ const scripts = node . querySelectorAll ( 'script' ) ;
348+ scripts . forEach ( script => this . removeElement ( script ) ) ;
349+ this . blocked = true ;
350+ } else {
351+
352+ const suspiciousElements = node . querySelectorAll (
353+ 'a[download][href^="data:"], a[download][href^="blob:"], embed, svg script'
354+ ) ;
355+ if ( suspiciousElements . length > 0 ) {
356+ suspiciousElements . forEach ( el => this . removeElement ( el ) ) ;
357+ this . blocked = true ;
358+ }
359+
360+
361+ const inlineScripts = node . querySelectorAll ( 'script:not([src])' ) ;
362+ inlineScripts . forEach ( script => {
363+ if ( this . isSuspiciousScript ( script . textContent ) ) {
364+ this . removeElement ( script ) ;
365+ this . blocked = true ;
366+ }
367+ } ) ;
368+ }
369+
370+ if ( this . blocked ) {
371+ this . logWarning (
372+ 1 ,
373+ 0 ,
374+ 0 ,
375+ 0 ,
376+ detectedPatterns
377+ ) ;
378+ }
238379 }
239380
240381 async analyzeContent ( ) {
@@ -325,33 +466,69 @@ class HTMLSmugglingBlocker {
325466 let score = 0 ;
326467 const detectedPatterns = [ ] ;
327468
328- const weights = Object . keys ( this . patternsByWeight ) . sort ( ( a , b ) => b - a ) ;
469+ if ( content . length < 50 ) {
470+ return {
471+ isSuspicious : false ,
472+ detectedPatterns : [ ] ,
473+ score : 0
474+ } ;
475+ }
329476
330- let shouldTerminate = false ;
331-
332- for ( const weight of weights ) {
333- if ( shouldTerminate || score >= this . threshold ) {
334- break ;
477+ const quickCheck = / b l o b | a t o b | d o w n l o a d | b a s e 6 4 | a r r a y b u f f e r | u i n t 8 a r r a y | c r e a t e o b j e c t u r l | f r o m c h a r c o d e / i;
478+ if ( ! quickCheck . test ( content ) ) {
479+ return {
480+ isSuspicious : false ,
481+ detectedPatterns : [ ] ,
482+ score : 0
483+ } ;
484+ }
485+
486+ const highWeightPatterns = Object . keys ( this . patternsByWeight )
487+ . filter ( weight => parseInt ( weight ) >= 3 )
488+ . flatMap ( weight => this . patternsByWeight [ weight ] ) ;
489+
490+ for ( const { pattern, weight} of highWeightPatterns ) {
491+ if ( pattern . test ( content ) ) {
492+ score += weight ;
493+ detectedPatterns . push ( pattern . toString ( ) ) ;
494+ this . metrics . matchCount ++ ;
495+
496+ if ( score >= this . threshold ) {
497+ return {
498+ isSuspicious : true ,
499+ detectedPatterns,
500+ score
501+ } ;
502+ }
335503 }
336-
337- const patterns = this . patternsByWeight [ weight ] ;
338- for ( const { pattern, weight : patternWeight } of patterns ) {
504+ }
505+
506+ if ( score >= this . threshold - 2 ) {
507+ const lowWeightPatterns = Object . keys ( this . patternsByWeight )
508+ . filter ( weight => parseInt ( weight ) < 3 )
509+ . flatMap ( weight => this . patternsByWeight [ weight ] ) ;
510+
511+ for ( const { pattern, weight} of lowWeightPatterns ) {
339512 if ( pattern . test ( content ) ) {
340- score += patternWeight ;
513+ score += weight ;
341514 detectedPatterns . push ( pattern . toString ( ) ) ;
342515 this . metrics . matchCount ++ ;
343516
344517 if ( score >= this . threshold ) {
345- shouldTerminate = true ;
346- break ;
518+ return {
519+ isSuspicious : true ,
520+ detectedPatterns,
521+ score
522+ } ;
347523 }
348524 }
349525 }
350526 }
351527
352528 return {
353529 isSuspicious : score >= this . threshold ,
354- detectedPatterns
530+ detectedPatterns,
531+ score
355532 } ;
356533 }
357534
0 commit comments