2323import requests
2424import time
2525import subprocess
26+ from lxml .html .clean import Cleaner
2627from pathlib import Path
2728from typing import Dict , Any , Optional
2829from difflib import unified_diff
2930
3031
def filter_html_tags(html: str) -> str:
    """
    Clean HTML using lxml's Cleaner.

    Removes <script> and <style> elements, inline ``style`` attributes,
    on* event-handler attributes, and any attribute outside lxml's safe
    allowlist. <form>, <iframe> and frame elements are kept. Cleaner options
    not set explicitly below stay at the library defaults.

    Args:
        html: HTML string to clean

    Returns:
        Cleaned HTML string. The input is returned unchanged when it is
        empty/blank or when cleaning raises.
    """
    # An empty document cannot be parsed; lxml raises on it, which used to
    # surface as a misleading "cleaning failed" warning. There is nothing to
    # clean, so hand the input straight back.
    if not html or not html.strip():
        return html

    cleaner = Cleaner(
        scripts=True,          # drop <script> elements
        javascript=True,       # remove on* event attributes (like onclick)
        style=True,            # drop <style> blocks
        inline_style=True,     # drop style="" attributes on tags
        safe_attrs_only=True,  # remove any tag attributes not in a safe allowlist
        frames=False,          # keep <frameset>/<frame>/<noframes>
        # `frames` does not cover <iframe>: lxml strips iframes (together with
        # <embed>/<object>) under `embedded`, which defaults to True. It must
        # be switched off for iframes to actually be kept.
        embedded=False,
        forms=False,           # keep <form> elements
    )
    try:
        return cleaner.clean_html(html)
    except Exception as e:
        # Deliberate best-effort: a cleaning failure must not abort the
        # snapshot comparison, so fall back to the raw HTML.
        print(f"⚠️ Warning: HTML cleaning failed ({e}), using original HTML")
        return html
58+
59+
3160class SnapshotBasedEvalBuilder :
3261 """Build eval files using before/after snapshots."""
3362
34- def __init__ (self , file_path : Optional [str ] = None , workdir : Optional [str ] = None ):
63+ def __init__ (self , file_path : Optional [str ] = None , workdir : Optional [str ] = None , disable_filtering : bool = False ):
3564 self .file_path = file_path
3665 self .workdir = workdir # Working directory for snapshots and validation scripts
66+ self .disable_filtering = disable_filtering # If False, filter <style> and <script> tags
3767 self .eval_data : Dict [str , Any ] = {}
3868 self .client_id : Optional [str ] = None
3969 self .tab_id : Optional [str ] = None
@@ -294,9 +324,17 @@ async def step_7_generate_validation(self):
294324
295325 print ("🔍 Analyzing differences..." )
296326
297- # Find differences
298- before_lines = self .snapshot_before .split ('\n ' )
299- after_lines = self .snapshot_after .split ('\n ' )
327+ # Apply filtering FIRST if enabled (default behavior)
328+ before_content = self .snapshot_before
329+ after_content = self .snapshot_after
330+ if not self .disable_filtering :
331+ print ("🧹 Cleaning HTML with lxml.html.Cleaner..." )
332+ before_content = filter_html_tags (before_content )
333+ after_content = filter_html_tags (after_content )
334+
335+ # Find differences from FILTERED content
336+ before_lines = before_content .split ('\n ' )
337+ after_lines = after_content .split ('\n ' )
300338
301339 diff = list (unified_diff (
302340 before_lines ,
@@ -320,10 +358,10 @@ async def step_7_generate_validation(self):
320358 diff_file = f"{ snapshot_dir } /diff.txt"
321359
322360 with open (before_file , 'w' ) as f :
323- f .write (self . snapshot_before )
361+ f .write (before_content )
324362
325363 with open (after_file , 'w' ) as f :
326- f .write (self . snapshot_after )
364+ f .write (after_content )
327365
328366 with open (diff_file , 'w' ) as f :
329367 f .write ('\n ' .join (diff ))
@@ -332,6 +370,8 @@ async def step_7_generate_validation(self):
332370 print (f" BEFORE: { before_file } " )
333371 print (f" AFTER: { after_file } " )
334372 print (f" DIFF: { diff_file } " )
373+ if not self .disable_filtering :
374+ print (" (Cleaned: removed scripts, styles, and unsafe attributes)" )
335375
336376 # Show sample of changes
337377 if added_lines :
@@ -508,14 +548,11 @@ async def step_7_generate_validation(self):
508548 # Wait for Claude to create the validation file
509549 validation_file = f"{ snapshot_dir } /verify.js"
510550
511- print ("Options:" )
512- print ("1. Auto-run Claude Code subprocess (recommended)" )
513- print ("2. Wait for Claude Code manually (you run it)" )
514- print ("3. Enter validation JavaScript manually" )
551+ # Automatically run Claude Code subprocess (no user prompt)
552+ print ("🤖 Auto-running Claude Code subprocess to generate validation..." )
515553 print ()
516554
517- choice = input ("Choice (1/2/3): " ).strip ()
518-
555+ choice = '1'
519556 lines = []
520557
521558 if choice == '1' :
@@ -643,12 +680,13 @@ async def step_7_generate_validation(self):
643680 if lines :
644681 js_code = '\n ' .join (lines )
645682
646- # Test it with retry loop (unlimited retries until user cancels )
683+ # Test it with retry loop (max 3 attempts )
647684 validation_saved = False
648685 retry_count = 0
686+ max_retries = 3
649687
650- while not validation_saved :
651- print (f"\n 🧪 Testing validation... (attempt { retry_count + 1 } )" )
688+ while not validation_saved and retry_count < max_retries :
689+ print (f"\n 🧪 Testing validation... (attempt { retry_count + 1 } / { max_retries } )" )
652690
653691 if await self ._test_validation (js_code ):
654692 # Save validation JavaScript to external file
@@ -678,17 +716,17 @@ async def step_7_generate_validation(self):
678716 print ("✅ Validation saved" )
679717 validation_saved = True
680718 else :
681- # Test failed - allow unlimited retries
682- print (f"\n ⚠️ Validation test failed." )
683- print ("\n Options:" )
684- print ("1. Auto-run Claude Code to fix it (recommended)" )
685- print ("2. Enter new validation manually" )
686- print ("3. Save anyway (not recommended)" )
687- print ("4. Skip validation for now" )
719+ # Test failed - auto-retry with Claude Code
720+ retry_count += 1
688721
689- retry_choice = input ("\n Choice (1/2/3/4): " ).strip ()
722+ if retry_count < max_retries :
723+ print (f"\n ⚠️ Validation test failed. Auto-retrying with Claude Code... ({ retry_count } /{ max_retries } )" )
724+ else :
725+ print (f"\n ❌ Validation failed after { max_retries } attempts. Skipping validation." )
726+ break
690727
691- if retry_choice == '1' :
728+ # Auto-run Claude Code to fix (no user prompt)
729+ if retry_count < max_retries :
692730 # Auto-run Claude Code subprocess to fix the validation
693731 print (f"\n 🤖 Launching Claude Code subprocess to fix validation..." )
694732 print ()
@@ -738,71 +776,25 @@ async def step_7_generate_validation(self):
738776 print ("─" * 60 )
739777 print ()
740778 print ("🔄 Re-testing with updated code..." )
741-
742- retry_count += 1
779+ # Continue to next iteration (retry_count already incremented)
743780 continue
744781 else :
745782 print (f"⚠️ Claude Code ran but { validation_file } was not found" )
746- retry_count += 1
783+ # Skip to next retry
747784 continue
748785
749786 except subprocess .TimeoutExpired :
750787 print ("⏱️ Claude Code subprocess timed out (5 minutes)" )
751- retry_count += 1
788+ # Skip to next retry
752789 continue
753790 except FileNotFoundError :
754791 print ("❌ 'claude' command not found. Is Claude Code installed?" )
755- retry_count += 1
792+ # Skip to next retry
756793 continue
757794 except Exception as e :
758795 print (f"❌ Error running Claude Code: { e } " )
759- retry_count += 1
796+ # Skip to next retry
760797 continue
761-
762- elif retry_choice == '2' :
763- # Manual entry
764- print ("\n Enter validation JavaScript (type 'END' on new line when done):\n " )
765- new_lines = []
766- while True :
767- line = input ()
768- if line .strip () == 'END' :
769- break
770- new_lines .append (line )
771- js_code = '\n ' .join (new_lines )
772- retry_count += 1
773- continue
774-
775- elif retry_choice == '3' :
776- # Save anyway
777- # Save validation JavaScript to external file
778- eval_dir = os .path .dirname (self .file_path )
779- verify_js_path = os .path .join (eval_dir , 'verify.js' )
780-
781- # Ensure eval directory exists
782- os .makedirs (eval_dir , exist_ok = True )
783-
784- # Write JavaScript to external file
785- with open (verify_js_path , 'w' ) as f :
786- f .write (js_code )
787-
788- print (f"💾 Saved validation script to: { verify_js_path } " )
789-
790- # Reference external file in YAML
791- if 'validation' not in self .eval_data :
792- self .eval_data ['validation' ] = {}
793- if 'type' not in self .eval_data ['validation' ]:
794- self .eval_data ['validation' ]['type' ] = 'js-eval'
795- if 'js-eval' not in self .eval_data ['validation' ]:
796- self .eval_data ['validation' ]['js-eval' ] = {}
797- self .eval_data ['validation' ]['js-eval' ]['script' ] = 'verify.js'
798- self .eval_data ['validation' ]['js-eval' ]['expected_result' ] = True
799- self .eval_data ['validation' ]['js-eval' ]['timeout' ] = 5000
800- print ("⚠️ Validation saved (with errors - use caution!)" )
801- validation_saved = True
802-
803- else : # Choice 4 or anything else
804- print ("⏭️ Skipping validation" )
805- break
806798 else :
807799 print ("⚠️ No validation code entered" )
808800
@@ -928,6 +920,7 @@ async def main():
928920 parser = argparse .ArgumentParser (description = "Snapshot-based eval builder" )
929921 parser .add_argument ('--file' , '-f' , help = 'Eval file path (default: <workdir>/task.yaml)' )
930922 parser .add_argument ('--workdir' , '-w' , required = True , help = 'Working directory for snapshots and validation scripts' )
923+ parser .add_argument ('--disable-filtering' , action = 'store_true' , help = 'Disable HTML cleaning (keep raw HTML with scripts/styles)' )
931924 args = parser .parse_args ()
932925
933926 # Normalize workdir path (strip 'evals/' prefix if present and we're already in evals/)
@@ -939,18 +932,24 @@ async def main():
939932 # Strip trailing slashes to avoid double slashes in paths
940933 workdir = workdir .rstrip ('/' )
941934
942- # Auto-detect task.yaml in workdir if no file specified
935+ # Auto-detect task.yaml or task.yml in workdir if no file specified
943936 file_path = args .file
944937 if not file_path :
938+ # Check for both .yaml and .yml extensions
945939 task_yaml_path = os .path .join (workdir , 'task.yaml' )
940+ task_yml_path = os .path .join (workdir , 'task.yml' )
941+
946942 if os .path .exists (task_yaml_path ):
947943 file_path = task_yaml_path
948944 print (f"📋 Found existing task.yaml: { file_path } " )
945+ elif os .path .exists (task_yml_path ):
946+ file_path = task_yml_path
947+ print (f"📋 Found existing task.yml: { file_path } " )
949948 else :
950949 file_path = task_yaml_path # Will be created as new file
951950 print (f"📝 Will create new task.yaml: { file_path } " )
952951
953- builder = SnapshotBasedEvalBuilder (file_path = file_path , workdir = workdir )
952+ builder = SnapshotBasedEvalBuilder (file_path = file_path , workdir = workdir , disable_filtering = args . disable_filtering )
954953 await builder .run ()
955954
956955
0 commit comments