2 files changed: +14 -1 lines changed
Original file line number Diff line number Diff line change
@@ -509,8 +509,8 @@ def changeEncoding(self, newEncoding):
509509 self .charEncoding = (self .charEncoding [0 ], "certain" )
510510 else :
511511 self .rawStream .seek (0 )
512- self .reset ()
513512 self .charEncoding = (newEncoding , "certain" )
513+ self .reset ()
514514 raise ReparseException ("Encoding changed from %s to %s" % (self .charEncoding [0 ], newEncoding ))
515515
516516 def detectBOM (self ):
Original file line number Diff line number Diff line change
@@ -21,6 +21,19 @@ def test_basic_prescan_length():
2121 assert 'utf-8' == stream .charEncoding [0 ].name
2222
2323
24+ def test_parser_reparse ():
25+ data = "<title>Caf\u00E9 </title><!--a--><meta charset='utf-8'>" .encode ('utf-8' )
26+ pad = 10240 - len (data ) + 1
27+ data = data .replace (b"-a-" , b"-" + (b"a" * pad ) + b"-" )
28+ assert len (data ) == 10240 # Sanity
29+ stream = inputstream .HTMLBinaryInputStream (data , chardet = False )
30+ assert 'windows-1252' == stream .charEncoding [0 ].name
31+ p = HTMLParser (namespaceHTMLElements = False )
32+ doc = p .parse (data , useChardet = False )
33+ assert 'utf-8' == p .documentEncoding
34+ assert doc .find (".//title" ).text == "Caf\u00E9 "
35+
36+
2437def runParserEncodingTest (data , encoding ):
2538 p = HTMLParser ()
2639 assert p .documentEncoding is None
You can’t perform that action at this time.
0 commit comments