2 files changed (+18 -0 lines changed)
Original file line number | Diff line number | Diff line change
@@ -650,6 +650,21 @@ def test_canonicalize_url_idna_exceptions(self):
650650 "http://www.{label}.com/r%C3%A9sum%C3%A9?q=r%C3%A9sum%C3%A9" .format (
651651 label = u"example" * 11 ))
652652
def test_preserve_nonfragment_hash(self):
    """Check that a percent-encoded ``%23`` is not decoded to ``#``.

    Each case is ``(input URL, extra keyword arguments, expected result)``
    and is passed through ``canonicalize_url`` in the order listed.
    """
    hash_path = "http://www.example.com/path/to/%23/foo/bar"
    hash_path_frag = "http://www.example.com/path/to/%23/foo/bar#frag"
    nested_url = "http://www.example.com/path/to/%23/foo/bar?url=http%3A%2F%2Fwww.example.com%2Fpath%2Fto%2F%23%2Fbar%2Ffoo"
    nested_url_2 = "http://www.example.com/path/to/%23/foo/bar?url=http%3A%2F%2Fwww.example.com%2F%2Fpath%2Fto%2F%23%2Fbar%2Ffoo"
    nested_url_2_frag = "http://www.example.com/path/to/%23/foo/bar?url=http%3A%2F%2Fwww.example.com%2F%2Fpath%2Fto%2F%23%2Fbar%2Ffoo#frag"

    cases = [
        # `%23` inside the path, with and without a real fragment
        (hash_path, {}, hash_path),
        (hash_path_frag, {}, hash_path),
        (hash_path_frag, {"keep_fragments": True}, hash_path_frag),
        # `%23` inside both the path and a query-string value
        (nested_url, {}, nested_url),
        (nested_url_2_frag, {}, nested_url_2),
        (nested_url_2_frag, {"keep_fragments": True}, nested_url_2_frag),
    ]
    for url, options, expected in cases:
        self.assertEqual(canonicalize_url(url, **options), expected)
667+
653668
654669class DataURITests (unittest .TestCase ):
655670
Original file line number | Diff line number | Diff line change
@@ -34,6 +34,9 @@ def _quote_byte(error):
3434
# Bytes treated as safe: the RFC 3986 reserved and unreserved sets, the
# extra safe characters and b'%', with b'#' explicitly excluded.
# see https://github.com/scrapy/w3lib/issues/91
_safe_chars = (
    RFC3986_RESERVED + RFC3986_UNRESERVED + EXTRA_SAFE_CHARS + b'%'
).replace(b'#', b'')

# Matches a single ASCII tab or newline character;
# see https://infra.spec.whatwg.org/#ascii-tab-or-newline
_ascii_tab_newline_re = re.compile(r'[\t\n\r]')
3841
3942def safe_url_string (url , encoding = 'utf8' , path_encoding = 'utf8' , quote_path = True ):
You can’t perform that action at this time.
0 commit comments