Skip to content

Commit 3f50548

Browse files
committed
Remove unneeded to_bytes() and to_unicode() usages
1 parent 1a6ff6b commit 3f50548

File tree

2 files changed

+23
-30
lines changed

2 files changed

+23
-30
lines changed

w3lib/url.py

Lines changed: 17 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -24,12 +24,12 @@
2424
urlunsplit,
2525
)
2626
from urllib.request import pathname2url, url2pathname
27-
from w3lib.util import to_bytes, to_unicode
27+
from w3lib.util import to_unicode
2828

2929

3030
# error handling function for bytes-to-Unicode decoding errors with URLs
3131
def _quote_byte(error):
32-
return (to_unicode(quote(error.object[error.start:error.end])), error.end)
32+
return (quote(error.object[error.start:error.end]), error.end)
3333

3434
codecs.register_error('percentencode', _quote_byte)
3535

@@ -77,26 +77,22 @@ def safe_url_string(url, encoding='utf8', path_encoding='utf8', quote_path=True)
7777
# IDNA encoding can fail for too long labels (>63 characters)
7878
# or missing labels (e.g. http://.example.com)
7979
try:
80-
netloc = parts.netloc.encode('idna')
80+
netloc = parts.netloc.encode('idna').decode()
8181
except UnicodeError:
8282
netloc = parts.netloc
8383

8484
# default encoding for path component SHOULD be UTF-8
8585
if quote_path:
86-
path = quote(to_bytes(parts.path, path_encoding), _path_safe_chars)
86+
path = quote(parts.path.encode(path_encoding), _path_safe_chars)
8787
else:
88-
path = to_unicode(parts.path)
88+
path = parts.path
8989

90-
# quote() in Python2 return type follows input type;
91-
# quote() in Python3 always returns Unicode (native str)
9290
return urlunsplit((
93-
to_unicode(parts.scheme),
94-
to_unicode(netloc).rstrip(':'),
91+
parts.scheme,
92+
netloc.rstrip(':'),
9593
path,
96-
# encoding of query and fragment follows page encoding
97-
# or form-charset (if known and passed)
98-
quote(to_bytes(parts.query, encoding), _safe_chars),
99-
quote(to_bytes(parts.fragment, encoding), _safe_chars),
94+
quote(parts.query.encode(encoding), _safe_chars),
95+
quote(parts.fragment.encode(encoding), _safe_chars),
10096
))
10197

10298

@@ -410,22 +406,17 @@ def _safe_ParseResult(parts, encoding='utf8', path_encoding='utf8'):
410406
# IDNA encoding can fail for too long labels (>63 characters)
411407
# or missing labels (e.g. http://.example.com)
412408
try:
413-
netloc = parts.netloc.encode('idna')
409+
netloc = parts.netloc.encode('idna').decode()
414410
except UnicodeError:
415411
netloc = parts.netloc
416412

417413
return (
418-
to_unicode(parts.scheme),
419-
to_unicode(netloc),
420-
421-
# default encoding for path component SHOULD be UTF-8
422-
quote(to_bytes(parts.path, path_encoding), _path_safe_chars),
423-
quote(to_bytes(parts.params, path_encoding), _safe_chars),
424-
425-
# encoding of query and fragment follows page encoding
426-
# or form-charset (if known and passed)
427-
quote(to_bytes(parts.query, encoding), _safe_chars),
428-
quote(to_bytes(parts.fragment, encoding), _safe_chars)
414+
parts.scheme,
415+
netloc,
416+
quote(parts.path.encode(path_encoding), _path_safe_chars),
417+
quote(parts.params.encode(path_encoding), _safe_chars),
418+
quote(parts.query.encode(encoding), _safe_chars),
419+
quote(parts.fragment.encode(encoding), _safe_chars)
429420
)
430421

431422

@@ -466,7 +457,7 @@ def canonicalize_url(url, keep_blank_values=True, keep_fragments=False,
466457
# if not for proper URL expected by remote website.
467458
try:
468459
scheme, netloc, path, params, query, fragment = _safe_ParseResult(
469-
parse_url(url), encoding=encoding)
460+
parse_url(url), encoding=encoding or 'utf8')
470461
except UnicodeEncodeError as e:
471462
scheme, netloc, path, params, query, fragment = _safe_ParseResult(
472463
parse_url(url), encoding='utf8')

w3lib/util.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -33,8 +33,9 @@ def to_unicode(text, encoding=None, errors='strict'):
3333
if isinstance(text, str):
3434
return text
3535
if not isinstance(text, (bytes, str)):
36-
raise TypeError('to_unicode must receive a bytes, str or unicode '
37-
'object, got %s' % type(text).__name__)
36+
raise TypeError(
37+
f'to_unicode must receive bytes or str, got {type(text).__name__}'
38+
)
3839
if encoding is None:
3940
encoding = 'utf-8'
4041
return text.decode(encoding, errors)
@@ -45,8 +46,9 @@ def to_bytes(text, encoding=None, errors='strict'):
4546
if isinstance(text, bytes):
4647
return text
4748
if not isinstance(text, str):
48-
raise TypeError('to_bytes must receive a unicode, str or bytes '
49-
'object, got %s' % type(text).__name__)
49+
raise TypeError(
50+
f'to_bytes must receive str or bytes, got {type(text).__name__}'
51+
)
5052
if encoding is None:
5153
encoding = 'utf-8'
5254
return text.encode(encoding, errors)

0 commit comments

Comments
 (0)