@@ -32,41 +32,6 @@ def _quote_byte(error):
3232RFC3986_UNRESERVED = (string .ascii_letters + string .digits + "-._~" ).encode ('ascii' )
3333
3434
35- def urljoin_rfc (base , ref , encoding = 'utf-8' ):
36- r"""
37- .. warning::
38-
39- This function is deprecated and will be removed in future.
40- It is not supported with Python 3.
41- Please use ``urlparse.urljoin`` instead.
42-
43- Same as urlparse.urljoin but supports unicode values in base and ref
44- parameters (in which case they will be converted to str using the given
45- encoding).
46-
47- Always returns a str.
48-
49- >>> import w3lib.url
50- >>> w3lib.url.urljoin_rfc('http://www.example.com/path/index.html', u'/otherpath/index2.html')
51- 'http://www.example.com/otherpath/index2.html'
52- >>>
53-
54- >>> # Note: the following does not work in Python 3
55- >>> w3lib.url.urljoin_rfc(b'http://www.example.com/path/index.html', u'fran\u00e7ais/d\u00e9part.htm') # doctest: +SKIP
56- 'http://www.example.com/path/fran\xc3\xa7ais/d\xc3\xa9part.htm'
57- >>>
58-
59-
60- """
61-
62- warnings .warn ("w3lib.url.urljoin_rfc is deprecated, use urlparse.urljoin instead" ,
63- DeprecationWarning )
64-
65- str_base = to_bytes (base , encoding )
66- str_ref = to_bytes (ref , encoding )
67- return urljoin (str_base , str_ref )
68-
69-
7035_safe_chars = RFC3986_RESERVED + RFC3986_UNRESERVED + b'%'
7136
7237def safe_url_string (url , encoding = 'utf8' , path_encoding = 'utf8' ):
@@ -116,6 +81,7 @@ def safe_url_string(url, encoding='utf8', path_encoding='utf8'):
11681 quote (to_bytes (parts .fragment , encoding ), _safe_chars ),
11782 ))
11883
84+
11985_parent_dirs = re .compile (r'/?(\.\./)+' )
12086
12187def safe_download_url (url ):
@@ -136,9 +102,11 @@ def safe_download_url(url):
136102 path = '/'
137103 return urlunsplit ((scheme , netloc , path , query , '' ))
138104
105+
139106def is_url (text ):
140107 return text .partition ("://" )[0 ] in ('file' , 'http' , 'https' )
141108
109+
142110def url_query_parameter (url , parameter , default = None , keep_blank_values = 0 ):
143111 """Return the value of a url parameter, given the url and parameter name
144112
@@ -174,6 +142,7 @@ def url_query_parameter(url, parameter, default=None, keep_blank_values=0):
174142 )
175143 return queryparams .get (parameter , [default ])[0 ]
176144
145+
177146def url_query_cleaner (url , parameterlist = (), sep = '&' , kvsep = '=' , remove = False , unique = True , keep_fragments = False ):
178147 """Clean URL arguments leaving only those passed in the parameterlist keeping order
179148
@@ -228,6 +197,7 @@ def url_query_cleaner(url, parameterlist=(), sep='&', kvsep='=', remove=False, u
228197 url += '#' + fragment
229198 return url
230199
200+
231201def add_or_replace_parameter (url , name , new_value ):
232202 """Add or remove a parameter to a given url
233203
@@ -269,13 +239,15 @@ def path_to_file_uri(path):
269239 x = x .replace ('|' , ':' ) # http://bugs.python.org/issue5861
270240 return 'file:///%s' % x .lstrip ('/' )
271241
242+
272243def file_uri_to_path (uri ):
273244 """Convert File URI to local filesystem path according to:
274245 http://en.wikipedia.org/wiki/File_URI_scheme
275246 """
276247 uri_path = urlparse (uri ).path
277248 return url2pathname (uri_path )
278249
250+
279251def any_to_uri (uri_or_path ):
280252 """If given a path name, return its File URI, otherwise return it
281253 unmodified
@@ -583,3 +555,38 @@ def parse_qsl_to_bytes(qs, keep_blank_values=False):
583555 value = _coerce_result (value )
584556 r .append ((name , value ))
585557 return r
558+
559+
560+ def urljoin_rfc (base , ref , encoding = 'utf-8' ):
561+ r"""
562+ .. warning::
563+
564+ This function is deprecated and will be removed in a future release.
565+ It is not supported with Python 3.
566+ Please use ``urlparse.urljoin`` instead.
567+
568+ Same as urlparse.urljoin but supports unicode values in base and ref
569+ parameters (in which case they will be converted to str using the given
570+ encoding).
571+
572+ Always returns a byte string (``str`` on Python 2).
573+
574+ >>> import w3lib.url
575+ >>> w3lib.url.urljoin_rfc('http://www.example.com/path/index.html', u'/otherpath/index2.html')
576+ 'http://www.example.com/otherpath/index2.html'
577+ >>>
578+
579+ >>> # Note: the following does not work in Python 3
580+ >>> w3lib.url.urljoin_rfc(b'http://www.example.com/path/index.html', u'fran\u00e7ais/d\u00e9part.htm') # doctest: +SKIP
581+ 'http://www.example.com/path/fran\xc3\xa7ais/d\xc3\xa9part.htm'
582+ >>>
583+
584+
585+ """
586+
587+ warnings .warn ("w3lib.url.urljoin_rfc is deprecated, use urlparse.urljoin instead" ,
588+ DeprecationWarning )
589+
590+ str_base = to_bytes (base , encoding )
591+ str_ref = to_bytes (ref , encoding )
592+ return urljoin (str_base , str_ref )
0 commit comments