|
176 | 176 | except ImportError: |
177 | 177 | pass |
178 | 178 |
|
# Optional mechanize support: record whether the package can be imported,
# mirroring the other optional-HTTP-library availability flags in this file.
try:
    import mechanize
except ImportError:
    havemechanize = False
else:
    havemechanize = True
179 | 187 | # Requests support |
180 | 188 | haverequests = False |
181 | 189 | try: |
@@ -319,7 +327,7 @@ def decode_unicode_escape(value): |
319 | 327 | __version_date__ = str(__version_date_info__[0]) + "." + str( |
320 | 328 | __version_date_info__[1]).zfill(2) + "." + str(__version_date_info__[2]).zfill(2) |
321 | 329 | __revision__ = __version_info__[3] |
322 | | -__revision_id__ = "$Id: eb01b2409a1f3679f46ba8893b53eb954440b706 $" |
| 330 | +__revision_id__ = "$Id$" |
323 | 331 | if(__version_info__[4] is not None): |
324 | 332 | __version_date_plusrc__ = __version_date__ + \ |
325 | 333 | "-" + str(__version_date_info__[4]) |
@@ -9646,65 +9654,86 @@ def read(self, size=-1): |
def download_file_from_http_file(url, headers=None, usehttp=__use_http_lib__):
    """Download *url* and return its contents as a seekable BytesIO object.

    Credentials embedded in the URL (user:pass@host) are stripped from the
    request URL and supplied through each backend's auth mechanism instead.
    ``sftp``/``ftp``/``ftps`` URLs are delegated to the matching protocol
    helpers defined elsewhere in this module.

    Parameters:
        url     -- URL to fetch; may embed username/password in the netloc.
        headers -- optional dict of extra request headers (default: empty).
        usehttp -- backend selector: 'requests', 'httpx', 'mechanize', or
                   anything else for the urllib fallback; a selected backend
                   is only used when its availability flag is set.

    Returns:
        io.BytesIO positioned at offset 0, or whatever the delegated
        SFTP/FTP helper returns for those schemes.
    """
    if headers is None:
        headers = {}
    urlparts = urlparse(url)
    username = urlparts.username
    password = urlparts.password

    # Rebuild the URL without the embedded credentials; auth is passed
    # explicitly per backend below.
    netloc = urlparts.hostname or ''
    if urlparts.port:
        netloc += ':' + str(urlparts.port)
    rebuilt_url = urlunparse((urlparts.scheme, netloc, urlparts.path,
                              urlparts.params, urlparts.query, urlparts.fragment))

    # Non-HTTP schemes: hand off to the dedicated helpers (they need the
    # original URL, credentials included).
    if urlparts.scheme == "sftp":
        if __use_pysftp__:
            return download_file_from_pysftp_file(url)
        else:
            return download_file_from_sftp_file(url)
    elif urlparts.scheme == "ftp" or urlparts.scheme == "ftps":
        return download_file_from_ftp_file(url)

    # In-memory destination for the downloaded bytes.
    httpfile = BytesIO()

    # 1) Requests branch
    if usehttp == 'requests' and haverequests:
        if username and password:
            response = requests.get(
                rebuilt_url, headers=headers, auth=(username, password), stream=True
            )
        else:
            response = requests.get(rebuilt_url, headers=headers, stream=True)
        try:
            response.raw.decode_content = True
            shutil.copyfileobj(response.raw, httpfile)
        finally:
            # FIX: close the response so the pooled connection is released
            # (streamed responses are not auto-closed by requests).
            response.close()

    # 2) HTTPX branch
    elif usehttp == 'httpx' and havehttpx:
        # The Client context manager closes the response/connection for us.
        with httpx.Client(follow_redirects=True) as client:
            if username and password:
                response = client.get(
                    rebuilt_url, headers=headers, auth=(username, password)
                )
            else:
                response = client.get(rebuilt_url, headers=headers)
            raw_wrapper = RawIteratorWrapper(response.iter_bytes())
            shutil.copyfileobj(raw_wrapper, httpfile)

    # 3) Mechanize branch
    elif usehttp == 'mechanize' and havemechanize:
        br = mechanize.Browser()
        # Do not fetch/obey robots.txt for a direct file download.
        br.set_handle_robots(False)
        # mechanize expects headers as a list of (name, value) tuples.
        if headers:
            br.addheaders = list(headers.items())
        # mechanize keeps its own password manager for basic auth.
        if username and password:
            br.add_password(rebuilt_url, username, password)
        response = br.open(rebuilt_url)
        try:
            shutil.copyfileobj(response, httpfile)
        finally:
            # FIX: close the mechanize response to free the socket.
            response.close()

    # 4) Fallback to urllib
    else:
        request = Request(rebuilt_url, headers=headers)
        if username and password:
            password_mgr = HTTPPasswordMgrWithDefaultRealm()
            # realm=None -> use these credentials for any realm at this URL.
            password_mgr.add_password(None, rebuilt_url, username, password)
            auth_handler = HTTPBasicAuthHandler(password_mgr)
            opener = build_opener(auth_handler)
        else:
            opener = build_opener()
        response = opener.open(request)
        try:
            shutil.copyfileobj(response, httpfile)
        finally:
            # FIX: close the urllib response to free the socket.
            response.close()

    # Reset file pointer to the start before returning
    httpfile.seek(0, 0)
    return httpfile
9709 | 9738 |
|
9710 | 9739 |
|
|
0 commit comments