Skip to content

Commit 4029a8c

Browse files
author
Kazuki Suzuki Przyborowski
committed
Update pycatfile.py
1 parent a8b6748 commit 4029a8c

1 file changed

Lines changed: 51 additions & 22 deletions

File tree

pycatfile.py

Lines changed: 51 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -176,6 +176,14 @@
176176
except ImportError:
177177
pass
178178

179+
# Add the mechanize import check
180+
havemechanize = False
181+
try:
182+
import mechanize
183+
havemechanize = True
184+
except ImportError:
185+
pass
186+
179187
# Requests support
180188
haverequests = False
181189
try:
@@ -319,7 +327,7 @@ def decode_unicode_escape(value):
319327
__version_date__ = str(__version_date_info__[0]) + "." + str(
320328
__version_date_info__[1]).zfill(2) + "." + str(__version_date_info__[2]).zfill(2)
321329
__revision__ = __version_info__[3]
322-
__revision_id__ = "$Id: eb01b2409a1f3679f46ba8893b53eb954440b706 $"
330+
__revision_id__ = "$Id$"
323331
if(__version_info__[4] is not None):
324332
__version_date_plusrc__ = __version_date__ + \
325333
"-" + str(__version_date_info__[4])
@@ -9646,65 +9654,86 @@ def read(self, size=-1):
96469654
def download_file_from_http_file(url, headers=None, usehttp=__use_http_lib__):
    """Fetch ``url`` into memory and return it as a rewound ``BytesIO``.

    ``sftp``, ``ftp`` and ``ftps`` URLs are handed straight to the dedicated
    downloaders and their return value is passed through unchanged.  Any
    other scheme is fetched with the HTTP backend named by ``usehttp``.

    Args:
        url: URL to download.  Credentials embedded as ``user:pass@host`` are
            stripped from the URL that is actually requested and, when both
            parts are non-empty, supplied to the backend as basic-auth
            credentials instead.
        headers: Optional mapping of extra request headers.  ``None`` is
            treated as an empty dict.
        usehttp: Backend selector: ``'requests'``, ``'httpx'`` or
            ``'mechanize'``.  A backend is only used when its ``have*``
            availability flag is true; every other case falls back to urllib.

    Returns:
        A ``BytesIO`` holding the response body, positioned at offset 0
        (or whatever the SFTP/FTP helper returned for those schemes).
    """
    if headers is None:
        headers = {}
    urlparts = urlparse(url)

    # Dispatch non-HTTP schemes first: they receive the original URL, so
    # there is no reason to rebuild it (or evaluate ``urlparts.port``, which
    # may raise on a malformed port) before handing off.
    if urlparts.scheme == "sftp":
        if __use_pysftp__:
            return download_file_from_pysftp_file(url)
        return download_file_from_sftp_file(url)
    if urlparts.scheme in ("ftp", "ftps"):
        return download_file_from_ftp_file(url)

    username = urlparts.username
    password = urlparts.password
    # Credentials are only sent when both parts are present and non-empty.
    # NOTE(review): urlparse does not percent-decode these values -- confirm
    # whether callers ever pass percent-encoded credentials.
    use_auth = bool(username and password)

    # Rebuild the URL without the userinfo part.  ``hostname`` drops the
    # brackets of an IPv6 literal, so they must be restored or the rebuilt
    # netloc ("::1:8080") would be ambiguous/invalid.
    host = urlparts.hostname or ''
    if ':' in host:
        host = '[' + host + ']'
    netloc = host
    if urlparts.port:
        netloc += ':' + str(urlparts.port)
    rebuilt_url = urlunparse((urlparts.scheme, netloc, urlparts.path,
                              urlparts.params, urlparts.query, urlparts.fragment))

    # In-memory buffer that receives the response body.
    httpfile = BytesIO()

    # 1) Requests branch
    if usehttp == 'requests' and haverequests:
        request_args = {'headers': headers, 'stream': True}
        if use_auth:
            request_args['auth'] = (username, password)
        response = requests.get(rebuilt_url, **request_args)
        try:
            # Let urllib3 undo any Content-Encoding while the raw stream
            # is being read.
            response.raw.decode_content = True
            shutil.copyfileobj(response.raw, httpfile)
        finally:
            # Release the connection even if the copy fails.
            response.close()

    # 2) HTTPX branch
    elif usehttp == 'httpx' and havehttpx:
        request_args = {'headers': headers}
        if use_auth:
            request_args['auth'] = (username, password)
        with httpx.Client(follow_redirects=True) as client:
            # Stream the body rather than loading it fully before copying.
            with client.stream('GET', rebuilt_url, **request_args) as response:
                raw_wrapper = RawIteratorWrapper(response.iter_bytes())
                shutil.copyfileobj(raw_wrapper, httpfile)

    # 3) Mechanize branch
    elif usehttp == 'mechanize' and havemechanize:
        br = mechanize.Browser()
        try:
            # Do not fetch or obey robots.txt for a direct file download.
            br.set_handle_robots(False)
            # mechanize expects headers as a list of (name, value) pairs.
            if headers:
                br.addheaders = list(headers.items())
            if use_auth:
                br.add_password(rebuilt_url, username, password)
            response = br.open(rebuilt_url)
            try:
                shutil.copyfileobj(response, httpfile)
            finally:
                response.close()
        finally:
            br.close()

    # 4) Fallback to urllib
    else:
        request = Request(rebuilt_url, headers=headers)
        if use_auth:
            password_mgr = HTTPPasswordMgrWithDefaultRealm()
            password_mgr.add_password(None, rebuilt_url, username, password)
            opener = build_opener(HTTPBasicAuthHandler(password_mgr))
        else:
            opener = build_opener()
        response = opener.open(request)
        # try/finally rather than ``with`` so the response is closed without
        # relying on it supporting the context-manager protocol.
        try:
            shutil.copyfileobj(response, httpfile)
        finally:
            response.close()

    # Rewind so callers can read from the start.
    httpfile.seek(0, 0)
    return httpfile
97099738

97109739

0 commit comments

Comments
 (0)