From 079142185e484957565bb236c1415d7e7704aea2 Mon Sep 17 00:00:00 2001 From: Stanislav Pankevich Date: Sun, 16 Feb 2025 17:58:58 +0100 Subject: [PATCH] Switch to a custom Chrome downloader It turns out that the Web Driver Manager downloads the very latest version of ChromeDriver every time, which causes conflicts on machines that have a slightly older version of Chrome installed: ``` selenium.common.exceptions.SessionNotCreatedException: Message: session not created: This version of ChromeDriver only supports Chrome version 133 Current browser version is 132.0.6834.110 with binary path /Applications/Google Chrome for Testing.app/Contents/MacOS/Google Chrome for Testing ``` https://github.com/strictdoc-project/strictdoc/pull/2077#issuecomment-2655802850 --- html2print/html2print.py | 249 +++++++++++++++++++++++++++------------ 1 file changed, 171 insertions(+), 78 deletions(-) diff --git a/html2print/html2print.py b/html2print/html2print.py index e46d2c0..c553613 100644 --- a/html2print/html2print.py +++ b/html2print/html2print.py @@ -3,10 +3,13 @@ import atexit import base64 import os.path +import platform +import re +import subprocess import sys +import zipfile from datetime import datetime from pathlib import Path -from shutil import copy from time import sleep from typing import Dict, List, Optional @@ -15,15 +18,9 @@ from selenium import webdriver from selenium.webdriver.chrome.options import Options from selenium.webdriver.chrome.service import Service -from webdriver_manager.chrome import ChromeDriverManager -from webdriver_manager.core.download_manager import WDMDownloadManager -from webdriver_manager.core.driver import Driver -from webdriver_manager.core.driver_cache import DriverCacheManager -from webdriver_manager.core.file_manager import FileManager -from webdriver_manager.core.http import HttpClient -from webdriver_manager.core.os_manager import OperationSystemManager +from webdriver_manager.core.os_manager import ChromeType, OperationSystemManager -__version__ = "0.0.8" 
+__version__ = "0.0.12" PATH_TO_HTML2PDF_JS = os.path.join( os.path.dirname(os.path.join(__file__)), "html2pdf_js", "html2pdf.min.js" @@ -39,52 +36,40 @@ sys.stdout = open(sys.stdout.fileno(), mode="w", encoding="utf8", closefd=False) -class HTML2Print_HTTPClient(HttpClient): - def get(self, url, params=None, **kwargs) -> Response: - last_error: Optional[Exception] = None - for attempt in range(1, 3): - print( # noqa: T201 - f"html2print: sending GET request attempt {attempt}: {url}" - ) - try: - return requests.get(url, params, timeout=(5, 5), **kwargs) - except requests.exceptions.ConnectTimeout as connect_timeout_: - last_error = connect_timeout_ - except requests.exceptions.ReadTimeout as read_timeout_: - last_error = read_timeout_ - except Exception as exception_: - raise AssertionError( - "html2print: unknown exception", exception_ - ) from None +class ChromeDriverManager: + def get_chrome_driver(self, path_to_cache_dir: str): + chrome_version = self.get_chrome_version() + chrome_major_version = chrome_version.split(".")[0] + print( # noqa: T201 - f"html2print: " - f"failed to get response for URL: {url} with error: {last_error}" + f"html2print: Installed Chrome version: {chrome_version}" ) + system_map = { + "Windows": "win32", + "Darwin": "mac-arm64" + if platform.machine() == "arm64" + else "mac-x64", + "Linux": "linux64", + } + os_type = system_map[platform.system()] + is_windows = platform.system() == "Windows" -class HTML2Print_CacheManager(DriverCacheManager): - def __init__(self, file_manager: FileManager, path_to_cache_dir: str): - super().__init__(file_manager=file_manager) - self.path_to_cache_dir: str = path_to_cache_dir - - def find_driver(self, driver: Driver): - path_to_cached_chrome_driver_dir = os.path.join( - self.path_to_cache_dir, "chromedriver" - ) - - os_type = self.get_os_type() - browser_type = driver.get_browser_type() - browser_version = self._os_system_manager.get_browser_version_from_os( - browser_type + print( # noqa: T201 + 
f"html2print: OS system: {platform.system()}, OS type: {os_type}." ) - assert browser_version is not None, browser_version path_to_cached_chrome_driver_dir = os.path.join( - path_to_cached_chrome_driver_dir, browser_version, os_type + path_to_cache_dir, chrome_major_version ) path_to_cached_chrome_driver = os.path.join( - path_to_cached_chrome_driver_dir, "chromedriver" + path_to_cached_chrome_driver_dir, + f"chromedriver-{os_type}", + "chromedriver", ) + if is_windows: + path_to_cached_chrome_driver += ".exe" + if os.path.isfile(path_to_cached_chrome_driver): print( # noqa: T201 f"html2print: ChromeDriver exists in the local cache: " @@ -95,25 +80,144 @@ def find_driver(self, driver: Driver): f"html2print: ChromeDriver does not exist in the local cache: " f"{path_to_cached_chrome_driver}" ) - path_to_downloaded_chrome_driver = super().find_driver(driver) - if path_to_downloaded_chrome_driver is None: - print( # noqa: T201 - f"html2print: could not get a downloaded ChromeDriver: " - f"{path_to_cached_chrome_driver}" + + path_to_downloaded_chrome_driver = self._download_chromedriver( + chrome_major_version, + os_type, + path_to_cached_chrome_driver_dir, + path_to_cached_chrome_driver, + ) + assert os.path.isfile(path_to_downloaded_chrome_driver) + os.chmod(path_to_downloaded_chrome_driver, 0o755) + + return path_to_downloaded_chrome_driver + + @staticmethod + def _download_chromedriver( + chrome_major_version, + os_type: str, + path_to_driver_cache_dir, + path_to_cached_chrome_driver, + ): + url = "https://googlechromelabs.github.io/chrome-for-testing/known-good-versions-with-downloads.json" + response = ChromeDriverManager.send_http_get_request(url).json() + + matching_versions = [ + item + for item in response["versions"] + if item["version"].startswith(chrome_major_version) + ] + + if not matching_versions: + raise Exception( + f"No compatible ChromeDriver found for Chrome version {chrome_major_version}" + ) + + latest_version = matching_versions[-1] + + 
driver_url: str + chrome_downloadable_versions = latest_version["downloads"][ + "chromedriver" + ] + for chrome_downloadable_version_ in chrome_downloadable_versions: + if chrome_downloadable_version_["platform"] == os_type: + driver_url = chrome_downloadable_version_["url"] + break + else: + raise RuntimeError( + f"Could not find a downloadable URL from downloadable versions: {chrome_downloadable_versions}" ) - return None print( # noqa: T201 - f"html2print: saving chromedriver to StrictDoc's local cache: " - f"{path_to_downloaded_chrome_driver} -> {path_to_cached_chrome_driver}" + f"html2print: downloading ChromeDriver from: {driver_url}" ) - Path(path_to_cached_chrome_driver_dir).mkdir( - parents=True, exist_ok=True + response = ChromeDriverManager.send_http_get_request(driver_url) + + Path(path_to_driver_cache_dir).mkdir(parents=True, exist_ok=True) + zip_path = os.path.join(path_to_driver_cache_dir, "chromedriver.zip") + print( # noqa: T201 + f"html2print: saving downloaded ChromeDriver to path: {zip_path}" ) - copy(path_to_downloaded_chrome_driver, path_to_cached_chrome_driver) + with open(zip_path, "wb") as file: + file.write(response.content) + with zipfile.ZipFile(zip_path, "r") as zip_ref: + zip_ref.extractall(path_to_driver_cache_dir) + + print( # noqa: T201 + f"html2print: ChromeDriver downloaded to: {path_to_cached_chrome_driver}" + ) return path_to_cached_chrome_driver + @staticmethod + def send_http_get_request(url, params=None, **kwargs) -> Response: + last_error: Optional[Exception] = None + for attempt in range(1, 4): + print( # noqa: T201 + f"html2print: sending GET request attempt {attempt}: {url}" + ) + try: + return requests.get(url, params, timeout=(5, 5), **kwargs) + except requests.exceptions.ConnectTimeout as connect_timeout_: + last_error = connect_timeout_ + except requests.exceptions.ReadTimeout as read_timeout_: + last_error = read_timeout_ + except Exception as exception_: + raise AssertionError( + "html2print: unknown exception", 
exception_ + ) from None + print( # noqa: T201 + f"html2print: " + f"failed to get response for URL: {url} with error: {last_error}" + ) + + @staticmethod + def get_chrome_version(): + # Special case: GitHub Actions macOS CI machines have both + # Google Chrome for Testing and normal Google Chrome installed, and + # sometimes their versions are of different major version families. + # The solution is to check if the Google Chrome for Testing is available, + # and use its version instead of the normal one. + if platform.system() == "Darwin": + chrome_path = "/Applications/Google Chrome for Testing.app/Contents/MacOS/Google Chrome for Testing" + try: + print( # noqa: T201 + "html2print: " + "checking if there is Google Chrome for Testing instead of " + "a normal Chrome available." + ) + + version_output = subprocess.run( + [chrome_path, "--version"], + capture_output=True, + text=True, + check=True, + ) + chrome_version = version_output.stdout.strip() + match = re.search(r"\d+(\.\d+)+", chrome_version) + if not match: + raise RuntimeError( + "Cannot extract the version part using regex." 
+ ) + + chrome_version = match.group(0) + + print( # noqa: T201 + f"html2print: Google Chrome for Testing Version: {chrome_version}" + ) + + return chrome_version + except FileNotFoundError: + print("html2print: Chrome for Testing not available.") # noqa: T201 + except Exception as e: + print( # noqa: T201 + f"html2print: Error getting Google Chrome for Testing version: {e}" + ) + + os_manager = OperationSystemManager(os_type=None) + version = os_manager.get_browser_version_from_os(ChromeType.GOOGLE) + return version + def get_inches_from_millimeters(mm: float) -> float: return mm / 25.4 @@ -190,23 +294,12 @@ class Done(Exception): return data -def get_chrome_driver(path_to_cache_dir: str) -> str: - cache_manager = HTML2Print_CacheManager( - file_manager=FileManager(os_system_manager=OperationSystemManager()), - path_to_cache_dir=path_to_cache_dir, - ) - - http_client = HTML2Print_HTTPClient() - download_manager = WDMDownloadManager(http_client) - path_to_chrome = ChromeDriverManager( - download_manager=download_manager, cache_manager=cache_manager - ).install() - return path_to_chrome - - def create_webdriver(chromedriver: Optional[str], path_to_cache_dir: str): + print("html2print: creating ChromeDriver service.", flush=True) # noqa: T201 if chromedriver is None: - path_to_chrome = get_chrome_driver(path_to_cache_dir) + path_to_chrome = ChromeDriverManager().get_chrome_driver( + path_to_cache_dir + ) else: path_to_chrome = chromedriver print(f"html2print: ChromeDriver available at path: {path_to_chrome}") # noqa: T201 @@ -254,6 +347,8 @@ def main(): command_subparsers = parser.add_subparsers(title="command", dest="command") command_subparsers.required = True + print(f"html2print: version {__version__}") # noqa: T201 + # # Get driver command. 
# @@ -295,12 +390,12 @@ def main(): path_to_cache_dir: str if args.command == "get_driver": path_to_cache_dir = ( - args.cache_dir - if args.cache_dir is not None - else (DEFAULT_CACHE_DIR) + args.cache_dir if args.cache_dir is not None else DEFAULT_CACHE_DIR ) - path_to_chrome = get_chrome_driver(path_to_cache_dir) + path_to_chrome = ChromeDriverManager().get_chrome_driver( + path_to_cache_dir + ) print(f"html2print: ChromeDriver available at path: {path_to_chrome}") # noqa: T201 sys.exit(0) @@ -308,9 +403,7 @@ def main(): paths: List[str] = args.paths path_to_cache_dir = ( - args.cache_dir - if args.cache_dir is not None - else (DEFAULT_CACHE_DIR) + args.cache_dir if args.cache_dir is not None else DEFAULT_CACHE_DIR ) driver = create_webdriver(args.chromedriver, path_to_cache_dir)