diff --git a/html2print/html2print.py b/html2print/html2print.py
index e46d2c0..c553613 100644
--- a/html2print/html2print.py
+++ b/html2print/html2print.py
@@ -3,10 +3,13 @@
import atexit
import base64
import os.path
+import platform
+import re
+import subprocess
import sys
+import zipfile
from datetime import datetime
from pathlib import Path
-from shutil import copy
from time import sleep
from typing import Dict, List, Optional
@@ -15,15 +18,9 @@
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
-from webdriver_manager.chrome import ChromeDriverManager
-from webdriver_manager.core.download_manager import WDMDownloadManager
-from webdriver_manager.core.driver import Driver
-from webdriver_manager.core.driver_cache import DriverCacheManager
-from webdriver_manager.core.file_manager import FileManager
-from webdriver_manager.core.http import HttpClient
-from webdriver_manager.core.os_manager import OperationSystemManager
+from webdriver_manager.core.os_manager import ChromeType, OperationSystemManager
-__version__ = "0.0.8"
+__version__ = "0.0.12"
PATH_TO_HTML2PDF_JS = os.path.join(
os.path.dirname(os.path.join(__file__)), "html2pdf_js", "html2pdf.min.js"
@@ -39,52 +36,40 @@
sys.stdout = open(sys.stdout.fileno(), mode="w", encoding="utf8", closefd=False)
-class HTML2Print_HTTPClient(HttpClient):
- def get(self, url, params=None, **kwargs) -> Response:
- last_error: Optional[Exception] = None
- for attempt in range(1, 3):
- print( # noqa: T201
- f"html2print: sending GET request attempt {attempt}: {url}"
- )
- try:
- return requests.get(url, params, timeout=(5, 5), **kwargs)
- except requests.exceptions.ConnectTimeout as connect_timeout_:
- last_error = connect_timeout_
- except requests.exceptions.ReadTimeout as read_timeout_:
- last_error = read_timeout_
- except Exception as exception_:
- raise AssertionError(
- "html2print: unknown exception", exception_
- ) from None
+class ChromeDriverManager:
+ def get_chrome_driver(self, path_to_cache_dir: str):
+ chrome_version = self.get_chrome_version()
+ chrome_major_version = chrome_version.split(".")[0]
+
print( # noqa: T201
- f"html2print: "
- f"failed to get response for URL: {url} with error: {last_error}"
+ f"html2print: Installed Chrome version: {chrome_version}"
)
+ system_map = {
+ "Windows": "win32",
+ "Darwin": "mac-arm64"
+ if platform.machine() == "arm64"
+ else "mac-x64",
+ "Linux": "linux64",
+ }
+ os_type = system_map[platform.system()]
+ is_windows = platform.system() == "Windows"
-class HTML2Print_CacheManager(DriverCacheManager):
- def __init__(self, file_manager: FileManager, path_to_cache_dir: str):
- super().__init__(file_manager=file_manager)
- self.path_to_cache_dir: str = path_to_cache_dir
-
- def find_driver(self, driver: Driver):
- path_to_cached_chrome_driver_dir = os.path.join(
- self.path_to_cache_dir, "chromedriver"
- )
-
- os_type = self.get_os_type()
- browser_type = driver.get_browser_type()
- browser_version = self._os_system_manager.get_browser_version_from_os(
- browser_type
+ print( # noqa: T201
+ f"html2print: OS system: {platform.system()}, OS type: {os_type}."
)
- assert browser_version is not None, browser_version
path_to_cached_chrome_driver_dir = os.path.join(
- path_to_cached_chrome_driver_dir, browser_version, os_type
+ path_to_cache_dir, chrome_major_version
)
path_to_cached_chrome_driver = os.path.join(
- path_to_cached_chrome_driver_dir, "chromedriver"
+ path_to_cached_chrome_driver_dir,
+ f"chromedriver-{os_type}",
+ "chromedriver",
)
+ if is_windows:
+ path_to_cached_chrome_driver += ".exe"
+
if os.path.isfile(path_to_cached_chrome_driver):
print( # noqa: T201
f"html2print: ChromeDriver exists in the local cache: "
@@ -95,25 +80,144 @@ def find_driver(self, driver: Driver):
f"html2print: ChromeDriver does not exist in the local cache: "
f"{path_to_cached_chrome_driver}"
)
- path_to_downloaded_chrome_driver = super().find_driver(driver)
- if path_to_downloaded_chrome_driver is None:
- print( # noqa: T201
- f"html2print: could not get a downloaded ChromeDriver: "
- f"{path_to_cached_chrome_driver}"
+
+ path_to_downloaded_chrome_driver = self._download_chromedriver(
+ chrome_major_version,
+ os_type,
+ path_to_cached_chrome_driver_dir,
+ path_to_cached_chrome_driver,
+ )
+ assert os.path.isfile(path_to_downloaded_chrome_driver)
+ os.chmod(path_to_downloaded_chrome_driver, 0o755)
+
+ return path_to_downloaded_chrome_driver
+
+ @staticmethod
+ def _download_chromedriver(
+ chrome_major_version,
+ os_type: str,
+ path_to_driver_cache_dir,
+ path_to_cached_chrome_driver,
+ ):
+ url = "https://googlechromelabs.github.io/chrome-for-testing/known-good-versions-with-downloads.json"
+ response = ChromeDriverManager.send_http_get_request(url).json()
+
+ matching_versions = [
+ item
+ for item in response["versions"]
+ if item["version"].startswith(chrome_major_version)
+ ]
+
+ if not matching_versions:
+ raise Exception(
+ f"No compatible ChromeDriver found for Chrome version {chrome_major_version}"
+ )
+
+ latest_version = matching_versions[-1]
+
+ driver_url: str
+ chrome_downloadable_versions = latest_version["downloads"][
+ "chromedriver"
+ ]
+ for chrome_downloadable_version_ in chrome_downloadable_versions:
+ if chrome_downloadable_version_["platform"] == os_type:
+ driver_url = chrome_downloadable_version_["url"]
+ break
+ else:
+ raise RuntimeError(
+ f"Could not find a downloadable URL from downloadable versions: {chrome_downloadable_versions}"
)
- return None
print( # noqa: T201
- f"html2print: saving chromedriver to StrictDoc's local cache: "
- f"{path_to_downloaded_chrome_driver} -> {path_to_cached_chrome_driver}"
+ f"html2print: downloading ChromeDriver from: {driver_url}"
)
- Path(path_to_cached_chrome_driver_dir).mkdir(
- parents=True, exist_ok=True
+ response = ChromeDriverManager.send_http_get_request(driver_url)
+
+ Path(path_to_driver_cache_dir).mkdir(parents=True, exist_ok=True)
+ zip_path = os.path.join(path_to_driver_cache_dir, "chromedriver.zip")
+ print( # noqa: T201
+ f"html2print: saving downloaded ChromeDriver to path: {zip_path}"
)
- copy(path_to_downloaded_chrome_driver, path_to_cached_chrome_driver)
+ with open(zip_path, "wb") as file:
+ file.write(response.content)
+ with zipfile.ZipFile(zip_path, "r") as zip_ref:
+ zip_ref.extractall(path_to_driver_cache_dir)
+
+ print( # noqa: T201
+ f"html2print: ChromeDriver downloaded to: {path_to_cached_chrome_driver}"
+ )
return path_to_cached_chrome_driver
+ @staticmethod
+ def send_http_get_request(url, params=None, **kwargs) -> Response:
+ last_error: Optional[Exception] = None
+ for attempt in range(1, 4):
+ print( # noqa: T201
+ f"html2print: sending GET request attempt {attempt}: {url}"
+ )
+ try:
+ return requests.get(url, params, timeout=(5, 5), **kwargs)
+ except requests.exceptions.ConnectTimeout as connect_timeout_:
+ last_error = connect_timeout_
+ except requests.exceptions.ReadTimeout as read_timeout_:
+ last_error = read_timeout_
+ except Exception as exception_:
+ raise AssertionError(
+ "html2print: unknown exception", exception_
+ ) from None
+ print( # noqa: T201
+ f"html2print: "
+ f"failed to get response for URL: {url} with error: {last_error}"
+ )
+
+ @staticmethod
+ def get_chrome_version():
+ # Special case: GitHub Actions macOS CI machines have both
+ # Google Chrome for Testing and normal Google Chrome installed, and
+ # sometimes their versions are of different major version families.
+ # The solution is to check if the Google Chrome for Testing is available,
+ # and use its version instead of the normal one.
+ if platform.system() == "Darwin":
+ chrome_path = "/Applications/Google Chrome for Testing.app/Contents/MacOS/Google Chrome for Testing"
+ try:
+ print( # noqa: T201
+ "html2print: "
+ "checking if there is Google Chrome for Testing instead of "
+ "a normal Chrome available."
+ )
+
+ version_output = subprocess.run(
+ [chrome_path, "--version"],
+ capture_output=True,
+ text=True,
+ check=True,
+ )
+ chrome_version = version_output.stdout.strip()
+ match = re.search(r"\d+(\.\d+)+", chrome_version)
+ if not match:
+ raise RuntimeError(
+ "Cannot extract the version part using regex."
+ )
+
+ chrome_version = match.group(0)
+
+ print( # noqa: T201
+ f"html2print: Google Chrome for Testing Version: {chrome_version}"
+ )
+
+ return chrome_version
+ except FileNotFoundError:
+ print("html2print: Chrome for Testing not available.") # noqa: T201
+ except Exception as e:
+ print( # noqa: T201
+ f"html2print: Error getting Google Chrome for Testing version: {e}"
+ )
+
+ os_manager = OperationSystemManager(os_type=None)
+ version = os_manager.get_browser_version_from_os(ChromeType.GOOGLE)
+ return version
+
def get_inches_from_millimeters(mm: float) -> float:
return mm / 25.4
@@ -190,23 +294,12 @@ class Done(Exception):
return data
-def get_chrome_driver(path_to_cache_dir: str) -> str:
- cache_manager = HTML2Print_CacheManager(
- file_manager=FileManager(os_system_manager=OperationSystemManager()),
- path_to_cache_dir=path_to_cache_dir,
- )
-
- http_client = HTML2Print_HTTPClient()
- download_manager = WDMDownloadManager(http_client)
- path_to_chrome = ChromeDriverManager(
- download_manager=download_manager, cache_manager=cache_manager
- ).install()
- return path_to_chrome
-
-
def create_webdriver(chromedriver: Optional[str], path_to_cache_dir: str):
+ print("html2print: creating ChromeDriver service.", flush=True) # noqa: T201
if chromedriver is None:
- path_to_chrome = get_chrome_driver(path_to_cache_dir)
+ path_to_chrome = ChromeDriverManager().get_chrome_driver(
+ path_to_cache_dir
+ )
else:
path_to_chrome = chromedriver
print(f"html2print: ChromeDriver available at path: {path_to_chrome}") # noqa: T201
@@ -254,6 +347,8 @@ def main():
command_subparsers = parser.add_subparsers(title="command", dest="command")
command_subparsers.required = True
+ print(f"html2print: version {__version__}") # noqa: T201
+
#
# Get driver command.
#
@@ -295,12 +390,12 @@ def main():
path_to_cache_dir: str
if args.command == "get_driver":
path_to_cache_dir = (
- args.cache_dir
- if args.cache_dir is not None
- else (DEFAULT_CACHE_DIR)
+ args.cache_dir if args.cache_dir is not None else DEFAULT_CACHE_DIR
)
- path_to_chrome = get_chrome_driver(path_to_cache_dir)
+ path_to_chrome = ChromeDriverManager().get_chrome_driver(
+ path_to_cache_dir
+ )
print(f"html2print: ChromeDriver available at path: {path_to_chrome}") # noqa: T201
sys.exit(0)
@@ -308,9 +403,7 @@ def main():
paths: List[str] = args.paths
path_to_cache_dir = (
- args.cache_dir
- if args.cache_dir is not None
- else (DEFAULT_CACHE_DIR)
+ args.cache_dir if args.cache_dir is not None else DEFAULT_CACHE_DIR
)
driver = create_webdriver(args.chromedriver, path_to_cache_dir)