Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
249 changes: 171 additions & 78 deletions html2print/html2print.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,13 @@
import atexit
import base64
import os.path
import platform
import re
import subprocess
import sys
import zipfile
from datetime import datetime
from pathlib import Path
from shutil import copy
from time import sleep
from typing import Dict, List, Optional

Expand All @@ -15,15 +18,9 @@
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from webdriver_manager.core.download_manager import WDMDownloadManager
from webdriver_manager.core.driver import Driver
from webdriver_manager.core.driver_cache import DriverCacheManager
from webdriver_manager.core.file_manager import FileManager
from webdriver_manager.core.http import HttpClient
from webdriver_manager.core.os_manager import OperationSystemManager
from webdriver_manager.core.os_manager import ChromeType, OperationSystemManager

__version__ = "0.0.8"
__version__ = "0.0.12"

PATH_TO_HTML2PDF_JS = os.path.join(
os.path.dirname(os.path.join(__file__)), "html2pdf_js", "html2pdf.min.js"
Expand All @@ -39,52 +36,40 @@
sys.stdout = open(sys.stdout.fileno(), mode="w", encoding="utf8", closefd=False)


class HTML2Print_HTTPClient(HttpClient):
def get(self, url, params=None, **kwargs) -> Response:
last_error: Optional[Exception] = None
for attempt in range(1, 3):
print( # noqa: T201
f"html2print: sending GET request attempt {attempt}: {url}"
)
try:
return requests.get(url, params, timeout=(5, 5), **kwargs)
except requests.exceptions.ConnectTimeout as connect_timeout_:
last_error = connect_timeout_
except requests.exceptions.ReadTimeout as read_timeout_:
last_error = read_timeout_
except Exception as exception_:
raise AssertionError(
"html2print: unknown exception", exception_
) from None
class ChromeDriverManager:
def get_chrome_driver(self, path_to_cache_dir: str):
chrome_version = self.get_chrome_version()
chrome_major_version = chrome_version.split(".")[0]

print( # noqa: T201
f"html2print: "
f"failed to get response for URL: {url} with error: {last_error}"
f"html2print: Installed Chrome version: {chrome_version}"
)

system_map = {
"Windows": "win32",
"Darwin": "mac-arm64"
if platform.machine() == "arm64"
else "mac-x64",
"Linux": "linux64",
}
os_type = system_map[platform.system()]
is_windows = platform.system() == "Windows"

class HTML2Print_CacheManager(DriverCacheManager):
def __init__(self, file_manager: FileManager, path_to_cache_dir: str):
super().__init__(file_manager=file_manager)
self.path_to_cache_dir: str = path_to_cache_dir

def find_driver(self, driver: Driver):
path_to_cached_chrome_driver_dir = os.path.join(
self.path_to_cache_dir, "chromedriver"
)

os_type = self.get_os_type()
browser_type = driver.get_browser_type()
browser_version = self._os_system_manager.get_browser_version_from_os(
browser_type
print( # noqa: T201
f"html2print: OS system: {platform.system()}, OS type: {os_type}."
)
assert browser_version is not None, browser_version

path_to_cached_chrome_driver_dir = os.path.join(
path_to_cached_chrome_driver_dir, browser_version, os_type
path_to_cache_dir, chrome_major_version
)
path_to_cached_chrome_driver = os.path.join(
path_to_cached_chrome_driver_dir, "chromedriver"
path_to_cached_chrome_driver_dir,
f"chromedriver-{os_type}",
"chromedriver",
)
if is_windows:
path_to_cached_chrome_driver += ".exe"

if os.path.isfile(path_to_cached_chrome_driver):
print( # noqa: T201
f"html2print: ChromeDriver exists in the local cache: "
Expand All @@ -95,25 +80,144 @@ def find_driver(self, driver: Driver):
f"html2print: ChromeDriver does not exist in the local cache: "
f"{path_to_cached_chrome_driver}"
)
path_to_downloaded_chrome_driver = super().find_driver(driver)
if path_to_downloaded_chrome_driver is None:
print( # noqa: T201
f"html2print: could not get a downloaded ChromeDriver: "
f"{path_to_cached_chrome_driver}"

path_to_downloaded_chrome_driver = self._download_chromedriver(
chrome_major_version,
os_type,
path_to_cached_chrome_driver_dir,
path_to_cached_chrome_driver,
)
assert os.path.isfile(path_to_downloaded_chrome_driver)
os.chmod(path_to_downloaded_chrome_driver, 0o755)

return path_to_downloaded_chrome_driver

@staticmethod
def _download_chromedriver(
chrome_major_version,
os_type: str,
path_to_driver_cache_dir,
path_to_cached_chrome_driver,
):
url = "https://googlechromelabs.github.io/chrome-for-testing/known-good-versions-with-downloads.json"
response = ChromeDriverManager.send_http_get_request(url).json()

matching_versions = [
item
for item in response["versions"]
if item["version"].startswith(chrome_major_version)
]

if not matching_versions:
raise Exception(
f"No compatible ChromeDriver found for Chrome version {chrome_major_version}"
)

latest_version = matching_versions[-1]

driver_url: str
chrome_downloadable_versions = latest_version["downloads"][
"chromedriver"
]
for chrome_downloadable_version_ in chrome_downloadable_versions:
if chrome_downloadable_version_["platform"] == os_type:
driver_url = chrome_downloadable_version_["url"]
break
else:
raise RuntimeError(
f"Could not find a downloadable URL from downloadable versions: {chrome_downloadable_versions}"
)
return None

print( # noqa: T201
f"html2print: saving chromedriver to StrictDoc's local cache: "
f"{path_to_downloaded_chrome_driver} -> {path_to_cached_chrome_driver}"
f"html2print: downloading ChromeDriver from: {driver_url}"
)
Path(path_to_cached_chrome_driver_dir).mkdir(
parents=True, exist_ok=True
response = ChromeDriverManager.send_http_get_request(driver_url)

Path(path_to_driver_cache_dir).mkdir(parents=True, exist_ok=True)
zip_path = os.path.join(path_to_driver_cache_dir, "chromedriver.zip")
print( # noqa: T201
f"html2print: saving downloaded ChromeDriver to path: {zip_path}"
)
copy(path_to_downloaded_chrome_driver, path_to_cached_chrome_driver)
with open(zip_path, "wb") as file:
file.write(response.content)

with zipfile.ZipFile(zip_path, "r") as zip_ref:
zip_ref.extractall(path_to_driver_cache_dir)

print( # noqa: T201
f"html2print: ChromeDriver downloaded to: {path_to_cached_chrome_driver}"
)
return path_to_cached_chrome_driver

@staticmethod
def send_http_get_request(url, params=None, **kwargs) -> Response:
    """
    Send an HTTP GET request, retrying when the request times out.

    Up to three attempts are made. Each attempt uses a 5-second connect
    timeout and a 5-second read timeout.

    Args:
        url: The URL to request.
        params: Optional query parameters forwarded to ``requests.get``.
        **kwargs: Extra keyword arguments forwarded to ``requests.get``.

    Returns:
        The response of the first attempt that did not time out.

    Raises:
        AssertionError: If an attempt fails with anything other than a
            connect or read timeout. The original exception is chained
            as the cause.
        RuntimeError: If every attempt timed out. The last timeout is
            chained as the cause.
    """
    max_attempts = 3
    last_error: Optional[Exception] = None
    for attempt in range(1, max_attempts + 1):
        print(  # noqa: T201
            f"html2print: sending GET request attempt {attempt}: {url}"
        )
        try:
            return requests.get(url, params, timeout=(5, 5), **kwargs)
        except (
            requests.exceptions.ConnectTimeout,
            requests.exceptions.ReadTimeout,
        ) as timeout_error_:
            # Only timeouts are considered transient and worth a retry.
            last_error = timeout_error_
        except Exception as exception_:
            # Keep the original exception chained so that the root cause
            # stays visible in the traceback.
            raise AssertionError(
                "html2print: unknown exception", exception_
            ) from exception_

    failure_message = (
        f"html2print: "
        f"failed to get response for URL: {url} with error: {last_error}"
    )
    print(failure_message)  # noqa: T201
    # Falling off the end would return None in place of a Response and make
    # the callers fail later with an unrelated AttributeError, so fail here
    # with the actual reason instead.
    raise RuntimeError(failure_message) from last_error

@staticmethod
def get_chrome_version():
    """
    Detect the version string of the locally installed Google Chrome.

    On macOS, Google Chrome for Testing is probed first by running its
    binary with ``--version``. If that probe fails for any reason, or on
    any other OS, the version is looked up through webdriver_manager's
    OperationSystemManager.

    Returns:
        The Chrome version string. For the Chrome for Testing probe this
        is the dotted numeric part of the ``--version`` output.

    Raises:
        RuntimeError: If the fallback lookup yields no version.
    """
    # Special case: GitHub Actions macOS CI machines have both
    # Google Chrome for Testing and normal Google Chrome installed, and
    # sometimes their versions are of different major version families.
    # The solution is to check if the Google Chrome for Testing is available,
    # and use its version instead of the normal one.
    if platform.system() == "Darwin":
        chrome_path = "/Applications/Google Chrome for Testing.app/Contents/MacOS/Google Chrome for Testing"
        try:
            print(  # noqa: T201
                "html2print: "
                "checking if there is Google Chrome for Testing instead of "
                "a normal Chrome available."
            )

            version_output = subprocess.run(
                [chrome_path, "--version"],
                capture_output=True,
                text=True,
                check=True,
            )
            chrome_version = version_output.stdout.strip()
            match = re.search(r"\d+(\.\d+)+", chrome_version)
            if not match:
                # Caught by the generic handler below, which logs the
                # problem and falls back to the regular Chrome lookup.
                raise RuntimeError(
                    "Cannot extract the version part using regex."
                )

            chrome_version = match.group(0)

            print(  # noqa: T201
                f"html2print: Google Chrome for Testing Version: {chrome_version}"
            )

            return chrome_version
        except FileNotFoundError:
            print("html2print: Chrome for Testing not available.")  # noqa: T201
        except Exception as e:
            # Best effort: any failure of the probe is only logged, and the
            # regular lookup below is used instead.
            print(  # noqa: T201
                f"html2print: Error getting Google Chrome for Testing version: {e}"
            )

    os_manager = OperationSystemManager(os_type=None)
    version = os_manager.get_browser_version_from_os(ChromeType.GOOGLE)
    if version is None:
        # Without this check the caller fails later on version.split(".")
        # with an AttributeError that hides the actual problem.
        raise RuntimeError(
            "html2print: could not detect the version of the installed "
            "Google Chrome."
        )
    return version


def get_inches_from_millimeters(mm: float) -> float:
    """Convert a length given in millimeters to inches."""
    millimeters_per_inch = 25.4
    inches = mm / millimeters_per_inch
    return inches
Expand Down Expand Up @@ -190,23 +294,12 @@ class Done(Exception):
return data


def get_chrome_driver(path_to_cache_dir: str) -> str:
    """
    Return what ChromeDriverManager's install() yields for this cache dir.

    The manager is assembled from an HTML2Print_CacheManager that receives
    ``path_to_cache_dir`` and a WDMDownloadManager that sends its requests
    through HTML2Print_HTTPClient.
    """
    file_manager = FileManager(os_system_manager=OperationSystemManager())
    driver_cache = HTML2Print_CacheManager(
        file_manager=file_manager,
        path_to_cache_dir=path_to_cache_dir,
    )

    downloader = WDMDownloadManager(HTML2Print_HTTPClient())

    driver_manager = ChromeDriverManager(
        download_manager=downloader, cache_manager=driver_cache
    )
    return driver_manager.install()


def create_webdriver(chromedriver: Optional[str], path_to_cache_dir: str):
print("html2print: creating ChromeDriver service.", flush=True) # noqa: T201
if chromedriver is None:
path_to_chrome = get_chrome_driver(path_to_cache_dir)
path_to_chrome = ChromeDriverManager().get_chrome_driver(
path_to_cache_dir
)
else:
path_to_chrome = chromedriver
print(f"html2print: ChromeDriver available at path: {path_to_chrome}") # noqa: T201
Expand Down Expand Up @@ -254,6 +347,8 @@ def main():
command_subparsers = parser.add_subparsers(title="command", dest="command")
command_subparsers.required = True

print(f"html2print: version {__version__}") # noqa: T201

#
# Get driver command.
#
Expand Down Expand Up @@ -295,22 +390,20 @@ def main():
path_to_cache_dir: str
if args.command == "get_driver":
path_to_cache_dir = (
args.cache_dir
if args.cache_dir is not None
else (DEFAULT_CACHE_DIR)
args.cache_dir if args.cache_dir is not None else DEFAULT_CACHE_DIR
)

path_to_chrome = get_chrome_driver(path_to_cache_dir)
path_to_chrome = ChromeDriverManager().get_chrome_driver(
path_to_cache_dir
)
print(f"html2print: ChromeDriver available at path: {path_to_chrome}") # noqa: T201
sys.exit(0)

elif args.command == "print":
paths: List[str] = args.paths

path_to_cache_dir = (
args.cache_dir
if args.cache_dir is not None
else (DEFAULT_CACHE_DIR)
args.cache_dir if args.cache_dir is not None else DEFAULT_CACHE_DIR
)
driver = create_webdriver(args.chromedriver, path_to_cache_dir)

Expand Down