Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/ci-windows.yml
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ jobs:

- name: Download ChromeDriver
run: |
python html2pdf4doc/html2pdf4doc.py get_driver
python -m html2pdf4doc.main get_driver

- name: Run tests (Bash)
run: |
Expand Down
6 changes: 3 additions & 3 deletions .github/workflows/ci_fuzz_linux.yml
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ jobs:
run: |
if [ "${{ github.event_name }}" = "schedule" ]; then
echo "🕒 Running long fuzzing..."
invoke test-fuzz --long
invoke test-fuzz --total-mutations 25
else
echo "🚀 Running short fuzzing..."
invoke test-fuzz
Expand All @@ -66,6 +66,6 @@ jobs:
if: failure() || always()
uses: actions/upload-artifact@v4
with:
name: broken-pdfs
path: output/
name: tests_fuzz_broken_pdfs
path: build/tests_fuzz
retention-days: 30
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ RUN if [ "$HTML2PDF4DOC_SOURCE" = "pypi" ]; then \
pip install --no-cache-dir html2pdf4doc; \
else \
pip install --no-cache-dir --upgrade pip && \
pip install --no-cache-dir git+https://github.com/mettta/html2pdf_python.git@${HTML2PDF4DOC_SOURCE}; \
pip install --no-cache-dir git+https://github.com/strictdoc-project/html2pdf4doc_python.git@${HTML2PDF4DOC_SOURCE}; \
fi; \
chmod -R 777 /opt/venv;

Expand Down
18 changes: 18 additions & 0 deletions html2pdf4doc/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
import os
from pathlib import Path

# Package version. Keep this a plain string assignment: pyproject.toml points
# [tool.hatch.version] at this file to read the version from it.
__version__ = "0.0.22"

# Folder that contains this package's modules and bundled assets.
_PACKAGE_DIR = os.path.dirname(__file__)

# Path to the main command-line module of the package.
PATH_TO_HTML2PDF4DOC_PY = os.path.join(_PACKAGE_DIR, "main.py")

# Path to the bundled, minified html2pdf4doc JavaScript file.
PATH_TO_HTML2PDF4DOC_JS = os.path.join(
    _PACKAGE_DIR, "html2pdf4doc_js", "html2pdf4doc.min.js"
)

# Per-user folder under the home directory used as the default cache
# location (judging by the name, for downloaded ChromeDriver binaries).
DEFAULT_CACHE_DIR = os.path.join(Path.home(), ".html2pdf4doc", "chromedriver")

# NOTE(review): hard-coded POSIX-style path; confirm this is acceptable on
# Windows before relying on the debug log there.
PATH_TO_CHROME_DRIVER_DEBUG_LOG = "/tmp/chromedriver.log"
16 changes: 5 additions & 11 deletions html2pdf4doc/html2pdf4doc.py → html2pdf4doc/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,19 +21,13 @@
from selenium.webdriver.chrome.service import Service
from webdriver_manager.core.os_manager import ChromeType, OperationSystemManager

__version__ = "0.0.22"

PATH_TO_HTML2PDF4DOC_PY = __file__
PATH_TO_HTML2PDF4DOC_JS = os.path.join(
os.path.dirname(os.path.join(__file__)),
"html2pdf4doc_js",
"html2pdf4doc.min.js",
from . import (
DEFAULT_CACHE_DIR,
PATH_TO_CHROME_DRIVER_DEBUG_LOG,
PATH_TO_HTML2PDF4DOC_JS,
__version__,
)

DEFAULT_CACHE_DIR = os.path.join(Path.home(), ".html2pdf4doc", "chromedriver")

PATH_TO_CHROME_DRIVER_DEBUG_LOG = "/tmp/chromedriver.log"

# HTML2PDF4Doc.js prints unicode symbols to console. The following makes it work on
# Windows which otherwise complains:
# UnicodeEncodeError: 'charmap' codec can't encode characters in position 129-130: character maps to <undefined>
Expand Down
66 changes: 40 additions & 26 deletions html2pdf4doc/html2pdf4doc_fuzzer.py → html2pdf4doc/main_fuzzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,6 @@
from faker import Faker
from lxml import etree, html

from html2pdf4doc import PATH_TO_HTML2PDF4DOC_PY


@contextlib.contextmanager
def measure_performance(title: str) -> Iterator[None]:
Expand Down Expand Up @@ -71,7 +69,8 @@ def mutate_and_print(path_to_input_file: str, path_to_root: str) -> bool:

cmd: List[str] = [
sys.executable,
PATH_TO_HTML2PDF4DOC_PY,
"-m",
"html2pdf4doc.main",
"print",
"--strict",
]
Expand All @@ -80,7 +79,7 @@ def mutate_and_print(path_to_input_file: str, path_to_root: str) -> bool:
cmd.append(path_to_print_[0])
cmd.append(path_to_print_[1])

relative_path_to_mut_html = Path(path_to_root).relative_to(".")
relative_path_to_mut_html = Path(path_to_mut_html).relative_to(path_to_root)
path_to_mut_output = f"output/{relative_path_to_mut_html}"

def copy_files_if_needed() -> None:
Expand Down Expand Up @@ -143,30 +142,16 @@ def copy_mutated_file() -> None:
return True


def main() -> None:
parser = argparse.ArgumentParser()

parser.add_argument("input_file", type=str, help="TODO")
parser.add_argument("root_path", type=str, help="TODO")
parser.add_argument(
"--long",
action="store_true",
help="Run the fuzzer in long mode (more iterations).",
)

args = parser.parse_args()

path_to_input_file = args.input_file
path_to_root = args.root_path

def fuzz_test(
*, path_to_input_file: str, path_to_root: str, total_mutations: int = 20
) -> None:
shutil.rmtree("output", ignore_errors=True)
Path("output").mkdir(parents=True, exist_ok=True)

total_runs = 200 if args.long else 20
success_count, failure_count = 0, 0
for i in range(1, total_runs + 1):
for i in range(1, total_mutations + 1):
print( # noqa: T201
f"html2pdf4doc_fuzzer print cycle #{i}/{total_runs} — "
f"html2pdf4doc_fuzzer print cycle #{i}/{total_mutations} — "
f"So far: 🟢{success_count} / 🔴{failure_count}",
flush=True,
)
Expand All @@ -176,18 +161,47 @@ def main() -> None:
else:
failure_count += 1

assert total_runs > 0
success_rate_percent = (success_count / total_runs) * 100
assert total_mutations > 0
success_rate_percent = (success_count / total_mutations) * 100

print( # noqa: T201
f"html2pdf4doc_fuzzer: finished {'✅' if failure_count == 0 else '❌'} — "
f"Success rate: {success_count}/{total_runs} ({success_rate_percent}%)",
f"Success rate: {success_count}/{total_mutations} ({success_rate_percent}%)",
flush=True,
)

if failure_count > 0:
sys.exit(1)


def main() -> None:
    """Command-line entry point of the fuzzer.

    Parses ``input_file``, ``root_path`` and the mandatory
    ``--total-mutations`` option (1-1000) from the command line and forwards
    them to ``fuzz_test``, which exits the process with status 1 when at
    least one mutation fails.
    """
    # To avoid UnicodeEncodeError on Windows when printing emojis.
    # stdout can be None or a replacement stream that has no reconfigure()
    # method, so only reconfigure when the method is actually available.
    reconfigure = getattr(sys.stdout, "reconfigure", None)
    if reconfigure is not None:
        reconfigure(encoding="utf-8")

    parser = argparse.ArgumentParser()

    # TODO(review): the two positional arguments still carry placeholder
    # help texts.
    parser.add_argument("input_file", type=str, help="TODO")
    parser.add_argument("root_path", type=str, help="TODO")
    parser.add_argument(
        "--total-mutations",
        type=int,
        choices=range(1, 1001),
        # Without a metavar, argparse spells out all 1000 choices in the
        # usage and help output.
        metavar="N",
        required=True,
        help="An integer between 1 and 1000",
    )

    args = parser.parse_args()

    fuzz_test(
        path_to_input_file=args.input_file,
        path_to_root=args.root_path,
        total_mutations=args.total_mutations,
    )


if __name__ == "__main__":
main()
8 changes: 4 additions & 4 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ requires = ["hatchling"]
build-backend = "hatchling.build"

[tool.hatch.version]
path = "html2pdf4doc/html2pdf4doc.py"
path = "html2pdf4doc/__init__.py"

[tool.hatch.build]
# Currently unused:
Expand All @@ -13,7 +13,7 @@ path = "html2pdf4doc/html2pdf4doc.py"
# ignore-vcs = true

include = [
"html2pdf4doc/html2pdf4doc.py",
"html2pdf4doc/*.py",
"html2pdf4doc/html2pdf4doc_js/html2pdf4doc.min.js",
]

Expand Down Expand Up @@ -69,8 +69,8 @@ development = [
]

[project.scripts]
html2pdf4doc = "html2pdf4doc.html2pdf4doc:main"
html2pdf4doc_fuzzer = "html2pdf4doc.html2pdf4doc_fuzzer:main"
html2pdf4doc = "html2pdf4doc.main:main"
html2pdf4doc_fuzzer = "html2pdf4doc.main_fuzzer:main"

[project.urls]
Changelog = "https://github.com/mettta/html2pdf_python/releases/"
Expand Down
5 changes: 5 additions & 0 deletions requirements.development.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,11 @@ twine
mypy>=0.910
ruff>=0.9

#
# Unit tests
#
pytest

#
# Integration tests
#
Expand Down
39 changes: 29 additions & 10 deletions tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ def get_chrome_driver(
run_invoke(
context,
"""
python html2pdf4doc/html2pdf4doc.py get_driver
python -m html2pdf4doc.main get_driver
""",
)

Expand Down Expand Up @@ -173,9 +173,7 @@ def test_integration(

get_chrome_driver(context)

cwd = os.getcwd()

html2pdf_exec = f'python3 \\"{cwd}/html2pdf4doc/html2pdf4doc.py\\"'
html2pdf_exec = "python3 -m html2pdf4doc.main"

focus_or_none = f"--filter {focus}" if focus else ""
debug_opts = "-vv --show-all" if debug else ""
Expand All @@ -201,16 +199,37 @@ def test_integration(


@task(aliases=["tf"])
def test_fuzz(context, long: bool = False):
arg_long = "--long" if long else ""
def test_fuzz(context, focus=None, total_mutations: int = 10, output=False):
"""
@relation(SDOC-SRS-44, scope=function)
"""

test_reports_dir = "build/test_reports"

Path(test_reports_dir).mkdir(parents=True, exist_ok=True)

focus_argument = f"-k {focus}" if focus is not None else ""
long_argument = (
f"--fuzz-total-mutations {total_mutations}" if total_mutations else ""
)
output_argument = "--capture=no" if output else ""

run_invoke(
context,
"""
rm -rf build/tests_fuzz
""",
)

run_invoke(
context,
f"""
python html2pdf4doc/html2pdf4doc_fuzzer.py
tests/fuzz/01_strictdoc_guide_202510/strictdoc/docs/strictdoc_01_user_guide-PDF.html
tests/fuzz/01_strictdoc_guide_202510/
{arg_long}
pytest
{focus_argument}
{long_argument}
{output_argument}
-o cache_dir=build/tests_fuzz_cache
tests/fuzz/
""",
)

Expand Down
18 changes: 18 additions & 0 deletions tests/fuzz/01_strictdoc_guide_202510/test_case.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
import os

from html2pdf4doc.main_fuzzer import fuzz_test
from tests.fuzz.conftest import create_build_folder, FuzzConfig

PATH_TO_THIS_FOLDER = os.path.dirname(__file__)

def test(fuzz_config: FuzzConfig):
    """Run the fuzzer against a build-folder copy of this test case.

    The folder containing this file is first copied by
    ``create_build_folder``; the fuzzer then works on the copy, using the
    copied folder as its root path.
    """
    path_to_build_folder = create_build_folder(PATH_TO_THIS_FOLDER)
    path_to_user_guide_html = os.path.join(
        path_to_build_folder,
        "strictdoc/docs/strictdoc_01_user_guide-PDF.html",
    )

    fuzz_test(
        path_to_input_file=path_to_user_guide_html,
        path_to_root=path_to_build_folder,
        total_mutations=fuzz_config.total_mutations,
    )
48 changes: 48 additions & 0 deletions tests/fuzz/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
import os
import shutil
from dataclasses import dataclass
from pathlib import Path

import pytest

PATH_TO_TESTS_FUZZ_FOLDER = os.path.dirname(__file__)


@dataclass
class FuzzConfig:
    """Settings handed to the fuzz tests through the ``fuzz_config`` fixture."""

    # Number of mutations a fuzz test performs. The value comes from the
    # ``--fuzz-total-mutations`` option, which is parsed with ``type=int``,
    # so the field is an integer count rather than a flag.
    total_mutations: int


def pytest_addoption(parser):
    """Register the ``--fuzz-total-mutations`` command-line option.

    The option accepts an integer from 1 to 1000 and defaults to 10.
    """
    parser.addoption(
        "--fuzz-total-mutations",
        action="store",
        type=int,
        choices=range(1, 1001),
        # Without a metavar, the help output spells out all 1000 choices.
        metavar="N",
        default=10,
        help="Total number of mutations to perform (1-1000)",
    )

@pytest.fixture
def fuzz_config(request):
    """Build a ``FuzzConfig`` from the ``--fuzz-total-mutations`` option."""
    total_mutations = request.config.getoption("--fuzz-total-mutations")
    return FuzzConfig(total_mutations=total_mutations)


def create_build_folder(test_folder: str) -> str:
    """Copy a fuzz test folder into ``build/tests_fuzz`` and return the copy.

    Args:
        test_folder: Absolute path to an existing folder located under the
            folder that contains this conftest file.

    Returns:
        The path of the copied folder. It is built from the relative
        components ``build/tests_fuzz/...`` and is therefore relative to the
        current working directory.

    Raises:
        AssertionError: If ``test_folder`` is not an existing directory or
            is not an absolute path.
        ValueError: If ``test_folder`` is not located under the fuzz tests
            folder.
    """
    assert os.path.isdir(test_folder), test_folder
    assert os.path.isabs(test_folder), test_folder

    relative_path_to_test_folder = Path(test_folder).relative_to(
        PATH_TO_TESTS_FUZZ_FOLDER
    )

    # IMPORTANT: The number of nested folders matches the number of nesting
    # in the tests/fuzz/* test folders. Otherwise, the html2pdf4doc.js
    # will not be found in either of tests/fuzz/* or build/tests_fuzz/*.
    build_folder = os.path.join(
        "build", "tests_fuzz", relative_path_to_test_folder
    )

    # copytree() raises FileExistsError when the destination already exists,
    # which happens whenever a previous run left its build folder behind.
    # Remove any stale copy first so every run starts from a pristine tree.
    shutil.rmtree(build_folder, ignore_errors=True)
    shutil.copytree(test_folder, build_folder)

    return build_folder
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
RUN: %expect_exit 2 %html2pdf print --page-load-timeout 1000000 %S/index1.html %S/Output/index1.pdf 2>&1 | filecheck %s

CHECK: html2pdf4doc.py print: error: argument --page-load-timeout: Must be an integer in the range [0, 600].
CHECK: main.py print: error: argument --page-load-timeout: Must be an integer in the range [0, 600].
8 changes: 8 additions & 0 deletions tests/integration/20_fuzz_integration/test.itest
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# This test verifies that the main_fuzzer.py script works as a standalone
# command-line program.

RUN: mkdir -p %project_root/build/tests_integration_fuzz/
RUN: cp -rv %project_root/tests/fuzz/01_strictdoc_guide_202510 %project_root/build/tests_integration_fuzz/

RUN: PYTHONPATH=%project_root python -m html2pdf4doc.main_fuzzer %project_root/build/tests_integration_fuzz/01_strictdoc_guide_202510/strictdoc/docs/strictdoc_01_user_guide-PDF.html %project_root/build/tests_integration_fuzz/01_strictdoc_guide_202510 --total-mutations 1 | filecheck %s --dump-input=fail
CHECK: html2pdf4doc_fuzzer: finished ✅ — Success rate: 1/1 (100.0%)
4 changes: 3 additions & 1 deletion tests/integration/lit.cfg.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
config.name = "html2pdf4doc Python API integration tests"
config.test_format = lit.formats.ShTest("0")

current_dir = os.getcwd()
current_dir = os.getcwd().replace("\\", "/")

html2pdf_exec = lit_config.params["HTML2PDF4DOC_EXEC"]
assert html2pdf_exec
Expand Down Expand Up @@ -45,3 +45,5 @@

# In Windows CI, %ProgramW6432% is required for Selenium to properly detect browsers
config.environment["ProgramW6432"] = os.environ.get("ProgramW6432", "")

config.environment["PYTHONPATH"] = current_dir
Loading