From d331e325c838f31fd6a1da73ac59d724a680ee30 Mon Sep 17 00:00:00 2001 From: Stanislav Pankevich Date: Fri, 21 Nov 2025 15:53:46 +0100 Subject: [PATCH] tests(fuzz): switch to Pytest for running multiple fuzz tests in a test suite --- .github/workflows/ci-windows.yml | 2 +- .github/workflows/ci_fuzz_linux.yml | 6 +- Dockerfile | 2 +- html2pdf4doc/__init__.py | 18 +++++ html2pdf4doc/{html2pdf4doc.py => main.py} | 16 ++--- ...{html2pdf4doc_fuzzer.py => main_fuzzer.py} | 66 +++++++++++-------- pyproject.toml | 8 +-- requirements.development.txt | 5 ++ tasks.py | 39 ++++++++--- .../01_strictdoc_guide_202510/test_case.py | 18 +++++ tests/fuzz/conftest.py | 48 ++++++++++++++ .../test.itest | 2 +- .../20_fuzz_integration/test.itest | 8 +++ tests/integration/lit.cfg.py | 4 +- 14 files changed, 184 insertions(+), 58 deletions(-) create mode 100644 html2pdf4doc/__init__.py rename html2pdf4doc/{html2pdf4doc.py => main.py} (98%) rename html2pdf4doc/{html2pdf4doc_fuzzer.py => main_fuzzer.py} (83%) create mode 100644 tests/fuzz/01_strictdoc_guide_202510/test_case.py create mode 100644 tests/fuzz/conftest.py create mode 100644 tests/integration/20_fuzz_integration/test.itest diff --git a/.github/workflows/ci-windows.yml b/.github/workflows/ci-windows.yml index b66d1ef..04cc57e 100644 --- a/.github/workflows/ci-windows.yml +++ b/.github/workflows/ci-windows.yml @@ -57,7 +57,7 @@ jobs: - name: Download ChromeDriver run: | - python html2pdf4doc/html2pdf4doc.py get_driver + python -m html2pdf4doc.main get_driver - name: Run tests (Bash) run: | diff --git a/.github/workflows/ci_fuzz_linux.yml b/.github/workflows/ci_fuzz_linux.yml index f87f9a5..e10d57d 100644 --- a/.github/workflows/ci_fuzz_linux.yml +++ b/.github/workflows/ci_fuzz_linux.yml @@ -55,7 +55,7 @@ jobs: run: | if [ "${{ github.event_name }}" = "schedule" ]; then echo "🕒 Running long fuzzing..." - invoke test-fuzz --long + invoke test-fuzz --total-mutations 25 else echo "🚀 Running short fuzzing..." invoke test-fuzz @@ -66,6 +66,6 @@ jobs: if: failure() || always() uses: actions/upload-artifact@v4 with: - name: broken-pdfs - path: output/ + name: tests_fuzz_broken_pdfs + path: build/tests_fuzz retention-days: 30 diff --git a/Dockerfile b/Dockerfile index d69ef43..703ce31 100644 --- a/Dockerfile +++ b/Dockerfile @@ -35,7 +35,7 @@ RUN if [ "$HTML2PDF4DOC_SOURCE" = "pypi" ]; then \ pip install --no-cache-dir html2pdf4doc; \ else \ pip install --no-cache-dir --upgrade pip && \ - pip install --no-cache-dir git+https://github.com/mettta/html2pdf_python.git@${HTML2PDF4DOC_SOURCE}; \ + pip install --no-cache-dir git+https://github.com/strictdoc-project/html2pdf4doc_python.git@${HTML2PDF4DOC_SOURCE}; \ fi; \ chmod -R 777 /opt/venv; diff --git a/html2pdf4doc/__init__.py b/html2pdf4doc/__init__.py new file mode 100644 index 0000000..a042134 --- /dev/null +++ b/html2pdf4doc/__init__.py @@ -0,0 +1,18 @@ +import os +from pathlib import Path + +__version__ = "0.0.22" + +PATH_TO_HTML2PDF4DOC_PY = os.path.join( + os.path.dirname(os.path.join(__file__)), + "main.py", +) +PATH_TO_HTML2PDF4DOC_JS = os.path.join( + os.path.dirname(os.path.join(__file__)), + "html2pdf4doc_js", + "html2pdf4doc.min.js", +) + +DEFAULT_CACHE_DIR = os.path.join(Path.home(), ".html2pdf4doc", "chromedriver") + +PATH_TO_CHROME_DRIVER_DEBUG_LOG = "/tmp/chromedriver.log" diff --git a/html2pdf4doc/html2pdf4doc.py b/html2pdf4doc/main.py similarity index 98% rename from html2pdf4doc/html2pdf4doc.py rename to html2pdf4doc/main.py index 58b29f8..0ef4c9c 100644 --- a/html2pdf4doc/html2pdf4doc.py +++ b/html2pdf4doc/main.py @@ -21,19 +21,13 @@ from selenium.webdriver.chrome.service import Service from webdriver_manager.core.os_manager import ChromeType, OperationSystemManager -__version__ = "0.0.22" - -PATH_TO_HTML2PDF4DOC_PY = __file__ -PATH_TO_HTML2PDF4DOC_JS = os.path.join( - os.path.dirname(os.path.join(__file__)), - "html2pdf4doc_js", - "html2pdf4doc.min.js", +from . import ( + DEFAULT_CACHE_DIR, + PATH_TO_CHROME_DRIVER_DEBUG_LOG, + PATH_TO_HTML2PDF4DOC_JS, + __version__, ) -DEFAULT_CACHE_DIR = os.path.join(Path.home(), ".html2pdf4doc", "chromedriver") - -PATH_TO_CHROME_DRIVER_DEBUG_LOG = "/tmp/chromedriver.log" - # HTML2PDF4Doc.js prints unicode symbols to console. The following makes it work on # Windows which otherwise complains: # UnicodeEncodeError: 'charmap' codec can't encode characters in position 129-130: character maps to diff --git a/html2pdf4doc/html2pdf4doc_fuzzer.py b/html2pdf4doc/main_fuzzer.py similarity index 83% rename from html2pdf4doc/html2pdf4doc_fuzzer.py rename to html2pdf4doc/main_fuzzer.py index 1d22be7..deef5be 100644 --- a/html2pdf4doc/html2pdf4doc_fuzzer.py +++ b/html2pdf4doc/main_fuzzer.py @@ -13,8 +13,6 @@ from faker import Faker from lxml import etree, html -from html2pdf4doc import PATH_TO_HTML2PDF4DOC_PY - @contextlib.contextmanager def measure_performance(title: str) -> Iterator[None]: @@ -71,7 +69,8 @@ def mutate_and_print(path_to_input_file: str, path_to_root: str) -> bool: cmd: List[str] = [ sys.executable, - PATH_TO_HTML2PDF4DOC_PY, + "-m", + "html2pdf4doc.main", "print", "--strict", ] @@ -80,7 +79,7 @@ def mutate_and_print(path_to_input_file: str, path_to_root: str) -> bool: cmd.append(path_to_print_[0]) cmd.append(path_to_print_[1]) - relative_path_to_mut_html = Path(path_to_root).relative_to(".") + relative_path_to_mut_html = Path(path_to_mut_html).relative_to(path_to_root) path_to_mut_output = f"output/{relative_path_to_mut_html}" def copy_files_if_needed() -> None: @@ -143,30 +142,16 @@ def copy_mutated_file() -> None: return True -def main() -> None: - parser = argparse.ArgumentParser() - - parser.add_argument("input_file", type=str, help="TODO") - parser.add_argument("root_path", type=str, help="TODO") - parser.add_argument( - "--long", - action="store_true", - help="Run the fuzzer in long mode (more iterations).", - ) - - args = parser.parse_args() - - path_to_input_file = args.input_file - path_to_root = args.root_path - +def fuzz_test( + *, path_to_input_file: str, path_to_root: str, total_mutations: int = 20 +) -> None: shutil.rmtree("output", ignore_errors=True) Path("output").mkdir(parents=True, exist_ok=True) - total_runs = 200 if args.long else 20 success_count, failure_count = 0, 0 - for i in range(1, total_runs + 1): + for i in range(1, total_mutations + 1): print( # noqa: T201 - f"html2pdf4doc_fuzzer print cycle #{i}/{total_runs} — " + f"html2pdf4doc_fuzzer print cycle #{i}/{total_mutations} — " f"So far: 🟢{success_count} / 🔴{failure_count}", flush=True, ) @@ -176,12 +161,12 @@ def main() -> None: else: failure_count += 1 - assert total_runs > 0 - success_rate_percent = (success_count / total_runs) * 100 + assert total_mutations > 0 + success_rate_percent = (success_count / total_mutations) * 100 print( # noqa: T201 f"html2pdf4doc_fuzzer: finished {'✅' if failure_count == 0 else '❌'} — " - f"Success rate: {success_count}/{total_runs} ({success_rate_percent}%)", + f"Success rate: {success_count}/{total_mutations} ({success_rate_percent}%)", flush=True, ) @@ -189,5 +174,34 @@ def main() -> None: sys.exit(1) +def main() -> None: + # To avoid UnicodeEncodeError on Windows when printing emojis. + sys.stdout.reconfigure(encoding="utf-8") # type: ignore[union-attr] + + parser = argparse.ArgumentParser() + + parser.add_argument("input_file", type=str, help="TODO") + parser.add_argument("root_path", type=str, help="TODO") + parser.add_argument( + "--total-mutations", + type=int, + choices=range(1, 1001), + required=True, + help="An integer between 1 and 1000", + ) + + args = parser.parse_args() + + path_to_input_file = args.input_file + path_to_root = args.root_path + total_mutations = args.total_mutations + + fuzz_test( + path_to_input_file=path_to_input_file, + path_to_root=path_to_root, + total_mutations=total_mutations, + ) + + if __name__ == "__main__": main() diff --git a/pyproject.toml b/pyproject.toml index 26dbc09..a18d1b6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -3,7 +3,7 @@ requires = ["hatchling"] build-backend = "hatchling.build" [tool.hatch.version] -path = "html2pdf4doc/html2pdf4doc.py" +path = "html2pdf4doc/__init__.py" [tool.hatch.build] # Currently unused: @@ -13,7 +13,7 @@ path = "html2pdf4doc/html2pdf4doc.py" # ignore-vcs = true include = [ - "html2pdf4doc/html2pdf4doc.py", + "html2pdf4doc/*.py", "html2pdf4doc/html2pdf4doc_js/html2pdf4doc.min.js", ] @@ -69,8 +69,8 @@ development = [ ] [project.scripts] -html2pdf4doc = "html2pdf4doc.html2pdf4doc:main" -html2pdf4doc_fuzzer = "html2pdf4doc.html2pdf4doc_fuzzer:main" +html2pdf4doc = "html2pdf4doc.main:main" +html2pdf4doc_fuzzer = "html2pdf4doc.main_fuzzer:main" [project.urls] Changelog = "https://github.com/mettta/html2pdf_python/releases/" diff --git a/requirements.development.txt b/requirements.development.txt index 98b08d1..0ed9e4f 100644 --- a/requirements.development.txt +++ b/requirements.development.txt @@ -11,6 +11,11 @@ twine mypy>=0.910 ruff>=0.9 +# +# Unit tests +# +pytest + # # Integration tests # diff --git a/tasks.py b/tasks.py index 43eb97b..a98d938 100644 --- a/tasks.py +++ b/tasks.py @@ -102,7 +102,7 @@ def get_chrome_driver( run_invoke( context, """ - python html2pdf4doc/html2pdf4doc.py get_driver + python -m html2pdf4doc.main get_driver """, ) @@ -173,9 +173,7 @@ def test_integration( get_chrome_driver(context) - cwd = os.getcwd() - - html2pdf_exec = f'python3 \\"{cwd}/html2pdf4doc/html2pdf4doc.py\\"' + html2pdf_exec = "python3 -m html2pdf4doc.main" focus_or_none = f"--filter {focus}" if focus else "" debug_opts = "-vv --show-all" if debug else "" @@ -201,16 +199,37 @@ def test_integration( @task(aliases=["tf"]) -def test_fuzz(context, long: bool = False): - arg_long = "--long" if long else "" +def test_fuzz(context, focus=None, total_mutations: int = 10, output=False): + """ + @relation(SDOC-SRS-44, scope=function) + """ + + test_reports_dir = "build/test_reports" + + Path(test_reports_dir).mkdir(parents=True, exist_ok=True) + + focus_argument = f"-k {focus}" if focus is not None else "" + long_argument = ( + f"--fuzz-total-mutations {total_mutations}" if total_mutations else "" + ) + output_argument = "--capture=no" if output else "" + + run_invoke( + context, + """ + rm -rf build/tests_fuzz + """, + ) run_invoke( context, f""" - python html2pdf4doc/html2pdf4doc_fuzzer.py - tests/fuzz/01_strictdoc_guide_202510/strictdoc/docs/strictdoc_01_user_guide-PDF.html - tests/fuzz/01_strictdoc_guide_202510/ - {arg_long} + pytest + {focus_argument} + {long_argument} + {output_argument} + -o cache_dir=build/tests_fuzz_cache + tests/fuzz/ """, ) diff --git a/tests/fuzz/01_strictdoc_guide_202510/test_case.py b/tests/fuzz/01_strictdoc_guide_202510/test_case.py new file mode 100644 index 0000000..37adc47 --- /dev/null +++ b/tests/fuzz/01_strictdoc_guide_202510/test_case.py @@ -0,0 +1,18 @@ +import os + +from html2pdf4doc.main_fuzzer import fuzz_test +from tests.fuzz.conftest import create_build_folder, FuzzConfig + +PATH_TO_THIS_FOLDER = os.path.dirname(__file__) + +def test(fuzz_config: FuzzConfig): + build_folder = create_build_folder(PATH_TO_THIS_FOLDER) + + fuzz_test( + path_to_input_file=os.path.join( + build_folder, + "strictdoc/docs/strictdoc_01_user_guide-PDF.html" + ), + path_to_root=build_folder, + total_mutations=fuzz_config.total_mutations + ) diff --git a/tests/fuzz/conftest.py b/tests/fuzz/conftest.py new file mode 100644 index 0000000..9bf69ab --- /dev/null +++ b/tests/fuzz/conftest.py @@ -0,0 +1,48 @@ +import os +import shutil +from dataclasses import dataclass +from pathlib import Path + +import pytest + +PATH_TO_TESTS_FUZZ_FOLDER = os.path.dirname(__file__) + + +@dataclass +class FuzzConfig: + total_mutations: bool + + +def pytest_addoption(parser): + parser.addoption( + "--fuzz-total-mutations", + action="store", + type=int, + choices=range(1, 1001), + default=10, + help="Total number of mutations to perform (1-1000)" + ) + +@pytest.fixture +def fuzz_config(request): + return FuzzConfig(total_mutations=request.config.getoption("--fuzz-total-mutations")) + + +def create_build_folder(test_folder: str) -> str: + assert os.path.isdir(test_folder), test_folder + assert os.path.isabs(test_folder), test_folder + + relative_path_to_test_folder = Path(test_folder).relative_to(PATH_TO_TESTS_FUZZ_FOLDER) + + # IMPORTANT: The number of nested folders matches the number of nesting + # in the tests/fuzz/* test folders. Otherwise, the html2pdf4doc.js + # will not be found in either of tests/fuzz/* or build/tests_fuzz/*. + build_folder = os.path.join( + "build", + "tests_fuzz", + relative_path_to_test_folder + ) + + shutil.copytree(test_folder, build_folder) + + return build_folder diff --git a/tests/integration/06_page_load_timeout_validation/test.itest b/tests/integration/06_page_load_timeout_validation/test.itest index 13d5458..2289831 100644 --- a/tests/integration/06_page_load_timeout_validation/test.itest +++ b/tests/integration/06_page_load_timeout_validation/test.itest @@ -1,3 +1,3 @@ RUN: %expect_exit 2 %html2pdf print --page-load-timeout 1000000 %S/index1.html %S/Output/index1.pdf 2>&1 | filecheck %s -CHECK: html2pdf4doc.py print: error: argument --page-load-timeout: Must be an integer in the range [0, 600]. +CHECK: main.py print: error: argument --page-load-timeout: Must be an integer in the range [0, 600]. diff --git a/tests/integration/20_fuzz_integration/test.itest b/tests/integration/20_fuzz_integration/test.itest new file mode 100644 index 0000000..a6ff63e --- /dev/null +++ b/tests/integration/20_fuzz_integration/test.itest @@ -0,0 +1,8 @@ +# This test verifies that the main_fuzzer.py script works as a standalone +# command-line program. + +RUN: mkdir -p %project_root/build/tests_integration_fuzz/ +RUN: cp -rv %project_root/tests/fuzz/01_strictdoc_guide_202510 %project_root/build/tests_integration_fuzz/ + +RUN: PYTHONPATH=%project_root python -m html2pdf4doc.main_fuzzer %project_root/build/tests_integration_fuzz/01_strictdoc_guide_202510/strictdoc/docs/strictdoc_01_user_guide-PDF.html %project_root/build/tests_integration_fuzz/01_strictdoc_guide_202510 --total-mutations 1 | filecheck %s --dump-input=fail +CHECK: html2pdf4doc_fuzzer: finished ✅ — Success rate: 1/1 (100.0%) diff --git a/tests/integration/lit.cfg.py b/tests/integration/lit.cfg.py index c88ec54..dd7d730 100644 --- a/tests/integration/lit.cfg.py +++ b/tests/integration/lit.cfg.py @@ -9,7 +9,7 @@ config.name = "html2pdf4doc Python API integration tests" config.test_format = lit.formats.ShTest("0") -current_dir = os.getcwd() +current_dir = os.getcwd().replace("\\", "/") html2pdf_exec = lit_config.params["HTML2PDF4DOC_EXEC"] assert html2pdf_exec @@ -45,3 +45,5 @@ # In Windows CI, %ProgramW6432% is required for Selenium to properly detect browsers config.environment["ProgramW6432"] = os.environ.get("ProgramW6432", "") + +config.environment["PYTHONPATH"] = current_dir