strictdoc-project · stanislaw · Nov 21, 2025 · Nov 21, 2025
diff --git a/.github/workflows/ci-windows.yml b/.github/workflows/ci-windows.yml
@@ -57,7 +57,7 @@ jobs:
 
     - name: Download ChromeDriver
       run: |
-        python html2pdf4doc/html2pdf4doc.py get_driver
+        python -m html2pdf4doc.main get_driver
 
     - name: Run tests (Bash)
       run: |

diff --git a/.github/workflows/ci_fuzz_linux.yml b/.github/workflows/ci_fuzz_linux.yml
@@ -55,7 +55,7 @@ jobs:
       run: |
         if [ "${{ github.event_name }}" = "schedule" ]; then
           echo "🕒 Running long fuzzing..."
-          invoke test-fuzz --long
+          invoke test-fuzz --total-mutations 25
         else
           echo "🚀 Running short fuzzing..."
           invoke test-fuzz
@@ -66,6 +66,6 @@ jobs:
       if: failure() || always()
       uses: actions/upload-artifact@v4
       with:
-        name: broken-pdfs
-        path: output/
+        name: tests_fuzz_broken_pdfs
+        path: build/tests_fuzz
         retention-days: 30
diff --git a/Dockerfile b/Dockerfile
@@ -35,7 +35,7 @@ RUN if [ "$HTML2PDF4DOC_SOURCE" = "pypi" ]; then \
       pip install --no-cache-dir html2pdf4doc; \
     else \
       pip install --no-cache-dir --upgrade pip && \
-      pip install --no-cache-dir git+https://github.com/mettta/html2pdf_python.git@${HTML2PDF4DOC_SOURCE}; \
+      pip install --no-cache-dir git+https://github.com/strictdoc-project/html2pdf4doc_python.git@${HTML2PDF4DOC_SOURCE}; \
     fi; \
     chmod -R 777 /opt/venv;
 

diff --git a/html2pdf4doc/__init__.py b/html2pdf4doc/__init__.py
@@ -0,0 +1,18 @@
+import os
+from pathlib import Path
+
+# Package version. pyproject.toml points [tool.hatch.version] at this file.
+__version__ = "0.0.22"
+
+# Folder that contains the modules of this package.
+_PACKAGE_DIR = os.path.dirname(__file__)
+
+# Path to the main.py module that lives next to this file.
+PATH_TO_HTML2PDF4DOC_PY = os.path.join(_PACKAGE_DIR, "main.py")
+
+# Path to the minified HTML2PDF4Doc JavaScript file inside the package.
+PATH_TO_HTML2PDF4DOC_JS = os.path.join(
+    _PACKAGE_DIR, "html2pdf4doc_js", "html2pdf4doc.min.js"
+)
+
+# Presumably the default ChromeDriver cache folder; it is located in the home
+# folder of the current user.
+DEFAULT_CACHE_DIR = os.path.join(Path.home(), ".html2pdf4doc", "chromedriver")
+
+# Presumably the file that receives the ChromeDriver debug log; the code that
+# uses it is outside this file.
+PATH_TO_CHROME_DRIVER_DEBUG_LOG = "/tmp/chromedriver.log"
diff --git a/html2pdf4doc/html2pdf4doc.py → html2pdf4doc/main.py b/html2pdf4doc/html2pdf4doc.py → html2pdf4doc/main.py
@@ -21,19 +21,13 @@
 from selenium.webdriver.chrome.service import Service
 from webdriver_manager.core.os_manager import ChromeType, OperationSystemManager
 
-__version__ = "0.0.22"
-
-PATH_TO_HTML2PDF4DOC_PY = __file__
-PATH_TO_HTML2PDF4DOC_JS = os.path.join(
-    os.path.dirname(os.path.join(__file__)),
-    "html2pdf4doc_js",
-    "html2pdf4doc.min.js",
+from . import (
+    DEFAULT_CACHE_DIR,
+    PATH_TO_CHROME_DRIVER_DEBUG_LOG,
+    PATH_TO_HTML2PDF4DOC_JS,
+    __version__,
 )
 
-DEFAULT_CACHE_DIR = os.path.join(Path.home(), ".html2pdf4doc", "chromedriver")
-
-PATH_TO_CHROME_DRIVER_DEBUG_LOG = "/tmp/chromedriver.log"
-
 # HTML2PDF4Doc.js prints unicode symbols to console. The following makes it work on
 # Windows which otherwise complains:
 # UnicodeEncodeError: 'charmap' codec can't encode characters in position 129-130: character maps to <undefined>

diff --git a/html2pdf4doc/html2pdf4doc_fuzzer.py → html2pdf4doc/main_fuzzer.py b/html2pdf4doc/html2pdf4doc_fuzzer.py → html2pdf4doc/main_fuzzer.py
@@ -13,8 +13,6 @@
 from faker import Faker
 from lxml import etree, html
 
-from html2pdf4doc import PATH_TO_HTML2PDF4DOC_PY
-
 
 @contextlib.contextmanager
 def measure_performance(title: str) -> Iterator[None]:
@@ -71,7 +69,8 @@ def mutate_and_print(path_to_input_file: str, path_to_root: str) -> bool:
 
     cmd: List[str] = [
         sys.executable,
-        PATH_TO_HTML2PDF4DOC_PY,
+        "-m",
+        "html2pdf4doc.main",
         "print",
         "--strict",
     ]
@@ -80,7 +79,7 @@ def mutate_and_print(path_to_input_file: str, path_to_root: str) -> bool:
         cmd.append(path_to_print_[0])
         cmd.append(path_to_print_[1])
 
-    relative_path_to_mut_html = Path(path_to_root).relative_to(".")
+    relative_path_to_mut_html = Path(path_to_mut_html).relative_to(path_to_root)
     path_to_mut_output = f"output/{relative_path_to_mut_html}"
 
     def copy_files_if_needed() -> None:
@@ -143,30 +142,16 @@ def copy_mutated_file() -> None:
     return True
 
 
-def main() -> None:
-    parser = argparse.ArgumentParser()
-
-    parser.add_argument("input_file", type=str, help="TODO")
-    parser.add_argument("root_path", type=str, help="TODO")
-    parser.add_argument(
-        "--long",
-        action="store_true",
-        help="Run the fuzzer in long mode (more iterations).",
-    )
-
-    args = parser.parse_args()
-
-    path_to_input_file = args.input_file
-    path_to_root = args.root_path
-
+def fuzz_test(
+    *, path_to_input_file: str, path_to_root: str, total_mutations: int = 20
+) -> None:
     shutil.rmtree("output", ignore_errors=True)
     Path("output").mkdir(parents=True, exist_ok=True)
 
-    total_runs = 200 if args.long else 20
     success_count, failure_count = 0, 0
-    for i in range(1, total_runs + 1):
+    for i in range(1, total_mutations + 1):
         print(  # noqa: T201
-            f"html2pdf4doc_fuzzer print cycle #{i}/{total_runs} — "
+            f"html2pdf4doc_fuzzer print cycle #{i}/{total_mutations} — "
             f"So far: 🟢{success_count} / 🔴{failure_count}",
             flush=True,
         )
@@ -176,18 +161,47 @@ def main() -> None:
         else:
             failure_count += 1
 
-    assert total_runs > 0
-    success_rate_percent = (success_count / total_runs) * 100
+    assert total_mutations > 0
+    success_rate_percent = (success_count / total_mutations) * 100
 
     print(  # noqa: T201
         f"html2pdf4doc_fuzzer: finished {'✅' if failure_count == 0 else '❌'} — "
-        f"Success rate: {success_count}/{total_runs} ({success_rate_percent}%)",
+        f"Success rate: {success_count}/{total_mutations} ({success_rate_percent}%)",
         flush=True,
     )
 
     if failure_count > 0:
         sys.exit(1)
 
 
+def _total_mutations_arg(value: str) -> int:
+    """
+    Convert the --total-mutations value to an integer in the range [1, 1000].
+
+    Raises argparse.ArgumentTypeError for any other input. Compared to
+    choices=range(1, 1001), this keeps the usage line and the error message
+    short because argparse no longer lists all 1000 allowed values.
+    """
+    message = "Must be an integer in the range [1, 1000]."
+    try:
+        total_mutations = int(value)
+    except ValueError as error:
+        raise argparse.ArgumentTypeError(message) from error
+    if not 1 <= total_mutations <= 1000:
+        raise argparse.ArgumentTypeError(message)
+    return total_mutations
+
+
+def main() -> None:
+    """
+    Command-line entry point of the fuzzer.
+
+    Reads the input file, the root path and the number of mutations from the
+    command line and passes them to fuzz_test(). Invalid arguments make
+    argparse exit with code 2.
+    """
+    # To avoid UnicodeEncodeError on Windows when printing emojis.
+    sys.stdout.reconfigure(encoding="utf-8")  # type: ignore[union-attr]
+
+    parser = argparse.ArgumentParser()
+
+    parser.add_argument(
+        "input_file",
+        type=str,
+        help="Path to the HTML file to mutate and print.",
+    )
+    parser.add_argument(
+        "root_path",
+        type=str,
+        help="Path to the root folder that contains the input file.",
+    )
+    parser.add_argument(
+        "--total-mutations",
+        type=_total_mutations_arg,
+        metavar="N",
+        required=True,
+        help="Number of mutations to run: an integer between 1 and 1000.",
+    )
+
+    args = parser.parse_args()
+
+    fuzz_test(
+        path_to_input_file=args.input_file,
+        path_to_root=args.root_path,
+        total_mutations=args.total_mutations,
+    )
+
+
 if __name__ == "__main__":
     main()
diff --git a/pyproject.toml b/pyproject.toml
@@ -3,7 +3,7 @@ requires = ["hatchling"]
 build-backend = "hatchling.build"
 
 [tool.hatch.version]
-path = "html2pdf4doc/html2pdf4doc.py"
+path = "html2pdf4doc/__init__.py"
 
 [tool.hatch.build]
 # Currently unused:
@@ -13,7 +13,7 @@ path = "html2pdf4doc/html2pdf4doc.py"
 # ignore-vcs = true
 
 include = [
-    "html2pdf4doc/html2pdf4doc.py",
+    "html2pdf4doc/*.py",
     "html2pdf4doc/html2pdf4doc_js/html2pdf4doc.min.js",
 ]
 
@@ -69,8 +69,8 @@ development = [
 ]
 
 [project.scripts]
-html2pdf4doc = "html2pdf4doc.html2pdf4doc:main"
-html2pdf4doc_fuzzer = "html2pdf4doc.html2pdf4doc_fuzzer:main"
+html2pdf4doc = "html2pdf4doc.main:main"
+html2pdf4doc_fuzzer = "html2pdf4doc.main_fuzzer:main"
 
 [project.urls]
 Changelog = "https://github.com/mettta/html2pdf_python/releases/"

diff --git a/requirements.development.txt b/requirements.development.txt
@@ -11,6 +11,11 @@ twine
 mypy>=0.910
 ruff>=0.9
 
+#
+# Unit tests
+#
+pytest
+
 #
 # Integration tests
 #

diff --git a/tasks.py b/tasks.py
@@ -102,7 +102,7 @@ def get_chrome_driver(
     run_invoke(
         context,
         """
-        python html2pdf4doc/html2pdf4doc.py get_driver
+        python -m html2pdf4doc.main get_driver
     """,
     )
 
@@ -173,9 +173,7 @@ def test_integration(
 
     get_chrome_driver(context)
 
-    cwd = os.getcwd()
-
-    html2pdf_exec = f'python3 \\"{cwd}/html2pdf4doc/html2pdf4doc.py\\"'
+    html2pdf_exec = "python3 -m html2pdf4doc.main"
 
     focus_or_none = f"--filter {focus}" if focus else ""
     debug_opts = "-vv --show-all" if debug else ""
@@ -201,16 +199,37 @@ def test_integration(
 
 
 @task(aliases=["tf"])
-def test_fuzz(context, long: bool = False):
-    arg_long = "--long" if long else ""
+def test_fuzz(context, focus=None, total_mutations: int = 10, output=False):
+    """
+    @relation(SDOC-SRS-44, scope=function)
+    """
+
+    # The reports folder is only created here; nothing else in this task
+    # refers to it.
+    test_reports_dir = "build/test_reports"
+
+    Path(test_reports_dir).mkdir(parents=True, exist_ok=True)
+
+    # Translate the task options into pytest command-line arguments. An option
+    # that is not set contributes an empty string to the command below.
+    focus_argument = f"-k {focus}" if focus is not None else ""
+    # NOTE(review): despite its name, this variable carries the number of
+    # mutations. A total_mutations of 0 is falsy, so the option is omitted.
+    long_argument = (
+        f"--fuzz-total-mutations {total_mutations}" if total_mutations else ""
+    )
+    output_argument = "--capture=no" if output else ""
+
+    # Remove what a previous fuzz run left in build/tests_fuzz.
+    run_invoke(
+        context,
+        """
+            rm -rf build/tests_fuzz
+        """,
+    )
 
+    # Run pytest on tests/fuzz/ and keep its cache folder under build/.
     run_invoke(
         context,
         f"""
-            python html2pdf4doc/html2pdf4doc_fuzzer.py
-                tests/fuzz/01_strictdoc_guide_202510/strictdoc/docs/strictdoc_01_user_guide-PDF.html
-                tests/fuzz/01_strictdoc_guide_202510/
-                {arg_long}
+            pytest
+            {focus_argument}
+            {long_argument}
+            {output_argument}
+            -o cache_dir=build/tests_fuzz_cache
+            tests/fuzz/
         """,
     )
 

diff --git a/tests/fuzz/01_strictdoc_guide_202510/test_case.py b/tests/fuzz/01_strictdoc_guide_202510/test_case.py
@@ -0,0 +1,18 @@
+import os
+
+from html2pdf4doc.main_fuzzer import fuzz_test
+from tests.fuzz.conftest import create_build_folder, FuzzConfig
+
+PATH_TO_THIS_FOLDER = os.path.dirname(__file__)
+
+def test(fuzz_config: FuzzConfig):
+    """
+    Run the fuzzer on the user guide HTML inside a copy of this test folder.
+    """
+    path_to_build_folder = create_build_folder(PATH_TO_THIS_FOLDER)
+    path_to_input_file = os.path.join(
+        path_to_build_folder,
+        "strictdoc/docs/strictdoc_01_user_guide-PDF.html",
+    )
+
+    fuzz_test(
+        path_to_input_file=path_to_input_file,
+        path_to_root=path_to_build_folder,
+        total_mutations=fuzz_config.total_mutations,
+    )
diff --git a/tests/fuzz/conftest.py b/tests/fuzz/conftest.py
@@ -0,0 +1,48 @@
+import os
+import shutil
+from dataclasses import dataclass
+from pathlib import Path
+
+import pytest
+
+PATH_TO_TESTS_FUZZ_FOLDER = os.path.dirname(__file__)
+
+
+@dataclass
+class FuzzConfig:
+    """
+    Options that the fuzz tests receive from the pytest command line.
+    """
+
+    # Number of mutations to run. The value comes from the
+    # --fuzz-total-mutations option, which pytest parses as an integer.
+    total_mutations: int
+
+
+def pytest_addoption(parser):
+    """
+    Register the --fuzz-total-mutations command-line option with pytest.
+    """
+    option_settings = {
+        "action": "store",
+        "type": int,
+        "choices": range(1, 1001),
+        "default": 10,
+        "help": "Total number of mutations to perform (1-1000)",
+    }
+    parser.addoption("--fuzz-total-mutations", **option_settings)
+
+@pytest.fixture
+def fuzz_config(request):
+    """
+    Provide the fuzz options of the current pytest run as a FuzzConfig.
+    """
+    total_mutations = request.config.getoption("--fuzz-total-mutations")
+    return FuzzConfig(total_mutations=total_mutations)
+
+
+def create_build_folder(test_folder: str) -> str:
+    """
+    Copy a fuzz test folder to build/tests_fuzz/ and return the path to the copy.
+
+    test_folder must be an absolute path to an existing folder located inside
+    the folder of this conftest.py; Path.relative_to() raises ValueError
+    otherwise. The returned path is relative to the current working directory.
+    A copy left over from a previous run is removed first, so the function can
+    be called again without a manual cleanup.
+    """
+    assert os.path.isdir(test_folder), test_folder
+    assert os.path.isabs(test_folder), test_folder
+
+    relative_path_to_test_folder = Path(test_folder).relative_to(
+        PATH_TO_TESTS_FUZZ_FOLDER
+    )
+
+    # IMPORTANT: The build folder must have the same nesting depth below
+    #            build/tests_fuzz/ as the test folder has below tests/fuzz/.
+    #            Otherwise, html2pdf4doc.js cannot be found from both the
+    #            tests/fuzz/* and the build/tests_fuzz/* folders.
+    build_folder = os.path.join(
+        "build",
+        "tests_fuzz",
+        relative_path_to_test_folder,
+    )
+
+    # shutil.copytree() raises FileExistsError if the destination already
+    # exists, which happens when a previous run was not cleaned up.
+    shutil.rmtree(build_folder, ignore_errors=True)
+    shutil.copytree(test_folder, build_folder)
+
+    return build_folder
diff --git a/tests/integration/06_page_load_timeout_validation/test.itest b/tests/integration/06_page_load_timeout_validation/test.itest
@@ -1,3 +1,3 @@
 RUN: %expect_exit 2 %html2pdf print --page-load-timeout 1000000 %S/index1.html %S/Output/index1.pdf 2>&1 | filecheck %s
 
-CHECK: html2pdf4doc.py print: error: argument --page-load-timeout: Must be an integer in the range [0, 600].
+CHECK: main.py print: error: argument --page-load-timeout: Must be an integer in the range [0, 600].
diff --git a/tests/integration/20_fuzz_integration/test.itest b/tests/integration/20_fuzz_integration/test.itest
@@ -0,0 +1,8 @@
+# This test verifies that the main_fuzzer.py script works as a standalone
+# command-line program.
+
+RUN: mkdir -p %project_root/build/tests_integration_fuzz/
+RUN: cp -rv %project_root/tests/fuzz/01_strictdoc_guide_202510 %project_root/build/tests_integration_fuzz/
+
+RUN: PYTHONPATH=%project_root python -m html2pdf4doc.main_fuzzer %project_root/build/tests_integration_fuzz/01_strictdoc_guide_202510/strictdoc/docs/strictdoc_01_user_guide-PDF.html %project_root/build/tests_integration_fuzz/01_strictdoc_guide_202510 --total-mutations 1 | filecheck %s --dump-input=fail
+CHECK: html2pdf4doc_fuzzer: finished ✅ — Success rate: 1/1 (100.0%)
diff --git a/tests/integration/lit.cfg.py b/tests/integration/lit.cfg.py
@@ -9,7 +9,7 @@
 config.name = "html2pdf4doc Python API integration tests"
 config.test_format = lit.formats.ShTest("0")
 
-current_dir = os.getcwd()
+current_dir = os.getcwd().replace("\\", "/")
 
 html2pdf_exec = lit_config.params["HTML2PDF4DOC_EXEC"]
 assert html2pdf_exec
@@ -45,3 +45,5 @@
 
 # In Windows CI, %ProgramW6432% is required for Selenium to properly detect browsers
 config.environment["ProgramW6432"] = os.environ.get("ProgramW6432", "")
+
+config.environment["PYTHONPATH"] = current_dir