documentation_fw/exportPdfFromHtml.py at main · marctrommen/documentation_fw · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

"""Transforms all HTML files into PDF files.

Transforms all HTML files into PDF files, so they can be offered as a PDF
handout of the HTML documentation.
Script can run independently or get called from another script "generator.py".
The only precondition is that HTML files exist in the subdirectory "_www".

USAGE:
1)  Call as a stand-alone script:
    python.exe exportPdfFromHtml.py
    this executes the "__main__" section at the end of the file, which calls run()
2)  Call as imported script from other scripts:
    import exportPdfFromHtml
    exportPdfFromHtml.run()

Expects following directory tree:

project_root_directory
├── _www ... here are all generated HTML files which get exported to PDFs
├── _pdf ... here all exported PDFs are stored by this script
└── exportPdfFromHtml.py ... Python exporter script for PDFs

It is strongly recommended to set up all necessary parameters in the
init() function (imported from config_exportPdfFromHtml) before running this
script for the first time.

Expects following directory tree for the external application installation:

User_home_directory parameters
└── installations
    └── wkhtmltox
        └── bin
            └── wkhtmltopdf.exe
"""

import os
import shutil
import subprocess
import time
from config_exportPdfFromHtml import init

# ---------------------------
def run():
    """
    Use this function from other scripts to start all PDF transformations.

    Loads the script parameters with init(), checks that the HTML input
    directory exists, makes sure the PDF output directory exists and then
    hands over to handle_all_files().

    Parameter
    ---------
        none

    Returns
    -------
        none

    Raises
    ------
    SystemExit
        with exit code 1, if the HTML input directory does not exist
    """
    print("DEBUG: run()")

    parameters = init()

    # check if precondition is fulfilled: HTML input path exists
    html_input_path = parameters['html_input_path']
    if not os.path.isdir(html_input_path):
        print("FATAL: No HTML files to transform into PDFs found!")
        print("INFO:  Maybe you need to run the HTML generator first?")
        # Raise SystemExit directly instead of calling exit(): the exit()
        # builtin is injected by the "site" module and is not available in
        # every interpreter setup (e.g. "python -S"), while this function is
        # meant to be callable from other scripts.
        raise SystemExit(1)

    # cleanup(parameters) is currently not called, so the PDF export path is
    # not recreated here. Make sure it exists at least, otherwise the
    # converter has no directory to write into.
    os.makedirs(parameters['pdf_output_path'], exist_ok=True)

    handle_all_files(parameters)


# ---------------------------
def cleanup(parameters):
    """
    Recreate the PDF export directory from scratch.

    An already existing export directory is removed together with all of its
    content; afterwards a new, empty directory is created at the same path.

    Parameter
    ---------
    parameters (dict)
        dictionary with globally used script parameters; the entry
        'pdf_output_path' is read

    Returns
    -------
        none
    """
    pdf_dir = parameters['pdf_output_path']

    directory_present = os.path.isdir(pdf_dir)
    if directory_present:
        # drop the old export including everything inside of it
        shutil.rmtree(pdf_dir)
        print("DEBUG: aPath deleted:", pdf_dir)

    # start over with an empty export directory
    os.makedirs(pdf_dir)
    print("DEBUG: aPath created:", pdf_dir)

    print("DEBUG: cleanup()")


# ---------------------------
def wait_for_file_written(file_path, timeout_seconds=5.0,
        poll_interval=0.2) -> bool:
    """
    checks if the file got written within the timeout (5 seconds by default)

    The path is polled until it exists or until the timeout has elapsed. The
    elapsed time is measured with a monotonic clock, so the timeout does not
    depend on the number of polls or on system clock adjustments.

    Parameter
    ---------
    file_path (str)
        absolute path to the file
    timeout_seconds (float)
        maximum time to wait for the file, in seconds (default: 5.0)
    poll_interval (float)
        pause between two existence checks, in seconds (default: 0.2)

    Returns
    -------
    bool
        True  file exists
        False file does not exist after the timeout has elapsed
    """
    print("DEBUG: wait_for_file_written()")

    deadline = time.monotonic() + timeout_seconds

    while not os.path.exists(file_path):
        remaining = deadline - time.monotonic()
        if remaining <= 0:
            print("FATAL: timeout exceeded!")
            return False # file not written after timeout

        # never sleep beyond the deadline
        time.sleep(min(poll_interval, remaining))

    return True # file got written

# ---------------------------
def handle_all_files(parameters):
    """
    Walks over the entries of the HTML input directory and starts the PDF
    export for every regular file whose name ends with ".html".

    Sub directories and files with other extensions are skipped. The export
    function receives the file name without the ".html" extension.

    Parameter
    ---------
    parameters (dict)
        dictionary with globally used script parameters; the entry
        'html_input_path' is read

    Returns
    -------
        none
    """

    print("DEBUG: handle_all_files()")

    input_dir = parameters['html_input_path']
    html_suffix = ".html"

    for entry in os.listdir(input_dir):
        # only regular files are candidates, directories are ignored
        if not os.path.isfile(os.path.join(input_dir, entry)):
            continue

        # only HTML files get transformed
        if not entry.endswith(html_suffix):
            continue

        print("DEBUG: Handle", entry)
        # strip the extension, the export function appends it again
        base_name = entry[:-len(html_suffix)]
        export_html_to_pdf(parameters, base_name)

# ---------------------------
def export_html_to_pdf(parameters, filename):
    """
    exports HTML file into a PDF file

    Calls the external wkhtmltopdf application for one HTML file and reports
    whether the PDF file exists afterwards.

    Parameter
    ---------
    parameters (dict)
        dictionary with globally used script parameters; the entries
        'html_input_path', 'pdf_output_path' and 'wkhtmltopdf_app_path'
        are read
    filename (str)
        name of the HTML file without the ".html" extension; the PDF file
        gets the same name with the extension ".pdf"

    Returns
    --------
        none
    """
    print("DEBUG: export_html_to_pdf()")

    html_file_path = os.path.join(parameters['html_input_path'],
        filename + ".html")

    pdf_file_path = os.path.join(parameters['pdf_output_path'],
        filename + ".pdf")

    command = [
        parameters['wkhtmltopdf_app_path'],
        html_file_path,
        "--enable-local-file-access",
        pdf_file_path
    ]
    # argument list without a shell: paths containing blanks need no quoting
    result = subprocess.run(command)

    # Do not drop the exit status silently: a PDF left over from an earlier
    # run would otherwise be reported as written although the conversion
    # failed this time.
    if result.returncode != 0:
        print("WARNING: wkhtmltopdf exited with code", result.returncode,
            "for", html_file_path)

    if wait_for_file_written(pdf_file_path):
        print("PDF written to", pdf_file_path)
    else:
        print("ERROR:", pdf_file_path)

# ---------------------------
# MAIN
# ---------------------------
if __name__ == "__main__":
    # Stand-alone entry point: run all PDF transformations, framed by debug
    # messages that mark the start and the end of the script.
    print("DEBUG: MAIN started")
    run()
    print("DEBUG: MAIN done")