44from typing import Literal , Callable , Union
55import os
66import io
7+
8+ import markdown
9+ import pdfkit
10+
711from dol import Pipe
812
# Closed set of source kinds accepted by the PDF helpers. "md" is a shorthand
# spelling of "markdown"; "text" is the fallback kind for plain strings.
SrcKind = Literal[
    "url",
    "html",
    "file",
    "md",
    "markdown",
    "text",
]
1115
1216
1317def _resolve_src_kind (src : str ) -> SrcKind :
@@ -42,7 +46,10 @@ def _resolve_src_kind(src: str) -> SrcKind:
4246 elif "<html" in s .lower ():
4347 return "html"
4448 elif os .path .exists (s ):
45- return "file"
49+ if s .endswith (".html" ) or s .endswith (".htm" ) or s .endswith (".xhtml" ):
50+ return "file"
51+ else :
52+ return "markdown"
4653 else :
4754 # Fallback: if it doesn't look like a URL or a file exists, assume it's text.
4855 return "text"
@@ -92,6 +99,73 @@ def write_to_file(b: bytes) -> str:
9299 raise ValueError ("egress must be None, a file path string, or a callable." )
93100
94101
# Default stylesheet used by ``add_css``: full-width tables with collapsed
# 1px black borders, padded left-aligned cells and a light grey header row.
dflt_css = """
<style>
    table {
        width: 100%;
        border-collapse: collapse;
        border: 1px solid black;
    }
    th, td {
        border: 1px solid black;
        padding: 8px;
        text-align: left;
    }
    th {
        background-color: #f2f2f2;
    }
</style>
"""


def add_css(html_text: str, css: str = dflt_css) -> str:
    """Wrap an HTML fragment in a complete document that carries ``css``.

    Args:
        html_text: HTML markup to place inside ``<body>``.
        css: Markup to place inside ``<head>`` (typically a ``<style>``
            element). Defaults to ``dflt_css``.

    Returns:
        A string of the form
        ``<html><head>{css}</head><body>{html_text}</body></html>``.
    """
    # The template previously interpolated an undefined name (``custom_css``),
    # which raised NameError on every call; the parameter is ``css``.
    return f"<html><head>{css}</head><body>{html_text}</body></html>"
123+
124+
# Baseline keyword arguments for pdfkit calls: UTF-8 input, A4 pages and the
# same 10mm margin on all four sides.
dflt_pdfkit_kwargs = {
    "options": {
        "encoding": "UTF-8",
        "page-size": "A4",
        **{f"margin-{side}": "10mm" for side in ("top", "right", "bottom", "left")},
    }
}

# Baseline keyword arguments for ``markdown.markdown``.
dflt_markdown_kwargs = {"extensions": ("extra", "tables")}
140+
141+
def markdown_to_pdf(
    md_src: str,
    egress: Union[None, str, Callable] = None,
    *,
    markdown_extensions=dflt_markdown_kwargs,
    **pdfkit_kwargs,
):
    """Convert Markdown (text, or a path to a Markdown file) to a PDF.

    Args:
        md_src: Markdown text. If the string is the path of an existing file,
            that file is read (as UTF-8) and its contents are converted.
        egress: Where the PDF goes. If callable, the PDF bytes are produced
            and ``egress(pdf_bytes)`` is returned. Otherwise the value is
            handed to ``pdfkit.from_string`` as its output target.
        markdown_extensions: Keyword arguments forwarded to
            ``markdown.markdown`` (a mapping such as
            ``{"extensions": (...)}``, despite the parameter name).
        **pdfkit_kwargs: Extra keyword arguments for ``pdfkit.from_string``,
            layered over ``dflt_pdfkit_kwargs``. A dict given as ``options``
            is merged over the default options; ``options=None`` keeps the
            defaults.

    Returns:
        ``egress(pdf_bytes)`` when ``egress`` is callable, otherwise whatever
        ``pdfkit.from_string`` returns for the given output target.
    """
    caller_options = pdfkit_kwargs.get("options")
    pdfkit_kwargs = {**dflt_pdfkit_kwargs, **pdfkit_kwargs}

    # A plain top-level merge would let ``options=None`` (which get_pdf
    # forwards when the caller gave no options) or a partial options dict
    # discard every default option, including the UTF-8 encoding.
    default_options = dflt_pdfkit_kwargs.get("options") or {}
    if caller_options is None:
        pdfkit_kwargs["options"] = dict(default_options)
    elif isinstance(caller_options, dict):
        pdfkit_kwargs["options"] = {**default_options, **caller_options}

    if isinstance(md_src, str) and os.path.isfile(md_src):
        with open(md_src, encoding="utf-8") as f:
            md_src = f.read()

    # Honor the caller-supplied markdown kwargs; the module defaults used to
    # be applied unconditionally, making the parameter a no-op.
    html_text = markdown.markdown(md_src, **markdown_extensions)

    if callable(egress):
        # Ask pdfkit for the bytes (no output path) and let egress consume
        # them. The pdfkit kwargs must be forwarded here too so both egress
        # modes render the same document.
        pdf_bytes = pdfkit.from_string(html_text, None, **pdfkit_kwargs)
        return egress(pdf_bytes)

    return pdfkit.from_string(html_text, egress, **pdfkit_kwargs)
167+
168+
95169def get_pdf (
96170 src : str ,
97171 egress : Union [None , str , Callable ] = None ,
@@ -136,7 +210,7 @@ def get_pdf(
136210 css: (optional) string with path to css file which will be added to a single input file
137211 configuration: (optional) instance of pdfkit.configuration.Configuration()
138212 cover_first: (optional) if True, cover always precedes TOC
139- : verbose: (optional) By default '--quiet' is passed to all calls, set this to False to get wkhtmltopdf output to stdout.
213+ verbose: (optional) By default '--quiet' is passed to all calls, set this to False to get wkhtmltopdf output to stdout.
140214
141215
142216 Returns:
@@ -160,9 +234,8 @@ def get_pdf(
160234
161235
162236 """
163- import pdfkit
164-
165237 _kwargs = dict (
238+ dflt_pdfkit_kwargs ,
166239 options = options ,
167240 toc = toc ,
168241 cover = cover ,
@@ -175,19 +248,24 @@ def get_pdf(
175248 # Determine the source kind if not explicitly provided.
176249 if src_kind is None :
177250 src_kind = _resolve_src_kind (src )
251+ elif src_kind == 'md' :
252+ src_kind = 'markdown'
178253
179254 if src_kind == "url" :
180255 _kwargs .pop (
181256 "css" , None
182257 ) # because from_url, for some reason, doesn't have a css argument
183258
184- _add_options = lambda func : partial (func , ** _kwargs , ** kwargs )
259+ _pdfkit_kwargs = dict (** _kwargs , ** kwargs )
260+ _add_pdfkit_options = lambda func : partial (func , ** _pdfkit_kwargs )
185261 # Map the source kind to the corresponding pdfkit function.
186262 func_for_kind = {
187- "url" : _add_options (pdfkit .from_url ),
188- "text" : _add_options (pdfkit .from_string ),
189- "html" : Pipe (io .StringIO , _add_options (pdfkit .from_file )),
190- "file" : _add_options (pdfkit .from_file ),
263+ "url" : _add_pdfkit_options (pdfkit .from_url ),
264+ "text" : _add_pdfkit_options (pdfkit .from_string ),
265+ "html" : Pipe (io .StringIO , _add_pdfkit_options (pdfkit .from_file )),
266+ "file" : _add_pdfkit_options (pdfkit .from_file ),
267+ # egress=None to force bytes output in markdown:
268+ "markdown" : partial (markdown_to_pdf , egress = None , ** _pdfkit_kwargs ),
191269 }
192270 src_to_bytes_func = func_for_kind .get (src_kind )
193271 if src_to_bytes_func is None :
0 commit comments