-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathpdf-split-page-wise.py
More file actions
60 lines (48 loc) · 2.14 KB
/
pdf-split-page-wise.py
File metadata and controls
60 lines (48 loc) · 2.14 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
import os
import fitz
from pikepdf import Pdf
from datetime import datetime
# Splitting is done with fitz (PyMuPDF); every extracted page is then re-saved
# through pikepdf to stamp the creation/modification time and to linearize the
# file ("fast web view").

OUTPUT_DIRNAME = "Output"
TEMP_PREFIX = "Extract-"  # marks the intermediate, not-yet-final page file


def page_paths(out_dir, pdfname, page_number):
    """Return the intermediate and final file paths for one extracted page.

    Args:
        out_dir: Directory that receives the page-level PDFs.
        pdfname: Source file name without its extension.
        page_number: 1-based page number; zero-padded to three digits.

    Returns:
        A ``(temp_path, final_path)`` tuple. Both names are built directly
        from *pdfname*, so a folder or source file whose name happens to
        contain ``"Extract-"`` is left intact (stripping the prefix with
        ``str.replace`` on the full path would mangle it).
    """
    page_file = f"{pdfname}_{page_number:03}.pdf"
    return (
        os.path.join(out_dir, TEMP_PREFIX + page_file),
        os.path.join(out_dir, page_file),
    )


def prepare_output_dir(source_dir):
    """Create ``<source_dir>/Output`` if needed and delete stale PDFs in it.

    Args:
        source_dir: Folder that holds the combined PDFs.

    Returns:
        The path of the output directory.
    """
    out_dir = os.path.join(source_dir, OUTPUT_DIRNAME)
    os.makedirs(out_dir, exist_ok=True)
    # Delete PDFs left over from a previous run; other files are kept.
    for stale in os.listdir(out_dir):
        if stale.endswith(".pdf"):
            os.remove(os.path.join(out_dir, stale))
    return out_dir


def split_pdf(path, out_dir):
    """Write every page of the PDF at *path* as its own PDF in *out_dir*.

    Each page is first saved by fitz under an ``Extract-`` name, then
    re-saved by pikepdf (metadata stamped, linearized) under its final name.
    The intermediate file is removed even if the pikepdf step fails.

    Args:
        path: Path of the combined PDF.
        out_dir: Directory that receives the page-level PDFs.
    """
    pdfname = os.path.splitext(os.path.basename(path))[0]
    input_file = fitz.open(path)
    try:
        for n in range(len(input_file)):  # n = zero-based page index
            temp_path, final = page_paths(out_dir, pdfname, n + 1)

            new_pdf = fitz.open()
            try:
                new_pdf.insert_pdf(input_file, from_page=n, to_page=n)
                new_pdf.save(temp_path)  # intermediate file, not the final one
            finally:
                new_pdf.close()

            try:
                # One timezone-aware local timestamp shared by both fields.
                stamp = datetime.now().astimezone().isoformat()
                with Pdf.open(temp_path) as pdf:
                    # set_pikepdf_as_editor=False keeps the Producer value
                    # assigned below instead of pikepdf's automatic one.
                    with pdf.open_metadata(set_pikepdf_as_editor=False) as meta:
                        meta["pdf:Producer"] = "pikepdf"
                        meta["xmp:CreateDate"] = stamp
                        meta["xmp:ModifyDate"] = stamp
                    pdf.save(final, linearize=True)
            finally:
                os.remove(temp_path)  # delete the intermediate extracted file
            print(final)
    finally:
        input_file.close()


def main():
    """Prompt for a folder and split every PDF in it into page-level PDFs."""
    print("\n PDF Splitter - Single Page wise PDF \n")
    print("\n Application Split the PDF by each page wise \n")
    print("\n Date: 15 June 2022 \n\n")

    filepath = input(" Enter the File path: ").strip()
    # Reject empty or wrong input up front rather than failing later in mkdir
    # or silently working on the drive root.
    if not os.path.isdir(filepath):
        raise SystemExit(f"\n '{filepath}' is not an existing folder \n")

    out_dir = prepare_output_dir(filepath)

    print("\n Extracting the Pages ... \n")
    for fname in os.listdir(filepath):
        path = os.path.join(filepath, fname)
        # Accept ".PDF" as well, and skip directories that merely end in .pdf.
        if fname.lower().endswith(".pdf") and os.path.isfile(path):
            split_pdf(path, out_dir)


if __name__ == "__main__":
    main()