-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathpdf_operations.py
More file actions
118 lines (91 loc) · 3.64 KB
/
pdf_operations.py
File metadata and controls
118 lines (91 loc) · 3.64 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
import PyPDF2 as pdf
from PyPDF2 import PdfReader, PdfWriter, PdfMerger
from PIL import Image
import os
def get_pdf_metadata(pdf_path):
    """Return the document information stored in a PDF file.

    Args:
        pdf_path: Path of the PDF file to inspect.

    Returns:
        The ``metadata`` attribute of a ``PdfReader`` opened on the file.
    """
    with open(pdf_path, "rb") as pdf_file:
        return PdfReader(pdf_file).metadata
def extract_text_from_pdf(pdf_path):
    """Extract the text of every page of a PDF as a single string.

    Args:
        pdf_path: Path of the PDF file to read.

    Returns:
        The text of all pages, in page order, joined by single spaces.
    """
    with open(pdf_path, "rb") as pdf_file:
        document = PdfReader(pdf_file)
        # Text is extracted while the file is still open; only the final
        # join happens after it is closed.
        page_texts = [page.extract_text() for page in document.pages]
    return " ".join(page_texts)
def split_pdf(pdf_path, output_dir: str = "files"):
    """Split a PDF into one single-page PDF per page.

    Each page is written to ``{output_dir}/{name}_{n}.pdf`` where ``name`` is
    the base name of ``pdf_path`` (extension included, which keeps the file
    names this function has always produced) and ``n`` is the 1-based page
    number. The path of every file written is printed.

    Args:
        pdf_path: Path of the PDF file to split.
        output_dir: Directory that receives the pages. It is created when
            missing, so the first write does not fail on a fresh checkout.
    """
    os.makedirs(output_dir, exist_ok=True)
    # The base name does not depend on the page, so compute it once.
    filename = os.path.basename(pdf_path)
    with open(pdf_path, "rb") as f:
        reader = PdfReader(f)
        for page_number, page in enumerate(reader.pages, start=1):
            # A fresh writer per page: every output holds exactly one page.
            writer = PdfWriter()
            writer.add_page(page)
            output_filename = f"{output_dir}/{filename}_{page_number}.pdf"
            with open(output_filename, "wb") as out:
                writer.write(out)
            print(f"PDF criado com o nome: {output_filename}")
def get_pdf_upto(
    pdf_path, start_page: int = 0, stop_page: int = 0, output_dir: str = "files"
):
    """Copy an inclusive range of pages from a PDF into a new PDF.

    ``start_page`` and ``stop_page`` are 0-based and both inclusive, so the
    defaults copy only the first page. The result is written to
    ``{output_dir}/{name}_from_{start_page + 1}_to_{stop_page + 1}.pdf``,
    i.e. the file name carries the same range expressed in 1-based page
    numbers.

    Args:
        pdf_path: Path of the source PDF file.
        start_page: 0-based index of the first page to copy.
        stop_page: 0-based index of the last page to copy (inclusive).
        output_dir: Directory that receives the new file; created when
            missing.
    """
    os.makedirs(output_dir, exist_ok=True)
    filename = os.path.basename(pdf_path)
    output_filename = (
        f"{output_dir}/{filename}_from_{start_page+1}_to_{stop_page+1}.pdf"
    )
    with open(pdf_path, "rb") as f:
        reader = PdfReader(f)
        writer = PdfWriter()
        # ``stop_page + 1`` makes the stop inclusive, so the pages copied
        # match the "from ... to ..." range advertised in the file name.
        for page_num in range(start_page, stop_page + 1):
            writer.add_page(reader.pages[page_num])
        # Write while the source file is still open.
        with open(output_filename, "wb") as out:
            writer.write(out)
def fetch_all_pdf_files(parent_folder: str):
    """Recursively collect the paths of all PDF files under a folder.

    Args:
        parent_folder: Directory whose tree is searched.

    Returns:
        A sorted list of paths (``parent_folder`` joined with the relative
        location) of every file whose name ends in ``.pdf``, ignoring case.
        The list is empty when nothing matches or the folder does not exist.
    """
    target_files = []
    for path, _subdirs, files in os.walk(parent_folder):
        target_files.extend(
            os.path.join(path, name)
            for name in files
            # Compare case-insensitively so "REPORT.PDF" is found as well.
            if name.lower().endswith(".pdf")
        )
    # os.walk yields entries in filesystem order; sort so that callers get
    # the same order on every run and platform.
    target_files.sort()
    return target_files
def merge_pdf(list_pdfs, output_filename="files/final_pdf.pdf"):
    """Concatenate several PDFs, in the given order, into one file.

    Args:
        list_pdfs: Iterable of inputs accepted by ``PdfMerger.append``
            (for example file paths).
        output_filename: Path of the merged PDF to write.
    """
    merger = PdfMerger()
    try:
        # Append every input before opening the output, so an input that
        # fails to load does not leave a freshly truncated output behind.
        for pdf_file in list_pdfs:
            merger.append(pdf_file)
        with open(output_filename, "wb") as out:
            merger.write(out)
    finally:
        # Release whatever the merger still holds for the inputs, even when
        # appending or writing failed.
        merger.close()
def rotate_pdf(
    pdf_path, page_num: int, rotation: int = 90, output_dir: str = "files"
):
    """Write one page of a PDF, rotated, to a new single-page PDF.

    The result is written to
    ``{output_dir}/{name}_{rotation}_rotated_page.pdf`` where ``name`` is the
    base name of ``pdf_path``.

    Args:
        pdf_path: Path of the source PDF file.
        page_num: 0-based index of the page to extract and rotate.
        rotation: Angle passed to the page's ``rotate`` method
            (presumably degrees clockwise in multiples of 90 - confirm
            against the PyPDF2 version in use).
        output_dir: Directory that receives the new file; created when
            missing.
    """
    os.makedirs(output_dir, exist_ok=True)
    filename = os.path.basename(pdf_path)
    output_filename = f"{output_dir}/{filename}_{rotation}_rotated_page.pdf"
    with open(pdf_path, "rb") as f:
        reader = PdfReader(f)
        writer = PdfWriter()
        writer.add_page(reader.pages[page_num])
        # The writer contains only the page added above, so that page is at
        # index 0 regardless of its position in the source document.
        writer.pages[0].rotate(rotation)
        with open(output_filename, "wb") as out:
            writer.write(out)
def extract_images_from_pdf(pdf_path, output_dir: str = "files"):
    """Save every image found on the pages of a PDF as a separate file.

    Each image is written to ``{output_dir}/{image name}``. When a name was
    already used earlier in the same call, ``_page{n}`` (1-based page number)
    is inserted before the extension so the earlier image is not
    overwritten.

    Args:
        pdf_path: Path of the PDF file to scan.
        output_dir: Directory that receives the images; created when
            missing.
    """
    os.makedirs(output_dir, exist_ok=True)
    used_names = set()
    with open(pdf_path, "rb") as f:
        reader = PdfReader(f)
        for page_number, page in enumerate(reader.pages, start=1):
            for image in page.images:
                # The name comes from the PDF itself: keep only its last
                # path component so it cannot point outside output_dir.
                image_name = os.path.basename(image.name)
                if image_name in used_names:
                    # Same name seen before (names can repeat from page to
                    # page): make it unique instead of overwriting.
                    stem, extension = os.path.splitext(image_name)
                    image_name = f"{stem}_page{page_number}{extension}"
                used_names.add(image_name)
                with open(f"{output_dir}/{image_name}", "wb") as out:
                    out.write(image.data)
def convert_img_pdf(image_file):
    """Convert an image file to a PDF stored next to it.

    The PDF gets the same path as ``image_file`` with the extension replaced
    by ``.pdf``.

    Args:
        image_file: Path of the image to convert.
    """
    filename = f"{os.path.splitext(image_file)[0]}.pdf"
    # Open the image as a context manager so its file handle is released as
    # soon as the conversion is done.
    with Image.open(image_file) as my_image:
        # Converted to RGB before saving, as the original code did.
        img = my_image.convert("RGB")
        img.save(filename)
if __name__ == "__main__":
    # Run the demo only when the file is executed as a script, so importing
    # the module has no side effects.
    # Example calls, kept for reference; uncomment the one to try.
    # split_pdf("files/sample2.pdf")
    # get_pdf_upto("files/sample2.pdf", 1, 2)
    # print(fetch_all_pdf_files("files/"))
    # pdf_list = fetch_all_pdf_files("files/")
    # merge_pdf(pdf_list)
    # rotate_pdf("files/sample.pdf", 0)
    # print(get_pdf_metadata("files/sample.pdf"))
    # print(get_pdf_metadata("files/sample.pdf").title)
    # print(get_pdf_metadata("files/sample.pdf").author)
    # print(extract_text_from_pdf("files/sample.pdf"))
    # extract_images_from_pdf("files/test_pdf_image.pdf")
    convert_img_pdf("files/estrela.png")