-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathspdf_gui.py
More file actions
289 lines (225 loc) · 11.3 KB
/
spdf_gui.py
File metadata and controls
289 lines (225 loc) · 11.3 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
import io
import os
import queue
import sys
import tkinter as tk
from threading import Event, Thread, current_thread
from tkinter import filedialog, messagebox, scrolledtext, ttk

from PyPDF2 import PdfReader, PdfWriter
"""
Threading model (GUI thread on the left, worker thread on the right):

# main thread (GUI)                        # worker thread (PDF processing)
button clicked
└─> start_processing()
    ├─> create & start thread ───────────> split_pdf() runs
    └─> return to GUI                          │
                                               ├─> add_log_message() queues updates
GUI remains responsive                         │   └─> messages added to queue
└─> update_output() every 100 ms               │
    └─> check queue <──────────────────────────┘
        └─> update text widget
"""
class PdfSplitterGUI:
    """Tkinter front end that splits a PDF into parts of a bounded size.

    Threading model: the thread that constructs this object (the GUI thread)
    owns every widget. ``split_pdf`` runs on a worker thread and talks to the
    GUI only through ``message_queue``. The GUI thread drains that queue every
    ``POLL_INTERVAL_MS`` milliseconds in ``update_output``. Queue items are
    either log lines (``str``) or zero-argument callables that must be run on
    the GUI thread.
    """

    POLL_INTERVAL_MS = 100        # how often the GUI thread drains the queue
    OUTPUT_FOLDER = "output"      # parts are written here, relative to the CWD
    BYTES_PER_MB = 1024 * 1024

    def __init__(self, root):
        """Configure the root window, build the widgets and start queue polling.

        Args:
            root: the ``tk.Tk`` root window hosting the application.
        """
        self.root = root
        self.root.title("PDF Splitter")
        # Optional window icon (the .ico must be bundled, see resource_path):
        # self.root.iconbitmap(self.resource_path("custom_icon.ico"))
        self.root.geometry("600x500")
        self.root.resizable(True, True)

        # Remember which thread owns the widgets so that helpers can tell
        # whether they are being called from the GUI thread or from a worker.
        self._gui_thread = current_thread()

        # Queue for thread-safe communication (worker -> GUI thread).
        self.message_queue = queue.Queue()

        # State shared with ask_user_from_main_thread(); set up before the
        # polling loop starts so a queued dialog can never see it missing.
        self.dialog_response = None
        self.dialog_event = Event()

        self.create_widgets()
        self.update_output()

    def ask_user_from_main_thread(self, msg):
        """Show a yes/no dialog on the GUI thread and return the user's answer.

        Meant to be called from a worker thread: the dialog is queued for the
        GUI thread and the caller blocks until the user has answered. When it
        is called from the GUI thread itself the dialog is shown directly,
        because waiting on the event there would block the very event loop
        that has to display the dialog.

        Args:
            msg: question shown in the dialog body.

        Returns:
            The value returned by ``messagebox.askyesno`` (True for "yes"),
            or None if the dialog could not be shown.
        """
        if current_thread() is self._gui_thread:
            self.dialog_response = messagebox.askyesno("Confirm Action", msg)
            return self.dialog_response

        def show_dialog():
            try:
                self.dialog_response = messagebox.askyesno("Confirm Action", msg)
            finally:
                # Always release the waiting worker, even if the dialog failed.
                self.dialog_event.set()

        self.dialog_response = None
        self.dialog_event.clear()
        self.message_queue.put(show_dialog)  # executed by update_output()
        self.dialog_event.wait()
        return self.dialog_response

    def resource_path(self, relative_icon_path):
        """Return the absolute path of a bundled resource such as the icon.

        When running from a PyInstaller bundle (``sys._MEIPASS`` is set) the
        path is resolved inside the unpacked bundle; the icon must have been
        added with ``--add-data="cropped-zarro-icon.ico;."``. Otherwise it is
        resolved against the current working directory.
        """
        if hasattr(sys, '_MEIPASS'):
            return os.path.join(sys._MEIPASS, relative_icon_path)
        return os.path.join(os.path.abspath("."), relative_icon_path)

    def create_widgets(self):
        """Build the widgets: file picker, part size, button, log and status bar."""
        main_frame = ttk.Frame(self.root, padding="10", style="Custom.TFrame")
        main_frame.grid(row=0, column=0, sticky=(tk.W, tk.E, tk.N, tk.S))
        self.root.columnconfigure(0, weight=1)
        self.root.rowconfigure(0, weight=1)

        # Input file section.
        ttk.Label(main_frame, text="Selected PDF:", style="Custom.TLabel").grid(
            row=0, column=0, sticky=tk.W, pady=5)
        file_frame = ttk.Frame(main_frame, style="Custom.TFrame")
        file_frame.grid(row=0, column=1, sticky=(tk.W, tk.E), padx=5)
        file_frame.columnconfigure(0, weight=1)
        self.input_file = ttk.Entry(file_frame)
        self.input_file.grid(row=0, column=0, sticky=(tk.W, tk.E))
        select_btn = ttk.Button(file_frame, text="Browse", command=self.select_file,
                                style="Custom.TButton")
        select_btn.grid(row=0, column=1, padx=(5, 0))

        # Part size selection (in MB).
        ttk.Label(main_frame, text="Part Size (MB):", style="Custom.TLabel").grid(
            row=1, column=0, sticky=tk.W, pady=5)
        self.part_size = ttk.Combobox(main_frame, values=[1, 2, 5, 10, 20, 50])
        self.part_size.grid(row=1, column=1, sticky=(tk.W, tk.E), padx=5)
        self.part_size.set(5)

        # Submit button.
        self.process_btn = ttk.Button(main_frame, text="Split PDF",
                                      command=self.start_processing,
                                      style="Custom.TButton")
        self.process_btn.grid(row=2, column=0, columnspan=2, pady=10)

        # Output log; this row absorbs extra vertical space on resize.
        output_frame = ttk.LabelFrame(main_frame, text="Logs...", padding="5",
                                      style="Custom.TLabelframe")
        output_frame.grid(row=3, column=0, columnspan=2,
                          sticky=(tk.W, tk.E, tk.N, tk.S))
        main_frame.rowconfigure(3, weight=1)
        main_frame.columnconfigure(1, weight=1)
        self.output_text = scrolledtext.ScrolledText(output_frame, height=15)
        self.output_text.pack(fill=tk.BOTH, expand=True)

        # Status bar.
        self.status_var = tk.StringVar()
        self.status_var.set("Waiting for input...")
        self.status_bar = ttk.Label(main_frame, textvariable=self.status_var,
                                    relief=tk.SUNKEN, style="Custom.TLabel")
        self.status_bar.grid(row=4, column=0, columnspan=2, sticky=(tk.W, tk.E), pady=5)

    def select_file(self):
        """Open a file dialog and copy the chosen PDF path into the entry."""
        filetypes = (
            ('PDF files', '*.pdf'),
            ('All files', '*.*')
        )
        filename = filedialog.askopenfilename(
            title='Select a PDF file',
            filetypes=filetypes
        )
        if filename:  # empty when the dialog was cancelled
            self.input_file.delete(0, tk.END)
            self.input_file.insert(0, filename)

    def add_log_message(self, message):
        """Queue a log line (producer side); safe to call from any thread."""
        self.message_queue.put(message)

    def _run_on_gui_thread(self, func, *args):
        """Queue ``func(*args)`` to be executed on the GUI thread by update_output()."""
        self.message_queue.put(lambda: func(*args))

    def update_output(self):
        """Drain the message queue on the GUI thread (consumer side).

        Log lines are appended to the text widget and queued callables are
        executed. The method then reschedules itself, so it keeps running
        every ``POLL_INTERVAL_MS`` milliseconds for the life of the window.
        """
        appended = False
        try:
            while True:
                item = self.message_queue.get_nowait()
                if callable(item):
                    item()
                else:
                    self.output_text.insert(tk.END, item + "\n")
                    appended = True
        except queue.Empty:
            pass
        finally:
            if appended:
                self.output_text.see(tk.END)  # scroll once per batch
            self.root.after(self.POLL_INTERVAL_MS, self.update_output)

    def start_processing(self):
        """Validate the inputs and start ``split_pdf`` on a worker thread."""
        input_file = self.input_file.get()

        # Anything that is not a positive whole number is rejected with the
        # same message instead of exposing the raw int() parser error.
        try:
            part_size = int(self.part_size.get())
        except ValueError:
            part_size = 0
        if part_size <= 0:
            messagebox.showerror("Error", "Invalid part size")
            return

        # isfile() also rejects directories, which exists() would accept.
        if not os.path.isfile(input_file):
            messagebox.showerror("Error", f"File not found: {input_file}")
            return

        self.process_btn.state(['disabled'])
        self.status_var.set("Processing...")
        # Run the split on a worker thread so the GUI stays responsive.
        Thread(target=self.split_pdf,
               args=(input_file, self.OUTPUT_FOLDER, part_size),
               daemon=True).start()

    def get_page_sizes(self, pdf_reader):
        """Estimate the size in bytes of every page of the PDF.

        Each page is written on its own into an in-memory PDF and the length
        of that buffer is taken as the page size, so every value is the size
        of a standalone one-page document. The size of a combined part may
        differ from the sum of these estimates.

        Args:
            pdf_reader: an open ``PdfReader``.

        Returns:
            list[int]: one size per page, in page order.
        """
        page_sizes = []
        for page in pdf_reader.pages:
            temp_writer = PdfWriter()
            temp_writer.add_page(page)
            temp_buffer = io.BytesIO()
            temp_writer.write(temp_buffer)
            page_sizes.append(len(temp_buffer.getvalue()))
        return page_sizes

    @staticmethod
    def _plan_parts(page_sizes, part_size_bytes):
        """Group consecutive pages into parts below ``part_size_bytes``.

        A new part starts when adding the next page would bring the running
        total to the limit or beyond. A part is never left empty: a page that
        reaches the limit on its own becomes a part by itself.

        Args:
            page_sizes: estimated size of each page, in page order.
            part_size_bytes: size limit of one part, in bytes.

        Returns:
            list[tuple[int, int]]: half-open ``(first_page, stop_page)`` index
            ranges, one per part, covering every page exactly once.
        """
        parts = []
        first_page = 0
        accumulated = 0
        for index, size in enumerate(page_sizes):
            accumulated += size
            # "index > first_page" means the current part already holds pages;
            # without this check an oversized first page produced an empty part.
            if accumulated >= part_size_bytes and index > first_page:
                parts.append((first_page, index))
                first_page = index
                accumulated = size
        if first_page < len(page_sizes):
            parts.append((first_page, len(page_sizes)))
        return parts

    @staticmethod
    def _prepare_output_folder(output_folder):
        """Create the output folder, or remove the files left in it by a previous run."""
        os.makedirs(output_folder, exist_ok=True)
        for entry in os.listdir(output_folder):
            entry_path = os.path.join(output_folder, entry)
            # Only plain files are removed; os.remove() fails on a directory
            # and that used to abort the whole split.
            if os.path.isfile(entry_path):
                os.remove(entry_path)

    def split_pdf(self, input_path, output_folder, part_size_mb):
        """Split the PDF into parts of at most roughly ``part_size_mb`` each.

        Runs on the worker thread. It never touches a widget directly: log
        lines, status changes and re-enabling the button all go through the
        message queue and are applied by the GUI thread. Any exception is
        reported in the log instead of being raised.

        Args:
            input_path: str, path to the input PDF file
            output_folder: str, folder where the parts are saved; files
                already in it are deleted first
            part_size_mb: int, maximum size of each part in MB
        """
        try:
            self._prepare_output_folder(output_folder)

            self.add_log_message(f"Opening PDF: {input_path}")
            pdf_reader = PdfReader(input_path)
            page_sizes = self.get_page_sizes(pdf_reader)
            total_pages = len(pdf_reader.pages)
            self.add_log_message(f"Original PDF total pages count: {total_pages}")

            part_size_bytes = part_size_mb * self.BYTES_PER_MB
            base_name = os.path.splitext(os.path.basename(input_path))[0]
            parts = self._plan_parts(page_sizes, part_size_bytes)

            total_pages_final = 0
            for part_number, (first_page, stop_page) in enumerate(parts, start=1):
                writer = PdfWriter()
                for page_num in range(first_page, stop_page):
                    writer.add_page(pdf_reader.pages[page_num])
                output_path = os.path.join(
                    output_folder, f"{base_name}_part_{part_number}.pdf")
                with open(output_path, "wb") as output_file:
                    writer.write(output_file)
                page_count = stop_page - first_page
                total_pages_final += page_count
                self.add_log_message(f"Part #{part_number} has {page_count} pages.")

            self.add_log_message(f"PDF splited into {len(parts)} parts.")
            if total_pages_final != total_pages:
                self.add_log_message(
                    f"WARNING: Total pages in parts ({total_pages_final}) "
                    f"does not match original PDF ({total_pages})")
            else:
                self.add_log_message(
                    f"Total pages in parts match original PDF: {total_pages_final}")
            self._run_on_gui_thread(self.status_var.set, "Done!")
        except Exception as e:
            self.add_log_message(f"Error: {str(e)}")
            self._run_on_gui_thread(self.status_var.set, "Something went wrong!")
        finally:
            # Re-enable the button on the GUI thread whatever happened.
            self._run_on_gui_thread(self.process_btn.state, ['!disabled'])
def main():
    """Create the Tk root window, attach the splitter GUI and run the event loop."""
    window = tk.Tk()
    # The GUI object is bound to a name so it stays referenced while the
    # event loop is running.
    gui = PdfSplitterGUI(window)
    window.mainloop()


# Start the application only when the file is run as a script.
if __name__ == "__main__":
    main()