-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathconvert.py
More file actions
36 lines (31 loc) · 1 KB
/
convert.py
File metadata and controls
36 lines (31 loc) · 1 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
from pdfminer.high_level import extract_text
import os
import glob
import sys
# Directory scanned for input PDFs and directory that receives the extracted text.
pdf_folder = "documents"
text_folder = "text_files"


def txt_name_for(pdf_path):
    """Return the ``.txt`` file name to write for the PDF at *pdf_path*.

    Only the final extension is swapped, so a name such as
    ``my.pdf.notes.pdf`` becomes ``my.pdf.notes.txt`` (a plain
    ``str.replace(".pdf", ".txt")`` would rewrite every occurrence), and an
    upper-case ``.PDF`` extension is replaced as well.
    """
    stem, _ext = os.path.splitext(os.path.basename(pdf_path))
    return f"{stem}.txt"


# exist_ok avoids the check-then-create race of a separate os.path.exists test.
os.makedirs(text_folder, exist_ok=True)

success_count = 0
fail_count = 0
failed_files = []

for pdf_path in glob.glob(f"{pdf_folder}/*.pdf"):
    try:
        text = extract_text(pdf_path)
        filename = txt_name_for(pdf_path)
        output_path = os.path.join(text_folder, filename)
        with open(output_path, "w", encoding="utf-8") as out_file:
            out_file.write(text)
    except Exception as e:
        # Best-effort batch: one unreadable PDF must not abort the whole run,
        # so the failure is reported on stderr and recorded for the summary.
        print(f"[FAIL] Could not convert {pdf_path}: {e}", file=sys.stderr)
        fail_count += 1
        failed_files.append(pdf_path)
    else:
        print(f"[SUCCESS] Converted: {filename}")
        success_count += 1

print("\nConversion Summary:")
print(f" Successful conversions: {success_count}")
print(f" Failed conversions: {fail_count}")
if failed_files:
    print(" Failed files:")
    for failed_path in failed_files:
        print(f" - {failed_path}")