forked from martinakaduc/PDF-Extractor
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdemo0.py
More file actions
23 lines (20 loc) · 632 Bytes
/
demo0.py
File metadata and controls
23 lines (20 loc) · 632 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
from __future__ import division
import os
import re
import numpy as np
import pdftotext
if __name__ == '__main__':
    # Script entry point: extract the text of each listed PDF and save it
    # next to the source file with a ".txt" extension.
    #
    # To process every PDF in the current directory instead, build the list
    # with:
    #   pdf_paths = [name for name in os.listdir()
    #                if not name.startswith(".") and name.endswith("pdf")]
    pdf_paths = ["VN101466/SI_HANV07496600.pdf"]

    for filename in pdf_paths:
        print(filename)

        # Convert the PDF to one string per page.
        with open(filename, "rb") as pdf_file:
            pdf = pdftotext.PDF(pdf_file)

        # Skip documents that have no pages (indexing would raise IndexError)
        # or whose first page yields an empty string.
        if len(pdf) == 0 or pdf[0] == "":
            continue

        # splitext handles extensions of any length, unlike slicing off the
        # last three characters.
        txt_path = os.path.splitext(filename)[0] + ".txt"

        # Explicit UTF-8 so non-ASCII text is written identically on every
        # platform instead of depending on the locale's default encoding.
        with open(txt_path, "w", encoding="utf-8") as txt_file:
            txt_file.writelines(pdf)