lab-ocr/ocr.py at main · nsander57/lab-ocr · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
from paddleocr import PaddleOCR
from img_rotate import rotate_picture_90_right
from os import walk
from os import makedirs
import pandas as pd
from PIL import Image


# Shared OCR pipeline used for every image below; the three optional
# stages are all switched off through their constructor flags.
ocr_options = {
    "use_doc_orientation_classify": False,  # no document orientation classification model
    "use_doc_unwarping": False,  # no text image rectification model
    "use_textline_orientation": False,  # no text line orientation classification model
}
ocr = PaddleOCR(**ocr_options)


# ocr = PaddleOCR(lang="en") # Uses English model by specifying language parameter
# ocr = PaddleOCR(ocr_version="PP-OCRv4") # Uses other PP-OCR versions via version parameter
# ocr = PaddleOCR(device="gpu") # Enables GPU acceleration for model inference via device parameter
# ocr = PaddleOCR(
#     text_detection_model_name="PP-OCRv5_mobile_det",
#     text_recognition_model_name="PP-OCRv5_mobile_rec",
#     use_doc_orientation_classify=False,
#     use_doc_unwarping=False,
#     use_textline_orientation=False,
# ) # Switch to PP-OCRv5_mobile models


# img_path = "./gel_images/povale"
# Directory holding the images. Leave it empty to be prompted at start-up;
# pressing Enter at the prompt selects the bundled sample directory.
img_dir_path = ""

if img_dir_path == "":
    dir_path_answer = (
        input("Pathway to directory with images: ") or "./gel_images/povale"
    )
    print(f"You chose: {dir_path_answer}")
    img_dir_path = dir_path_answer

# Collect the names of the PNG files sitting directly inside img_dir_path.
# walk() yields the top directory first, so taking only its first item keeps
# the listing to that directory (no descent into sub-folders).
top_level = next(walk(img_dir_path), None)
if top_level is None:
    # walk() ignores listing errors by default and simply yields nothing, so
    # a mistyped path would otherwise go unnoticed and the later makedirs()
    # calls would create the bogus directory tree.
    raise SystemExit(f"Cannot read image directory: {img_dir_path}")

_, _, filenames = top_level
print(filenames)

png_file_list = []  # file list
# Sorted so the processing order is reproducible between runs; the extension
# test is case-insensitive so "scan.PNG" is picked up as well.
for name in sorted(filenames):
    if name.lower().endswith(".png"):
        print(name)
        png_file_list.append(name)

# Show the queued file names twice: once as a one-column DataFrame and once
# as the plain Python list.
file_list = pd.DataFrame(data=png_file_list)

print(
    "here are the files we're about to process: ",
    file_list,
    png_file_list,
    sep="\n",
)

# ./gel_images/povale/
# Ask whether the images need rotating, then fill img_array with the paths
# handed to the OCR step: the rotated copies for "Y", the originals for "N".
# The prompt repeats until one of the two answers is given.
img_array = []
while True:
    rotate_answer = input("do these images need to be rotated 90 degrees? (Y / N): ")
    # Tolerate surrounding whitespace and either letter case.
    choice = rotate_answer.strip().lower()
    if choice == "y":
        # first make a new directory for the rotated files
        makedirs(f"{img_dir_path}/rotated", exist_ok=True)
        for img_name in png_file_list:
            rotated_path = f"{img_dir_path}/rotated/{img_name}"
            # The context manager closes the source file as soon as the
            # rotated copy is written, instead of leaving every handle open.
            with Image.open(f"{img_dir_path}/{img_name}") as img:
                new_img = rotate_picture_90_right(img)
                new_img.save(rotated_path)
            print("rotated image saved.")
            img_array.append(rotated_path)
        print(img_array)
        break
    elif choice == "n":
        for img_name in png_file_list:
            source_path = f"{img_dir_path}/{img_name}"
            # Opening is kept only as an early check that the file is a
            # readable image; the handle is closed right away.
            with Image.open(source_path):
                pass
            img_array.append(source_path)
        print("not rotating...")
        break
    else:
        print("you did not select Y or N... please try again")


# Run OCR over every queued image path. Each result returned by
# ocr.predict() is exported through save_to_img / save_to_json into the
# "output" sub-directory of the image directory, created here if missing.
output_dir = f"{img_dir_path}/output"
makedirs(output_dir, exist_ok=True)
for idx, img in enumerate(img_array):
    for res in ocr.predict(img):
        res.save_to_img(output_dir)
        res.save_to_json(output_dir)
    print(f"image #{idx} ({img}) done.")


# result = ocr.predict("./gel8_l2_vert.png")
# for res in result:
#     res.print()
#     res.save_to_img("output")
#     res.save_to_json("output")