LineRemoverNN/infer.py at master · PastaLaPate/LineRemoverNN · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
import sys
import os

sys.path.insert(0, os.path.realpath(os.path.dirname(__file__)))
from train import loadBestModel
from torch import Tensor, mean
from torchvision.io import read_image
from torchvision.io.image import ImageReadMode
from torchvision.transforms import ToTensor
from typing import List, Union
from data.IAM import split_into_blocks, reconstruct_image
import torch.cuda as torchc
from postProcessing import thresholdImage
import numpy as np
import cv2

# Pick the compute device once at import time: CUDA when it is available,
# otherwise the CPU.
if torchc.is_available():
    device = "cuda"
else:
    device = "cpu"

# Module-level side effect: the model is loaded when this module is imported
# and is shared by every function below.
print("[LineRemoverNN] Loading model...")
network = loadBestModel()
network.eval()  # switch the model to evaluation mode before any inference
network.to(device)


def processImg(img: Union[str, np.ndarray, Tensor], postProcess=True) -> np.ndarray:
    """
    ## Process an image and remove ruled lines
    ### Args:
      img: (``str | numpy.ndarray | torch.Tensor``)
        **img must be 512*512**
        - If image is ``str``, it is read from that path as a grayscale ``Tensor``
        - If image is ``ndarray`` and not grayscale, it is converted from BGR to grayscale and then to ``Tensor``
        - If image is ``ndarray`` and grayscale (2-D, or 3-D with one channel), it is converted to ``Tensor``
        - If image is ``Tensor``, it is used as-is; a 2-D ``Tensor`` gets a leading channel dimension
        - Integer tensors (e.g. the ``uint8`` output of ``read_image``) are scaled to floats in 0<->1

      postProcess: (``bool``)
        - If ``True``, the result is passed through ``thresholdImage`` before being returned
    ### Returns:
      ``numpy.ndarray`` Grayscale image array scaled by 255 (shape [1, 512, 512] for a single-channel input).
      When ``postProcess`` is ``True``, the value returned by ``thresholdImage`` for that array.
    ### Raises:
      TypeError: ``img`` is not a ``str``, ``numpy.ndarray`` or ``torch.Tensor``
      Exception: The image isn't 512x512
    ### Notes:
      - If the input image is not 512x512, you can use `LineRemoverNN.data.IAM.split_into_blocks` to preprocess it.
    ### Examples:
    >>> # Using str as path :
    >>> img = processImg('./Image-Path.png')
    >>> # Using BGR ndarray :
    >>> img = cv2.imread('./Image-Path.png')
    >>> img = processImg(img)
    >>> # Using GrayScale ndarray:
    >>> img = cv2.imread('./Image-Path.png', cv2.IMREAD_GRAYSCALE)
    >>> img = processImg(img)
    >>> # Using Tensor:
    >>> img = torchvision.io.read_image('./Image-Path.png', torchvision.io.image.ImageReadMode.GRAY)
    >>> img = processImg(img)

    """
    # Function-scope import: the file only imports `Tensor` and `mean` from torch.
    from torch import no_grad

    if isinstance(img, str):
        tensorimg = read_image(img, ImageReadMode.GRAY)
    elif isinstance(img, np.ndarray):
        isGray = img.ndim == 2 or img.shape[2] == 1
        # Feed the *converted* array to ToTensor; converting and then passing
        # the original colour array would silently discard the conversion.
        gray = img if isGray else cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        tensorimg = ToTensor()(gray)
    elif isinstance(img, Tensor):
        tensorimg = img
    else:
        raise TypeError(
            f"img must be a str, numpy.ndarray or torch.Tensor, got {type(img).__name__}"
        )

    if not tensorimg.is_floating_point():
        # Integer images are in 0..255 while the ndarray path (ToTensor) yields
        # floats in 0..1; the subtraction and `* 255` below assume the latter.
        tensorimg = tensorimg.float() / 255

    if tensorimg.dim() == 2:
        tensorimg = tensorimg.unsqueeze(0)  # Add the missing channel dimension

    if tensorimg.dim() != 3 or tuple(tensorimg.shape[1:]) != (512, 512):
        # Check if image is 512x512 (required size)
        raise Exception(
            "Provided image isn't 512x512 you may use LineRemoverNN.data.IAM.split_into_blocks function to split it"
        )

    tensorimg = tensorimg.to(device)
    with no_grad():  # Inference only: no autograd graph is needed
        outputs: Tensor = network(tensorimg.unsqueeze(0))  # Add batch dimension and run the model
        outputs = mean(outputs, dim=1, keepdim=True)  # Average over the channel dimension
        output: Tensor = outputs.squeeze(0) - tensorimg  # Remove batch dimension, apply filter

    output = output.detach().cpu() * 255  # Back to CPU, rescale by 255
    outputImage: np.ndarray = output.numpy()
    if postProcess:
        return thresholdImage(outputImage)  # If postprocess, threshold the image

    return outputImage


def splitAndProcessImg(
    img: Union[str, np.ndarray], postProcess=True
) -> List[np.ndarray]:
    """
    ## Split and process an image using the `processImgs` function
    Splits the input image into blocks with ``split_into_blocks(block_size=512)`` and processes each block to remove ruled lines.

    ### Args:
      img: (``str | numpy.ndarray``) The input image.
          - If ``str``, the path to the image. The image is loaded in grayscale with ``cv2.imread``.
          - If ``numpy.ndarray``, the image array in grayscale. A 2-D array gets a trailing channel axis added.

      postProcess: (``bool``)
          - See ``processImg.postProcess``

    ### Returns:
      ``List[numpy.ndarray]``: A list of processed grayscale image arrays, one for each block. You can reconstruct the image using data.IAM.reconstruct_image

    ### Raises:
      OSError: ``img`` is a path and ``cv2.imread`` could not load an image from it

    ### Examples:
    >>> # Using an image path:
    >>> blocks = splitAndProcessImg('./largeImage.png')
    >>> # Using a grayscale numpy image:
    >>> img = cv2.imread('./largeImage.png', cv2.IMREAD_GRAYSCALE)
    >>> blocks = splitAndProcessImg(img)
    """
    if not isinstance(img, np.ndarray):
        path = img
        img = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
        if img is None:
            # cv2.imread reports failure by returning None instead of raising.
            raise OSError(f"Could not read an image from {path!r}")

    if img.ndim < 3:
        img = np.expand_dims(img, 2)  # (H, W) -> (H, W, 1)

    blocks = split_into_blocks(img, block_size=512)
    return processImgs(blocks, postProcess=postProcess)


def processImgs(
    imgs: List[Union[str, np.ndarray, Tensor]], postProcess=True
) -> List[np.ndarray]:
    """
    ## Remove ruled lines from several images
    Calls `processImg` once per element of ``imgs``, in order, and collects the results.

    ### Args:
      imgs: (``List[str | numpy.ndarray | torch.Tensor]``) The images to process; each one is handed to `processImg` unchanged.
      postProcess: (``bool``) Forwarded to `processImg` as its ``postProcess`` argument for every image.

    ### Returns:
      ``List[numpy.ndarray]``: The `processImg` result for each input, in the same order as ``imgs``.

    ### Examples:
    >>> # From file paths:
    >>> processed_imgs = processImgs(['./img1.png', './img2.png'])
    >>> # From numpy images:
    >>> loaded = [cv2.imread('./img1.png'), cv2.imread('./img2.png')]
    >>> processed_imgs = processImgs(loaded)
    """
    return [processImg(single, postProcess=postProcess) for single in imgs]