EDAPGui/OCR.py at main · SumZer0-git/EDAPGui · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
from __future__ import annotations
import time
from datetime import datetime

import cv2
import numpy as np
from cv2.typing import MatLike
from paddleocr import PaddleOCR
from strsimpy import SorensenDice
from strsimpy.jaro_winkler import JaroWinkler
from strsimpy.normalized_levenshtein import NormalizedLevenshtein
from EDlogger import logger
from tkinter import messagebox
import tkinter as tk

from Screen_Regions import Quad

"""
File:OCR.py

Description:
  Class for OCR processing using PaddleOCR.

Author: Stumpii
"""


class OCR:
    def __init__(self, ed_ap, screen):
        """ Create the OCR helper.
        @param ed_ap: Stored as 'self.ap'. Its 'debug_ocr' flag is read by the OCR routines.
        @param screen: Stored as 'self.screen'. Used to grab screen regions and convert rectangles.
        """
        self.ap = ed_ap
        self.screen = screen

        # Text detection + text recognition only: the three optional 'use_*' stages are switched off.
        paddle_options = {
            'use_doc_orientation_classify': False,
            'use_doc_unwarping': False,
            'use_textline_orientation': False,
        }
        self.paddleocr = PaddleOCR(**paddle_options)

        # Text similarity metrics. Only 'normalized_levenshtein' is used by string_similarity.
        self.jarowinkler = JaroWinkler()
        self.sorensendice = SorensenDice()
        self.normalized_levenshtein = NormalizedLevenshtein()

    def string_similarity(self, s1: str, s2: str) -> float:
        """ Performs a string similarity check and returns the result.
        @param s1: The first string to compare.
        @param s2: The second string to compare.
        @return: The similarity from 0.0 (no match) to 1.0 (identical).
        """
        s1_new = s1.replace("['",  "")
        s1_new = s1_new.replace("']",  "")
        s1_new = s1_new.replace('["',  "")
        s1_new = s1_new.replace('"]',  "")
        s1_new = s1_new.replace("', '",  "")
        s1_new = s1_new.replace("<",  "")
        s1_new = s1_new.replace(">",  "")
        s1_new = s1_new.replace(" ",  "")

        s2_new = s2.replace("['",  "")
        s2_new = s2_new.replace("']",  "")
        s2_new = s2_new.replace('["',  "")
        s2_new = s2_new.replace('"]',  "")
        s2_new = s2_new.replace("', '",  "")
        s2_new = s2_new.replace("<",  "")
        s2_new = s2_new.replace(">",  "")
        s2_new = s2_new.replace(" ",  "")

        # return self.jarowinkler.similarity(s1, s2)
        return self.normalized_levenshtein.similarity(s1_new, s2_new)
        # return self.sorensendice.similarity(s1_new, s2_new)

    def image_ocr(self, image, name = ''):
        """ Perform OCR with no filtering. Returns the full OCR data and a simplified list of strings.
        This routine is slower than the simplified OCR.
        @param name:
        @param image: The image to check.

        'ocr_data' is returned in the following format, or (None, None):
        [[[[[86.0, 8.0], [208.0, 8.0], [208.0, 34.0], [86.0, 34.0]], ('ROBIGO 1 A', 0.9815958738327026)]]]
        'ocr_textlist' is returned in the following format, or None:
        ['DESTINATION', 'SIRIUS ATMOSPHERICS']
        """
        # Remove Alpha channel if it exists
        image2 = cv2.cvtColor(image, cv2.COLOR_BGRA2BGR)
        try:
            ocr_data = self.paddleocr.predict(image2)

            if ocr_data is None:
                return None, None
            else:
                ocr_textlist = []
                for res in ocr_data:
                    if res is None:
                        return None, None

                    # Debug - places all detected data to 'output' folder
                    if self.ap.debug_ocr:
                        # x = datetime.now().strftime("%Y-%m-%d %H-%M-%S.%f")[:-3]  # Date time with mS.
                        res.save_to_img(f"./ocr_output/{name}")
                        res.save_to_json(f"./ocr_output/{name}")

                    # Added detected text to list
                    ocr_textlist.extend(res['rec_texts'])

                # print(f"image_simple_ocr: {ocr_textlist}")
                # logger.info(f"image_simple_ocr: {ocr_textlist}")
                return ocr_data, ocr_textlist

        except Exception as e:
            logger.error(f"OCR failed: {e}")
            return None, None

    def image_simple_ocr(self, image, name='') -> list[str] | None:
        """ Perform OCR with no filtering. Returns a simplified list of strings with no positional data.
        This routine is faster than the function that returns the full data. Generally good when you
        expect to only return one or two lines of text.
        @param name:
        @param image: The image to check.
        'ocr_textlist' is returned in the following format, or None:
        ['DESTINATION', 'SIRIUS ATMOSPHERICS']
        """
        if image is None:
            return None

        # start_time = time.time()

        # Remove Alpha channel if it exists
        image2 = cv2.cvtColor(image, cv2.COLOR_BGRA2BGR)
        try:
            ocr_data = self.paddleocr.predict(image2)

            # elapsed_time = time.time() - start_time
            # print(f"OCR took {elapsed_time} secs")

            if ocr_data is None:
                return None
            else:
                ocr_textlist = []
                for res in ocr_data:
                    if res is None:
                        return None

                    # Debug - places all detected data to 'output' folder
                    if self.ap.debug_ocr:
                        # x = datetime.now().strftime("%Y-%m-%d %H-%M-%S.%f")[:-3]  # Date time with mS.
                        res.save_to_img(f"./ocr_output/{name}")
                        res.save_to_json(f"./ocr_output/{name}")
                        # res.save_to_img("ocr_output")
                        # res.save_to_json("ocr_output")

                    # Added detected text to list
                    ocr_textlist.extend(res['rec_texts'])

                # print(f"image_simple_ocr: {ocr_textlist}")
                # logger.info(f"image_simple_ocr: {ocr_textlist}")
                return ocr_textlist

        except Exception as e:
            logger.error(f"OCR failed: {e}")
            return None

    def get_highlighted_item_data(self, image, min_w, min_h, name=''):
        """ Attempts to find a selected item in an image. The selected item is identified by being solid orange or blue
            rectangle with dark text, instead of orange/blue text on a dark background.
            The OCR daya of the first item matching the criteria is returned, otherwise None.
            @param name:
            @param image: The image to check.
            @param min_h: Minimum height in percent of the input image.
            @param min_w: Minimum width in percent of the input image.
     """
        # Find the selected item/menu (solid orange)
        img_selected, quad = self.get_highlighted_item_in_image(image, min_w, min_h)
        if img_selected is not None:
            # cv2.imshow("img", img_selected)

            ocr_data, ocr_textlist = self.image_ocr(img_selected, name)

            if ocr_data is not None:
                return img_selected, ocr_data, ocr_textlist, quad
            else:
                return None, None, None, None

        else:
            return None, None, None, None

    @staticmethod
    def get_highlighted_item_in_image(image, min_w, min_h) -> tuple[MatLike, Quad] | tuple[None, None]:
        """ Attempts to find a selected item in an image. The selected item is identified by being solid orange or blue
        rectangle with dark text, instead of orange/blue text on a dark background.
        The image of the first item matching the criteria and minimum width and height is returned
        with its position, otherwise None.
        The input image is not modified; the returned crop is a slice of it.
        @param image: The image to check. Must have three dimensions (height, width, channels).
        @param min_h: Minimum height in percent of the input image.
        @param min_w: Minimum width in percent of the input image.
        @return: The highlighted image and the matching Quad position in percentage of the image size, or (None, None)
        """
        # NOTE(review): the imwrite calls below run on every call and write to a fixed
        # 'test/nav-panel/out' path. They look like debugging output - confirm they are still wanted.

        # Existing size
        img_h, img_w, _ = image.shape

        # The input image
        cv2.imwrite('test/nav-panel/out/1-input.png', image)

        # Perform HSV mask: keep pixels of any hue with saturation >= 100 and value >= 180.
        hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
        lower_range = np.array([0, 100, 180])
        upper_range = np.array([255, 255, 255])
        mask = cv2.inRange(hsv, lower_range, upper_range)
        masked_image = cv2.bitwise_and(image, image, mask=mask)
        cv2.imwrite('test/nav-panel/out/2-masked.png', masked_image)

        # Convert to gray scale (no inversion; THRESH_BINARY_INV is disabled below)
        gray = cv2.cvtColor(masked_image, cv2.COLOR_BGR2GRAY)
        cv2.imwrite('test/nav-panel/out/3-gray.png', gray)

        # Convert to B&W to allow FindContours to find rectangles.
        _, thresh1 = cv2.threshold(gray, 0, 255, cv2.THRESH_OTSU)  # | cv2.THRESH_BINARY_INV)
        cv2.imwrite('test/nav-panel/out/4-thresh1.png', thresh1)

        # Perform opening. Opening is just another name of erosion followed by dilation. This will remove specs and
        # edges and then embolden the remaining edges. This works to remove text and stray lines.
        k = int(min(img_w * min_w, img_h * min_h) / 10)  # Kernel is 10% of the smaller minimum item dimension
        kernel = np.ones((k, k), np.uint8)
        opening = cv2.morphologyEx(thresh1, cv2.MORPH_OPEN, kernel)
        cv2.imwrite('test/nav-panel/out/5-opened.png', opening)

        # Finding contours in B&W image. White are the areas detected
        contours, _ = cv2.findContours(opening, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

        # Draw the debug contours on a copy. drawContours paints in place, so drawing on 'image'
        # itself would put green outlines into the caller's image and into the crop returned for OCR.
        output = image.copy()
        cv2.drawContours(output, contours, -1, (0, 255, 0), 2)
        cv2.imwrite('test/nav-panel/out/6-contours.png', output)

        for cnt in contours:
            x, y, w, h = cv2.boundingRect(cnt)
            # Check the item is greater than 85% of the minimum width or height. Which allows for some variation.
            if w > (img_w * min_w * 0.85) and h > (img_h * min_h * 0.85):
                # Crop to leave only the contour (the selected rectangle)
                cropped = image[y:y + h, x:x + w]

                cv2.imwrite('test/nav-panel/out/7-selected_item.png', cropped)
                q = Quad.from_rect([x / img_w, y / img_h, (x + w) / img_w, (y + h) / img_h])
                return cropped, q

        # No good matches, then return None
        return None, None

    def capture_region_pct(self, region):
        """ Grab the image based on the region name/rect.
        Returns an unfiltered image, either from screenshot or provided image.
        @param region: The region to check in % (0.0 - 1.0).
        """
        rect = region['rect']
        image = self.screen.get_screen_rect_pct(rect)
        return image

    def is_text_in_selected_item_in_image(self, img, text, min_w, min_h, name=''):
        """ Does the selected item in the region include the text being checked for.
        Checks if text exists in a region using OCR.
        Return True if found, False if not and None if no item was selected.
        @param name:
        @param img: The image to check.
        @param text: The text to find.
        @param min_h: Minimum height in percent of the input image.
        @param min_w: Minimum width in percent of the input image.
        """
        img_selected, _ = self.get_highlighted_item_in_image(img, min_w, min_h)
        if img_selected is None:
            logger.debug(f"Did not find a selected item in the region.")
            return None

        found, results = self.is_text_in_image(text, img_selected, name)
        return found, results

    def is_text_in_region(self, text, region) -> (bool, str):
        """ Does the region include the text being checked for. The region does not need
        to include highlighted areas.
        Checks if text exists in a region using OCR.
        Return True if found, False if not and None if no item was selected.
        @param text: The text to check for.
        @param region: The region to check in % (0.0 - 1.0).
        """

        img = self.capture_region_pct(region)

        found, results = self.is_text_in_image(text, img)
        return found, results

    def is_text_in_image(self, text, image, name='') -> (bool, str):
        """ Does the image include the text being checked for. The image does not need
        to include highlighted areas.
        Checks if text exists in an image using OCR.
        Return True if found, False if not and None if no item was selected.
        @param text: The text to check for.
        @param image: The image to check.
        @return: True with the string of results, or False with the string of results.
        """
        if image is None:
            logger.debug(f"is_text_in_image: No image supplied.")
            return None, ""

        ocr_textlist = self.image_simple_ocr(image, name)
        # print(str(ocr_textlist))

        # PaddleOCR has difficulty detecting spaces, so strip out spaces for the compare
        text_ns = text.replace(' ', '').upper()
        ocr_textlist_ns = str(ocr_textlist).replace(' ', '').upper()

        if text_ns in ocr_textlist_ns:
            logger.debug(f"Found '{text}' text in item text '{str(ocr_textlist)}'.")
            return True, str(ocr_textlist)
        else:
            logger.debug(f"Did not find '{text}' text in item text '{str(ocr_textlist)}'.")
            return False, str(ocr_textlist)

    def select_item_in_list(self, text, region, keys, min_w, min_h, name='') -> bool:
        """ Attempt to find the item by text in a list defined by the region.
        If found, leaves it selected for further actions.
        @param keys:
        @param text: Text to find.
        @param region: The region to check in % (0.0 - 1.0).
        @param min_h: Minimum height in percent of the input image.
        @param min_w: Minimum width in percent of the input image.
        """

        in_list = False  # Have we seen one item yet? Prevents quiting if we have not selected the first item.
        while 1:
            img = self.capture_region_pct(region)
            if img is None:
                return False

            found = self.is_text_in_selected_item_in_image(img, text, min_w, min_h, name)

            # Check if end of list.
            if found is None and in_list:
                logger.debug(f"Did not find '{text}' in {region} list.")
                return False

            if found:
                logger.debug(f"Found '{text}' in {region} list.")
                return True
            else:
                # Next item
                in_list = True
                keys.send("UI_Down")

    def wait_for_text(self, ap, texts: list[str], region, timeout=30) -> bool:
        """ Wait for a screen to appear by checking for text to appear in the region.
        @param ap: ED_AP instance.
        @param texts: List of text to check for. Success occurs if any in the list is found.
        @param region: The screen region to check in % (0.0 - 1.0) of the full screen.
        @param timeout: Time to wait for screen in seconds
        @return: True if text found, else False
        """
        # Draw box around region
        abs_rect = self.screen.screen_rect_to_abs(region['rect'])
        if ap.debug_overlay:
            ap.overlay.overlay_rect1('wait_for_text', abs_rect, (0, 255, 0), 2)
            ap.overlay.overlay_paint()

        start_time = time.time()
        text_found = False
        while True:
            # Check for timeout.
            if time.time() > (start_time + timeout):
                break

            # Check if screen has appeared.
            for text in texts:
                text_found, ocr_text = self.is_text_in_region(text, region)

                # Overlay OCR result
                if ap.debug_overlay:
                    ap.overlay.overlay_floating_text('wait_for_text', f'{ocr_text}', abs_rect[0], abs_rect[1] - 25, (0, 255, 0))
                    ap.overlay.overlay_paint()

                if text_found:
                    break

            if text_found:
                break

            time.sleep(0.25)

        return text_found


# class RegionCalibration:
#     def __init__(self, root, ed_ap, cb):
#         self.scr = ed_ap.scr
#         self.root = root
#         self.ap = ed_ap
#         self.ap_ckb = cb
#         self.calibration_overlay = None
#         self.ocr_calibration_data = None
#         self.selected_region = None
#         self.calibration_canvas = None
#         self.current_rect = None
#         self.start_y = None
#         self.start_x = None
#
#     def calibrate_ocr_region(self, ocr_calibration_data, selected_region: str):
#         # selected_region = self.calibration_region_var.get()
#         self.ocr_calibration_data = ocr_calibration_data
#         self.selected_region = selected_region
#         if not self.selected_region:
#             messagebox.showerror("Error", "Please select a region to calibrate.")
#             return
#
#         self.ap_ckb('log', f"Starting calibration for: {selected_region}")
#
#         self.calibration_overlay = tk.Toplevel(self.root)
#         self.calibration_overlay.overrideredirect(True)
#
#         screen_w = self.scr.screen_width
#         screen_h = self.scr.screen_height
#         screen_x = self.scr.screen_left
#         screen_y = self.scr.screen_top
#
#         # screen_w = self.root.winfo_screenwidth()
#         # screen_h = self.root.winfo_screenheight()
#         # screen_x = self.root.winfo_x()
#         # screen_y = self.root.winfo_y()
#         # self.calibration_overlay.geometry(f"{screen_w}x{screen_h}+0+0")
#         self.calibration_overlay.geometry(f"{screen_w}x{screen_h}+{screen_x}+{screen_y}")
#
#         self.calibration_overlay.attributes('-alpha', 0.3)
#
#         self.calibration_canvas = tk.Canvas(self.calibration_overlay, highlightthickness=0, bg='black')
#         self.calibration_canvas.pack(fill=tk.BOTH, expand=True)
#
#         # Draw current region
#         rect_pct = self.ocr_calibration_data[selected_region]['rect']
#
#         display_rect_pct = rect_pct
#
#         x1 = display_rect_pct[0] * screen_w
#         y1 = display_rect_pct[1] * screen_h
#         x2 = display_rect_pct[2] * screen_w
#         y2 = display_rect_pct[3] * screen_h
#         self.calibration_canvas.create_rectangle(x1, y1, x2, y2, outline='green1', width=5)
#
#         self.start_x = None
#         self.start_y = None
#         self.current_rect = None
#
#         self.calibration_canvas.bind("<ButtonPress-1>", self.on_calibration_press)
#         self.calibration_canvas.bind("<B1-Motion>", self.on_calibration_drag)
#         self.calibration_canvas.bind("<ButtonRelease-1>", self.on_calibration_release)
#         self.calibration_canvas.bind("<ButtonPress-3>", self.on_calibration_cancel)
#         self.calibration_overlay.bind("<Escape>", lambda e: self.calibration_overlay.destroy())
#
#     def on_calibration_cancel(self, event):
#         self.calibration_overlay.destroy()
#
#     def on_calibration_press(self, event):
#         self.start_x = event.x
#         self.start_y = event.y
#         self.current_rect = self.calibration_canvas.create_rectangle(self.start_x, self.start_y, self.start_x, self.start_y, outline='cyan', width=5)
#
#     def on_calibration_drag(self, event):
#         if self.current_rect:
#             self.calibration_canvas.coords(self.current_rect, self.start_x, self.start_y, event.x, event.y)
#
#     def on_calibration_release(self, event):
#         end_x = event.x
#         end_y = event.y
#
#         screen_w = self.root.winfo_screenwidth()
#         screen_h = self.root.winfo_screenheight()
#
#         # Ensure coordinates are ordered correctly
#         left = min(self.start_x, end_x)
#         top = min(self.start_y, end_y)
#         right = max(self.start_x, end_x)
#         bottom = max(self.start_y, end_y)
#
#         left_pct = left / screen_w
#         top_pct = top / screen_h
#         right_pct = right / screen_w
#         bottom_pct = bottom / screen_h
#
#         # selected_region = self.calibration_region_var.get()
#
#         # Regions that require special scaling normalization to a 1920x1080 reference resolution
#         station_scaled_regions = [
#             "EDGalaxyMap.cartographics",
#             "EDSystemMap.cartographics"
#         ]
#
#         # Get the raw percentages from the drawn box
#         raw_rect_pct = [left_pct, top_pct, right_pct, bottom_pct]
#         raw_rect_pct = [round(left_pct, 4), round(top_pct, 4), round(right_pct, 4), round(bottom_pct, 4)]
#
#         # if self.selected_region.startswith("EDStationServicesInShip.") or self.selected_region in station_scaled_regions:
#         #     new_rect_pct = self._normalize_for_station(raw_rect_pct, screen_w, screen_h)
#         #     if new_rect_pct != raw_rect_pct:
#         #         self.ap_ckb('log', f"Applying station-style normalization for {self.selected_region}.")
#         # else:
#         new_rect_pct = raw_rect_pct
#
#         self.ocr_calibration_data[self.selected_region]['rect'] = new_rect_pct
#         self.ap_ckb('log', f"New rect for {self.selected_region}: {new_rect_pct}")
#
#         # Update label
#         # self.on_region_select(None)
#
#         self.calibration_overlay.destroy()