-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathextractor.py
More file actions
75 lines (60 loc) · 2.33 KB
/
extractor.py
File metadata and controls
75 lines (60 loc) · 2.33 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
import cv2
import easyocr
import pytesseract
import matplotlib.pyplot as plt
import numpy as np
import csv
import os
# Function to save data to a CSV file
def save_to_csv(data, csv_path='extracted_text.csv'):
headers = ['Text', 'Score', 'BoundingBox']
# Check if the CSV file already exists
file_exists = os.path.isfile(csv_path)
with open(csv_path, mode='a', newline='', encoding='utf-8') as file:
writer = csv.writer(file)
if not file_exists:
writer.writerow(headers) # Write headers if the file is new
writer.writerows(data)
# Function to search for text in the CSV file
def search_text(query, csv_path='extracted_text.csv'):
results = []
if not os.path.isfile(csv_path):
print(f"No data found. {csv_path} does not exist.")
return results
with open(csv_path, mode='r', encoding='utf-8') as file:
reader = csv.DictReader(file)
for row in reader:
if query.lower() in row['Text'].lower():
results.append(row)
return results
# Code to read image
image = cv2.imread('images (1).jfif')
# Create an EasyOCR text detector instance to process the image and extract text data
reader = easyocr.Reader(['en'], gpu=False)
#Perform Optical Character Recognition (OCR) on the image to detect text, including its content,
#bounding box coordinates, and confidence score
text_ = reader.readtext(image, min_size=3, text_threshold=0.5, low_text=0.3)
threshold = 0.3
# Organize data for storage
extracted_data = []
for t_, t in enumerate(text_):
bbox, text, score = t
if score > threshold:
cv2.rectangle(image, tuple(map(int, bbox[0])), tuple(map(int, bbox[2])), (0, 255, 0), 5)
cv2.putText(image, text, tuple(map(int, bbox[0])), cv2.FONT_HERSHEY_COMPLEX, 0.65, (255, 0, 0), 2)
extracted_data.append([text, score, bbox]) # Store in list format for saving
# Save data to CSV
save_to_csv(extracted_data)
# Display annotated image
plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
plt.axis('off')
plt.show()
# Search functionality
query = input("Enter text to search: ")
search_results = search_text(query)
if search_results:
print("Search Results:")
for result in search_results:
print(f"Text: {result['Text']}, Score: {result['Score']}, BoundingBox: {result['BoundingBox']}")
else:
print("No matches found.")