-
Notifications
You must be signed in to change notification settings - Fork 0
/
text_detection.py
311 lines (237 loc) · 13.7 KB
/
text_detection.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
"""
Text Detection Module
---------------------
Description:
This file contains code for text detection from images. It uses image processing and machine learning
techniques to identify and extract text from images.
Authors: LE GOURRIEREC Titouan, CONNESSON Léna, PROUVOST Axel
Date: 04/06/2024
"""
# import libraries
import re
import numpy as np
import cv2
import easyocr
########################################################################################################
############################ Parameters & Constants #############################
########################################################################################################
# Constants
# Same value as the literal default of the `threshold` parameter of text_detection
# (the grouping distance; presumably in pixels - TODO confirm).
# NOTE(review): the name is spelled "TRESHOLD" (sic); renaming it would break any
# external code importing it, so it is kept as is.
TRESHOLD = 100
# Same value as the literal default of the `compression_ratio` parameter of text_detection.
COMPRESSION_RATIO = 0.9
# OCR reader created once when the module is imported and used as the default
# `reader` argument of text_detection. Configured for English ('en') with gpu=True.
READER = easyocr.Reader(['en'], gpu=True)
########################################################################################################
############################ Main Functions #############################
########################################################################################################
def text_detection(img: np.ndarray,
                   reader: easyocr.Reader = READER,
                   threshold: int = 100,
                   compression_ratio: float = 0.9) -> tuple:
    """
    Run OCR on an image and extract the values labelled R, P, code_champ, M and EPO.

    Parameters:
    - img (numpy.ndarray): The image to process.
    - reader (easyocr.Reader, optional): The OCR reader to use for text detection.
                                         Default is the module-level READER.
    - threshold (int, optional): The maximum vertical distance used when grouping detections
                                 into lines. Default is 100.
    - compression_ratio (float, optional): Ratio forwarded to text_box to shrink the cropped
                                           text region. Default is 0.9.

    Returns:
    - tuple: A 6-tuple (R, P, code_champ, M, EPO, text_box_result).
             R is the text following the first detection starting with 'R', P is the last
             character of the text following the first detection starting with 'P',
             code_champ, M and EPO are the first numbers found on the same line as their
             keyword. Any value that cannot be found is None.
             text_box_result is the image returned by text_box. When the reader detects
             nothing, the five values are None and the image is an all-zero array shaped
             like `img`.
    """
    # Use the OCR reader to detect text in the image
    detections = reader.readtext(img)

    # If no text is detected, return None for all values and an empty image
    if not detections:
        return None, None, None, None, None, np.zeros_like(img)

    # Crop (and compress) the image region that contains all the detected text
    text_box_result = text_box(img, detections, compression_ratio)

    # Sort the detections from top to bottom and drop the stand-alone 'T' label
    detections_sorted = sort_detections(detections)
    detections_sorted = remove_unwanted_elements(detections_sorted, ['T'])

    # Extract the values of R and P from the sorted detections
    R = get_value_starting_with(detections_sorted, 'R')
    P = get_value_starting_with(detections_sorted, 'P')

    # Only the last character of the P value is kept. A bare "P" detection yields an
    # empty string, which has no last character: treat it like a missing value
    # instead of raising IndexError.
    P = P[-1] if P else None

    # Remove the elements starting with R and P from the sorted detections
    detections_sorted = remove_elements_starting_with(detections_sorted, ['R', 'P'])

    # Group the remaining detections into lines. Nothing may be left after the
    # filtering above, in which case there is nothing to group.
    if detections_sorted:
        groups = group_detections(detections_sorted, threshold=threshold)
    else:
        groups = []

    # Extract the values of code_champ, M, and EPO from the groups
    code_champ = get_number_from_groups(groups, ['code', 'champ'])
    M = get_number_from_groups(groups, ['M'])
    EPO = get_number_from_groups(groups, ['EPO'])

    return R, P, code_champ, M, EPO, text_box_result
########################################################################################################
############################ Helper Functions #############################
########################################################################################################
def sort_detections(detections: list[tuple]) -> list:
    """
    Return the detections ordered from the top of the image to the bottom.

    The ordering key of a detection is the mean of the y-coordinates of the first and third
    corners of its bounding box. Smaller keys (higher in the image) come first, and
    detections with equal keys keep their relative input order.

    Parameters:
    - detections (list): A list of detections. Each detection is a tuple whose first element
                         is the list of bounding box corners [[x1, y1], [x2, y2], [x3, y3], [x4, y4]],
                         where (x1, y1) is expected to be the top-left corner and (x3, y3)
                         the bottom-right corner.

    Returns:
    - list: A new list holding the same detections in sorted order; the input is not modified.
    """
    def vertical_center(item):
        corners = item[0]
        return (corners[0][1] + corners[2][1]) / 2

    ordered = list(detections)
    ordered.sort(key=vertical_center)
    return ordered
def group_detections(detections_sorted: list[tuple],
                     threshold: int) -> list:
    """
    Group vertically sorted detections into lines based on a threshold value.

    Two consecutive detections belong to the same group when the vertical distance between
    their midpoints is less than or equal to the threshold. The comparison is always made
    with the immediately preceding detection, not with the first detection of the group.
    Each group is then sorted from left to right using the horizontal midpoint.

    Parameters:
    - detections_sorted (list): A list of detections sorted by vertical position. Each detection
                                is a tuple whose first element is the list of bounding box corners
                                and whose second element is the detected text.
    - threshold (int): The maximum vertical distance between two consecutive detections for them
                       to be considered part of the same group.

    Returns:
    - groups (list): A list of groups, each group being a list of detections sorted by horizontal
                     position. An empty input yields an empty list.
    """
    # Nothing to group: avoid indexing the first element of an empty list
    if len(detections_sorted) == 0:
        return []

    def center_y(detection):
        # Vertical midpoint between the first and third corners of the box
        return (detection[0][0][1] + detection[0][2][1]) / 2

    def center_x(detection):
        # Horizontal midpoint between the first and third corners of the box
        return (detection[0][0][0] + detection[0][2][0]) / 2

    groups = []
    current_group = [detections_sorted[0]]  # The first detection opens the first group

    # Walk over each detection together with the one that precedes it
    for prev_detection, current_detection in zip(detections_sorted, detections_sorted[1:]):
        if abs(center_y(current_detection) - center_y(prev_detection)) <= threshold:
            current_group.append(current_detection)
        else:
            # Gap too large: close the current group and start a new one
            groups.append(sorted(current_group, key=center_x))
            current_group = [current_detection]

    # The last group is always non-empty at this point
    groups.append(sorted(current_group, key=center_x))
    return groups
def remove_unwanted_elements(detections: list[tuple],
                             unwanted_elements: list[str]) -> list:
    """
    Filter out the detections whose text is one of the unwanted elements.

    Parameters:
    - detections (list): A list of detections. Each detection is a tuple where the second element
                         is the detected text.
    - unwanted_elements (list): The detected texts to discard. The whole text of a detection
                                is tested for membership in this list.

    Returns:
    - list: A new list with the remaining detections, in their original order.
    """
    kept = []
    for detection in detections:
        if detection[1] in unwanted_elements:
            # The text of this detection is listed as unwanted: skip it
            continue
        kept.append(detection)
    return kept
def get_value_starting_with(detections: list[tuple],
                            start_char: str) -> str:
    """
    Return the text of the first detection starting with `start_char`, without its first character.

    Parameters:
    - detections (list): A list of detections. Each detection is a tuple where the second element
                         is the detected text.
    - start_char (str): The character that the desired detection's text should start with.

    Returns:
    - str: The detected text of the first matching detection with its first character removed
           (an empty string when the text has only one character).
           None when no detection starts with `start_char`.
    """
    # Lazily produce the trimmed text of every matching detection and keep the first one
    trimmed_matches = (
        detection[1][1:]
        for detection in detections
        if detection[1].startswith(start_char)
    )
    return next(trimmed_matches, None)
def remove_elements_starting_with(detections: list[tuple],
                                  start_chars: str) -> list:
    """
    Drop every detection whose text starts with one of the given prefixes.

    Parameters:
    - detections (list): A list of detections. Each detection is a tuple where the second element
                         is the detected text.
    - start_chars (list): The prefixes to reject. The value is iterated, so a plain string is
                          also accepted, in which case each of its characters is a prefix.

    Returns:
    - list: A new list with the detections that start with none of the prefixes,
            in their original order.
    """
    kept = []
    for detection in detections:
        text = detection[1]
        for prefix in start_chars:
            if text.startswith(prefix):
                # Unwanted prefix found: this detection is discarded
                break
        else:
            # No prefix matched
            kept.append(detection)
    return kept
def get_number_from_groups(groups: list[list[list[tuple]]],
                           keywords: list[str]) -> str:
    """
    Find the first number located in the same group as one of the keywords.

    Parameters:
    - groups (list): A list of groups. Each group is a list of detections, and each detection
                     is a tuple where the second element is the detected text.
    - keywords (list): The keywords to look for. A keyword matches when it is a substring of
                       the detected text (case-sensitive).

    Returns:
    - str: The first sequence of digits found, in group order, among the detections of a group
           that contains a keyword. The number is returned as a string.
           None when no group contains both a keyword and a number.
    """
    for group in groups:
        # Only groups mentioning at least one keyword are candidates
        mentions_keyword = any(
            keyword in detection[1]
            for detection in group
            for keyword in keywords
        )
        if not mentions_keyword:
            continue

        # Return the leftmost run of digits of the first detection that has one
        for detection in group:
            digits = re.search(r'\d+', detection[1])
            if digits:
                return digits.group()

    return None
def text_box(image: np.ndarray,
             detections: list[tuple],
             compression_ratio: float) -> np.ndarray:
    """
    Extract the part of the image within the bounding box that encompasses all detections.

    Parameters:
    - image (numpy.ndarray): The image from which to extract the part.
    - detections (list): A list of detections. Each detection is a tuple containing the bounding box
                         corners, the detected text, and the detection score.
    - compression_ratio (float): The fraction by which both dimensions of the crop are reduced.
                                 The crop is resized by a factor of (1 - compression_ratio).

    Returns:
    - numpy.ndarray: The cropped region, resized and converted to grayscale.
                     None when there are no detections or when the encompassing box does not
                     overlap the image.

    Note:
    Every corner of every bounding box is taken into account, so that rotated boxes are fully
    enclosed. The encompassing box is clamped to the image bounds before cropping.
    """
    # No detection means no region to extract
    if not detections:
        return None

    # Collect the integer coordinates of every corner of every bounding box
    xs = []
    ys = []
    for bbox, _, _ in detections:
        for x, y in bbox:
            xs.append(int(x))
            ys.append(int(y))

    # Encompassing bounding box, clamped to the image. A negative lower bound would
    # otherwise be interpreted by the slice as an offset from the end of the axis.
    height, width = image.shape[:2]
    x_min = max(min(xs), 0)
    y_min = max(min(ys), 0)
    x_max = min(max(xs), width)
    y_max = min(max(ys), height)

    # Degenerate region: nothing to crop
    if x_min >= x_max or y_min >= y_max:
        return None

    cropped_image = image[y_min:y_max, x_min:x_max]

    # Shrink the image dimensions to save memory
    scale = 1 - compression_ratio
    cropped_image = cv2.resize(cropped_image, (0, 0), fx=scale, fy=scale)

    # Convert to grayscale; a 2-D array is already single-channel and is left untouched
    if cropped_image.ndim == 3:
        cropped_image = cv2.cvtColor(cropped_image, cv2.COLOR_BGR2GRAY)

    return cropped_image