Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Include paragraph confidence as average of box confidence #1171

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions easyocr/utils.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -642,6 +642,7 @@ def diff(input_list):

def get_paragraph(raw_result, x_ths=1, y_ths=0.5, mode = 'ltr'):
# create basic attributes

box_group = []
for box in raw_result:
all_x = [int(coord[0]) for coord in box[0]]
Expand All @@ -651,7 +652,7 @@ def get_paragraph(raw_result, x_ths=1, y_ths=0.5, mode = 'ltr'):
min_y = min(all_y)
max_y = max(all_y)
height = max_y - min_y
- box_group.append([box[1], min_x, max_x, min_y, max_y, height, 0.5*(min_y+max_y), 0]) # last element indicates group
+ box_group.append([box[1], min_x, max_x, min_y, max_y, height, 0.5*(min_y+max_y), 0,box[-1]]) # last element indicates group
# cluster boxes into paragraph
current_group = 1
while len([box for box in box_group if box[7]==0]) > 0:
Expand Down Expand Up @@ -682,6 +683,7 @@ def get_paragraph(raw_result, x_ths=1, y_ths=0.5, mode = 'ltr'):
result = []
for i in set(box[7] for box in box_group):
current_box_group = [box for box in box_group if box[7]==i]
+ group_confidence = sum([box[8] for box in current_box_group])/len(current_box_group)
mean_height = np.mean([box[5] for box in current_box_group])
min_gx = min([box[1] for box in current_box_group])
max_gx = max([box[2] for box in current_box_group])
Expand All @@ -704,7 +706,7 @@ def get_paragraph(raw_result, x_ths=1, y_ths=0.5, mode = 'ltr'):
text += ' '+best_box[0]
current_box_group.remove(best_box)

- result.append([ [[min_gx,min_gy],[max_gx,min_gy],[max_gx,max_gy],[min_gx,max_gy]], text[1:]])
+ result.append([ [[min_gx,min_gy],[max_gx,min_gy],[max_gx,max_gy],[min_gx,max_gy]], text[1:],group_confidence])

return result

Expand Down