diff --git a/easyocr/utils.py b/easyocr/utils.py index 987baf2c9..48f9f4918 100644 --- a/easyocr/utils.py +++ b/easyocr/utils.py @@ -642,6 +642,7 @@ def diff(input_list): def get_paragraph(raw_result, x_ths=1, y_ths=0.5, mode = 'ltr'): # create basic attributes + box_group = [] for box in raw_result: all_x = [int(coord[0]) for coord in box[0]] @@ -651,7 +652,7 @@ def get_paragraph(raw_result, x_ths=1, y_ths=0.5, mode = 'ltr'): min_y = min(all_y) max_y = max(all_y) height = max_y - min_y - box_group.append([box[1], min_x, max_x, min_y, max_y, height, 0.5*(min_y+max_y), 0]) # last element indicates group + box_group.append([box[1], min_x, max_x, min_y, max_y, height, 0.5*(min_y+max_y), 0,box[-1]]) # last element indicates group # cluster boxes into paragraph current_group = 1 while len([box for box in box_group if box[7]==0]) > 0: @@ -682,6 +683,7 @@ def get_paragraph(raw_result, x_ths=1, y_ths=0.5, mode = 'ltr'): result = [] for i in set(box[7] for box in box_group): current_box_group = [box for box in box_group if box[7]==i] + group_confidence = sum([box[8] for box in current_box_group])/len(current_box_group) mean_height = np.mean([box[5] for box in current_box_group]) min_gx = min([box[1] for box in current_box_group]) max_gx = max([box[2] for box in current_box_group]) @@ -704,7 +706,7 @@ def get_paragraph(raw_result, x_ths=1, y_ths=0.5, mode = 'ltr'): text += ' '+best_box[0] current_box_group.remove(best_box) - result.append([ [[min_gx,min_gy],[max_gx,min_gy],[max_gx,max_gy],[min_gx,max_gy]], text[1:]]) + result.append([ [[min_gx,min_gy],[max_gx,min_gy],[max_gx,max_gy],[min_gx,max_gy]], text[1:],group_confidence]) return result