do_order_of_regions / order_of_regions: simplify

- convert results to NumPy arrays only once (before returning from `order_of_regions`)
- stop passing the unneeded `matrix_of_orders` from
  `order_of_regions` on to `order_and_id_of_texts`
This commit is contained in:
Robert Sachunsky 2025-10-02 21:41:37 +02:00
parent 415b2cbad8
commit a1c8fd4467
3 changed files with 38 additions and 43 deletions

View file

@ -2567,26 +2567,25 @@ class Eynollah:
ref_point = 0 ref_point = 0
order_of_texts_tot = [] order_of_texts_tot = []
id_of_texts_tot = [] id_of_texts_tot = []
for iij in range(len(boxes)): for iij, box in enumerate(boxes):
ys = slice(*boxes[iij][2:4]) ys = slice(*box[2:4])
xs = slice(*boxes[iij][0:2]) xs = slice(*box[0:2])
args_contours_box = args_contours[np.array(arg_text_con) == iij] args_contours_box = args_contours[np.array(arg_text_con) == iij]
args_contours_box_h = args_contours_h[np.array(arg_text_con_h) == iij] args_contours_box_h = args_contours_h[np.array(arg_text_con_h) == iij]
con_inter_box = contours_only_text_parent[args_contours_box] con_inter_box = contours_only_text_parent[args_contours_box]
con_inter_box_h = contours_only_text_parent_h[args_contours_box_h] con_inter_box_h = contours_only_text_parent_h[args_contours_box_h]
indexes_sorted, kind_of_texts_sorted, index_by_kind_sorted = order_of_regions(
indexes_sorted, matrix_of_orders, kind_of_texts_sorted, index_by_kind_sorted = order_of_regions( textline_mask_tot[ys, xs], con_inter_box, con_inter_box_h, box[2])
textline_mask_tot[ys, xs], con_inter_box, con_inter_box_h, boxes[iij][2])
order_of_texts, id_of_texts = order_and_id_of_texts( order_of_texts, id_of_texts = order_and_id_of_texts(
con_inter_box, con_inter_box_h, con_inter_box, con_inter_box_h,
matrix_of_orders, indexes_sorted, index_by_kind_sorted, kind_of_texts_sorted, ref_point) indexes_sorted, index_by_kind_sorted, kind_of_texts_sorted, ref_point)
indexes_sorted_main = np.array(indexes_sorted)[np.array(kind_of_texts_sorted) == 1] indexes_sorted_main = indexes_sorted[kind_of_texts_sorted == 1]
indexes_by_type_main = np.array(index_by_kind_sorted)[np.array(kind_of_texts_sorted) == 1] indexes_by_type_main = index_by_kind_sorted[kind_of_texts_sorted == 1]
indexes_sorted_head = np.array(indexes_sorted)[np.array(kind_of_texts_sorted) == 2] indexes_sorted_head = indexes_sorted[kind_of_texts_sorted == 2]
indexes_by_type_head = np.array(index_by_kind_sorted)[np.array(kind_of_texts_sorted) == 2] indexes_by_type_head = index_by_kind_sorted[kind_of_texts_sorted == 2]
for zahler, _ in enumerate(args_contours_box): for zahler, _ in enumerate(args_contours_box):
arg_order_v = indexes_sorted_main[zahler] arg_order_v = indexes_sorted_main[zahler]
@ -2664,25 +2663,25 @@ class Eynollah:
ref_point = 0 ref_point = 0
order_of_texts_tot = [] order_of_texts_tot = []
id_of_texts_tot = [] id_of_texts_tot = []
for iij, _ in enumerate(boxes): for iij, box in enumerate(boxes):
ys = slice(*boxes[iij][2:4]) ys = slice(*box[2:4])
xs = slice(*boxes[iij][0:2]) xs = slice(*box[0:2])
args_contours_box = args_contours[np.array(arg_text_con) == iij] args_contours_box = args_contours[np.array(arg_text_con) == iij]
args_contours_box_h = args_contours_h[np.array(arg_text_con_h) == iij] args_contours_box_h = args_contours_h[np.array(arg_text_con_h) == iij]
con_inter_box = contours_only_text_parent[args_contours_box] con_inter_box = contours_only_text_parent[args_contours_box]
con_inter_box_h = contours_only_text_parent_h[args_contours_box_h] con_inter_box_h = contours_only_text_parent_h[args_contours_box_h]
indexes_sorted, matrix_of_orders, kind_of_texts_sorted, index_by_kind_sorted = order_of_regions( indexes_sorted, kind_of_texts_sorted, index_by_kind_sorted = order_of_regions(
textline_mask_tot[ys, xs], con_inter_box, con_inter_box_h, boxes[iij][2]) textline_mask_tot[ys, xs], con_inter_box, con_inter_box_h, box[2])
order_of_texts, id_of_texts = order_and_id_of_texts( order_of_texts, id_of_texts = order_and_id_of_texts(
con_inter_box, con_inter_box_h, con_inter_box, con_inter_box_h,
matrix_of_orders, indexes_sorted, index_by_kind_sorted, kind_of_texts_sorted, ref_point) indexes_sorted, index_by_kind_sorted, kind_of_texts_sorted, ref_point)
indexes_sorted_main = np.array(indexes_sorted)[np.array(kind_of_texts_sorted) == 1] indexes_sorted_main = indexes_sorted[kind_of_texts_sorted == 1]
indexes_by_type_main = np.array(index_by_kind_sorted)[np.array(kind_of_texts_sorted) == 1] indexes_by_type_main = index_by_kind_sorted[kind_of_texts_sorted == 1]
indexes_sorted_head = np.array(indexes_sorted)[np.array(kind_of_texts_sorted) == 2] indexes_sorted_head = indexes_sorted[kind_of_texts_sorted == 2]
indexes_by_type_head = np.array(index_by_kind_sorted)[np.array(kind_of_texts_sorted) == 2] indexes_by_type_head = index_by_kind_sorted[kind_of_texts_sorted == 2]
for zahler, _ in enumerate(args_contours_box): for zahler, _ in enumerate(args_contours_box):
arg_order_v = indexes_sorted_main[zahler] arg_order_v = indexes_sorted_main[zahler]
@ -2747,22 +2746,22 @@ class Eynollah:
ref_point = 0 ref_point = 0
order_of_texts_tot = [] order_of_texts_tot = []
id_of_texts_tot = [] id_of_texts_tot = []
for iij in range(len(boxes)): for iij, box in enumerate(boxes):
ys = slice(*boxes[iij][2:4]) ys = slice(*box[2:4])
xs = slice(*boxes[iij][0:2]) xs = slice(*box[0:2])
args_contours_box = args_contours[np.array(arg_text_con) == iij] args_contours_box = args_contours[np.array(arg_text_con) == iij]
con_inter_box = contours_only_text_parent[args_contours_box] con_inter_box = contours_only_text_parent[args_contours_box]
con_inter_box_h = [] con_inter_box_h = []
indexes_sorted, matrix_of_orders, kind_of_texts_sorted, index_by_kind_sorted = order_of_regions( indexes_sorted, kind_of_texts_sorted, index_by_kind_sorted = order_of_regions(
textline_mask_tot[ys, xs], con_inter_box, con_inter_box_h, boxes[iij][2]) textline_mask_tot[ys, xs], con_inter_box, con_inter_box_h, box[2])
order_of_texts, id_of_texts = order_and_id_of_texts( order_of_texts, id_of_texts = order_and_id_of_texts(
con_inter_box, con_inter_box_h, con_inter_box, con_inter_box_h,
matrix_of_orders, indexes_sorted, index_by_kind_sorted, kind_of_texts_sorted, ref_point) indexes_sorted, index_by_kind_sorted, kind_of_texts_sorted, ref_point)
indexes_sorted_main = np.array(indexes_sorted)[np.array(kind_of_texts_sorted) == 1] indexes_sorted_main = indexes_sorted[kind_of_texts_sorted == 1]
indexes_by_type_main = np.array(index_by_kind_sorted)[np.array(kind_of_texts_sorted) == 1] indexes_by_type_main = index_by_kind_sorted[kind_of_texts_sorted == 1]
for zahler, _ in enumerate(args_contours_box): for zahler, _ in enumerate(args_contours_box):
arg_order_v = indexes_sorted_main[zahler] arg_order_v = indexes_sorted_main[zahler]
@ -2808,24 +2807,24 @@ class Eynollah:
ref_point = 0 ref_point = 0
order_of_texts_tot = [] order_of_texts_tot = []
id_of_texts_tot = [] id_of_texts_tot = []
for iij in range(len(boxes)): for iij, box in enumerate(boxes):
ys = slice(*boxes[iij][2:4]) ys = slice(*box[2:4])
xs = slice(*boxes[iij][0:2]) xs = slice(*box[0:2])
args_contours_box = args_contours[np.array(arg_text_con) == iij] args_contours_box = args_contours[np.array(arg_text_con) == iij]
con_inter_box = [] con_inter_box = []
con_inter_box_h = [] con_inter_box_h = []
for i in range(len(args_contours_box)): for i in range(len(args_contours_box)):
con_inter_box.append(contours_only_text_parent[args_contours_box[i]]) con_inter_box.append(contours_only_text_parent[args_contours_box[i]])
indexes_sorted, matrix_of_orders, kind_of_texts_sorted, index_by_kind_sorted = order_of_regions( indexes_sorted, kind_of_texts_sorted, index_by_kind_sorted = order_of_regions(
textline_mask_tot[ys, xs], con_inter_box, con_inter_box_h, boxes[iij][2]) textline_mask_tot[ys, xs], con_inter_box, con_inter_box_h, box[2])
order_of_texts, id_of_texts = order_and_id_of_texts( order_of_texts, id_of_texts = order_and_id_of_texts(
con_inter_box, con_inter_box_h, con_inter_box, con_inter_box_h,
matrix_of_orders, indexes_sorted, index_by_kind_sorted, kind_of_texts_sorted, ref_point) indexes_sorted, index_by_kind_sorted, kind_of_texts_sorted, ref_point)
indexes_sorted_main = np.array(indexes_sorted)[np.array(kind_of_texts_sorted) == 1] indexes_sorted_main = indexes_sorted[kind_of_texts_sorted == 1]
indexes_by_type_main = np.array(index_by_kind_sorted)[np.array(kind_of_texts_sorted) == 1] indexes_by_type_main = index_by_kind_sorted[kind_of_texts_sorted == 1]
for zahler, _ in enumerate(args_contours_box): for zahler, _ in enumerate(args_contours_box):
arg_order_v = indexes_sorted_main[zahler] arg_order_v = indexes_sorted_main[zahler]

View file

@ -1325,7 +1325,7 @@ def order_of_regions(textline_mask, contours_main, contours_header, y_ref):
final_types.append(1) final_types.append(1)
final_index_type.append(ind_missed) final_index_type.append(ind_missed)
return final_indexers_sorted, matrix_of_orders, final_types, final_index_type return np.array(final_indexers_sorted), np.array(final_types), np.array(final_index_type)
def combine_hor_lines_and_delete_cross_points_and_get_lines_features_back_new( def combine_hor_lines_and_delete_cross_points_and_get_lines_features_back_new(
img_p_in_ver, img_in_hor,num_col_classifier): img_p_in_ver, img_in_hor,num_col_classifier):

View file

@ -65,11 +65,7 @@ def xml_reading_order(page, order_of_texts, id_of_marginalia_left, id_of_margina
og.add_RegionRefIndexed(RegionRefIndexedType(index=str(region_counter.get('region')), regionRef=id_marginal)) og.add_RegionRefIndexed(RegionRefIndexedType(index=str(region_counter.get('region')), regionRef=id_marginal))
region_counter.inc('region') region_counter.inc('region')
def order_and_id_of_texts(found_polygons_text_region, found_polygons_text_region_h, matrix_of_orders, indexes_sorted, index_of_types, kind_of_texts, ref_point): def order_and_id_of_texts(found_polygons_text_region, found_polygons_text_region_h, indexes_sorted, index_of_types, kind_of_texts, ref_point):
indexes_sorted = np.array(indexes_sorted)
index_of_types = np.array(index_of_types)
kind_of_texts = np.array(kind_of_texts)
id_of_texts = [] id_of_texts = []
order_of_texts = [] order_of_texts = []