mirror of
https://github.com/qurator-spk/eynollah.git
synced 2025-10-15 19:09:58 +02:00
do_order_of_regions / order_of_regions: simplify
- array-convert only once (before returning from `order_of_regions`)
- avoid passing `matrix_of_orders` unnecessarily between `order_of_regions` and `order_and_id_of_texts`
This commit is contained in:
parent
415b2cbad8
commit
a1c8fd4467
3 changed files with 38 additions and 43 deletions
|
@ -2567,26 +2567,25 @@ class Eynollah:
|
||||||
ref_point = 0
|
ref_point = 0
|
||||||
order_of_texts_tot = []
|
order_of_texts_tot = []
|
||||||
id_of_texts_tot = []
|
id_of_texts_tot = []
|
||||||
for iij in range(len(boxes)):
|
for iij, box in enumerate(boxes):
|
||||||
ys = slice(*boxes[iij][2:4])
|
ys = slice(*box[2:4])
|
||||||
xs = slice(*boxes[iij][0:2])
|
xs = slice(*box[0:2])
|
||||||
args_contours_box = args_contours[np.array(arg_text_con) == iij]
|
args_contours_box = args_contours[np.array(arg_text_con) == iij]
|
||||||
args_contours_box_h = args_contours_h[np.array(arg_text_con_h) == iij]
|
args_contours_box_h = args_contours_h[np.array(arg_text_con_h) == iij]
|
||||||
con_inter_box = contours_only_text_parent[args_contours_box]
|
con_inter_box = contours_only_text_parent[args_contours_box]
|
||||||
con_inter_box_h = contours_only_text_parent_h[args_contours_box_h]
|
con_inter_box_h = contours_only_text_parent_h[args_contours_box_h]
|
||||||
|
|
||||||
|
indexes_sorted, kind_of_texts_sorted, index_by_kind_sorted = order_of_regions(
|
||||||
indexes_sorted, matrix_of_orders, kind_of_texts_sorted, index_by_kind_sorted = order_of_regions(
|
textline_mask_tot[ys, xs], con_inter_box, con_inter_box_h, box[2])
|
||||||
textline_mask_tot[ys, xs], con_inter_box, con_inter_box_h, boxes[iij][2])
|
|
||||||
|
|
||||||
order_of_texts, id_of_texts = order_and_id_of_texts(
|
order_of_texts, id_of_texts = order_and_id_of_texts(
|
||||||
con_inter_box, con_inter_box_h,
|
con_inter_box, con_inter_box_h,
|
||||||
matrix_of_orders, indexes_sorted, index_by_kind_sorted, kind_of_texts_sorted, ref_point)
|
indexes_sorted, index_by_kind_sorted, kind_of_texts_sorted, ref_point)
|
||||||
|
|
||||||
indexes_sorted_main = np.array(indexes_sorted)[np.array(kind_of_texts_sorted) == 1]
|
indexes_sorted_main = indexes_sorted[kind_of_texts_sorted == 1]
|
||||||
indexes_by_type_main = np.array(index_by_kind_sorted)[np.array(kind_of_texts_sorted) == 1]
|
indexes_by_type_main = index_by_kind_sorted[kind_of_texts_sorted == 1]
|
||||||
indexes_sorted_head = np.array(indexes_sorted)[np.array(kind_of_texts_sorted) == 2]
|
indexes_sorted_head = indexes_sorted[kind_of_texts_sorted == 2]
|
||||||
indexes_by_type_head = np.array(index_by_kind_sorted)[np.array(kind_of_texts_sorted) == 2]
|
indexes_by_type_head = index_by_kind_sorted[kind_of_texts_sorted == 2]
|
||||||
|
|
||||||
for zahler, _ in enumerate(args_contours_box):
|
for zahler, _ in enumerate(args_contours_box):
|
||||||
arg_order_v = indexes_sorted_main[zahler]
|
arg_order_v = indexes_sorted_main[zahler]
|
||||||
|
@ -2664,25 +2663,25 @@ class Eynollah:
|
||||||
ref_point = 0
|
ref_point = 0
|
||||||
order_of_texts_tot = []
|
order_of_texts_tot = []
|
||||||
id_of_texts_tot = []
|
id_of_texts_tot = []
|
||||||
for iij, _ in enumerate(boxes):
|
for iij, box in enumerate(boxes):
|
||||||
ys = slice(*boxes[iij][2:4])
|
ys = slice(*box[2:4])
|
||||||
xs = slice(*boxes[iij][0:2])
|
xs = slice(*box[0:2])
|
||||||
args_contours_box = args_contours[np.array(arg_text_con) == iij]
|
args_contours_box = args_contours[np.array(arg_text_con) == iij]
|
||||||
args_contours_box_h = args_contours_h[np.array(arg_text_con_h) == iij]
|
args_contours_box_h = args_contours_h[np.array(arg_text_con_h) == iij]
|
||||||
con_inter_box = contours_only_text_parent[args_contours_box]
|
con_inter_box = contours_only_text_parent[args_contours_box]
|
||||||
con_inter_box_h = contours_only_text_parent_h[args_contours_box_h]
|
con_inter_box_h = contours_only_text_parent_h[args_contours_box_h]
|
||||||
|
|
||||||
indexes_sorted, matrix_of_orders, kind_of_texts_sorted, index_by_kind_sorted = order_of_regions(
|
indexes_sorted, kind_of_texts_sorted, index_by_kind_sorted = order_of_regions(
|
||||||
textline_mask_tot[ys, xs], con_inter_box, con_inter_box_h, boxes[iij][2])
|
textline_mask_tot[ys, xs], con_inter_box, con_inter_box_h, box[2])
|
||||||
|
|
||||||
order_of_texts, id_of_texts = order_and_id_of_texts(
|
order_of_texts, id_of_texts = order_and_id_of_texts(
|
||||||
con_inter_box, con_inter_box_h,
|
con_inter_box, con_inter_box_h,
|
||||||
matrix_of_orders, indexes_sorted, index_by_kind_sorted, kind_of_texts_sorted, ref_point)
|
indexes_sorted, index_by_kind_sorted, kind_of_texts_sorted, ref_point)
|
||||||
|
|
||||||
indexes_sorted_main = np.array(indexes_sorted)[np.array(kind_of_texts_sorted) == 1]
|
indexes_sorted_main = indexes_sorted[kind_of_texts_sorted == 1]
|
||||||
indexes_by_type_main = np.array(index_by_kind_sorted)[np.array(kind_of_texts_sorted) == 1]
|
indexes_by_type_main = index_by_kind_sorted[kind_of_texts_sorted == 1]
|
||||||
indexes_sorted_head = np.array(indexes_sorted)[np.array(kind_of_texts_sorted) == 2]
|
indexes_sorted_head = indexes_sorted[kind_of_texts_sorted == 2]
|
||||||
indexes_by_type_head = np.array(index_by_kind_sorted)[np.array(kind_of_texts_sorted) == 2]
|
indexes_by_type_head = index_by_kind_sorted[kind_of_texts_sorted == 2]
|
||||||
|
|
||||||
for zahler, _ in enumerate(args_contours_box):
|
for zahler, _ in enumerate(args_contours_box):
|
||||||
arg_order_v = indexes_sorted_main[zahler]
|
arg_order_v = indexes_sorted_main[zahler]
|
||||||
|
@ -2747,22 +2746,22 @@ class Eynollah:
|
||||||
ref_point = 0
|
ref_point = 0
|
||||||
order_of_texts_tot = []
|
order_of_texts_tot = []
|
||||||
id_of_texts_tot = []
|
id_of_texts_tot = []
|
||||||
for iij in range(len(boxes)):
|
for iij, box in enumerate(boxes):
|
||||||
ys = slice(*boxes[iij][2:4])
|
ys = slice(*box[2:4])
|
||||||
xs = slice(*boxes[iij][0:2])
|
xs = slice(*box[0:2])
|
||||||
args_contours_box = args_contours[np.array(arg_text_con) == iij]
|
args_contours_box = args_contours[np.array(arg_text_con) == iij]
|
||||||
con_inter_box = contours_only_text_parent[args_contours_box]
|
con_inter_box = contours_only_text_parent[args_contours_box]
|
||||||
con_inter_box_h = []
|
con_inter_box_h = []
|
||||||
|
|
||||||
indexes_sorted, matrix_of_orders, kind_of_texts_sorted, index_by_kind_sorted = order_of_regions(
|
indexes_sorted, kind_of_texts_sorted, index_by_kind_sorted = order_of_regions(
|
||||||
textline_mask_tot[ys, xs], con_inter_box, con_inter_box_h, boxes[iij][2])
|
textline_mask_tot[ys, xs], con_inter_box, con_inter_box_h, box[2])
|
||||||
|
|
||||||
order_of_texts, id_of_texts = order_and_id_of_texts(
|
order_of_texts, id_of_texts = order_and_id_of_texts(
|
||||||
con_inter_box, con_inter_box_h,
|
con_inter_box, con_inter_box_h,
|
||||||
matrix_of_orders, indexes_sorted, index_by_kind_sorted, kind_of_texts_sorted, ref_point)
|
indexes_sorted, index_by_kind_sorted, kind_of_texts_sorted, ref_point)
|
||||||
|
|
||||||
indexes_sorted_main = np.array(indexes_sorted)[np.array(kind_of_texts_sorted) == 1]
|
indexes_sorted_main = indexes_sorted[kind_of_texts_sorted == 1]
|
||||||
indexes_by_type_main = np.array(index_by_kind_sorted)[np.array(kind_of_texts_sorted) == 1]
|
indexes_by_type_main = index_by_kind_sorted[kind_of_texts_sorted == 1]
|
||||||
|
|
||||||
for zahler, _ in enumerate(args_contours_box):
|
for zahler, _ in enumerate(args_contours_box):
|
||||||
arg_order_v = indexes_sorted_main[zahler]
|
arg_order_v = indexes_sorted_main[zahler]
|
||||||
|
@ -2808,24 +2807,24 @@ class Eynollah:
|
||||||
ref_point = 0
|
ref_point = 0
|
||||||
order_of_texts_tot = []
|
order_of_texts_tot = []
|
||||||
id_of_texts_tot = []
|
id_of_texts_tot = []
|
||||||
for iij in range(len(boxes)):
|
for iij, box in enumerate(boxes):
|
||||||
ys = slice(*boxes[iij][2:4])
|
ys = slice(*box[2:4])
|
||||||
xs = slice(*boxes[iij][0:2])
|
xs = slice(*box[0:2])
|
||||||
args_contours_box = args_contours[np.array(arg_text_con) == iij]
|
args_contours_box = args_contours[np.array(arg_text_con) == iij]
|
||||||
con_inter_box = []
|
con_inter_box = []
|
||||||
con_inter_box_h = []
|
con_inter_box_h = []
|
||||||
for i in range(len(args_contours_box)):
|
for i in range(len(args_contours_box)):
|
||||||
con_inter_box.append(contours_only_text_parent[args_contours_box[i]])
|
con_inter_box.append(contours_only_text_parent[args_contours_box[i]])
|
||||||
|
|
||||||
indexes_sorted, matrix_of_orders, kind_of_texts_sorted, index_by_kind_sorted = order_of_regions(
|
indexes_sorted, kind_of_texts_sorted, index_by_kind_sorted = order_of_regions(
|
||||||
textline_mask_tot[ys, xs], con_inter_box, con_inter_box_h, boxes[iij][2])
|
textline_mask_tot[ys, xs], con_inter_box, con_inter_box_h, box[2])
|
||||||
|
|
||||||
order_of_texts, id_of_texts = order_and_id_of_texts(
|
order_of_texts, id_of_texts = order_and_id_of_texts(
|
||||||
con_inter_box, con_inter_box_h,
|
con_inter_box, con_inter_box_h,
|
||||||
matrix_of_orders, indexes_sorted, index_by_kind_sorted, kind_of_texts_sorted, ref_point)
|
indexes_sorted, index_by_kind_sorted, kind_of_texts_sorted, ref_point)
|
||||||
|
|
||||||
indexes_sorted_main = np.array(indexes_sorted)[np.array(kind_of_texts_sorted) == 1]
|
indexes_sorted_main = indexes_sorted[kind_of_texts_sorted == 1]
|
||||||
indexes_by_type_main = np.array(index_by_kind_sorted)[np.array(kind_of_texts_sorted) == 1]
|
indexes_by_type_main = index_by_kind_sorted[kind_of_texts_sorted == 1]
|
||||||
|
|
||||||
for zahler, _ in enumerate(args_contours_box):
|
for zahler, _ in enumerate(args_contours_box):
|
||||||
arg_order_v = indexes_sorted_main[zahler]
|
arg_order_v = indexes_sorted_main[zahler]
|
||||||
|
|
|
@ -1325,7 +1325,7 @@ def order_of_regions(textline_mask, contours_main, contours_header, y_ref):
|
||||||
final_types.append(1)
|
final_types.append(1)
|
||||||
final_index_type.append(ind_missed)
|
final_index_type.append(ind_missed)
|
||||||
|
|
||||||
return final_indexers_sorted, matrix_of_orders, final_types, final_index_type
|
return np.array(final_indexers_sorted), np.array(final_types), np.array(final_index_type)
|
||||||
|
|
||||||
def combine_hor_lines_and_delete_cross_points_and_get_lines_features_back_new(
|
def combine_hor_lines_and_delete_cross_points_and_get_lines_features_back_new(
|
||||||
img_p_in_ver, img_in_hor,num_col_classifier):
|
img_p_in_ver, img_in_hor,num_col_classifier):
|
||||||
|
|
|
@ -65,11 +65,7 @@ def xml_reading_order(page, order_of_texts, id_of_marginalia_left, id_of_margina
|
||||||
og.add_RegionRefIndexed(RegionRefIndexedType(index=str(region_counter.get('region')), regionRef=id_marginal))
|
og.add_RegionRefIndexed(RegionRefIndexedType(index=str(region_counter.get('region')), regionRef=id_marginal))
|
||||||
region_counter.inc('region')
|
region_counter.inc('region')
|
||||||
|
|
||||||
def order_and_id_of_texts(found_polygons_text_region, found_polygons_text_region_h, matrix_of_orders, indexes_sorted, index_of_types, kind_of_texts, ref_point):
|
def order_and_id_of_texts(found_polygons_text_region, found_polygons_text_region_h, indexes_sorted, index_of_types, kind_of_texts, ref_point):
|
||||||
indexes_sorted = np.array(indexes_sorted)
|
|
||||||
index_of_types = np.array(index_of_types)
|
|
||||||
kind_of_texts = np.array(kind_of_texts)
|
|
||||||
|
|
||||||
id_of_texts = []
|
id_of_texts = []
|
||||||
order_of_texts = []
|
order_of_texts = []
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue