From 62fa710f272f8322e0860ebb55d98027c1a34c1a Mon Sep 17 00:00:00 2001
From: Konstantin Baierer
Date: Tue, 2 Mar 2021 13:13:41 +0100
Subject: [PATCH] move order_and_id_of_texts to utils.xml

Move order_and_id_of_texts() out of utils/__init__.py into utils/xml.py
and import it from there in eynollah.py. The moved copy drops the
commented-out debug prints and the no-op `else: pass` branch and iterates
with enumerate() instead of range(len(...)); its results are unchanged.

---
Review notes (ignored by `git am`):
 * eynollah.py: the id_of_texts loop uses enumerate(). Unpacking
   `jji, _` from range(len(...)) raises TypeError, because range()
   yields plain ints.
 * Leading whitespace of context/added lines was reconstructed; if a hunk
   does not match, try `git apply --ignore-whitespace` -- TODO confirm
   against the tree.
 * The post-image blob id on the eynollah.py `index` line predates the
   enumerate() fix and is informational only.

 qurator/eynollah/eynollah.py       |  4 +--
 qurator/eynollah/utils/__init__.py | 39 ------------------------------
 qurator/eynollah/utils/xml.py      | 32 ++++++++++++++++++++++++
 3 files changed, 34 insertions(+), 41 deletions(-)

diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py
index cb5b028..75971ec 100644
--- a/qurator/eynollah/eynollah.py
+++ b/qurator/eynollah/eynollah.py
@@ -63,11 +63,11 @@ from .utils import (
     putt_bb_of_drop_capitals_of_model_in_patches_in_layout,
     check_any_text_region_in_model_one_is_main_or_header,
     small_textlines_to_parent_adherence2,
-    order_and_id_of_texts,
     order_of_regions,
     find_number_of_columns_in_document,
     return_boxes_of_images_by_order_of_reading_new)
 from .utils.pil_cv2 import check_dpi
+from .utils.xml import order_and_id_of_texts
 from .plot import EynollahPlotter
 from .writer import EynollahXmlWriter
 
@@ -1308,7 +1308,7 @@ class Eynollah:
                     tartib = np.where(indexes_sorted == arg_order_v)[0][0]
                     order_by_con_main[args_contours_box[indexes_by_type_main[zahler]]] = tartib + ref_point
 
-                for jji in range(len(id_of_texts)):
+                for jji, _ in enumerate(id_of_texts):
                     order_of_texts_tot.append(order_of_texts[jji] + ref_point)
                     id_of_texts_tot.append(id_of_texts[jji])
                 ref_point = ref_point + len(id_of_texts)
diff --git a/qurator/eynollah/utils/__init__.py b/qurator/eynollah/utils/__init__.py
index ac72ef9..a44c6c8 100644
--- a/qurator/eynollah/utils/__init__.py
+++ b/qurator/eynollah/utils/__init__.py
@@ -977,45 +977,6 @@ def small_textlines_to_parent_adherence2(textlines_con, textline_iamge, num_col)
             textlines_con_changed.append(textlines_big_org_form)
     return textlines_con_changed
 
-def order_and_id_of_texts(found_polygons_text_region, found_polygons_text_region_h, matrix_of_orders, indexes_sorted, index_of_types, kind_of_texts, ref_point):
-    indexes_sorted = np.array(indexes_sorted)
-    index_of_types = np.array(index_of_types)
-    kind_of_texts = np.array(kind_of_texts)
-
-    id_of_texts = []
-    order_of_texts = []
-
-    index_of_types_1 = index_of_types[kind_of_texts == 1]
-    indexes_sorted_1 = indexes_sorted[kind_of_texts == 1]
-
-    index_of_types_2 = index_of_types[kind_of_texts == 2]
-    indexes_sorted_2 = indexes_sorted[kind_of_texts == 2]
-
-    ##print(index_of_types,'index_of_types')
-    ##print(kind_of_texts,'kind_of_texts')
-    ##print(len(found_polygons_text_region),'found_polygons_text_region')
-    ##print(index_of_types_1,'index_of_types_1')
-    ##print(indexes_sorted_1,'indexes_sorted_1')
-    index_b = 0 + ref_point
-    for mm in range(len(found_polygons_text_region)):
-
-        id_of_texts.append("r" + str(index_b))
-        interest = indexes_sorted_1[indexes_sorted_1 == index_of_types_1[mm]]
-
-        if len(interest) > 0:
-            order_of_texts.append(interest[0])
-            index_b += 1
-        else:
-            pass
-
-    for mm in range(len(found_polygons_text_region_h)):
-        id_of_texts.append("r" + str(index_b))
-        interest = indexes_sorted_2[index_of_types_2[mm]]
-        order_of_texts.append(interest)
-        index_b += 1
-
-    return order_of_texts, id_of_texts
-
 def order_of_regions(textline_mask, contours_main, contours_header, y_ref):
 
     ##plt.imshow(textline_mask)
diff --git a/qurator/eynollah/utils/xml.py b/qurator/eynollah/utils/xml.py
index fe806e9..4f41461 100644
--- a/qurator/eynollah/utils/xml.py
+++ b/qurator/eynollah/utils/xml.py
@@ -1,4 +1,6 @@
+# pylint: disable=too-many-locals,wrong-import-position,too-many-lines,too-many-statements,chained-comparison,fixme,broad-except,c-extension-no-member
 from lxml import etree as ET
+import numpy as np
 
 NAMESPACES = {}
 NAMESPACES['page'] = "http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"
@@ -60,3 +62,33 @@ def xml_reading_order(page, order_of_texts, id_of_texts, id_of_marginalia, found
         indexer_region += 1
     return id_of_marginalia
 
+def order_and_id_of_texts(found_polygons_text_region, found_polygons_text_region_h, matrix_of_orders, indexes_sorted, index_of_types, kind_of_texts, ref_point):
+    indexes_sorted = np.array(indexes_sorted)
+    index_of_types = np.array(index_of_types)
+    kind_of_texts = np.array(kind_of_texts)
+
+    id_of_texts = []
+    order_of_texts = []
+
+    index_of_types_1 = index_of_types[kind_of_texts == 1]
+    indexes_sorted_1 = indexes_sorted[kind_of_texts == 1]
+
+    index_of_types_2 = index_of_types[kind_of_texts == 2]
+    indexes_sorted_2 = indexes_sorted[kind_of_texts == 2]
+
+    index_b = 0 + ref_point
+    for mm, _ in enumerate(found_polygons_text_region):
+        id_of_texts.append("r" + str(index_b))
+        interest = indexes_sorted_1[indexes_sorted_1 == index_of_types_1[mm]]
+        if len(interest) > 0:
+            order_of_texts.append(interest[0])
+            index_b += 1
+
+    for mm, _ in enumerate(found_polygons_text_region_h):
+        id_of_texts.append("r" + str(index_b))
+        interest = indexes_sorted_2[index_of_types_2[mm]]
+        order_of_texts.append(interest)
+        index_b += 1
+
+    return order_of_texts, id_of_texts
+