From 9f5e4af5f087b9a542ad4b8617f1a682eb69b72f Mon Sep 17 00:00:00 2001
From: Konstantin Baierer
Date: Tue, 2 Mar 2021 14:13:56 +0100
Subject: [PATCH] factor out marginalia ID calc from xml_reading_order

---
Note: the marginalia IDs are built as 'r%s' % (len(order_of_texts) + idx_marginal).
The parentheses are required: '%' binds tighter than '+', so without them
Python evaluates ('r%s' % len(order_of_texts)) + idx_marginal, which is
str + int and raises TypeError. The post-image blob hash on the writer.py
"index" line predates this correction.

 qurator/eynollah/utils/xml.py |  7 +------
 qurator/eynollah/writer.py    | 10 ++++++++--
 2 files changed, 9 insertions(+), 8 deletions(-)

diff --git a/qurator/eynollah/utils/xml.py b/qurator/eynollah/utils/xml.py
index 4ae4a06..3e76e68 100644
--- a/qurator/eynollah/utils/xml.py
+++ b/qurator/eynollah/utils/xml.py
@@ -40,10 +40,7 @@ def add_textequiv(parent, text=''):
     unireg = ET.SubElement(textequiv, 'Unicode')
     unireg.text = text
 
-def xml_reading_order(page, order_of_texts, id_of_texts, id_of_marginalia, found_polygons_marginals):
-    """
-    XXX side-effect: extends id_of_marginalia
-    """
+def xml_reading_order(page, order_of_texts, id_of_texts, found_polygons_marginals):
     region_order = ET.SubElement(page, 'ReadingOrder')
     region_order_sub = ET.SubElement(region_order, 'OrderedGroup')
     region_order_sub.set('id', "ro357564684568544579089")
@@ -54,12 +51,10 @@ def xml_reading_order(page, order_of_texts, id_of_texts, id_of_marginalia, found
         name.set('regionRef', id_of_texts[idx_text])
         indexer_region += 1
     for _ in found_polygons_marginals:
-        id_of_marginalia.append('r%s' % indexer_region)
         name = ET.SubElement(region_order_sub, 'RegionRefIndexed')
         name.set('index', str(indexer_region))
         name.set('regionRef', 'r%s' % indexer_region)
         indexer_region += 1
-    return id_of_marginalia
 
 def order_and_id_of_texts(found_polygons_text_region, found_polygons_text_region_h, matrix_of_orders, indexes_sorted, index_of_types, kind_of_texts, ref_point):
     indexes_sorted = np.array(indexes_sorted)
diff --git a/qurator/eynollah/writer.py b/qurator/eynollah/writer.py
index c8c34e4..874b69c 100644
--- a/qurator/eynollah/writer.py
+++ b/qurator/eynollah/writer.py
@@ -140,10 +140,14 @@ class EynollahXmlWriter():
         coord_page.set('points', self.calculate_page_coords(cont_page))
 
         id_of_marginalia = []
+        for idx_marginal, _ in enumerate(found_polygons_marginals):
+            id_of_marginalia.append('r%s' % (len(order_of_texts) + idx_marginal))
+
         id_indexer = 0
         id_indexer_l = 0
+
         if len(found_polygons_text_region) > 0:
-            id_of_marginalia = xml_reading_order(page, order_of_texts, id_of_texts, id_of_marginalia, found_polygons_marginals)
+            xml_reading_order(page, order_of_texts, id_of_texts, found_polygons_marginals)
             for mm in range(len(found_polygons_text_region)):
                 textregion = ET.SubElement(page, 'TextRegion')
                 textregion.set('id', 'r%s' % id_indexer)
@@ -191,9 +195,11 @@ class EynollahXmlWriter():
         id_indexer = 0
         id_indexer_l = 0
         id_of_marginalia = []
+        for idx_marginal, _ in enumerate(found_polygons_marginals):
+            id_of_marginalia.append('r%s' % (len(order_of_texts) + idx_marginal))
 
         if len(found_polygons_text_region) > 0:
-            id_of_marginalia = xml_reading_order(page, order_of_texts, id_of_texts, id_of_marginalia, found_polygons_marginals)
+            xml_reading_order(page, order_of_texts, id_of_texts, found_polygons_marginals)
             for mm in range(len(found_polygons_text_region)):
                 textregion=ET.SubElement(page, 'TextRegion')
                 textregion.set('id', 'r%s' % id_indexer)