From 7eb973b3aa36091343ca2c177f593c94289cad97 Mon Sep 17 00:00:00 2001 From: Konstantin Baierer Date: Tue, 2 Mar 2021 17:23:31 +0100 Subject: [PATCH] xml_reading_order takes id_of_marginals directly --- qurator/eynollah/utils/xml.py | 11 +++++------ qurator/eynollah/writer.py | 11 ++--------- 2 files changed, 7 insertions(+), 15 deletions(-) diff --git a/qurator/eynollah/utils/xml.py b/qurator/eynollah/utils/xml.py index 194e7eb..3123412 100644 --- a/qurator/eynollah/utils/xml.py +++ b/qurator/eynollah/utils/xml.py @@ -41,21 +41,20 @@ def add_textequiv(parent, text=''): unireg = ET.SubElement(textequiv, 'Unicode') unireg.text = text -def xml_reading_order(page, order_of_texts, id_of_texts, found_polygons_marginals): +def xml_reading_order(page, order_of_texts, id_of_texts, id_of_marginalia): region_order = ET.SubElement(page, 'ReadingOrder') region_order_sub = ET.SubElement(region_order, 'OrderedGroup') region_order_sub.set('id', "ro357564684568544579089") indexer_region = 0 - for idx_text in order_of_texts: + for id_of_textregion in order_of_texts: name = ET.SubElement(region_order_sub, 'RegionRefIndexed') name.set('index', str(indexer_region)) - name.set('regionRef', id_of_texts[idx_text]) + name.set('regionRef', id_of_textregion) indexer_region += 1 - for _ in found_polygons_marginals: + for id_marginal in id_of_marginalia: name = ET.SubElement(region_order_sub, 'RegionRefIndexed') name.set('index', str(indexer_region)) - name.set('regionRef', 'r%s' % indexer_region) - indexer_region += 1 + name.set('regionRef', id_marginal) def order_and_id_of_texts(found_polygons_text_region, found_polygons_text_region_h, matrix_of_orders, indexes_sorted, index_of_types, kind_of_texts, ref_point): indexes_sorted = np.array(indexes_sorted) diff --git a/qurator/eynollah/writer.py b/qurator/eynollah/writer.py index 535393e..1d027a5 100644 --- a/qurator/eynollah/writer.py +++ b/qurator/eynollah/writer.py @@ -140,12 +140,8 @@ class EynollahXmlWriter(): counter_textregions = EynollahIdCounter() counter_marginals = EynollahIdCounter(region_idx=len(order_of_texts)) - id_of_marginalia = [] - for _ in found_polygons_marginals: - id_of_marginalia.append(counter_marginals.next_region_id) - if len(found_polygons_text_region) > 0: - xml_reading_order(page, order_of_texts, id_of_texts, found_polygons_marginals) + xml_reading_order(page, order_of_texts, id_of_texts, [counter_marginals.next_region_id for _ in found_polygons_marginals]) for mm in range(len(found_polygons_text_region)): textregion = ET.SubElement(page, 'TextRegion') @@ -190,11 +186,8 @@ class EynollahXmlWriter(): counter_marginals = EynollahIdCounter(region_idx=len(order_of_texts)) counter_textregions = EynollahIdCounter() - id_of_marginalia = [] - for _ in found_polygons_marginals: - id_of_marginalia.append(counter_marginals.next_region_id) + xml_reading_order(page, order_of_texts, id_of_texts, [counter_marginals.next_region_id for _ in found_polygons_marginals]) - xml_reading_order(page, order_of_texts, id_of_texts, found_polygons_marginals) for mm in range(len(found_polygons_text_region)): textregion=ET.SubElement(page, 'TextRegion') textregion.set('id', counter_textregions.next_region_id)