factor out marginalia ID calc from xml_reading_order

pull/28/head
Konstantin Baierer 4 years ago
parent 630002d96d
commit 9f5e4af5f0

@ -40,10 +40,7 @@ def add_textequiv(parent, text=''):
unireg = ET.SubElement(textequiv, 'Unicode') unireg = ET.SubElement(textequiv, 'Unicode')
unireg.text = text unireg.text = text
def xml_reading_order(page, order_of_texts, id_of_texts, id_of_marginalia, found_polygons_marginals): def xml_reading_order(page, order_of_texts, id_of_texts, found_polygons_marginals):
"""
XXX side-effect: extends id_of_marginalia
"""
region_order = ET.SubElement(page, 'ReadingOrder') region_order = ET.SubElement(page, 'ReadingOrder')
region_order_sub = ET.SubElement(region_order, 'OrderedGroup') region_order_sub = ET.SubElement(region_order, 'OrderedGroup')
region_order_sub.set('id', "ro357564684568544579089") region_order_sub.set('id', "ro357564684568544579089")
@ -54,12 +51,10 @@ def xml_reading_order(page, order_of_texts, id_of_texts, id_of_marginalia, found
name.set('regionRef', id_of_texts[idx_text]) name.set('regionRef', id_of_texts[idx_text])
indexer_region += 1 indexer_region += 1
for _ in found_polygons_marginals: for _ in found_polygons_marginals:
id_of_marginalia.append('r%s' % indexer_region)
name = ET.SubElement(region_order_sub, 'RegionRefIndexed') name = ET.SubElement(region_order_sub, 'RegionRefIndexed')
name.set('index', str(indexer_region)) name.set('index', str(indexer_region))
name.set('regionRef', 'r%s' % indexer_region) name.set('regionRef', 'r%s' % indexer_region)
indexer_region += 1 indexer_region += 1
return id_of_marginalia
def order_and_id_of_texts(found_polygons_text_region, found_polygons_text_region_h, matrix_of_orders, indexes_sorted, index_of_types, kind_of_texts, ref_point): def order_and_id_of_texts(found_polygons_text_region, found_polygons_text_region_h, matrix_of_orders, indexes_sorted, index_of_types, kind_of_texts, ref_point):
indexes_sorted = np.array(indexes_sorted) indexes_sorted = np.array(indexes_sorted)

@ -140,10 +140,14 @@ class EynollahXmlWriter():
coord_page.set('points', self.calculate_page_coords(cont_page)) coord_page.set('points', self.calculate_page_coords(cont_page))
id_of_marginalia = [] id_of_marginalia = []
# Marginalia IDs continue the 'r<N>' numbering right after the ordered text
# regions. The sum must be parenthesised: '%' binds tighter than '+', so the
# unparenthesised form would try to add an int to the formatted string.
for idx_marginal, _ in enumerate(found_polygons_marginals):
    id_of_marginalia.append('r%s' % (len(order_of_texts) + idx_marginal))
id_indexer = 0 id_indexer = 0
id_indexer_l = 0 id_indexer_l = 0
if len(found_polygons_text_region) > 0: if len(found_polygons_text_region) > 0:
id_of_marginalia = xml_reading_order(page, order_of_texts, id_of_texts, id_of_marginalia, found_polygons_marginals) xml_reading_order(page, order_of_texts, id_of_texts, found_polygons_marginals)
for mm in range(len(found_polygons_text_region)): for mm in range(len(found_polygons_text_region)):
textregion = ET.SubElement(page, 'TextRegion') textregion = ET.SubElement(page, 'TextRegion')
textregion.set('id', 'r%s' % id_indexer) textregion.set('id', 'r%s' % id_indexer)
@ -191,9 +195,11 @@ class EynollahXmlWriter():
id_indexer = 0 id_indexer = 0
id_indexer_l = 0 id_indexer_l = 0
id_of_marginalia = [] id_of_marginalia = []
# Marginalia IDs continue the 'r<N>' numbering right after the ordered text
# regions. The sum must be parenthesised: '%' binds tighter than '+', so the
# unparenthesised form would try to add an int to the formatted string.
for idx_marginal, _ in enumerate(found_polygons_marginals):
    id_of_marginalia.append('r%s' % (len(order_of_texts) + idx_marginal))
if len(found_polygons_text_region) > 0: if len(found_polygons_text_region) > 0:
id_of_marginalia = xml_reading_order(page, order_of_texts, id_of_texts, id_of_marginalia, found_polygons_marginals) xml_reading_order(page, order_of_texts, id_of_texts, found_polygons_marginals)
for mm in range(len(found_polygons_text_region)): for mm in range(len(found_polygons_text_region)):
textregion=ET.SubElement(page, 'TextRegion') textregion=ET.SubElement(page, 'TextRegion')
textregion.set('id', 'r%s' % id_indexer) textregion.set('id', 'r%s' % id_indexer)

Loading…
Cancel
Save