xml_reading_order takes id_of_marginalia directly

pull/28/head
Konstantin Baierer 4 years ago
parent 98568402c7
commit 7eb973b3aa

@ -41,21 +41,20 @@ def add_textequiv(parent, text=''):
unireg = ET.SubElement(textequiv, 'Unicode') unireg = ET.SubElement(textequiv, 'Unicode')
unireg.text = text unireg.text = text
def xml_reading_order(page, order_of_texts, id_of_texts, found_polygons_marginals): def xml_reading_order(page, order_of_texts, id_of_texts, id_of_marginalia):
region_order = ET.SubElement(page, 'ReadingOrder') region_order = ET.SubElement(page, 'ReadingOrder')
region_order_sub = ET.SubElement(region_order, 'OrderedGroup') region_order_sub = ET.SubElement(region_order, 'OrderedGroup')
region_order_sub.set('id', "ro357564684568544579089") region_order_sub.set('id', "ro357564684568544579089")
indexer_region = 0 indexer_region = 0
for idx_text in order_of_texts: for id_of_textregion in order_of_texts:
name = ET.SubElement(region_order_sub, 'RegionRefIndexed') name = ET.SubElement(region_order_sub, 'RegionRefIndexed')
name.set('index', str(indexer_region)) name.set('index', str(indexer_region))
name.set('regionRef', id_of_texts[idx_text]) name.set('regionRef', id_of_textregion)
indexer_region += 1 indexer_region += 1
for _ in found_polygons_marginals: for id_marginal in id_of_marginalia:
name = ET.SubElement(region_order_sub, 'RegionRefIndexed') name = ET.SubElement(region_order_sub, 'RegionRefIndexed')
name.set('index', str(indexer_region)) name.set('index', str(indexer_region))
name.set('regionRef', 'r%s' % indexer_region) name.set('regionRef', id_marginal)
indexer_region += 1
def order_and_id_of_texts(found_polygons_text_region, found_polygons_text_region_h, matrix_of_orders, indexes_sorted, index_of_types, kind_of_texts, ref_point): def order_and_id_of_texts(found_polygons_text_region, found_polygons_text_region_h, matrix_of_orders, indexes_sorted, index_of_types, kind_of_texts, ref_point):
indexes_sorted = np.array(indexes_sorted) indexes_sorted = np.array(indexes_sorted)

@ -140,12 +140,8 @@ class EynollahXmlWriter():
counter_textregions = EynollahIdCounter() counter_textregions = EynollahIdCounter()
counter_marginals = EynollahIdCounter(region_idx=len(order_of_texts)) counter_marginals = EynollahIdCounter(region_idx=len(order_of_texts))
id_of_marginalia = []
for _ in found_polygons_marginals:
id_of_marginalia.append(counter_marginals.next_region_id)
if len(found_polygons_text_region) > 0: if len(found_polygons_text_region) > 0:
xml_reading_order(page, order_of_texts, id_of_texts, found_polygons_marginals) xml_reading_order(page, order_of_texts, id_of_texts, [counter_marginals.next_region_id for _ in found_polygons_marginals])
for mm in range(len(found_polygons_text_region)): for mm in range(len(found_polygons_text_region)):
textregion = ET.SubElement(page, 'TextRegion') textregion = ET.SubElement(page, 'TextRegion')
@ -190,11 +186,8 @@ class EynollahXmlWriter():
counter_marginals = EynollahIdCounter(region_idx=len(order_of_texts)) counter_marginals = EynollahIdCounter(region_idx=len(order_of_texts))
counter_textregions = EynollahIdCounter() counter_textregions = EynollahIdCounter()
id_of_marginalia = [] xml_reading_order(page, order_of_texts, id_of_texts, [counter_marginals.next_region_id for _ in found_polygons_marginals])
for _ in found_polygons_marginals:
id_of_marginalia.append(counter_marginals.next_region_id)
xml_reading_order(page, order_of_texts, id_of_texts, found_polygons_marginals)
for mm in range(len(found_polygons_text_region)): for mm in range(len(found_polygons_text_region)):
textregion=ET.SubElement(page, 'TextRegion') textregion=ET.SubElement(page, 'TextRegion')
textregion.set('id', counter_textregions.next_region_id) textregion.set('id', counter_textregions.next_region_id)

Loading…
Cancel
Save