factor out add_textequiv

pull/19/head
Konstantin Baierer 4 years ago
parent b3dd6685e7
commit 23e97a5e0b

@ -107,7 +107,7 @@ from .utils import (
return_boxes_of_images_by_order_of_reading_new,
)
from .utils.xml import create_page_xml
from .utils.xml import create_page_xml, add_textequiv
from .utils.pil_cv2 import check_dpi
from .plot import EynollahPlotter
@ -1164,11 +1164,7 @@ class eynollah:
textline.set('id','l'+str(id_indexer_l))
id_indexer_l += 1
coord = ET.SubElement(textline, 'Coords')
texteq = ET.SubElement(textline, 'TextEquiv')
uni = ET.SubElement(texteq, 'Unicode')
uni.text = ' '
#points = ET.SubElement(coord, 'Points')
add_textequiv(textline)
points_co=''
for l in range(len(all_found_texline_polygons[region_idx][j])):
@ -1303,7 +1299,6 @@ class eynollah:
coord_page = ET.SubElement(page_print_sub, "Coords")
coord_page.set('points', self.calculate_page_coords())
id_of_marginalia = []
id_indexer = 0
id_indexer_l = 0
@ -1322,9 +1317,7 @@ class eynollah:
textline.set('id', 'l' + str(id_indexer_l))
id_indexer_l += 1
coord = ET.SubElement(textline, 'Coords')
texteq=ET.SubElement(textline, 'TextEquiv')
uni=ET.SubElement(texteq, 'Unicode')
uni.text = ' '
add_textequiv(textline)
points_co=''
for l in range(len(all_found_texline_polygons[mm][j])):
#point = ET.SubElement(coord, 'Point')
@ -1360,9 +1353,7 @@ class eynollah:
points_co += ' '
coord.set('points', points_co)
texteqreg = ET.SubElement(textregion, 'TextEquiv')
unireg = ET.SubElement(texteqreg, 'Unicode')
unireg.text = ' '
add_textequiv(textregion)
try:
#id_indexer_l=0
try:
@ -1381,10 +1372,8 @@ class eynollah:
textline.set('id','l'+str(id_indexer_l))
id_indexer_l+=1
coord = ET.SubElement(textline, 'Coords')
texteq = ET.SubElement(textline, 'TextEquiv')
uni = ET.SubElement(texteq, 'Unicode')
uni.text = ' '
points_co=''
add_textequiv(textline)
points_co = ''
for l in range(len(all_found_texline_polygons_marginals[mm][j])):
if not curved_line:
if len(all_found_texline_polygons_marginals[mm][j][l]) == 2:
@ -1453,9 +1442,7 @@ class eynollah:
coord_text = ET.SubElement(textregion, 'Coords')
coord_text.set('points', self.calculate_polygon_coords(found_polygons_text_region, mm, page_coord))
id_indexer_l = self.serialize_lines_in_region(textregion, all_found_texline_polygons, mm, page_coord, all_box_coord, slopes, id_indexer_l)
texteqreg = ET.SubElement(textregion, 'TextEquiv')
unireg = ET.SubElement(texteqreg, 'Unicode')
unireg.text = ' '
add_textequiv(textregion)
self.logger.debug('len(found_polygons_text_region_h) %s', len(found_polygons_text_region_h))
if len(found_polygons_text_region_h) > 0:
@ -1467,9 +1454,7 @@ class eynollah:
coord_text = ET.SubElement(textregion, 'Coords')
coord_text.set('points', self.calculate_polygon_coords(found_polygons_text_region_h, mm, page_coord))
id_indexer_l = self.serialize_lines_in_region(textregion, all_found_texline_polygons_h, mm, page_coord, all_box_coord_h, slopes, id_indexer_l)
texteqreg = ET.SubElement(textregion, 'TextEquiv')
unireg = ET.SubElement(texteqreg, 'Unicode')
unireg.text = ' '
add_textequiv(textregion)
if len(found_polygons_drop_capitals) > 0:
id_indexer = len(found_polygons_text_region) + len(found_polygons_text_region_h) + len(found_polygons_marginals)
@ -1480,9 +1465,7 @@ class eynollah:
textregion.set('type','drop-capital')
coord_text = ET.SubElement(textregion, 'Coords')
coord_text.set('points', self.calculate_polygon_coords(found_polygons_drop_capitals, mm, page_coord))
texteqreg = ET.SubElement(textregion, 'TextEquiv')
unireg=ET.SubElement(texteqreg, 'Unicode')
unireg.text = ' '
add_textequiv(textregion)
try:
for mm in range(len(found_polygons_marginals)):
textregion = ET.SubElement(page, 'TextRegion')
@ -1496,9 +1479,7 @@ class eynollah:
textline.set('id', 'l%s' % id_indexer_l)
id_indexer_l += 1
coord = ET.SubElement(textline, 'Coords')
texteq = ET.SubElement(textline, 'TextEquiv')
uni = ET.SubElement(texteq, 'Unicode')
uni.text = ' '
add_textequiv(textline)
points_co=''
for l in range(len(all_found_texline_polygons_marginals[mm][j])):
if not self.curved_line:
@ -1523,9 +1504,7 @@ class eynollah:
if l<(len(all_found_texline_polygons_marginals[mm][j])-1):
points_co=points_co+' '
coord.set('points',points_co)
texteqreg = ET.SubElement(textregion, 'TextEquiv')
unireg = ET.SubElement(texteqreg, 'Unicode')
unireg.text = ' '
add_textequiv(textregion)
except:
pass

@ -32,3 +32,7 @@ def create_page_xml(imageFilename, height, width):
return pcgts, page
def add_textequiv(parent, text=''):
    """Append an empty-by-default ``TextEquiv/Unicode`` pair to *parent*.

    A ``TextEquiv`` sub-element is created under *parent*, and a ``Unicode``
    sub-element is created under that ``TextEquiv``; the ``Unicode``
    element's text is set to *text*.

    Args:
        parent: The element that receives the new ``TextEquiv`` child.
        text: The value assigned to the ``Unicode`` element's ``text``.
            Defaults to the empty string.

    Returns:
        The newly created ``TextEquiv`` element, so callers can extend it
        without having to look it up again. Callers that ignore the return
        value are unaffected.
    """
    # NOTE(review): the inline code this helper replaces assigned a single
    # space (' ') to Unicode.text, whereas the default here is '' -- confirm
    # that the difference in serialized output is intended.
    textequiv = ET.SubElement(parent, 'TextEquiv')
    unicode_el = ET.SubElement(textequiv, 'Unicode')
    unicode_el.text = text
    return textequiv

Loading…
Cancel
Save