factor out add_textequiv

pull/19/head
Konstantin Baierer 4 years ago
parent b3dd6685e7
commit 23e97a5e0b

@ -107,7 +107,7 @@ from .utils import (
return_boxes_of_images_by_order_of_reading_new, return_boxes_of_images_by_order_of_reading_new,
) )
from .utils.xml import create_page_xml from .utils.xml import create_page_xml, add_textequiv
from .utils.pil_cv2 import check_dpi from .utils.pil_cv2 import check_dpi
from .plot import EynollahPlotter from .plot import EynollahPlotter
@ -1164,11 +1164,7 @@ class eynollah:
textline.set('id','l'+str(id_indexer_l)) textline.set('id','l'+str(id_indexer_l))
id_indexer_l += 1 id_indexer_l += 1
coord = ET.SubElement(textline, 'Coords') coord = ET.SubElement(textline, 'Coords')
texteq = ET.SubElement(textline, 'TextEquiv') add_textequiv(textline)
uni = ET.SubElement(texteq, 'Unicode')
uni.text = ' '
#points = ET.SubElement(coord, 'Points')
points_co='' points_co=''
for l in range(len(all_found_texline_polygons[region_idx][j])): for l in range(len(all_found_texline_polygons[region_idx][j])):
@ -1303,7 +1299,6 @@ class eynollah:
coord_page = ET.SubElement(page_print_sub, "Coords") coord_page = ET.SubElement(page_print_sub, "Coords")
coord_page.set('points', self.calculate_page_coords()) coord_page.set('points', self.calculate_page_coords())
id_of_marginalia = [] id_of_marginalia = []
id_indexer = 0 id_indexer = 0
id_indexer_l = 0 id_indexer_l = 0
@ -1322,9 +1317,7 @@ class eynollah:
textline.set('id', 'l' + str(id_indexer_l)) textline.set('id', 'l' + str(id_indexer_l))
id_indexer_l += 1 id_indexer_l += 1
coord = ET.SubElement(textline, 'Coords') coord = ET.SubElement(textline, 'Coords')
texteq=ET.SubElement(textline, 'TextEquiv') add_textequiv(textline)
uni=ET.SubElement(texteq, 'Unicode')
uni.text = ' '
points_co='' points_co=''
for l in range(len(all_found_texline_polygons[mm][j])): for l in range(len(all_found_texline_polygons[mm][j])):
#point = ET.SubElement(coord, 'Point') #point = ET.SubElement(coord, 'Point')
@ -1360,9 +1353,7 @@ class eynollah:
points_co += ' ' points_co += ' '
coord.set('points', points_co) coord.set('points', points_co)
texteqreg = ET.SubElement(textregion, 'TextEquiv') add_textequiv(textregion)
unireg = ET.SubElement(texteqreg, 'Unicode')
unireg.text = ' '
try: try:
#id_indexer_l=0 #id_indexer_l=0
try: try:
@ -1381,10 +1372,8 @@ class eynollah:
textline.set('id','l'+str(id_indexer_l)) textline.set('id','l'+str(id_indexer_l))
id_indexer_l+=1 id_indexer_l+=1
coord = ET.SubElement(textline, 'Coords') coord = ET.SubElement(textline, 'Coords')
texteq = ET.SubElement(textline, 'TextEquiv') add_textequiv(textline)
uni = ET.SubElement(texteq, 'Unicode') points_co = ''
uni.text = ' '
points_co=''
for l in range(len(all_found_texline_polygons_marginals[mm][j])): for l in range(len(all_found_texline_polygons_marginals[mm][j])):
if not curved_line: if not curved_line:
if len(all_found_texline_polygons_marginals[mm][j][l]) == 2: if len(all_found_texline_polygons_marginals[mm][j][l]) == 2:
@ -1453,9 +1442,7 @@ class eynollah:
coord_text = ET.SubElement(textregion, 'Coords') coord_text = ET.SubElement(textregion, 'Coords')
coord_text.set('points', self.calculate_polygon_coords(found_polygons_text_region, mm, page_coord)) coord_text.set('points', self.calculate_polygon_coords(found_polygons_text_region, mm, page_coord))
id_indexer_l = self.serialize_lines_in_region(textregion, all_found_texline_polygons, mm, page_coord, all_box_coord, slopes, id_indexer_l) id_indexer_l = self.serialize_lines_in_region(textregion, all_found_texline_polygons, mm, page_coord, all_box_coord, slopes, id_indexer_l)
texteqreg = ET.SubElement(textregion, 'TextEquiv') add_textequiv(textregion)
unireg = ET.SubElement(texteqreg, 'Unicode')
unireg.text = ' '
self.logger.debug('len(found_polygons_text_region_h) %s', len(found_polygons_text_region_h)) self.logger.debug('len(found_polygons_text_region_h) %s', len(found_polygons_text_region_h))
if len(found_polygons_text_region_h) > 0: if len(found_polygons_text_region_h) > 0:
@ -1467,9 +1454,7 @@ class eynollah:
coord_text = ET.SubElement(textregion, 'Coords') coord_text = ET.SubElement(textregion, 'Coords')
coord_text.set('points', self.calculate_polygon_coords(found_polygons_text_region_h, mm, page_coord)) coord_text.set('points', self.calculate_polygon_coords(found_polygons_text_region_h, mm, page_coord))
id_indexer_l = self.serialize_lines_in_region(textregion, all_found_texline_polygons_h, mm, page_coord, all_box_coord_h, slopes, id_indexer_l) id_indexer_l = self.serialize_lines_in_region(textregion, all_found_texline_polygons_h, mm, page_coord, all_box_coord_h, slopes, id_indexer_l)
texteqreg = ET.SubElement(textregion, 'TextEquiv') add_textequiv(textregion)
unireg = ET.SubElement(texteqreg, 'Unicode')
unireg.text = ' '
if len(found_polygons_drop_capitals) > 0: if len(found_polygons_drop_capitals) > 0:
id_indexer = len(found_polygons_text_region) + len(found_polygons_text_region_h) + len(found_polygons_marginals) id_indexer = len(found_polygons_text_region) + len(found_polygons_text_region_h) + len(found_polygons_marginals)
@ -1480,9 +1465,7 @@ class eynollah:
textregion.set('type','drop-capital') textregion.set('type','drop-capital')
coord_text = ET.SubElement(textregion, 'Coords') coord_text = ET.SubElement(textregion, 'Coords')
coord_text.set('points', self.calculate_polygon_coords(found_polygons_drop_capitals, mm, page_coord)) coord_text.set('points', self.calculate_polygon_coords(found_polygons_drop_capitals, mm, page_coord))
texteqreg = ET.SubElement(textregion, 'TextEquiv') add_textequiv(textregion)
unireg=ET.SubElement(texteqreg, 'Unicode')
unireg.text = ' '
try: try:
for mm in range(len(found_polygons_marginals)): for mm in range(len(found_polygons_marginals)):
textregion = ET.SubElement(page, 'TextRegion') textregion = ET.SubElement(page, 'TextRegion')
@ -1496,9 +1479,7 @@ class eynollah:
textline.set('id', 'l%s' % id_indexer_l) textline.set('id', 'l%s' % id_indexer_l)
id_indexer_l += 1 id_indexer_l += 1
coord = ET.SubElement(textline, 'Coords') coord = ET.SubElement(textline, 'Coords')
texteq = ET.SubElement(textline, 'TextEquiv') add_textequiv(textline)
uni = ET.SubElement(texteq, 'Unicode')
uni.text = ' '
points_co='' points_co=''
for l in range(len(all_found_texline_polygons_marginals[mm][j])): for l in range(len(all_found_texline_polygons_marginals[mm][j])):
if not self.curved_line: if not self.curved_line:
@ -1523,9 +1504,7 @@ class eynollah:
if l<(len(all_found_texline_polygons_marginals[mm][j])-1): if l<(len(all_found_texline_polygons_marginals[mm][j])-1):
points_co=points_co+' ' points_co=points_co+' '
coord.set('points',points_co) coord.set('points',points_co)
texteqreg = ET.SubElement(textregion, 'TextEquiv') add_textequiv(textregion)
unireg = ET.SubElement(texteqreg, 'Unicode')
unireg.text = ' '
except: except:
pass pass

@ -32,3 +32,7 @@ def create_page_xml(imageFilename, height, width):
return pcgts, page return pcgts, page
def add_textequiv(parent, text=''):
    """Append a ``TextEquiv``/``Unicode`` element pair to *parent*.

    A ``TextEquiv`` child is created under *parent*, and a ``Unicode``
    child is created under that ``TextEquiv``. The ``Unicode`` element's
    text is set to *text*.

    Args:
        parent: The element that receives the new ``TextEquiv`` child.
        text: Text content for the ``Unicode`` element. Defaults to the
            empty string.

    Returns:
        The newly created ``TextEquiv`` element, so callers can attach
        further attributes or children to it. Callers that ignore the
        return value behave exactly as before.
    """
    # NOTE(review): the default is the empty string, not a single space --
    # confirm that consumers of the generated XML accept an empty Unicode
    # element.
    textequiv = ET.SubElement(parent, 'TextEquiv')
    unicode_el = ET.SubElement(textequiv, 'Unicode')
    unicode_el.text = text
    return textequiv

Loading…
Cancel
Save