|
|
@ -1307,20 +1307,19 @@ class eynollah:
|
|
|
|
|
|
|
|
|
|
|
|
for mm in range(len(found_polygons_text_region)):
|
|
|
|
for mm in range(len(found_polygons_text_region)):
|
|
|
|
textregion = ET.SubElement(page, 'TextRegion')
|
|
|
|
textregion = ET.SubElement(page, 'TextRegion')
|
|
|
|
textregion.set('id', 'r'+str(id_indexer))
|
|
|
|
textregion.set('id', 'r%s' % id_indexer)
|
|
|
|
id_indexer += 1
|
|
|
|
id_indexer += 1
|
|
|
|
textregion.set('type', 'paragraph')
|
|
|
|
textregion.set('type', 'paragraph')
|
|
|
|
coord_text = ET.SubElement(textregion, 'Coords')
|
|
|
|
coord_text = ET.SubElement(textregion, 'Coords')
|
|
|
|
coord_text.set('points', self.calculate_polygon_coords(found_polygons_text_region, mm, page_coord))
|
|
|
|
coord_text.set('points', self.calculate_polygon_coords(found_polygons_text_region, mm, page_coord))
|
|
|
|
for j in range(len(all_found_texline_polygons[mm])):
|
|
|
|
for j in range(len(all_found_texline_polygons[mm])):
|
|
|
|
textline = ET.SubElement(textregion, 'TextLine')
|
|
|
|
textline = ET.SubElement(textregion, 'TextLine')
|
|
|
|
textline.set('id', 'l' + str(id_indexer_l))
|
|
|
|
textline.set('id', 'l%s' % id_indexer_l)
|
|
|
|
id_indexer_l += 1
|
|
|
|
id_indexer_l += 1
|
|
|
|
coord = ET.SubElement(textline, 'Coords')
|
|
|
|
coord = ET.SubElement(textline, 'Coords')
|
|
|
|
add_textequiv(textline)
|
|
|
|
add_textequiv(textline)
|
|
|
|
points_co=''
|
|
|
|
points_co=''
|
|
|
|
for l in range(len(all_found_texline_polygons[mm][j])):
|
|
|
|
for l in range(len(all_found_texline_polygons[mm][j])):
|
|
|
|
#point = ET.SubElement(coord, 'Point')
|
|
|
|
|
|
|
|
if not curved_line:
|
|
|
|
if not curved_line:
|
|
|
|
if len(all_found_texline_polygons[mm][j][l]) == 2:
|
|
|
|
if len(all_found_texline_polygons[mm][j][l]) == 2:
|
|
|
|
textline_x_coord = max(0, int((all_found_texline_polygons[mm][j][l][0] + all_box_coord[mm][2] + page_coord[2]) / self.scale_x))
|
|
|
|
textline_x_coord = max(0, int((all_found_texline_polygons[mm][j][l][0] + all_box_coord[mm][2] + page_coord[2]) / self.scale_x))
|
|
|
@ -1354,12 +1353,6 @@ class eynollah:
|
|
|
|
coord.set('points', points_co)
|
|
|
|
coord.set('points', points_co)
|
|
|
|
|
|
|
|
|
|
|
|
add_textequiv(textregion)
|
|
|
|
add_textequiv(textregion)
|
|
|
|
try:
|
|
|
|
|
|
|
|
#id_indexer_l=0
|
|
|
|
|
|
|
|
try:
|
|
|
|
|
|
|
|
id_indexer_l = id_indexer_l
|
|
|
|
|
|
|
|
except:
|
|
|
|
|
|
|
|
id_indexer_l = 0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
for mm in range(len(found_polygons_marginals)):
|
|
|
|
for mm in range(len(found_polygons_marginals)):
|
|
|
|
textregion = ET.SubElement(page, 'TextRegion')
|
|
|
|
textregion = ET.SubElement(page, 'TextRegion')
|
|
|
@ -1396,8 +1389,6 @@ class eynollah:
|
|
|
|
if l < len(all_found_texline_polygons_marginals[mm][j]) - 1:
|
|
|
|
if l < len(all_found_texline_polygons_marginals[mm][j]) - 1:
|
|
|
|
points_co += ' '
|
|
|
|
points_co += ' '
|
|
|
|
coord.set('points',points_co)
|
|
|
|
coord.set('points',points_co)
|
|
|
|
except:
|
|
|
|
|
|
|
|
pass
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
id_indexer = len(found_polygons_text_region) + len(found_polygons_marginals)
|
|
|
|
id_indexer = len(found_polygons_text_region) + len(found_polygons_marginals)
|
|
|
|
for mm in range(len(found_polygons_text_region_img)):
|
|
|
|
for mm in range(len(found_polygons_text_region_img)):
|
|
|
@ -1414,7 +1405,6 @@ class eynollah:
|
|
|
|
points_co += ' '
|
|
|
|
points_co += ' '
|
|
|
|
coord_text.set('points', points_co)
|
|
|
|
coord_text.set('points', points_co)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
self.logger.info("filename stem: '%s'", self.image_filename_stem)
|
|
|
|
self.logger.info("filename stem: '%s'", self.image_filename_stem)
|
|
|
|
tree = ET.ElementTree(pcgts)
|
|
|
|
tree = ET.ElementTree(pcgts)
|
|
|
|
tree.write(os.path.join(dir_of_image, self.image_filename_stem) + ".xml")
|
|
|
|
tree.write(os.path.join(dir_of_image, self.image_filename_stem) + ".xml")
|
|
|
@ -1466,7 +1456,7 @@ class eynollah:
|
|
|
|
coord_text = ET.SubElement(textregion, 'Coords')
|
|
|
|
coord_text = ET.SubElement(textregion, 'Coords')
|
|
|
|
coord_text.set('points', self.calculate_polygon_coords(found_polygons_drop_capitals, mm, page_coord))
|
|
|
|
coord_text.set('points', self.calculate_polygon_coords(found_polygons_drop_capitals, mm, page_coord))
|
|
|
|
add_textequiv(textregion)
|
|
|
|
add_textequiv(textregion)
|
|
|
|
try:
|
|
|
|
|
|
|
|
for mm in range(len(found_polygons_marginals)):
|
|
|
|
for mm in range(len(found_polygons_marginals)):
|
|
|
|
textregion = ET.SubElement(page, 'TextRegion')
|
|
|
|
textregion = ET.SubElement(page, 'TextRegion')
|
|
|
|
textregion.set('id', id_of_marginalia[mm])
|
|
|
|
textregion.set('id', id_of_marginalia[mm])
|
|
|
@ -1505,10 +1495,7 @@ class eynollah:
|
|
|
|
points_co=points_co+' '
|
|
|
|
points_co=points_co+' '
|
|
|
|
coord.set('points',points_co)
|
|
|
|
coord.set('points',points_co)
|
|
|
|
add_textequiv(textregion)
|
|
|
|
add_textequiv(textregion)
|
|
|
|
except:
|
|
|
|
|
|
|
|
pass
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
try:
|
|
|
|
|
|
|
|
id_indexer = len(found_polygons_text_region) + len(found_polygons_text_region_h) + len(found_polygons_marginals) + len(found_polygons_drop_capitals)
|
|
|
|
id_indexer = len(found_polygons_text_region) + len(found_polygons_text_region_h) + len(found_polygons_marginals) + len(found_polygons_drop_capitals)
|
|
|
|
for mm in range(len(found_polygons_text_region_img)):
|
|
|
|
for mm in range(len(found_polygons_text_region_img)):
|
|
|
|
textregion=ET.SubElement(page, 'ImageRegion')
|
|
|
|
textregion=ET.SubElement(page, 'ImageRegion')
|
|
|
@ -1516,18 +1503,13 @@ class eynollah:
|
|
|
|
id_indexer += 1
|
|
|
|
id_indexer += 1
|
|
|
|
coord_text = ET.SubElement(textregion, 'Coords')
|
|
|
|
coord_text = ET.SubElement(textregion, 'Coords')
|
|
|
|
coord_text.set('points', self.calculate_polygon_coords(found_polygons_text_region_img, mm, page_coord))
|
|
|
|
coord_text.set('points', self.calculate_polygon_coords(found_polygons_text_region_img, mm, page_coord))
|
|
|
|
except:
|
|
|
|
|
|
|
|
pass
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
try:
|
|
|
|
|
|
|
|
for mm in range(len(found_polygons_tables)):
|
|
|
|
for mm in range(len(found_polygons_tables)):
|
|
|
|
textregion = ET.SubElement(page, 'TableRegion')
|
|
|
|
textregion = ET.SubElement(page, 'TableRegion')
|
|
|
|
textregion.set('id', 'r%s' %id_indexer)
|
|
|
|
textregion.set('id', 'r%s' %id_indexer)
|
|
|
|
id_indexer += 1
|
|
|
|
id_indexer += 1
|
|
|
|
coord_text = ET.SubElement(textregion, 'Coords')
|
|
|
|
coord_text = ET.SubElement(textregion, 'Coords')
|
|
|
|
coord_text.set('points', self.calculate_polygon_coords(found_polygons_tables, mm, page_coord))
|
|
|
|
coord_text.set('points', self.calculate_polygon_coords(found_polygons_tables, mm, page_coord))
|
|
|
|
except:
|
|
|
|
|
|
|
|
pass
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
self.logger.info("filename stem: '%s'", self.image_filename_stem)
|
|
|
|
self.logger.info("filename stem: '%s'", self.image_filename_stem)
|
|
|
|
tree = ET.ElementTree(pcgts)
|
|
|
|
tree = ET.ElementTree(pcgts)
|
|
|
|