Remove (hopefully) unnecessary try-except blocks

pull/19/head
Konstantin Baierer 4 years ago
parent 23e97a5e0b
commit 6d476230ce

@ -1306,21 +1306,20 @@ class eynollah:
self.xml_reading_order(page, order_of_texts, id_of_texts, id_of_marginalia, found_polygons_marginals) self.xml_reading_order(page, order_of_texts, id_of_texts, id_of_marginalia, found_polygons_marginals)
for mm in range(len(found_polygons_text_region)): for mm in range(len(found_polygons_text_region)):
textregion=ET.SubElement(page, 'TextRegion') textregion = ET.SubElement(page, 'TextRegion')
textregion.set('id', 'r'+str(id_indexer)) textregion.set('id', 'r%s' % id_indexer)
id_indexer += 1 id_indexer += 1
textregion.set('type', 'paragraph') textregion.set('type', 'paragraph')
coord_text = ET.SubElement(textregion, 'Coords') coord_text = ET.SubElement(textregion, 'Coords')
coord_text.set('points', self.calculate_polygon_coords(found_polygons_text_region, mm, page_coord)) coord_text.set('points', self.calculate_polygon_coords(found_polygons_text_region, mm, page_coord))
for j in range(len(all_found_texline_polygons[mm])): for j in range(len(all_found_texline_polygons[mm])):
textline=ET.SubElement(textregion, 'TextLine') textline = ET.SubElement(textregion, 'TextLine')
textline.set('id', 'l' + str(id_indexer_l)) textline.set('id', 'l%s' % id_indexer_l)
id_indexer_l += 1 id_indexer_l += 1
coord = ET.SubElement(textline, 'Coords') coord = ET.SubElement(textline, 'Coords')
add_textequiv(textline) add_textequiv(textline)
points_co='' points_co=''
for l in range(len(all_found_texline_polygons[mm][j])): for l in range(len(all_found_texline_polygons[mm][j])):
#point = ET.SubElement(coord, 'Point')
if not curved_line: if not curved_line:
if len(all_found_texline_polygons[mm][j][l]) == 2: if len(all_found_texline_polygons[mm][j][l]) == 2:
textline_x_coord = max(0, int((all_found_texline_polygons[mm][j][l][0] + all_box_coord[mm][2] + page_coord[2]) / self.scale_x)) textline_x_coord = max(0, int((all_found_texline_polygons[mm][j][l][0] + all_box_coord[mm][2] + page_coord[2]) / self.scale_x))
@ -1354,50 +1353,42 @@ class eynollah:
coord.set('points', points_co) coord.set('points', points_co)
add_textequiv(textregion) add_textequiv(textregion)
try:
#id_indexer_l=0 for mm in range(len(found_polygons_marginals)):
try: textregion = ET.SubElement(page, 'TextRegion')
id_indexer_l = id_indexer_l textregion.set('id', id_of_marginalia[mm])
except: textregion.set('type', 'marginalia')
id_indexer_l = 0 coord_text = ET.SubElement(textregion, 'Coords')
coord_text.set('points', self.calculate_polygon_coords(found_polygons_marginals, mm, page_coord))
for mm in range(len(found_polygons_marginals)): for j in range(len(all_found_texline_polygons_marginals[mm])):
textregion = ET.SubElement(page, 'TextRegion') textline = ET.SubElement(textregion, 'TextLine')
textregion.set('id', id_of_marginalia[mm]) textline.set('id','l'+str(id_indexer_l))
textregion.set('type', 'marginalia') id_indexer_l += 1
coord_text = ET.SubElement(textregion, 'Coords') coord = ET.SubElement(textline, 'Coords')
coord_text.set('points', self.calculate_polygon_coords(found_polygons_marginals, mm, page_coord)) add_textequiv(textline)
for j in range(len(all_found_texline_polygons_marginals[mm])): points_co = ''
textline=ET.SubElement(textregion, 'TextLine') for l in range(len(all_found_texline_polygons_marginals[mm][j])):
textline.set('id','l'+str(id_indexer_l)) if not curved_line:
id_indexer_l+=1 if len(all_found_texline_polygons_marginals[mm][j][l]) == 2:
coord = ET.SubElement(textline, 'Coords') points_co += str(int((all_found_texline_polygons_marginals[mm][j][l][0] + all_box_coord_marginals[mm][2] + page_coord[2]) / self.scale_x))
add_textequiv(textline) points_co += ','
points_co = '' points_co += str(int((all_found_texline_polygons_marginals[mm][j][l][1] + all_box_coord_marginals[mm][0] + page_coord[0]) / self.scale_y))
for l in range(len(all_found_texline_polygons_marginals[mm][j])):
if not curved_line:
if len(all_found_texline_polygons_marginals[mm][j][l]) == 2:
points_co += str(int((all_found_texline_polygons_marginals[mm][j][l][0] + all_box_coord_marginals[mm][2] + page_coord[2]) / self.scale_x))
points_co += ','
points_co += str(int((all_found_texline_polygons_marginals[mm][j][l][1] + all_box_coord_marginals[mm][0] + page_coord[0]) / self.scale_y))
else:
points_co += str(int((all_found_texline_polygons_marginals[mm][j][l][0][0] + all_box_coord_marginals[mm][2] + page_coord[2]) / self.scale_x))
points_co += ','
points_co += str(int((all_found_texline_polygons_marginals[mm][j][l][0][1] + all_box_coord_marginals[mm][0] + page_coord[0])/self.scale_y))
else: else:
if len(all_found_texline_polygons_marginals[mm][j][l]) == 2: points_co += str(int((all_found_texline_polygons_marginals[mm][j][l][0][0] + all_box_coord_marginals[mm][2] + page_coord[2]) / self.scale_x))
points_co += str(int((all_found_texline_polygons_marginals[mm][j][l][0] + page_coord[2]) / self.scale_x)) points_co += ','
points_co += ',' points_co += str(int((all_found_texline_polygons_marginals[mm][j][l][0][1] + all_box_coord_marginals[mm][0] + page_coord[0])/self.scale_y))
points_co += str(int((all_found_texline_polygons_marginals[mm][j][l][1] + page_coord[0]) / self.scale_y)) else:
else: if len(all_found_texline_polygons_marginals[mm][j][l]) == 2:
points_co += str(int((all_found_texline_polygons_marginals[mm][j][l][0][0] + page_coord[2]) / self.scale_x)) points_co += str(int((all_found_texline_polygons_marginals[mm][j][l][0] + page_coord[2]) / self.scale_x))
points_co += ',' points_co += ','
points_co += str(int((all_found_texline_polygons_marginals[mm][j][l][0][1] + page_coord[0]) / self.scale_y)) points_co += str(int((all_found_texline_polygons_marginals[mm][j][l][1] + page_coord[0]) / self.scale_y))
if l < len(all_found_texline_polygons_marginals[mm][j]) - 1: else:
points_co += ' ' points_co += str(int((all_found_texline_polygons_marginals[mm][j][l][0][0] + page_coord[2]) / self.scale_x))
coord.set('points',points_co) points_co += ','
except: points_co += str(int((all_found_texline_polygons_marginals[mm][j][l][0][1] + page_coord[0]) / self.scale_y))
pass if l < len(all_found_texline_polygons_marginals[mm][j]) - 1:
points_co += ' '
coord.set('points',points_co)
id_indexer = len(found_polygons_text_region) + len(found_polygons_marginals) id_indexer = len(found_polygons_text_region) + len(found_polygons_marginals)
for mm in range(len(found_polygons_text_region_img)): for mm in range(len(found_polygons_text_region_img)):
@ -1414,7 +1405,6 @@ class eynollah:
points_co += ' ' points_co += ' '
coord_text.set('points', points_co) coord_text.set('points', points_co)
self.logger.info("filename stem: '%s'", self.image_filename_stem) self.logger.info("filename stem: '%s'", self.image_filename_stem)
tree = ET.ElementTree(pcgts) tree = ET.ElementTree(pcgts)
tree.write(os.path.join(dir_of_image, self.image_filename_stem) + ".xml") tree.write(os.path.join(dir_of_image, self.image_filename_stem) + ".xml")
@ -1462,72 +1452,64 @@ class eynollah:
textregion=ET.SubElement(page, 'TextRegion') textregion=ET.SubElement(page, 'TextRegion')
textregion.set('id',' r%s' % id_indexer) textregion.set('id',' r%s' % id_indexer)
id_indexer += 1 id_indexer += 1
textregion.set('type','drop-capital') textregion.set('type', 'drop-capital')
coord_text = ET.SubElement(textregion, 'Coords') coord_text = ET.SubElement(textregion, 'Coords')
coord_text.set('points', self.calculate_polygon_coords(found_polygons_drop_capitals, mm, page_coord)) coord_text.set('points', self.calculate_polygon_coords(found_polygons_drop_capitals, mm, page_coord))
add_textequiv(textregion) add_textequiv(textregion)
try:
for mm in range(len(found_polygons_marginals)):
textregion = ET.SubElement(page, 'TextRegion')
textregion.set('id', id_of_marginalia[mm])
textregion.set('type', 'marginalia')
coord_text = ET.SubElement(textregion, 'Coords')
coord_text.set('points', self.calculate_polygon_coords(found_polygons_marginals, mm, page_coord))
for j in range(len(all_found_texline_polygons_marginals[mm])): for mm in range(len(found_polygons_marginals)):
textline = ET.SubElement(textregion, 'TextLine') textregion = ET.SubElement(page, 'TextRegion')
textline.set('id', 'l%s' % id_indexer_l) textregion.set('id', id_of_marginalia[mm])
id_indexer_l += 1 textregion.set('type', 'marginalia')
coord = ET.SubElement(textline, 'Coords') coord_text = ET.SubElement(textregion, 'Coords')
add_textequiv(textline) coord_text.set('points', self.calculate_polygon_coords(found_polygons_marginals, mm, page_coord))
points_co=''
for l in range(len(all_found_texline_polygons_marginals[mm][j])): for j in range(len(all_found_texline_polygons_marginals[mm])):
if not self.curved_line: textline = ET.SubElement(textregion, 'TextLine')
if len(all_found_texline_polygons_marginals[mm][j][l]) == 2: textline.set('id', 'l%s' % id_indexer_l)
points_co += str(int((all_found_texline_polygons_marginals[mm][j][l][0] + all_box_coord_marginals[mm][2] + page_coord[2]) / self.scale_x)) id_indexer_l += 1
points_co += ',' coord = ET.SubElement(textline, 'Coords')
points_co += str(int((all_found_texline_polygons_marginals[mm][j][l][1] + all_box_coord_marginals[mm][0] + page_coord[0]) / self.scale_y)) add_textequiv(textline)
else: points_co=''
points_co += str(int((all_found_texline_polygons_marginals[mm][j][l][0][0] + all_box_coord_marginals[mm][2] + page_coord[2]) / self.scale_x)) for l in range(len(all_found_texline_polygons_marginals[mm][j])):
points_co += ',' if not self.curved_line:
points_co+= str(int((all_found_texline_polygons_marginals[mm][j][l][0][1] + all_box_coord_marginals[mm][0] + page_coord[0]) / self.scale_y)) if len(all_found_texline_polygons_marginals[mm][j][l]) == 2:
points_co += str(int((all_found_texline_polygons_marginals[mm][j][l][0] + all_box_coord_marginals[mm][2] + page_coord[2]) / self.scale_x))
points_co += ','
points_co += str(int((all_found_texline_polygons_marginals[mm][j][l][1] + all_box_coord_marginals[mm][0] + page_coord[0]) / self.scale_y))
else: else:
if len(all_found_texline_polygons_marginals[mm][j][l])==2: points_co += str(int((all_found_texline_polygons_marginals[mm][j][l][0][0] + all_box_coord_marginals[mm][2] + page_coord[2]) / self.scale_x))
points_co += str(int((all_found_texline_polygons_marginals[mm][j][l][0] + page_coord[2]) / self.scale_x)) points_co += ','
points_co += ',' points_co+= str(int((all_found_texline_polygons_marginals[mm][j][l][0][1] + all_box_coord_marginals[mm][0] + page_coord[0]) / self.scale_y))
points_co += str(int((all_found_texline_polygons_marginals[mm][j][l][1] + page_coord[0]) / self.scale_y)) else:
else: if len(all_found_texline_polygons_marginals[mm][j][l])==2:
points_co += str(int((all_found_texline_polygons_marginals[mm][j][l][0][0] + page_coord[2]) / self.scale_x)) points_co += str(int((all_found_texline_polygons_marginals[mm][j][l][0] + page_coord[2]) / self.scale_x))
points_co += ',' points_co += ','
points_co += str(int((all_found_texline_polygons_marginals[mm][j][l][0][1] + page_coord[0]) / self.scale_y)) points_co += str(int((all_found_texline_polygons_marginals[mm][j][l][1] + page_coord[0]) / self.scale_y))
else:
points_co += str(int((all_found_texline_polygons_marginals[mm][j][l][0][0] + page_coord[2]) / self.scale_x))
points_co += ','
points_co += str(int((all_found_texline_polygons_marginals[mm][j][l][0][1] + page_coord[0]) / self.scale_y))
if l<(len(all_found_texline_polygons_marginals[mm][j])-1): if l<(len(all_found_texline_polygons_marginals[mm][j])-1):
points_co=points_co+' ' points_co=points_co+' '
coord.set('points',points_co) coord.set('points',points_co)
add_textequiv(textregion) add_textequiv(textregion)
except:
pass
try: id_indexer = len(found_polygons_text_region) + len(found_polygons_text_region_h) + len(found_polygons_marginals) + len(found_polygons_drop_capitals)
id_indexer = len(found_polygons_text_region) + len(found_polygons_text_region_h) + len(found_polygons_marginals) + len(found_polygons_drop_capitals) for mm in range(len(found_polygons_text_region_img)):
for mm in range(len(found_polygons_text_region_img)): textregion=ET.SubElement(page, 'ImageRegion')
textregion=ET.SubElement(page, 'ImageRegion') textregion.set('id', 'r%s' % id_indexer)
textregion.set('id','r%s' % id_indexer) id_indexer += 1
id_indexer += 1 coord_text = ET.SubElement(textregion, 'Coords')
coord_text = ET.SubElement(textregion, 'Coords') coord_text.set('points', self.calculate_polygon_coords(found_polygons_text_region_img, mm, page_coord))
coord_text.set('points', self.calculate_polygon_coords(found_polygons_text_region_img, mm, page_coord))
except:
pass
try: for mm in range(len(found_polygons_tables)):
for mm in range(len(found_polygons_tables)): textregion = ET.SubElement(page, 'TableRegion')
textregion = ET.SubElement(page, 'TableRegion') textregion.set('id', 'r%s' %id_indexer)
textregion.set('id', 'r%s' %id_indexer) id_indexer += 1
id_indexer += 1 coord_text = ET.SubElement(textregion, 'Coords')
coord_text = ET.SubElement(textregion, 'Coords') coord_text.set('points', self.calculate_polygon_coords(found_polygons_tables, mm, page_coord))
coord_text.set('points', self.calculate_polygon_coords(found_polygons_tables, mm, page_coord))
except:
pass
self.logger.info("filename stem: '%s'", self.image_filename_stem) self.logger.info("filename stem: '%s'", self.image_filename_stem)
tree = ET.ElementTree(pcgts) tree = ET.ElementTree(pcgts)

Loading…
Cancel
Save