Back on track: the freezing problem, the memory error, and the reading-order issues caused by drop capitals and marginals are resolved.

pull/28/head
vahidrezanezhad 4 years ago committed by GitHub
parent 43b8759acf
commit d5a9817390
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -152,12 +152,12 @@ class EynollahXmlWriter():
self.serialize_lines_in_region(textregion, all_found_texline_polygons, mm, page_coord, all_box_coord, slopes, counter) self.serialize_lines_in_region(textregion, all_found_texline_polygons, mm, page_coord, all_box_coord, slopes, counter)
add_textequiv(textregion) add_textequiv(textregion)
for marginal_polygon in found_polygons_marginals: for mm in range(len(found_polygons_marginals)):
marginal = ET.SubElement(page, 'TextRegion') marginal = ET.SubElement(page, 'TextRegion')
marginal.set('id', counter.next_region_id) marginal.set('id', counter.next_region_id)
marginal.set('type', 'marginalia') marginal.set('type', 'marginalia')
coord_text = ET.SubElement(marginal, 'Coords') coord_text = ET.SubElement(marginal, 'Coords')
coord_text.set('points', self.calculate_polygon_coords(marginal_polygon, page_coord)) coord_text.set('points', self.calculate_polygon_coords(found_polygons_marginals[mm], page_coord))
self.serialize_lines_in_marginal(marginal, all_found_texline_polygons_marginals, mm, page_coord, all_box_coord_marginals, slopes_marginals, counter) self.serialize_lines_in_marginal(marginal, all_found_texline_polygons_marginals, mm, page_coord, all_box_coord_marginals, slopes_marginals, counter)
for mm in range(len(found_polygons_text_region_img)): for mm in range(len(found_polygons_text_region_img)):
@ -194,7 +194,7 @@ class EynollahXmlWriter():
textregion.set('type', 'paragraph') textregion.set('type', 'paragraph')
coord_text = ET.SubElement(textregion, 'Coords') coord_text = ET.SubElement(textregion, 'Coords')
coord_text.set('points', self.calculate_polygon_coords(found_polygons_text_region[mm], page_coord)) coord_text.set('points', self.calculate_polygon_coords(found_polygons_text_region[mm], page_coord))
self.serialize_lines_in_region(textregion, all_found_texline_polygons, mm, page_coord, all_box_coord, slopes, counter_textregions) self.serialize_lines_in_region(textregion, all_found_texline_polygons, mm, page_coord, all_box_coord, slopes, counter)
add_textequiv(textregion) add_textequiv(textregion)
self.logger.debug('len(found_polygons_text_region_h) %s', len(found_polygons_text_region_h)) self.logger.debug('len(found_polygons_text_region_h) %s', len(found_polygons_text_region_h))
@ -204,15 +204,7 @@ class EynollahXmlWriter():
textregion.set('type','header') textregion.set('type','header')
coord_text = ET.SubElement(textregion, 'Coords') coord_text = ET.SubElement(textregion, 'Coords')
coord_text.set('points', self.calculate_polygon_coords(found_polygons_text_region_h[mm], page_coord)) coord_text.set('points', self.calculate_polygon_coords(found_polygons_text_region_h[mm], page_coord))
self.serialize_lines_in_region(textregion, all_found_texline_polygons_h, mm, page_coord, all_box_coord_h, slopes, counter_textregions) self.serialize_lines_in_region(textregion, all_found_texline_polygons_h, mm, page_coord, all_box_coord_h, slopes, counter)
add_textequiv(textregion)
for mm in range(len(found_polygons_drop_capitals)):
textregion=ET.SubElement(page, 'TextRegion')
textregion.set('id', counter.next_region_id)
textregion.set('type', 'drop-capital')
coord_text = ET.SubElement(textregion, 'Coords')
coord_text.set('points', self.calculate_polygon_coords(found_polygons_drop_capitals[mm], page_coord))
add_textequiv(textregion) add_textequiv(textregion)
for mm in range(len(found_polygons_marginals)): for mm in range(len(found_polygons_marginals)):
@ -224,6 +216,14 @@ class EynollahXmlWriter():
coord_text.set('points', self.calculate_polygon_coords(found_polygons_marginals[mm], page_coord)) coord_text.set('points', self.calculate_polygon_coords(found_polygons_marginals[mm], page_coord))
self.serialize_lines_in_marginal(marginal, all_found_texline_polygons_marginals, mm, page_coord, all_box_coord_marginals, slopes_marginals, counter) self.serialize_lines_in_marginal(marginal, all_found_texline_polygons_marginals, mm, page_coord, all_box_coord_marginals, slopes_marginals, counter)
for mm in range(len(found_polygons_drop_capitals)):
textregion=ET.SubElement(page, 'TextRegion')
textregion.set('id', counter.next_region_id)
textregion.set('type', 'drop-capital')
coord_text = ET.SubElement(textregion, 'Coords')
coord_text.set('points', self.calculate_polygon_coords(found_polygons_drop_capitals[mm], page_coord))
add_textequiv(textregion)
for mm in range(len(found_polygons_text_region_img)): for mm in range(len(found_polygons_text_region_img)):
textregion=ET.SubElement(page, 'ImageRegion') textregion=ET.SubElement(page, 'ImageRegion')
textregion.set('id', counter.next_region_id) textregion.set('id', counter.next_region_id)

Loading…
Cancel
Save