The text region coordinates are now correctly written into the XML output when using the skip layout and reading order option

pull/156/head
vahidrezanezhad 1 week ago
parent 83211ae684
commit 21ec4fbfb5

@ -4333,7 +4333,7 @@ class Eynollah:
cont_page, page_coord, order_text_new, id_of_texts_tot, cont_page, page_coord, order_text_new, id_of_texts_tot,
all_found_textline_polygons, page_coord, polygons_of_images, polygons_of_marginals, all_found_textline_polygons, page_coord, polygons_of_images, polygons_of_marginals,
all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals,
cont_page, polygons_lines_xml, contours_tables, ocr_all_textlines, conf_contours_textregions) cont_page, polygons_lines_xml, contours_tables, ocr_all_textlines, conf_contours_textregions, self.skip_layout_and_reading_order)
return pcgts return pcgts
#print("text region early -1 in %.1fs", time.time() - t0) #print("text region early -1 in %.1fs", time.time() - t0)

@ -168,7 +168,7 @@ class EynollahXmlWriter():
with open(self.output_filename, 'w') as f: with open(self.output_filename, 'w') as f:
f.write(to_xml(pcgts)) f.write(to_xml(pcgts))
def build_pagexml_no_full_layout(self, found_polygons_text_region, page_coord, order_of_texts, id_of_texts, all_found_textline_polygons, all_box_coord, found_polygons_text_region_img, found_polygons_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals, cont_page, polygons_lines_to_be_written_in_xml, found_polygons_tables, ocr_all_textlines, conf_contours_textregion): def build_pagexml_no_full_layout(self, found_polygons_text_region, page_coord, order_of_texts, id_of_texts, all_found_textline_polygons, all_box_coord, found_polygons_text_region_img, found_polygons_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals, cont_page, polygons_lines_to_be_written_in_xml, found_polygons_tables, ocr_all_textlines, conf_contours_textregion, skip_layout_reading_order=False):
self.logger.debug('enter build_pagexml_no_full_layout') self.logger.debug('enter build_pagexml_no_full_layout')
# create the file structure # create the file structure
@ -184,7 +184,7 @@ class EynollahXmlWriter():
for mm in range(len(found_polygons_text_region)): for mm in range(len(found_polygons_text_region)):
textregion = TextRegionType(id=counter.next_region_id, type_='paragraph', textregion = TextRegionType(id=counter.next_region_id, type_='paragraph',
Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_text_region[mm], page_coord), conf=conf_contours_textregion[mm]), Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_text_region[mm], page_coord, skip_layout_reading_order), conf=conf_contours_textregion[mm]),
) )
#textregion.set_conf(conf_contours_textregion[mm]) #textregion.set_conf(conf_contours_textregion[mm])
page.add_TextRegion(textregion) page.add_TextRegion(textregion)
@ -303,10 +303,20 @@ class EynollahXmlWriter():
return pcgts return pcgts
def calculate_polygon_coords(self, contour, page_coord): def calculate_polygon_coords(self, contour, page_coord, skip_layout_reading_order=False):
self.logger.debug('enter calculate_polygon_coords') self.logger.debug('enter calculate_polygon_coords')
coords = '' coords = ''
for value_bbox in contour: for value_bbox in contour:
if skip_layout_reading_order:
if len(value_bbox) == 2:
coords += str(int((value_bbox[0]) / self.scale_x))
coords += ','
coords += str(int((value_bbox[1]) / self.scale_y))
else:
coords += str(int((value_bbox[0][0]) / self.scale_x))
coords += ','
coords += str(int((value_bbox[0][1]) / self.scale_y))
else:
if len(value_bbox) == 2: if len(value_bbox) == 2:
coords += str(int((value_bbox[0] + page_coord[2]) / self.scale_x)) coords += str(int((value_bbox[0] + page_coord[2]) / self.scale_x))
coords += ',' coords += ','

Loading…
Cancel
Save