From 097520bfd275f8260eebd698bae42b0c33eafd3c Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Sun, 25 May 2025 03:33:54 +0200 Subject: [PATCH] rnn ocr for all layout textregion types --- src/eynollah/eynollah.py | 41 ++++++++++++++++++++++++++-------------- src/eynollah/writer.py | 31 ++++++++++++++++++++++-------- 2 files changed, 50 insertions(+), 22 deletions(-) diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py index aa38274..0ee3d14 100644 --- a/src/eynollah/eynollah.py +++ b/src/eynollah/eynollah.py @@ -4715,11 +4715,10 @@ class Eynollah: if self.extract_only_images: text_regions_p_1, erosion_hurts, polygons_lines_xml, polygons_of_images, image_page, page_coord, cont_page = \ self.get_regions_light_v_extract_only_images(img_res, is_image_enhanced, num_col_classifier) - ocr_all_textlines = None pcgts = self.writer.build_pagexml_no_full_layout( [], page_coord, [], [], [], [], polygons_of_images, [], [], [], [], [], - cont_page, [], [], ocr_all_textlines, []) + cont_page, [], []) if self.plotter: self.plotter.write_images_into_directory(polygons_of_images, image_page) return pcgts @@ -4772,7 +4771,7 @@ class Eynollah: cont_page, page_coord, order_text_new, id_of_texts_tot, all_found_textline_polygons, page_coord, polygons_of_images, polygons_of_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals, - cont_page, polygons_lines_xml, contours_tables, ocr_all_textlines, conf_contours_textregions, self.skip_layout_and_reading_order) + cont_page, polygons_lines_xml, contours_tables, ocr_all_textlines=ocr_all_textlines, conf_contours_textregion=conf_contours_textregions, skip_layout_reading_order=self.skip_layout_and_reading_order) return pcgts #print("text region early -1 in %.1fs", time.time() - t0) @@ -4822,10 +4821,9 @@ class Eynollah: if not num_col: self.logger.info("No columns detected, outputting an empty PAGE-XML") - ocr_all_textlines = None pcgts = self.writer.build_pagexml_no_full_layout( [], page_coord, [], [], [], [], [], [], [], [], [], [], - cont_page, [], [], ocr_all_textlines, []) + cont_page, [], []) return pcgts #print("text region early in %.1fs", time.time() - t0) @@ -5004,13 +5002,13 @@ class Eynollah: [], [], page_coord, [], [], [], [], [], [], polygons_of_images, contours_tables, [], polygons_of_marginals, empty_marginals, empty_marginals, [], [], [], - cont_page, polygons_lines_xml, [], [], []) + cont_page, polygons_lines_xml) else: pcgts = self.writer.build_pagexml_no_full_layout( [], page_coord, [], [], [], [], polygons_of_images, polygons_of_marginals, empty_marginals, empty_marginals, [], [], - cont_page, polygons_lines_xml, contours_tables, [], []) + cont_page, polygons_lines_xml, contours_tables) return pcgts @@ -5196,16 +5194,28 @@ class Eynollah: contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered, boxes_d, textline_mask_tot_d) self.logger.info("detection of reading order took %.1fs", time.time() - t_order) - if self.ocr: - ocr_all_textlines = [] + if self.ocr and not self.tr: + gc.collect() + if len(all_found_textline_polygons)>0: + ocr_all_textlines = return_rnn_cnn_ocr_of_given_textlines(image_page, all_found_textline_polygons, self.prediction_model, self.b_s_ocr, self.num_to_char, self.textline_light, self.curved_line) + if all_found_textline_polygons_marginals and len(all_found_textline_polygons_marginals)>0: + ocr_all_textlines_marginals = return_rnn_cnn_ocr_of_given_textlines(image_page, all_found_textline_polygons_marginals, self.prediction_model, self.b_s_ocr, self.num_to_char, self.textline_light, self.curved_line) + + if all_found_textline_polygons_h and len(all_found_textline_polygons)>0: + ocr_all_textlines_h = return_rnn_cnn_ocr_of_given_textlines(image_page, all_found_textline_polygons_h, self.prediction_model, self.b_s_ocr, self.num_to_char, self.textline_light, self.curved_line) + if polygons_of_drop_capitals and len(polygons_of_drop_capitals)>0: + ocr_all_textlines_drop = return_rnn_cnn_ocr_of_given_textlines(image_page, polygons_of_drop_capitals, self.prediction_model, self.b_s_ocr, self.num_to_char, self.textline_light, self.curved_line) else: ocr_all_textlines = None + ocr_all_textlines_marginals = None + ocr_all_textlines_h = None + ocr_all_textlines_drop = None pcgts = self.writer.build_pagexml_full_layout( contours_only_text_parent, contours_only_text_parent_h, page_coord, order_text_new, id_of_texts_tot, all_found_textline_polygons, all_found_textline_polygons_h, all_box_coord, all_box_coord_h, polygons_of_images, contours_tables, polygons_of_drop_capitals, polygons_of_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_h, slopes_marginals, - cont_page, polygons_lines_xml, ocr_all_textlines, conf_contours_textregions, conf_contours_textregions_h) + cont_page, polygons_lines_xml, ocr_all_textlines, ocr_all_textlines_h, ocr_all_textlines_marginals, ocr_all_textlines_drop, conf_contours_textregions, conf_contours_textregions_h) return pcgts contours_only_text_parent_h = None @@ -5278,18 +5288,21 @@ class Eynollah: elif self.ocr and not self.tr: gc.collect() - ocr_all_textlines = return_rnn_cnn_ocr_of_given_textlines(image_page, all_found_textline_polygons, self.prediction_model, self.b_s_ocr, self.num_to_char, self.textline_light, self.curved_line) - + if len(all_found_textline_polygons)>0: + ocr_all_textlines = return_rnn_cnn_ocr_of_given_textlines(image_page, all_found_textline_polygons, self.prediction_model, self.b_s_ocr, self.num_to_char, self.textline_light, self.curved_line) + if all_found_textline_polygons_marginals and len(all_found_textline_polygons_marginals)>0: + ocr_all_textlines_marginals = return_rnn_cnn_ocr_of_given_textlines(image_page, all_found_textline_polygons_marginals, self.prediction_model, self.b_s_ocr, self.num_to_char, self.textline_light, self.curved_line) else: ocr_all_textlines = None - #print(ocr_all_textlines) + ocr_all_textlines_marginals = None self.logger.info("detection of reading order took %.1fs", time.time() - t_order) + pcgts = self.writer.build_pagexml_no_full_layout( txt_con_org, page_coord, order_text_new, id_of_texts_tot, all_found_textline_polygons, all_box_coord, polygons_of_images, polygons_of_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals, - cont_page, polygons_lines_xml, contours_tables, ocr_all_textlines, conf_contours_textregions) + cont_page, polygons_lines_xml, contours_tables, ocr_all_textlines, ocr_all_textlines_marginals, conf_contours_textregions) return pcgts diff --git a/src/eynollah/writer.py b/src/eynollah/writer.py index f07abf6..085ee6f 100644 --- a/src/eynollah/writer.py +++ b/src/eynollah/writer.py @@ -56,10 +56,12 @@ class EynollahXmlWriter(): points_page_print = points_page_print + ' ' return points_page_print[:-1] - def serialize_lines_in_marginal(self, marginal_region, all_found_textline_polygons_marginals, marginal_idx, page_coord, all_box_coord_marginals, slopes_marginals, counter): + def serialize_lines_in_marginal(self, marginal_region, all_found_textline_polygons_marginals, marginal_idx, page_coord, all_box_coord_marginals, slopes_marginals, counter, ocr_all_textlines_textregion): for j in range(len(all_found_textline_polygons_marginals[marginal_idx])): coords = CoordsType() textline = TextLineType(id=counter.next_line_id, Coords=coords) + if ocr_all_textlines_textregion: + textline.set_TextEquiv( [ TextEquivType(Unicode=ocr_all_textlines_textregion[j]) ] ) marginal_region.add_TextLine(textline) marginal_region.set_orientation(-slopes_marginals[marginal_idx]) points_co = '' @@ -168,7 +170,7 @@ class EynollahXmlWriter(): with open(self.output_filename, 'w') as f: f.write(to_xml(pcgts)) - def build_pagexml_no_full_layout(self, found_polygons_text_region, page_coord, order_of_texts, id_of_texts, all_found_textline_polygons, all_box_coord, found_polygons_text_region_img, found_polygons_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals, cont_page, polygons_lines_to_be_written_in_xml, found_polygons_tables, ocr_all_textlines, conf_contours_textregion, skip_layout_reading_order=False): + def build_pagexml_no_full_layout(self, found_polygons_text_region, page_coord, order_of_texts, id_of_texts, all_found_textline_polygons, all_box_coord, found_polygons_text_region_img, found_polygons_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals, cont_page, polygons_lines_to_be_written_in_xml, found_polygons_tables, ocr_all_textlines=None, ocr_all_textlines_marginals=None, conf_contours_textregion=None, skip_layout_reading_order=False): self.logger.debug('enter build_pagexml_no_full_layout') # create the file structure @@ -198,7 +200,12 @@ class EynollahXmlWriter(): marginal = TextRegionType(id=counter.next_region_id, type_='marginalia', Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_marginals[mm], page_coord))) page.add_TextRegion(marginal) - self.serialize_lines_in_marginal(marginal, all_found_textline_polygons_marginals, mm, page_coord, all_box_coord_marginals, slopes_marginals, counter) + if ocr_all_textlines_marginals: + ocr_textlines = ocr_all_textlines_marginals[mm] + else: + ocr_textlines = None + + self.serialize_lines_in_marginal(marginal, all_found_textline_polygons_marginals, mm, page_coord, all_box_coord_marginals, slopes_marginals, counter, ocr_textlines) for mm in range(len(found_polygons_text_region_img)): img_region = ImageRegionType(id=counter.next_region_id, Coords=CoordsType()) @@ -242,7 +249,7 @@ class EynollahXmlWriter(): return pcgts - def build_pagexml_full_layout(self, found_polygons_text_region, found_polygons_text_region_h, page_coord, order_of_texts, id_of_texts, all_found_textline_polygons, all_found_textline_polygons_h, all_box_coord, all_box_coord_h, found_polygons_text_region_img, found_polygons_tables, found_polygons_drop_capitals, found_polygons_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_h, slopes_marginals, cont_page, polygons_lines_to_be_written_in_xml, ocr_all_textlines, conf_contours_textregion, conf_contours_textregion_h): + def build_pagexml_full_layout(self, found_polygons_text_region, found_polygons_text_region_h, page_coord, order_of_texts, id_of_texts, all_found_textline_polygons, all_found_textline_polygons_h, all_box_coord, all_box_coord_h, found_polygons_text_region_img, found_polygons_tables, found_polygons_drop_capitals, found_polygons_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_h, slopes_marginals, cont_page, polygons_lines_to_be_written_in_xml, ocr_all_textlines=None, ocr_all_textlines_h=None, ocr_all_textlines_marginals=None, ocr_all_textlines_drop=None, conf_contours_textregion=None, conf_contours_textregion_h=None): self.logger.debug('enter build_pagexml_full_layout') # create the file structure @@ -272,8 +279,8 @@ class EynollahXmlWriter(): Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_text_region_h[mm], page_coord))) page.add_TextRegion(textregion) - if ocr_all_textlines: - ocr_textlines = ocr_all_textlines[mm] + if ocr_all_textlines_h: + ocr_textlines = ocr_all_textlines_h[mm] else: ocr_textlines = None self.serialize_lines_in_region(textregion, all_found_textline_polygons_h, mm, page_coord, all_box_coord_h, slopes_h, counter, ocr_textlines) @@ -282,7 +289,11 @@ class EynollahXmlWriter(): marginal = TextRegionType(id=counter.next_region_id, type_='marginalia', Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_marginals[mm], page_coord))) page.add_TextRegion(marginal) - self.serialize_lines_in_marginal(marginal, all_found_textline_polygons_marginals, mm, page_coord, all_box_coord_marginals, slopes_marginals, counter) + if ocr_all_textlines_marginals: + ocr_textlines = ocr_all_textlines_marginals[mm] + else: + ocr_textlines = None + self.serialize_lines_in_marginal(marginal, all_found_textline_polygons_marginals, mm, page_coord, all_box_coord_marginals, slopes_marginals, counter, ocr_textlines) for mm in range(len(found_polygons_drop_capitals)): dropcapital = TextRegionType(id=counter.next_region_id, type_='drop-capital', @@ -290,7 +301,11 @@ class EynollahXmlWriter(): page.add_TextRegion(dropcapital) all_box_coord_drop = None slopes_drop = None - self.serialize_lines_in_dropcapital(dropcapital, [found_polygons_drop_capitals[mm]], mm, page_coord, all_box_coord_drop, slopes_drop, counter, ocr_all_textlines_textregion=None) + if ocr_all_textlines_drop: + ocr_textlines = ocr_all_textlines_drop[mm] + else: + ocr_textlines = None + self.serialize_lines_in_dropcapital(dropcapital, [found_polygons_drop_capitals[mm]], mm, page_coord, all_box_coord_drop, slopes_drop, counter, ocr_all_textlines_textregion=ocr_textlines) for mm in range(len(found_polygons_text_region_img)): page.add_ImageRegion(ImageRegionType(id=counter.next_region_id, Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_text_region_img[mm], page_coord))))