From 25e967397d753a0fdfd1c4c9181cfc93f94414b7 Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Wed, 11 Dec 2024 11:24:56 +0000 Subject: [PATCH] exit early if no text regions found (to avoid segfault) --- src/eynollah/eynollah.py | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py index 3b43f7b..d6ba8a9 100644 --- a/src/eynollah/eynollah.py +++ b/src/eynollah/eynollah.py @@ -5019,6 +5019,20 @@ class Eynollah: contours_only_text_parent_d = [] contours_only_text_parent = [] + if not len(contours_only_text_parent): + # stop early + empty_marginals = [[]] * len(polygons_of_marginals) + if self.full_layout: + pcgts = self.writer.build_pagexml_full_layout([], [], page_coord, [], [], [], [], [], [], polygons_of_images, contours_tables, [], polygons_of_marginals, empty_marginals, empty_marginals, [], [], [], cont_page, polygons_lines_xml, []) + else: + pcgts = self.writer.build_pagexml_no_full_layout([], page_coord, [], [], [], [], polygons_of_images, polygons_of_marginals, empty_marginals, empty_marginals, [], [], cont_page, polygons_lines_xml, contours_tables, []) + self.logger.info("Job done in %.1fs", time.time() - t0) + if self.dir_in: + self.writer.write_pagexml(pcgts) + continue + else: + return pcgts + #print("text region early 3 in %.1fs", time.time() - t0) if self.light_version: contours_only_text_parent = self.dilate_textregions_contours(contours_only_text_parent) @@ -5164,10 +5178,12 @@ class Eynollah: all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_h, slopes_marginals, cont_page, polygons_lines_xml, ocr_all_textlines) self.logger.info("Job done in %.1fs", time.time() - t0) - if not self.dir_in: + if self.dir_in: + self.writer.write_pagexml(pcgts) + continue + else: return pcgts - else: contours_only_text_parent_h = None if self.reading_order_machine_based: