Merge remote-tracking branch 'michalbubula/add-feedback' into prepare-release-v0.5.0

kba 2025-09-23 19:50:27 +02:00
commit 9ead58b99a
3 changed files with 187 additions and 25 deletions

.gitignore vendored
View file

@@ -5,3 +5,4 @@ models_eynollah*
 output.html
 /build
 /dist
+*.tif

View file

@@ -1,5 +1,6 @@
 import sys
 import click
+import logging
 from ocrd_utils import initLogging, getLevelName, getLogger
 from eynollah.eynollah import Eynollah, Eynollah_ocr
 from eynollah.sbb_binarize import SbbBinarizer
@@ -335,15 +336,30 @@ def enhancement(image, out, overwrite, dir_in, model, num_col_upper, num_col_low
     is_flag=True,
     help="if this parameter set to true, this tool will ignore layout detection and reading order. It means that textline detection will be done within printspace and contours of textline will be written in xml output file.",
 )
+# TODO move to top-level CLI context
 @click.option(
     "--log_level",
     "-l",
     type=click.Choice(['OFF', 'DEBUG', 'INFO', 'WARN', 'ERROR']),
-    help="Override log level globally to this",
+    help="Override 'eynollah' log level globally to this",
+)
+#
+@click.option(
+    "--setup-logging",
+    is_flag=True,
+    help="Setup a basic console logger",
 )
-def layout(image, out, overwrite, dir_in, model, save_images, save_layout, save_deskewed, save_all, extract_only_images, save_page, enable_plotting, allow_enhancement, curved_line, textline_light, full_layout, tables, right2left, input_binary, allow_scaling, headers_off, light_version, reading_order_machine_based, do_ocr, transformer_ocr, batch_size_ocr, num_col_upper, num_col_lower, threshold_art_class_textline, threshold_art_class_layout, skip_layout_and_reading_order, ignore_page_extraction, log_level):
+def layout(image, out, overwrite, dir_in, model, save_images, save_layout, save_deskewed, save_all, extract_only_images, save_page, enable_plotting, allow_enhancement, curved_line, textline_light, full_layout, tables, right2left, input_binary, allow_scaling, headers_off, light_version, reading_order_machine_based, do_ocr, transformer_ocr, batch_size_ocr, num_col_upper, num_col_lower, threshold_art_class_textline, threshold_art_class_layout, skip_layout_and_reading_order, ignore_page_extraction, log_level, setup_logging):
-    initLogging()
+    if setup_logging:
+        console_handler = logging.StreamHandler(sys.stdout)
+        console_handler.setLevel(logging.INFO)
+        formatter = logging.Formatter('%(message)s')
+        console_handler.setFormatter(formatter)
+        getLogger('eynollah').addHandler(console_handler)
+        getLogger('eynollah').setLevel(logging.INFO)
+    else:
+        initLogging()
     if log_level:
         getLogger('eynollah').setLevel(getLevelName(log_level))
     assert enable_plotting or not save_layout, "Plotting with -sl also requires -ep"
@@ -367,7 +383,6 @@ def layout(image, out, overwrite, dir_in, model, save_images, save_layout, save_
     assert image or dir_in, "Either a single image -i or a dir_in -di is required"
     eynollah = Eynollah(
         model,
-        logger=getLogger('eynollah'),
         dir_out=out,
         dir_of_cropped_images=save_images,
         extract_only_images=extract_only_images,
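The console handler wired up behind the new --setup-logging flag can be reproduced outside of Click with nothing but the standard logging module; the following standalone sketch (illustrative only, not part of the committed files) shows the resulting logger configuration:

import logging
import sys

# Mirror of the --setup-logging branch added in cli.py: a plain stdout handler
# on the shared 'eynollah' logger, message-only format, INFO level.
console_handler = logging.StreamHandler(sys.stdout)
console_handler.setLevel(logging.INFO)
console_handler.setFormatter(logging.Formatter('%(message)s'))

logger = logging.getLogger('eynollah')
logger.addHandler(console_handler)
logger.setLevel(logging.INFO)

logger.info("Step 1/5: Image Enhancement")  # prints the bare message to stdout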

View file

@@ -6,7 +6,6 @@
 document layout analysis (segmentation) with output in PAGE-XML
 """
-from logging import Logger
 from difflib import SequenceMatcher as sq
 from PIL import Image, ImageDraw, ImageFont
 import math
@@ -219,8 +218,9 @@ class Eynollah:
         threshold_art_class_layout: Optional[float] = None,
         threshold_art_class_textline: Optional[float] = None,
         skip_layout_and_reading_order : bool = False,
-        logger : Optional[Logger] = None,
     ):
+        self.logger = getLogger('eynollah')
         if skip_layout_and_reading_order:
             textline_light = True
         self.light_version = light_version
@@ -267,10 +267,6 @@ class Eynollah:
         else:
             self.threshold_art_class_textline = 0.1
-        self.logger = logger if logger else getLogger('eynollah')
-        # for parallelization of CPU-intensive tasks:
-        self.executor = ProcessPoolExecutor(max_workers=cpu_count(), timeout=1200)
-        atexit.register(self.executor.shutdown)
         self.dir_models = dir_models
         self.model_dir_of_enhancement = dir_models + "/eynollah-enhancement_20210425"
         self.model_dir_of_binarization = dir_models + "/eynollah-binarization_20210425"
@@ -326,6 +322,13 @@ class Eynollah:
         else:
             self.model_table_dir = dir_models + "/eynollah-tables_20210319"
+        t_start = time.time()
+        # for parallelization of CPU-intensive tasks:
+        self.executor = ProcessPoolExecutor(max_workers=cpu_count(), timeout=1200)
+        atexit.register(self.executor.shutdown)
         # #gpu_options = tf.compat.v1.GPUOptions(allow_growth=True)
         # #gpu_options = tf.compat.v1.GPUOptions(per_process_gpu_memory_fraction=7.7, allow_growth=True)
         # #session = tf.compat.v1.Session(config=tf.compat.v1.ConfigProto(gpu_options=gpu_options))
@@ -340,6 +343,8 @@ class Eynollah:
         except:
             self.logger.warning("no GPU device available")
+        self.logger.info("Loading models...")
         self.model_page = self.our_load_model(self.model_page_dir)
         self.model_classifier = self.our_load_model(self.model_dir_of_col_classifier)
         self.model_bin = self.our_load_model(self.model_dir_of_binarization)
@@ -393,6 +398,8 @@ class Eynollah:
         if self.tables:
             self.model_table = self.our_load_model(self.model_table_dir)
+        self.logger.info(f"Model initialization complete ({time.time() - t_start:.1f}s)")
     def cache_images(self, image_filename=None, image_pil=None, dpi=None):
         ret = {}
         t_c0 = time.time()
@@ -4548,19 +4555,48 @@ class Eynollah:
             pcgts = self.run_single()
             self.logger.info("Job done in %.1fs", time.time() - t0)
-            #print("Job done in %.1fs" % (time.time() - t0))
             self.writer.write_pagexml(pcgts)
         if dir_in:
             self.logger.info("All jobs done in %.1fs", time.time() - t0_tot)
-            print("all Job done in %.1fs", time.time() - t0_tot)
     def run_single(self):
         t0 = time.time()
-        img_res, is_image_enhanced, num_col_classifier, num_column_is_classified = self.run_enhancement(self.light_version)
-        self.logger.info("Enhancing took %.1fs ", time.time() - t0)
+        self.logger.info(f"Processing file: {self.writer.image_filename}")
+        # Log enabled features directly
+        enabled_modes = []
+        if self.light_version:
+            enabled_modes.append("Light version")
+        if self.textline_light:
+            enabled_modes.append("Light textline detection")
+        if self.full_layout:
+            enabled_modes.append("Full layout analysis")
+        if self.ocr:
+            enabled_modes.append("OCR")
+        if self.tables:
+            enabled_modes.append("Table detection")
+        if enabled_modes:
+            self.logger.info("Enabled modes: " + ", ".join(enabled_modes))
+        self.logger.info("Step 1/5: Image Enhancement")
+        img_res, is_image_enhanced, num_col_classifier, num_column_is_classified = self.run_enhancement(self.light_version)
+        self.logger.info(f"Image: {self.image.shape[1]}x{self.image.shape[0]}, {self.dpi} DPI, {num_col_classifier} columns")
+        if is_image_enhanced:
+            self.logger.info("Enhancement applied")
+        self.logger.info(f"Enhancement complete ({time.time() - t0:.1f}s)")
+        # Image Extraction Mode
         if self.extract_only_images:
+            self.logger.info("Step 2/5: Image Extraction Mode")
             text_regions_p_1, erosion_hurts, polygons_lines_xml, polygons_of_images, image_page, page_coord, cont_page = \
                 self.get_regions_light_v_extract_only_images(img_res, is_image_enhanced, num_col_classifier)
             pcgts = self.writer.build_pagexml_no_full_layout(
@@ -4569,9 +4605,15 @@ class Eynollah:
                 cont_page, [], [])
             if self.plotter:
                 self.plotter.write_images_into_directory(polygons_of_images, image_page)
+            self.logger.info("Image extraction complete")
             return pcgts
+        # Basic Processing Mode
         if self.skip_layout_and_reading_order:
+            self.logger.info("Step 2/5: Basic Processing Mode")
+            self.logger.info("Skipping layout analysis and reading order detection")
             _ ,_, _, textline_mask_tot_ea, img_bin_light, _ = \
                 self.get_regions_light_v(img_res, is_image_enhanced, num_col_classifier,
                     skip_layout_and_reading_order=self.skip_layout_and_reading_order)
@@ -4623,11 +4665,15 @@ class Eynollah:
                 all_found_textline_polygons, page_coord, polygons_of_images, polygons_of_marginals_left, polygons_of_marginals_right,
                 all_found_textline_polygons_marginals_left, all_found_textline_polygons_marginals_right, all_box_coord_marginals_left, all_box_coord_marginals_right, slopes, slopes_marginals_left, slopes_marginals_right,
                 cont_page, polygons_lines_xml, contours_tables, ocr_all_textlines=ocr_all_textlines, conf_contours_textregion=conf_contours_textregions, skip_layout_reading_order=self.skip_layout_and_reading_order)
+            self.logger.info("Basic processing complete")
             return pcgts
         #print("text region early -1 in %.1fs", time.time() - t0)
         t1 = time.time()
+        self.logger.info("Step 2/5: Layout Analysis")
         if self.light_version:
+            self.logger.info("Using light version processing")
             text_regions_p_1 ,erosion_hurts, polygons_lines_xml, textline_mask_tot_ea, img_bin_light, confidence_matrix = \
                 self.get_regions_light_v(img_res, is_image_enhanced, num_col_classifier)
             #print("text region early -2 in %.1fs", time.time() - t0)
@@ -4658,19 +4704,22 @@ class Eynollah:
             text_regions_p_1 ,erosion_hurts, polygons_lines_xml = \
                 self.get_regions_from_xy_2models(img_res, is_image_enhanced,
                                                  num_col_classifier)
-            self.logger.info("Textregion detection took %.1fs ", time.time() - t1)
+            self.logger.info(f"Textregion detection took {time.time() - t1:.1f}s")
             confidence_matrix = np.zeros((text_regions_p_1.shape[:2]))
         t1 = time.time()
         num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_lines, \
             text_regions_p_1, cont_page, table_prediction = \
             self.run_graphics_and_columns(text_regions_p_1, num_col_classifier, num_column_is_classified, erosion_hurts)
-        self.logger.info("Graphics detection took %.1fs ", time.time() - t1)
+        self.logger.info(f"Graphics detection took {time.time() - t1:.1f}s")
         #self.logger.info('cont_page %s', cont_page)
         #plt.imshow(table_prediction)
         #plt.show()
+        self.logger.info(f"Layout analysis complete ({time.time() - t1:.1f}s)")
         if not num_col:
-            self.logger.info("No columns detected, outputting an empty PAGE-XML")
+            self.logger.info("No columns detected - generating empty PAGE-XML")
             pcgts = self.writer.build_pagexml_no_full_layout(
                 [], page_coord, [], [], [], [], [], [], [], [], [], [], [], [], [], [],
                 cont_page, [], [])
@@ -4680,10 +4729,12 @@ class Eynollah:
         t1 = time.time()
         if not self.light_version:
             textline_mask_tot_ea = self.run_textline(image_page)
-            self.logger.info("textline detection took %.1fs", time.time() - t1)
+            self.logger.info(f"Textline detection took {time.time() - t1:.1f}s")
             t1 = time.time()
             slope_deskew, slope_first = self.run_deskew(textline_mask_tot_ea)
-            self.logger.info("deskewing took %.1fs", time.time() - t1)
+            if np.abs(slope_deskew) > 0.01:  # Only log if there is significant skew
+                self.logger.info(f"Applied deskew correction: {slope_deskew:.2f} degrees")
+            self.logger.info(f"Deskewing took {time.time() - t1:.1f}s")
         elif num_col_classifier in (1,2):
             org_h_l_m = textline_mask_tot_ea.shape[0]
             org_w_l_m = textline_mask_tot_ea.shape[1]
@@ -4704,6 +4755,13 @@ class Eynollah:
             self.run_marginals(image_page, textline_mask_tot_ea, mask_images, mask_lines,
                                num_col_classifier, slope_deskew, text_regions_p_1, table_prediction)
+        self.logger.info("Step 3/5: Text Line Detection")
+        if self.curved_line:
+            self.logger.info("Mode: Curved line detection")
+        elif self.textline_light:
+            self.logger.info("Mode: Light detection")
         if self.light_version and num_col_classifier in (1,2):
             image_page = resize_image(image_page,org_h_l_m, org_w_l_m )
             textline_mask_tot_ea = resize_image(textline_mask_tot_ea,org_h_l_m, org_w_l_m )
@@ -4713,8 +4771,7 @@ class Eynollah:
             table_prediction = resize_image(table_prediction,org_h_l_m, org_w_l_m )
             image_page_rotated = resize_image(image_page_rotated,org_h_l_m, org_w_l_m )
-        self.logger.info("detection of marginals took %.1fs", time.time() - t1)
-        #print("text region early 2 marginal in %.1fs", time.time() - t0)
+        self.logger.info(f"Detection of marginals took {time.time() - t1:.1f}s")
         ## birdan sora chock chakir
         t1 = time.time()
         if not self.full_layout:
@@ -4812,7 +4869,7 @@ class Eynollah:
                     cx_bigest_d_big[0] = cx_bigest_d[ind_largest]
                     cy_biggest_d_big[0] = cy_biggest_d[ind_largest]
                 except Exception as why:
-                    self.logger.error(why)
+                    self.logger.error(str(why))
                 (h, w) = text_only.shape[:2]
                 center = (w // 2.0, h // 2.0)
@@ -5034,6 +5091,15 @@ class Eynollah:
         t_order = time.time()
         if self.full_layout:
+            self.logger.info("Step 4/5: Reading Order Detection")
+            if self.reading_order_machine_based:
+                self.logger.info("Using machine-based detection")
+            if self.right2left:
+                self.logger.info("Right-to-left mode enabled")
+            if self.headers_off:
+                self.logger.info("Headers ignored in reading order")
             if self.reading_order_machine_based:
                 tror = time.time()
                 order_text_new, id_of_texts_tot = self.do_order_of_regions_with_model(
@@ -5045,9 +5111,16 @@ class Eynollah:
             else:
                 order_text_new, id_of_texts_tot = self.do_order_of_regions(
                     contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered, boxes_d, textline_mask_tot_d)
-            self.logger.info("detection of reading order took %.1fs", time.time() - t_order)
+            self.logger.info(f"Detection of reading order took {time.time() - t_order:.1f}s")
             if self.ocr and not self.tr:
+                self.logger.info("Step 4.5/5: OCR Processing")
+                if torch.cuda.is_available():
+                    self.logger.info("Using GPU acceleration")
+                else:
+                    self.logger.info("Using CPU processing")
                 gc.collect()
                 if len(all_found_textline_polygons)>0:
                     ocr_all_textlines = return_rnn_cnn_ocr_of_given_textlines(image_page, all_found_textline_polygons, self.prediction_model, self.b_s_ocr, self.num_to_char, self.textline_light, self.curved_line)
@@ -5079,15 +5152,54 @@ class Eynollah:
                 ocr_all_textlines_marginals_right = None
                 ocr_all_textlines_h = None
                 ocr_all_textlines_drop = None
+            self.logger.info("Step 5/5: Output Generation")
+            output_config = []
+            if self.enable_plotting:
+                output_config.append("Saving debug plots")
+            if self.dir_of_cropped_images:
+                output_config.append(f"Saving cropped images to: {self.dir_of_cropped_images}")
+            if self.dir_of_layout:
+                output_config.append(f"Saving layout plots to: {self.dir_of_layout}")
+            if self.dir_of_deskewed:
+                output_config.append(f"Saving deskewed images to: {self.dir_of_deskewed}")
+            if output_config:
+                self.logger.info("Output configuration:\n * %s", "\n * ".join(output_config))
             pcgts = self.writer.build_pagexml_full_layout(
                 contours_only_text_parent, contours_only_text_parent_h, page_coord, order_text_new, id_of_texts_tot,
                 all_found_textline_polygons, all_found_textline_polygons_h, all_box_coord, all_box_coord_h,
                 polygons_of_images, contours_tables, polygons_of_drop_capitals, polygons_of_marginals_left, polygons_of_marginals_right,
                 all_found_textline_polygons_marginals_left, all_found_textline_polygons_marginals_right, all_box_coord_marginals_left, all_box_coord_marginals_right, slopes, slopes_h, slopes_marginals_left, slopes_marginals_right,
                 cont_page, polygons_lines_xml, ocr_all_textlines, ocr_all_textlines_h, ocr_all_textlines_marginals_left, ocr_all_textlines_marginals_right, ocr_all_textlines_drop, conf_contours_textregions, conf_contours_textregions_h)
+            summary = [
+                f"Total processing time: {time.time() - t0:.1f}s",
+                f"Output file: {self.writer.output_filename}"
+            ]
+            if self.ocr:
+                summary.append("OCR processing completed")
+            if self.full_layout:
+                summary.append("Full layout analysis completed")
+            if self.tables:
+                summary.append("Table detection completed")
+            self.logger.info(f"Summary: {summary}")
             return pcgts
         contours_only_text_parent_h = None
+        self.logger.info("Step 4/5: Reading Order Detection")
+        if self.reading_order_machine_based:
+            self.logger.info("Using machine-based detection")
+        if self.right2left:
+            self.logger.info("Right-to-left mode enabled")
+        if self.headers_off:
+            self.logger.info("Headers ignored in reading order")
         if self.reading_order_machine_based:
             order_text_new, id_of_texts_tot = self.do_order_of_regions_with_model(
                 contours_only_text_parent, contours_only_text_parent_h, text_regions_p)
@@ -5108,6 +5220,21 @@ class Eynollah:
                     contours_only_text_parent_d_ordered, contours_only_text_parent_h, boxes_d, textline_mask_tot_d)
         if self.ocr and self.tr:
+            self.logger.info("Step 4.5/5: OCR Processing")
+            if torch.cuda.is_available():
+                self.logger.info("Using GPU acceleration")
+            else:
+                self.logger.info("Using CPU processing")
+            if self.light_version:
+                self.logger.info("Using light version OCR")
+            if self.textline_light:
+                self.logger.info("Using light text line detection for OCR")
+            self.logger.info("Processing text lines...")
             device = cuda.get_current_device()
             device.reset()
             gc.collect()
@@ -5170,13 +5297,32 @@ class Eynollah:
             ocr_all_textlines = None
             ocr_all_textlines_marginals_left = None
             ocr_all_textlines_marginals_right = None
-        self.logger.info("detection of reading order took %.1fs", time.time() - t_order)
+        self.logger.info(f"Detection of reading order took {time.time() - t_order:.1f}s")
+        self.logger.info("Step 5/5: Output Generation")
+        self.logger.info("Generating PAGE-XML output")
+        if self.enable_plotting:
+            self.logger.info("Saving debug plots")
+        if self.dir_of_cropped_images:
+            self.logger.info(f"Saving cropped images to: {self.dir_of_cropped_images}")
+        if self.dir_of_layout:
+            self.logger.info(f"Saving layout plots to: {self.dir_of_layout}")
+        if self.dir_of_deskewed:
+            self.logger.info(f"Saving deskewed images to: {self.dir_of_deskewed}")
         pcgts = self.writer.build_pagexml_no_full_layout(
             txt_con_org, page_coord, order_text_new, id_of_texts_tot,
             all_found_textline_polygons, all_box_coord, polygons_of_images, polygons_of_marginals_left, polygons_of_marginals_right,
             all_found_textline_polygons_marginals_left, all_found_textline_polygons_marginals_right, all_box_coord_marginals_left, all_box_coord_marginals_right, slopes, slopes_marginals_left, slopes_marginals_right,
             cont_page, polygons_lines_xml, contours_tables, ocr_all_textlines, ocr_all_textlines_marginals_left, ocr_all_textlines_marginals_right, conf_contours_textregions)
+        self.logger.info(f"Output file: {self.writer.output_filename}")
         return pcgts
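Because the Eynollah constructor no longer accepts a logger argument, callers that previously passed logger=... now configure the shared named logger instead; a minimal sketch, assuming only the standard library:

import logging

# Configure the 'eynollah' logger before constructing Eynollah; the class
# now always obtains it via getLogger('eynollah') internally.
logging.basicConfig(format='%(levelname)s:%(name)s: %(message)s')
logging.getLogger('eynollah').setLevel(logging.DEBUG)  # or INFO / WARNING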