From 8ebba5ac046faff317e13455b94f79c6c510d782 Mon Sep 17 00:00:00 2001
From: michalbubula <michal.bubula@outlook.com>
Date: Tue, 12 Aug 2025 16:21:15 +0200
Subject: [PATCH] add feedback to command line interface

---
 src/eynollah/eynollah.py | 305 +++++++++++++++++++++++++++++++++++++--
 1 file changed, 290 insertions(+), 15 deletions(-)

diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py
index d47016b..d9939ca 100644
--- a/src/eynollah/eynollah.py
+++ b/src/eynollah/eynollah.py
@@ -203,6 +203,17 @@ class Eynollah:
         skip_layout_and_reading_order : bool = False,
         logger : Optional[Logger] = None,
     ):
+        if logger:
+            self.logger = logger
+        else:
+            self.logger = getLogger('eynollah')
+            console_handler = logging.StreamHandler(sys.stdout)
+            console_handler.setLevel(logging.INFO)
+            formatter = logging.Formatter('%(message)s')
+            console_handler.setFormatter(formatter)
+            self.logger.addHandler(console_handler)
+            self.logger.setLevel(logging.INFO)
+            
         if skip_layout_and_reading_order:
             textline_light = True
         self.light_version = light_version
@@ -237,10 +248,7 @@ class Eynollah:
             self.num_col_lower = int(num_col_lower)
         else:
             self.num_col_lower = num_col_lower
-        self.logger = logger if logger else getLogger('eynollah')
-        # for parallelization of CPU-intensive tasks:
-        self.executor = ProcessPoolExecutor(max_workers=cpu_count(), timeout=1200)
-        atexit.register(self.executor.shutdown)
+            
         self.dir_models = dir_models
         self.model_dir_of_enhancement = dir_models + "/eynollah-enhancement_20210425"
         self.model_dir_of_binarization = dir_models + "/eynollah-binarization_20210425"
@@ -293,7 +301,14 @@ class Eynollah:
                 self.model_table_dir = dir_models + "/modelens_table_0t4_201124"
             else:
                 self.model_table_dir = dir_models + "/eynollah-tables_20210319"
+                
+                
+        t_start = time.time()
 
+        # for parallelization of CPU-intensive tasks:
+        self.executor = ProcessPoolExecutor(max_workers=cpu_count(), timeout=1200)
+        atexit.register(self.executor.shutdown)
+        
         # #gpu_options = tf.compat.v1.GPUOptions(allow_growth=True)
         # #gpu_options = tf.compat.v1.GPUOptions(per_process_gpu_memory_fraction=7.7, allow_growth=True)
         # #session = tf.compat.v1.Session(config=tf.compat.v1.ConfigProto(gpu_options=gpu_options))
@@ -307,7 +322,11 @@ class Eynollah:
                 tf.config.experimental.set_memory_growth(device, True)
         except:
             self.logger.warning("no GPU device available")
-
+            
+        msg = "Loading models..."
+        print(msg)
+        self.logger.info(msg)
+        
         self.model_page = self.our_load_model(self.model_page_dir)
         self.model_classifier = self.our_load_model(self.model_dir_of_col_classifier)
         self.model_bin = self.our_load_model(self.model_dir_of_binarization)
@@ -334,6 +353,10 @@ class Eynollah:
                 self.processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-handwritten")
             if self.tables:
                 self.model_table = self.our_load_model(self.model_table_dir)
+                
+        msg = f"Model initialization complete ({time.time() - t_start:.1f}s)"
+        print(msg)
+        self.logger.info(msg)
 
     def cache_images(self, image_filename=None, image_pil=None, dpi=None):
         ret = {}
@@ -4294,21 +4317,81 @@ class Eynollah:
 
     def run_single(self):
         t0 = time.time()
+    
+        msg = f"Processing file: {self.writer.image_filename}"
+        print(msg)
+        self.logger.info(msg)
+        
+        # Log enabled features directly
+        enabled_modes = []
+        if self.light_version:
+            enabled_modes.append("Light version")
+        if self.textline_light:
+            enabled_modes.append("Light textline detection")
+        if self.full_layout:
+            enabled_modes.append("Full layout analysis")
+        if self.ocr:
+            enabled_modes.append("OCR")
+        if self.tables:
+            enabled_modes.append("Table detection")
+        
+        if enabled_modes:
+            msg = "Enabled modes: " + ", ".join(enabled_modes)
+            print(msg)
+            self.logger.info(msg)
+                        
+                        
+        msg = "Step 1/5: Image Enhancement"
+        print(msg)
+        self.logger.info(msg)
+        
         img_res, is_image_enhanced, num_col_classifier, num_column_is_classified = self.run_enhancement(self.light_version)
-        self.logger.info("Enhancing took %.1fs ", time.time() - t0)
+        
+        msg = f"Image: {self.image.shape[1]}x{self.image.shape[0]}, {self.dpi} DPI, {num_col_classifier} columns"
+        print(msg)
+        self.logger.info(msg)
+        if is_image_enhanced:
+            msg = "Enhancement applied"
+            print(msg)
+            self.logger.info(msg)
+        
+        msg = f"Enhancement complete ({time.time() - t0:.1f}s)"
+        print(msg)
+        self.logger.info(msg)
+        
+
+        # Image Extraction Mode
         if self.extract_only_images:
+            msg = "Step 2/5: Image Extraction Mode"
+            print(msg)
+            self.logger.info(msg)
+            
             text_regions_p_1, erosion_hurts, polygons_lines_xml, polygons_of_images, image_page, page_coord, cont_page = \
                 self.get_regions_light_v_extract_only_images(img_res, is_image_enhanced, num_col_classifier)
+            
             ocr_all_textlines = None
             pcgts = self.writer.build_pagexml_no_full_layout(
                 [], page_coord, [], [], [], [],
                 polygons_of_images, [], [], [], [], [],
                 cont_page, [], [], ocr_all_textlines, [])
+                
             if self.plotter:
                 self.plotter.write_images_into_directory(polygons_of_images, image_page)
+                
+            msg = "Image extraction complete"
+            print(msg)
+            self.logger.info(msg)
             return pcgts
 
+        # Basic Processing Mode
         if self.skip_layout_and_reading_order:
+            msg = "Step 2/5: Basic Processing Mode"
+            print(msg)
+            self.logger.info(msg)
+            msg = "Skipping layout analysis and reading order detection"
+            print(msg)
+            self.logger.info(msg)
+    
             _ ,_, _, textline_mask_tot_ea, img_bin_light, _ = \
                 self.get_regions_light_v(img_res, is_image_enhanced, num_col_classifier,
                                          skip_layout_and_reading_order=self.skip_layout_and_reading_order)
@@ -4349,11 +4432,21 @@ class Eynollah:
                 all_found_textline_polygons, page_coord, polygons_of_images, polygons_of_marginals,
                 all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals,
                 cont_page, polygons_lines_xml, contours_tables, ocr_all_textlines, conf_contours_textregions)
+            msg = "Basic processing complete"
+            print(msg)
+            self.logger.info(msg)
             return pcgts
 
         #print("text region early -1 in %.1fs", time.time() - t0)
         t1 = time.time()
+        msg = "Step 2/5: Layout Analysis"
+        print(msg)
+        self.logger.info(msg)
+        
         if self.light_version:
+            msg = "Using light version processing"
+            print(msg)
+            self.logger.info(msg)
             text_regions_p_1 ,erosion_hurts, polygons_lines_xml, textline_mask_tot_ea, img_bin_light, confidence_matrix = \
                 self.get_regions_light_v(img_res, is_image_enhanced, num_col_classifier)
             #print("text region early -2 in %.1fs", time.time() - t0)
@@ -4384,20 +4477,30 @@ class Eynollah:
             text_regions_p_1 ,erosion_hurts, polygons_lines_xml = \
                 self.get_regions_from_xy_2models(img_res, is_image_enhanced,
                                                  num_col_classifier)
-            self.logger.info("Textregion detection took %.1fs ", time.time() - t1)
+            msg = f"Textregion detection took {time.time() - t1:.1f}s"
+            print(msg)
+            self.logger.info(msg)
             confidence_matrix = np.zeros((text_regions_p_1.shape[:2]))
 
             t1 = time.time()
             num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_lines, \
                 text_regions_p_1, cont_page, table_prediction = \
                     self.run_graphics_and_columns(text_regions_p_1, num_col_classifier, num_column_is_classified, erosion_hurts)
-            self.logger.info("Graphics detection took %.1fs ", time.time() - t1)
+            msg = f"Graphics detection took {time.time() - t1:.1f}s"
+            print(msg)
+            self.logger.info(msg)
             #self.logger.info('cont_page %s', cont_page)
         #plt.imshow(table_prediction)
         #plt.show()
+        msg = f"Layout analysis complete ({time.time() - t1:.1f}s)"
+        print(msg)
+        self.logger.info(msg)
 
         if not num_col:
-            self.logger.info("No columns detected, outputting an empty PAGE-XML")
+            msg = "No columns detected - generating empty PAGE-XML"
+            print(msg)
+            self.logger.info(msg)
+    
             ocr_all_textlines = None
             pcgts = self.writer.build_pagexml_no_full_layout(
                 [], page_coord, [], [], [], [], [], [], [], [], [], [],
@@ -4408,10 +4511,18 @@ class Eynollah:
         t1 = time.time()
         if not self.light_version:
             textline_mask_tot_ea = self.run_textline(image_page)
-            self.logger.info("textline detection took %.1fs", time.time() - t1)
+            msg = f"Textline detection took {time.time() - t1:.1f}s"
+            print(msg)
+            self.logger.info(msg)
             t1 = time.time()
             slope_deskew, slope_first = self.run_deskew(textline_mask_tot_ea)
-            self.logger.info("deskewing took %.1fs", time.time() - t1)
+            if np.abs(slope_deskew) > 0.01:  # Only log if there is significant skew
+                msg = f"Applied deskew correction: {slope_deskew:.2f} degrees"
+                print(msg)
+                self.logger.info(msg)
+            msg = f"Deskewing took {time.time() - t1:.1f}s"
+            print(msg)
+            self.logger.info(msg)
         elif num_col_classifier in (1,2):
             org_h_l_m = textline_mask_tot_ea.shape[0]
             org_w_l_m = textline_mask_tot_ea.shape[1]
@@ -4431,6 +4542,19 @@ class Eynollah:
         textline_mask_tot, text_regions_p, image_page_rotated = \
             self.run_marginals(image_page, textline_mask_tot_ea, mask_images, mask_lines,
                                num_col_classifier, slope_deskew, text_regions_p_1, table_prediction)
+                               
+        msg = "Step 3/5: Text Line Detection"
+        print(msg)
+        self.logger.info(msg)
+        
+        if self.curved_line:
+            msg = "Mode: Curved line detection"
+            print(msg)
+            self.logger.info(msg)
+        elif self.textline_light:
+            msg = "Mode: Light detection"
+            print(msg)
+            self.logger.info(msg)
 
         if self.light_version and num_col_classifier in (1,2):
             image_page = resize_image(image_page,org_h_l_m, org_w_l_m )
@@ -4441,7 +4565,9 @@ class Eynollah:
             table_prediction = resize_image(table_prediction,org_h_l_m, org_w_l_m )
             image_page_rotated = resize_image(image_page_rotated,org_h_l_m, org_w_l_m )
 
-        self.logger.info("detection of marginals took %.1fs", time.time() - t1)
+        msg = f"Detection of marginals took {time.time() - t1:.1f}s"
+        print(msg)
+        self.logger.info(msg)
         #print("text region early 2 marginal in %.1fs", time.time() - t0)
         ## birdan sora chock chakir
         t1 = time.time()
@@ -4540,7 +4666,9 @@ class Eynollah:
                         cx_bigest_d_big[0] = cx_bigest_d[ind_largest]
                         cy_biggest_d_big[0] = cy_biggest_d[ind_largest]
                     except Exception as why:
-                        self.logger.error(why)
+                        msg = str(why)
+                        print(f"Error: {msg}")
+                        self.logger.error(msg)
 
                     (h, w) = text_only.shape[:2]
                     center = (w // 2.0, h // 2.0)
@@ -4758,6 +4886,23 @@ class Eynollah:
         t_order = time.time()
 
         if self.full_layout:
+            msg = "Step 4/5: Reading Order Detection"
+            print(msg)
+            self.logger.info(msg)
+            
+            if self.reading_order_machine_based:
+                msg = "Using machine-based detection"
+                print(msg)
+                self.logger.info(msg)
+            if self.right2left:
+                msg = "Right-to-left mode enabled"
+                print(msg)
+                self.logger.info(msg)
+            if self.headers_off:
+                msg = "Headers ignored in reading order"
+                print(msg)
+                self.logger.info(msg)
+                        
             if self.reading_order_machine_based:
                 order_text_new, id_of_texts_tot = self.do_order_of_regions_with_model(
                     contours_only_text_parent, contours_only_text_parent_h, text_regions_p)
@@ -4768,21 +4913,84 @@ class Eynollah:
                 else:
                     order_text_new, id_of_texts_tot = self.do_order_of_regions(
                         contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered, boxes_d, textline_mask_tot_d)
-            self.logger.info("detection of reading order took %.1fs", time.time() - t_order)
+            msg = f"Detection of reading order took {time.time() - t_order:.1f}s"
+            print(msg)
+            self.logger.info(msg)
 
             if self.ocr:
+                msg = "Step 4.5/5: OCR Processing"
+                print(msg)
+                self.logger.info(msg)
+                
+                if torch.cuda.is_available():
+                    msg = "Using GPU acceleration"
+                    print(msg)
+                    self.logger.info(msg)
+                else:
+                    msg = "Using CPU processing"
+                    print(msg)
+                    self.logger.info(msg)
+                
                 ocr_all_textlines = []
             else:
                 ocr_all_textlines = None
+                
+            msg = "Step 5/5: Output Generation"
+            print(msg)
+            self.logger.info(msg)
+    
+            output_config = []
+            if self.enable_plotting:
+                output_config.append("Saving debug plots")
+            if self.dir_of_cropped_images:
+                output_config.append(f"Saving cropped images to: {self.dir_of_cropped_images}")
+            if self.dir_of_layout:
+                output_config.append(f"Saving layout plots to: {self.dir_of_layout}")
+            if self.dir_of_deskewed:
+                output_config.append(f"Saving deskewed images to: {self.dir_of_deskewed}")
+            
+            if output_config:
+                self.logger.info("Output configuration:\n  * %s", "\n  * ".join(output_config))
+            
             pcgts = self.writer.build_pagexml_full_layout(
                 contours_only_text_parent, contours_only_text_parent_h, page_coord, order_text_new, id_of_texts_tot,
                 all_found_textline_polygons, all_found_textline_polygons_h, all_box_coord, all_box_coord_h,
                 polygons_of_images, contours_tables, polygons_of_drop_capitals, polygons_of_marginals,
                 all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_h, slopes_marginals,
                 cont_page, polygons_lines_xml, ocr_all_textlines, conf_contours_textregions, conf_contours_textregions_h)
+            
+            summary = [
+                f"Total processing time: {time.time() - t0:.1f}s",
+                f"Output file: {self.writer.output_filename}"
+            ]
+            
+            if self.ocr:
+                summary.append("OCR processing completed")
+            if self.full_layout:
+                summary.append("Full layout analysis completed")
+            if self.tables:
+                summary.append("Table detection completed")
+                    
             return pcgts
 
         contours_only_text_parent_h = None
+        msg = "Step 4/5: Reading Order Detection"
+        print(msg)
+        self.logger.info(msg)
+
+        if self.reading_order_machine_based:
+            msg = "Using machine-based detection"
+            print(msg)
+            self.logger.info(msg)
+        if self.right2left:
+            msg = "Right-to-left mode enabled"
+            print(msg)
+            self.logger.info(msg)
+        if self.headers_off:
+            msg = "Headers ignored in reading order"
+            print(msg)
+            self.logger.info(msg)
+    
         if self.reading_order_machine_based:
             order_text_new, id_of_texts_tot = self.do_order_of_regions_with_model(
                 contours_only_text_parent, contours_only_text_parent_h, text_regions_p)
@@ -4803,6 +5011,33 @@ class Eynollah:
                     contours_only_text_parent_d_ordered, contours_only_text_parent_h, boxes_d, textline_mask_tot_d)
 
         if self.ocr:
+            msg = "Step 4.5/5: OCR Processing"
+            print(msg)
+            self.logger.info(msg)
+            
+            if torch.cuda.is_available():
+                msg = "Using GPU acceleration"
+                print(msg)
+                self.logger.info(msg)
+            else:
+                msg = "Using CPU processing"
+                print(msg)
+                self.logger.info(msg)
+            
+            if self.light_version:
+                msg = "Using light version OCR"
+                print(msg)
+                self.logger.info(msg)
+            
+            if self.textline_light:
+                msg = "Using light text line detection for OCR"
+                print(msg)
+                self.logger.info(msg)
+            
+            msg = "Processing text lines..."
+            print(msg)
+            self.logger.info(msg)
+            
             device = cuda.get_current_device()
             device.reset()
             gc.collect()
@@ -4853,12 +5088,52 @@ class Eynollah:
         else:
             ocr_all_textlines = None
             #print(ocr_all_textlines)
-        self.logger.info("detection of reading order took %.1fs", time.time() - t_order)
+        msg = f"Detection of reading order took {time.time() - t_order:.1f}s"
+        print(msg)
+        self.logger.info(msg)
+
+        msg = "Step 5/5: Output Generation"
+        print(msg)
+        self.logger.info(msg)
+
+        msg = "Generating PAGE-XML output"
+        print(msg)
+        self.logger.info(msg)
+
+        if self.enable_plotting:
+            msg = "Saving debug plots"
+            print(msg)
+            self.logger.info(msg)
+
+        if self.dir_of_cropped_images:
+            msg = f"Saving cropped images to: {self.dir_of_cropped_images}"
+            print(msg)
+            self.logger.info(msg)
+
+        if self.dir_of_layout:
+            msg = f"Saving layout plots to: {self.dir_of_layout}"
+            print(msg)
+            self.logger.info(msg)
+
+        if self.dir_of_deskewed:
+            msg = f"Saving deskewed images to: {self.dir_of_deskewed}"
+            print(msg)
+            self.logger.info(msg)
+    
         pcgts = self.writer.build_pagexml_no_full_layout(
             txt_con_org, page_coord, order_text_new, id_of_texts_tot,
             all_found_textline_polygons, all_box_coord, polygons_of_images, polygons_of_marginals,
             all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals,
             cont_page, polygons_lines_xml, contours_tables, ocr_all_textlines, conf_contours_textregions)
+        
+        msg = f"\nProcessing completed in {time.time() - t0:.1f}s"
+        print(msg)
+        self.logger.info(msg)
+        
+        msg = f"Output file: {self.writer.output_filename}"
+        print(msg)
+        self.logger.info(msg)
+        
         return pcgts