From 8ebba5ac046faff317e13455b94f79c6c510d782 Mon Sep 17 00:00:00 2001 From: michalbubula Date: Tue, 12 Aug 2025 16:21:15 +0200 Subject: [PATCH] add feedback to command line interface --- src/eynollah/eynollah.py | 305 +++++++++++++++++++++++++++++++++++++-- 1 file changed, 290 insertions(+), 15 deletions(-) diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py index d47016b..d9939ca 100644 --- a/src/eynollah/eynollah.py +++ b/src/eynollah/eynollah.py @@ -203,6 +203,17 @@ class Eynollah: skip_layout_and_reading_order : bool = False, logger : Optional[Logger] = None, ): + if logger: + self.logger = logger + else: + self.logger = getLogger('eynollah') + console_handler = logging.StreamHandler(sys.stdout) + console_handler.setLevel(logging.INFO) + formatter = logging.Formatter('%(message)s') + console_handler.setFormatter(formatter) + self.logger.addHandler(console_handler) + self.logger.setLevel(logging.INFO) + if skip_layout_and_reading_order: textline_light = True self.light_version = light_version @@ -237,10 +248,7 @@ class Eynollah: self.num_col_lower = int(num_col_lower) else: self.num_col_lower = num_col_lower - self.logger = logger if logger else getLogger('eynollah') - # for parallelization of CPU-intensive tasks: - self.executor = ProcessPoolExecutor(max_workers=cpu_count(), timeout=1200) - atexit.register(self.executor.shutdown) + self.dir_models = dir_models self.model_dir_of_enhancement = dir_models + "/eynollah-enhancement_20210425" self.model_dir_of_binarization = dir_models + "/eynollah-binarization_20210425" @@ -293,7 +301,14 @@ class Eynollah: self.model_table_dir = dir_models + "/modelens_table_0t4_201124" else: self.model_table_dir = dir_models + "/eynollah-tables_20210319" + + + t_start = time.time() + # for parallelization of CPU-intensive tasks: + self.executor = ProcessPoolExecutor(max_workers=cpu_count(), timeout=1200) + atexit.register(self.executor.shutdown) + # #gpu_options = tf.compat.v1.GPUOptions(allow_growth=True) # #gpu_options = tf.compat.v1.GPUOptions(per_process_gpu_memory_fraction=7.7, allow_growth=True) # #session = tf.compat.v1.Session(config=tf.compat.v1.ConfigProto(gpu_options=gpu_options)) @@ -307,7 +322,11 @@ class Eynollah: tf.config.experimental.set_memory_growth(device, True) except: self.logger.warning("no GPU device available") - + + msg = "Loading models..." + print(msg) + self.logger.info(msg) + self.model_page = self.our_load_model(self.model_page_dir) self.model_classifier = self.our_load_model(self.model_dir_of_col_classifier) self.model_bin = self.our_load_model(self.model_dir_of_binarization) @@ -334,6 +353,10 @@ class Eynollah: self.processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-handwritten") if self.tables: self.model_table = self.our_load_model(self.model_table_dir) + + msg = f"Model initialization complete ({time.time() - t_start:.1f}s)" + print(msg) + self.logger.info(msg) def cache_images(self, image_filename=None, image_pil=None, dpi=None): ret = {} @@ -4294,21 +4317,81 @@ class Eynollah: def run_single(self): t0 = time.time() + + msg = f"Processing file: {self.writer.image_filename}" + print(msg) + self.logger.info(msg) + + # Log enabled features directly + enabled_modes = [] + if self.light_version: + enabled_modes.append("Light version") + if self.textline_light: + enabled_modes.append("Light textline detection") + if self.full_layout: + enabled_modes.append("Full layout analysis") + if self.ocr: + enabled_modes.append("OCR") + if self.tables: + enabled_modes.append("Table detection") + + if enabled_modes: + msg = "Enabled modes: " + ", ".join(enabled_modes) + print(msg) + self.logger.info(msg) + + + msg = "Step 1/5: Image Enhancement" + print(msg) + self.logger.info(msg) + img_res, is_image_enhanced, num_col_classifier, num_column_is_classified = self.run_enhancement(self.light_version) - self.logger.info("Enhancing took %.1fs ", time.time() - t0) + + msg = f"Image: {self.image.shape[1]}x{self.image.shape[0]}, {self.dpi} DPI, {num_col_classifier} columns" + print(msg) + self.logger.info(msg) + if is_image_enhanced: + msg = "Enhancement applied" + print(msg) + self.logger.info(msg) + + msg = f"Enhancement complete ({time.time() - t0:.1f}s)" + print(msg) + self.logger.info(msg) + + + # Image Extraction Mode if self.extract_only_images: + msg = "Step 2/5: Image Extraction Mode" + print(msg) + self.logger.info(msg) + text_regions_p_1, erosion_hurts, polygons_lines_xml, polygons_of_images, image_page, page_coord, cont_page = \ self.get_regions_light_v_extract_only_images(img_res, is_image_enhanced, num_col_classifier) + ocr_all_textlines = None pcgts = self.writer.build_pagexml_no_full_layout( [], page_coord, [], [], [], [], polygons_of_images, [], [], [], [], [], cont_page, [], [], ocr_all_textlines, []) + if self.plotter: self.plotter.write_images_into_directory(polygons_of_images, image_page) + + msg = "Image extraction complete" + print(msg) + self.logger.info(msg) return pcgts + # Basic Processing Mode if self.skip_layout_and_reading_order: + msg = "Step 2/5: Basic Processing Mode" + print(msg) + self.logger.info(msg) + msg = "Skipping layout analysis and reading order detection" + print(msg) + self.logger.info(msg) + _ ,_, _, textline_mask_tot_ea, img_bin_light, _ = \ self.get_regions_light_v(img_res, is_image_enhanced, num_col_classifier, skip_layout_and_reading_order=self.skip_layout_and_reading_order) @@ -4349,11 +4432,21 @@ class Eynollah: all_found_textline_polygons, page_coord, polygons_of_images, polygons_of_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals, cont_page, polygons_lines_xml, contours_tables, ocr_all_textlines, conf_contours_textregions) + msg = "Basic processing complete" + print(msg) + self.logger.info(msg) return pcgts #print("text region early -1 in %.1fs", time.time() - t0) t1 = time.time() + msg = "Step 2/5: Layout Analysis" + print(msg) + self.logger.info(msg) + if self.light_version: + msg = "Using light version processing" + print(msg) + self.logger.info(msg) text_regions_p_1 ,erosion_hurts, polygons_lines_xml, textline_mask_tot_ea, img_bin_light, confidence_matrix = \ self.get_regions_light_v(img_res, is_image_enhanced, num_col_classifier) #print("text region early -2 in %.1fs", time.time() - t0) @@ -4384,20 +4477,30 @@ class Eynollah: text_regions_p_1 ,erosion_hurts, polygons_lines_xml = \ self.get_regions_from_xy_2models(img_res, is_image_enhanced, num_col_classifier) - self.logger.info("Textregion detection took %.1fs ", time.time() - t1) + msg = f"Textregion detection took {time.time() - t1:.1f}s" + print(msg) + self.logger.info(msg) confidence_matrix = np.zeros((text_regions_p_1.shape[:2])) t1 = time.time() num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_lines, \ text_regions_p_1, cont_page, table_prediction = \ self.run_graphics_and_columns(text_regions_p_1, num_col_classifier, num_column_is_classified, erosion_hurts) - self.logger.info("Graphics detection took %.1fs ", time.time() - t1) + msg = f"Graphics detection took {time.time() - t1:.1f}s" + print(msg) + self.logger.info(msg) #self.logger.info('cont_page %s', cont_page) #plt.imshow(table_prediction) #plt.show() + msg = f"Layout analysis complete ({time.time() - t1:.1f}s)" + print(msg) + self.logger.info(msg) if not num_col: - self.logger.info("No columns detected, outputting an empty PAGE-XML") + msg = "No columns detected - generating empty PAGE-XML" + print(msg) + self.logger.info(msg) + ocr_all_textlines = None pcgts = self.writer.build_pagexml_no_full_layout( [], page_coord, [], [], [], [], [], [], [], [], [], [], @@ -4408,10 +4511,18 @@ class Eynollah: t1 = time.time() if not self.light_version: textline_mask_tot_ea = self.run_textline(image_page) - self.logger.info("textline detection took %.1fs", time.time() - t1) + msg = f"Textline detection took {time.time() - t1:.1f}s" + print(msg) + self.logger.info(msg) t1 = time.time() slope_deskew, slope_first = self.run_deskew(textline_mask_tot_ea) - self.logger.info("deskewing took %.1fs", time.time() - t1) + if np.abs(slope_deskew) > 0.01: # Only log if there is significant skew + msg = f"Applied deskew correction: {slope_deskew:.2f} degrees" + print(msg) + self.logger.info(msg) + msg = f"Deskewing took {time.time() - t1:.1f}s" + print(msg) + self.logger.info(msg) elif num_col_classifier in (1,2): org_h_l_m = textline_mask_tot_ea.shape[0] org_w_l_m = textline_mask_tot_ea.shape[1] @@ -4431,6 +4542,19 @@ class Eynollah: textline_mask_tot, text_regions_p, image_page_rotated = \ self.run_marginals(image_page, textline_mask_tot_ea, mask_images, mask_lines, num_col_classifier, slope_deskew, text_regions_p_1, table_prediction) + + msg = "Step 3/5: Text Line Detection" + print(msg) + self.logger.info(msg) + + if self.curved_line: + msg = "Mode: Curved line detection" + print(msg) + self.logger.info(msg) + elif self.textline_light: + msg = "Mode: Light detection" + print(msg) + self.logger.info(msg) if self.light_version and num_col_classifier in (1,2): image_page = resize_image(image_page,org_h_l_m, org_w_l_m ) @@ -4441,7 +4565,9 @@ class Eynollah: table_prediction = resize_image(table_prediction,org_h_l_m, org_w_l_m ) image_page_rotated = resize_image(image_page_rotated,org_h_l_m, org_w_l_m ) - self.logger.info("detection of marginals took %.1fs", time.time() - t1) + msg = f"Detection of marginals took {time.time() - t1:.1f}s" + print(msg) + self.logger.info(msg) #print("text region early 2 marginal in %.1fs", time.time() - t0) ## birdan sora chock chakir t1 = time.time() @@ -4540,7 +4666,9 @@ class Eynollah: cx_bigest_d_big[0] = cx_bigest_d[ind_largest] cy_biggest_d_big[0] = cy_biggest_d[ind_largest] except Exception as why: - self.logger.error(why) + msg = str(why) + print(f"Error: {msg}") + self.logger.error(msg) (h, w) = text_only.shape[:2] center = (w // 2.0, h // 2.0) @@ -4758,6 +4886,23 @@ class Eynollah: t_order = time.time() if self.full_layout: + msg = "Step 4/5: Reading Order Detection" + print(msg) + self.logger.info(msg) + + if self.reading_order_machine_based: + msg = "Using machine-based detection" + print(msg) + self.logger.info(msg) + if self.right2left: + msg = "Right-to-left mode enabled" + print(msg) + self.logger.info(msg) + if self.headers_off: + msg = "Headers ignored in reading order" + print(msg) + self.logger.info(msg) + if self.reading_order_machine_based: order_text_new, id_of_texts_tot = self.do_order_of_regions_with_model( contours_only_text_parent, contours_only_text_parent_h, text_regions_p) @@ -4768,21 +4913,84 @@ class Eynollah: else: order_text_new, id_of_texts_tot = self.do_order_of_regions( contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered, boxes_d, textline_mask_tot_d) - self.logger.info("detection of reading order took %.1fs", time.time() - t_order) + msg = f"Detection of reading order took {time.time() - t_order:.1f}s" + print(msg) + self.logger.info(msg) if self.ocr: + msg = "Step 4.5/5: OCR Processing" + print(msg) + self.logger.info(msg) + + if torch.cuda.is_available(): + msg = "Using GPU acceleration" + print(msg) + self.logger.info(msg) + else: + msg = "Using CPU processing" + print(msg) + self.logger.info(msg) + ocr_all_textlines = [] else: ocr_all_textlines = None + + msg = "Step 5/5: Output Generation" + print(msg) + self.logger.info(msg) + + output_config = [] + if self.enable_plotting: + output_config.append("Saving debug plots") + if self.dir_of_cropped_images: + output_config.append(f"Saving cropped images to: {self.dir_of_cropped_images}") + if self.dir_of_layout: + output_config.append(f"Saving layout plots to: {self.dir_of_layout}") + if self.dir_of_deskewed: + output_config.append(f"Saving deskewed images to: {self.dir_of_deskewed}") + + if output_config: + self.logger.info("Output configuration:\n * %s", "\n * ".join(output_config)) + pcgts = self.writer.build_pagexml_full_layout( contours_only_text_parent, contours_only_text_parent_h, page_coord, order_text_new, id_of_texts_tot, all_found_textline_polygons, all_found_textline_polygons_h, all_box_coord, all_box_coord_h, polygons_of_images, contours_tables, polygons_of_drop_capitals, polygons_of_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_h, slopes_marginals, cont_page, polygons_lines_xml, ocr_all_textlines, conf_contours_textregions, conf_contours_textregions_h) + + summary = [ + f"Total processing time: {time.time() - t0:.1f}s", + f"Output file: {self.writer.output_filename}" + ] + + if self.ocr: + summary.append("OCR processing completed") + if self.full_layout: + summary.append("Full layout analysis completed") + if self.tables: + summary.append("Table detection completed") + return pcgts contours_only_text_parent_h = None + msg = "Step 4/5: Reading Order Detection" + print(msg) + self.logger.info(msg) + + if self.reading_order_machine_based: + msg = "Using machine-based detection" + print(msg) + self.logger.info(msg) + if self.right2left: + msg = "Right-to-left mode enabled" + print(msg) + self.logger.info(msg) + if self.headers_off: + msg = "Headers ignored in reading order" + print(msg) + self.logger.info(msg) + if self.reading_order_machine_based: order_text_new, id_of_texts_tot = self.do_order_of_regions_with_model( contours_only_text_parent, contours_only_text_parent_h, text_regions_p) @@ -4803,6 +5011,33 @@ class Eynollah: contours_only_text_parent_d_ordered, contours_only_text_parent_h, boxes_d, textline_mask_tot_d) if self.ocr: + msg = "Step 4.5/5: OCR Processing" + print(msg) + self.logger.info(msg) + + if torch.cuda.is_available(): + msg = "Using GPU acceleration" + print(msg) + self.logger.info(msg) + else: + msg = "Using CPU processing" + print(msg) + self.logger.info(msg) + + if self.light_version: + msg = "Using light version OCR" + print(msg) + self.logger.info(msg) + + if self.textline_light: + msg = "Using light text line detection for OCR" + print(msg) + self.logger.info(msg) + + msg = "Processing text lines..." + print(msg) + self.logger.info(msg) + device = cuda.get_current_device() device.reset() gc.collect() @@ -4853,12 +5088,52 @@ class Eynollah: else: ocr_all_textlines = None #print(ocr_all_textlines) - self.logger.info("detection of reading order took %.1fs", time.time() - t_order) + msg = f"Detection of reading order took {time.time() - t_order:.1f}s" + print(msg) + self.logger.info(msg) + + msg = "Step 5/5: Output Generation" + print(msg) + self.logger.info(msg) + + msg = "Generating PAGE-XML output" + print(msg) + self.logger.info(msg) + + if self.enable_plotting: + msg = "Saving debug plots" + print(msg) + self.logger.info(msg) + + if self.dir_of_cropped_images: + msg = f"Saving cropped images to: {self.dir_of_cropped_images}" + print(msg) + self.logger.info(msg) + + if self.dir_of_layout: + msg = f"Saving layout plots to: {self.dir_of_layout}" + print(msg) + self.logger.info(msg) + + if self.dir_of_deskewed: + msg = f"Saving deskewed images to: {self.dir_of_deskewed}" + print(msg) + self.logger.info(msg) + pcgts = self.writer.build_pagexml_no_full_layout( txt_con_org, page_coord, order_text_new, id_of_texts_tot, all_found_textline_polygons, all_box_coord, polygons_of_images, polygons_of_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals, cont_page, polygons_lines_xml, contours_tables, ocr_all_textlines, conf_contours_textregions) + + msg = f"\nProcessing completed in {time.time() - t0:.1f}s" + print(msg) + self.logger.info(msg) + + msg = f"Output file: {self.writer.output_filename}" + print(msg) + self.logger.info(msg) + return pcgts