From 1f6db34adfd61f30961cfef7f8ee3d319e14c6ff Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Sun, 26 Apr 2026 23:51:05 +0200 Subject: [PATCH] =?UTF-8?q?run/get=5Fmarginals():=20simplify=20and=20speed?= =?UTF-8?q?=20up=E2=80=A6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - `get_marginals` modifies region labels in place anyway, so there is no need for a return value - rotate and derotate only inside `get_marginals` (for consistency) - return early if no marginals are detected - `run_marginals`: only useful for pages with 1 or 2 columns, so call it only in that conditional branch; this avoids unnecessarily resizing images back and forth - rename `text_regions_p_1` → `text_regions_p` --- src/eynollah/eynollah.py | 53 +++++++++++---------------------- src/eynollah/utils/marginals.py | 20 +++++++------ 2 files changed, 28 insertions(+), 45 deletions(-) diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py index a659922..ffc5eb7 100644 --- a/src/eynollah/eynollah.py +++ b/src/eynollah/eynollah.py @@ -1628,22 +1628,14 @@ class Eynollah: return slope_deskew def run_marginals( - self, num_col_classifier, slope_deskew, text_regions_p_1, table_prediction): + self, num_col_classifier, slope_deskew, text_regions_p, table_prediction): - text_regions_p = np.array(text_regions_p_1) - if num_col_classifier in (1, 2): - try: - regions_without_separators = (text_regions_p == 1) * 1 - if self.tables: - regions_without_separators[table_prediction == 1] = 1 - regions_without_separators = regions_without_separators.astype(np.uint8) - text_regions_p = get_marginals( - rotate_image(regions_without_separators, slope_deskew), text_regions_p, - num_col_classifier, slope_deskew, kernel=KERNEL) - except Exception as e: - self.logger.error("exception %s", e) + regions_without_separators = (text_regions_p == 1).astype(np.uint8) + if self.tables: + regions_without_separators[table_prediction == 1] = 1 - return text_regions_p + get_marginals(regions_without_separators, 
text_regions_p, + num_col_classifier, slope_deskew, kernel=KERNEL) def get_full_layout( self, image_page, @@ -2267,7 +2259,7 @@ class Eynollah: t1 = time.time() self.logger.info("Step 2/5: Layout Analysis") - (text_regions_p_1, + (text_regions_p, erosion_hurts, polygons_seplines, polygons_text_early, @@ -2293,9 +2285,9 @@ class Eynollah: #self.logger.info("Textregion detection took %.1fs ", time.time() - t1t) (num_col, num_col_classifier, page_coord, image_page, cont_page, - text_regions_p_1, table_prediction, textline_mask_tot_ea, + text_regions_p, table_prediction, textline_mask_tot_ea, regions_confidence, table_confidence, textline_confidence) = \ - self.run_graphics_and_columns(text_regions_p_1, textline_mask_tot_ea, + self.run_graphics_and_columns(text_regions_p, textline_mask_tot_ea, regions_confidence, textline_confidence, num_col_classifier, num_column_is_classified, erosion_hurts, image) @@ -2344,13 +2336,10 @@ class Eynollah: img_w_new = 2400 img_h_new = img_w_new * textline_mask_tot_ea.shape[0] // textline_mask_tot_ea.shape[1] - image_page = resize_image(image_page, img_h_new, img_w_new) - textline_mask_tot_ea = resize_image(textline_mask_tot_ea, img_h_new, img_w_new) - text_regions_p_1 = resize_image(text_regions_p_1, img_h_new, img_w_new) - table_prediction = resize_image(table_prediction, img_h_new, img_w_new) - - text_regions_p = \ - self.run_marginals(num_col_classifier, slope_deskew, text_regions_p_1, table_prediction) + text_regions_p_new = resize_image(text_regions_p, img_h_new, img_w_new) + table_prediction_new = resize_image(table_prediction, img_h_new, img_w_new) + self.run_marginals(num_col_classifier, slope_deskew, text_regions_p_new, table_prediction_new) + text_regions_p = resize_image(text_regions_p_new, org_h_l_m, org_w_l_m) if self.plotter: self.plotter.save_plot_of_layout_main_all(text_regions_p, image_page, image['name']) @@ -2378,20 +2367,10 @@ class Eynollah: if not np.any(text_regions_p == label_text): text_regions_p[text_regions_p 
== label_marg] = label_text + t5 = time.time() + self.logger.info("Marginalia extraction took %.1fs", t5 - t4) self.logger.info("Step 3/5: Text Line Detection") - if self.curved_line: - self.logger.info("Mode: Curved line detection") - - if num_col_classifier in (1,2): - image_page = resize_image(image_page, org_h_l_m, org_w_l_m) - textline_mask_tot_ea = resize_image(textline_mask_tot_ea, org_h_l_m, org_w_l_m) - text_regions_p = resize_image(text_regions_p, org_h_l_m, org_w_l_m) - text_regions_p_1 = resize_image(text_regions_p_1, org_h_l_m, org_w_l_m) - table_prediction = resize_image(table_prediction, org_h_l_m, org_w_l_m) - - self.logger.info(f"Detection of marginals took {time.time() - t1:.1f}s") - t1 = time.time() regions_fully, regionsfl_confidence, regions_without_separators = \ self.get_full_layout(image_page, textline_mask_tot_ea, @@ -2479,6 +2458,8 @@ class Eynollah: all_found_textline_polygons_marginals = dilate_textline_contours( all_found_textline_polygons_marginals) else: + self.logger.info("Mode: Curved line detection") + textline_mask_tot_ea_erode = cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=2) all_found_textline_polygons, slopes = \ self.get_slopes_and_deskew_new_curved( diff --git a/src/eynollah/utils/marginals.py b/src/eynollah/utils/marginals.py index 7ef46aa..5a82377 100644 --- a/src/eynollah/utils/marginals.py +++ b/src/eynollah/utils/marginals.py @@ -6,16 +6,20 @@ from .contour import find_center_of_contours, return_contours_of_interested_regi from .resize import resize_image from .rotate import rotate_image -def get_marginals(text_mask_d, early_layout, num_col, slope_deskew, kernel=None, +def get_marginals(text_mask, early_layout, num_col, slope_deskew, + kernel=None, label_text=1, label_marg=4, ): - # rs: text_mask_d is already deskewed, while early_layout is not... 
+ if kernel is None: + kernel = np.ones((5, 5), dtype=np.uint8) + kernel_hor = np.ones((1, 5), dtype=np.uint8) + + text_mask_d = rotate_image(text_mask, slope_deskew) main_mask_d = np.zeros_like(text_mask_d) height, width = main_mask_d.shape - ##text_mask_d=cv2.erode(text_mask_d,self.kernel,iterations=3) - text_mask_d_eroded = cv2.erode(text_mask_d,kernel,iterations=5) + text_mask_d_eroded = cv2.erode(text_mask_d, kernel, iterations=5) if height <= 1500: pass @@ -32,7 +36,6 @@ def get_marginals(text_mask_d, early_layout, num_col, slope_deskew, kernel=None, # rs: and back to original size text_mask_d = resize_image(text_mask_d, height, width) - kernel_hor = np.ones((1, 5), dtype=np.uint8) text_mask_d = cv2.erode(text_mask_d, kernel_hor, iterations=6) text_mask_d_y = text_mask_d.sum(axis=0) text_mask_d_y_eroded = text_mask_d_eroded.sum(axis=0) @@ -98,7 +101,7 @@ def get_marginals(text_mask_d, early_layout, num_col, slope_deskew, kernel=None, # ax2.scatter(peaks_orig, region_sum_0[peaks_orig], label='peaks') # plt.legend() # plt.show() - return early_layout + return point_right = peaks_right[np.argmax(scores[peaks_right])] #point_left = first_nonzero point_left = 0 @@ -116,6 +119,8 @@ def get_marginals(text_mask_d, early_layout, num_col, slope_deskew, kernel=None, point_right = 0 main_mask_d[:, point_left: point_right] = 1 + if not np.any(main_mask_d): + return # plt.figure() # ax1 = plt.subplot(2, 2, 1) @@ -167,6 +172,3 @@ def get_marginals(text_mask_d, early_layout, num_col, slope_deskew, kernel=None, # plt.imshow(early_layout) # plt.show() - else: - pass - return early_layout