From 9309586712c1756e39bf6748fc8a00de2876564e Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Thu, 16 Apr 2026 05:07:22 +0200 Subject: [PATCH] =?UTF-8?q?split=5Ftextregion=5Fmain=5Fvs=5Fheader=20?= =?UTF-8?q?=E2=86=92=20split=5Ftextregion=5Fmain=5Fvs=5Fhead=E2=80=A6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit (and simplify) --- src/eynollah/eynollah.py | 4 ++-- src/eynollah/utils/__init__.py | 32 +++++++++++++++++--------------- 2 files changed, 19 insertions(+), 17 deletions(-) diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py index ff9e8e6..10f0ba7 100644 --- a/src/eynollah/eynollah.py +++ b/src/eynollah/eynollah.py @@ -80,7 +80,7 @@ from .utils import ( otsu_copy_binary, seg_mask_label, putt_bb_of_drop_capitals_of_model_in_patches_in_layout, - split_textregion_main_vs_header, + split_textregion_main_vs_head, small_textlines_to_parent_adherence2, order_of_regions, find_number_of_columns_in_document, @@ -2860,7 +2860,7 @@ class Eynollah: text_regions_p, contours_only_text_parent, contours_only_text_parent_h, all_box_coord, all_box_coord_h, \ all_found_textline_polygons, all_found_textline_polygons_h, slopes, slopes_h, \ contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered, \ - conf_contours_textregions, conf_contours_textregions_h = split_textregion_main_vs_header( + conf_contours_textregions, conf_contours_textregions_h = split_textregion_main_vs_head( text_regions_p, regions_fully, contours_only_text_parent, all_box_coord, all_found_textline_polygons, slopes, contours_only_text_parent_d_ordered, conf_contours_textregions) diff --git a/src/eynollah/utils/__init__.py b/src/eynollah/utils/__init__.py index ef27f24..15ce7ab 100644 --- a/src/eynollah/utils/__init__.py +++ b/src/eynollah/utils/__init__.py @@ -866,13 +866,18 @@ def check_any_text_region_in_model_one_is_main_or_header( conf_contours_main, conf_contours_head) -def split_textregion_main_vs_header( +def split_textregion_main_vs_head( regions_model_1, regions_model_full, contours_only_text_parent, all_box_coord, all_found_textline_polygons, slopes, contours_only_text_parent_d_ordered, - conf_contours): + conf_contours, + label_text=1, + label_head_full=2, + label_head_final=2, + label_main_final=1, +): ### to make it faster h_o = regions_model_1.shape[0] @@ -912,21 +917,19 @@ def split_textregion_main_vs_header( contours_only_text_parent_head_d=[] for ii, con in enumerate(contours_only_text_parent_z): - img = np.zeros(regions_model_1.shape[:2]) - img = cv2.fillPoly(img, pts=[con], color=255) + parent = np.zeros_like(regions_model_1) + parent = cv2.fillPoly(parent, pts=[con], color=1) - all_pixels = (img == 255).sum() - pixels_header=((img == 255) & - (regions_model_full==2)).sum() - pixels_main = all_pixels - pixels_header + pixels_head = ((parent > 0) & (regions_model_full == label_head_full)).sum() + pixels_main = parent.sum() - pixels_head - if (( pixels_header >= 0.6 * pixels_main and + if (( pixels_head >= 0.6 * pixels_main and length_con[ii] >= 1.3 * height_con[ii] and length_con[ii] <= 3 * height_con[ii] ) or - ( pixels_header >= 0.3 * pixels_main and + ( pixels_head >= 0.3 * pixels_main and length_con[ii] >= 3 * height_con[ii] )): - regions_model_1[:,:][(regions_model_1[:,:]==1) & (img == 255) ] = 2 + regions_model_1[(regions_model_1 == label_text) & (parent > 0)] = label_head_final contours_only_text_parent_head.append(contours_only_text_parent[ii]) conf_contours_head.append(None) # why not conf_contours[ii], too? if len(contours_only_text_parent_d_ordered): @@ -936,7 +939,7 @@ def split_textregion_main_vs_header( all_found_textline_polygons_head.append(all_found_textline_polygons[ii]) else: - regions_model_1[:,:][(regions_model_1[:,:]==1) & (img == 255) ] = 1 + regions_model_1[(regions_model_1 == label_text) & (parent > 0)] = label_main_final contours_only_text_parent_main.append(contours_only_text_parent[ii]) conf_contours_main.append(conf_contours[ii]) if len(contours_only_text_parent_d_ordered): @@ -944,12 +947,11 @@ def split_textregion_main_vs_header( all_box_coord_main.append(all_box_coord[ii]) slopes_main.append(slopes[ii]) all_found_textline_polygons_main.append(all_found_textline_polygons[ii]) - #print(all_pixels,pixels_main,pixels_header) ### to make it faster regions_model_1 = cv2.resize(regions_model_1, (w_o, h_o), interpolation=cv2.INTER_NEAREST) - # regions_model_full = cv2.resize(img, (regions_model_full.shape[1] // zoom, - # regions_model_full.shape[0] // zoom), + # regions_model_full = cv2.resize(parent, (regions_model_full.shape[1] // zoom, + # regions_model_full.shape[0] // zoom), # interpolation=cv2.INTER_NEAREST) ###