split_textregion_main_vs_header → split_textregion_main_vs_head…

(and simplify)
This commit is contained in:
Robert Sachunsky 2026-04-16 05:07:22 +02:00
parent 0f82b568ba
commit 9309586712
2 changed files with 19 additions and 17 deletions

View file

@ -80,7 +80,7 @@ from .utils import (
otsu_copy_binary, otsu_copy_binary,
seg_mask_label, seg_mask_label,
putt_bb_of_drop_capitals_of_model_in_patches_in_layout, putt_bb_of_drop_capitals_of_model_in_patches_in_layout,
split_textregion_main_vs_header, split_textregion_main_vs_head,
small_textlines_to_parent_adherence2, small_textlines_to_parent_adherence2,
order_of_regions, order_of_regions,
find_number_of_columns_in_document, find_number_of_columns_in_document,
@ -2860,7 +2860,7 @@ class Eynollah:
text_regions_p, contours_only_text_parent, contours_only_text_parent_h, all_box_coord, all_box_coord_h, \ text_regions_p, contours_only_text_parent, contours_only_text_parent_h, all_box_coord, all_box_coord_h, \
all_found_textline_polygons, all_found_textline_polygons_h, slopes, slopes_h, \ all_found_textline_polygons, all_found_textline_polygons_h, slopes, slopes_h, \
contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered, \ contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered, \
conf_contours_textregions, conf_contours_textregions_h = split_textregion_main_vs_header( conf_contours_textregions, conf_contours_textregions_h = split_textregion_main_vs_head(
text_regions_p, regions_fully, contours_only_text_parent, text_regions_p, regions_fully, contours_only_text_parent,
all_box_coord, all_found_textline_polygons, all_box_coord, all_found_textline_polygons,
slopes, contours_only_text_parent_d_ordered, conf_contours_textregions) slopes, contours_only_text_parent_d_ordered, conf_contours_textregions)

View file

@ -866,13 +866,18 @@ def check_any_text_region_in_model_one_is_main_or_header(
conf_contours_main, conf_contours_main,
conf_contours_head) conf_contours_head)
def split_textregion_main_vs_header( def split_textregion_main_vs_head(
regions_model_1, regions_model_full, regions_model_1, regions_model_full,
contours_only_text_parent, contours_only_text_parent,
all_box_coord, all_found_textline_polygons, all_box_coord, all_found_textline_polygons,
slopes, slopes,
contours_only_text_parent_d_ordered, contours_only_text_parent_d_ordered,
conf_contours): conf_contours,
label_text=1,
label_head_full=2,
label_head_final=2,
label_main_final=1,
):
### to make it faster ### to make it faster
h_o = regions_model_1.shape[0] h_o = regions_model_1.shape[0]
@ -912,21 +917,19 @@ def split_textregion_main_vs_header(
contours_only_text_parent_head_d=[] contours_only_text_parent_head_d=[]
for ii, con in enumerate(contours_only_text_parent_z): for ii, con in enumerate(contours_only_text_parent_z):
img = np.zeros(regions_model_1.shape[:2]) parent = np.zeros_like(regions_model_1)
img = cv2.fillPoly(img, pts=[con], color=255) parent = cv2.fillPoly(parent, pts=[con], color=1)
all_pixels = (img == 255).sum() pixels_head = ((parent > 0) & (regions_model_full == label_head_full)).sum()
pixels_header=((img == 255) & pixels_main = parent.sum() - pixels_head
(regions_model_full==2)).sum()
pixels_main = all_pixels - pixels_header
if (( pixels_header >= 0.6 * pixels_main and if (( pixels_head >= 0.6 * pixels_main and
length_con[ii] >= 1.3 * height_con[ii] and length_con[ii] >= 1.3 * height_con[ii] and
length_con[ii] <= 3 * height_con[ii] ) or length_con[ii] <= 3 * height_con[ii] ) or
( pixels_header >= 0.3 * pixels_main and ( pixels_head >= 0.3 * pixels_main and
length_con[ii] >= 3 * height_con[ii] )): length_con[ii] >= 3 * height_con[ii] )):
regions_model_1[:,:][(regions_model_1[:,:]==1) & (img == 255) ] = 2 regions_model_1[(regions_model_1 == label_text) & (parent > 0)] = label_head_final
contours_only_text_parent_head.append(contours_only_text_parent[ii]) contours_only_text_parent_head.append(contours_only_text_parent[ii])
conf_contours_head.append(None) # why not conf_contours[ii], too? conf_contours_head.append(None) # why not conf_contours[ii], too?
if len(contours_only_text_parent_d_ordered): if len(contours_only_text_parent_d_ordered):
@ -936,7 +939,7 @@ def split_textregion_main_vs_header(
all_found_textline_polygons_head.append(all_found_textline_polygons[ii]) all_found_textline_polygons_head.append(all_found_textline_polygons[ii])
else: else:
regions_model_1[:,:][(regions_model_1[:,:]==1) & (img == 255) ] = 1 regions_model_1[(regions_model_1 == label_text) & (parent > 0)] = label_main_final
contours_only_text_parent_main.append(contours_only_text_parent[ii]) contours_only_text_parent_main.append(contours_only_text_parent[ii])
conf_contours_main.append(conf_contours[ii]) conf_contours_main.append(conf_contours[ii])
if len(contours_only_text_parent_d_ordered): if len(contours_only_text_parent_d_ordered):
@ -944,12 +947,11 @@ def split_textregion_main_vs_header(
all_box_coord_main.append(all_box_coord[ii]) all_box_coord_main.append(all_box_coord[ii])
slopes_main.append(slopes[ii]) slopes_main.append(slopes[ii])
all_found_textline_polygons_main.append(all_found_textline_polygons[ii]) all_found_textline_polygons_main.append(all_found_textline_polygons[ii])
#print(all_pixels,pixels_main,pixels_header)
### to make it faster ### to make it faster
regions_model_1 = cv2.resize(regions_model_1, (w_o, h_o), interpolation=cv2.INTER_NEAREST) regions_model_1 = cv2.resize(regions_model_1, (w_o, h_o), interpolation=cv2.INTER_NEAREST)
# regions_model_full = cv2.resize(img, (regions_model_full.shape[1] // zoom, # regions_model_full = cv2.resize(parent, (regions_model_full.shape[1] // zoom,
# regions_model_full.shape[0] // zoom), # regions_model_full.shape[0] // zoom),
# interpolation=cv2.INTER_NEAREST) # interpolation=cv2.INTER_NEAREST)
### ###