decoding of drop-capitals in full layout: also allow replacing image labels

- rename `putt_bb_of_drop_capitals_of_model_in_patches_in_layout`
  → `fill_bb_of_drop_capitals`
- also allow the image label (besides the text label) in the early layout
  prediction result when checking whether the entire bbox can be filled
  (as opposed to masking only drop-capital | image | background pixels)
- simplify
This commit is contained in:
Robert Sachunsky 2026-04-16 18:04:01 +02:00
parent 6e0aed35f4
commit 29b42fdfaa
2 changed files with 28 additions and 33 deletions

View file

@ -79,7 +79,7 @@ from .utils import (
find_num_col, find_num_col,
otsu_copy_binary, otsu_copy_binary,
seg_mask_label, seg_mask_label,
putt_bb_of_drop_capitals_of_model_in_patches_in_layout, fill_bb_of_drop_capitals,
split_textregion_main_vs_head, split_textregion_main_vs_head,
small_textlines_to_parent_adherence2, small_textlines_to_parent_adherence2,
order_of_regions, order_of_regions,
@ -1802,11 +1802,10 @@ class Eynollah:
drops = regions_fully == label_drop_fl_model drops = regions_fully == label_drop_fl_model
regions_fully[drops] = label_text regions_fully[drops] = label_text
# rs: why erode to text here, when putt_bb... will mask out text (only allowing img/drop/bg)? # rs: why erode to text here, when fill_bb... will mask out text (only allowing img/drop/bg)?
drops = cv2.erode(drops.astype(np.uint8), KERNEL, iterations=1) == 1 drops = cv2.erode(drops.astype(np.uint8), KERNEL, iterations=1) == 1
regions_fully[drops] = label_drop_fl_model regions_fully[drops] = label_drop_fl_model
regions_fully = putt_bb_of_drop_capitals_of_model_in_patches_in_layout( regions_fully = fill_bb_of_drop_capitals(regions_fully, text_regions_p)
regions_fully, label_drop_fl_model, text_regions_p)
text_regions_p[regions_fully == label_drop_fl_model] = label_drop_fl text_regions_p[regions_fully == label_drop_fl_model] = label_drop_fl
regions_without_separators = (text_regions_p == label_text) * 1 regions_without_separators = (text_regions_p == label_text) * 1

View file

@ -754,43 +754,39 @@ def put_drop_out_from_only_drop_model(layout_no_patch, layout1):
return layout_no_patch return layout_no_patch
def putt_bb_of_drop_capitals_of_model_in_patches_in_layout(layout_in_patch, drop_capital_label, text_regions_p): def fill_bb_of_drop_capitals(
drop_only = (layout_in_patch == drop_capital_label) * 1 full_prediction, early_prediction,
label_bg=0,
label_text=1,
label_imgs=5,
label_drop_fl_model=3,
label_imgs_fl_model=4):
area_tot = full_prediction.size
drop_only = (full_prediction == label_drop_fl_model) * 1
contours_drop, hir_on_drop = return_contours_of_image(drop_only) contours_drop, hir_on_drop = return_contours_of_image(drop_only)
contours_drop_parent = return_parent_contours(contours_drop, hir_on_drop) contours_drop_parent = return_parent_contours(contours_drop, hir_on_drop)
areas_cnt_text = np.array([cv2.contourArea(contours_drop_parent[j])
for j in range(len(contours_drop_parent))])
areas_cnt_text = areas_cnt_text / float(drop_only.shape[0] * drop_only.shape[1])
contours_drop_parent = [contours_drop_parent[jz]
for jz in range(len(contours_drop_parent))
if areas_cnt_text[jz] > 0.00001]
areas_cnt_text = [areas_cnt_text[jz]
for jz in range(len(areas_cnt_text))
if areas_cnt_text[jz] > 0.00001]
contours_drop_parent_final = [] contours_drop_parent_final = []
for jj in range(len(contours_drop_parent)): for contour in contours_drop_parent:
x, y, w, h = cv2.boundingRect(contours_drop_parent[jj]) area_drop = cv2.contourArea(contour)
if area_drop <= 0.00001 * area_tot:
continue
x, y, w, h = cv2.boundingRect(contour)
box = slice(y, y + h), slice(x, x + w) box = slice(y, y + h), slice(x, x + w)
mask_of_drop_cpaital_in_early_layout = np.zeros((text_regions_p.shape[0], text_regions_p.shape[1])) area_box = w * h
mask_of_drop_cpaital_in_early_layout[box] = text_regions_p[box] area_text_in_early_layout = np.sum((early_prediction[box] == label_text) |
(early_prediction[box] == label_imgs))
all_drop_capital_pixels_which_is_text_in_early_lo = np.sum(mask_of_drop_cpaital_in_early_layout[box]==1) if (area_drop > 0.6 * area_box and
mask_of_drop_cpaital_in_early_layout[box] = 1 area_text_in_early_layout >= 0.3 * area_box):
all_drop_capital_pixels = np.sum(mask_of_drop_cpaital_in_early_layout==1) full_prediction[box] = label_drop_fl_model
percent_text_to_all_in_drop = all_drop_capital_pixels_which_is_text_in_early_lo / float(all_drop_capital_pixels)
if (areas_cnt_text[jj] * float(drop_only.shape[0] * drop_only.shape[1]) / float(w * h) > 0.6 and
percent_text_to_all_in_drop >= 0.3):
layout_in_patch[box] = drop_capital_label
else: else:
mask = ((layout_in_patch[box] == drop_capital_label) | mask = ((full_prediction[box] == label_drop_fl_model) |
(layout_in_patch[box] == 0) | (full_prediction[box] == label_imgs_fl_model) |
(layout_in_patch[box] == 4)) (full_prediction[box] == label_bg))
layout_in_patch[box][mask] = drop_capital_label full_prediction[box][mask] = label_drop_fl_model
return layout_in_patch return full_prediction
def check_any_text_region_in_model_one_is_main_or_header( def check_any_text_region_in_model_one_is_main_or_header(
regions_model_1, regions_model_full, regions_model_1, regions_model_full,