mirror of
https://github.com/qurator-spk/eynollah.git
synced 2026-05-01 03:32:00 +02:00
decoding of drop-capitals in full layout: also allow replacing img…
- rename `putt_bb_of_drop_capitals_of_model_in_patches_in_layout` → `fill_bb_of_drop_capitals` - also allow image (besides text) label in early layout prediction result when checking if entire bbox can be filled (as opposed to just drop-capital | image | background mask) - simplify
This commit is contained in:
parent
6e0aed35f4
commit
29b42fdfaa
2 changed files with 28 additions and 33 deletions
|
|
@ -79,7 +79,7 @@ from .utils import (
|
||||||
find_num_col,
|
find_num_col,
|
||||||
otsu_copy_binary,
|
otsu_copy_binary,
|
||||||
seg_mask_label,
|
seg_mask_label,
|
||||||
putt_bb_of_drop_capitals_of_model_in_patches_in_layout,
|
fill_bb_of_drop_capitals,
|
||||||
split_textregion_main_vs_head,
|
split_textregion_main_vs_head,
|
||||||
small_textlines_to_parent_adherence2,
|
small_textlines_to_parent_adherence2,
|
||||||
order_of_regions,
|
order_of_regions,
|
||||||
|
|
@ -1802,11 +1802,10 @@ class Eynollah:
|
||||||
|
|
||||||
drops = regions_fully == label_drop_fl_model
|
drops = regions_fully == label_drop_fl_model
|
||||||
regions_fully[drops] = label_text
|
regions_fully[drops] = label_text
|
||||||
# rs: why erode to text here, when putt_bb... will mask out text (only allowing img/drop/bg)?
|
# rs: why erode to text here, when fill_bb... will mask out text (only allowing img/drop/bg)?
|
||||||
drops = cv2.erode(drops.astype(np.uint8), KERNEL, iterations=1) == 1
|
drops = cv2.erode(drops.astype(np.uint8), KERNEL, iterations=1) == 1
|
||||||
regions_fully[drops] = label_drop_fl_model
|
regions_fully[drops] = label_drop_fl_model
|
||||||
regions_fully = putt_bb_of_drop_capitals_of_model_in_patches_in_layout(
|
regions_fully = fill_bb_of_drop_capitals(regions_fully, text_regions_p)
|
||||||
regions_fully, label_drop_fl_model, text_regions_p)
|
|
||||||
text_regions_p[regions_fully == label_drop_fl_model] = label_drop_fl
|
text_regions_p[regions_fully == label_drop_fl_model] = label_drop_fl
|
||||||
|
|
||||||
regions_without_separators = (text_regions_p == label_text) * 1
|
regions_without_separators = (text_regions_p == label_text) * 1
|
||||||
|
|
|
||||||
|
|
@ -754,43 +754,39 @@ def put_drop_out_from_only_drop_model(layout_no_patch, layout1):
|
||||||
|
|
||||||
return layout_no_patch
|
return layout_no_patch
|
||||||
|
|
||||||
def fill_bb_of_drop_capitals(
        full_prediction, early_prediction,
        label_bg=0,
        label_text=1,
        label_imgs=5,
        label_drop_fl_model=3,
        label_imgs_fl_model=4):
    """Fill the bounding boxes of detected drop capitals in ``full_prediction``.

    For every connected drop-capital region of the full-layout model output,
    decide whether its whole bounding box can be relabelled as drop capital:

    - If the contour already covers most of its bounding box (> 60% by area)
      and the early layout prediction marks a substantial part of that box
      (>= 30%) as text or image, the entire box is overwritten with
      ``label_drop_fl_model``.
    - Otherwise only the box pixels currently labelled drop capital, image or
      background are relabelled; text pixels inside the box are preserved.

    Arguments:
        full_prediction: label map produced by the full-layout model.
            NOTE: modified in place and also returned.
        early_prediction: label map of the early layout prediction; assumed to
            have the same height/width as ``full_prediction`` — TODO confirm.
        label_bg: background label in ``full_prediction``.
        label_text: text label in ``early_prediction``.
        label_imgs: image label in ``early_prediction``.
        label_drop_fl_model: drop-capital label in ``full_prediction``.
        label_imgs_fl_model: image label in ``full_prediction``.

    Returns:
        The (in-place updated) ``full_prediction`` array.
    """
    area_tot = full_prediction.size
    drop_only = (full_prediction == label_drop_fl_model) * 1
    contours_drop, hir_on_drop = return_contours_of_image(drop_only)
    contours_drop_parent = return_parent_contours(contours_drop, hir_on_drop)
    # NOTE: the previous version also kept an unused accumulator list
    # (contours_drop_parent_final); it has been removed as dead code.
    for contour in contours_drop_parent:
        area_drop = cv2.contourArea(contour)
        # Skip specks: anything below 0.001% of the total page area.
        if area_drop <= 0.00001 * area_tot:
            continue
        x, y, w, h = cv2.boundingRect(contour)
        box = slice(y, y + h), slice(x, x + w)
        area_box = w * h
        # Pixels inside the bbox that the early layout prediction classified
        # as text OR image (despite the variable name, images count too).
        area_text_in_early_layout = np.sum((early_prediction[box] == label_text) |
                                           (early_prediction[box] == label_imgs))
        if (area_drop > 0.6 * area_box and
            area_text_in_early_layout >= 0.3 * area_box):
            # Contour dominates its bbox and the early layout agrees it is
            # content: claim the whole bbox as drop capital.
            full_prediction[box] = label_drop_fl_model
        else:
            # Claim only non-text pixels (drop capital / image / background);
            # basic slicing yields a view, so this assignment writes through.
            mask = ((full_prediction[box] == label_drop_fl_model) |
                    (full_prediction[box] == label_imgs_fl_model) |
                    (full_prediction[box] == label_bg))
            full_prediction[box][mask] = label_drop_fl_model
    return full_prediction
||||||
|
|
||||||
def check_any_text_region_in_model_one_is_main_or_header(
|
def check_any_text_region_in_model_one_is_main_or_header(
|
||||||
regions_model_1, regions_model_full,
|
regions_model_1, regions_model_full,
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue