Decoding of drop capitals in `-fl` (full-layout) mode: ensure consistency with the early layout result

1. use connected component analysis to get unique segments
   in early prediction result
2. for each drop-capital segment in full prediction result,
   find matching early segment
3. when they have high overlap, assign drop-capital label
   to the entire early segment
This commit is contained in:
Robert Sachunsky 2026-04-17 03:34:38 +02:00
parent 29b42fdfaa
commit 92e94753c7
3 changed files with 33 additions and 14 deletions

View file

@ -1805,8 +1805,8 @@ class Eynollah:
# rs: why erode to text here, when fill_bb... will mask out text (only allowing img/drop/bg)?
drops = cv2.erode(drops.astype(np.uint8), KERNEL, iterations=1) == 1
regions_fully[drops] = label_drop_fl_model
regions_fully = fill_bb_of_drop_capitals(regions_fully, text_regions_p)
text_regions_p[regions_fully == label_drop_fl_model] = label_drop_fl
drops = fill_bb_of_drop_capitals(regions_fully, text_regions_p)
text_regions_p[drops] = label_drop_fl
regions_without_separators = (text_regions_p == label_text) * 1
# regions_without_separators = ( text_regions_p == 1 | text_regions_p == 2 ) * 1

View file

@ -761,10 +761,18 @@ def fill_bb_of_drop_capitals(
label_imgs=5,
label_drop_fl_model=3,
label_imgs_fl_model=4):
"""
Given segmentation maps from full layout model (including drop-capital)
and early layout model (after post-processing), re-assign regions which
are (large enough and) majority classified as drop-capital to that label.
"""
area_tot = full_prediction.size
drop_only = (full_prediction == label_drop_fl_model) * 1
contours_drop, hir_on_drop = return_contours_of_image(drop_only)
contours_drop_parent = return_parent_contours(contours_drop, hir_on_drop)
text_mask = ((early_prediction == label_text) |
(early_prediction == label_imgs))
_, text_segs, text_bbox, _ = cv2.connectedComponentsWithStats(early_prediction * text_mask)
contours_drop_parent_final = []
for contour in contours_drop_parent:
@ -774,19 +782,31 @@ def fill_bb_of_drop_capitals(
x, y, w, h = cv2.boundingRect(contour)
box = slice(y, y + h), slice(x, x + w)
area_box = w * h
area_text_in_early_layout = np.sum((early_prediction[box] == label_text) |
(early_prediction[box] == label_imgs))
area_text_in_early_layout = np.sum(text_mask[box] == label_text)
if (area_drop > 0.6 * area_box and
area_text_in_early_layout >= 0.3 * area_box):
full_prediction[box] = label_drop_fl_model
mask = np.ones((h, w), dtype=bool)
else:
mask = ((full_prediction[box] == label_drop_fl_model) |
(full_prediction[box] == label_imgs_fl_model) |
(full_prediction[box] == label_bg))
full_prediction[box][mask] = label_drop_fl_model
return full_prediction
# also try to enlarge to corresponding labels in early_prediction
for label in range(1, len(text_bbox)):
x0, y0, w0, h0, area0 = text_bbox[label]
x1 = max(0, x0 - x)
y1 = max(0, y0 - y)
w1 = min(w0, w - x1) if x0 >= x else min(w, w0 - x + x0)
h1 = min(h0, h - y1) if y0 >= y else min(h, h0 - y + y0)
if w1 < 0 or h1 < 0:
continue
area1 = np.count_nonzero(mask[y1: y1 + h1, x1: x1 + w1])
if area1 and area1 >= 0.8 * area0:
full_prediction[text_segs == label] = label_drop_fl_model
return full_prediction == label_drop_fl_model
def check_any_text_region_in_model_one_is_main_or_header(
regions_model_1, regions_model_full,

View file

@ -60,8 +60,8 @@ def filter_contours_area_of_image_tables(image, contours, hierarchy, max_area=1.
##print(np.prod(thresh.shape[:2]))
# Check that polygon has area greater than minimal area
# print(hierarchy[0][jv][3],hierarchy )
if (area >= min_area * np.prod(image.shape[:2]) and
area <= max_area * np.prod(image.shape[:2]) and
if (area >= min_area * image.size and
area <= max_area * image.size and
# hierarchy[0][jv][3]==-1
True):
# print(contour[0][0][1])
@ -109,14 +109,13 @@ def return_parent_contours(contours, hierarchy):
def return_contours_of_interested_region(region_pre_p, label, min_area=0.0002):
    """
    Return the top-level contours of all regions carrying the given label.

    Builds a binary mask of the pixels equal to ``label`` (comparing only
    channel 0 when the input is 3-dimensional), extracts its contours,
    keeps only parent (outermost) contours, and filters them by area
    relative to the whole image.

    :param region_pre_p: 2D or 3D label map (ndarray); for 3D input only
        the first channel is compared against ``label``
    :param label: integer class label to select (e.g. image pixels are 5)
    :param min_area: minimum contour area as a fraction of the image size
    :return: list of contours passing the area filter
    """
    # NOTE: the diff rendering had left both the old (threshold-based) and
    # new (direct-mask) code paths interleaved here; this is the coherent
    # post-change version: findContours works on the binary mask directly,
    # so the intermediate cv2.threshold step is unnecessary.
    # pixels of images are identified by 5
    if region_pre_p.ndim == 3:
        mask = (region_pre_p[:, :, 0] == label).astype(np.uint8)
    else:
        mask = (region_pre_p[:, :] == label).astype(np.uint8)
    contours_imgs, hierarchy = cv2.findContours(mask, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
    contours_imgs = return_parent_contours(contours_imgs, hierarchy)
    contours_imgs = filter_contours_area_of_image_tables(mask, contours_imgs, hierarchy,
                                                         max_area=1, min_area=min_area)
    return contours_imgs