Decoding of drop capitals in `-fl` (full-layout) mode: ensure consistency with the early layout result

1. use connected component analysis to get unique segments
   in early prediction result
2. for each drop-capital segment in full prediction result,
   find matching early segment
3. when they have high overlap, assign drop-capital label
   to the entire early segment
This commit is contained in:
Robert Sachunsky 2026-04-17 03:34:38 +02:00
parent 29b42fdfaa
commit 92e94753c7
3 changed files with 33 additions and 14 deletions

View file

@ -1805,8 +1805,8 @@ class Eynollah:
# rs: why erode to text here, when fill_bb... will mask out text (only allowing img/drop/bg)?
drops = cv2.erode(drops.astype(np.uint8), KERNEL, iterations=1) == 1
regions_fully[drops] = label_drop_fl_model
regions_fully = fill_bb_of_drop_capitals(regions_fully, text_regions_p)
text_regions_p[regions_fully == label_drop_fl_model] = label_drop_fl
drops = fill_bb_of_drop_capitals(regions_fully, text_regions_p)
text_regions_p[drops] = label_drop_fl
regions_without_separators = (text_regions_p == label_text) * 1
# regions_without_separators = ( text_regions_p == 1 | text_regions_p == 2 ) * 1

View file

@ -761,10 +761,18 @@ def fill_bb_of_drop_capitals(
label_imgs=5,
label_drop_fl_model=3,
label_imgs_fl_model=4):
"""
Given segmentation maps from full layout model (including drop-capital)
and early layout model (after post-processing), re-assign regions which
are (large enough and) majority classified as drop-capital to that label.
"""
area_tot = full_prediction.size
drop_only = (full_prediction == label_drop_fl_model) * 1
contours_drop, hir_on_drop = return_contours_of_image(drop_only)
contours_drop_parent = return_parent_contours(contours_drop, hir_on_drop)
text_mask = ((early_prediction == label_text) |
(early_prediction == label_imgs))
_, text_segs, text_bbox, _ = cv2.connectedComponentsWithStats(early_prediction * text_mask)
contours_drop_parent_final = []
for contour in contours_drop_parent:
@ -774,19 +782,31 @@ def fill_bb_of_drop_capitals(
x, y, w, h = cv2.boundingRect(contour)
box = slice(y, y + h), slice(x, x + w)
area_box = w * h
area_text_in_early_layout = np.sum((early_prediction[box] == label_text) |
(early_prediction[box] == label_imgs))
area_text_in_early_layout = np.sum(text_mask[box] == label_text)
if (area_drop > 0.6 * area_box and
area_text_in_early_layout >= 0.3 * area_box):
full_prediction[box] = label_drop_fl_model
mask = np.ones((h, w), dtype=bool)
else:
mask = ((full_prediction[box] == label_drop_fl_model) |
(full_prediction[box] == label_imgs_fl_model) |
(full_prediction[box] == label_bg))
full_prediction[box][mask] = label_drop_fl_model
return full_prediction
# also try to enlarge to corresponding labels in early_prediction
for label in range(1, len(text_bbox)):
x0, y0, w0, h0, area0 = text_bbox[label]
x1 = max(0, x0 - x)
y1 = max(0, y0 - y)
w1 = min(w0, w - x1) if x0 >= x else min(w, w0 - x + x0)
h1 = min(h0, h - y1) if y0 >= y else min(h, h0 - y + y0)
if w1 < 0 or h1 < 0:
continue
area1 = np.count_nonzero(mask[y1: y1 + h1, x1: x1 + w1])
if area1 and area1 >= 0.8 * area0:
full_prediction[text_segs == label] = label_drop_fl_model
return full_prediction == label_drop_fl_model
def check_any_text_region_in_model_one_is_main_or_header(
regions_model_1, regions_model_full,

View file

@ -60,8 +60,8 @@ def filter_contours_area_of_image_tables(image, contours, hierarchy, max_area=1.
##print(np.prod(thresh.shape[:2]))
# Check that polygon has area greater than minimal area
# print(hierarchy[0][jv][3],hierarchy )
if (area >= min_area * np.prod(image.shape[:2]) and
area <= max_area * np.prod(image.shape[:2]) and
if (area >= min_area * image.size and
area <= max_area * image.size and
# hierarchy[0][jv][3]==-1
True):
# print(contour[0][0][1])
@ -109,14 +109,13 @@ def return_parent_contours(contours, hierarchy):
def return_contours_of_interested_region(region_pre_p, label, min_area=0.0002):
    """
    Return the top-level contours of all regions carrying the given label.

    Builds a binary mask of the pixels equal to ``label`` (comparing only
    channel 0 when the input is 3-dimensional), extracts its contours,
    keeps only parent (outermost) contours, and filters them by area
    relative to the whole image.

    :param region_pre_p: 2D or 3D label map (ndarray); for 3D input only
        the first channel is compared against ``label``
    :param label: integer class label to select (e.g. image pixels are 5)
    :param min_area: minimum contour area as a fraction of the image size
    :return: list of contours passing the area filter
    """
    # NOTE: the diff rendering had left both the old (threshold-based) and
    # new (direct-mask) code paths interleaved here; this is the coherent
    # post-change version: findContours works on the binary mask directly,
    # so the intermediate cv2.threshold step is unnecessary.
    # pixels of images are identified by 5
    if region_pre_p.ndim == 3:
        mask = (region_pre_p[:, :, 0] == label).astype(np.uint8)
    else:
        mask = (region_pre_p[:, :] == label).astype(np.uint8)
    contours_imgs, hierarchy = cv2.findContours(mask, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
    contours_imgs = return_parent_contours(contours_imgs, hierarchy)
    contours_imgs = filter_contours_area_of_image_tables(mask, contours_imgs, hierarchy,
                                                         max_area=1, min_area=min_area)
    return contours_imgs