diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py index 6b30af3..c1e9085 100644 --- a/src/eynollah/eynollah.py +++ b/src/eynollah/eynollah.py @@ -1805,8 +1805,8 @@ class Eynollah: # rs: why erode to text here, when fill_bb... will mask out text (only allowing img/drop/bg)? drops = cv2.erode(drops.astype(np.uint8), KERNEL, iterations=1) == 1 regions_fully[drops] = label_drop_fl_model - regions_fully = fill_bb_of_drop_capitals(regions_fully, text_regions_p) - text_regions_p[regions_fully == label_drop_fl_model] = label_drop_fl + drops = fill_bb_of_drop_capitals(regions_fully, text_regions_p) + text_regions_p[drops] = label_drop_fl regions_without_separators = (text_regions_p == label_text) * 1 # regions_without_separators = ( text_regions_p == 1 | text_regions_p == 2 ) * 1 diff --git a/src/eynollah/utils/__init__.py b/src/eynollah/utils/__init__.py index 782ffdf..c5cd704 100644 --- a/src/eynollah/utils/__init__.py +++ b/src/eynollah/utils/__init__.py @@ -761,10 +761,18 @@ def fill_bb_of_drop_capitals( label_imgs=5, label_drop_fl_model=3, label_imgs_fl_model=4): + """ + Given segmentation maps from full layout model (including drop-capital) + and early layout model (after post-processing), re-assign regions which + are (large enough and) majority classified as drop-capital to that label. 
+ """ area_tot = full_prediction.size drop_only = (full_prediction == label_drop_fl_model) * 1 contours_drop, hir_on_drop = return_contours_of_image(drop_only) contours_drop_parent = return_parent_contours(contours_drop, hir_on_drop) + text_mask = ((early_prediction == label_text) | + (early_prediction == label_imgs)) + _, text_segs, text_bbox, _ = cv2.connectedComponentsWithStats(text_mask.astype(np.uint8)) contours_drop_parent_final = [] for contour in contours_drop_parent: @@ -774,19 +782,31 @@ x, y, w, h = cv2.boundingRect(contour) box = slice(y, y + h), slice(x, x + w) area_box = w * h - area_text_in_early_layout = np.sum((early_prediction[box] == label_text) | - (early_prediction[box] == label_imgs)) + area_text_in_early_layout = np.sum(text_mask[box]) if (area_drop > 0.6 * area_box and area_text_in_early_layout >= 0.3 * area_box): - full_prediction[box] = label_drop_fl_model + mask = np.ones((h, w), dtype=bool) else: mask = ((full_prediction[box] == label_drop_fl_model) | (full_prediction[box] == label_imgs_fl_model) | (full_prediction[box] == label_bg)) - full_prediction[box][mask] = label_drop_fl_model + full_prediction[box][mask] = label_drop_fl_model - return full_prediction + # also try to enlarge to corresponding labels in early_prediction + for label in range(1, len(text_bbox)): + x0, y0, w0, h0, area0 = text_bbox[label] + x1 = max(0, x0 - x) + y1 = max(0, y0 - y) + w1 = min(w0, w - x1) if x0 >= x else min(w, w0 - x + x0) + h1 = min(h0, h - y1) if y0 >= y else min(h, h0 - y + y0) + if w1 < 0 or h1 < 0: + continue + area1 = np.count_nonzero(mask[y1: y1 + h1, x1: x1 + w1]) + if area1 and area1 >= 0.8 * area0: + full_prediction[text_segs == label] = label_drop_fl_model + + return full_prediction == label_drop_fl_model def check_any_text_region_in_model_one_is_main_or_header( regions_model_1, regions_model_full, diff --git a/src/eynollah/utils/contour.py b/src/eynollah/utils/contour.py index fd6a158..f23c824 
100644 --- a/src/eynollah/utils/contour.py +++ b/src/eynollah/utils/contour.py @@ -60,8 +60,8 @@ def filter_contours_area_of_image_tables(image, contours, hierarchy, max_area=1. ##print(np.prod(thresh.shape[:2])) # Check that polygon has area greater than minimal area # print(hierarchy[0][jv][3],hierarchy ) - if (area >= min_area * np.prod(image.shape[:2]) and - area <= max_area * np.prod(image.shape[:2]) and + if (area >= min_area * image.size and + area <= max_area * image.size and # hierarchy[0][jv][3]==-1 True): # print(contour[0][0][1]) @@ -109,14 +109,13 @@ def return_parent_contours(contours, hierarchy): def return_contours_of_interested_region(region_pre_p, label, min_area=0.0002): # pixels of images are identified by 5 if region_pre_p.ndim == 3: - cnts_images = (region_pre_p[:, :, 0] == label) * 1 + mask = (region_pre_p[:, :, 0] == label).astype(np.uint8) else: - cnts_images = (region_pre_p[:, :] == label) * 1 - _, thresh = cv2.threshold(cnts_images.astype(np.uint8), 0, 255, 0) + mask = (region_pre_p[:, :] == label).astype(np.uint8) - contours_imgs, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) + contours_imgs, hierarchy = cv2.findContours(mask, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) contours_imgs = return_parent_contours(contours_imgs, hierarchy) - contours_imgs = filter_contours_area_of_image_tables(thresh, contours_imgs, hierarchy, + contours_imgs = filter_contours_area_of_image_tables(mask, contours_imgs, hierarchy, max_area=1, min_area=min_area) return contours_imgs