combine_hor_lines_and_delete_cross_points: simplify and rename

- `x_width_smaller_than_acolumn_width` → `avg_col_width`
- `len_lines_bigger_than_x_width_smaller_than_acolumn_width` → `nseps_wider_than_than_avg_col_width`
- `img_in_hor` → `img_p_in_hor` (analogous to the vertical mask `img_p_in_ver`)
2026-01-16 23:36:58 +01:00 · 2025-11-28 17:27:12 +01:00 · 2025-11-28 17:27:12 +01:00 · 5c12b6a851
commit 5c12b6a851
parent 06cb9d1d31
1 changed file with 36 additions and 16 deletions
--- a/src/eynollah/utils/init.py
+++ b/src/eynollah/utils/init.py
@ -1176,7 +1176,23 @@ def order_of_regions(textline_mask, contours_main, contours_head, y_ref, x_ref):
    return np.array(final_indexers_sorted), np.array(final_types), np.array(final_index_type)

 def combine_hor_lines_and_delete_cross_points_and_get_lines_features_back_new(
-        img_p_in_ver, img_in_hor,num_col_classifier):
+        img_p_in_ver: np.ndarray,
+        img_p_in_hor: np.ndarray,
+        num_col_classifier: int,
+) -> Tuple[np.ndarray, List[float]]:
+    """
+    Given a horizontal and vertical separator mask, combine horizontal separators
+    (where possible) and make sure they do not cross each other.
+
+    Arguments:
+      * img_p_in_ver: mask of vertical separators
+      * img_p_in_hor: mask of horizontal separators
+      * num_col_classifier: predicted (expected) number of columns
+
+    Returns: a tuple of
+      * the final horizontal separators
+      * the y coordinates with horizontal separators spanning the full width
+    """

    #img_p_in_ver = cv2.erode(img_p_in_ver, self.kernel, iterations=2)
    _, thresh = cv2.threshold(img_p_in_ver, 0, 255, 0)
@ -1192,20 +1208,26 @@ def combine_hor_lines_and_delete_cross_points_and_get_lines_features_back_new(
                     int(y_max_main_ver[i]+1),
                     int(cx_main_ver[i])-25:
                     int(cx_main_ver[i])+25] = 0
+    height, width = img_p_in_ver.shape

-    _, thresh = cv2.threshold(img_in_hor, 0, 255, 0)
+    _, thresh = cv2.threshold(img_p_in_hor, 0, 255, 0)
    contours_lines_hor, _ = cv2.findContours(thresh.astype(np.uint8), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

-    slope_lines_hor, dist_x_hor, x_min_main_hor, x_max_main_hor, cy_main_hor, _, _, _, _ = \
-        find_features_of_lines(contours_lines_hor)
-    x_width_smaller_than_acolumn_width=img_in_hor.shape[1]/float(num_col_classifier+1.)
+    (slope_lines_hor,
+     dist_x_hor,
+     x_min_main_hor,
+     x_max_main_hor,
+     cy_main_hor, _,
+     y_min_main_hor,
+     y_max_main_hor,
+     _) = find_features_of_lines(contours_lines_hor)

-    len_lines_bigger_than_x_width_smaller_than_acolumn_width=len( dist_x_hor[dist_x_hor>=x_width_smaller_than_acolumn_width] )
-    len_lines_bigger_than_x_width_smaller_than_acolumn_width_per_column=int(len_lines_bigger_than_x_width_smaller_than_acolumn_width /
-                                                                            float(num_col_classifier))
-    if len_lines_bigger_than_x_width_smaller_than_acolumn_width_per_column < 10:
+    avg_col_width = width / float(num_col_classifier + 1)
+    nseps_wider_than_than_avg_col_width = np.count_nonzero(dist_x_hor>=avg_col_width)
+    if nseps_wider_than_than_avg_col_width < 10 * num_col_classifier:
        args_hor=np.arange(len(slope_lines_hor))
        sep_pairs=contours_in_same_horizon(cy_main_hor)
+        img_p_in = np.copy(img_p_in_hor)
        if len(sep_pairs):
            special_separators=[]
            contours_new=[]
@ -1242,20 +1264,18 @@ def combine_hor_lines_and_delete_cross_points_and_get_lines_features_back_new(
                    #       np.var( dist_x_hor[some_args] ),'jalibdiha')
                    special_separators.append(np.mean(cy_main_hor[some_args]))
        else:
-            img_p_in=img_in_hor
+            img_p_in = img_p_in_hor
            special_separators = []

        img_p_in_ver[img_p_in_ver == 255] = 1
-        sep_ver_hor = img_p_in + img_p_in_ver
-        sep_ver_hor_cross = (sep_ver_hor == 2) * 1
-        _, thresh = cv2.threshold(sep_ver_hor_cross.astype(np.uint8), 0, 255, 0)
+        sep_ver_hor_cross = 255 * ((img_p_in > 0) & (img_p_in_ver > 0))
        contours_cross, _ = cv2.findContours(thresh.astype(np.uint8), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
        center_cross = np.array(find_center_of_contours(contours_cross), dtype=int)
        for cx, cy in center_cross.T:
            img_p_in[cy - 30: cy + 30, cx + 5: cx + 40] = 0
            img_p_in[cy - 30: cy + 30, cx - 40: cx - 4] = 0
    else:
-        img_p_in=np.copy(img_in_hor)
+        img_p_in = np.copy(img_p_in_hor)
        special_separators = []
    return img_p_in, special_separators