deduplicate code seperate_lines{,_vertical}

2025-06-30 14:39:54 +02:00 · 2020-11-30 13:54:06 +01:00 · 2020-11-30 13:54:06 +01:00 · c0ae2dc7fa
commit c0ae2dc7fa
parent 11315da683
1 changed files with 10 additions and 107 deletions
--- a/sbb_newspapers_org_image/utils/separate_lines.py
+++ b/sbb_newspapers_org_image/utils/separate_lines.py
@ -13,8 +13,7 @@ from .contour import (
 )
 from .is_nan import isNaN
-def seperate_lines(img_patch, contour_text_interest, thetha, x_help, y_help):
+def dedup_separate_lines(img_patch, contour_text_interest, thetha, axis):
    (h, w) = img_patch.shape[:2]
    center = (w // 2, h // 2)
    M = cv2.getRotationMatrix2D(center, -thetha, 1.0)
@ -23,7 +22,6 @@ def seperate_lines(img_patch, contour_text_interest, thetha, x_help, y_help):
    thetha = thetha / 180.0 * np.pi
    rotation_matrix = np.array([[np.cos(thetha), -np.sin(thetha)], [np.sin(thetha), np.cos(thetha)]])
    contour_text_interest_copy = contour_text_interest.copy()
    x_cont = contour_text_interest[:, 0, 0]
    y_cont = contour_text_interest[:, 0, 1]
@ -37,7 +35,7 @@ def seperate_lines(img_patch, contour_text_interest, thetha, x_help, y_help):
    xv = np.linspace(x_min_cont, x_max_cont, 1000)
-    textline_patch_sum_along_width = img_patch.sum(axis=1)
+    textline_patch_sum_along_width = img_patch.sum(axis=axis)
    first_nonzero = 0  # (next((i for i, x in enumerate(mada_n) if x), None))
@ -121,6 +119,13 @@ def seperate_lines(img_patch, contour_text_interest, thetha, x_help, y_help):
    peaks, _ = find_peaks(y_padded_smoothed, height=0)
    peaks_neg, _ = find_peaks(y_padded_up_to_down_padded, height=0)
    return x, y, x_d, y_d, xv, x_min_cont, y_min_cont, x_max_cont, y_max_cont, first_nonzero, y_padded_up_to_down_padded, y_padded_smoothed, peaks, peaks_neg, rotation_matrix
 def seperate_lines(img_patch, contour_text_interest, thetha, x_help, y_help):
    contour_text_interest_copy = contour_text_interest.copy()
    x, y, x_d, y_d, xv, x_min_cont, y_min_cont, x_max_cont, y_max_cont, first_nonzero, y_padded_up_to_down_padded, y_padded_smoothed, peaks, peaks_neg, rotation_matrix = dedup_separate_lines(img_patch, contour_text_interest, thetha, 1)
    try:
        neg_peaks_max = np.max(y_padded_smoothed[peaks])
@ -478,111 +483,9 @@ def seperate_lines(img_patch, contour_text_interest, thetha, x_help, y_help):
 def seperate_lines_vertical(img_patch, contour_text_interest, thetha):
    thetha = thetha + 90
    (h, w) = img_patch.shape[:2]
    center = (w // 2, h // 2)
    M = cv2.getRotationMatrix2D(center, -thetha, 1.0)
    x_d = M[0, 2]
    y_d = M[1, 2]
    thetha = thetha / 180.0 * np.pi
    rotation_matrix = np.array([[np.cos(thetha), -np.sin(thetha)], [np.sin(thetha), np.cos(thetha)]])
    contour_text_interest_copy = contour_text_interest.copy()
    x, y, x_d, y_d, xv, x_min_cont, y_min_cont, x_max_cont, y_max_cont, first_nonzero, y_padded_up_to_down_padded, y_padded_smoothed, peaks, peaks_neg, rotation_matrix = dedup_separate_lines(img_patch, contour_text_interest, thetha, 0)
    x_cont = contour_text_interest[:, 0, 0]
    y_cont = contour_text_interest[:, 0, 1]
    x_cont = x_cont - np.min(x_cont)
    y_cont = y_cont - np.min(y_cont)
    x_min_cont = 0
    x_max_cont = img_patch.shape[1]
    y_min_cont = 0
    y_max_cont = img_patch.shape[0]
    xv = np.linspace(x_min_cont, x_max_cont, 1000)
    textline_patch_sum_along_width = img_patch.sum(axis=0)
    first_nonzero = 0  # (next((i for i, x in enumerate(mada_n) if x), None))
    y = textline_patch_sum_along_width[:]  # [first_nonzero:last_nonzero]
    y_padded = np.zeros(len(y) + 40)
    y_padded[20 : len(y) + 20] = y
    x = np.array(range(len(y)))
    peaks_real, _ = find_peaks(gaussian_filter1d(y, 3), height=0)
    if 1 > 0:
        try:
            y_padded_smoothed_e = gaussian_filter1d(y_padded, 2)
            y_padded_up_to_down_e = -y_padded + np.max(y_padded)
            y_padded_up_to_down_padded_e = np.zeros(len(y_padded_up_to_down_e) + 40)
            y_padded_up_to_down_padded_e[20 : len(y_padded_up_to_down_e) + 20] = y_padded_up_to_down_e
            y_padded_up_to_down_padded_e = gaussian_filter1d(y_padded_up_to_down_padded_e, 2)
            peaks_e, _ = find_peaks(y_padded_smoothed_e, height=0)
            peaks_neg_e, _ = find_peaks(y_padded_up_to_down_padded_e, height=0)
            neg_peaks_max = np.max(y_padded_up_to_down_padded_e[peaks_neg_e])
            arg_neg_must_be_deleted = np.array(range(len(peaks_neg_e)))[y_padded_up_to_down_padded_e[peaks_neg_e] / float(neg_peaks_max) < 0.3]
            diff_arg_neg_must_be_deleted = np.diff(arg_neg_must_be_deleted)
            arg_diff = np.array(range(len(diff_arg_neg_must_be_deleted)))
            arg_diff_cluster = arg_diff[diff_arg_neg_must_be_deleted > 1]
            peaks_new = peaks_e[:]
            peaks_neg_new = peaks_neg_e[:]
            clusters_to_be_deleted = []
            if len(arg_diff_cluster) > 0:
                clusters_to_be_deleted.append(arg_neg_must_be_deleted[0 : arg_diff_cluster[0] + 1])
                for i in range(len(arg_diff_cluster) - 1):
                    clusters_to_be_deleted.append(arg_neg_must_be_deleted[arg_diff_cluster[i] + 1 : arg_diff_cluster[i + 1] + 1])
                clusters_to_be_deleted.append(arg_neg_must_be_deleted[arg_diff_cluster[len(arg_diff_cluster) - 1] + 1 :])
            if len(clusters_to_be_deleted) > 0:
                peaks_new_extra = []
                for m in range(len(clusters_to_be_deleted)):
                    min_cluster = np.min(peaks_e[clusters_to_be_deleted[m]])
                    max_cluster = np.max(peaks_e[clusters_to_be_deleted[m]])
                    peaks_new_extra.append(int((min_cluster + max_cluster) / 2.0))
                    for m1 in range(len(clusters_to_be_deleted[m])):
                        peaks_new = peaks_new[peaks_new != peaks_e[clusters_to_be_deleted[m][m1] - 1]]
                        peaks_new = peaks_new[peaks_new != peaks_e[clusters_to_be_deleted[m][m1]]]
                        peaks_neg_new = peaks_neg_new[peaks_neg_new != peaks_neg_e[clusters_to_be_deleted[m][m1]]]
                peaks_new_tot = []
                for i1 in peaks_new:
                    peaks_new_tot.append(i1)
                for i1 in peaks_new_extra:
                    peaks_new_tot.append(i1)
                peaks_new_tot = np.sort(peaks_new_tot)
            else:
                peaks_new_tot = peaks_e[:]
            textline_con, hierachy = return_contours_of_image(img_patch)
            textline_con_fil = filter_contours_area_of_image(img_patch, textline_con, hierachy, max_area=1, min_area=0.0008)
            y_diff_mean = np.mean(np.diff(peaks_new_tot))  # self.find_contours_mean_y_diff(textline_con_fil)
            sigma_gaus = int(y_diff_mean * (7.0 / 40.0))
            # print(sigma_gaus,'sigma_gaus')
        except:
            sigma_gaus = 12
        if sigma_gaus < 3:
            sigma_gaus = 3
        # print(sigma_gaus,'sigma')
    y_padded_smoothed = gaussian_filter1d(y_padded, sigma_gaus)
    y_padded_up_to_down = -y_padded + np.max(y_padded)
    y_padded_up_to_down_padded = np.zeros(len(y_padded_up_to_down) + 40)
    y_padded_up_to_down_padded[20 : len(y_padded_up_to_down) + 20] = y_padded_up_to_down
    y_padded_up_to_down_padded = gaussian_filter1d(y_padded_up_to_down_padded, sigma_gaus)
    peaks, _ = find_peaks(y_padded_smoothed, height=0)
    peaks_neg, _ = find_peaks(y_padded_up_to_down_padded, height=0)
    # plt.plot(y_padded_up_to_down_padded)
    # plt.plot(peaks_neg,y_padded_up_to_down_padded[peaks_neg],'*')