Merge remote-tracking branch 'bertsky/ro-fixes-final' into prepare-release-v0.8.0

# Conflicts: # requirements-ocr.txt
2026-06-02 11:09:16 +02:00 · 2026-05-11 09:46:17 +02:00 · 2026-05-11 09:46:17 +02:00 · 2035b07b55
commit 2035b07b55
parent ce5d6bc43c db87aa995d
3 changed files with 187 additions and 254 deletions
--- a/requirements-ocr.txt
+++ b/requirements-ocr.txt
@@ -1,3 +1,3 @@
 torch
-transformers < 5 ; python_version < '3.10'
+transformers <= 4.30.2 ; python_version < '3.10'
 transformers >= 5 ; python_version >= '3.10'
--- a/src/eynollah/utils/contour.py
+++ b/src/eynollah/utils/contour.py
@@ -223,18 +223,12 @@ def get_region_confidences(cnts, confidence_matrix):
        confs.append(np.sum(confidence_matrix * cnt_mask) / np.sum(cnt_mask))
    return confs

-def return_contours_of_interested_textline(region_pre_p, label):
-    # pixels of images are identified by 5
-    if region_pre_p.ndim == 3:
-        cnts_images = (region_pre_p[:, :, 0] == label) * 1
-    else:
-        cnts_images = (region_pre_p[:, :] == label) * 1
-    _, thresh = cv2.threshold(cnts_images.astype(np.uint8), 0, 255, 0)
-    contours_imgs, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
-
+def return_contours_of_interested_textline(region_pre_p, label, min_area=0.0):
+    cnts_images = (region_pre_p == label).astype(np.uint8)
+    contours_imgs, hierarchy = cv2.findContours(cnts_images, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
    contours_imgs = return_parent_contours(contours_imgs, hierarchy)
    contours_imgs = filter_contours_area_of_image_tables(
-        thresh, contours_imgs, hierarchy, max_area=1, min_area=0.000000003)
+        cnts_images, contours_imgs, hierarchy, max_area=1, min_area=min_area)
    return contours_imgs

 def return_contours_of_image(image):
--- a/src/eynollah/utils/separate_lines.py
+++ b/src/eynollah/utils/separate_lines.py
@@ -45,10 +45,8 @@ def dedup_separate_lines(img_patch, contour_text_interest, thetha, axis):
    x_cont = x_cont - np.min(x_cont)
    y_cont = y_cont - np.min(y_cont)

-    x_min_cont = 0
-    x_max_cont = img_patch.shape[1]
-    y_min_cont = 0
-    y_max_cont = img_patch.shape[0]
+    y_min_cont, x_min_cont = 0, 0
+    y_max_cont, x_max_cont = img_patch.shape

    xv = np.linspace(x_min_cont, x_max_cont, 1000)
    textline_patch_sum_along_width = img_patch.sum(axis=axis)
@@ -957,122 +955,93 @@ def separate_lines_vertical(img_patch, contour_text_interest, thetha):
                                            [[int(x_min), int(point_down)]]]))
    return peaks, textline_boxes_rot

-def separate_lines_new_inside_tiles2(img_patch, thetha):
-    (h, w) = img_patch.shape[:2]
-    center = (w // 2, h // 2)
-    M = cv2.getRotationMatrix2D(center, -thetha, 1.0)
-    x_d = M[0, 2]
-    y_d = M[1, 2]
-
-    thetha = thetha / 180.0 * np.pi
-    rotation_matrix = np.array([[np.cos(thetha), -np.sin(thetha)], [np.sin(thetha), np.cos(thetha)]])
-    # contour_text_interest_copy = contour_text_interest.copy()
-
-    # x_cont = contour_text_interest[:, 0, 0]
-    # y_cont = contour_text_interest[:, 0, 1]
-    # x_cont = x_cont - np.min(x_cont)
-    # y_cont = y_cont - np.min(y_cont)
-
-    x_min_cont = 0
-    x_max_cont = img_patch.shape[1]
-    y_min_cont = 0
-    y_max_cont = img_patch.shape[0]
-
-    xv = np.linspace(x_min_cont, x_max_cont, 1000)
-    textline_patch_sum_along_width = img_patch.sum(axis=1)
-    first_nonzero = 0  # (next((i for i, x in enumerate(mada_n) if x), None))
-
-    y = textline_patch_sum_along_width[:]  # [first_nonzero:last_nonzero]
-    y_padded = np.zeros(len(y) + 40)
-    y_padded[20 : len(y) + 20] = y
-    x = np.array(range(len(y)))
+def separate_lines_new_inside_tiles2(img_patch, _):
+    y = img_patch.sum(axis=1)
+    y_padded = np.pad(y, (20,))
+    x = np.arange(len(y))

    peaks_real, _ = find_peaks(gaussian_filter1d(y, 3), height=0)
-    if 1 > 0:
-        try:
-            y_padded_smoothed_e = gaussian_filter1d(y_padded, 2)
-            y_padded_up_to_down_e = -y_padded + np.max(y_padded)
-            y_padded_up_to_down_padded_e = np.zeros(len(y_padded_up_to_down_e) + 40)
-            y_padded_up_to_down_padded_e[20 : len(y_padded_up_to_down_e) + 20] = y_padded_up_to_down_e
-            y_padded_up_to_down_padded_e = gaussian_filter1d(y_padded_up_to_down_padded_e, 2)
+    try:
+        y_padded_smoothed_e = gaussian_filter1d(y_padded, 2)
+        y_padded_up_to_down_e = -y_padded + np.max(y_padded)
+        y_padded_up_to_down_padded_e = np.zeros(len(y_padded_up_to_down_e) + 40)
+        y_padded_up_to_down_padded_e[20 : len(y_padded_up_to_down_e) + 20] = y_padded_up_to_down_e
+        y_padded_up_to_down_padded_e = gaussian_filter1d(y_padded_up_to_down_padded_e, 2)

-            peaks_e, _ = find_peaks(y_padded_smoothed_e, height=0)
-            peaks_neg_e, _ = find_peaks(y_padded_up_to_down_padded_e, height=0)
-            neg_peaks_max = np.max(y_padded_up_to_down_padded_e[peaks_neg_e])
+        peaks_e, _ = find_peaks(y_padded_smoothed_e, height=0)
+        peaks_neg_e, _ = find_peaks(y_padded_up_to_down_padded_e, height=0)
+        neg_peaks_max = np.max(y_padded_up_to_down_padded_e[peaks_neg_e])

-            arg_neg_must_be_deleted = np.arange(len(peaks_neg_e))[
-                y_padded_up_to_down_padded_e[peaks_neg_e] / float(neg_peaks_max) < 0.3]
-            diff_arg_neg_must_be_deleted = np.diff(arg_neg_must_be_deleted)
+        arg_neg_must_be_deleted = np.arange(len(peaks_neg_e))[
+            y_padded_up_to_down_padded_e[peaks_neg_e] / float(neg_peaks_max) < 0.3]
+        diff_arg_neg_must_be_deleted = np.diff(arg_neg_must_be_deleted)

-            arg_diff = np.array(range(len(diff_arg_neg_must_be_deleted)))
-            arg_diff_cluster = arg_diff[diff_arg_neg_must_be_deleted > 1]
+        arg_diff = np.array(range(len(diff_arg_neg_must_be_deleted)))
+        arg_diff_cluster = arg_diff[diff_arg_neg_must_be_deleted > 1]

-            peaks_new = peaks_e[:]
-            peaks_neg_new = peaks_neg_e[:]
+        peaks_new = peaks_e[:]
+        peaks_neg_new = peaks_neg_e[:]

-            clusters_to_be_deleted = []
-            if len(arg_diff_cluster) > 0:
-                clusters_to_be_deleted.append(arg_neg_must_be_deleted[0 : arg_diff_cluster[0] + 1])
-                for i in range(len(arg_diff_cluster) - 1):
-                    clusters_to_be_deleted.append(
-                        arg_neg_must_be_deleted[arg_diff_cluster[i] + 1:
-                                                arg_diff_cluster[i + 1] + 1])
+        clusters_to_be_deleted = []
+        if len(arg_diff_cluster) > 0:
+            clusters_to_be_deleted.append(arg_neg_must_be_deleted[0 : arg_diff_cluster[0] + 1])
+            for i in range(len(arg_diff_cluster) - 1):
                clusters_to_be_deleted.append(
-                    arg_neg_must_be_deleted[arg_diff_cluster[len(arg_diff_cluster) - 1] + 1 :])
-            if len(clusters_to_be_deleted) > 0:
-                peaks_new_extra = []
-                for m in range(len(clusters_to_be_deleted)):
-                    min_cluster = np.min(peaks_e[clusters_to_be_deleted[m]])
-                    max_cluster = np.max(peaks_e[clusters_to_be_deleted[m]])
-                    peaks_new_extra.append(int((min_cluster + max_cluster) / 2.0))
-                    for m1 in range(len(clusters_to_be_deleted[m])):
-                        peaks_new = peaks_new[peaks_new != peaks_e[clusters_to_be_deleted[m][m1] - 1]]
-                        peaks_new = peaks_new[peaks_new != peaks_e[clusters_to_be_deleted[m][m1]]]
-                        peaks_neg_new = peaks_neg_new[peaks_neg_new != peaks_neg_e[clusters_to_be_deleted[m][m1]]]
-                peaks_new_tot = []
-                for i1 in peaks_new:
-                    peaks_new_tot.append(i1)
-                for i1 in peaks_new_extra:
-                    peaks_new_tot.append(i1)
-                peaks_new_tot = np.sort(peaks_new_tot)
-            else:
-                peaks_new_tot = peaks_e[:]
+                    arg_neg_must_be_deleted[arg_diff_cluster[i] + 1:
+                                            arg_diff_cluster[i + 1] + 1])
+            clusters_to_be_deleted.append(
+                arg_neg_must_be_deleted[arg_diff_cluster[len(arg_diff_cluster) - 1] + 1 :])
+        if len(clusters_to_be_deleted) > 0:
+            peaks_new_extra = []
+            for m in range(len(clusters_to_be_deleted)):
+                min_cluster = np.min(peaks_e[clusters_to_be_deleted[m]])
+                max_cluster = np.max(peaks_e[clusters_to_be_deleted[m]])
+                peaks_new_extra.append(int((min_cluster + max_cluster) / 2.0))
+                for m1 in range(len(clusters_to_be_deleted[m])):
+                    peaks_new = peaks_new[peaks_new != peaks_e[clusters_to_be_deleted[m][m1] - 1]]
+                    peaks_new = peaks_new[peaks_new != peaks_e[clusters_to_be_deleted[m][m1]]]
+                    peaks_neg_new = peaks_neg_new[peaks_neg_new != peaks_neg_e[clusters_to_be_deleted[m][m1]]]
+            peaks_new_tot = []
+            for i1 in peaks_new:
+                peaks_new_tot.append(i1)
+            for i1 in peaks_new_extra:
+                peaks_new_tot.append(i1)
+            peaks_new_tot = np.sort(peaks_new_tot)
+        else:
+            peaks_new_tot = peaks_e[:]

-            textline_con, hierarchy = return_contours_of_image(img_patch)
-            textline_con_fil = filter_contours_area_of_image(img_patch,
-                                                             textline_con, hierarchy,
-                                                             max_area=1, min_area=0.0008)
-            if len(np.diff(peaks_new_tot)):
-                y_diff_mean = np.mean(np.diff(peaks_new_tot))  # self.find_contours_mean_y_diff(textline_con_fil)
-                sigma_gaus = int(y_diff_mean * (7.0 / 40.0))
-            else:
-                sigma_gaus = 12
-
-        except:
+        textline_con, hierarchy = return_contours_of_image(img_patch)
+        textline_con_fil = filter_contours_area_of_image(img_patch,
+                                                         textline_con, hierarchy,
+                                                         max_area=1, min_area=0.0008)
+        if len(np.diff(peaks_new_tot)):
+            y_diff_mean = np.mean(np.diff(peaks_new_tot))  # self.find_contours_mean_y_diff(textline_con_fil)
+            sigma_gaus = int(y_diff_mean * (7.0 / 40.0))
+        else:
            sigma_gaus = 12
-        if sigma_gaus < 3:
-            sigma_gaus = 3
+
+    except:
+        sigma_gaus = 12
+    if sigma_gaus < 3:
+        sigma_gaus = 3

    y_padded_smoothed = gaussian_filter1d(y_padded, sigma_gaus)
-    y_padded_up_to_down = -y_padded + np.max(y_padded)
-    y_padded_up_to_down_padded = np.zeros(len(y_padded_up_to_down) + 40)
-    y_padded_up_to_down_padded[20 : len(y_padded_up_to_down) + 20] = y_padded_up_to_down
-    y_padded_up_to_down_padded = gaussian_filter1d(y_padded_up_to_down_padded, sigma_gaus)
+    y_padded_neg = np.pad(np.max(y_padded) - y_padded, (20,))
+    y_padded_neg_smoothed = gaussian_filter1d(y_padded_neg, sigma_gaus)

    peaks, _ = find_peaks(y_padded_smoothed, height=0)
-    peaks_neg, _ = find_peaks(y_padded_up_to_down_padded, height=0)
+    peaks_neg, _ = find_peaks(y_padded_neg_smoothed, height=0)

    peaks_new = peaks[:]
    peaks_neg_new = peaks_neg[:]

    try:
-        neg_peaks_max = np.max(y_padded_smoothed[peaks])
-
        arg_neg_must_be_deleted = np.arange(len(peaks_neg))[
-            y_padded_up_to_down_padded[peaks_neg] / float(neg_peaks_max) < 0.24]
+            y_padded_neg_smoothed[peaks_neg] <
+            y_padded_smoothed[peaks].max() * 0.24]
        diff_arg_neg_must_be_deleted = np.diff(arg_neg_must_be_deleted)

-        arg_diff = np.array(range(len(diff_arg_neg_must_be_deleted)))
+        arg_diff = np.arange(len(diff_arg_neg_must_be_deleted))
        arg_diff_cluster = arg_diff[diff_arg_neg_must_be_deleted > 1]

        clusters_to_be_deleted = []
@@ -1103,12 +1072,12 @@ def separate_lines_new_inside_tiles2(img_patch, thetha):
                peaks_new_tot.append(i1)
            peaks_new_tot = np.sort(peaks_new_tot)

-            # plt.plot(y_padded_up_to_down_padded)
-            # plt.plot(peaks_neg,y_padded_up_to_down_padded[peaks_neg],'*')
+            # plt.plot(y_padded_neg_smoothed)
+            # plt.plot(peaks_neg,y_padded_neg_smoothed[peaks_neg],'*')
            # plt.show()

-            # plt.plot(y_padded_up_to_down_padded)
-            # plt.plot(peaks_neg_new,y_padded_up_to_down_padded[peaks_neg_new],'*')
+            # plt.plot(y_padded_neg_smoothed)
+            # plt.plot(peaks_neg_new,y_padded_neg_smoothed[peaks_neg_new],'*')
            # plt.show()

            # plt.plot(y_padded_smoothed)
@@ -1128,62 +1097,48 @@ def separate_lines_new_inside_tiles2(img_patch, thetha):
        peaks = peaks_new_tot[:]
        peaks_neg = peaks_neg_new[:]
    
-    if len(y_padded_smoothed[peaks]) > 1:
-        mean_value_of_peaks = np.mean(y_padded_smoothed[peaks])
-        std_value_of_peaks = np.std(y_padded_smoothed[peaks])
-    else:
-        mean_value_of_peaks = np.nan
-        std_value_of_peaks = np.nan
+    # if len(y_padded_smoothed[peaks]) > 1:
+    #     mean_value_of_peaks = np.mean(y_padded_smoothed[peaks])
+    #     std_value_of_peaks = np.std(y_padded_smoothed[peaks])
+    # else:
+    #     mean_value_of_peaks = np.nan
+    #     std_value_of_peaks = np.nan
        
-    peaks_values = y_padded_smoothed[peaks]
+    # peaks_values = y_padded_smoothed[peaks]

-    ###peaks_neg = peaks_neg - 20 - 20
-    ###peaks = peaks - 20
-    peaks_neg_true = peaks_neg[:]
-    peaks_pos_true = peaks[:]
+    def clip(positions):
+        # prevent wrap around array bounds
+        return np.maximum(0, np.minimum(img_patch.shape[0] - 1, positions))

-    if len(peaks_neg_true) > 0:
-        peaks_neg_true = np.array(peaks_neg_true)
-        peaks_neg_true = peaks_neg_true - 20 - 20
+    peaks_neg_true = clip(np.array(peaks_neg) - 40)
+    peaks_pos_true = clip(np.array(peaks) - 20)

-        for i in range(len(peaks_neg_true)):
-            img_patch[peaks_neg_true[i] - 6 : peaks_neg_true[i] + 6, :] = 0
-    else:
-        pass
+    # ax1 = plt.subplot(1, 2, 1, title="textline mask slice")
+    # plt.imshow(img_patch, aspect="auto")
+    # ax2 = plt.subplot(1, 2, 2, title="projection profile", sharey=ax1)
+    # plt.plot(y, x)
+    # ax2.scatter(y[peaks_neg_true], peaks_neg_true, color='r', label="neg (0)")
+    # ax2.scatter(y[peaks_pos_true], peaks_pos_true, color='g', label="pos (1)")
+    # plt.legend()
+    # plt.show()

-    if len(peaks_pos_true) > 0:
-        peaks_pos_true = np.array(peaks_pos_true)
-        peaks_pos_true = peaks_pos_true - 20
+    offsets = np.arange(-6, 6)
+    def add_offsets(positions):
+        # let y range around peak positions (without slice indexing)
+        return (positions[np.newaxis] + offsets[:, np.newaxis]).flatten()
+
+    if peaks_neg_true.size:
+        img_patch[clip(add_offsets(peaks_neg_true))] = 0
+
+    if peaks_pos_true.size:
+        img_patch[clip(add_offsets(peaks_pos_true))] = 1

-        for i in range(len(peaks_pos_true)):
-            ##img_patch[peaks_pos_true[i]-8:peaks_pos_true[i]+8,:]=1
-            img_patch[peaks_pos_true[i] - 6 : peaks_pos_true[i] + 6, :] = 1
-    else:
-        pass
    kernel = np.ones((5, 5), np.uint8)
-
    # img_patch = cv2.erode(img_patch,kernel,iterations = 3)
-    #######################img_patch = cv2.erode(img_patch,kernel,iterations = 2)
    img_patch = cv2.erode(img_patch, kernel, iterations=1)
    return img_patch

-def separate_lines_new_inside_tiles(img_path, thetha):
-    (h, w) = img_path.shape[:2]
-    center = (w // 2, h // 2)
-    M = cv2.getRotationMatrix2D(center, -thetha, 1.0)
-    x_d = M[0, 2]
-    y_d = M[1, 2]
-
-    thetha = thetha / 180.0 * np.pi
-    rotation_matrix = np.array([[np.cos(thetha), -np.sin(thetha)], [np.sin(thetha), np.cos(thetha)]])
-
-    x_min_cont = 0
-    x_max_cont = img_path.shape[1]
-    y_min_cont = 0
-    y_max_cont = img_path.shape[0]
-
-    xv = np.linspace(x_min_cont, x_max_cont, 1000)
-
+def separate_lines_new_inside_tiles(img_path, _):
    mada_n = img_path.sum(axis=1)

    ##plt.plot(mada_n)
@@ -1371,26 +1326,18 @@ def textline_contours_postprocessing(textline_mask, angle, contour_parent):
                              if len(contour) > 3]
    return contours_rotated_clean

-def separate_lines_new2(img_crop, thetha, num_col, slope_region, logger=None, plotter=None):
+def separate_lines_new2(img_crop, _, num_col, slope_region, logger=None, plotter=None):
+    """
+    morph textline mask to cope with warped lines by independently deskewing horizontal slices
+    """
    if logger is None:
        logger = getLogger(__package__)
    if not np.prod(img_crop.shape):
        return img_crop

-    if num_col == 1:
-        num_patches = int(img_crop.shape[1] / 200.0)
-    else:
-        num_patches = int(img_crop.shape[1] / 140.0)
-    # num_patches=int(img_crop.shape[1]/200.)
-    if num_patches == 0:
-        num_patches = 1
-
-    img_patch_interest = img_crop[:, :]  # [peaks_neg_true[14]-dis_up:peaks_neg_true[15]+dis_down ,:]
-
-    # plt.imshow(img_patch_interest)
-    # plt.show()
-
-    length_x = int(img_crop.shape[1] / float(num_patches))
+    height, width = img_crop.shape
+    num_patches = max(1, width // (200 if num_col == 1 else 140))
+    length_x = width // num_patches
    # margin = int(0.04 * length_x) just recently this was changed because it break lines into 2
    margin = int(0.04 * length_x)
    # if margin<=4:
@@ -1398,85 +1345,68 @@ def separate_lines_new2(img_crop, thetha, num_col, slope_region, logger=None, pl
    # margin=0

    width_mid = length_x - 2 * margin
-    nxf = img_crop.shape[1] / float(width_mid)

-    if nxf > int(nxf):
-        nxf = int(nxf) + 1
-    else:
-        nxf = int(nxf)
+    img_crop_revised = np.zeros_like(img_crop)
+    for index_x_d in range(0, width, width_mid):
+        index_x_u = index_x_d + length_x
+        if index_x_u > width:
+            if index_x_u >= width + width_mid:
+                break # already in last window
+            index_x_u = width
+            index_x_d = width - length_x

-    slopes_tile_wise = []
-    for i in range(nxf):
-        if i == 0:
-            index_x_d = i * width_mid
-            index_x_u = index_x_d + length_x
-        elif i > 0:
-            index_x_d = i * width_mid
-            index_x_u = index_x_d + length_x
+        # box = (slice(index_y_d, index_y_u), slice(index_x_d, index_x_u))
+        # img_patch = img_crop[box]
+        box = (slice(None), slice(index_x_d, index_x_u))
+        img_xline = img_crop[box]

-        if index_x_u > img_crop.shape[1]:
-            index_x_u = img_crop.shape[1]
-            index_x_d = img_crop.shape[1] - length_x
-
-        # img_patch = img[index_y_d:index_y_u, index_x_d:index_x_u, :]
-        img_xline = img_patch_interest[:, index_x_d:index_x_u]
-
-        try:
-            assert img_xline.any()
+        if img_xline.any():
            slope_xline = return_deskew_slop(img_xline, 2, logger=logger, plotter=plotter)
-        except:
-            slope_xline = 0
+        else:
+            continue

-        if abs(slope_region) < 25 and abs(slope_xline) > 25:
-            slope_xline = [slope_region][0]
+        if (abs(slope_region) < 25 and
+            abs(slope_xline) > 25):
+            slope_xline = slope_region
        # if abs(slope_region)>70 and abs(slope_xline)<25:
-        # slope_xline=[slope_region][0]
-        slopes_tile_wise.append(slope_xline)
-        img_line_rotated = rotate_image(img_xline, slope_xline)
-        img_line_rotated[:, :][img_line_rotated[:, :] != 0] = 1
-        
-    img_patch_interest = img_crop[:, :]  # [peaks_neg_true[14]-dis_up:peaks_neg_true[14]+dis_down ,:]
+        #     slope_xline = slope_region

-    img_patch_interest_revised = np.zeros(img_patch_interest.shape)
-
-    for i in range(nxf):
-        if i == 0:
-            index_x_d = i * width_mid
-            index_x_u = index_x_d + length_x
-        elif i > 0:
-            index_x_d = i * width_mid
-            index_x_u = index_x_d + length_x
-
-        if index_x_u > img_crop.shape[1]:
-            index_x_u = img_crop.shape[1]
-            index_x_d = img_crop.shape[1] - length_x
-
-        img_xline = img_patch_interest[:, index_x_d:index_x_u]
-
-        img_int = np.zeros((img_xline.shape[0], img_xline.shape[1]))
-        img_int[:, :] = img_xline[:, :]  # img_patch_org[:,:,0]
-
-        img_resized = np.zeros((int(img_int.shape[0] * (1.2)), int(img_int.shape[1] * (3))))
-        img_resized[int(img_int.shape[0] * (0.1)) : int(img_int.shape[0] * (0.1)) + img_int.shape[0],
-                    int(img_int.shape[1] * (1.0)) : int(img_int.shape[1] * (1.0)) + img_int.shape[1]] = img_int[:, :]
-        # plt.imshow(img_xline)
+        pad_above = pad_below = int(img_xline.shape[0] * 0.1)
+        pad_left = pad_right = img_xline.shape[1]
+        img_xline_padded = np.pad(img_xline, ((pad_above, pad_below),
+                                              (pad_left, pad_right)))
+        # plt.subplot(2, 2, 1, title="xline padded")
+        # plt.imshow(img_xline_padded)
+        img_xline_rotated = rotate_image(img_xline_padded, slope_xline)
+        #img_xline_rotated[img_xline_rotated != 0] = 1
+        # plt.subplot(2, 2, 2, title="xline rotated")
+        # plt.imshow(img_xline_rotated)
+        img_xline_separated = separate_lines_new_inside_tiles2(img_xline_rotated, 0)
+        # plt.subplot(2, 2, 3, title="xline separated")
+        # plt.imshow(img_xline_separated)
+        img_xline_separated = rotate_image(img_xline_separated, -slope_xline)
+        #img_xline_separated[img_xline_separated != 0] = 1
+        # plt.subplot(2, 2, 4, title="xline unrotated")
+        # plt.imshow(img_xline_separated)
        # plt.show()
-        img_line_rotated = rotate_image(img_resized, slopes_tile_wise[i])
-        img_line_rotated[:, :][img_line_rotated[:, :] != 0] = 1

-        img_patch_separated = separate_lines_new_inside_tiles2(img_line_rotated, 0)
+        # unpad
+        img_xline_separated = img_xline_separated[
+            pad_above: -pad_below,
+            pad_left: -pad_right]

-        img_patch_separated_returned = rotate_image(img_patch_separated, -slopes_tile_wise[i])
-        img_patch_separated_returned[:, :][img_patch_separated_returned[:, :] != 0] = 1
+        # window
+        window = (slice(None), slice(margin, -margin or None))
+        img_crop_revised[box][window] = img_xline_separated[window]
+        # plt.subplot(1, 2, 1, title="original box")
+        # plt.imshow(img_crop[box])
+        # plt.gca().add_patch(patches.Rectangle((margin, 0), length_x - 2 * margin, height, alpha=0.5, color='gray'))
+        # plt.subplot(1, 2, 2, title="revised box")
+        # plt.imshow(img_crop_revised[box])
+        # plt.gca().add_patch(patches.Rectangle((margin, 0), length_x - 2 * margin, height, alpha=0.5, color='gray'))
+        # plt.show()

-        img_patch_separated_returned_true_size = img_patch_separated_returned[
-            int(img_int.shape[0] * (0.1)) : int(img_int.shape[0] * (0.1)) + img_int.shape[0],
-            int(img_int.shape[1] * (1.0)) : int(img_int.shape[1] * (1.0)) + img_int.shape[1]]
-
-        img_patch_separated_returned_true_size = img_patch_separated_returned_true_size[:, margin : length_x - margin]
-        img_patch_interest_revised[:, index_x_d + margin : index_x_u - margin] = img_patch_separated_returned_true_size
-
-    return img_patch_interest_revised
+    return img_crop_revised

 def do_image_rotation(angle, img=None, sigma_des=1.0, logger=None):
    if logger is None:
@@ -1580,19 +1510,20 @@ def do_work_of_slopes_new_curved(
    if not np.any(all_text_region_raw):
        return [], slope_deskew
    img_int_p = np.copy(all_text_region_raw)
+    # correct for relative area
+    rel_area = 1.0 * textline_mask_tot_ea.size / img_int_p.size

    # img_int_p=cv2.erode(img_int_p,KERNEL,iterations = 2)
    # plt.imshow(img_int_p)
    # plt.show()

-    if not np.prod(img_int_p.shape) or img_int_p.shape[0] / img_int_p.shape[1] < 0.1:
-        slope = slope_deskew
-    else:
+    slope = slope_deskew
+    if h >= 0.1 * w:
        try:
            textline_con, hierarchy = return_contours_of_image(img_int_p)
            textline_con_fil = filter_contours_area_of_image(img_int_p, textline_con,
                                                             hierarchy,
-                                                             max_area=1, min_area=0.0008)
+                                                             min_area=0.0008 * rel_area)
            if len(textline_con_fil) > 1:
                cx, cy = find_center_of_contours(textline_con_fil)
                y_diff_mean = np.median(np.diff(np.sort(np.array(cy))))
@@ -1613,7 +1544,7 @@ def do_work_of_slopes_new_curved(
                    slope = -90 - slope if slope < 0 else 90 - slope
                if abs(slope - slope_deskew) < 0.5:
                    slope = slope_deskew
-            else:
+            elif len(textline_con_fil):
                if h > 3 * w:
                    # print(1, "transposed", h, w)
                    transposed = True
@@ -1636,24 +1567,32 @@ def do_work_of_slopes_new_curved(
    # print(slope, slope_deskew)

    if abs(slope) < 45:
-        mask_parent = np.zeros((h, w), dtype=np.uint8)
-        mask_parent = cv2.fillPoly(mask_parent, pts=[contour_par - [x, y]], color=1)
-        mask_parent_textline = mask_parent * textline_mask_tot_ea[y : y + h, x : x + w]
-
-        mask_textlines_separated_d = separate_lines_new2(mask_parent_textline, 0,
+        # apply horizontal tiling, deskew each patch independently
+        mask_textlines_separated_d = separate_lines_new2(all_text_region_raw, 0,
                                                         num_col, slope,
                                                         logger=logger, plotter=plotter)
-        #mask_textlines_separated_d[mask_parent != 1] = 0
+        # plt.subplot(1, 2, 1, title="textline mask of region")
+        # plt.imshow(all_text_region_raw)
+        # plt.subplot(1, 2, 2, title="separated+deskewed")
+        # plt.imshow(mask_textlines_separated_d)
+        # plt.show()

-        textline_contours = return_contours_of_interested_textline(mask_textlines_separated_d, 1)
+        textline_contours = return_contours_of_interested_textline(
+            mask_textlines_separated_d, 1, min_area=3e-9 * rel_area)

        textlines_cnt_per_region = []
        for contour in textline_contours:
            mask_line = np.zeros_like(mask_parent)
            mask_line = cv2.fillPoly(mask_line, pts=[contour], color=1)
            mask_line = cv2.dilate(mask_line, KERNEL, iterations=5 if num_col == 0 else 4)
+            # plt.subplot(1, 2, 1, title="parent mask")
+            # plt.imshow(mask_parent)
+            # plt.subplot(1, 2, 2, title="single textline")
+            # plt.imshow(mask_line)
+            # plt.show()

-            textline_contours2 = return_contours_of_interested_textline(mask_line, 1)
+            textline_contours2 = return_contours_of_interested_textline(
+                mask_line, 1, min_area=3e-9 * rel_area)
            textline_areas2 = np.array(list(map(cv2.contourArea, textline_contours2)))
            try:
                contour2 = textline_contours2[np.argmax(textline_areas2)]