From bb092364afce5a79da686fdb7fbdfb4d2f6fb83a Mon Sep 17 00:00:00 2001
From: Robert Sachunsky
Date: Fri, 24 Apr 2026 15:22:42 +0200
Subject: [PATCH] =?UTF-8?q?get=5Fslopes=5Fand=5Fdeskew=5Fnew=5Flight2:=20e?=
 =?UTF-8?q?stimate=20slopes=20here,=20too=E2=80=A6?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

extract slopes from minimal bounding rectangles of textlines,
using heuristics on aspect ratios, lengths and angles
---
 src/eynollah/eynollah.py      |  7 ++++-
 src/eynollah/utils/contour.py | 59 +++++++++++++++++++++++++++++++++++
 2 files changed, 65 insertions(+), 1 deletion(-)

diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py
index 5432b52..a659922 100644
--- a/src/eynollah/eynollah.py
+++ b/src/eynollah/eynollah.py
@@ -56,6 +56,7 @@ from .utils.contour import (
     dilate_textregion_contours,
     dilate_textline_contours,
     match_deskewed_contours,
+    estimate_skew_contours,
     polygon2contour,
     contour2polygon,
     join_polygons,
@@ -1007,7 +1008,11 @@ class Eynollah:
                 [w_h_textlines[ind] for ind in indexes_in])
 
             all_found_textline_polygons.append(textlines_in) #[::-1])
-            slopes.append(slope_deskew)
+
+            try:
+                slopes.append(estimate_skew_contours(textlines_in))
+            except ValueError:
+                slopes.append(slope_deskew)
 
         return all_found_textline_polygons, slopes
 
diff --git a/src/eynollah/utils/contour.py b/src/eynollah/utils/contour.py
index 4307409..07f89c9 100644
--- a/src/eynollah/utils/contour.py
+++ b/src/eynollah/utils/contour.py
@@ -396,6 +396,65 @@ def match_deskewed_contours(slope_deskew, contours_o, contours_d, shape_o, shape
     invsort_o = np.argsort(sort_o)
     return contours_d_ordered[invsort_o]
 
+def estimate_skew_contours(contours):
+    """Estimate one skew angle from the bounding rectangles of textlines.
+
+    Each contour is reduced to its minimum-area bounding rectangle
+    (``cv2.minAreaRect``), normalised so that the width is the longer
+    side. The rectangles are then narrowed down in three steps, each
+    applied only to the survivors of the previous one:
+
+    1. aspect ratio: the width must exceed 3 times the height;
+    2. length: the width must be 0.67 to 1.33 times the median width;
+    3. angle: the absolute deviation from the median angle must not
+       exceed twice the median of these deviations.
+
+    Args:
+        contours: sequence of contours, each a valid argument to
+            ``cv2.minAreaRect``.
+
+    Returns:
+        The mean angle of the normalised rectangles that passed all three
+        steps, in the angular units of ``cv2.minAreaRect``.
+
+    Raises:
+        ValueError: if ``contours`` is empty, or if a filtering step
+            leaves no rectangle.
+    """
+    if not len(contours):
+        raise ValueError("not enough contours")
+    _, size_in, angle_in = zip(*map(cv2.minAreaRect, contours))
+    w_in, h_in = np.array(size_in).T
+    angle_in = np.array(angle_in)
+    # Make the width the longer side: swapping the sides while turning
+    # the box by a quarter turn describes the same rectangle.
+    transposed = h_in > w_in
+    w_in[transposed], h_in[transposed] = h_in[transposed], w_in[transposed]
+    angle_in[transposed] -= 90
+    # NOTE(review): minAreaRect reportedly yields angles in [-90, 0) or
+    # in (0, 90], depending on the OpenCV release. With the former, the
+    # quarter turn above ends up below -90, half a turn away from the
+    # equivalent angles of untransposed boxes, which would distort the
+    # median and mean below. Wrap those back; this changes nothing for
+    # angles that started in (0, 90].
+    angle_in[angle_in < -90] += 180
+    usable = w_in > 3 * h_in
+    if not np.any(usable):
+        raise ValueError("not enough contours with high aspect ratio")
+    # `usable[usable] = ...` narrows the mask in place: the right-hand
+    # side has one flag per rectangle that is still usable.
+    w_avg = np.median(w_in[usable])
+    w_dev = w_in[usable] / w_avg
+    usable[usable] = (0.67 <= w_dev) & (w_dev <= 1.33)
+    if not np.any(usable):
+        raise ValueError("not enough contours with consistent length")
+    angle_avg = np.median(angle_in[usable])
+    angle_dev = np.abs(angle_in[usable] - angle_avg)
+    usable[usable] = (angle_dev <= 2 * np.median(angle_dev))
+    if not np.any(usable):
+        raise ValueError("not enough contours with consistent angle")
+    return np.mean(angle_in[usable])
+
 def contour2polygon(contour: Union[np.ndarray, Sequence[Sequence[Sequence[Number]]]], dilate=0):
     polygon = Polygon([point[0] for point in contour])
     if dilate: