From 07f5b52fa704f0d74c9ce8a14234499b958a6849 Mon Sep 17 00:00:00 2001
From: vahidrezanezhad
Date: Tue, 13 May 2025 14:40:57 +0200
Subject: [PATCH] Initial attempt at reading heavily skewed or vertically
 aligned text lines

---
 src/eynollah/eynollah.py | 91 +++++++++++++++++++++++++++++++++++++++-
 1 file changed, 89 insertions(+), 2 deletions(-)

diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py
index 0c7c5d2..9f2ca50 100644
--- a/src/eynollah/eynollah.py
+++ b/src/eynollah/eynollah.py
@@ -22,7 +22,6 @@ from multiprocessing import cpu_count
 import gc
 import copy
 import json
-
 from loky import ProcessPoolExecutor
 import xml.etree.ElementTree as ET
 import cv2
@@ -77,7 +76,8 @@ from .utils.contour import (
 from .utils.rotate import (
     rotate_image,
     rotation_not_90_func,
-    rotation_not_90_func_full_layout
+    rotation_not_90_func_full_layout,
+    rotation_image_new
 )
 from .utils.separate_lines import (
     textline_contours_postprocessing,
@@ -5310,6 +5310,75 @@ class Eynollah_ocr:
         img_fin = img_fin / 255.
         return img_fin
 
+    def get_deskewed_contour_and_bb_and_image(self, contour, image, deskew_angle):
+        (h_in, w_in) = image.shape[:2]
+        center = (w_in // 2, h_in // 2)
+
+        rotation_matrix = cv2.getRotationMatrix2D(center, deskew_angle, 1.0)
+
+        cos_angle = abs(rotation_matrix[0, 0])
+        sin_angle = abs(rotation_matrix[0, 1])
+        new_w = int((h_in * sin_angle) + (w_in * cos_angle))
+        new_h = int((h_in * cos_angle) + (w_in * sin_angle))
+
+        rotation_matrix[0, 2] += (new_w / 2) - center[0]
+        rotation_matrix[1, 2] += (new_h / 2) - center[1]
+
+        deskewed_image = cv2.warpAffine(image, rotation_matrix, (new_w, new_h))
+
+        contour_points = np.array(contour, dtype=np.float32)
+        transformed_points = cv2.transform(np.array([contour_points]), rotation_matrix)[0]
+
+        x, y, w, h = cv2.boundingRect(np.array(transformed_points, dtype=np.int32))
+        cropped_textline = deskewed_image[y:y+h, x:x+w]
+
+        return cropped_textline
+
+    def rotate_image_with_padding(self, image, angle):
+        # Get image dimensions
+        (h, w) = image.shape[:2]
+
+        # Calculate the center of the image
+        center = (w // 2, h // 2)
+
+        # Get the rotation matrix
+        rotation_matrix = cv2.getRotationMatrix2D(center, angle, 1.0)
+
+        # Compute the new bounding dimensions
+        cos = abs(rotation_matrix[0, 0])
+        sin = abs(rotation_matrix[0, 1])
+        new_w = int((h * sin) + (w * cos))
+        new_h = int((h * cos) + (w * sin))
+
+        # Adjust the rotation matrix to account for translation
+        rotation_matrix[0, 2] += (new_w / 2) - center[0]
+        rotation_matrix[1, 2] += (new_h / 2) - center[1]
+
+        # Perform the rotation
+        rotated_image = cv2.warpAffine(image, rotation_matrix, (new_w, new_h), borderValue=(0, 0, 0))
+
+        return rotated_image
+
+    def get_orientation_moments(self, contour):
+        moments = cv2.moments(contour)
+        if moments["mu20"] - moments["mu02"] == 0:  # Avoid division by zero
+            return 90 if moments["mu11"] > 0 else -90
+        else:
+            angle = 0.5 * np.arctan2(2 * moments["mu11"], moments["mu20"] - moments["mu02"])
+            return np.degrees(angle)  # Convert radians to degrees
+
+    def get_contours_and_bounding_boxes(self, mask):
+        # Find contours in the binary mask
+        contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
+
+        largest_contour = max(contours, key=cv2.contourArea) if contours else None
+
+        # Get the bounding rectangle for the contour
+        x, y, w, h = cv2.boundingRect(largest_contour)
+        #bounding_boxes.append((x, y, w, h))
+
+        return x, y, w, h
+
     def run(self):
         ls_imgs = os.listdir(self.dir_in)
 
@@ -5533,6 +5602,10 @@ class Eynollah_ocr:
                         x,y,w,h = cv2.boundingRect(textline_coords)
+                        angle_radians = math.atan2(h, w)
+                        # Convert to degrees
+                        angle_degrees = math.degrees(angle_radians)
+
                         if self.draw_texts_on_image:
                             total_bb_coordinates.append([x,y,w,h])
 
@@ -5549,7 +5622,21 @@ class Eynollah_ocr:
                             mask_poly = mask_poly[y:y+h, x:x+w, :]
                             img_crop = img_poly_on_img[y:y+h, x:x+w, :]
                             if not self.do_not_mask_with_textline_contour:
+                                if angle_degrees > 15:
+                                    better_des_slope = self.get_orientation_moments(textline_coords)
+
+                                    img_crop = self.rotate_image_with_padding(img_crop, -abs(better_des_slope) )
+                                    mask_poly = self.rotate_image_with_padding(mask_poly, -abs(better_des_slope) )
+                                    mask_poly = mask_poly.astype('uint8')
+
+                                    #new bounding box
+                                    x_n, y_n, w_n, h_n = self.get_contours_and_bounding_boxes(mask_poly[:,:,0])
+
+                                    mask_poly = mask_poly[y_n:y_n+h_n, x_n:x_n+w_n, :]
+                                    img_crop = img_crop[y_n:y_n+h_n, x_n:x_n+w_n, :]
+
                                 img_crop[mask_poly==0] = 255
+
                                 if self.prediction_with_both_of_rgb_and_bin:
                                     img_crop_bin[mask_poly==0] = 255
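
Below is a minimal standalone sketch of the idea behind the new helpers: the
text-line orientation is estimated from central image moments
(theta = 0.5 * atan2(2*mu11, mu20 - mu02)), the crop is rotated on an enlarged
canvas so no pixels are clipped, and the result is re-cropped to the rotated
mask's bounding box. It assumes only OpenCV 4.x and NumPy; the function names
and the synthetic 30-degree rectangle are illustrative and are not part of
eynollah's API.

import cv2
import numpy as np

def orientation_from_moments(contour):
    # Principal-axis angle of a blob from its central moments, in degrees:
    # theta = 0.5 * atan2(2*mu11, mu20 - mu02)
    m = cv2.moments(contour)
    if m["mu20"] - m["mu02"] == 0:
        return 90 if m["mu11"] > 0 else -90
    return np.degrees(0.5 * np.arctan2(2 * m["mu11"], m["mu20"] - m["mu02"]))

def rotate_with_padding(image, angle):
    # Rotate about the centre and enlarge the canvas so nothing is clipped.
    h, w = image.shape[:2]
    center = (w // 2, h // 2)
    M = cv2.getRotationMatrix2D(center, angle, 1.0)
    cos, sin = abs(M[0, 0]), abs(M[0, 1])
    new_w, new_h = int(h * sin + w * cos), int(h * cos + w * sin)
    M[0, 2] += new_w / 2 - center[0]
    M[1, 2] += new_h / 2 - center[1]
    return cv2.warpAffine(image, M, (new_w, new_h), borderValue=0)

# Synthetic test case: a 300x40 "text line" drawn at 30 degrees into a mask.
mask = np.zeros((400, 400), dtype=np.uint8)
box = cv2.boxPoints(((200, 200), (300, 40), 30)).astype(np.int32)
cv2.fillPoly(mask, [box], 255)

contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
angle = orientation_from_moments(max(contours, key=cv2.contourArea))

# Rotate by -abs(angle), mirroring what the patch does for lines whose
# bounding box is steeper than 15 degrees.
deskewed = rotate_with_padding(mask, -abs(angle))

# Re-crop to the rotated blob's bounding box, as the patch does after rotation.
cnts, _ = cv2.findContours(deskewed, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
x, y, w, h = cv2.boundingRect(max(cnts, key=cv2.contourArea))
line = deskewed[y:y+h, x:x+w]
print(f"estimated angle: {angle:.1f} deg, deskewed crop shape: {line.shape}")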