From 687aba1fa288074e3e326e06866cc5acc4beb235 Mon Sep 17 00:00:00 2001 From: Clemens Neudecker <952378+cneud@users.noreply.github.com> Date: Mon, 3 Mar 2025 22:10:40 +0100 Subject: [PATCH 1/3] replace usages of `imutils` with opencv equivalents should fix https://github.com/qurator-spk/eynollah/issues/141 --- src/eynollah/utils/rotate.py | 40 ++++++++++++++++++------------------ 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/src/eynollah/utils/rotate.py b/src/eynollah/utils/rotate.py index 603c2d9..c01f5e8 100644 --- a/src/eynollah/utils/rotate.py +++ b/src/eynollah/utils/rotate.py @@ -1,6 +1,4 @@ import math - -import imutils import cv2 def rotatedRectWithMaxArea(w, h, angle): @@ -11,11 +9,11 @@ def rotatedRectWithMaxArea(w, h, angle): side_long, side_short = (w, h) if width_is_longer else (h, w) # since the solutions for angle, -angle and 180-angle are all the same, - # if suffices to look at the first quadrant and the absolute values of sin,cos: + # it suffices to look at the first quadrant and the absolute values of sin,cos: sin_a, cos_a = abs(math.sin(angle)), abs(math.cos(angle)) if side_short <= 2.0 * sin_a * cos_a * side_long or abs(sin_a - cos_a) < 1e-10: - # half constrained case: two crop corners touch the longer side, - # the other two corners are on the mid-line parallel to the longer line + # half constrained case: two crop corners touch the longer side, + # the other two corners are on the mid-line parallel to the longer line x = 0.5 * side_short wr, hr = (x / sin_a, x / cos_a) if width_is_longer else (x / cos_a, x / sin_a) else: @@ -25,6 +23,12 @@ def rotatedRectWithMaxArea(w, h, angle): return wr, hr +def rotate_image_opencv(image, angle): + (h, w) = image.shape[:2] + center = (w // 2, h // 2) + M = cv2.getRotationMatrix2D(center, angle, 1.0) + return cv2.warpAffine(image, M, (w, h), flags=cv2.INTER_CUBIC, borderMode=cv2.BORDER_REPLICATE) + def rotate_max_area_new(image, rotated, angle): wr, hr = rotatedRectWithMaxArea(image.shape[1], image.shape[0], math.radians(angle)) h, w, _ = rotated.shape @@ -35,7 +39,7 @@ def rotate_max_area_new(image, rotated, angle): return rotated[y1:y2, x1:x2] def rotation_image_new(img, thetha): - rotated = imutils.rotate(img, thetha) + rotated = rotate_image_opencv(img, thetha) return rotate_max_area_new(img, rotated, thetha) def rotate_image(img_patch, slope): @@ -44,13 +48,10 @@ def rotate_image(img_patch, slope): M = cv2.getRotationMatrix2D(center, slope, 1.0) return cv2.warpAffine(img_patch, M, (w, h), flags=cv2.INTER_CUBIC, borderMode=cv2.BORDER_REPLICATE) -def rotate_image_different( img, slope): - # img = cv2.imread('images/input.jpg') +def rotate_image_different(img, slope): num_rows, num_cols = img.shape[:2] - rotation_matrix = cv2.getRotationMatrix2D((num_cols / 2, num_rows / 2), slope, 1) - img_rotation = cv2.warpAffine(img, rotation_matrix, (num_cols, num_rows)) - return img_rotation + return cv2.warpAffine(img, rotation_matrix, (num_cols, num_rows)) def rotate_max_area(image, rotated, rotated_textline, rotated_layout, rotated_table_prediction, angle): wr, hr = rotatedRectWithMaxArea(image.shape[1], image.shape[0], math.radians(angle)) @@ -62,17 +63,17 @@ def rotate_max_area(image, rotated, rotated_textline, rotated_layout, rotated_ta return rotated[y1:y2, x1:x2], rotated_textline[y1:y2, x1:x2], rotated_layout[y1:y2, x1:x2], rotated_table_prediction[y1:y2, x1:x2] def rotation_not_90_func(img, textline, text_regions_p_1, table_prediction, thetha): - rotated = imutils.rotate(img, thetha) - rotated_textline = imutils.rotate(textline, thetha) - rotated_layout = imutils.rotate(text_regions_p_1, thetha) - rotated_table_prediction = imutils.rotate(table_prediction, thetha) + rotated = rotate_image_opencv(img, thetha) + rotated_textline = rotate_image_opencv(textline, thetha) + rotated_layout = rotate_image_opencv(text_regions_p_1, thetha) + rotated_table_prediction = rotate_image_opencv(table_prediction, thetha) return rotate_max_area(img, rotated, rotated_textline, rotated_layout, rotated_table_prediction, thetha) def rotation_not_90_func_full_layout(img, textline, text_regions_p_1, text_regions_p_fully, thetha): - rotated = imutils.rotate(img, thetha) - rotated_textline = imutils.rotate(textline, thetha) - rotated_layout = imutils.rotate(text_regions_p_1, thetha) - rotated_layout_full = imutils.rotate(text_regions_p_fully, thetha) + rotated = rotate_image_opencv(img, thetha) + rotated_textline = rotate_image_opencv(textline, thetha) + rotated_layout = rotate_image_opencv(text_regions_p_1, thetha) + rotated_layout_full = rotate_image_opencv(text_regions_p_fully, thetha) return rotate_max_area_full_layout(img, rotated, rotated_textline, rotated_layout, rotated_layout_full, thetha) def rotate_max_area_full_layout(image, rotated, rotated_textline, rotated_layout, rotated_layout_full, angle): @@ -83,4 +84,3 @@ def rotate_max_area_full_layout(image, rotated, rotated_textline, rotated_layout x1 = w // 2 - int(wr / 2) x2 = x1 + int(wr) return rotated[y1:y2, x1:x2], rotated_textline[y1:y2, x1:x2], rotated_layout[y1:y2, x1:x2], rotated_layout_full[y1:y2, x1:x2] - From 0b2c1b9275077eed0a7963fd4ad2c25624b9b88a Mon Sep 17 00:00:00 2001 From: cneud <952378+cneud@users.noreply.github.com> Date: Mon, 3 Mar 2025 22:21:57 +0100 Subject: [PATCH 2/3] remove `imutils` dependency --- requirements.txt | 1 - src/eynollah/utils/__init__.py | 1 - 2 files changed, 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index e6f6e4b..7817f27 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,6 +3,5 @@ ocrd >= 2.23.3 numpy <1.24.0 scikit-learn >= 0.23.2 tensorflow < 2.13 -imutils >= 0.5.3 matplotlib setuptools >= 50 diff --git a/src/eynollah/utils/__init__.py b/src/eynollah/utils/__init__.py index d2b2488..149de6d 100644 --- a/src/eynollah/utils/__init__.py +++ b/src/eynollah/utils/__init__.py @@ -4,7 +4,6 @@ import matplotlib.pyplot as plt import numpy as np from shapely import geometry import cv2 -import imutils from scipy.signal import find_peaks from scipy.ndimage import gaussian_filter1d From 181c0c584f0370c789557b8db0610636bed414fb Mon Sep 17 00:00:00 2001 From: cneud <952378+cneud@users.noreply.github.com> Date: Wed, 26 Mar 2025 22:25:22 +0100 Subject: [PATCH 3/3] bbox rotation with opencv --- src/eynollah/utils/rotate.py | 42 ++++++++++++++++++++++++++++++++---- 1 file changed, 38 insertions(+), 4 deletions(-) diff --git a/src/eynollah/utils/rotate.py b/src/eynollah/utils/rotate.py index c01f5e8..734f924 100644 --- a/src/eynollah/utils/rotate.py +++ b/src/eynollah/utils/rotate.py @@ -1,4 +1,5 @@ import math +import numpy as np import cv2 def rotatedRectWithMaxArea(w, h, angle): @@ -23,11 +24,44 @@ def rotatedRectWithMaxArea(w, h, angle): return wr, hr + def rotate_image_opencv(image, angle): - (h, w) = image.shape[:2] - center = (w // 2, h // 2) - M = cv2.getRotationMatrix2D(center, angle, 1.0) - return cv2.warpAffine(image, M, (w, h), flags=cv2.INTER_CUBIC, borderMode=cv2.BORDER_REPLICATE) + # Calculate the original image dimensions (h, w) and the center point (cx, cy) + h, w = image.shape[:2] + cx, cy = (w // 2, h // 2) + + # Compute the rotation matrix + M = cv2.getRotationMatrix2D((cx, cy), angle, 1.0) + + # Calculate the new bounding box + corners = np.array([ + [0, 0], + [w, 0], + [w, h], + [0, h] + ]) + + # Apply rotation matrix to the corner points + ones = np.ones(shape=(len(corners), 1)) + corners_ones = np.hstack([corners, ones]) + transformed_corners = M @ corners_ones.T + transformed_corners = transformed_corners.T + + # Calculate the new bounding box dimensions + min_x, min_y = np.min(transformed_corners, axis=0) + max_x, max_y = np.max(transformed_corners, axis=0) + + newW = int(np.ceil(max_x - min_x)) + newH = int(np.ceil(max_y - min_y)) + + # Adjust the rotation matrix to account for translation + M[0, 2] += (newW / 2) - cx + M[1, 2] += (newH / 2) - cy + + # Perform the affine transformation (rotation) + rotated_image = cv2.warpAffine(image, M, (newW, newH)) + + return rotated_image def rotate_max_area_new(image, rotated, angle): wr, hr = rotatedRectWithMaxArea(image.shape[1], image.shape[0], math.radians(angle))