From 1d4815b48f1f5b1bf006efe78141fd3161ee8073 Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Wed, 8 Oct 2025 14:56:14 +0200 Subject: [PATCH] utils_ocr: forgot to pass coordinate offsets --- src/eynollah/eynollah.py | 24 ++++++++++++------------ src/eynollah/utils/utils_ocr.py | 10 ++++++++-- 2 files changed, 20 insertions(+), 14 deletions(-) diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py index b109c90..a6b65c4 100644 --- a/src/eynollah/eynollah.py +++ b/src/eynollah/eynollah.py @@ -4265,8 +4265,8 @@ class Eynollah: if self.ocr and not self.tr: gc.collect() ocr_all_textlines = return_rnn_cnn_ocr_of_given_textlines( - image_page, all_found_textline_polygons, self.prediction_model, - self.b_s_ocr, self.num_to_char, textline_light=True) + image_page, all_found_textline_polygons, np.zeros((len(all_found_textline_polygons), 4)), + self.prediction_model, self.b_s_ocr, self.num_to_char, textline_light=True) else: ocr_all_textlines = None @@ -4756,36 +4756,36 @@ class Eynollah: if len(all_found_textline_polygons)>0: ocr_all_textlines = return_rnn_cnn_ocr_of_given_textlines( - image_page, all_found_textline_polygons, self.prediction_model, - self.b_s_ocr, self.num_to_char, self.textline_light, self.curved_line) + image_page, all_found_textline_polygons, all_box_coord, + self.prediction_model, self.b_s_ocr, self.num_to_char, self.textline_light, self.curved_line) else: ocr_all_textlines = None if all_found_textline_polygons_marginals_left and len(all_found_textline_polygons_marginals_left)>0: ocr_all_textlines_marginals_left = return_rnn_cnn_ocr_of_given_textlines( - image_page, all_found_textline_polygons_marginals_left, self.prediction_model, - self.b_s_ocr, self.num_to_char, self.textline_light, self.curved_line) + image_page, all_found_textline_polygons_marginals_left, all_box_coord_marginals_left, + self.prediction_model, self.b_s_ocr, self.num_to_char, self.textline_light, self.curved_line) else: ocr_all_textlines_marginals_left = None if all_found_textline_polygons_marginals_right and len(all_found_textline_polygons_marginals_right)>0: ocr_all_textlines_marginals_right = return_rnn_cnn_ocr_of_given_textlines( - image_page, all_found_textline_polygons_marginals_right, self.prediction_model, - self.b_s_ocr, self.num_to_char, self.textline_light, self.curved_line) + image_page, all_found_textline_polygons_marginals_right, all_box_coord_marginals_right, + self.prediction_model, self.b_s_ocr, self.num_to_char, self.textline_light, self.curved_line) else: ocr_all_textlines_marginals_right = None if all_found_textline_polygons_h and len(all_found_textline_polygons)>0: ocr_all_textlines_h = return_rnn_cnn_ocr_of_given_textlines( - image_page, all_found_textline_polygons_h, self.prediction_model, - self.b_s_ocr, self.num_to_char, self.textline_light, self.curved_line) + image_page, all_found_textline_polygons_h, all_box_coord_h, + self.prediction_model, self.b_s_ocr, self.num_to_char, self.textline_light, self.curved_line) else: ocr_all_textlines_h = None if polygons_of_drop_capitals and len(polygons_of_drop_capitals)>0: ocr_all_textlines_drop = return_rnn_cnn_ocr_of_given_textlines( - image_page, polygons_of_drop_capitals, self.prediction_model, - self.b_s_ocr, self.num_to_char, self.textline_light, self.curved_line) + image_page, polygons_of_drop_capitals, np.zeros((len(polygons_of_drop_capitals), 4)), + self.prediction_model, self.b_s_ocr, self.num_to_char, self.textline_light, self.curved_line) else: ocr_all_textlines_drop = None diff --git a/src/eynollah/utils/utils_ocr.py b/src/eynollah/utils/utils_ocr.py index 602ad6e..6e71b0f 100644 --- a/src/eynollah/utils/utils_ocr.py +++ b/src/eynollah/utils/utils_ocr.py @@ -1,13 +1,17 @@ +import math +import copy + import numpy as np import cv2 import tensorflow as tf from scipy.signal import find_peaks from scipy.ndimage import gaussian_filter1d -import math from PIL import Image, ImageDraw, ImageFont from Bio import pairwise2 + from .resize import resize_image + def decode_batch_predictions(pred, num_to_char, max_len = 128): # input_len is the product of the batch size and the # number of time steps. @@ -370,7 +374,9 @@ def return_textline_contour_with_added_box_coordinate(textline_contour, box_ind return textline_contour -def return_rnn_cnn_ocr_of_given_textlines(image, all_found_textline_polygons, +def return_rnn_cnn_ocr_of_given_textlines(image, + all_found_textline_polygons, + all_box_coord, prediction_model, b_s_ocr, num_to_char, textline_light=False,