From adcf03c7b7c91ef379404fe700175e8943439e31 Mon Sep 17 00:00:00 2001
From: vahidrezanezhad
Date: Fri, 23 May 2025 18:06:53 +0200
Subject: [PATCH] enhancing ocr

---
 src/eynollah/eynollah.py        | 47 ++++++++++++++++++---------------
 src/eynollah/utils/utils_ocr.py |  1 +
 2 files changed, 27 insertions(+), 21 deletions(-)

diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py
index 2564150..1b50713 100644
--- a/src/eynollah/eynollah.py
+++ b/src/eynollah/eynollah.py
@@ -85,7 +85,12 @@ from .utils.utils_ocr import (
     preprocess_and_resize_image_for_ocrcnn_model,
     return_textlines_split_if_needed,
     decode_batch_predictions,
-    return_rnn_cnn_ocr_of_given_textlines
+    return_rnn_cnn_ocr_of_given_textlines,
+    fit_text_single_line,
+    break_curved_line_into_small_pieces_and_then_merge,
+    get_orientation_moments,
+    rotate_image_with_padding,
+    get_contours_and_bounding_boxes
 )
 from .utils.separate_lines import (
     textline_contours_postprocessing,
@@ -5421,7 +5426,7 @@ class Eynollah_ocr:
                         cropped_lines.append(resize_image(img_crop, tr_ocr_input_height_and_width, tr_ocr_input_height_and_width) )
                         cropped_lines_meging_indexing.append(0)
                     else:
-                        splited_images, _ = self.return_textlines_split_if_needed(img_crop, None)
+                        splited_images, _ = return_textlines_split_if_needed(img_crop, None)
                         #print(splited_images)
                         if splited_images:
                             cropped_lines.append(resize_image(splited_images[0], tr_ocr_input_height_and_width, tr_ocr_input_height_and_width))
@@ -5474,7 +5479,7 @@
                     w_bb = bb_ind[2]
                     h_bb = bb_ind[3]
 
-                    font = self.fit_text_single_line(draw, extracted_texts_merged[indexer_text], font_path, w_bb, int(h_bb*0.4) )
+                    font = fit_text_single_line(draw, extracted_texts_merged[indexer_text], font_path, w_bb, int(h_bb*0.4) )
 
                     ##draw.rectangle([x_bb, y_bb, x_bb + w_bb, y_bb + h_bb], outline="red", width=2)
 
@@ -5607,14 +5612,14 @@
                             #print(file_name, angle_degrees,w*h , mask_poly[:,:,0].sum(), mask_poly[:,:,0].sum() /float(w*h) , 'didi')
                             if not self.do_not_mask_with_textline_contour:
                                 if angle_degrees > 15:
-                                    better_des_slope = self.get_orientation_moments(textline_coords)
+                                    better_des_slope = get_orientation_moments(textline_coords)
 
-                                    img_crop = self.rotate_image_with_padding(img_crop, better_des_slope )
-                                    mask_poly = self.rotate_image_with_padding(mask_poly, better_des_slope )
+                                    img_crop = rotate_image_with_padding(img_crop, better_des_slope )
+                                    mask_poly = rotate_image_with_padding(mask_poly, better_des_slope )
                                     mask_poly = mask_poly.astype('uint8')
 
                                     #new bounding box
-                                    x_n, y_n, w_n, h_n = self.get_contours_and_bounding_boxes(mask_poly[:,:,0])
+                                    x_n, y_n, w_n, h_n = get_contours_and_bounding_boxes(mask_poly[:,:,0])
 
                                     mask_poly = mask_poly[y_n:y_n+h_n, x_n:x_n+w_n, :]
                                     img_crop = img_crop[y_n:y_n+h_n, x_n:x_n+w_n, :]
@@ -5622,13 +5627,13 @@
                                     img_crop[mask_poly==0] = 255
 
                                     if mask_poly[:,:,0].sum() /float(w_n*h_n) < 0.50 and w_scaled > 100:
-                                        img_crop = self.break_curved_line_into_small_pieces_and_then_merge(img_crop, mask_poly)
+                                        img_crop = break_curved_line_into_small_pieces_and_then_merge(img_crop, mask_poly)
 
                                     #print(file_name,w_n*h_n , mask_poly[:,:,0].sum(), mask_poly[:,:,0].sum() /float(w_n*h_n) , 'ikiiiiii')
                                 else:
                                     img_crop[mask_poly==0] = 255
                                     if mask_poly[:,:,0].sum() /float(w*h) < 0.50 and w_scaled > 100:
-                                        img_crop = self.break_curved_line_into_small_pieces_and_then_merge(img_crop, mask_poly)
+                                        img_crop = break_curved_line_into_small_pieces_and_then_merge(img_crop, mask_poly)
 
 
 
@@ -5638,7 +5643,7 @@ class Eynollah_ocr:
 
                             if not self.export_textline_images_and_text:
                                 if w_scaled < 640:#1.5*image_width:
-                                    img_fin = self.preprocess_and_resize_image_for_ocrcnn_model(img_crop, image_height, image_width)
+                                    img_fin = preprocess_and_resize_image_for_ocrcnn_model(img_crop, image_height, image_width)
                                     cropped_lines.append(img_fin)
                                     if angle_degrees > 15:
                                         cropped_lines_ver_index.append(1)
@@ -5647,15 +5652,15 @@
                                     cropped_lines_meging_indexing.append(0)
 
                                     if self.prediction_with_both_of_rgb_and_bin:
-                                        img_fin = self.preprocess_and_resize_image_for_ocrcnn_model(img_crop_bin, image_height, image_width)
+                                        img_fin = preprocess_and_resize_image_for_ocrcnn_model(img_crop_bin, image_height, image_width)
                                         cropped_lines_bin.append(img_fin)
 
                                 else:
                                     if self.prediction_with_both_of_rgb_and_bin:
-                                        splited_images, splited_images_bin = self.return_textlines_split_if_needed(img_crop, img_crop_bin)
+                                        splited_images, splited_images_bin = return_textlines_split_if_needed(img_crop, img_crop_bin)
                                     else:
-                                        splited_images, splited_images_bin = self.return_textlines_split_if_needed(img_crop, None)
+                                        splited_images, splited_images_bin = return_textlines_split_if_needed(img_crop, None)
                                     if splited_images:
-                                        img_fin = self.preprocess_and_resize_image_for_ocrcnn_model(splited_images[0], image_height, image_width)
+                                        img_fin = preprocess_and_resize_image_for_ocrcnn_model(splited_images[0], image_height, image_width)
                                         cropped_lines.append(img_fin)
                                         cropped_lines_meging_indexing.append(1)
@@ -5664,7 +5669,7 @@ class Eynollah_ocr:
                                         else:
                                             cropped_lines_ver_index.append(0)
 
-                                        img_fin = self.preprocess_and_resize_image_for_ocrcnn_model(splited_images[1], image_height, image_width)
+                                        img_fin = preprocess_and_resize_image_for_ocrcnn_model(splited_images[1], image_height, image_width)
                                         cropped_lines.append(img_fin)
                                         cropped_lines_meging_indexing.append(-1)
 
@@ -5675,13 +5680,13 @@
                                             cropped_lines_ver_index.append(0)
 
                                         if self.prediction_with_both_of_rgb_and_bin:
-                                            img_fin = self.preprocess_and_resize_image_for_ocrcnn_model(splited_images_bin[0], image_height, image_width)
+                                            img_fin = preprocess_and_resize_image_for_ocrcnn_model(splited_images_bin[0], image_height, image_width)
                                             cropped_lines_bin.append(img_fin)
 
-                                            img_fin = self.preprocess_and_resize_image_for_ocrcnn_model(splited_images_bin[1], image_height, image_width)
+                                            img_fin = preprocess_and_resize_image_for_ocrcnn_model(splited_images_bin[1], image_height, image_width)
                                             cropped_lines_bin.append(img_fin)
 
                                     else:
-                                        img_fin = self.preprocess_and_resize_image_for_ocrcnn_model(img_crop, image_height, image_width)
+                                        img_fin = preprocess_and_resize_image_for_ocrcnn_model(img_crop, image_height, image_width)
                                         cropped_lines.append(img_fin)
                                         cropped_lines_meging_indexing.append(0)
@@ -5691,7 +5696,7 @@
                                         cropped_lines_ver_index.append(0)
 
                                     if self.prediction_with_both_of_rgb_and_bin:
-                                        img_fin = self.preprocess_and_resize_image_for_ocrcnn_model(img_crop_bin, image_height, image_width)
+                                        img_fin = preprocess_and_resize_image_for_ocrcnn_model(img_crop_bin, image_height, image_width)
                                         cropped_lines_bin.append(img_fin)
 
                             if self.export_textline_images_and_text:
@@ -5814,7 +5819,7 @@ class Eynollah_ocr:
                         preds_bin = self.prediction_model.predict(imgs_bin, verbose=0)
                         preds = (preds + preds_bin) / 2.
 
-                pred_texts = self.decode_batch_predictions(preds, self.num_to_char)
+                pred_texts = decode_batch_predictions(preds, self.num_to_char)
 
                 for ib in range(imgs.shape[0]):
                     pred_texts_ib = pred_texts[ib].replace("[UNK]", "")
@@ -5844,7 +5849,7 @@
                     w_bb = bb_ind[2]
                     h_bb = bb_ind[3]
 
-                    font = self.fit_text_single_line(draw, extracted_texts_merged[indexer_text], font_path, w_bb, int(h_bb*0.4) )
+                    font = fit_text_single_line(draw, extracted_texts_merged[indexer_text], font_path, w_bb, int(h_bb*0.4) )
 
                     ##draw.rectangle([x_bb, y_bb, x_bb + w_bb, y_bb + h_bb], outline="red", width=2)
 
diff --git a/src/eynollah/utils/utils_ocr.py b/src/eynollah/utils/utils_ocr.py
index 44367b6..339b38a 100644
--- a/src/eynollah/utils/utils_ocr.py
+++ b/src/eynollah/utils/utils_ocr.py
@@ -4,6 +4,7 @@ import tensorflow as tf
 from scipy.signal import find_peaks
 from scipy.ndimage import gaussian_filter1d
 import math
+from PIL import Image, ImageDraw, ImageFont
 from .resize import resize_image
 
 def decode_batch_predictions(pred, num_to_char, max_len = 128):
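Note on the refactor: every `self.<helper>` call above now resolves to a module-level function in `eynollah.utils.utils_ocr`, so the textline helpers can be reused outside `Eynollah_ocr`. Below is a minimal sketch of how the relocated deskew helpers compose, mirroring the call sequence in the `@@ -5607,14` hunk; the wrapper name `deskew_and_crop_textline` and the default target size (`image_height=32`, `image_width=512`) are illustrative assumptions, not values taken from this patch.

    from eynollah.utils.utils_ocr import (
        get_orientation_moments,
        rotate_image_with_padding,
        get_contours_and_bounding_boxes,
        preprocess_and_resize_image_for_ocrcnn_model,
    )

    # Hypothetical wrapper: reproduces the deskew-and-crop sequence that the
    # patched Eynollah_ocr applies to a single strongly slanted textline crop.
    def deskew_and_crop_textline(img_crop, mask_poly, textline_coords,
                                 image_height=32, image_width=512):
        # Estimate the textline's dominant angle from its contour moments.
        angle = get_orientation_moments(textline_coords)

        # Rotate image and mask together, padding so corners are not clipped.
        img_rot = rotate_image_with_padding(img_crop, angle)
        mask_rot = rotate_image_with_padding(mask_poly, angle).astype('uint8')

        # Recompute the bounding box on the rotated mask, crop both arrays to it.
        x, y, w, h = get_contours_and_bounding_boxes(mask_rot[:, :, 0])
        img_rot = img_rot[y:y+h, x:x+w, :]
        mask_rot = mask_rot[y:y+h, x:x+w, :]

        # Whiten pixels outside the textline polygon, as the patched code does.
        img_rot[mask_rot == 0] = 255

        # Normalize to the fixed input size expected by the CNN/RNN OCR model.
        return preprocess_and_resize_image_for_ocrcnn_model(img_rot, image_height, image_width)

Rotating the image and its polygon mask in lockstep, then re-deriving the bounding box from the rotated mask, is what lets the final masking step whiten only true background around the deskewed line.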