From b93fc112bf8c414186e64de6cc092b1839239128 Mon Sep 17 00:00:00 2001
From: vahidrezanezhad
Date: Tue, 27 May 2025 23:45:22 +0200
Subject: [PATCH] ocr: add --overwrite option, skip existing outputs, fix
 textline split and crop edge cases

---
 src/eynollah/cli.py             | 10 ++++--
 src/eynollah/eynollah.py        | 24 ++++++++++++--
 src/eynollah/utils/utils_ocr.py | 55 +++++++++++++++++----------------
 3 files changed, 58 insertions(+), 31 deletions(-)

diff --git a/src/eynollah/cli.py b/src/eynollah/cli.py
index 0c18b2c..2d0d6f9 100644
--- a/src/eynollah/cli.py
+++ b/src/eynollah/cli.py
@@ -337,6 +337,12 @@ def layout(image, out, overwrite, dir_in, model, save_images, save_layout, save_
     help="image filename",
     type=click.Path(exists=True, dir_okay=False),
 )
+@click.option(
+    "--overwrite",
+    "-O",
+    help="overwrite (instead of skipping) if output xml exists",
+    is_flag=True,
+)
 @click.option(
     "--dir_in",
     "-di",
@@ -421,7 +427,7 @@ def layout(image, out, overwrite, dir_in, model, save_images, save_layout, save_
     help="Override log level globally to this",
 )
 
-def ocr(image, dir_in, dir_in_bin, out, dir_xmls, dir_out_image_text, model, tr_ocr, export_textline_images_and_text, do_not_mask_with_textline_contour, draw_texts_on_image, prediction_with_both_of_rgb_and_bin, batch_size, dataset_abbrevation, log_level):
+def ocr(image, overwrite, dir_in, dir_in_bin, out, dir_xmls, dir_out_image_text, model, tr_ocr, export_textline_images_and_text, do_not_mask_with_textline_contour, draw_texts_on_image, prediction_with_both_of_rgb_and_bin, batch_size, dataset_abbrevation, log_level):
     initLogging()
     if log_level:
         getLogger('eynollah').setLevel(getLevelName(log_level))
@@ -449,7 +455,7 @@ def ocr(image, dir_in, dir_in_bin, out, dir_xmls, dir_out_image_text, model, tr_
         batch_size=batch_size,
         pref_of_dataset=dataset_abbrevation,
     )
-    eynollah_ocr.run()
+    eynollah_ocr.run(overwrite=overwrite)
 
 if __name__ == "__main__":
     main()
diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py
index 1f79995..efa1dde 100644
--- a/src/eynollah/eynollah.py
+++ b/src/eynollah/eynollah.py
@@ -5338,6 +5338,8 @@ class Eynollah_ocr:
         self.dir_out_image_text = dir_out_image_text
         self.prediction_with_both_of_rgb_and_bin = prediction_with_both_of_rgb_and_bin
         self.pref_of_dataset = pref_of_dataset
+        self.logger = logger if logger else getLogger('eynollah')
+
         if not export_textline_images_and_text:
             if tr_ocr:
                 self.processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-printed")
@@ -5351,7 +5353,7 @@
             self.b_s = int(batch_size)
 
         else:
-            self.model_ocr_dir = dir_models + "/model_step_750000_ocr"#"/model_step_125000_ocr"#"/model_step_25000_ocr"#"/model_step_1050000_ocr"#"/model_0_ocr_cnnrnn"#"/model_23_ocr_cnnrnn"
+            self.model_ocr_dir = dir_models + "/model_step_1075000_ocr"#"/model_step_125000_ocr"#"/model_step_25000_ocr"#"/model_step_1050000_ocr"#"/model_0_ocr_cnnrnn"#"/model_23_ocr_cnnrnn"
             model_ocr = load_model(self.model_ocr_dir , compile=False)
 
             self.prediction_model = tf.keras.models.Model(
@@ -5377,7 +5379,7 @@
             vocabulary=char_to_num.get_vocabulary(), mask_token=None, invert=True
         )
 
-    def run(self):
+    def run(self, overwrite: bool = False):
         if self.dir_in:
             ls_imgs = os.listdir(self.dir_in)
         else:
@@ -5394,6 +5396,14 @@
                 dir_img = self.image_filename
             dir_xml = os.path.join(self.dir_xmls, file_name+'.xml')
             out_file_ocr = os.path.join(self.dir_out, file_name+'.xml')
+
+            if os.path.exists(out_file_ocr):
+                if overwrite:
+                    self.logger.warning("will overwrite existing output file '%s'", out_file_ocr)
+                else:
+                    self.logger.warning("will skip input for existing output file '%s'", out_file_ocr)
+                continue
+
             img = cv2.imread(dir_img)
 
             if self.draw_texts_on_image:
@@ -5574,6 +5584,14 @@
             #dir_img = os.path.join(self.dir_in, ind_img)
             dir_xml = os.path.join(self.dir_xmls, file_name+'.xml')
             out_file_ocr = os.path.join(self.dir_out, file_name+'.xml')
+
+            if os.path.exists(out_file_ocr):
+                if overwrite:
+                    self.logger.warning("will overwrite existing output file '%s'", out_file_ocr)
+                else:
+                    self.logger.warning("will skip input for existing output file '%s'", out_file_ocr)
+                continue
+
             img = cv2.imread(dir_img)
             if self.prediction_with_both_of_rgb_and_bin:
                 cropped_lines_bin = []
@@ -5704,7 +5722,7 @@
                                 cropped_lines_bin.append(img_fin)
                         else:
                             if self.prediction_with_both_of_rgb_and_bin:
-                                splited_images, splited_images_bin = return_textlines_split_if_needed(img_crop, img_crop_bin)
+                                splited_images, splited_images_bin = return_textlines_split_if_needed(img_crop, img_crop_bin, prediction_with_both_of_rgb_and_bin=self.prediction_with_both_of_rgb_and_bin)
                             else:
                                 splited_images, splited_images_bin = return_textlines_split_if_needed(img_crop, None)
                             if splited_images:
diff --git a/src/eynollah/utils/utils_ocr.py b/src/eynollah/utils/utils_ocr.py
index 524e7ce..9ef344a 100644
--- a/src/eynollah/utils/utils_ocr.py
+++ b/src/eynollah/utils/utils_ocr.py
@@ -74,32 +74,24 @@ def distortion_free_resize(image, img_size):
 def return_start_and_end_of_common_text_of_textline_ocr_without_common_section(textline_image):
     width = np.shape(textline_image)[1]
     height = np.shape(textline_image)[0]
-    common_window = int(0.22*width)
+    common_window = int(0.06*width)
 
     width1 = int ( width/2. - common_window )
     width2 = int ( width/2. + common_window )
-    
+
     img_sum = np.sum(textline_image[:,:,0], axis=0)
     sum_smoothed = gaussian_filter1d(img_sum, 3)
-    
+
     peaks_real, _ = find_peaks(sum_smoothed, height=0)
-    
-    if len(peaks_real)>35:
+    if len(peaks_real)>70:
 
-        #peaks_real = peaks_real[(peaks_real<width2) & (peaks_real>width1)]
-        argsort = np.argsort(sum_smoothed[peaks_real])[::-1]
-        peaks_real_top_six = peaks_real[argsort[:6]]
-        midpoint = textline_image.shape[1] / 2.
-        arg_closest = np.argmin(np.abs(peaks_real_top_six - midpoint))
+        peaks_real = peaks_real[(peaks_real<width2) & (peaks_real>width1)]
 
-        #arg_max = np.argmax(sum_smoothed[peaks_real])
-        
-        peaks_final = peaks_real_top_six[arg_closest]#peaks_real[arg_max]
-        
+        arg_max = np.argmax(sum_smoothed[peaks_real])
+        peaks_final = peaks_real[arg_max]
         return peaks_final
     else:
         return None
 
-    
 # Function to fit text inside the given area
 def fit_text_single_line(draw, text, font_path, max_width, max_height):
     initial_font_size = 50
@@ -305,17 +297,28 @@ def break_curved_line_into_small_pieces_and_then_merge(img_curved, mask_curved,
             #new bounding box
             x_n, y_n, w_n, h_n = get_contours_and_bounding_boxes(mask_in_des[:,:,0])
-            mask_in_des = mask_in_des[y_n:y_n+h_n, x_n:x_n+w_n, :]
-            img_in_des = img_in_des[y_n:y_n+h_n, x_n:x_n+w_n, :]
-            if img_bin_curved:
-                img_bin_in_des = img_bin_in_des[y_n:y_n+h_n, x_n:x_n+w_n, :]
-
-            w_relative = int(32 * img_in_des.shape[1]/float(img_in_des.shape[0]) )
-            if w_relative==0:
-                w_relative = img_in_des.shape[1]
-            img_in_des = resize_image(img_in_des, 32, w_relative)
-            if img_bin_curved:
-                img_bin_in_des = resize_image(img_bin_in_des, 32, w_relative)
+            if w_n==0 or h_n==0:
+                img_in_des = np.copy(img_in)
+                if img_bin_curved:
+                    img_bin_in_des = np.copy(img_bin_in)
+                w_relative = int(32 * img_in_des.shape[1]/float(img_in_des.shape[0]) )
+                if w_relative==0:
+                    w_relative = img_in_des.shape[1]
+                img_in_des = resize_image(img_in_des, 32, w_relative)
+                if img_bin_curved:
+                    img_bin_in_des = resize_image(img_bin_in_des, 32, w_relative)
+            else:
+                mask_in_des = mask_in_des[y_n:y_n+h_n, x_n:x_n+w_n, :]
+                img_in_des = img_in_des[y_n:y_n+h_n, x_n:x_n+w_n, :]
+                if img_bin_curved:
+                    img_bin_in_des = img_bin_in_des[y_n:y_n+h_n, x_n:x_n+w_n, :]
+
+                w_relative = int(32 * img_in_des.shape[1]/float(img_in_des.shape[0]) )
+                if w_relative==0:
+                    w_relative = img_in_des.shape[1]
+                img_in_des = resize_image(img_in_des, 32, w_relative)
+                if img_bin_curved:
+                    img_bin_in_des = resize_image(img_bin_in_des, 32, w_relative)
         else:
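
Reviewer note on the utils_ocr.py hunk above: the split-point search changes from "take the six strongest peaks and pick the one closest to the midpoint" to "restrict peaks to a narrow central window (6% of the width instead of 22%) and take the strongest one", and the trigger threshold rises from more than 35 peaks to more than 70. Below is a minimal standalone sketch of the new selection logic for trying the behaviour outside Eynollah; demo_split_point and the grayscale input are illustrative assumptions, not part of the patch (the patched function reads channel 0 of a colour crop).

import numpy as np
from scipy.ndimage import gaussian_filter1d
from scipy.signal import find_peaks

def demo_split_point(textline_gray):
    # Mirrors return_start_and_end_of_common_text_of_textline_ocr_without_common_section
    # after this patch: smooth the column sums, find peaks, keep only those in a
    # narrow central window, and split at the strongest of them.
    width = textline_gray.shape[1]
    common_window = int(0.06 * width)   # patch narrows this from 0.22*width
    width1 = int(width / 2. - common_window)
    width2 = int(width / 2. + common_window)
    sum_smoothed = gaussian_filter1d(np.sum(textline_gray, axis=0), 3)
    peaks, _ = find_peaks(sum_smoothed, height=0)
    if len(peaks) <= 70:                # patch raises this from 35
        return None
    central = peaks[(peaks < width2) & (peaks > width1)]
    if len(central) == 0:
        # guard added for this sketch only; the patched code assumes the window is non-empty
        return None
    return int(central[np.argmax(sum_smoothed[central])])

Note the last guard: with the much narrower window, the filtered peak array can come back empty, in which case the patched np.argmax call would raise a ValueError; that may be worth a follow-up.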