Update OCR: add --overwrite flag, switch to model_step_1075000_ocr, retune textline splitting

2026-01-11 21:07:00 +01:00 · 2025-05-27 23:45:22 +02:00 · 2025-05-27 23:45:22 +02:00 · b93fc112bf
commit b93fc112bf
parent 0f154c605a
3 changed files with 58 additions and 31 deletions
--- a/src/eynollah/cli.py
+++ b/src/eynollah/cli.py
@@ -337,6 +337,12 @@ def layout(image, out, overwrite, dir_in, model, save_images, save_layout, save_
    help="image filename",
    type=click.Path(exists=True, dir_okay=False),
 )
+@click.option(
+    "--overwrite",
+    "-O",
+    help="overwrite (instead of skipping) if output xml exists",
+    is_flag=True,
+)
@click.option(
    "--dir_in",
    "-di",
@@ -421,7 +427,7 @@ def layout(image, out, overwrite, dir_in, model, save_images, save_layout, save_
    help="Override log level globally to this",
 )

-def ocr(image, dir_in, dir_in_bin, out, dir_xmls, dir_out_image_text, model, tr_ocr, export_textline_images_and_text, do_not_mask_with_textline_contour, draw_texts_on_image, prediction_with_both_of_rgb_and_bin, batch_size, dataset_abbrevation, log_level):
+def ocr(image, overwrite, dir_in, dir_in_bin, out, dir_xmls, dir_out_image_text, model, tr_ocr, export_textline_images_and_text, do_not_mask_with_textline_contour, draw_texts_on_image, prediction_with_both_of_rgb_and_bin, batch_size, dataset_abbrevation, log_level):
    initLogging()
    if log_level:
        getLogger('eynollah').setLevel(getLevelName(log_level))
@@ -449,7 +455,7 @@ def ocr(image, dir_in, dir_in_bin, out, dir_xmls, dir_out_image_text, model, tr_
        batch_size=batch_size,
        pref_of_dataset=dataset_abbrevation,
    )
-    eynollah_ocr.run()
+    eynollah_ocr.run(overwrite=overwrite)

 if __name__ == "__main__":
    main()
--- a/src/eynollah/eynollah.py
+++ b/src/eynollah/eynollah.py
@@ -5338,6 +5338,8 @@ class Eynollah_ocr:
        self.dir_out_image_text = dir_out_image_text
        self.prediction_with_both_of_rgb_and_bin = prediction_with_both_of_rgb_and_bin
        self.pref_of_dataset = pref_of_dataset
+        self.logger = logger if logger else getLogger('eynollah')
+        
        if not export_textline_images_and_text:
            if tr_ocr:
                self.processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-printed")
@@ -5351,7 +5353,7 @@ class Eynollah_ocr:
                    self.b_s = int(batch_size)

            else:
-                self.model_ocr_dir = dir_models + "/model_step_750000_ocr"#"/model_step_125000_ocr"#"/model_step_25000_ocr"#"/model_step_1050000_ocr"#"/model_0_ocr_cnnrnn"#"/model_23_ocr_cnnrnn"
+                self.model_ocr_dir = dir_models + "/model_step_1075000_ocr"#"/model_step_125000_ocr"#"/model_step_25000_ocr"#"/model_step_1050000_ocr"#"/model_0_ocr_cnnrnn"#"/model_23_ocr_cnnrnn"
                model_ocr = load_model(self.model_ocr_dir , compile=False)
                
                self.prediction_model = tf.keras.models.Model(
@@ -5377,7 +5379,7 @@ class Eynollah_ocr:
                    vocabulary=char_to_num.get_vocabulary(), mask_token=None, invert=True
                )

-    def run(self):
+    def run(self, overwrite : bool = False):
        if self.dir_in:
            ls_imgs = os.listdir(self.dir_in)
        else:
@@ -5394,6 +5396,14 @@ class Eynollah_ocr:
                    dir_img = self.image_filename
                dir_xml = os.path.join(self.dir_xmls, file_name+'.xml')
                out_file_ocr = os.path.join(self.dir_out, file_name+'.xml')
+                
+                if os.path.exists(out_file_ocr):
+                    if overwrite:
+                        self.logger.warning("will overwrite existing output file '%s'", out_file_ocr)
+                    else:
+                        self.logger.warning("will skip input for existing output file '%s'", out_file_ocr)
+                        continue
+                    
                img = cv2.imread(dir_img)
                
                if self.draw_texts_on_image:
@@ -5574,6 +5584,14 @@ class Eynollah_ocr:
                #dir_img = os.path.join(self.dir_in, ind_img)
                dir_xml = os.path.join(self.dir_xmls, file_name+'.xml')
                out_file_ocr = os.path.join(self.dir_out, file_name+'.xml')
+                
+                if os.path.exists(out_file_ocr):
+                    if overwrite:
+                        self.logger.warning("will overwrite existing output file '%s'", out_file_ocr)
+                    else:
+                        self.logger.warning("will skip input for existing output file '%s'", out_file_ocr)
+                        continue
+                
                img = cv2.imread(dir_img)
                if self.prediction_with_both_of_rgb_and_bin:
                    cropped_lines_bin = []
@@ -5704,7 +5722,7 @@ class Eynollah_ocr:
                                                cropped_lines_bin.append(img_fin)
                                        else:
                                            if self.prediction_with_both_of_rgb_and_bin:
-                                                splited_images, splited_images_bin = return_textlines_split_if_needed(img_crop, img_crop_bin)
+                                                splited_images, splited_images_bin = return_textlines_split_if_needed(img_crop, img_crop_bin, prediction_with_both_of_rgb_and_bin=self.prediction_with_both_of_rgb_and_bin)
                                            else:
                                                splited_images, splited_images_bin = return_textlines_split_if_needed(img_crop, None)
                                            if splited_images:
--- a/src/eynollah/utils/utils_ocr.py
+++ b/src/eynollah/utils/utils_ocr.py
@@ -74,32 +74,24 @@ def distortion_free_resize(image, img_size):
 def return_start_and_end_of_common_text_of_textline_ocr_without_common_section(textline_image):
    width = np.shape(textline_image)[1]
    height = np.shape(textline_image)[0]
-    common_window = int(0.22*width)
+    common_window = int(0.06*width)

    width1 = int ( width/2. - common_window )
    width2 = int ( width/2. + common_window )
-    
+
    img_sum = np.sum(textline_image[:,:,0], axis=0)
    sum_smoothed = gaussian_filter1d(img_sum, 3)
-    
+
    peaks_real, _ = find_peaks(sum_smoothed, height=0)
-    
-    if len(peaks_real)>35:
+    if len(peaks_real)>70:

-        #peaks_real = peaks_real[(peaks_real<width2) & (peaks_real>width1)]
-        argsort = np.argsort(sum_smoothed[peaks_real])[::-1]
-        peaks_real_top_six = peaks_real[argsort[:6]]
-        midpoint = textline_image.shape[1] / 2.
-        arg_closest = np.argmin(np.abs(peaks_real_top_six - midpoint))
+        peaks_real = peaks_real[(peaks_real<width2) & (peaks_real>width1)]

-        #arg_max = np.argmax(sum_smoothed[peaks_real])
-
-        peaks_final = peaks_real_top_six[arg_closest]#peaks_real[arg_max]
-        
+        arg_max = np.argmax(sum_smoothed[peaks_real])
+        peaks_final = peaks_real[arg_max]
        return peaks_final
    else:
        return None
-    
 # Function to fit text inside the given area
 def fit_text_single_line(draw, text, font_path, max_width, max_height):
    initial_font_size = 50
@@ -305,17 +297,28 @@ def break_curved_line_into_small_pieces_and_then_merge(img_curved, mask_curved,
                #new bounding box
                x_n, y_n, w_n, h_n = get_contours_and_bounding_boxes(mask_in_des[:,:,0])
                
-                mask_in_des = mask_in_des[y_n:y_n+h_n, x_n:x_n+w_n, :]
-                img_in_des = img_in_des[y_n:y_n+h_n, x_n:x_n+w_n, :]
-                if img_bin_curved:
-                    img_bin_in_des = img_bin_in_des[y_n:y_n+h_n, x_n:x_n+w_n, :]
-                
-                w_relative = int(32 * img_in_des.shape[1]/float(img_in_des.shape[0]) )
-                if w_relative==0:
-                    w_relative = img_in_des.shape[1]
-                img_in_des = resize_image(img_in_des, 32, w_relative)
-                if img_bin_curved:
-                    img_bin_in_des = resize_image(img_bin_in_des, 32, w_relative)
+                if w_n==0 or h_n==0:
+                    img_in_des = np.copy(img_in)
+                    if img_bin_curved:
+                        img_bin_in_des = np.copy(img_bin_in)
+                    w_relative = int(32 * img_in_des.shape[1]/float(img_in_des.shape[0]) )
+                    if w_relative==0:
+                        w_relative = img_in_des.shape[1]
+                    img_in_des = resize_image(img_in_des, 32, w_relative)
+                    if img_bin_curved:
+                        img_bin_in_des = resize_image(img_bin_in_des, 32, w_relative)
+                else:
+                    mask_in_des = mask_in_des[y_n:y_n+h_n, x_n:x_n+w_n, :]
+                    img_in_des = img_in_des[y_n:y_n+h_n, x_n:x_n+w_n, :]
+                    if img_bin_curved:
+                        img_bin_in_des = img_bin_in_des[y_n:y_n+h_n, x_n:x_n+w_n, :]
+                    
+                    w_relative = int(32 * img_in_des.shape[1]/float(img_in_des.shape[0]) )
+                    if w_relative==0:
+                        w_relative = img_in_des.shape[1]
+                    img_in_des = resize_image(img_in_des, 32, w_relative)
+                    if img_bin_curved:
+                        img_bin_in_des = resize_image(img_bin_in_des, 32, w_relative)
                

            else: