From 0803881f3675a38558145fc81e40f9a9802f59fb Mon Sep 17 00:00:00 2001
From: vahidrezanezhad <vahid631983@gmail.com>
Date: Fri, 25 Jul 2025 13:18:38 +0200
Subject: [PATCH] threshold for textline ocr + new ocr model

---
 src/eynollah/cli.py      |   8 ++-
 src/eynollah/eynollah.py | 117 +++++++++++++++++++++++----------------
 2 files changed, 76 insertions(+), 49 deletions(-)

diff --git a/src/eynollah/cli.py b/src/eynollah/cli.py
index 9398c47..a313860 100644
--- a/src/eynollah/cli.py
+++ b/src/eynollah/cli.py
@@ -496,6 +496,11 @@ def layout(image, out, overwrite, dir_in, model, save_images, save_layout, save_
     "-ds_pref",
     help="in the case of extracting textline and text from a xml GT file user can add an abbrevation of dataset name to generated dataset",
 )
+@click.option(
+    "--min_conf_value_of_textline_text",
+    "-min_conf",
+    help="minimum OCR confidence value. Text lines with a confidence value lower than this threshold will not be included in the output XML file.",
+)
 @click.option(
     "--log_level",
     "-l",
@@ -503,7 +508,7 @@ def layout(image, out, overwrite, dir_in, model, save_images, save_layout, save_
     help="Override log level globally to this",
 )
 
-def ocr(image, overwrite, dir_in, dir_in_bin, out, dir_xmls, dir_out_image_text, model, tr_ocr, export_textline_images_and_text, do_not_mask_with_textline_contour, draw_texts_on_image, prediction_with_both_of_rgb_and_bin, batch_size, dataset_abbrevation, log_level):
+def ocr(image, overwrite, dir_in, dir_in_bin, out, dir_xmls, dir_out_image_text, model, tr_ocr, export_textline_images_and_text, do_not_mask_with_textline_contour, draw_texts_on_image, prediction_with_both_of_rgb_and_bin, batch_size, dataset_abbrevation, min_conf_value_of_textline_text, log_level):
     initLogging()
     if log_level:
         getLogger('eynollah').setLevel(getLevelName(log_level))
@@ -530,6 +535,7 @@ def ocr(image, overwrite, dir_in, dir_in_bin, out, dir_xmls, dir_out_image_text,
         prediction_with_both_of_rgb_and_bin=prediction_with_both_of_rgb_and_bin,
         batch_size=batch_size,
         pref_of_dataset=dataset_abbrevation,
+        min_conf_value_of_textline_text=min_conf_value_of_textline_text,
     )
     eynollah_ocr.run(overwrite=overwrite)
 
diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py
index bdb8f1a..aa1b2e1 100644
--- a/src/eynollah/eynollah.py
+++ b/src/eynollah/eynollah.py
@@ -318,7 +318,7 @@ class Eynollah:
         if self.ocr and self.tr:
             self.model_ocr_dir = dir_models + "/trocr_model_ens_of_3_checkpoints_201124"
         elif self.ocr and not self.tr:
-            self.model_ocr_dir = dir_models + "/model_eynollah_ocr_cnnrnn_20250716"
+            self.model_ocr_dir = dir_models + "/model_eynollah_ocr_cnnrnn_20250725"
         if self.tables:
             if self.light_version:
                 self.model_table_dir = dir_models + "/modelens_table_0t4_201124"
@@ -4974,13 +4974,23 @@ class Eynollah:
                 gc.collect()
                 if len(all_found_textline_polygons)>0:
                     ocr_all_textlines = return_rnn_cnn_ocr_of_given_textlines(image_page, all_found_textline_polygons, self.prediction_model, self.b_s_ocr, self.num_to_char, self.textline_light, self.curved_line)
+                else:
+                    ocr_all_textlines = None
+                    
                 if all_found_textline_polygons_marginals and len(all_found_textline_polygons_marginals)>0:
                     ocr_all_textlines_marginals = return_rnn_cnn_ocr_of_given_textlines(image_page, all_found_textline_polygons_marginals, self.prediction_model, self.b_s_ocr, self.num_to_char, self.textline_light, self.curved_line)
+                else:
+                    ocr_all_textlines_marginals = None
                 
                 if all_found_textline_polygons_h and len(all_found_textline_polygons)>0:
                     ocr_all_textlines_h = return_rnn_cnn_ocr_of_given_textlines(image_page, all_found_textline_polygons_h, self.prediction_model, self.b_s_ocr, self.num_to_char, self.textline_light, self.curved_line)
+                else:
+                    ocr_all_textlines_h = None
+                    
                 if polygons_of_drop_capitals and len(polygons_of_drop_capitals)>0:
                     ocr_all_textlines_drop = return_rnn_cnn_ocr_of_given_textlines(image_page, polygons_of_drop_capitals, self.prediction_model, self.b_s_ocr, self.num_to_char, self.textline_light, self.curved_line)
+                else:
+                    ocr_all_textlines_drop = None
             else:
                 ocr_all_textlines = None
                 ocr_all_textlines_marginals = None
@@ -5098,7 +5108,8 @@ class Eynollah_ocr:
         do_not_mask_with_textline_contour=False,
         draw_texts_on_image=False,
         prediction_with_both_of_rgb_and_bin=False,
-        pref_of_dataset = None,
+        pref_of_dataset=None,
+        min_conf_value_of_textline_text : Optional[float]=None,
         logger=None,
     ):
         self.dir_in = dir_in
@@ -5117,6 +5128,10 @@ class Eynollah_ocr:
         self.logger = logger if logger else getLogger('eynollah')
         
         if not export_textline_images_and_text:
+            if min_conf_value_of_textline_text is not None:  # compare against None: 0 / 0.0 is a valid threshold (keep every text line) and must not fall back to the default
+                self.min_conf_value_of_textline_text = float(min_conf_value_of_textline_text)  # the CLI option declares no type, so the value may arrive as a string
+            else:
+                self.min_conf_value_of_textline_text = 0.3  # default threshold used when the caller supplied no value
             if tr_ocr:
                 self.processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-printed")
                 self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
@@ -5129,7 +5144,7 @@ class Eynollah_ocr:
                     self.b_s = int(batch_size)
 
             else:
-                self.model_ocr_dir = dir_models + "/model_ens_ocrcnn_new6"#"/model_eynollah_ocr_cnnrnn_20250716"#"/model_ens_ocrcnn_new6"#"/model_ens_ocrcnn_new2"#
+                self.model_ocr_dir = dir_models + "/model_eynollah_ocr_cnnrnn_20250725"#"/model_step_1020000_ocr"#"/model_ens_ocrcnn_new10"#"/model_step_255000_ocr"#"/model_ens_ocrcnn_new9"#"/model_step_900000_ocr"#"/model_eynollah_ocr_cnnrnn_20250716"#"/model_ens_ocrcnn_new6"#"/model_ens_ocrcnn_new2"#
                 model_ocr = load_model(self.model_ocr_dir , compile=False)
                 
                 self.prediction_model = tf.keras.models.Model(
@@ -5139,9 +5154,8 @@ class Eynollah_ocr:
                     self.b_s = 8
                 else:
                     self.b_s = int(batch_size)
-
                     
-                with open(os.path.join(self.model_ocr_dir, "characters_20250707_all_lang.txt"),"r") as config_file:
+                with open(os.path.join(self.model_ocr_dir, "characters_org.txt"),"r") as config_file:
                     characters = json.load(config_file)
                     
                 AUTOTUNE = tf.data.AUTOTUNE
@@ -5442,50 +5456,54 @@ class Eynollah_ocr:
                                         
                                     else:
                                         #print(file_name, angle_degrees,w*h , mask_poly[:,:,0].sum(),  mask_poly[:,:,0].sum() /float(w*h) , 'didi')
-                                        if not self.do_not_mask_with_textline_contour:
-                                            if angle_degrees > 3:
-                                                better_des_slope = get_orientation_moments(textline_coords)
+                                        
+                                        if angle_degrees > 3:
+                                            better_des_slope = get_orientation_moments(textline_coords)
+                                            
+                                            img_crop = rotate_image_with_padding(img_crop, better_des_slope )
+                                            
+                                            if self.prediction_with_both_of_rgb_and_bin:
+                                                img_crop_bin = rotate_image_with_padding(img_crop_bin, better_des_slope )
                                                 
-                                                img_crop = rotate_image_with_padding(img_crop, better_des_slope )
+                                            mask_poly = rotate_image_with_padding(mask_poly, better_des_slope )
+                                            mask_poly = mask_poly.astype('uint8')
+                                            
+                                            #new bounding box
+                                            x_n, y_n, w_n, h_n = get_contours_and_bounding_boxes(mask_poly[:,:,0])
+                                            
+                                            mask_poly = mask_poly[y_n:y_n+h_n, x_n:x_n+w_n, :]
+                                            img_crop = img_crop[y_n:y_n+h_n, x_n:x_n+w_n, :]
                                                 
-                                                if self.prediction_with_both_of_rgb_and_bin:
-                                                    img_crop_bin = rotate_image_with_padding(img_crop_bin, better_des_slope )
-                                                    
-                                                mask_poly = rotate_image_with_padding(mask_poly, better_des_slope )
-                                                mask_poly = mask_poly.astype('uint8')
-                                                
-                                                #new bounding box
-                                                x_n, y_n, w_n, h_n = get_contours_and_bounding_boxes(mask_poly[:,:,0])
-                                                
-                                                mask_poly = mask_poly[y_n:y_n+h_n, x_n:x_n+w_n, :]
-                                                img_crop = img_crop[y_n:y_n+h_n, x_n:x_n+w_n, :]
-                                                    
+                                            if not self.do_not_mask_with_textline_contour:
                                                 img_crop[mask_poly==0] = 255
-                                                
-                                                if self.prediction_with_both_of_rgb_and_bin:
-                                                    img_crop_bin = img_crop_bin[y_n:y_n+h_n, x_n:x_n+w_n, :]
+                                            
+                                            if self.prediction_with_both_of_rgb_and_bin:
+                                                img_crop_bin = img_crop_bin[y_n:y_n+h_n, x_n:x_n+w_n, :]
+                                                if not self.do_not_mask_with_textline_contour:
                                                     img_crop_bin[mask_poly==0] = 255
+                                            
+                                            if mask_poly[:,:,0].sum() /float(w_n*h_n) < 0.50 and w_scaled > 90:
+                                                if self.prediction_with_both_of_rgb_and_bin:
+                                                    img_crop, img_crop_bin = break_curved_line_into_small_pieces_and_then_merge(img_crop, mask_poly, img_crop_bin)
+                                                else:
+                                                    img_crop, _ = break_curved_line_into_small_pieces_and_then_merge(img_crop, mask_poly)
+        
                                                 
-                                                if mask_poly[:,:,0].sum() /float(w_n*h_n) < 0.50 and w_scaled > 90:
+                                        else:
+                                            better_des_slope = 0
+                                            if not self.do_not_mask_with_textline_contour:
+                                                img_crop[mask_poly==0] = 255
+                                            if self.prediction_with_both_of_rgb_and_bin:
+                                                if not self.do_not_mask_with_textline_contour:
+                                                    img_crop_bin[mask_poly==0] = 255
+                                            if type_textregion=='drop-capital':
+                                                pass
+                                            else:
+                                                if mask_poly[:,:,0].sum() /float(w*h) < 0.50 and w_scaled > 90:
                                                     if self.prediction_with_both_of_rgb_and_bin:
                                                         img_crop, img_crop_bin = break_curved_line_into_small_pieces_and_then_merge(img_crop, mask_poly, img_crop_bin)
                                                     else:
                                                         img_crop, _ = break_curved_line_into_small_pieces_and_then_merge(img_crop, mask_poly)
-            
-                                                    
-                                            else:
-                                                better_des_slope = 0
-                                                img_crop[mask_poly==0] = 255
-                                                if self.prediction_with_both_of_rgb_and_bin:
-                                                    img_crop_bin[mask_poly==0] = 255
-                                                if type_textregion=='drop-capital':
-                                                    pass
-                                                else:
-                                                    if mask_poly[:,:,0].sum() /float(w*h) < 0.50 and w_scaled > 90:
-                                                        if self.prediction_with_both_of_rgb_and_bin:
-                                                            img_crop, img_crop_bin = break_curved_line_into_small_pieces_and_then_merge(img_crop, mask_poly, img_crop_bin)
-                                                        else:
-                                                            img_crop, _ = break_curved_line_into_small_pieces_and_then_merge(img_crop, mask_poly)
                                     
                                     if not self.export_textline_images_and_text:
                                         if w_scaled < 750:#1.5*image_width:
@@ -5716,9 +5734,12 @@ class Eynollah_ocr:
 
                         for ib in range(imgs.shape[0]):
                             pred_texts_ib = pred_texts[ib].replace("[UNK]", "")
-                            extracted_texts.append(pred_texts_ib)
-                            extracted_conf_value.append(masked_means[ib])
-                            
+                            if masked_means[ib] >= self.min_conf_value_of_textline_text:
+                                extracted_texts.append(pred_texts_ib)
+                                extracted_conf_value.append(masked_means[ib])
+                            else:
+                                extracted_texts.append("")
+                                extracted_conf_value.append(0)
                     del cropped_lines
                     if self.prediction_with_both_of_rgb_and_bin:
                         del cropped_lines_bin
@@ -5790,14 +5811,14 @@ class Eynollah_ocr:
                         
                     ###id_to_order = {tid: ro for tid, ro in zip(tot_region_ref, index_tot_regions)}
         
-                    id_textregions = []
-                    textregions_by_existing_ids = []
+                    #id_textregions = []
+                    #textregions_by_existing_ids = []
                     indexer = 0
                     indexer_textregion = 0
                     for nn in root1.iter(region_tags):
-                        id_textregion = nn.attrib['id']
-                        id_textregions.append(id_textregion)
-                        textregions_by_existing_ids.append(text_by_textregion[indexer_textregion])
+                        #id_textregion = nn.attrib['id']
+                        #id_textregions.append(id_textregion)
+                        #textregions_by_existing_ids.append(text_by_textregion[indexer_textregion])
                         
                         is_textregion_text = False
                         for childtest in nn: