From 5c8084a3976b9aef89518ac2029dbe1a76b634c9 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Fri, 2 May 2025 00:30:36 +0200 Subject: [PATCH 01/40] displaying detexted text on an image is provided for trocr case --- src/eynollah/eynollah.py | 55 +++++++++++++++++++++++++++++++++------- 1 file changed, 46 insertions(+), 9 deletions(-) diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py index d47016b..5793d37 100644 --- a/src/eynollah/eynollah.py +++ b/src/eynollah/eynollah.py @@ -259,7 +259,7 @@ class Eynollah: self.model_region_dir_p_ens = dir_models + "/eynollah-main-regions-ensembled_20210425" self.model_region_dir_p_ens_light = dir_models + "/eynollah-main-regions_20220314" self.model_region_dir_p_ens_light_only_images_extraction = dir_models + "/eynollah-main-regions_20231127_672_org_ens_11_13_16_17_18" - self.model_reading_order_dir = dir_models + "/model_ens_reading_order_machine_based" + self.model_reading_order_dir = dir_models + "/model_mb_ro_aug_2"#"/model_ens_reading_order_machine_based" #"/modelens_12sp_elay_0_3_4__3_6_n" #"/modelens_earlylayout_12spaltige_2_3_5_6_7_8" #"/modelens_early12_sp_2_3_5_6_7_8_9_10_12_14_15_16_18" @@ -1221,7 +1221,7 @@ class Eynollah: seg_art[seg_art>0] =1 seg_line = label_p_pred[:,:,:,3] - seg_line[seg_line>0.1] =1 + seg_line[seg_line>0.5] =1#seg_line[seg_line>0.1] =1 seg_line[seg_line<1] =0 seg[seg_art==1]=4 @@ -3329,13 +3329,13 @@ class Eynollah: img_poly[text_regions_p[:,:]==6] = 5 - #temp - sep_mask = (img_poly==5)*1 - sep_mask = sep_mask.astype('uint8') - sep_mask = cv2.erode(sep_mask, kernel=KERNEL, iterations=2) - img_poly[img_poly==5] = 0 - img_poly[sep_mask==1] = 5 - # + ###temp + ##sep_mask = (img_poly==5)*1 + ##sep_mask = sep_mask.astype('uint8') + ##sep_mask = cv2.erode(sep_mask, kernel=KERNEL, iterations=2) + ##img_poly[img_poly==5] = 0 + ##img_poly[sep_mask==1] = 5 + ### img_header_and_sep = np.zeros((y_len,x_len), dtype='uint8') if contours_only_text_parent_h: @@ -5081,6 +5081,12 @@ class Eynollah_ocr: dir_xml = os.path.join(self.dir_xmls, file_name+'.xml') out_file_ocr = os.path.join(self.dir_out, file_name+'.xml') img = cv2.imread(dir_img) + + if self.draw_texts_on_image: + out_image_with_text = os.path.join(self.dir_out_image_text, file_name+'.png') + image_text = Image.new("RGB", (img.shape[1], img.shape[0]), "white") + draw = ImageDraw.Draw(image_text) + total_bb_coordinates = [] ##file_name = Path(dir_xmls).stem tree1 = ET.parse(dir_xml, parser = ET.XMLParser(encoding="utf-8")) @@ -5111,6 +5117,9 @@ class Eynollah_ocr: textline_coords = np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ] for x in p_h] ) x,y,w,h = cv2.boundingRect(textline_coords) + if self.draw_texts_on_image: + total_bb_coordinates.append([x,y,w,h]) + h2w_ratio = h/float(w) img_poly_on_img = np.copy(img) @@ -5161,6 +5170,34 @@ class Eynollah_ocr: #print(extracted_texts_merged, len(extracted_texts_merged)) unique_cropped_lines_region_indexer = np.unique(cropped_lines_region_indexer) + + if self.draw_texts_on_image: + + font_path = "NotoSans-Regular.ttf" # Make sure this file exists! 
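For reference: `self.fit_text_single_line`, called a few lines further down in this block, is not included in the diff. A plausible sketch of such a helper — shrink the font size until the rendered string fits the text line's box — might look like this (signature, bounds and step size are assumptions, not the repository's actual implementation):

```python
# Plausible sketch of a fit_text_single_line helper (assumed, not from this diff):
# shrink the font until the rendered text fits the target width.
from PIL import ImageFont

def fit_text_single_line(draw, text, font_path, max_width, max_height, min_size=10):
    size = max(int(max_height), min_size)
    while size > min_size:
        font = ImageFont.truetype(font_path, size)
        left, top, right, bottom = draw.textbbox((0, 0), text, font=font)
        if right - left <= max_width:
            return font
        size -= 2
    return ImageFont.truetype(font_path, min_size)
```

The patch then measures the chosen font with draw.textbbox and centres the string inside the text line's bounding box, as the following lines show.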
+ font = ImageFont.truetype(font_path, 40) + + for indexer_text, bb_ind in enumerate(total_bb_coordinates): + + + x_bb = bb_ind[0] + y_bb = bb_ind[1] + w_bb = bb_ind[2] + h_bb = bb_ind[3] + + font = self.fit_text_single_line(draw, extracted_texts_merged[indexer_text], font_path, w_bb, int(h_bb*0.4) ) + + ##draw.rectangle([x_bb, y_bb, x_bb + w_bb, y_bb + h_bb], outline="red", width=2) + + text_bbox = draw.textbbox((0, 0), extracted_texts_merged[indexer_text], font=font) + text_width = text_bbox[2] - text_bbox[0] + text_height = text_bbox[3] - text_bbox[1] + + text_x = x_bb + (w_bb - text_width) // 2 # Center horizontally + text_y = y_bb + (h_bb - text_height) // 2 # Center vertically + + # Draw the text + draw.text((text_x, text_y), extracted_texts_merged[indexer_text], fill="black", font=font) + image_text.save(out_image_with_text) #print(len(unique_cropped_lines_region_indexer), 'unique_cropped_lines_region_indexer') text_by_textregion = [] From fd375e15d59e9e83dbfcc82c8e36a429883f3dad Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Fri, 2 May 2025 01:02:32 +0200 Subject: [PATCH 02/40] adding space between splitted textline predicted text in the case of trocr --- src/eynollah/eynollah.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py index 5793d37..d148c67 100644 --- a/src/eynollah/eynollah.py +++ b/src/eynollah/eynollah.py @@ -5164,7 +5164,7 @@ class Eynollah_ocr: extracted_texts = extracted_texts + generated_text_merged - extracted_texts_merged = [extracted_texts[ind] if cropped_lines_meging_indexing[ind]==0 else extracted_texts[ind]+extracted_texts[ind+1] if cropped_lines_meging_indexing[ind]==1 else None for ind in range(len(cropped_lines_meging_indexing))] + extracted_texts_merged = [extracted_texts[ind] if cropped_lines_meging_indexing[ind]==0 else extracted_texts[ind]+" "+extracted_texts[ind+1] if cropped_lines_meging_indexing[ind]==1 else None for ind in range(len(cropped_lines_meging_indexing))] extracted_texts_merged = [ind for ind in extracted_texts_merged if ind is not None] #print(extracted_texts_merged, len(extracted_texts_merged)) From a4defbb04d6c2867e3f80c3cd3aecc7cef6a0464 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Fri, 2 May 2025 12:53:33 +0200 Subject: [PATCH 03/40] inference batch size for ocr is passed as an argument --- src/eynollah/cli.py | 8 +++++- src/eynollah/eynollah.py | 53 ++++++++++++++++++++++++++-------------- 2 files changed, 41 insertions(+), 20 deletions(-) diff --git a/src/eynollah/cli.py b/src/eynollah/cli.py index c189aca..56d5d7e 100644 --- a/src/eynollah/cli.py +++ b/src/eynollah/cli.py @@ -374,6 +374,11 @@ def layout(image, out, overwrite, dir_in, model, save_images, save_layout, save_ is_flag=True, help="If this parameter is set to True, the prediction will be performed using both RGB and binary images. However, this does not necessarily improve results; it may be beneficial for certain document images.", ) +@click.option( + "--batch_size", + "-bs", + help="number of inference batch size. 
Default b_s for trocr and cnn_rnn models are 2 and 8 respectively", +) @click.option( "--log_level", "-l", @@ -381,7 +386,7 @@ def layout(image, out, overwrite, dir_in, model, save_images, save_layout, save_ help="Override log level globally to this", ) -def ocr(dir_in, dir_in_bin, out, dir_xmls, dir_out_image_text, model, tr_ocr, export_textline_images_and_text, do_not_mask_with_textline_contour, draw_texts_on_image, prediction_with_both_of_rgb_and_bin, log_level): +def ocr(dir_in, dir_in_bin, out, dir_xmls, dir_out_image_text, model, tr_ocr, export_textline_images_and_text, do_not_mask_with_textline_contour, draw_texts_on_image, prediction_with_both_of_rgb_and_bin, batch_size, log_level): initLogging() if log_level: getLogger('eynollah').setLevel(getLevelName(log_level)) @@ -397,6 +402,7 @@ def ocr(dir_in, dir_in_bin, out, dir_xmls, dir_out_image_text, model, tr_ocr, ex do_not_mask_with_textline_contour=do_not_mask_with_textline_contour, draw_texts_on_image=draw_texts_on_image, prediction_with_both_of_rgb_and_bin=prediction_with_both_of_rgb_and_bin, + batch_size=batch_size, ) eynollah_ocr.run() diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py index d148c67..62026bf 100644 --- a/src/eynollah/eynollah.py +++ b/src/eynollah/eynollah.py @@ -4872,6 +4872,7 @@ class Eynollah_ocr: dir_out=None, dir_out_image_text=None, tr_ocr=False, + batch_size=None, export_textline_images_and_text=False, do_not_mask_with_textline_contour=False, draw_texts_on_image=False, @@ -4895,6 +4896,10 @@ class Eynollah_ocr: self.model_ocr_dir = dir_models + "/trocr_model_ens_of_3_checkpoints_201124" self.model_ocr = VisionEncoderDecoderModel.from_pretrained(self.model_ocr_dir) self.model_ocr.to(self.device) + if not batch_size: + self.b_s = 2 + else: + self.b_s = int(batch_size) else: self.model_ocr_dir = dir_models + "/model_step_1050000_ocr"#"/model_0_ocr_cnnrnn"#"/model_23_ocr_cnnrnn" @@ -4903,6 +4908,10 @@ class Eynollah_ocr: self.prediction_model = tf.keras.models.Model( model_ocr.get_layer(name = "image").input, model_ocr.get_layer(name = "dense2").output) + if not batch_size: + self.b_s = 8 + else: + self.b_s = int(batch_size) with open(os.path.join(self.model_ocr_dir, "characters_org.txt"),"r") as config_file: @@ -4918,6 +4927,7 @@ class Eynollah_ocr: self.num_to_char = StringLookup( vocabulary=char_to_num.get_vocabulary(), mask_token=None, invert=True ) + def decode_batch_predictions(self, pred, max_len = 128): # input_len is the product of the batch size and the @@ -5073,10 +5083,9 @@ class Eynollah_ocr: ls_imgs = os.listdir(self.dir_in) if self.tr_ocr: - b_s = 2 + tr_ocr_input_height_and_width = 384 for ind_img in ls_imgs: - t0 = time.time() - file_name = ind_img.split('.')[0] + file_name = Path(ind_img).stem dir_img = os.path.join(self.dir_in, ind_img) dir_xml = os.path.join(self.dir_xmls, file_name+'.xml') out_file_ocr = os.path.join(self.dir_out, file_name+'.xml') @@ -5131,15 +5140,15 @@ class Eynollah_ocr: img_crop[mask_poly==0] = 255 if h2w_ratio > 0.1: - cropped_lines.append(img_crop) + cropped_lines.append(resize_image(img_crop, tr_ocr_input_height_and_width, tr_ocr_input_height_and_width) ) cropped_lines_meging_indexing.append(0) else: splited_images, _ = self.return_textlines_split_if_needed(img_crop, None) #print(splited_images) if splited_images: - cropped_lines.append(splited_images[0]) + cropped_lines.append(resize_image(splited_images[0], tr_ocr_input_height_and_width, tr_ocr_input_height_and_width)) cropped_lines_meging_indexing.append(1) - 
cropped_lines.append(splited_images[1]) + cropped_lines.append(resize_image(splited_images[1], tr_ocr_input_height_and_width, tr_ocr_input_height_and_width)) cropped_lines_meging_indexing.append(-1) else: cropped_lines.append(img_crop) @@ -5148,21 +5157,24 @@ class Eynollah_ocr: extracted_texts = [] - n_iterations = math.ceil(len(cropped_lines) / b_s) + n_iterations = math.ceil(len(cropped_lines) / self.b_s) for i in range(n_iterations): if i==(n_iterations-1): - n_start = i*b_s + n_start = i*self.b_s imgs = cropped_lines[n_start:] else: - n_start = i*b_s - n_end = (i+1)*b_s + n_start = i*self.b_s + n_end = (i+1)*self.b_s imgs = cropped_lines[n_start:n_end] pixel_values_merged = self.processor(imgs, return_tensors="pt").pixel_values generated_ids_merged = self.model_ocr.generate(pixel_values_merged.to(self.device)) generated_text_merged = self.processor.batch_decode(generated_ids_merged, skip_special_tokens=True) extracted_texts = extracted_texts + generated_text_merged + + del cropped_lines + gc.collect() extracted_texts_merged = [extracted_texts[ind] if cropped_lines_meging_indexing[ind]==0 else extracted_texts[ind]+" "+extracted_texts[ind+1] if cropped_lines_meging_indexing[ind]==1 else None for ind in range(len(cropped_lines_meging_indexing))] @@ -5241,14 +5253,12 @@ class Eynollah_ocr: padding_token = 299 image_width = 512#max_len * 4 image_height = 32 - b_s = 8 img_size=(image_width, image_height) for ind_img in ls_imgs: - t0 = time.time() - file_name = ind_img.split('.')[0] + file_name = Path(ind_img).stem dir_img = os.path.join(self.dir_in, ind_img) dir_xml = os.path.join(self.dir_xmls, file_name+'.xml') out_file_ocr = os.path.join(self.dir_out, file_name+'.xml') @@ -5368,11 +5378,11 @@ class Eynollah_ocr: if not self.export_textline_images_and_text: extracted_texts = [] - n_iterations = math.ceil(len(cropped_lines) / b_s) + n_iterations = math.ceil(len(cropped_lines) / self.b_s) for i in range(n_iterations): if i==(n_iterations-1): - n_start = i*b_s + n_start = i*self.b_s imgs = cropped_lines[n_start:] imgs = np.array(imgs) imgs = imgs.reshape(imgs.shape[0], image_height, image_width, 3) @@ -5381,14 +5391,14 @@ class Eynollah_ocr: imgs_bin = np.array(imgs_bin) imgs_bin = imgs_bin.reshape(imgs_bin.shape[0], image_height, image_width, 3) else: - n_start = i*b_s - n_end = (i+1)*b_s + n_start = i*self.b_s + n_end = (i+1)*self.b_s imgs = cropped_lines[n_start:n_end] - imgs = np.array(imgs).reshape(b_s, image_height, image_width, 3) + imgs = np.array(imgs).reshape(self.b_s, image_height, image_width, 3) if self.prediction_with_both_of_rgb_and_bin: imgs_bin = cropped_lines_bin[n_start:n_end] - imgs_bin = np.array(imgs_bin).reshape(b_s, image_height, image_width, 3) + imgs_bin = np.array(imgs_bin).reshape(self.b_s, image_height, image_width, 3) preds = self.prediction_model.predict(imgs, verbose=0) @@ -5402,6 +5412,11 @@ class Eynollah_ocr: pred_texts_ib = pred_texts[ib].replace("[UNK]", "") extracted_texts.append(pred_texts_ib) + del cropped_lines + if self.prediction_with_both_of_rgb_and_bin: + del cropped_lines_bin + gc.collect() + extracted_texts_merged = [extracted_texts[ind] if cropped_lines_meging_indexing[ind]==0 else extracted_texts[ind]+" "+extracted_texts[ind+1] if cropped_lines_meging_indexing[ind]==1 else None for ind in range(len(cropped_lines_meging_indexing))] extracted_texts_merged = [ind for ind in extracted_texts_merged if ind is not None] From 8c8fa461bba762a07ee4a0e129c391b91be23e18 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Fri, 2 May 2025 12:57:26 +0200 
Subject: [PATCH 04/40] machine based model name changed to public one --- src/eynollah/eynollah.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py index 62026bf..cc1f766 100644 --- a/src/eynollah/eynollah.py +++ b/src/eynollah/eynollah.py @@ -259,7 +259,7 @@ class Eynollah: self.model_region_dir_p_ens = dir_models + "/eynollah-main-regions-ensembled_20210425" self.model_region_dir_p_ens_light = dir_models + "/eynollah-main-regions_20220314" self.model_region_dir_p_ens_light_only_images_extraction = dir_models + "/eynollah-main-regions_20231127_672_org_ens_11_13_16_17_18" - self.model_reading_order_dir = dir_models + "/model_mb_ro_aug_2"#"/model_ens_reading_order_machine_based" + self.model_reading_order_dir = dir_models + "/model_ens_reading_order_machine_based" #"/modelens_12sp_elay_0_3_4__3_6_n" #"/modelens_earlylayout_12spaltige_2_3_5_6_7_8" #"/modelens_early12_sp_2_3_5_6_7_8_9_10_12_14_15_16_18" From 5d447abcc4e24cec25e228fb93f95bdd6e549e5a Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Sat, 3 May 2025 02:59:16 +0200 Subject: [PATCH 05/40] let to add dataset abbrevation to extracted textline images and text --- src/eynollah/cli.py | 17 +++++++- src/eynollah/eynollah.py | 91 ++++++++++++++++++++++++---------------- 2 files changed, 71 insertions(+), 37 deletions(-) diff --git a/src/eynollah/cli.py b/src/eynollah/cli.py index 56d5d7e..7d08ac8 100644 --- a/src/eynollah/cli.py +++ b/src/eynollah/cli.py @@ -342,7 +342,6 @@ def layout(image, out, overwrite, dir_in, model, save_images, save_layout, save_ "-m", help="directory of models", type=click.Path(exists=True, file_okay=False), - required=True, ) @click.option( "--tr_ocr", @@ -379,6 +378,11 @@ def layout(image, out, overwrite, dir_in, model, save_images, save_layout, save_ "-bs", help="number of inference batch size. 
Default b_s for trocr and cnn_rnn models are 2 and 8 respectively", ) +@click.option( + "--dataset_abbrevation", + "-ds_pref", + help="in the case of extracting textline and text from a xml GT file user can add an abbrevation of dataset name to generated dataset", +) @click.option( "--log_level", "-l", @@ -386,10 +390,18 @@ def layout(image, out, overwrite, dir_in, model, save_images, save_layout, save_ help="Override log level globally to this", ) -def ocr(dir_in, dir_in_bin, out, dir_xmls, dir_out_image_text, model, tr_ocr, export_textline_images_and_text, do_not_mask_with_textline_contour, draw_texts_on_image, prediction_with_both_of_rgb_and_bin, batch_size, log_level): +def ocr(dir_in, dir_in_bin, out, dir_xmls, dir_out_image_text, model, tr_ocr, export_textline_images_and_text, do_not_mask_with_textline_contour, draw_texts_on_image, prediction_with_both_of_rgb_and_bin, batch_size, dataset_abbrevation, log_level): initLogging() if log_level: getLogger('eynollah').setLevel(getLevelName(log_level)) + assert not export_textline_images_and_text or not tr_ocr, "Exporting textline and text -etit can not be set alongside transformer ocr -tr_ocr" + assert not export_textline_images_and_text or not model, "Exporting textline and text -etit can not be set alongside model -m" + assert not export_textline_images_and_text or not batch_size, "Exporting textline and text -etit can not be set alongside batch size -bs" + assert not export_textline_images_and_text or not dir_in_bin, "Exporting textline and text -etit can not be set alongside directory of bin images -dib" + assert not export_textline_images_and_text or not dir_out_image_text, "Exporting textline and text -etit can not be set alongside directory of images with predicted text -doit" + assert not export_textline_images_and_text or not draw_texts_on_image, "Exporting textline and text -etit can not be set alongside draw text on image -dtoi" + assert not export_textline_images_and_text or not prediction_with_both_of_rgb_and_bin, "Exporting textline and text -etit can not be set alongside prediction with both rgb and bin -brb" + eynollah_ocr = Eynollah_ocr( dir_xmls=dir_xmls, dir_out_image_text=dir_out_image_text, @@ -403,6 +415,7 @@ def ocr(dir_in, dir_in_bin, out, dir_xmls, dir_out_image_text, model, tr_ocr, ex draw_texts_on_image=draw_texts_on_image, prediction_with_both_of_rgb_and_bin=prediction_with_both_of_rgb_and_bin, batch_size=batch_size, + pref_of_dataset=dataset_abbrevation, ) eynollah_ocr.run() diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py index cc1f766..0b15573 100644 --- a/src/eynollah/eynollah.py +++ b/src/eynollah/eynollah.py @@ -4877,6 +4877,7 @@ class Eynollah_ocr: do_not_mask_with_textline_contour=False, draw_texts_on_image=False, prediction_with_both_of_rgb_and_bin=False, + pref_of_dataset = None, logger=None, ): self.dir_in = dir_in @@ -4890,43 +4891,45 @@ class Eynollah_ocr: self.draw_texts_on_image = draw_texts_on_image self.dir_out_image_text = dir_out_image_text self.prediction_with_both_of_rgb_and_bin = prediction_with_both_of_rgb_and_bin - if tr_ocr: - self.processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-printed") - self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") - self.model_ocr_dir = dir_models + "/trocr_model_ens_of_3_checkpoints_201124" - self.model_ocr = VisionEncoderDecoderModel.from_pretrained(self.model_ocr_dir) - self.model_ocr.to(self.device) - if not batch_size: - self.b_s = 2 + self.pref_of_dataset = pref_of_dataset + if not 
export_textline_images_and_text: + if tr_ocr: + self.processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-printed") + self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") + self.model_ocr_dir = dir_models + "/trocr_model_ens_of_3_checkpoints_201124" + self.model_ocr = VisionEncoderDecoderModel.from_pretrained(self.model_ocr_dir) + self.model_ocr.to(self.device) + if not batch_size: + self.b_s = 2 + else: + self.b_s = int(batch_size) + else: - self.b_s = int(batch_size) - - else: - self.model_ocr_dir = dir_models + "/model_step_1050000_ocr"#"/model_0_ocr_cnnrnn"#"/model_23_ocr_cnnrnn" - model_ocr = load_model(self.model_ocr_dir , compile=False) - - self.prediction_model = tf.keras.models.Model( - model_ocr.get_layer(name = "image").input, - model_ocr.get_layer(name = "dense2").output) - if not batch_size: - self.b_s = 8 - else: - self.b_s = int(batch_size) - + self.model_ocr_dir = dir_models + "/model_step_1050000_ocr"#"/model_0_ocr_cnnrnn"#"/model_23_ocr_cnnrnn" + model_ocr = load_model(self.model_ocr_dir , compile=False) - with open(os.path.join(self.model_ocr_dir, "characters_org.txt"),"r") as config_file: - characters = json.load(config_file) + self.prediction_model = tf.keras.models.Model( + model_ocr.get_layer(name = "image").input, + model_ocr.get_layer(name = "dense2").output) + if not batch_size: + self.b_s = 8 + else: + self.b_s = int(batch_size) - - AUTOTUNE = tf.data.AUTOTUNE + + with open(os.path.join(self.model_ocr_dir, "characters_org.txt"),"r") as config_file: + characters = json.load(config_file) - # Mapping characters to integers. - char_to_num = StringLookup(vocabulary=list(characters), mask_token=None) + + AUTOTUNE = tf.data.AUTOTUNE - # Mapping integers back to original characters. - self.num_to_char = StringLookup( - vocabulary=char_to_num.get_vocabulary(), mask_token=None, invert=True - ) + # Mapping characters to integers. + char_to_num = StringLookup(vocabulary=list(characters), mask_token=None) + + # Mapping integers back to original characters. 
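The inverse lookup built just below is what turns CTC output ids back into characters during decoding. As a minimal sketch of that step (the patch's own `decode_batch_predictions` body is not shown in this excerpt; the helper name and cleanup here are assumptions):

```python
# Minimal sketch of greedy CTC decoding with the inverse StringLookup
# (assumed helper, not the repository's decode_batch_predictions).
import tensorflow as tf

def ctc_greedy_to_text(preds, num_to_char, max_len=128):
    # preds: (batch, time, vocab) probabilities from the recognition model
    input_len = tf.fill(tf.shape(preds)[:1], tf.shape(preds)[1])
    decoded = tf.keras.backend.ctc_decode(preds, input_length=input_len, greedy=True)[0][0]
    decoded = decoded[:, :max_len]
    texts = []
    for seq in decoded:
        seq = tf.boolean_mask(seq, seq != -1)  # drop CTC padding ids
        texts.append(tf.strings.reduce_join(num_to_char(seq)).numpy().decode("utf-8"))
    return texts
```

The repository's decoding additionally strips the "[UNK]" placeholder from the resulting strings, as seen later in the batched prediction loop.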
+ self.num_to_char = StringLookup( + vocabulary=char_to_num.get_vocabulary(), mask_token=None, invert=True + ) def decode_batch_predictions(self, pred, max_len = 128): @@ -5365,10 +5368,28 @@ class Eynollah_ocr: if cheild_text.tag.endswith("Unicode"): textline_text = cheild_text.text if textline_text: - with open(os.path.join(self.dir_out, file_name+'_line_'+str(indexer_textlines)+'.txt'), 'w') as text_file: - text_file.write(textline_text) + if self.do_not_mask_with_textline_contour: + if self.pref_of_dataset: + with open(os.path.join(self.dir_out, file_name+'_line_'+str(indexer_textlines)+'_'+self.pref_of_dataset+'.txt'), 'w') as text_file: + text_file.write(textline_text) - cv2.imwrite(os.path.join(self.dir_out, file_name+'_line_'+str(indexer_textlines)+'.png'), img_crop ) + cv2.imwrite(os.path.join(self.dir_out, file_name+'_line_'+str(indexer_textlines)+'_'+self.pref_of_dataset+'.png'), img_crop ) + else: + with open(os.path.join(self.dir_out, file_name+'_line_'+str(indexer_textlines)+'.txt'), 'w') as text_file: + text_file.write(textline_text) + + cv2.imwrite(os.path.join(self.dir_out, file_name+'_line_'+str(indexer_textlines)+'.png'), img_crop ) + else: + if self.pref_of_dataset: + with open(os.path.join(self.dir_out, file_name+'_line_'+str(indexer_textlines)+'_'+self.pref_of_dataset+'_masked.txt'), 'w') as text_file: + text_file.write(textline_text) + + cv2.imwrite(os.path.join(self.dir_out, file_name+'_line_'+str(indexer_textlines)+'_'+self.pref_of_dataset+'_masked.png'), img_crop ) + else: + with open(os.path.join(self.dir_out, file_name+'_line_'+str(indexer_textlines)+'_masked.txt'), 'w') as text_file: + text_file.write(textline_text) + + cv2.imwrite(os.path.join(self.dir_out, file_name+'_line_'+str(indexer_textlines)+'_masked.png'), img_crop ) indexer_textlines+=1 From 02a679a14500b414fd9e10357febc2e5c0bf9c21 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Mon, 12 May 2025 00:10:18 +0200 Subject: [PATCH 06/40] I have tried to address the issues #163 and #161 . The changes have also improved marginal detection and enhanced the isolation of headers. 
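The main mechanism of this patch, summarized as a standalone sketch (the function name and defaults below are illustrative, not the patch's own API): rather than keeping every pixel above the artificial-class threshold, the probability map is binarized, thinned to a one-pixel skeleton with skimage, slightly re-dilated, and only then written back into the label image, which keeps separator/header pixels from bleeding into neighbouring text.

```python
# Illustrative sketch of the skeleton-based post-processing introduced here.
import numpy as np
import cv2
from skimage.morphology import skeletonize

def thin_artificial_class(prob_map, labels, class_id=2, threshold=0.1):
    # prob_map: HxW probabilities of the "artificial" class; labels: HxW argmax map
    seg_art = (prob_map > threshold).astype(np.uint8)
    skeleton = skeletonize(seg_art.astype(bool)).astype(np.uint8)
    skeleton = cv2.dilate(skeleton, np.ones((3, 3), np.uint8), iterations=1)
    labels[labels == class_id] = 0     # drop the thick prediction of that class
    labels[skeleton == 1] = class_id   # keep only the thinned, dilated skeleton
    return labels
```

The new --threshold_art_class_layout and --threshold_art_class_textline options feed the threshold used in this step, falling back to 0.1 when not given.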
--- requirements.txt | 1 + src/eynollah/cli.py | 14 +- src/eynollah/eynollah.py | 294 ++++++++++++++++++++++++++++++++++----- 3 files changed, 275 insertions(+), 34 deletions(-) diff --git a/requirements.txt b/requirements.txt index 9ed0584..aeffd47 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,4 +4,5 @@ numpy <1.24.0 scikit-learn >= 0.23.2 tensorflow < 2.13 numba <= 0.58.1 +scikit-image loky diff --git a/src/eynollah/cli.py b/src/eynollah/cli.py index 7d08ac8..99961c9 100644 --- a/src/eynollah/cli.py +++ b/src/eynollah/cli.py @@ -235,6 +235,16 @@ def binarization(patches, model_dir, input_image, output_image, dir_in, dir_out) "-ncl", help="upper limit of columns in document image", ) +@click.option( + "--threshold_art_class_layout", + "-tharl", + help="threshold of artifical class in the case of layout detection", +) +@click.option( + "--threshold_art_class_textline", + "-thart", + help="threshold of artifical class in the case of textline detection", +) @click.option( "--skip_layout_and_reading_order", "-slro/-noslro", @@ -248,7 +258,7 @@ def binarization(patches, model_dir, input_image, output_image, dir_in, dir_out) help="Override log level globally to this", ) -def layout(image, out, overwrite, dir_in, model, save_images, save_layout, save_deskewed, save_all, extract_only_images, save_page, enable_plotting, allow_enhancement, curved_line, textline_light, full_layout, tables, right2left, input_binary, allow_scaling, headers_off, light_version, reading_order_machine_based, do_ocr, num_col_upper, num_col_lower, skip_layout_and_reading_order, ignore_page_extraction, log_level): +def layout(image, out, overwrite, dir_in, model, save_images, save_layout, save_deskewed, save_all, extract_only_images, save_page, enable_plotting, allow_enhancement, curved_line, textline_light, full_layout, tables, right2left, input_binary, allow_scaling, headers_off, light_version, reading_order_machine_based, do_ocr, num_col_upper, num_col_lower, threshold_art_class_textline, threshold_art_class_layout, skip_layout_and_reading_order, ignore_page_extraction, log_level): initLogging() if log_level: getLogger('eynollah').setLevel(getLevelName(log_level)) @@ -298,6 +308,8 @@ def layout(image, out, overwrite, dir_in, model, save_images, save_layout, save_ num_col_upper=num_col_upper, num_col_lower=num_col_lower, skip_layout_and_reading_order=skip_layout_and_reading_order, + threshold_art_class_textline=threshold_art_class_textline, + threshold_art_class_layout=threshold_art_class_layout, ) if dir_in: eynollah.run(dir_in=dir_in, overwrite=overwrite) diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py index 0b15573..0c7c5d2 100644 --- a/src/eynollah/eynollah.py +++ b/src/eynollah/eynollah.py @@ -30,7 +30,7 @@ import numpy as np from scipy.signal import find_peaks from scipy.ndimage import gaussian_filter1d from numba import cuda - +from skimage.morphology import skeletonize from ocrd import OcrdPage from ocrd_utils import getLogger, tf_disable_interactive_logs @@ -200,6 +200,8 @@ class Eynollah: do_ocr : bool = False, num_col_upper : Optional[int] = None, num_col_lower : Optional[int] = None, + threshold_art_class_layout: Optional[float] = None, + threshold_art_class_textline: Optional[float] = None, skip_layout_and_reading_order : bool = False, logger : Optional[Logger] = None, ): @@ -237,6 +239,17 @@ class Eynollah: self.num_col_lower = int(num_col_lower) else: self.num_col_lower = num_col_lower + + if threshold_art_class_layout: + self.threshold_art_class_layout = 
float(threshold_art_class_layout) + else: + self.threshold_art_class_layout = 0.1 + + if threshold_art_class_textline: + self.threshold_art_class_textline = float(threshold_art_class_textline) + else: + self.threshold_art_class_textline = 0.1 + self.logger = logger if logger else getLogger('eynollah') # for parallelization of CPU-intensive tasks: self.executor = ProcessPoolExecutor(max_workers=cpu_count(), timeout=1200) @@ -784,7 +797,7 @@ class Eynollah: self, patches, img, model, n_batch_inference=1, marginal_of_patch_percent=0.1, thresholding_for_some_classes_in_light_version=False, - thresholding_for_artificial_class_in_light_version=False): + thresholding_for_artificial_class_in_light_version=False, threshold_art_class_textline=0.1): self.logger.debug("enter do_prediction") img_height_model = model.layers[-1].output_shape[1] @@ -802,10 +815,13 @@ class Eynollah: if thresholding_for_artificial_class_in_light_version: seg_art = label_p_pred[0,:,:,2] - seg_art[seg_art<0.2] = 0 + seg_art[seg_art0] =1 + + skeleton_art = skeletonize(seg_art) + skeleton_art = skeleton_art*1 - seg[seg_art==1]=2 + seg[skeleton_art==1]=2 seg_color = np.repeat(seg[:, :, np.newaxis], 3, axis=2) prediction_true = resize_image(seg_color, img_h_page, img_w_page).astype(np.uint8) return prediction_true @@ -896,14 +912,17 @@ class Eynollah: if thresholding_for_artificial_class_in_light_version: seg_art = label_p_pred[:,:,:,2] - seg_art[seg_art<0.2] = 0 + seg_art[seg_art0] =1 - seg[seg_art==1]=2 + ##seg[seg_art==1]=2 indexer_inside_batch = 0 for i_batch, j_batch in zip(list_i_s, list_j_s): seg_in = seg[indexer_inside_batch] + + if thresholding_for_artificial_class_in_light_version: + seg_in_art = seg_art[indexer_inside_batch] index_y_u_in = list_y_u[indexer_inside_batch] index_y_d_in = list_y_d[indexer_inside_batch] @@ -917,54 +936,107 @@ class Eynollah: seg_in[0:-margin or None, 0:-margin or None, np.newaxis] + if thresholding_for_artificial_class_in_light_version: + prediction_true[index_y_d_in + 0:index_y_u_in - margin, + index_x_d_in + 0:index_x_u_in - margin, 1] = \ + seg_in_art[0:-margin or None, + 0:-margin or None] + elif i_batch == nxf - 1 and j_batch == nyf - 1: prediction_true[index_y_d_in + margin:index_y_u_in - 0, index_x_d_in + margin:index_x_u_in - 0] = \ seg_in[margin:, margin:, np.newaxis] + if thresholding_for_artificial_class_in_light_version: + prediction_true[index_y_d_in + margin:index_y_u_in - 0, + index_x_d_in + margin:index_x_u_in - 0, 1] = \ + seg_in_art[margin:, + margin:] + elif i_batch == 0 and j_batch == nyf - 1: prediction_true[index_y_d_in + margin:index_y_u_in - 0, index_x_d_in + 0:index_x_u_in - margin] = \ seg_in[margin:, 0:-margin or None, np.newaxis] + if thresholding_for_artificial_class_in_light_version: + prediction_true[index_y_d_in + margin:index_y_u_in - 0, + index_x_d_in + 0:index_x_u_in - margin, 1] = \ + seg_in_art[margin:, + 0:-margin or None] + elif i_batch == nxf - 1 and j_batch == 0: prediction_true[index_y_d_in + 0:index_y_u_in - margin, index_x_d_in + margin:index_x_u_in - 0] = \ seg_in[0:-margin or None, margin:, np.newaxis] + if thresholding_for_artificial_class_in_light_version: + prediction_true[index_y_d_in + 0:index_y_u_in - margin, + index_x_d_in + margin:index_x_u_in - 0, 1] = \ + seg_in_art[0:-margin or None, + margin:] + elif i_batch == 0 and j_batch != 0 and j_batch != nyf - 1: prediction_true[index_y_d_in + margin:index_y_u_in - margin, index_x_d_in + 0:index_x_u_in - margin] = \ seg_in[margin:-margin or None, 0:-margin or None, np.newaxis] + if 
thresholding_for_artificial_class_in_light_version: + prediction_true[index_y_d_in + margin:index_y_u_in - margin, + index_x_d_in + 0:index_x_u_in - margin, 1] = \ + seg_in_art[margin:-margin or None, + 0:-margin or None] + elif i_batch == nxf - 1 and j_batch != 0 and j_batch != nyf - 1: prediction_true[index_y_d_in + margin:index_y_u_in - margin, index_x_d_in + margin:index_x_u_in - 0] = \ seg_in[margin:-margin or None, margin:, np.newaxis] + if thresholding_for_artificial_class_in_light_version: + prediction_true[index_y_d_in + margin:index_y_u_in - margin, + index_x_d_in + margin:index_x_u_in - 0, 1] = \ + seg_in_art[margin:-margin or None, + margin:] + elif i_batch != 0 and i_batch != nxf - 1 and j_batch == 0: prediction_true[index_y_d_in + 0:index_y_u_in - margin, index_x_d_in + margin:index_x_u_in - margin] = \ seg_in[0:-margin or None, margin:-margin or None, np.newaxis] + if thresholding_for_artificial_class_in_light_version: + prediction_true[index_y_d_in + 0:index_y_u_in - margin, + index_x_d_in + margin:index_x_u_in - margin, 1] = \ + seg_in_art[0:-margin or None, + margin:-margin or None] + elif i_batch != 0 and i_batch != nxf - 1 and j_batch == nyf - 1: prediction_true[index_y_d_in + margin:index_y_u_in - 0, index_x_d_in + margin:index_x_u_in - margin] = \ seg_in[margin:, margin:-margin or None, np.newaxis] + if thresholding_for_artificial_class_in_light_version: + prediction_true[index_y_d_in + margin:index_y_u_in - 0, + index_x_d_in + margin:index_x_u_in - margin, 1] = \ + seg_in_art[margin:, + margin:-margin or None] + else: prediction_true[index_y_d_in + margin:index_y_u_in - margin, index_x_d_in + margin:index_x_u_in - margin] = \ seg_in[margin:-margin or None, margin:-margin or None, np.newaxis] + if thresholding_for_artificial_class_in_light_version: + prediction_true[index_y_d_in + margin:index_y_u_in - margin, + index_x_d_in + margin:index_x_u_in - margin, 1] = \ + seg_in_art[margin:-margin or None, + margin:-margin or None] indexer_inside_batch += 1 @@ -979,6 +1051,19 @@ class Eynollah: img_patch[:] = 0 prediction_true = prediction_true.astype(np.uint8) + + if thresholding_for_artificial_class_in_light_version: + kernel_min = np.ones((3, 3), np.uint8) + prediction_true[:,:,0][prediction_true[:,:,0]==2] = 0 + + skeleton_art = skeletonize(prediction_true[:,:,1]) + skeleton_art = skeleton_art*1 + + skeleton_art = skeleton_art.astype('uint8') + + skeleton_art = cv2.dilate(skeleton_art, kernel_min, iterations=1) + + prediction_true[:,:,0][skeleton_art==1]=2 #del model gc.collect() return prediction_true @@ -1117,7 +1202,7 @@ class Eynollah: self, patches, img, model, n_batch_inference=1, marginal_of_patch_percent=0.1, thresholding_for_some_classes_in_light_version=False, - thresholding_for_artificial_class_in_light_version=False): + thresholding_for_artificial_class_in_light_version=False, threshold_art_class_textline=0.1, threshold_art_class_layout=0.1): self.logger.debug("enter do_prediction_new_concept") img_height_model = model.layers[-1].output_shape[1] @@ -1132,19 +1217,28 @@ class Eynollah: label_p_pred = model.predict(img[np.newaxis], verbose=0) seg = np.argmax(label_p_pred, axis=3)[0] - if thresholding_for_artificial_class_in_light_version: - #seg_text = label_p_pred[0,:,:,1] - #seg_text[seg_text<0.2] =0 - #seg_text[seg_text>0] =1 - #seg[seg_text==1]=1 - - seg_art = label_p_pred[0,:,:,4] - seg_art[seg_art<0.2] =0 - seg_art[seg_art>0] =1 - seg[seg_art==1]=4 - seg_color = np.repeat(seg[:, :, np.newaxis], 3, axis=2) prediction_true = resize_image(seg_color, 
img_h_page, img_w_page).astype(np.uint8) + + if thresholding_for_artificial_class_in_light_version: + kernel_min = np.ones((3, 3), np.uint8) + seg_art = label_p_pred[0,:,:,4] + seg_art[seg_art0] =1 + #seg[seg_art==1]=4 + seg_art = resize_image(seg_art, img_h_page, img_w_page).astype(np.uint8) + + prediction_true[:,:,0][prediction_true[:,:,0]==4] = 0 + + skeleton_art = skeletonize(seg_art) + skeleton_art = skeleton_art*1 + + skeleton_art = skeleton_art.astype('uint8') + + skeleton_art = cv2.dilate(skeleton_art, kernel_min, iterations=1) + + prediction_true[:,:,0][skeleton_art==1] = 4 + return prediction_true , resize_image(label_p_pred[0, :, :, 1] , img_h_page, img_w_page) if img.shape[0] < img_height_model: @@ -1217,26 +1311,29 @@ class Eynollah: if thresholding_for_some_classes_in_light_version: seg_art = label_p_pred[:,:,:,4] - seg_art[seg_art<0.2] =0 + seg_art[seg_art0] =1 seg_line = label_p_pred[:,:,:,3] seg_line[seg_line>0.5] =1#seg_line[seg_line>0.1] =1 seg_line[seg_line<1] =0 - seg[seg_art==1]=4 + ##seg[seg_art==1]=4 seg[(seg_line==1) & (seg==0)]=3 if thresholding_for_artificial_class_in_light_version: seg_art = label_p_pred[:,:,:,2] - seg_art[seg_art<0.2] = 0 + seg_art[seg_art0] =1 - seg[seg_art==1]=2 + ##seg[seg_art==1]=2 indexer_inside_batch = 0 for i_batch, j_batch in zip(list_i_s, list_j_s): seg_in = seg[indexer_inside_batch] + + if thresholding_for_artificial_class_in_light_version or thresholding_for_some_classes_in_light_version: + seg_in_art = seg_art[indexer_inside_batch] index_y_u_in = list_y_u[indexer_inside_batch] index_y_d_in = list_y_d[indexer_inside_batch] @@ -1255,6 +1352,12 @@ class Eynollah: label_p_pred[0, 0:-margin or None, 0:-margin or None, 1] + if thresholding_for_artificial_class_in_light_version or thresholding_for_some_classes_in_light_version: + prediction_true[index_y_d_in + 0:index_y_u_in - margin, + index_x_d_in + 0:index_x_u_in - margin, 1] = \ + seg_in_art[0:-margin or None, + 0:-margin or None] + elif i_batch == nxf - 1 and j_batch == nyf - 1: prediction_true[index_y_d_in + margin:index_y_u_in - 0, index_x_d_in + margin:index_x_u_in - 0] = \ @@ -1266,6 +1369,12 @@ class Eynollah: label_p_pred[0, margin:, margin:, 1] + if thresholding_for_artificial_class_in_light_version or thresholding_for_some_classes_in_light_version: + prediction_true[index_y_d_in + margin:index_y_u_in - 0, + index_x_d_in + margin:index_x_u_in - 0, 1] = \ + seg_in_art[margin:, + margin:] + elif i_batch == 0 and j_batch == nyf - 1: prediction_true[index_y_d_in + margin:index_y_u_in - 0, index_x_d_in + 0:index_x_u_in - margin] = \ @@ -1277,6 +1386,13 @@ class Eynollah: label_p_pred[0, margin:, 0:-margin or None, 1] + + if thresholding_for_artificial_class_in_light_version or thresholding_for_some_classes_in_light_version: + prediction_true[index_y_d_in + margin:index_y_u_in - 0, + index_x_d_in + 0:index_x_u_in - margin, 1] = \ + seg_in_art[margin:, + 0:-margin or None] + elif i_batch == nxf - 1 and j_batch == 0: prediction_true[index_y_d_in + 0:index_y_u_in - margin, index_x_d_in + margin:index_x_u_in - 0] = \ @@ -1288,6 +1404,12 @@ class Eynollah: label_p_pred[0, 0:-margin or None, margin:, 1] + if thresholding_for_artificial_class_in_light_version or thresholding_for_some_classes_in_light_version: + prediction_true[index_y_d_in + 0:index_y_u_in - margin, + index_x_d_in + margin:index_x_u_in - 0, 1] = \ + seg_in_art[0:-margin or None, + margin:] + elif i_batch == 0 and j_batch != 0 and j_batch != nyf - 1: prediction_true[index_y_d_in + margin:index_y_u_in - margin, index_x_d_in + 
0:index_x_u_in - margin] = \ @@ -1299,6 +1421,11 @@ class Eynollah: label_p_pred[0, margin:-margin or None, 0:-margin or None, 1] + if thresholding_for_artificial_class_in_light_version or thresholding_for_some_classes_in_light_version: + prediction_true[index_y_d_in + margin:index_y_u_in - margin, + index_x_d_in + 0:index_x_u_in - margin, 1] = \ + seg_in_art[margin:-margin or None, + 0:-margin or None] elif i_batch == nxf - 1 and j_batch != 0 and j_batch != nyf - 1: prediction_true[index_y_d_in + margin:index_y_u_in - margin, index_x_d_in + margin:index_x_u_in - 0] = \ @@ -1310,6 +1437,11 @@ class Eynollah: label_p_pred[0, margin:-margin or None, margin:, 1] + if thresholding_for_artificial_class_in_light_version or thresholding_for_some_classes_in_light_version: + prediction_true[index_y_d_in + margin:index_y_u_in - margin, + index_x_d_in + margin:index_x_u_in - 0, 1] = \ + seg_in_art[margin:-margin or None, + margin:] elif i_batch != 0 and i_batch != nxf - 1 and j_batch == 0: prediction_true[index_y_d_in + 0:index_y_u_in - margin, index_x_d_in + margin:index_x_u_in - margin] = \ @@ -1321,6 +1453,11 @@ class Eynollah: label_p_pred[0, 0:-margin or None, margin:-margin or None, 1] + if thresholding_for_artificial_class_in_light_version or thresholding_for_some_classes_in_light_version: + prediction_true[index_y_d_in + 0:index_y_u_in - margin, + index_x_d_in + margin:index_x_u_in - margin, 1] = \ + seg_in_art[0:-margin or None, + margin:-margin or None] elif i_batch != 0 and i_batch != nxf - 1 and j_batch == nyf - 1: prediction_true[index_y_d_in + margin:index_y_u_in - 0, index_x_d_in + margin:index_x_u_in - margin] = \ @@ -1332,6 +1469,11 @@ class Eynollah: label_p_pred[0, margin:, margin:-margin or None, 1] + if thresholding_for_artificial_class_in_light_version or thresholding_for_some_classes_in_light_version: + prediction_true[index_y_d_in + margin:index_y_u_in - 0, + index_x_d_in + margin:index_x_u_in - margin, 1] = \ + seg_in_art[margin:, + margin:-margin or None] else: prediction_true[index_y_d_in + margin:index_y_u_in - margin, index_x_d_in + margin:index_x_u_in - margin] = \ @@ -1343,6 +1485,11 @@ class Eynollah: label_p_pred[0, margin:-margin or None, margin:-margin or None, 1] + if thresholding_for_artificial_class_in_light_version or thresholding_for_some_classes_in_light_version: + prediction_true[index_y_d_in + margin:index_y_u_in - margin, + index_x_d_in + margin:index_x_u_in - margin, 1] = \ + seg_in_art[margin:-margin or None, + margin:-margin or None] indexer_inside_batch += 1 list_i_s = [] @@ -1356,6 +1503,32 @@ class Eynollah: img_patch[:] = 0 prediction_true = prediction_true.astype(np.uint8) + + if thresholding_for_artificial_class_in_light_version: + kernel_min = np.ones((3, 3), np.uint8) + prediction_true[:,:,0][prediction_true[:,:,0]==2] = 0 + + skeleton_art = skeletonize(prediction_true[:,:,1]) + skeleton_art = skeleton_art*1 + + skeleton_art = skeleton_art.astype('uint8') + + skeleton_art = cv2.dilate(skeleton_art, kernel_min, iterations=1) + + prediction_true[:,:,0][skeleton_art==1]=2 + + if thresholding_for_some_classes_in_light_version: + kernel_min = np.ones((3, 3), np.uint8) + prediction_true[:,:,0][prediction_true[:,:,0]==4] = 0 + + skeleton_art = skeletonize(prediction_true[:,:,1]) + skeleton_art = skeleton_art*1 + + skeleton_art = skeleton_art.astype('uint8') + + skeleton_art = cv2.dilate(skeleton_art, kernel_min, iterations=1) + + prediction_true[:,:,0][skeleton_art==1]=4 gc.collect() return prediction_true, confidence_matrix @@ -1608,7 +1781,7 @@ class 
Eynollah: prediction_textline = self.do_prediction( use_patches, img, self.model_textline, marginal_of_patch_percent=0.15, n_batch_inference=3, - thresholding_for_artificial_class_in_light_version=self.textline_light) + thresholding_for_artificial_class_in_light_version=self.textline_light, threshold_art_class_textline=self.threshold_art_class_textline) #if not self.textline_light: #if num_col_classifier==1: #prediction_textline_nopatch = self.do_prediction(False, img, self.model_textline) @@ -1622,7 +1795,55 @@ class Eynollah: textline_mask_tot_ea_art = textline_mask_tot_ea_art.astype('uint8') #textline_mask_tot_ea_art = cv2.dilate(textline_mask_tot_ea_art, KERNEL, iterations=1) prediction_textline[:,:][textline_mask_tot_ea_art[:,:]==1]=2 + """ + else: + textline_mask_tot_ea_art = textline_mask_tot_ea_art.astype('uint8') + hor_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (8, 1)) + + kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3)) + ##cv2.imwrite('textline_mask_tot_ea_art.png', textline_mask_tot_ea_art) + textline_mask_tot_ea_art = cv2.dilate(textline_mask_tot_ea_art, hor_kernel, iterations=1) + + ###cv2.imwrite('dil_textline_mask_tot_ea_art.png', dil_textline_mask_tot_ea_art) + + textline_mask_tot_ea_art = textline_mask_tot_ea_art.astype('uint8') + + #print(np.shape(dil_textline_mask_tot_ea_art), np.unique(dil_textline_mask_tot_ea_art), 'dil_textline_mask_tot_ea_art') + tsk = time.time() + skeleton_art_textline = skeletonize(textline_mask_tot_ea_art[:,:,0]) + + skeleton_art_textline = skeleton_art_textline*1 + + skeleton_art_textline = skeleton_art_textline.astype('uint8') + + skeleton_art_textline = cv2.dilate(skeleton_art_textline, kernel, iterations=1) + + #print(np.unique(skeleton_art_textline), np.shape(skeleton_art_textline)) + + #print(skeleton_art_textline, np.unique(skeleton_art_textline)) + + #cv2.imwrite('skeleton_art_textline.png', skeleton_art_textline) + + prediction_textline[:,:,0][skeleton_art_textline[:,:]==1]=2 + + #cv2.imwrite('prediction_textline1.png', prediction_textline[:,:,0]) + + ##hor_kernel2 = cv2.getStructuringElement(cv2.MORPH_RECT, (4, 1)) + ##ver_kernel2 = cv2.getStructuringElement(cv2.MORPH_RECT, (1, 3)) + ##textline_mask_tot_ea_main = (prediction_textline[:,:]==1)*1 + ##textline_mask_tot_ea_main = textline_mask_tot_ea_main.astype('uint8') + + ##dil_textline_mask_tot_ea_main = cv2.erode(textline_mask_tot_ea_main, ver_kernel2, iterations=1) + + ##dil_textline_mask_tot_ea_main = cv2.dilate(textline_mask_tot_ea_main, hor_kernel2, iterations=1) + + ##dil_textline_mask_tot_ea_main = cv2.dilate(textline_mask_tot_ea_main, ver_kernel2, iterations=1) + + ##prediction_textline[:,:][dil_textline_mask_tot_ea_main[:,:]==1]=1 + + """ + textline_mask_tot_ea_lines = (prediction_textline[:,:]==1)*1 textline_mask_tot_ea_lines = textline_mask_tot_ea_lines.astype('uint8') if not self.textline_light: @@ -1631,10 +1852,15 @@ class Eynollah: prediction_textline[:,:][textline_mask_tot_ea_lines[:,:]==1]=1 if not self.textline_light: prediction_textline[:,:][old_art[:,:]==1]=2 + + #cv2.imwrite('prediction_textline2.png', prediction_textline[:,:,0]) prediction_textline_longshot = self.do_prediction(False, img, self.model_textline) prediction_textline_longshot_true_size = resize_image(prediction_textline_longshot, img_h, img_w) - + + + #cv2.imwrite('prediction_textline.png', prediction_textline[:,:,0]) + #sys.exit() self.logger.debug('exit textline_contours') return ((prediction_textline[:, :, 0]==1).astype(np.uint8), (prediction_textline_longshot_true_size[:, :, 
0]==1).astype(np.uint8)) @@ -1840,7 +2066,7 @@ class Eynollah: textline_mask_tot_ea = resize_image(textline_mask_tot_ea,img_height_h, img_width_h ) #print(self.image_org.shape) - #cv2.imwrite('out_13.png', self.image_page_org_size) + #cv2.imwrite('textline.png', textline_mask_tot_ea) #plt.imshwo(self.image_page_org_size) #plt.show() @@ -1852,13 +2078,13 @@ class Eynollah: img_resized.shape[1], img_resized.shape[0], num_col_classifier) prediction_regions_org, confidence_matrix = self.do_prediction_new_concept( True, img_resized, self.model_region_1_2, n_batch_inference=1, - thresholding_for_some_classes_in_light_version=True) + thresholding_for_some_classes_in_light_version=True, threshold_art_class_layout=self.threshold_art_class_layout) else: prediction_regions_org = np.zeros((self.image_org.shape[0], self.image_org.shape[1], 3)) confidence_matrix = np.zeros((self.image_org.shape[0], self.image_org.shape[1])) prediction_regions_page, confidence_matrix_page = self.do_prediction_new_concept( False, self.image_page_org_size, self.model_region_1_2, n_batch_inference=1, - thresholding_for_artificial_class_in_light_version=True) + thresholding_for_artificial_class_in_light_version=True, threshold_art_class_layout=self.threshold_art_class_layout) ys = slice(*self.page_coord[0:2]) xs = slice(*self.page_coord[2:4]) prediction_regions_org[ys, xs] = prediction_regions_page @@ -1871,7 +2097,7 @@ class Eynollah: img_resized.shape[1], img_resized.shape[0], new_h, num_col_classifier) prediction_regions_org, confidence_matrix = self.do_prediction_new_concept( True, img_resized, self.model_region_1_2, n_batch_inference=2, - thresholding_for_some_classes_in_light_version=True) + thresholding_for_some_classes_in_light_version=True, threshold_art_class_layout=self.threshold_art_class_layout) ###prediction_regions_org = self.do_prediction(True, img_bin, self.model_region, n_batch_inference=3, thresholding_for_some_classes_in_light_version=True) #print("inside 3 ", time.time()-t_in) #plt.imshow(prediction_regions_org[:,:,0]) @@ -3811,7 +4037,7 @@ class Eynollah: if dilation_m1<6: dilation_m1 = 6 #print(dilation_m1, 'dilation_m1') - dilation_m1 = 6 + dilation_m1 = 4#6 dilation_m2 = int(dilation_m1/2.) 
+1 for i in range(len(x_differential)): @@ -4322,6 +4548,8 @@ class Eynollah: cnt_clean_rot_raw, hir_on_cnt_clean_rot = return_contours_of_image(textline_mask_tot_ea) all_found_textline_polygons = filter_contours_area_of_image( textline_mask_tot_ea, cnt_clean_rot_raw, hir_on_cnt_clean_rot, max_area=1, min_area=0.00001) + + all_found_textline_polygons = all_found_textline_polygons[::-1] all_found_textline_polygons=[ all_found_textline_polygons ] @@ -4329,8 +4557,8 @@ class Eynollah: all_found_textline_polygons) all_found_textline_polygons = self.filter_contours_inside_a_bigger_one( all_found_textline_polygons, None, textline_mask_tot_ea, type_contour="textline") - - + + order_text_new = [0] slopes =[0] id_of_texts_tot =['region_0001'] @@ -4343,7 +4571,7 @@ class Eynollah: polygons_lines_xml = [] contours_tables = [] ocr_all_textlines = None - conf_contours_textregions =None + conf_contours_textregions =[0] pcgts = self.writer.build_pagexml_no_full_layout( cont_page, page_coord, order_text_new, id_of_texts_tot, all_found_textline_polygons, page_coord, polygons_of_images, polygons_of_marginals, @@ -4905,7 +5133,7 @@ class Eynollah_ocr: self.b_s = int(batch_size) else: - self.model_ocr_dir = dir_models + "/model_step_1050000_ocr"#"/model_0_ocr_cnnrnn"#"/model_23_ocr_cnnrnn" + self.model_ocr_dir = dir_models + "/model_ens_ocrcnn_125_225"#"/model_step_125000_ocr"#"/model_step_25000_ocr"#"/model_step_1050000_ocr"#"/model_0_ocr_cnnrnn"#"/model_23_ocr_cnnrnn" model_ocr = load_model(self.model_ocr_dir , compile=False) self.prediction_model = tf.keras.models.Model( From 07f5b52fa704f0d74c9ce8a14234499b958a6849 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Tue, 13 May 2025 14:40:57 +0200 Subject: [PATCH 07/40] The initial attempt at reading heavily deskewed or vertically aligned lines. --- src/eynollah/eynollah.py | 91 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 89 insertions(+), 2 deletions(-) diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py index 0c7c5d2..9f2ca50 100644 --- a/src/eynollah/eynollah.py +++ b/src/eynollah/eynollah.py @@ -22,7 +22,6 @@ from multiprocessing import cpu_count import gc import copy import json - from loky import ProcessPoolExecutor import xml.etree.ElementTree as ET import cv2 @@ -77,7 +76,8 @@ from .utils.contour import ( from .utils.rotate import ( rotate_image, rotation_not_90_func, - rotation_not_90_func_full_layout + rotation_not_90_func_full_layout, + rotation_image_new ) from .utils.separate_lines import ( textline_contours_postprocessing, @@ -5310,6 +5310,75 @@ class Eynollah_ocr: img_fin = img_fin / 255. 
return img_fin + def get_deskewed_contour_and_bb_and_image(self, contour, image, deskew_angle): + (h_in, w_in) = image.shape[:2] + center = (w_in // 2, h_in // 2) + + rotation_matrix = cv2.getRotationMatrix2D(center, deskew_angle, 1.0) + + cos_angle = abs(rotation_matrix[0, 0]) + sin_angle = abs(rotation_matrix[0, 1]) + new_w = int((h_in * sin_angle) + (w_in * cos_angle)) + new_h = int((h_in * cos_angle) + (w_in * sin_angle)) + + rotation_matrix[0, 2] += (new_w / 2) - center[0] + rotation_matrix[1, 2] += (new_h / 2) - center[1] + + deskewed_image = cv2.warpAffine(image, rotation_matrix, (new_w, new_h)) + + contour_points = np.array(contour, dtype=np.float32) + transformed_points = cv2.transform(np.array([contour_points]), rotation_matrix)[0] + + x, y, w, h = cv2.boundingRect(np.array(transformed_points, dtype=np.int32)) + cropped_textline = deskewed_image[y:y+h, x:x+w] + + return cropped_textline + + def rotate_image_with_padding(self, image, angle): + # Get image dimensions + (h, w) = image.shape[:2] + + # Calculate the center of the image + center = (w // 2, h // 2) + + # Get the rotation matrix + rotation_matrix = cv2.getRotationMatrix2D(center, angle, 1.0) + + # Compute the new bounding dimensions + cos = abs(rotation_matrix[0, 0]) + sin = abs(rotation_matrix[0, 1]) + new_w = int((h * sin) + (w * cos)) + new_h = int((h * cos) + (w * sin)) + + # Adjust the rotation matrix to account for translation + rotation_matrix[0, 2] += (new_w / 2) - center[0] + rotation_matrix[1, 2] += (new_h / 2) - center[1] + + # Perform the rotation + rotated_image = cv2.warpAffine(image, rotation_matrix, (new_w, new_h), borderValue=(0, 0, 0)) + + return rotated_image + + def get_orientation_moments(self, contour): + moments = cv2.moments(contour) + if moments["mu20"] - moments["mu02"] == 0: # Avoid division by zero + return 90 if moments["mu11"] > 0 else -90 + else: + angle = 0.5 * np.arctan2(2 * moments["mu11"], moments["mu20"] - moments["mu02"]) + return np.degrees(angle) # Convert radians to degrees + + def get_contours_and_bounding_boxes(self, mask): + # Find contours in the binary mask + contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) + + largest_contour = max(contours, key=cv2.contourArea) if contours else None + + # Get the bounding rectangle for the contour + x, y, w, h = cv2.boundingRect(largest_contour) + #bounding_boxes.append((x, y, w, h)) + + return x, y, w, h + def run(self): ls_imgs = os.listdir(self.dir_in) @@ -5533,6 +5602,10 @@ class Eynollah_ocr: x,y,w,h = cv2.boundingRect(textline_coords) + angle_radians = math.atan2(h, w) + # Convert to degrees + angle_degrees = math.degrees(angle_radians) + if self.draw_texts_on_image: total_bb_coordinates.append([x,y,w,h]) @@ -5549,7 +5622,21 @@ class Eynollah_ocr: mask_poly = mask_poly[y:y+h, x:x+w, :] img_crop = img_poly_on_img[y:y+h, x:x+w, :] if not self.do_not_mask_with_textline_contour: + if angle_degrees > 15: + better_des_slope = self.get_orientation_moments(textline_coords) + + img_crop = self.rotate_image_with_padding(img_crop, -abs(better_des_slope) ) + mask_poly = self.rotate_image_with_padding(mask_poly, -abs(better_des_slope) ) + mask_poly = mask_poly.astype('uint8') + + #new bounding box + x_n, y_n, w_n, h_n = self.get_contours_and_bounding_boxes(mask_poly[:,:,0]) + + mask_poly = mask_poly[y_n:y_n+h_n, x_n:x_n+w_n, :] + img_crop = img_crop[y_n:y_n+h_n, x_n:x_n+w_n, :] + img_crop[mask_poly==0] = 255 + if self.prediction_with_both_of_rgb_and_bin: img_crop_bin[mask_poly==0] = 255 From 
1ccd3fb7cf54d16cfa5969434aa33d059f252797 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Tue, 13 May 2025 15:53:05 +0200 Subject: [PATCH 08/40] Accurately writing text line contours into xml file when the deskewing exceeds 45 degrees and the text line is in light mode --- src/eynollah/writer.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/eynollah/writer.py b/src/eynollah/writer.py index 92e353f..8cd1c8e 100644 --- a/src/eynollah/writer.py +++ b/src/eynollah/writer.py @@ -119,7 +119,7 @@ class EynollahXmlWriter(): points_co += ',' points_co += str(textline_y_coord) - if (self.curved_line or self.textline_light) and np.abs(slopes[region_idx]) <= 45: + if self.textline_light or (self.curved_line and np.abs(slopes[region_idx]) <= 45): if len(contour_textline) == 2: points_co += str(int((contour_textline[0] + page_coord[2]) / self.scale_x)) points_co += ',' @@ -128,7 +128,7 @@ class EynollahXmlWriter(): points_co += str(int((contour_textline[0][0] + page_coord[2]) / self.scale_x)) points_co += ',' points_co += str(int((contour_textline[0][1] + page_coord[0])/self.scale_y)) - elif (self.curved_line or self.textline_light) and np.abs(slopes[region_idx]) > 45: + elif self.curved_line and np.abs(slopes[region_idx]) > 45: if len(contour_textline)==2: points_co += str(int((contour_textline[0] + region_bboxes[2] + page_coord[2])/self.scale_x)) points_co += ',' From a9cdd56e9a2a30f89020487fe2567df9d5426fa0 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Wed, 14 May 2025 18:34:58 +0200 Subject: [PATCH 09/40] enhance ocr for vertical textlines --- src/eynollah/eynollah.py | 79 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 78 insertions(+), 1 deletion(-) diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py index 9f2ca50..5a73ef3 100644 --- a/src/eynollah/eynollah.py +++ b/src/eynollah/eynollah.py @@ -5133,7 +5133,7 @@ class Eynollah_ocr: self.b_s = int(batch_size) else: - self.model_ocr_dir = dir_models + "/model_ens_ocrcnn_125_225"#"/model_step_125000_ocr"#"/model_step_25000_ocr"#"/model_step_1050000_ocr"#"/model_0_ocr_cnnrnn"#"/model_23_ocr_cnnrnn" + self.model_ocr_dir = dir_models + "/model_step_425000_ocr"#"/model_step_125000_ocr"#"/model_step_25000_ocr"#"/model_step_1050000_ocr"#"/model_0_ocr_cnnrnn"#"/model_23_ocr_cnnrnn" model_ocr = load_model(self.model_ocr_dir , compile=False) self.prediction_model = tf.keras.models.Model( @@ -5585,6 +5585,7 @@ class Eynollah_ocr: region_tags=np.unique([x for x in alltags if x.endswith('TextRegion')]) cropped_lines = [] + cropped_lines_ver_index = [] cropped_lines_region_indexer = [] cropped_lines_meging_indexing = [] @@ -5644,6 +5645,11 @@ class Eynollah_ocr: if w_scaled < 1.5*image_width: img_fin = self.preprocess_and_resize_image_for_ocrcnn_model(img_crop, image_height, image_width) cropped_lines.append(img_fin) + if angle_degrees > 15: + cropped_lines_ver_index.append(1) + else: + cropped_lines_ver_index.append(0) + cropped_lines_meging_indexing.append(0) if self.prediction_with_both_of_rgb_and_bin: img_fin = self.preprocess_and_resize_image_for_ocrcnn_model(img_crop_bin, image_height, image_width) @@ -5657,11 +5663,22 @@ class Eynollah_ocr: img_fin = self.preprocess_and_resize_image_for_ocrcnn_model(splited_images[0], image_height, image_width) cropped_lines.append(img_fin) cropped_lines_meging_indexing.append(1) + + if angle_degrees > 15: + cropped_lines_ver_index.append(1) + else: + cropped_lines_ver_index.append(0) + img_fin = 
self.preprocess_and_resize_image_for_ocrcnn_model(splited_images[1], image_height, image_width) cropped_lines.append(img_fin) cropped_lines_meging_indexing.append(-1) + if angle_degrees > 15: + cropped_lines_ver_index.append(1) + else: + cropped_lines_ver_index.append(0) + if self.prediction_with_both_of_rgb_and_bin: img_fin = self.preprocess_and_resize_image_for_ocrcnn_model(splited_images_bin[0], image_height, image_width) cropped_lines_bin.append(img_fin) @@ -5673,6 +5690,11 @@ class Eynollah_ocr: cropped_lines.append(img_fin) cropped_lines_meging_indexing.append(0) + if angle_degrees > 15: + cropped_lines_ver_index.append(1) + else: + cropped_lines_ver_index.append(0) + if self.prediction_with_both_of_rgb_and_bin: img_fin = self.preprocess_and_resize_image_for_ocrcnn_model(img_crop_bin, image_height, image_width) cropped_lines_bin.append(img_fin) @@ -5722,6 +5744,19 @@ class Eynollah_ocr: imgs = cropped_lines[n_start:] imgs = np.array(imgs) imgs = imgs.reshape(imgs.shape[0], image_height, image_width, 3) + + ver_imgs = np.array( cropped_lines_ver_index[n_start:] ) + indices_ver = np.where(ver_imgs == 1)[0] + + #print(indices_ver, 'indices_ver') + if len(indices_ver)>0: + imgs_ver_flipped = imgs[indices_ver, : ,: ,:] + imgs_ver_flipped = imgs_ver_flipped[:,::-1,::-1,:] + #print(imgs_ver_flipped, 'imgs_ver_flipped') + + else: + imgs_ver_flipped = None + if self.prediction_with_both_of_rgb_and_bin: imgs_bin = cropped_lines_bin[n_start:] imgs_bin = np.array(imgs_bin) @@ -5732,12 +5767,54 @@ class Eynollah_ocr: imgs = cropped_lines[n_start:n_end] imgs = np.array(imgs).reshape(self.b_s, image_height, image_width, 3) + ver_imgs = np.array( cropped_lines_ver_index[n_start:n_end] ) + indices_ver = np.where(ver_imgs == 1)[0] + #print(indices_ver, 'indices_ver') + + if len(indices_ver)>0: + imgs_ver_flipped = imgs[indices_ver, : ,: ,:] + imgs_ver_flipped = imgs_ver_flipped[:,::-1,::-1,:] + #print(imgs_ver_flipped, 'imgs_ver_flipped') + else: + imgs_ver_flipped = None + + if self.prediction_with_both_of_rgb_and_bin: imgs_bin = cropped_lines_bin[n_start:n_end] imgs_bin = np.array(imgs_bin).reshape(self.b_s, image_height, image_width, 3) preds = self.prediction_model.predict(imgs, verbose=0) + + if len(indices_ver)>0: + #cv2.imwrite('flipped.png', (imgs_ver_flipped[0, :,:,:]*255).astype('uint8')) + #cv2.imwrite('original.png', (imgs[0, :,:,:]*255).astype('uint8')) + #sys.exit() + #print(imgs_ver_flipped.shape, 'imgs_ver_flipped.shape') + preds_flipped = self.prediction_model.predict(imgs_ver_flipped, verbose=0) + preds_max_fliped = np.max(preds_flipped, axis=2 ) + preds_max_args_flipped = np.argmax(preds_flipped, axis=2 ) + pred_max_not_unk_mask_bool_flipped = preds_max_args_flipped[:,:]!=256 + masked_means_flipped = np.sum(preds_max_fliped * pred_max_not_unk_mask_bool_flipped, axis=1) / np.sum(pred_max_not_unk_mask_bool_flipped, axis=1) + masked_means_flipped[np.isnan(masked_means_flipped)] = 0 + #print(masked_means_flipped, 'masked_means_flipped') + + preds_max = np.max(preds, axis=2 ) + preds_max_args = np.argmax(preds, axis=2 ) + pred_max_not_unk_mask_bool = preds_max_args[:,:]!=256 + + masked_means = np.sum(preds_max * pred_max_not_unk_mask_bool, axis=1) / np.sum(pred_max_not_unk_mask_bool, axis=1) + masked_means[np.isnan(masked_means)] = 0 + + masked_means_ver = masked_means[indices_ver] + #print(masked_means_ver, 'pred_max_not_unk') + + indices_where_flipped_conf_value_is_higher = np.where(masked_means_flipped > masked_means_ver)[0] + + #print(indices_where_flipped_conf_value_is_higher, 
'indices_where_flipped_conf_value_is_higher') + if len(indices_where_flipped_conf_value_is_higher)>0: + indices_to_be_replaced = indices_ver[indices_where_flipped_conf_value_is_higher] + preds[indices_to_be_replaced,:,:] = preds_flipped[indices_where_flipped_conf_value_is_higher, :, :] if self.prediction_with_both_of_rgb_and_bin: preds_bin = self.prediction_model.predict(imgs_bin, verbose=0) preds = (preds + preds_bin) / 2. From adee1dc55cb67ad20fa0d6eb4a8ebc9edfa6d64a Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Thu, 15 May 2025 00:45:22 +0200 Subject: [PATCH 10/40] enhancement for vertical textlines --- src/eynollah/eynollah.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py index 5a73ef3..2e54687 100644 --- a/src/eynollah/eynollah.py +++ b/src/eynollah/eynollah.py @@ -5626,8 +5626,8 @@ class Eynollah_ocr: if angle_degrees > 15: better_des_slope = self.get_orientation_moments(textline_coords) - img_crop = self.rotate_image_with_padding(img_crop, -abs(better_des_slope) ) - mask_poly = self.rotate_image_with_padding(mask_poly, -abs(better_des_slope) ) + img_crop = self.rotate_image_with_padding(img_crop, better_des_slope ) + mask_poly = self.rotate_image_with_padding(mask_poly, better_des_slope ) mask_poly = mask_poly.astype('uint8') #new bounding box From 0819730355eba1b5b8e566048809b7c92610ff4d Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Thu, 15 May 2025 15:33:50 +0200 Subject: [PATCH 11/40] marginals detection enhanced for light version --- src/eynollah/utils/marginals.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/eynollah/utils/marginals.py b/src/eynollah/utils/marginals.py index a29e50d..c0c4892 100644 --- a/src/eynollah/utils/marginals.py +++ b/src/eynollah/utils/marginals.py @@ -26,8 +26,10 @@ def get_marginals(text_with_lines, text_regions, num_col, slope_deskew, light_ve text_with_lines=resize_image(text_with_lines,int(text_with_lines.shape[0]*1.8),text_with_lines.shape[1]) text_with_lines=cv2.erode(text_with_lines,kernel,iterations=7) text_with_lines=resize_image(text_with_lines,text_with_lines_eroded.shape[0],text_with_lines_eroded.shape[1]) - - + + if light_version: + text_with_lines=rotate_image(text_with_lines,-slope_deskew) + text_with_lines_y=text_with_lines.sum(axis=0) text_with_lines_y_eroded=text_with_lines_eroded.sum(axis=0) From 7a34bbb49333e78808d5eb0a2eaca406a35fa948 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Sun, 18 May 2025 02:48:05 +0200 Subject: [PATCH 12/40] enhancing marginal detection for light version --- src/eynollah/eynollah.py | 7 +++---- src/eynollah/utils/marginals.py | 13 ++++++++----- src/eynollah/utils/separate_lines.py | 4 ++-- 3 files changed, 13 insertions(+), 11 deletions(-) diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py index 2e54687..08a781c 100644 --- a/src/eynollah/eynollah.py +++ b/src/eynollah/eynollah.py @@ -272,7 +272,7 @@ class Eynollah: self.model_region_dir_p_ens = dir_models + "/eynollah-main-regions-ensembled_20210425" self.model_region_dir_p_ens_light = dir_models + "/eynollah-main-regions_20220314" self.model_region_dir_p_ens_light_only_images_extraction = dir_models + "/eynollah-main-regions_20231127_672_org_ens_11_13_16_17_18" - self.model_reading_order_dir = dir_models + "/model_ens_reading_order_machine_based" + self.model_reading_order_dir = dir_models + "/model_step_2500000_mb_ro"#"/model_ens_reading_order_machine_based" #"/modelens_12sp_elay_0_3_4__3_6_n" 
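# A minimal illustration of the masking pattern that the hunks further down retune
# (for instance the seg_line cutoff changes from 0.5 to 0.3 below). The names
# channel_to_mask and pred are illustrative only; pred stands for one softmax channel
# of the segmentation output, with values in [0, 1].
import numpy as np

def channel_to_mask(pred, threshold):
    # everything above the cutoff becomes foreground (1), the rest background (0)
    mask = np.zeros_like(pred, dtype=np.uint8)
    mask[pred > threshold] = 1
    return mask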
#"/modelens_earlylayout_12spaltige_2_3_5_6_7_8" #"/modelens_early12_sp_2_3_5_6_7_8_9_10_12_14_15_16_18" @@ -1315,7 +1315,7 @@ class Eynollah: seg_art[seg_art>0] =1 seg_line = label_p_pred[:,:,:,3] - seg_line[seg_line>0.5] =1#seg_line[seg_line>0.1] =1 + seg_line[seg_line>0.3] =1#seg_line[seg_line>0.5] =1#seg_line[seg_line>0.1] =1 seg_line[seg_line<1] =0 ##seg[seg_art==1]=4 @@ -3667,7 +3667,6 @@ class Eynollah: peaks_real, _ = find_peaks(sum_smoothed, height=0) if len(peaks_real)>70: - print(len(peaks_real), 'len(peaks_real)') peaks_real = peaks_real[(peaks_realwidth1)] @@ -5133,7 +5132,7 @@ class Eynollah_ocr: self.b_s = int(batch_size) else: - self.model_ocr_dir = dir_models + "/model_step_425000_ocr"#"/model_step_125000_ocr"#"/model_step_25000_ocr"#"/model_step_1050000_ocr"#"/model_0_ocr_cnnrnn"#"/model_23_ocr_cnnrnn" + self.model_ocr_dir = dir_models + "/model_step_600000_ocr"#"/model_step_125000_ocr"#"/model_step_25000_ocr"#"/model_step_1050000_ocr"#"/model_0_ocr_cnnrnn"#"/model_23_ocr_cnnrnn" model_ocr = load_model(self.model_ocr_dir , compile=False) self.prediction_model = tf.keras.models.Model( diff --git a/src/eynollah/utils/marginals.py b/src/eynollah/utils/marginals.py index c0c4892..ac8dc1d 100644 --- a/src/eynollah/utils/marginals.py +++ b/src/eynollah/utils/marginals.py @@ -10,7 +10,6 @@ def get_marginals(text_with_lines, text_regions, num_col, slope_deskew, light_ve mask_marginals=np.zeros((text_with_lines.shape[0],text_with_lines.shape[1])) mask_marginals=mask_marginals.astype(np.uint8) - text_with_lines=text_with_lines.astype(np.uint8) ##text_with_lines=cv2.erode(text_with_lines,self.kernel,iterations=3) @@ -26,9 +25,11 @@ def get_marginals(text_with_lines, text_regions, num_col, slope_deskew, light_ve text_with_lines=resize_image(text_with_lines,int(text_with_lines.shape[0]*1.8),text_with_lines.shape[1]) text_with_lines=cv2.erode(text_with_lines,kernel,iterations=7) text_with_lines=resize_image(text_with_lines,text_with_lines_eroded.shape[0],text_with_lines_eroded.shape[1]) - + + if light_version: - text_with_lines=rotate_image(text_with_lines,-slope_deskew) + kernel_hor = np.ones((1, 5), dtype=np.uint8) + text_with_lines = cv2.erode(text_with_lines,kernel_hor,iterations=6) text_with_lines_y=text_with_lines.sum(axis=0) text_with_lines_y_eroded=text_with_lines_eroded.sum(axis=0) @@ -42,8 +43,10 @@ def get_marginals(text_with_lines, text_regions, num_col, slope_deskew, light_ve elif thickness_along_y_percent>=30 and thickness_along_y_percent<50: min_textline_thickness=20 else: - min_textline_thickness=40 - + if light_version: + min_textline_thickness=45 + else: + min_textline_thickness=40 if thickness_along_y_percent>=14: diff --git a/src/eynollah/utils/separate_lines.py b/src/eynollah/utils/separate_lines.py index 0322579..6289d4d 100644 --- a/src/eynollah/utils/separate_lines.py +++ b/src/eynollah/utils/separate_lines.py @@ -1466,7 +1466,7 @@ def return_deskew_slop(img_patch_org, sigma_des,n_tot_angles=100, main_page=False, logger=None, plotter=None, map=map): if main_page and plotter: plotter.save_plot_of_textline_density(img_patch_org) - + img_int=np.zeros((img_patch_org.shape[0],img_patch_org.shape[1])) img_int[:,:]=img_patch_org[:,:]#img_patch_org[:,:,0] @@ -1487,7 +1487,7 @@ def return_deskew_slop(img_patch_org, sigma_des,n_tot_angles=100, angles = np.linspace(angle - 22.5, angle + 22.5, n_tot_angles) angle = get_smallest_skew(img_resized, sigma_des, angles, map=map, logger=logger, plotter=plotter) elif main_page: - angles = np.linspace(-12, 12, 
n_tot_angles)#np.array([0 , 45 , 90 , -45]) + angles = np.array (list(np.linspace(-12, -7, int(n_tot_angles/4))) + list(np.linspace(-6, 6, n_tot_angles- 2* int(n_tot_angles/4))) + list(np.linspace(7, 12, int(n_tot_angles/4))))#np.linspace(-12, 12, n_tot_angles)#np.array([0 , 45 , 90 , -45]) angle = get_smallest_skew(img_resized, sigma_des, angles, map=map, logger=logger, plotter=plotter) early_slope_edge=11 From 848156dd9d2bcb834f33591a1377a9451e1d919f Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Tue, 20 May 2025 16:51:08 +0200 Subject: [PATCH 13/40] mb reading order now can be done faster. Text regions are clustered using dilation, and mb reading order needs to be implemented for fewer regions --- src/eynollah/eynollah.py | 181 +++++++++++++++++++++++++++++---- src/eynollah/utils/__init__.py | 2 +- 2 files changed, 163 insertions(+), 20 deletions(-) diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py index 08a781c..eb5c860 100644 --- a/src/eynollah/eynollah.py +++ b/src/eynollah/eynollah.py @@ -32,6 +32,7 @@ from numba import cuda from skimage.morphology import skeletonize from ocrd import OcrdPage from ocrd_utils import getLogger, tf_disable_interactive_logs +import statistics try: import torch @@ -797,7 +798,7 @@ class Eynollah: self, patches, img, model, n_batch_inference=1, marginal_of_patch_percent=0.1, thresholding_for_some_classes_in_light_version=False, - thresholding_for_artificial_class_in_light_version=False, threshold_art_class_textline=0.1): + thresholding_for_artificial_class_in_light_version=False, thresholding_for_fl_light_version=False, threshold_art_class_textline=0.1): self.logger.debug("enter do_prediction") img_height_model = model.layers[-1].output_shape[1] @@ -822,6 +823,15 @@ class Eynollah: skeleton_art = skeleton_art*1 seg[skeleton_art==1]=2 + + if thresholding_for_fl_light_version: + seg_header = label_p_pred[0,:,:,2] + + seg_header[seg_header<0.2] = 0 + seg_header[seg_header>0] =1 + + seg[seg_header==1]=2 + seg_color = np.repeat(seg[:, :, np.newaxis], 3, axis=2) prediction_true = resize_image(seg_color, img_h_page, img_w_page).astype(np.uint8) return prediction_true @@ -1613,10 +1623,11 @@ class Eynollah: model_region = self.model_region_fl if patches else self.model_region_fl_np if self.light_version: - pass + thresholding_for_fl_light_version = True elif not patches: img = otsu_copy_binary(img).astype(np.uint8) prediction_regions = None + thresholding_for_fl_light_version = False elif cols: img = otsu_copy_binary(img).astype(np.uint8) if cols == 1: @@ -1632,7 +1643,7 @@ class Eynollah: else: img = resize_image(img, int(img_height_h * 2500 / float(img_width_h)), 2500).astype(np.uint8) - prediction_regions = self.do_prediction(patches, img, model_region, marginal_of_patch_percent=0.1, n_batch_inference=3) + prediction_regions = self.do_prediction(patches, img, model_region, marginal_of_patch_percent=0.1, n_batch_inference=3, thresholding_for_fl_light_version=thresholding_for_fl_light_version) prediction_regions = resize_image(prediction_regions, img_height_h, img_width_h) self.logger.debug("exit extract_text_regions") return prediction_regions, prediction_regions @@ -3544,9 +3555,87 @@ class Eynollah: return model def do_order_of_regions_with_model(self, contours_only_text_parent, contours_only_text_parent_h, text_regions_p): + #cv2.imwrite('textregions.png', text_regions_p*50) + min_cont_size_to_be_dilated = 10 + if len(contours_only_text_parent)>min_cont_size_to_be_dilated: + ver_kernel = np.ones((5, 1), dtype=np.uint8) + + cx_conts, 
cy_conts, x_min_conts, x_max_conts, y_min_conts, y_max_conts, _ = find_new_features_of_contours(contours_only_text_parent) + args_cont_located = np.array(range(len(contours_only_text_parent))) + + diff_y_conts = np.abs(y_max_conts[:]-y_min_conts) + diff_x_conts = np.abs(x_max_conts[:]-x_min_conts) + + mean_x = statistics.mean(diff_x_conts) + median_x = statistics.median(diff_x_conts) + + + diff_x_ratio= diff_x_conts/mean_x + + args_cont_located_excluded = args_cont_located[diff_x_ratio>=1.3] + args_cont_located_included = args_cont_located[diff_x_ratio<1.3] + + contours_only_text_parent_excluded = [contours_only_text_parent[ind] for ind in range(len(contours_only_text_parent)) if diff_x_ratio[ind]>=1.3]#contours_only_text_parent[diff_x_ratio>=1.3] + contours_only_text_parent_included = [contours_only_text_parent[ind] for ind in range(len(contours_only_text_parent)) if diff_x_ratio[ind]<1.3]#contours_only_text_parent[diff_x_ratio<1.3] + + + cx_conts_excluded = [cx_conts[ind] for ind in range(len(cx_conts)) if diff_x_ratio[ind]>=1.3]#cx_conts[diff_x_ratio>=1.3] + cx_conts_included = [cx_conts[ind] for ind in range(len(cx_conts)) if diff_x_ratio[ind]<1.3]#cx_conts[diff_x_ratio<1.3] + + cy_conts_excluded = [cy_conts[ind] for ind in range(len(cy_conts)) if diff_x_ratio[ind]>=1.3]#cy_conts[diff_x_ratio>=1.3] + cy_conts_included = [cy_conts[ind] for ind in range(len(cy_conts)) if diff_x_ratio[ind]<1.3]#cy_conts[diff_x_ratio<1.3] + + #print(diff_x_ratio, 'ratio') + text_regions_p = text_regions_p.astype('uint8') + + if len(contours_only_text_parent_excluded)>0: + textregion_par = np.zeros((text_regions_p.shape[0], text_regions_p.shape[1])).astype('uint8') + textregion_par = cv2.fillPoly(textregion_par, pts=contours_only_text_parent_included, color=(1,1)) + else: + textregion_par = (text_regions_p[:,:]==1)*1 + textregion_par = textregion_par.astype('uint8') + + + text_regions_p_textregions_dilated = cv2.dilate(textregion_par , ver_kernel, iterations=8) + text_regions_p_textregions_dilated[text_regions_p[:,:]>1] = 0 + + #cv2.imwrite('textregions_dilated.png', text_regions_p_textregions_dilated*255) + + + contours_only_dilated, hir_on_text_dilated = return_contours_of_image(text_regions_p_textregions_dilated) + contours_only_dilated = return_parent_contours(contours_only_dilated, hir_on_text_dilated) + + indexes_of_located_cont, center_x_coordinates_of_located, center_y_coordinates_of_located = self.return_indexes_of_contours_loctaed_inside_another_list_of_contours(contours_only_dilated, contours_only_text_parent_included, cx_conts_included, cy_conts_included, args_cont_located_included) + + + if len(args_cont_located_excluded)>0: + for ind in args_cont_located_excluded: + indexes_of_located_cont.append(np.array([ind])) + contours_only_dilated.append(contours_only_text_parent[ind]) + center_y_coordinates_of_located.append(0) + + array_list = [np.array([elem]) if isinstance(elem, int) else elem for elem in indexes_of_located_cont] + flattened_array = np.concatenate([arr.ravel() for arr in array_list]) + #print(len( np.unique(flattened_array)), 'indexes_of_located_cont uniques') + + missing_textregions = list( set(np.array(range(len(contours_only_text_parent))) ) - set(np.unique(flattened_array)) ) + #print(missing_textregions, 'missing_textregions') + + for ind in missing_textregions: + indexes_of_located_cont.append(np.array([ind])) + contours_only_dilated.append(contours_only_text_parent[ind]) + center_y_coordinates_of_located.append(0) + + + if contours_only_text_parent_h: + for vi in 
range(len(contours_only_text_parent_h)): + indexes_of_located_cont.append(int(vi+len(contours_only_text_parent))) + + array_list = [np.array([elem]) if isinstance(elem, int) else elem for elem in indexes_of_located_cont] + flattened_array = np.concatenate([arr.ravel() for arr in array_list]) + y_len = text_regions_p.shape[0] x_len = text_regions_p.shape[1] - img_poly = np.zeros((y_len,x_len), dtype='uint8') img_poly[text_regions_p[:,:]==1] = 1 @@ -3554,25 +3643,24 @@ class Eynollah: img_poly[text_regions_p[:,:]==3] = 4 img_poly[text_regions_p[:,:]==6] = 5 - - ###temp - ##sep_mask = (img_poly==5)*1 - ##sep_mask = sep_mask.astype('uint8') - ##sep_mask = cv2.erode(sep_mask, kernel=KERNEL, iterations=2) - ##img_poly[img_poly==5] = 0 - ##img_poly[sep_mask==1] = 5 - ### - img_header_and_sep = np.zeros((y_len,x_len), dtype='uint8') if contours_only_text_parent_h: _, cy_main, x_min_main, x_max_main, y_min_main, y_max_main, _ = find_new_features_of_contours( contours_only_text_parent_h) for j in range(len(cy_main)): img_header_and_sep[int(y_max_main[j]):int(y_max_main[j])+12, - int(x_min_main[j]):int(x_max_main[j])] = 1 - co_text_all = contours_only_text_parent + contours_only_text_parent_h + int(x_min_main[j]):int(x_max_main[j])] = 1 + co_text_all_org = contours_only_text_parent + contours_only_text_parent_h + if len(contours_only_text_parent)>min_cont_size_to_be_dilated: + co_text_all = contours_only_dilated + contours_only_text_parent_h + else: + co_text_all = contours_only_text_parent + contours_only_text_parent_h else: - co_text_all = contours_only_text_parent + co_text_all_org = contours_only_text_parent + if len(contours_only_text_parent)>min_cont_size_to_be_dilated: + co_text_all = contours_only_dilated + else: + co_text_all = contours_only_text_parent if not len(co_text_all): return [], [] @@ -3651,8 +3739,26 @@ class Eynollah: break ordered = [i[0] for i in ordered] - region_ids = ['region_%04d' % i for i in range(len(co_text_all))] - return ordered, region_ids + + if len(contours_only_text_parent)>min_cont_size_to_be_dilated: + org_contours_indexes = [] + for ind in range(len(ordered)): + region_with_curr_order = ordered[ind] + if region_with_curr_order < len(contours_only_dilated): + if np.isscalar(indexes_of_located_cont[region_with_curr_order]): + org_contours_indexes = org_contours_indexes + [indexes_of_located_cont[region_with_curr_order]] + else: + arg_sort_located_cont = np.argsort(center_y_coordinates_of_located[region_with_curr_order]) + org_contours_indexes = org_contours_indexes + list(np.array(indexes_of_located_cont[region_with_curr_order])[arg_sort_located_cont]) ##org_contours_indexes + list ( + else: + org_contours_indexes = org_contours_indexes + [indexes_of_located_cont[region_with_curr_order]] + + region_ids = ['region_%04d' % i for i in range(len(co_text_all_org))] + return org_contours_indexes, region_ids + else: + region_ids = ['region_%04d' % i for i in range(len(co_text_all_org))] + return ordered, region_ids + def return_start_and_end_of_common_text_of_textline_ocr(self, textline_image, ind_tot): width = np.shape(textline_image)[1] @@ -4293,6 +4399,29 @@ class Eynollah: contours[ind_u_a_trs].pop(ittrd) return contours + + def return_indexes_of_contours_loctaed_inside_another_list_of_contours(self, contours, contours_loc, cx_main_loc, cy_main_loc, indexes_loc): + indexes_of_located_cont = [] + center_x_coordinates_of_located = [] + center_y_coordinates_of_located = [] + #M_main_tot = [cv2.moments(contours_loc[j]) + #for j in range(len(contours_loc))] + 
#cx_main_loc = [(M_main_tot[j]["m10"] / (M_main_tot[j]["m00"] + 1e-32)) for j in range(len(M_main_tot))] + #cy_main_loc = [(M_main_tot[j]["m01"] / (M_main_tot[j]["m00"] + 1e-32)) for j in range(len(M_main_tot))] + + for ij in range(len(contours)): + results = [cv2.pointPolygonTest(contours[ij], (cx_main_loc[ind], cy_main_loc[ind]), False) + for ind in range(len(cy_main_loc)) ] + results = np.array(results) + indexes_in = np.where((results == 0) | (results == 1)) + indexes = indexes_loc[indexes_in]# [(results == 0) | (results == 1)]#np.where((results == 0) | (results == 1)) + + indexes_of_located_cont.append(indexes) + center_x_coordinates_of_located.append(np.array(cx_main_loc)[indexes_in] ) + center_y_coordinates_of_located.append(np.array(cy_main_loc)[indexes_in] ) + + return indexes_of_located_cont, center_x_coordinates_of_located, center_y_coordinates_of_located + def filter_contours_without_textline_inside( self, contours,text_con_org, contours_textline, contours_only_text_parent_d_ordered, conf_contours_textregions): @@ -4986,8 +5115,10 @@ class Eynollah: if self.full_layout: if self.reading_order_machine_based: + tror = time.time() order_text_new, id_of_texts_tot = self.do_order_of_regions_with_model( contours_only_text_parent, contours_only_text_parent_h, text_regions_p) + print('time spend for mb ro', time.time()-tror) else: if np.abs(slope_deskew) < SLOPE_THRESHOLD: order_text_new, id_of_texts_tot = self.do_order_of_regions( @@ -5619,8 +5750,15 @@ class Eynollah_ocr: mask_poly = np.zeros(img.shape) mask_poly = cv2.fillPoly(mask_poly, pts=[textline_coords], color=(1, 1, 1)) + mask_poly = mask_poly[y:y+h, x:x+w, :] img_crop = img_poly_on_img[y:y+h, x:x+w, :] + + if angle_degrees<=15: + if mask_poly[:,:,0].sum() /float(w*h) < 0.6 and w_scaled > 520: + cv2.imwrite(file_name+'_desk.png', img_crop) + + print(file_name, angle_degrees,w*h , mask_poly[:,:,0].sum(), mask_poly[:,:,0].sum() /float(w*h) , 'didi') if not self.do_not_mask_with_textline_contour: if angle_degrees > 15: better_des_slope = self.get_orientation_moments(textline_coords) @@ -5634,6 +5772,11 @@ class Eynollah_ocr: mask_poly = mask_poly[y_n:y_n+h_n, x_n:x_n+w_n, :] img_crop = img_crop[y_n:y_n+h_n, x_n:x_n+w_n, :] + + if mask_poly[:,:,0].sum() /float(w_n*h_n) < 0.6 and w_scaled > 520: + cv2.imwrite(file_name+'_desk.png', img_crop) + + print(file_name,w_n*h_n , mask_poly[:,:,0].sum(), mask_poly[:,:,0].sum() /float(w_n*h_n) , 'ikiiiiii') img_crop[mask_poly==0] = 255 @@ -5641,7 +5784,7 @@ class Eynollah_ocr: img_crop_bin[mask_poly==0] = 255 if not self.export_textline_images_and_text: - if w_scaled < 1.5*image_width: + if w_scaled < 640:#1.5*image_width: img_fin = self.preprocess_and_resize_image_for_ocrcnn_model(img_crop, image_height, image_width) cropped_lines.append(img_fin) if angle_degrees > 15: diff --git a/src/eynollah/utils/__init__.py b/src/eynollah/utils/__init__.py index c5962f8..7fa4a7b 100644 --- a/src/eynollah/utils/__init__.py +++ b/src/eynollah/utils/__init__.py @@ -992,7 +992,7 @@ def check_any_text_region_in_model_one_is_main_or_header_light( (regions_model_full[:,:,0]==2)).sum() pixels_main = all_pixels - pixels_header - if (pixels_header/float(pixels_main)>=0.3) and ( (length_con[ii]/float(height_con[ii]) )>=1.3 ): + if ( (pixels_header/float(pixels_main)>=0.6) and ( (length_con[ii]/float(height_con[ii]) )>=1.3 ) and ( (length_con[ii]/float(height_con[ii]) )<=3 )) or ( (pixels_header/float(pixels_main)>=0.3) and ( (length_con[ii]/float(height_con[ii]) )>=3 ) ): 
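# The condition above is the retuned header heuristic: pixels_header/pixels_main is the
# ratio of header-class to main-text pixels inside the region, and length_con/height_con
# is roughly its width-to-height ratio. A standalone restatement of the same rule
# (is_header and its argument names are illustrative, not identifiers from the codebase):
def is_header(header_pixel_ratio, aspect_ratio):
    if aspect_ratio >= 3:
        return header_pixel_ratio >= 0.3
    if aspect_ratio >= 1.3:
        return header_pixel_ratio >= 0.6
    return False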
regions_model_1[:,:][(regions_model_1[:,:]==1) & (img[:,:,0]==255) ]=2 contours_only_text_parent_head.append(con) if contours_only_text_parent_d_ordered is not None: From c0835665a9d6a6f16dc42ee287aaf5da064927bd Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Tue, 20 May 2025 19:01:52 +0200 Subject: [PATCH 14/40] ocr for curved lines --- src/eynollah/eynollah.py | 157 ++++++++++++++++++++++++++++++++++++--- 1 file changed, 146 insertions(+), 11 deletions(-) diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py index eb5c860..912bc31 100644 --- a/src/eynollah/eynollah.py +++ b/src/eynollah/eynollah.py @@ -5263,7 +5263,7 @@ class Eynollah_ocr: self.b_s = int(batch_size) else: - self.model_ocr_dir = dir_models + "/model_step_600000_ocr"#"/model_step_125000_ocr"#"/model_step_25000_ocr"#"/model_step_1050000_ocr"#"/model_0_ocr_cnnrnn"#"/model_23_ocr_cnnrnn" + self.model_ocr_dir = dir_models + "/model_step_750000_ocr"#"/model_step_125000_ocr"#"/model_step_25000_ocr"#"/model_step_1050000_ocr"#"/model_0_ocr_cnnrnn"#"/model_23_ocr_cnnrnn" model_ocr = load_model(self.model_ocr_dir , compile=False) self.prediction_model = tf.keras.models.Model( @@ -5464,7 +5464,7 @@ class Eynollah_ocr: return cropped_textline - def rotate_image_with_padding(self, image, angle): + def rotate_image_with_padding(self, image, angle, border_value=(0,0,0)): # Get image dimensions (h, w) = image.shape[:2] @@ -5485,7 +5485,7 @@ class Eynollah_ocr: rotation_matrix[1, 2] += (new_h / 2) - center[1] # Perform the rotation - rotated_image = cv2.warpAffine(image, rotation_matrix, (new_w, new_h), borderValue=(0, 0, 0)) + rotated_image = cv2.warpAffine(image, rotation_matrix, (new_w, new_h), borderValue=border_value) return rotated_image @@ -5496,6 +5496,21 @@ class Eynollah_ocr: else: angle = 0.5 * np.arctan2(2 * moments["mu11"], moments["mu20"] - moments["mu02"]) return np.degrees(angle) # Convert radians to degrees + + + def get_orientation_moments_of_mask(self, mask): + mask=mask.astype('uint8') + print(mask.shape) + contours, _ = cv2.findContours(mask[:,:,0], cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) + + largest_contour = max(contours, key=cv2.contourArea) if contours else None + + moments = cv2.moments(largest_contour) + if moments["mu20"] - moments["mu02"] == 0: # Avoid division by zero + return 90 if moments["mu11"] > 0 else -90 + else: + angle = 0.5 * np.arctan2(2 * moments["mu11"], moments["mu20"] - moments["mu02"]) + return np.degrees(angle) # Convert radians to degrees def get_contours_and_bounding_boxes(self, mask): # Find contours in the binary mask @@ -5508,6 +5523,121 @@ class Eynollah_ocr: #bounding_boxes.append((x, y, w, h)) return x, y, w, h + + def return_splitting_point_of_image(self, image_to_spliited): + width = np.shape(image_to_spliited)[1] + height = np.shape(image_to_spliited)[0] + common_window = int(0.03*width) + + width1 = int ( common_window) + width2 = int ( width - common_window ) + + img_sum = np.sum(image_to_spliited[:,:,0], axis=0) + sum_smoothed = gaussian_filter1d(img_sum, 3) + + peaks_real, _ = find_peaks(sum_smoothed, height=0) + + peaks_real = peaks_real[(peaks_realwidth1)] + + arg_sort = np.argsort(sum_smoothed[peaks_real]) + arg_sort4 =arg_sort[::-1][:4] + peaks_sort_4 = peaks_real[arg_sort][::-1][:4] + + return np.sort(peaks_sort_4) + + def break_curved_line_into_small_pieces_and_then_merge(self, img_curved, mask_curved): + peaks_4 = self.return_splitting_point_of_image(img_curved) + + + + img_0 = img_curved[:, :peaks_4[0], :] + img_1 = img_curved[:, peaks_4[0]:peaks_4[1], 
:] + img_2 = img_curved[:, peaks_4[1]:peaks_4[2], :] + img_3 = img_curved[:, peaks_4[2]:peaks_4[3], :] + img_4 = img_curved[:, peaks_4[3]:, :] + + + mask_0 = mask_curved[:, :peaks_4[0], :] + mask_1 = mask_curved[:, peaks_4[0]:peaks_4[1], :] + mask_2 = mask_curved[:, peaks_4[1]:peaks_4[2], :] + mask_3 = mask_curved[:, peaks_4[2]:peaks_4[3], :] + mask_4 = mask_curved[:, peaks_4[3]:, :] + + cv2.imwrite("split0.png", img_0) + cv2.imwrite("split1.png", img_1) + cv2.imwrite("split2.png", img_2) + cv2.imwrite("split3.png", img_3) + + or_ma_0 = self.get_orientation_moments_of_mask(mask_0) + or_ma_1 = self.get_orientation_moments_of_mask(mask_1) + or_ma_2 = self.get_orientation_moments_of_mask(mask_2) + or_ma_3 = self.get_orientation_moments_of_mask(mask_3) + or_ma_4 = self.get_orientation_moments_of_mask(mask_4) + + imgs_tot = [] + imgs_tot.append([img_0, mask_0, or_ma_0] ) + imgs_tot.append([img_1, mask_1, or_ma_1]) + imgs_tot.append([img_2, mask_2, or_ma_2]) + imgs_tot.append([img_3, mask_3, or_ma_3]) + imgs_tot.append([img_4, mask_4, or_ma_4]) + + w_tot_des_list = [] + w_tot_des = 0 + imgs_deskewed_list = [] + for ind in range(len(imgs_tot)): + img_in = imgs_tot[ind][0] + mask_in = imgs_tot[ind][1] + ori_in = imgs_tot[ind][2] + + if abs(ori_in)<45: + img_in_des = self.rotate_image_with_padding(img_in, ori_in, border_value=(255,255,255) ) + mask_in_des = self.rotate_image_with_padding(mask_in, ori_in) + mask_in_des = mask_in_des.astype('uint8') + + #new bounding box + x_n, y_n, w_n, h_n = self.get_contours_and_bounding_boxes(mask_in_des[:,:,0]) + + mask_in_des = mask_in_des[y_n:y_n+h_n, x_n:x_n+w_n, :] + img_in_des = img_in_des[y_n:y_n+h_n, x_n:x_n+w_n, :] + + w_relative = int(32 * img_in_des.shape[1]/float(img_in_des.shape[0]) ) + img_in_des = resize_image(img_in_des, 32, w_relative) + + + else: + img_in_des = np.copy(img_in) + w_relative = int(32 * img_in_des.shape[1]/float(img_in_des.shape[0]) ) + img_in_des = resize_image(img_in_des, 32, w_relative) + + w_tot_des+=img_in_des.shape[1] + w_tot_des_list.append(img_in_des.shape[1]) + imgs_deskewed_list.append(img_in_des) + + + + + img_final_deskewed = np.zeros((32, w_tot_des, 3))+255 + + w_indexer = 0 + for ind in range(len(w_tot_des_list)): + img_final_deskewed[:,w_indexer:w_indexer+w_tot_des_list[ind],:] = imgs_deskewed_list[ind][:,:,:] + w_indexer = w_indexer+w_tot_des_list[ind] + + #cv2.imwrite('final.png', img_final_deskewed) + #print(or_ma_0, or_ma_1, or_ma_2, or_ma_3, or_ma_4, 'orients') + + ##cv2.imwrite("split4.png", img_curved[:, peaks_4[3]:peaks_4[4], :]) + ##cv2.imwrite("split5.png", img_curved[:, peaks_4[4]:peaks_4[5], :]) + ##cv2.imwrite("split6.png", img_curved[:, peaks_4[5]:peaks_4[6], :]) + + ##cv2.imwrite("split7.png", img_curved[:, peaks_4[6]:peaks_4[7], :]) + ##cv2.imwrite("split8.png", img_curved[:, peaks_4[7]:peaks_4[8], :]) + ##cv2.imwrite("split9.png", img_curved[:, peaks_4[8]:peaks_4[9], :]) + + + #cv2.imwrite("split4.png", img_4) + #sys.exit() + return img_final_deskewed def run(self): ls_imgs = os.listdir(self.dir_in) @@ -5754,11 +5884,9 @@ class Eynollah_ocr: mask_poly = mask_poly[y:y+h, x:x+w, :] img_crop = img_poly_on_img[y:y+h, x:x+w, :] - if angle_degrees<=15: - if mask_poly[:,:,0].sum() /float(w*h) < 0.6 and w_scaled > 520: - cv2.imwrite(file_name+'_desk.png', img_crop) + - print(file_name, angle_degrees,w*h , mask_poly[:,:,0].sum(), mask_poly[:,:,0].sum() /float(w*h) , 'didi') + #print(file_name, angle_degrees,w*h , mask_poly[:,:,0].sum(), mask_poly[:,:,0].sum() /float(w*h) , 'didi') if not 
self.do_not_mask_with_textline_contour: if angle_degrees > 15: better_des_slope = self.get_orientation_moments(textline_coords) @@ -5773,12 +5901,19 @@ class Eynollah_ocr: mask_poly = mask_poly[y_n:y_n+h_n, x_n:x_n+w_n, :] img_crop = img_crop[y_n:y_n+h_n, x_n:x_n+w_n, :] - if mask_poly[:,:,0].sum() /float(w_n*h_n) < 0.6 and w_scaled > 520: - cv2.imwrite(file_name+'_desk.png', img_crop) + img_crop[mask_poly==0] = 255 - print(file_name,w_n*h_n , mask_poly[:,:,0].sum(), mask_poly[:,:,0].sum() /float(w_n*h_n) , 'ikiiiiii') + if mask_poly[:,:,0].sum() /float(w_n*h_n) < 0.50 and w_scaled > 100: + img_crop = self.break_curved_line_into_small_pieces_and_then_merge(img_crop, mask_poly) - img_crop[mask_poly==0] = 255 + #print(file_name,w_n*h_n , mask_poly[:,:,0].sum(), mask_poly[:,:,0].sum() /float(w_n*h_n) , 'ikiiiiii') + else: + img_crop[mask_poly==0] = 255 + if mask_poly[:,:,0].sum() /float(w*h) < 0.50 and w_scaled > 100: + img_crop = self.break_curved_line_into_small_pieces_and_then_merge(img_crop, mask_poly) + + + if self.prediction_with_both_of_rgb_and_bin: img_crop_bin[mask_poly==0] = 255 From f94fc9973bc370e9b780c8114520aec91c62e78b Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Wed, 21 May 2025 14:39:31 +0200 Subject: [PATCH 15/40] Implement hyphenated textline merging in OCR engine and a bug fixed for curved textline OCR --- src/eynollah/eynollah.py | 157 ++++++++++++++++++--------------------- 1 file changed, 71 insertions(+), 86 deletions(-) diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py index 912bc31..6771db0 100644 --- a/src/eynollah/eynollah.py +++ b/src/eynollah/eynollah.py @@ -5500,7 +5500,6 @@ class Eynollah_ocr: def get_orientation_moments_of_mask(self, mask): mask=mask.astype('uint8') - print(mask.shape) contours, _ = cv2.findContours(mask[:,:,0], cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) largest_contour = max(contours, key=cv2.contourArea) if contours else None @@ -5547,97 +5546,69 @@ class Eynollah_ocr: def break_curved_line_into_small_pieces_and_then_merge(self, img_curved, mask_curved): peaks_4 = self.return_splitting_point_of_image(img_curved) - - - - img_0 = img_curved[:, :peaks_4[0], :] - img_1 = img_curved[:, peaks_4[0]:peaks_4[1], :] - img_2 = img_curved[:, peaks_4[1]:peaks_4[2], :] - img_3 = img_curved[:, peaks_4[2]:peaks_4[3], :] - img_4 = img_curved[:, peaks_4[3]:, :] - - - mask_0 = mask_curved[:, :peaks_4[0], :] - mask_1 = mask_curved[:, peaks_4[0]:peaks_4[1], :] - mask_2 = mask_curved[:, peaks_4[1]:peaks_4[2], :] - mask_3 = mask_curved[:, peaks_4[2]:peaks_4[3], :] - mask_4 = mask_curved[:, peaks_4[3]:, :] - - cv2.imwrite("split0.png", img_0) - cv2.imwrite("split1.png", img_1) - cv2.imwrite("split2.png", img_2) - cv2.imwrite("split3.png", img_3) - - or_ma_0 = self.get_orientation_moments_of_mask(mask_0) - or_ma_1 = self.get_orientation_moments_of_mask(mask_1) - or_ma_2 = self.get_orientation_moments_of_mask(mask_2) - or_ma_3 = self.get_orientation_moments_of_mask(mask_3) - or_ma_4 = self.get_orientation_moments_of_mask(mask_4) - - imgs_tot = [] - imgs_tot.append([img_0, mask_0, or_ma_0] ) - imgs_tot.append([img_1, mask_1, or_ma_1]) - imgs_tot.append([img_2, mask_2, or_ma_2]) - imgs_tot.append([img_3, mask_3, or_ma_3]) - imgs_tot.append([img_4, mask_4, or_ma_4]) - - w_tot_des_list = [] - w_tot_des = 0 - imgs_deskewed_list = [] - for ind in range(len(imgs_tot)): - img_in = imgs_tot[ind][0] - mask_in = imgs_tot[ind][1] - ori_in = imgs_tot[ind][2] + if len(peaks_4)>0: + imgs_tot = [] - if abs(ori_in)<45: - img_in_des = 
self.rotate_image_with_padding(img_in, ori_in, border_value=(255,255,255) ) - mask_in_des = self.rotate_image_with_padding(mask_in, ori_in) - mask_in_des = mask_in_des.astype('uint8') + for ind in range(len(peaks_4)+1): + if ind==0: + img = img_curved[:, :peaks_4[ind], :] + mask = mask_curved[:, :peaks_4[ind], :] + elif ind==len(peaks_4): + img = img_curved[:, peaks_4[ind-1]:, :] + mask = mask_curved[:, peaks_4[ind-1]:, :] + else: + img = img_curved[:, peaks_4[ind-1]:peaks_4[ind], :] + mask = mask_curved[:, peaks_4[ind-1]:peaks_4[ind], :] + + or_ma = self.get_orientation_moments_of_mask(mask) + + imgs_tot.append([img, mask, or_ma] ) + + + w_tot_des_list = [] + w_tot_des = 0 + imgs_deskewed_list = [] + for ind in range(len(imgs_tot)): + img_in = imgs_tot[ind][0] + mask_in = imgs_tot[ind][1] + ori_in = imgs_tot[ind][2] - #new bounding box - x_n, y_n, w_n, h_n = self.get_contours_and_bounding_boxes(mask_in_des[:,:,0]) + if abs(ori_in)<45: + img_in_des = self.rotate_image_with_padding(img_in, ori_in, border_value=(255,255,255) ) + mask_in_des = self.rotate_image_with_padding(mask_in, ori_in) + mask_in_des = mask_in_des.astype('uint8') + + #new bounding box + x_n, y_n, w_n, h_n = self.get_contours_and_bounding_boxes(mask_in_des[:,:,0]) + + mask_in_des = mask_in_des[y_n:y_n+h_n, x_n:x_n+w_n, :] + img_in_des = img_in_des[y_n:y_n+h_n, x_n:x_n+w_n, :] + + w_relative = int(32 * img_in_des.shape[1]/float(img_in_des.shape[0]) ) + img_in_des = resize_image(img_in_des, 32, w_relative) + + + else: + img_in_des = np.copy(img_in) + w_relative = int(32 * img_in_des.shape[1]/float(img_in_des.shape[0]) ) + img_in_des = resize_image(img_in_des, 32, w_relative) + + w_tot_des+=img_in_des.shape[1] + w_tot_des_list.append(img_in_des.shape[1]) + imgs_deskewed_list.append(img_in_des) - mask_in_des = mask_in_des[y_n:y_n+h_n, x_n:x_n+w_n, :] - img_in_des = img_in_des[y_n:y_n+h_n, x_n:x_n+w_n, :] - w_relative = int(32 * img_in_des.shape[1]/float(img_in_des.shape[0]) ) - img_in_des = resize_image(img_in_des, 32, w_relative) - else: - img_in_des = np.copy(img_in) - w_relative = int(32 * img_in_des.shape[1]/float(img_in_des.shape[0]) ) - img_in_des = resize_image(img_in_des, 32, w_relative) - - w_tot_des+=img_in_des.shape[1] - w_tot_des_list.append(img_in_des.shape[1]) - imgs_deskewed_list.append(img_in_des) + img_final_deskewed = np.zeros((32, w_tot_des, 3))+255 - - - - img_final_deskewed = np.zeros((32, w_tot_des, 3))+255 - - w_indexer = 0 - for ind in range(len(w_tot_des_list)): - img_final_deskewed[:,w_indexer:w_indexer+w_tot_des_list[ind],:] = imgs_deskewed_list[ind][:,:,:] - w_indexer = w_indexer+w_tot_des_list[ind] - - #cv2.imwrite('final.png', img_final_deskewed) - #print(or_ma_0, or_ma_1, or_ma_2, or_ma_3, or_ma_4, 'orients') - - ##cv2.imwrite("split4.png", img_curved[:, peaks_4[3]:peaks_4[4], :]) - ##cv2.imwrite("split5.png", img_curved[:, peaks_4[4]:peaks_4[5], :]) - ##cv2.imwrite("split6.png", img_curved[:, peaks_4[5]:peaks_4[6], :]) - - ##cv2.imwrite("split7.png", img_curved[:, peaks_4[6]:peaks_4[7], :]) - ##cv2.imwrite("split8.png", img_curved[:, peaks_4[7]:peaks_4[8], :]) - ##cv2.imwrite("split9.png", img_curved[:, peaks_4[8]:peaks_4[9], :]) - - - #cv2.imwrite("split4.png", img_4) - #sys.exit() - return img_final_deskewed + w_indexer = 0 + for ind in range(len(w_tot_des_list)): + img_final_deskewed[:,w_indexer:w_indexer+w_tot_des_list[ind],:] = imgs_deskewed_list[ind][:,:,:] + w_indexer = w_indexer+w_tot_des_list[ind] + return img_final_deskewed + else: + return img_curved def run(self): ls_imgs = 
os.listdir(self.dir_in) @@ -6144,7 +6115,21 @@ class Eynollah_ocr: text_by_textregion = [] for ind in unique_cropped_lines_region_indexer: extracted_texts_merged_un = np.array(extracted_texts_merged)[np.array(cropped_lines_region_indexer)==ind] - text_by_textregion.append("".join(extracted_texts_merged_un)) + if len(extracted_texts_merged_un)>1: + text_by_textregion_ind = "" + next_glue = "" + for indt in range(len(extracted_texts_merged_un)): + if extracted_texts_merged_un[indt].endswith('⸗') or extracted_texts_merged_un[indt].endswith('-'): + text_by_textregion_ind = text_by_textregion_ind + next_glue + extracted_texts_merged_un[indt][:-1] + next_glue = "" + else: + text_by_textregion_ind = text_by_textregion_ind + next_glue + extracted_texts_merged_un[indt] + next_glue = " " + text_by_textregion.append(text_by_textregion_ind) + + else: + text_by_textregion.append(" ".join(extracted_texts_merged_un)) + #print(text_by_textregion, 'text_by_textregiontext_by_textregiontext_by_textregiontext_by_textregiontext_by_textregion') indexer = 0 indexer_textregion = 0 From a0647eff9391fbe398c1de9154068f3841ad22a7 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Wed, 21 May 2025 17:42:44 +0200 Subject: [PATCH 16/40] enhancing curved lines OCR --- src/eynollah/eynollah.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py index 6771db0..b510218 100644 --- a/src/eynollah/eynollah.py +++ b/src/eynollah/eynollah.py @@ -5532,14 +5532,12 @@ class Eynollah_ocr: width2 = int ( width - common_window ) img_sum = np.sum(image_to_spliited[:,:,0], axis=0) - sum_smoothed = gaussian_filter1d(img_sum, 3) + sum_smoothed = gaussian_filter1d(img_sum, 1) peaks_real, _ = find_peaks(sum_smoothed, height=0) - peaks_real = peaks_real[(peaks_realwidth1)] - + arg_sort = np.argsort(sum_smoothed[peaks_real]) - arg_sort4 =arg_sort[::-1][:4] peaks_sort_4 = peaks_real[arg_sort][::-1][:4] return np.sort(peaks_sort_4) @@ -5585,12 +5583,16 @@ class Eynollah_ocr: img_in_des = img_in_des[y_n:y_n+h_n, x_n:x_n+w_n, :] w_relative = int(32 * img_in_des.shape[1]/float(img_in_des.shape[0]) ) + if w_relative==0: + w_relative = img_in_des.shape[1] img_in_des = resize_image(img_in_des, 32, w_relative) else: img_in_des = np.copy(img_in) w_relative = int(32 * img_in_des.shape[1]/float(img_in_des.shape[0]) ) + if w_relative==0: + w_relative = img_in_des.shape[1] img_in_des = resize_image(img_in_des, 32, w_relative) w_tot_des+=img_in_des.shape[1] From d4f6e10251f23ff01b15cc7736067c5af30b1278 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Fri, 23 May 2025 15:55:03 +0200 Subject: [PATCH 17/40] commit 21ec4fb is picked + rnn ocr at the same time with segmentation + enhancement of mb reading order --- src/eynollah/cli.py | 15 +- src/eynollah/eynollah.py | 775 +++++++++++--------------------- src/eynollah/utils/utils_ocr.py | 435 ++++++++++++++++++ src/eynollah/writer.py | 30 +- 4 files changed, 729 insertions(+), 526 deletions(-) create mode 100644 src/eynollah/utils/utils_ocr.py diff --git a/src/eynollah/cli.py b/src/eynollah/cli.py index 99961c9..cd56833 100644 --- a/src/eynollah/cli.py +++ b/src/eynollah/cli.py @@ -225,6 +225,17 @@ def binarization(patches, model_dir, input_image, output_image, dir_in, dir_out) is_flag=True, help="if this parameter set to true, this tool will try to do ocr", ) +@click.option( + "--transformer_ocr", + "-tr/-notr", + is_flag=True, + help="if this parameter set to true, this tool will apply transformer ocr", +) +@click.option( + 
"--batch_size_ocr", + "-bs_ocr", + help="number of inference batch size of ocr model. Default b_s for trocr and cnn_rnn models are 2 and 8 respectively", +) @click.option( "--num_col_upper", "-ncu", @@ -258,7 +269,7 @@ def binarization(patches, model_dir, input_image, output_image, dir_in, dir_out) help="Override log level globally to this", ) -def layout(image, out, overwrite, dir_in, model, save_images, save_layout, save_deskewed, save_all, extract_only_images, save_page, enable_plotting, allow_enhancement, curved_line, textline_light, full_layout, tables, right2left, input_binary, allow_scaling, headers_off, light_version, reading_order_machine_based, do_ocr, num_col_upper, num_col_lower, threshold_art_class_textline, threshold_art_class_layout, skip_layout_and_reading_order, ignore_page_extraction, log_level): +def layout(image, out, overwrite, dir_in, model, save_images, save_layout, save_deskewed, save_all, extract_only_images, save_page, enable_plotting, allow_enhancement, curved_line, textline_light, full_layout, tables, right2left, input_binary, allow_scaling, headers_off, light_version, reading_order_machine_based, do_ocr, transformer_ocr, batch_size_ocr, num_col_upper, num_col_lower, threshold_art_class_textline, threshold_art_class_layout, skip_layout_and_reading_order, ignore_page_extraction, log_level): initLogging() if log_level: getLogger('eynollah').setLevel(getLevelName(log_level)) @@ -305,6 +316,8 @@ def layout(image, out, overwrite, dir_in, model, save_images, save_layout, save_ ignore_page_extraction=ignore_page_extraction, reading_order_machine_based=reading_order_machine_based, do_ocr=do_ocr, + transformer_ocr=transformer_ocr, + batch_size_ocr=batch_size_ocr, num_col_upper=num_col_upper, num_col_lower=num_col_lower, skip_layout_and_reading_order=skip_layout_and_reading_order, diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py index b510218..2564150 100644 --- a/src/eynollah/eynollah.py +++ b/src/eynollah/eynollah.py @@ -80,6 +80,13 @@ from .utils.rotate import ( rotation_not_90_func_full_layout, rotation_image_new ) +from .utils.utils_ocr import ( + return_textline_contour_with_added_box_coordinate, + preprocess_and_resize_image_for_ocrcnn_model, + return_textlines_split_if_needed, + decode_batch_predictions, + return_rnn_cnn_ocr_of_given_textlines +) from .utils.separate_lines import ( textline_contours_postprocessing, separate_lines_new2, @@ -199,6 +206,8 @@ class Eynollah: ignore_page_extraction : bool = False, reading_order_machine_based : bool = False, do_ocr : bool = False, + transformer_ocr: bool = False, + batch_size_ocr: Optional[int] = None, num_col_upper : Optional[int] = None, num_col_lower : Optional[int] = None, threshold_art_class_layout: Optional[float] = None, @@ -232,6 +241,7 @@ class Eynollah: self.ignore_page_extraction = ignore_page_extraction self.skip_layout_and_reading_order = skip_layout_and_reading_order self.ocr = do_ocr + self.tr = transformer_ocr if num_col_upper: self.num_col_upper = int(num_col_upper) else: @@ -273,7 +283,7 @@ class Eynollah: self.model_region_dir_p_ens = dir_models + "/eynollah-main-regions-ensembled_20210425" self.model_region_dir_p_ens_light = dir_models + "/eynollah-main-regions_20220314" self.model_region_dir_p_ens_light_only_images_extraction = dir_models + "/eynollah-main-regions_20231127_672_org_ens_11_13_16_17_18" - self.model_reading_order_dir = dir_models + "/model_step_2500000_mb_ro"#"/model_ens_reading_order_machine_based" + self.model_reading_order_dir = dir_models + 
"/model_step_4800000_mb_ro"#"/model_ens_reading_order_machine_based" #"/modelens_12sp_elay_0_3_4__3_6_n" #"/modelens_earlylayout_12spaltige_2_3_5_6_7_8" #"/modelens_early12_sp_2_3_5_6_7_8_9_10_12_14_15_16_18" @@ -300,8 +310,10 @@ class Eynollah: else: #"/eynollah-textline_20210425" self.model_textline_dir = dir_models + "/modelens_textline_0_1__2_4_16092024" - if self.ocr: + if self.ocr and self.tr: self.model_ocr_dir = dir_models + "/trocr_model_ens_of_3_checkpoints_201124" + elif self.ocr and not self.tr: + self.model_ocr_dir = dir_models + "/model_step_750000_ocr"#"/model_step_125000_ocr"#"/model_step_25000_ocr"#"/model_step_1050000_ocr"#"/model_0_ocr_cnnrnn"#"/model_23_ocr_cnnrnn" if self.tables: if self.light_version: self.model_table_dir = dir_models + "/modelens_table_0t4_201124" @@ -341,11 +353,37 @@ class Eynollah: self.model_region_fl = self.our_load_model(self.model_region_dir_fully) if self.reading_order_machine_based: self.model_reading_order = self.our_load_model(self.model_reading_order_dir) - if self.ocr: + if self.ocr and self.tr: self.model_ocr = VisionEncoderDecoderModel.from_pretrained(self.model_ocr_dir) self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") #("microsoft/trocr-base-printed")#("microsoft/trocr-base-handwritten") self.processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-handwritten") + elif self.ocr and not self.tr: + model_ocr = load_model(self.model_ocr_dir , compile=False) + + self.prediction_model = tf.keras.models.Model( + model_ocr.get_layer(name = "image").input, + model_ocr.get_layer(name = "dense2").output) + if not batch_size_ocr: + self.b_s_ocr = 8 + else: + self.b_s_ocr = int(batch_size_ocr) + + + with open(os.path.join(self.model_ocr_dir, "characters_org.txt"),"r") as config_file: + characters = json.load(config_file) + + + AUTOTUNE = tf.data.AUTOTUNE + + # Mapping characters to integers. + char_to_num = StringLookup(vocabulary=list(characters), mask_token=None) + + # Mapping integers back to original characters. 
+ self.num_to_char = StringLookup( + vocabulary=char_to_num.get_vocabulary(), mask_token=None, invert=True + ) + if self.tables: self.model_table = self.our_load_model(self.model_table_dir) @@ -1325,11 +1363,11 @@ class Eynollah: seg_art[seg_art>0] =1 seg_line = label_p_pred[:,:,:,3] - seg_line[seg_line>0.3] =1#seg_line[seg_line>0.5] =1#seg_line[seg_line>0.1] =1 + seg_line[seg_line>0.4] =1#seg_line[seg_line>0.5] =1#seg_line[seg_line>0.1] =1 seg_line[seg_line<1] =0 ##seg[seg_art==1]=4 - seg[(seg_line==1) & (seg==0)]=3 + #seg[(seg_line==1) & (seg==0)]=3 if thresholding_for_artificial_class_in_light_version: seg_art = label_p_pred[:,:,:,2] @@ -2060,7 +2098,7 @@ class Eynollah: ###img_bin = np.copy(prediction_bin) ###else: ###img_bin = np.copy(img_resized) - if self.ocr and not self.input_binary: + if (self.ocr and self.tr) and not self.input_binary: prediction_bin = self.do_prediction(True, img_resized, self.model_bin, n_batch_inference=5) prediction_bin = 255 * (prediction_bin[:,:,0] == 0) prediction_bin = np.repeat(prediction_bin[:, :, np.newaxis], 3, axis=2) @@ -3485,8 +3523,10 @@ class Eynollah: # 6 is the separators lable in old full layout model # 4 is the drop capital class in old full layout model # in the new full layout drop capital is 3 and separators are 5 - - text_regions_p[:,:][regions_fully[:,:,0]==5]=6 + + # the separators in full layout will not be written on layout + if not self.reading_order_machine_based: + text_regions_p[:,:][regions_fully[:,:,0]==5]=6 ###regions_fully[:, :, 0][regions_fully_only_drop[:, :, 0] == 3] = 4 #text_regions_p[:,:][regions_fully[:,:,0]==6]=6 @@ -3555,11 +3595,37 @@ class Eynollah: return model def do_order_of_regions_with_model(self, contours_only_text_parent, contours_only_text_parent_h, text_regions_p): - #cv2.imwrite('textregions.png', text_regions_p*50) + + height1 =672#448 + width1 = 448#224 + + height2 =672#448 + width2= 448#224 + + height3 =672#448 + width3 = 448#224 + + inference_bs = 3 + + cv2.imwrite('textregions.png', text_regions_p*50) + cv2.imwrite('sep.png', (text_regions_p[:,:]==6)*255) + + ver_kernel = np.ones((5, 1), dtype=np.uint8) + hor_kernel = np.ones((1, 5), dtype=np.uint8) + + + + #separators = (text_regions_p[:,:]==6)*1 + #text_regions_p[text_regions_p[:,:]==6] = 0 + #separators = separators.astype('uint8') + + #separators = cv2.erode(separators , hor_kernel, iterations=1) + #text_regions_p[separators[:,:]==1] = 6 + + #cv2.imwrite('sep_new.png', (text_regions_p[:,:]==6)*255) + min_cont_size_to_be_dilated = 10 if len(contours_only_text_parent)>min_cont_size_to_be_dilated: - ver_kernel = np.ones((5, 1), dtype=np.uint8) - cx_conts, cy_conts, x_min_conts, x_max_conts, y_min_conts, y_max_conts, _ = find_new_features_of_contours(contours_only_text_parent) args_cont_located = np.array(range(len(contours_only_text_parent))) @@ -3595,12 +3661,13 @@ class Eynollah: textregion_par = (text_regions_p[:,:]==1)*1 textregion_par = textregion_par.astype('uint8') - - text_regions_p_textregions_dilated = cv2.dilate(textregion_par , ver_kernel, iterations=8) + text_regions_p_textregions_dilated = cv2.erode(textregion_par , hor_kernel, iterations=2) + text_regions_p_textregions_dilated = cv2.dilate(text_regions_p_textregions_dilated , ver_kernel, iterations=4) + text_regions_p_textregions_dilated = cv2.erode(text_regions_p_textregions_dilated , hor_kernel, iterations=1) + text_regions_p_textregions_dilated = cv2.dilate(text_regions_p_textregions_dilated , ver_kernel, iterations=5) text_regions_p_textregions_dilated[text_regions_p[:,:]>1] = 0 - 
#cv2.imwrite('textregions_dilated.png', text_regions_p_textregions_dilated*255) - + cv2.imwrite('text_regions_p_textregions_dilated.png', text_regions_p_textregions_dilated*255) contours_only_dilated, hir_on_text_dilated = return_contours_of_image(text_regions_p_textregions_dilated) contours_only_dilated = return_parent_contours(contours_only_dilated, hir_on_text_dilated) @@ -3664,7 +3731,8 @@ class Eynollah: if not len(co_text_all): return [], [] - + print(len(co_text_all), "co_text_all") + print(len(co_text_all_org), "co_text_all_org") labels_con = np.zeros((int(y_len /6.), int(x_len/6.), len(co_text_all)), dtype=bool) co_text_all = [(i/6).astype(int) for i in co_text_all] for i in range(len(co_text_all)): @@ -3675,21 +3743,13 @@ class Eynollah: cv2.fillPoly(img, pts=[co_text_all[i]], color=(1,)) labels_con[:,:,i] = img - height1 =672#448 - width1 = 448#224 - - height2 =672#448 - width2= 448#224 - - height3 =672#448 - width3 = 448#224 labels_con = resize_image(labels_con.astype(np.uint8), height1, width1).astype(bool) img_header_and_sep = resize_image(img_header_and_sep, height1, width1) img_poly = resize_image(img_poly, height3, width3) - inference_bs = 3 + input_1 = np.zeros((inference_bs, height1, width1, 3)) ordered = [list(range(len(co_text_all)))] index_update = 0 @@ -3760,217 +3820,213 @@ class Eynollah: return ordered, region_ids - def return_start_and_end_of_common_text_of_textline_ocr(self, textline_image, ind_tot): - width = np.shape(textline_image)[1] - height = np.shape(textline_image)[0] - common_window = int(0.2*width) + ####def return_start_and_end_of_common_text_of_textline_ocr(self, textline_image, ind_tot): + ####width = np.shape(textline_image)[1] + ####height = np.shape(textline_image)[0] + ####common_window = int(0.2*width) - width1 = int ( width/2. - common_window ) - width2 = int ( width/2. + common_window ) + ####width1 = int ( width/2. - common_window ) + ####width2 = int ( width/2. 
+ common_window ) - img_sum = np.sum(textline_image[:,:,0], axis=0) - sum_smoothed = gaussian_filter1d(img_sum, 3) + ####img_sum = np.sum(textline_image[:,:,0], axis=0) + ####sum_smoothed = gaussian_filter1d(img_sum, 3) - peaks_real, _ = find_peaks(sum_smoothed, height=0) - if len(peaks_real)>70: + ####peaks_real, _ = find_peaks(sum_smoothed, height=0) + ####if len(peaks_real)>70: - peaks_real = peaks_real[(peaks_realwidth1)] + ####peaks_real = peaks_real[(peaks_realwidth1)] - arg_sort = np.argsort(sum_smoothed[peaks_real]) - arg_sort4 =arg_sort[::-1][:4] - peaks_sort_4 = peaks_real[arg_sort][::-1][:4] - argsort_sorted = np.argsort(peaks_sort_4) + ####arg_sort = np.argsort(sum_smoothed[peaks_real]) + ####arg_sort4 =arg_sort[::-1][:4] + ####peaks_sort_4 = peaks_real[arg_sort][::-1][:4] + ####argsort_sorted = np.argsort(peaks_sort_4) - first_4_sorted = peaks_sort_4[argsort_sorted] - y_4_sorted = sum_smoothed[peaks_real][arg_sort4[argsort_sorted]] - #print(first_4_sorted,'first_4_sorted') + ####first_4_sorted = peaks_sort_4[argsort_sorted] + ####y_4_sorted = sum_smoothed[peaks_real][arg_sort4[argsort_sorted]] + #####print(first_4_sorted,'first_4_sorted') - arg_sortnew = np.argsort(y_4_sorted) - peaks_final =np.sort( first_4_sorted[arg_sortnew][2:] ) + ####arg_sortnew = np.argsort(y_4_sorted) + ####peaks_final =np.sort( first_4_sorted[arg_sortnew][2:] ) - #plt.figure(ind_tot) - #plt.imshow(textline_image) - #plt.plot([peaks_final[0], peaks_final[0]], [0, height-1]) - #plt.plot([peaks_final[1], peaks_final[1]], [0, height-1]) - #plt.savefig('./'+str(ind_tot)+'.png') + #####plt.figure(ind_tot) + #####plt.imshow(textline_image) + #####plt.plot([peaks_final[0], peaks_final[0]], [0, height-1]) + #####plt.plot([peaks_final[1], peaks_final[1]], [0, height-1]) + #####plt.savefig('./'+str(ind_tot)+'.png') - return peaks_final[0], peaks_final[1] - else: - pass + ####return peaks_final[0], peaks_final[1] + ####else: + ####pass - def return_start_and_end_of_common_text_of_textline_ocr_without_common_section(self, textline_image, ind_tot): - width = np.shape(textline_image)[1] - height = np.shape(textline_image)[0] - common_window = int(0.06*width) + ##def return_start_and_end_of_common_text_of_textline_ocr_without_common_section(self, textline_image, ind_tot): + ##width = np.shape(textline_image)[1] + ##height = np.shape(textline_image)[0] + ##common_window = int(0.06*width) - width1 = int ( width/2. - common_window ) - width2 = int ( width/2. + common_window ) + ##width1 = int ( width/2. - common_window ) + ##width2 = int ( width/2. 
+ common_window ) - img_sum = np.sum(textline_image[:,:,0], axis=0) - sum_smoothed = gaussian_filter1d(img_sum, 3) + ##img_sum = np.sum(textline_image[:,:,0], axis=0) + ##sum_smoothed = gaussian_filter1d(img_sum, 3) - peaks_real, _ = find_peaks(sum_smoothed, height=0) - if len(peaks_real)>70: - #print(len(peaks_real), 'len(peaks_real)') + ##peaks_real, _ = find_peaks(sum_smoothed, height=0) + ##if len(peaks_real)>70: + ###print(len(peaks_real), 'len(peaks_real)') - peaks_real = peaks_real[(peaks_realwidth1)] + ##peaks_real = peaks_real[(peaks_realwidth1)] - arg_max = np.argmax(sum_smoothed[peaks_real]) - peaks_final = peaks_real[arg_max] + ##arg_max = np.argmax(sum_smoothed[peaks_real]) + ##peaks_final = peaks_real[arg_max] - #plt.figure(ind_tot) - #plt.imshow(textline_image) - #plt.plot([peaks_final, peaks_final], [0, height-1]) - ##plt.plot([peaks_final[1], peaks_final[1]], [0, height-1]) - #plt.savefig('./'+str(ind_tot)+'.png') + ###plt.figure(ind_tot) + ###plt.imshow(textline_image) + ###plt.plot([peaks_final, peaks_final], [0, height-1]) + ####plt.plot([peaks_final[1], peaks_final[1]], [0, height-1]) + ###plt.savefig('./'+str(ind_tot)+'.png') - return peaks_final - else: - return None + ##return peaks_final + ##else: + ##return None - def return_start_and_end_of_common_text_of_textline_ocr_new_splitted( - self, peaks_real, sum_smoothed, start_split, end_split): + ###def return_start_and_end_of_common_text_of_textline_ocr_new_splitted( + ###self, peaks_real, sum_smoothed, start_split, end_split): - peaks_real = peaks_real[(peaks_realstart_split)] + ###peaks_real = peaks_real[(peaks_realstart_split)] - arg_sort = np.argsort(sum_smoothed[peaks_real]) - arg_sort4 =arg_sort[::-1][:4] - peaks_sort_4 = peaks_real[arg_sort][::-1][:4] - argsort_sorted = np.argsort(peaks_sort_4) + ###arg_sort = np.argsort(sum_smoothed[peaks_real]) + ###arg_sort4 =arg_sort[::-1][:4] + ###peaks_sort_4 = peaks_real[arg_sort][::-1][:4] + ###argsort_sorted = np.argsort(peaks_sort_4) - first_4_sorted = peaks_sort_4[argsort_sorted] - y_4_sorted = sum_smoothed[peaks_real][arg_sort4[argsort_sorted]] - #print(first_4_sorted,'first_4_sorted') + ###first_4_sorted = peaks_sort_4[argsort_sorted] + ###y_4_sorted = sum_smoothed[peaks_real][arg_sort4[argsort_sorted]] + ####print(first_4_sorted,'first_4_sorted') - arg_sortnew = np.argsort(y_4_sorted) - peaks_final =np.sort( first_4_sorted[arg_sortnew][3:] ) - return peaks_final[0] + ###arg_sortnew = np.argsort(y_4_sorted) + ###peaks_final =np.sort( first_4_sorted[arg_sortnew][3:] ) + ###return peaks_final[0] - def return_start_and_end_of_common_text_of_textline_ocr_new(self, textline_image, ind_tot): - width = np.shape(textline_image)[1] - height = np.shape(textline_image)[0] - common_window = int(0.15*width) + ###def return_start_and_end_of_common_text_of_textline_ocr_new(self, textline_image, ind_tot): + ###width = np.shape(textline_image)[1] + ###height = np.shape(textline_image)[0] + ###common_window = int(0.15*width) - width1 = int ( width/2. - common_window ) - width2 = int ( width/2. + common_window ) - mid = int(width/2.) + ###width1 = int ( width/2. - common_window ) + ###width2 = int ( width/2. + common_window ) + ###mid = int(width/2.) 
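# A condensed sketch of what these split-point helpers (which this patch comments out)
# computed, assuming a white-background RGB textline crop: smooth the column-wise ink
# projection and take the strongest peak near the middle of the line as the split column.
# The names find_split_column and window_frac are illustrative, not from the codebase.
import numpy as np
from scipy.ndimage import gaussian_filter1d
from scipy.signal import find_peaks

def find_split_column(textline_rgb, window_frac=0.15):
    width = textline_rgb.shape[1]
    lo, hi = int(width * (0.5 - window_frac)), int(width * (0.5 + window_frac))
    profile = gaussian_filter1d(np.sum(textline_rgb[:, :, 0], axis=0), 3)
    peaks, _ = find_peaks(profile, height=0)
    peaks = peaks[(peaks > lo) & (peaks < hi)]
    return int(peaks[np.argmax(profile[peaks])]) if len(peaks) else None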
- img_sum = np.sum(textline_image[:,:,0], axis=0) - sum_smoothed = gaussian_filter1d(img_sum, 3) + ###img_sum = np.sum(textline_image[:,:,0], axis=0) + ###sum_smoothed = gaussian_filter1d(img_sum, 3) - peaks_real, _ = find_peaks(sum_smoothed, height=0) - if len(peaks_real)>70: - peak_start = self.return_start_and_end_of_common_text_of_textline_ocr_new_splitted( - peaks_real, sum_smoothed, width1, mid+2) - peak_end = self.return_start_and_end_of_common_text_of_textline_ocr_new_splitted( - peaks_real, sum_smoothed, mid-2, width2) + ###peaks_real, _ = find_peaks(sum_smoothed, height=0) + ###if len(peaks_real)>70: + ###peak_start = self.return_start_and_end_of_common_text_of_textline_ocr_new_splitted( + ###peaks_real, sum_smoothed, width1, mid+2) + ###peak_end = self.return_start_and_end_of_common_text_of_textline_ocr_new_splitted( + ###peaks_real, sum_smoothed, mid-2, width2) - #plt.figure(ind_tot) - #plt.imshow(textline_image) - #plt.plot([peak_start, peak_start], [0, height-1]) - #plt.plot([peak_end, peak_end], [0, height-1]) - #plt.savefig('./'+str(ind_tot)+'.png') + ####plt.figure(ind_tot) + ####plt.imshow(textline_image) + ####plt.plot([peak_start, peak_start], [0, height-1]) + ####plt.plot([peak_end, peak_end], [0, height-1]) + ####plt.savefig('./'+str(ind_tot)+'.png') - return peak_start, peak_end - else: - pass + ###return peak_start, peak_end + ###else: + ###pass - def return_ocr_of_textline_without_common_section( - self, textline_image, model_ocr, processor, device, width_textline, h2w_ratio,ind_tot): + ##def return_ocr_of_textline_without_common_section( + ##self, textline_image, model_ocr, processor, device, width_textline, h2w_ratio,ind_tot): - if h2w_ratio > 0.05: - pixel_values = processor(textline_image, return_tensors="pt").pixel_values - generated_ids = model_ocr.generate(pixel_values.to(device)) - generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0] - else: - #width = np.shape(textline_image)[1] - #height = np.shape(textline_image)[0] - #common_window = int(0.3*width) - #width1 = int ( width/2. - common_window ) - #width2 = int ( width/2. + common_window ) + ##if h2w_ratio > 0.05: + ##pixel_values = processor(textline_image, return_tensors="pt").pixel_values + ##generated_ids = model_ocr.generate(pixel_values.to(device)) + ##generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0] + ##else: + ###width = np.shape(textline_image)[1] + ###height = np.shape(textline_image)[0] + ###common_window = int(0.3*width) + ###width1 = int ( width/2. - common_window ) + ###width2 = int ( width/2. 
+ common_window ) - split_point = self.return_start_and_end_of_common_text_of_textline_ocr_without_common_section( - textline_image, ind_tot) - if split_point: - image1 = textline_image[:, :split_point,:]# image.crop((0, 0, width2, height)) - image2 = textline_image[:, split_point:,:]#image.crop((width1, 0, width, height)) + ##split_point = self.return_start_and_end_of_common_text_of_textline_ocr_without_common_section( + ##textline_image, ind_tot) + ##if split_point: + ##image1 = textline_image[:, :split_point,:]# image.crop((0, 0, width2, height)) + ##image2 = textline_image[:, split_point:,:]#image.crop((width1, 0, width, height)) - #pixel_values1 = processor(image1, return_tensors="pt").pixel_values - #pixel_values2 = processor(image2, return_tensors="pt").pixel_values + ###pixel_values1 = processor(image1, return_tensors="pt").pixel_values + ###pixel_values2 = processor(image2, return_tensors="pt").pixel_values - pixel_values_merged = processor([image1,image2], return_tensors="pt").pixel_values - generated_ids_merged = model_ocr.generate(pixel_values_merged.to(device)) - generated_text_merged = processor.batch_decode(generated_ids_merged, skip_special_tokens=True) + ##pixel_values_merged = processor([image1,image2], return_tensors="pt").pixel_values + ##generated_ids_merged = model_ocr.generate(pixel_values_merged.to(device)) + ##generated_text_merged = processor.batch_decode(generated_ids_merged, skip_special_tokens=True) - #print(generated_text_merged,'generated_text_merged') + ###print(generated_text_merged,'generated_text_merged') - #generated_ids1 = model_ocr.generate(pixel_values1.to(device)) - #generated_ids2 = model_ocr.generate(pixel_values2.to(device)) + ###generated_ids1 = model_ocr.generate(pixel_values1.to(device)) + ###generated_ids2 = model_ocr.generate(pixel_values2.to(device)) - #generated_text1 = processor.batch_decode(generated_ids1, skip_special_tokens=True)[0] - #generated_text2 = processor.batch_decode(generated_ids2, skip_special_tokens=True)[0] + ###generated_text1 = processor.batch_decode(generated_ids1, skip_special_tokens=True)[0] + ###generated_text2 = processor.batch_decode(generated_ids2, skip_special_tokens=True)[0] - #generated_text = generated_text1 + ' ' + generated_text2 - generated_text = generated_text_merged[0] + ' ' + generated_text_merged[1] + ###generated_text = generated_text1 + ' ' + generated_text2 + ##generated_text = generated_text_merged[0] + ' ' + generated_text_merged[1] - #print(generated_text1,'generated_text1') - #print(generated_text2, 'generated_text2') - #print('########################################') - else: - pixel_values = processor(textline_image, return_tensors="pt").pixel_values - generated_ids = model_ocr.generate(pixel_values.to(device)) - generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0] + ###print(generated_text1,'generated_text1') + ###print(generated_text2, 'generated_text2') + ###print('########################################') + ##else: + ##pixel_values = processor(textline_image, return_tensors="pt").pixel_values + ##generated_ids = model_ocr.generate(pixel_values.to(device)) + ##generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0] - #print(generated_text,'generated_text') - #print('########################################') - return generated_text + ###print(generated_text,'generated_text') + ###print('########################################') + ##return generated_text - def return_ocr_of_textline( - self, textline_image, model_ocr, processor, 
device, width_textline, h2w_ratio,ind_tot): + ###def return_ocr_of_textline( + ###self, textline_image, model_ocr, processor, device, width_textline, h2w_ratio,ind_tot): - if h2w_ratio > 0.05: - pixel_values = processor(textline_image, return_tensors="pt").pixel_values - generated_ids = model_ocr.generate(pixel_values.to(device)) - generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0] - else: - #width = np.shape(textline_image)[1] - #height = np.shape(textline_image)[0] - #common_window = int(0.3*width) - #width1 = int ( width/2. - common_window ) - #width2 = int ( width/2. + common_window ) + ###if h2w_ratio > 0.05: + ###pixel_values = processor(textline_image, return_tensors="pt").pixel_values + ###generated_ids = model_ocr.generate(pixel_values.to(device)) + ###generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0] + ###else: + ####width = np.shape(textline_image)[1] + ####height = np.shape(textline_image)[0] + ####common_window = int(0.3*width) + ####width1 = int ( width/2. - common_window ) + ####width2 = int ( width/2. + common_window ) - try: - width1, width2 = self.return_start_and_end_of_common_text_of_textline_ocr_new(textline_image, ind_tot) + ###try: + ###width1, width2 = self.return_start_and_end_of_common_text_of_textline_ocr_new(textline_image, ind_tot) - image1 = textline_image[:, :width2,:]# image.crop((0, 0, width2, height)) - image2 = textline_image[:, width1:,:]#image.crop((width1, 0, width, height)) + ###image1 = textline_image[:, :width2,:]# image.crop((0, 0, width2, height)) + ###image2 = textline_image[:, width1:,:]#image.crop((width1, 0, width, height)) - pixel_values1 = processor(image1, return_tensors="pt").pixel_values - pixel_values2 = processor(image2, return_tensors="pt").pixel_values + ###pixel_values1 = processor(image1, return_tensors="pt").pixel_values + ###pixel_values2 = processor(image2, return_tensors="pt").pixel_values - generated_ids1 = model_ocr.generate(pixel_values1.to(device)) - generated_ids2 = model_ocr.generate(pixel_values2.to(device)) + ###generated_ids1 = model_ocr.generate(pixel_values1.to(device)) + ###generated_ids2 = model_ocr.generate(pixel_values2.to(device)) - generated_text1 = processor.batch_decode(generated_ids1, skip_special_tokens=True)[0] - generated_text2 = processor.batch_decode(generated_ids2, skip_special_tokens=True)[0] - #print(generated_text1,'generated_text1') - #print(generated_text2, 'generated_text2') - #print('########################################') + ###generated_text1 = processor.batch_decode(generated_ids1, skip_special_tokens=True)[0] + ###generated_text2 = processor.batch_decode(generated_ids2, skip_special_tokens=True)[0] + ####print(generated_text1,'generated_text1') + ####print(generated_text2, 'generated_text2') + ####print('########################################') - match = sq(None, generated_text1, generated_text2).find_longest_match( - 0, len(generated_text1), 0, len(generated_text2)) - generated_text = generated_text1 + generated_text2[match.b+match.size:] - except: - pixel_values = processor(textline_image, return_tensors="pt").pixel_values - generated_ids = model_ocr.generate(pixel_values.to(device)) - generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0] + ###match = sq(None, generated_text1, generated_text2).find_longest_match( + ###0, len(generated_text1), 0, len(generated_text2)) + ###generated_text = generated_text1 + generated_text2[match.b+match.size:] + ###except: + ###pixel_values = 
processor(textline_image, return_tensors="pt").pixel_values + ###generated_ids = model_ocr.generate(pixel_values.to(device)) + ###generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0] - return generated_text + ###return generated_text - def return_textline_contour_with_added_box_coordinate(self, textline_contour, box_ind): - textline_contour[:,0] = textline_contour[:,0] + box_ind[2] - textline_contour[:,1] = textline_contour[:,1] + box_ind[0] - return textline_contour def return_list_of_contours_with_desired_order(self, ls_cons, sorted_indexes): return [ls_cons[sorted_indexes[index]] for index in range(len(sorted_indexes))] @@ -4625,6 +4681,7 @@ class Eynollah: raise ValueError("run requires either a single image filename or a directory") for img_filename in self.ls_imgs: + print(img_filename, 'img_filename') self.logger.info(img_filename) t0 = time.time() @@ -4698,13 +4755,19 @@ class Eynollah: all_box_coord_marginals = [] polygons_lines_xml = [] contours_tables = [] - ocr_all_textlines = None conf_contours_textregions =[0] + + if self.ocr and not self.tr: + gc.collect() + ocr_all_textlines = return_rnn_cnn_ocr_of_given_textlines(image_page, all_found_textline_polygons, self.prediction_model, self.b_s_ocr, self.num_to_char, textline_light=True) + else: + ocr_all_textlines = None + pcgts = self.writer.build_pagexml_no_full_layout( cont_page, page_coord, order_text_new, id_of_texts_tot, all_found_textline_polygons, page_coord, polygons_of_images, polygons_of_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals, - cont_page, polygons_lines_xml, contours_tables, ocr_all_textlines, conf_contours_textregions) + cont_page, polygons_lines_xml, contours_tables, ocr_all_textlines, conf_contours_textregions, self.skip_layout_and_reading_order) return pcgts #print("text region early -1 in %.1fs", time.time() - t0) @@ -5118,7 +5181,7 @@ class Eynollah: tror = time.time() order_text_new, id_of_texts_tot = self.do_order_of_regions_with_model( contours_only_text_parent, contours_only_text_parent_h, text_regions_p) - print('time spend for mb ro', time.time()-tror) + print('time spend for mb ro', time.time()-tror) else: if np.abs(slope_deskew) < SLOPE_THRESHOLD: order_text_new, id_of_texts_tot = self.do_order_of_regions( @@ -5160,7 +5223,7 @@ class Eynollah: order_text_new, id_of_texts_tot = self.do_order_of_regions( contours_only_text_parent_d_ordered, contours_only_text_parent_h, boxes_d, textline_mask_tot_d) - if self.ocr: + if self.ocr and self.tr: device = cuda.get_current_device() device.reset() gc.collect() @@ -5207,6 +5270,11 @@ class Eynollah: ocr_textline_in_textregion.append(text_ocr) ind_tot = ind_tot +1 ocr_all_textlines.append(ocr_textline_in_textregion) + + elif self.ocr and not self.tr: + gc.collect() + ocr_all_textlines = return_rnn_cnn_ocr_of_given_textlines(image_page, all_found_textline_polygons, self.prediction_model, self.b_s_ocr, self.num_to_char, self.textline_light, self.curved_line) + else: ocr_all_textlines = None @@ -5289,329 +5357,6 @@ class Eynollah_ocr: vocabulary=char_to_num.get_vocabulary(), mask_token=None, invert=True ) - - def decode_batch_predictions(self, pred, max_len = 128): - # input_len is the product of the batch size and the - # number of time steps. - input_len = np.ones(pred.shape[0]) * pred.shape[1] - - # Decode CTC predictions using greedy search. - # decoded is a tuple with 2 elements. 
- decoded = tf.keras.backend.ctc_decode(pred, - input_length = input_len, - beam_width = 100) - # The outputs are in the first element of the tuple. - # Additionally, the first element is actually a list, - # therefore we take the first element of that list as well. - #print(decoded,'decoded') - decoded = decoded[0][0][:, :max_len] - - #print(decoded, decoded.shape,'decoded') - - output = [] - for d in decoded: - # Convert the predicted indices to the corresponding chars. - d = tf.strings.reduce_join(self.num_to_char(d)) - d = d.numpy().decode("utf-8") - output.append(d) - return output - - - def distortion_free_resize(self, image, img_size): - w, h = img_size - image = tf.image.resize(image, size=(h, w), preserve_aspect_ratio=True) - - # Check tha amount of padding needed to be done. - pad_height = h - tf.shape(image)[0] - pad_width = w - tf.shape(image)[1] - - # Only necessary if you want to do same amount of padding on both sides. - if pad_height % 2 != 0: - height = pad_height // 2 - pad_height_top = height + 1 - pad_height_bottom = height - else: - pad_height_top = pad_height_bottom = pad_height // 2 - - if pad_width % 2 != 0: - width = pad_width // 2 - pad_width_left = width + 1 - pad_width_right = width - else: - pad_width_left = pad_width_right = pad_width // 2 - - image = tf.pad( - image, - paddings=[ - [pad_height_top, pad_height_bottom], - [pad_width_left, pad_width_right], - [0, 0], - ], - ) - - image = tf.transpose(image, (1, 0, 2)) - image = tf.image.flip_left_right(image) - return image - - def return_start_and_end_of_common_text_of_textline_ocr_without_common_section(self, textline_image): - width = np.shape(textline_image)[1] - height = np.shape(textline_image)[0] - common_window = int(0.22*width) - - width1 = int ( width/2. - common_window ) - width2 = int ( width/2. + common_window ) - - img_sum = np.sum(textline_image[:,:,0], axis=0) - sum_smoothed = gaussian_filter1d(img_sum, 3) - - peaks_real, _ = find_peaks(sum_smoothed, height=0) - - if len(peaks_real)>35: - - #peaks_real = peaks_real[(peaks_realwidth1)] - argsort = np.argsort(sum_smoothed[peaks_real])[::-1] - peaks_real_top_six = peaks_real[argsort[:6]] - midpoint = textline_image.shape[1] / 2. 
- arg_closest = np.argmin(np.abs(peaks_real_top_six - midpoint)) - - #arg_max = np.argmax(sum_smoothed[peaks_real]) - - peaks_final = peaks_real_top_six[arg_closest]#peaks_real[arg_max] - - return peaks_final - else: - return None - - # Function to fit text inside the given area - def fit_text_single_line(self, draw, text, font_path, max_width, max_height): - initial_font_size = 50 - font_size = initial_font_size - while font_size > 10: # Minimum font size - font = ImageFont.truetype(font_path, font_size) - text_bbox = draw.textbbox((0, 0), text, font=font) # Get text bounding box - text_width = text_bbox[2] - text_bbox[0] - text_height = text_bbox[3] - text_bbox[1] - - if text_width <= max_width and text_height <= max_height: - return font # Return the best-fitting font - - font_size -= 2 # Reduce font size and retry - - return ImageFont.truetype(font_path, 10) # Smallest font fallback - - def return_textlines_split_if_needed(self, textline_image, textline_image_bin): - - split_point = self.return_start_and_end_of_common_text_of_textline_ocr_without_common_section(textline_image) - if split_point: - image1 = textline_image[:, :split_point,:]# image.crop((0, 0, width2, height)) - image2 = textline_image[:, split_point:,:]#image.crop((width1, 0, width, height)) - if self.prediction_with_both_of_rgb_and_bin: - image1_bin = textline_image_bin[:, :split_point,:]# image.crop((0, 0, width2, height)) - image2_bin = textline_image_bin[:, split_point:,:]#image.crop((width1, 0, width, height)) - return [image1, image2], [image1_bin, image2_bin] - else: - return [image1, image2], None - else: - return None, None - def preprocess_and_resize_image_for_ocrcnn_model(self, img, image_height, image_width): - ratio = image_height /float(img.shape[0]) - w_ratio = int(ratio * img.shape[1]) - - if w_ratio <= image_width: - width_new = w_ratio - else: - width_new = image_width - - if width_new == 0: - width_new = img.shape[1] - - ##if width_new+32 >= image_width: - ##width_new = width_new - 32 - - ###patch_zero = np.zeros((32, 32, 3))#+255 - ###patch_zero[9:19,8:18,:] = 0 - - - img = resize_image(img, image_height, width_new) - img_fin = np.ones((image_height, image_width, 3))*255 - ###img_fin[:,:32,:] = patch_zero[:,:,:] - ###img_fin[:,32:32+width_new,:] = img[:,:,:] - img_fin[:,:width_new,:] = img[:,:,:] - img_fin = img_fin / 255. 
- return img_fin - - def get_deskewed_contour_and_bb_and_image(self, contour, image, deskew_angle): - (h_in, w_in) = image.shape[:2] - center = (w_in // 2, h_in // 2) - - rotation_matrix = cv2.getRotationMatrix2D(center, deskew_angle, 1.0) - - cos_angle = abs(rotation_matrix[0, 0]) - sin_angle = abs(rotation_matrix[0, 1]) - new_w = int((h_in * sin_angle) + (w_in * cos_angle)) - new_h = int((h_in * cos_angle) + (w_in * sin_angle)) - - rotation_matrix[0, 2] += (new_w / 2) - center[0] - rotation_matrix[1, 2] += (new_h / 2) - center[1] - - deskewed_image = cv2.warpAffine(image, rotation_matrix, (new_w, new_h)) - - contour_points = np.array(contour, dtype=np.float32) - transformed_points = cv2.transform(np.array([contour_points]), rotation_matrix)[0] - - x, y, w, h = cv2.boundingRect(np.array(transformed_points, dtype=np.int32)) - cropped_textline = deskewed_image[y:y+h, x:x+w] - - return cropped_textline - - def rotate_image_with_padding(self, image, angle, border_value=(0,0,0)): - # Get image dimensions - (h, w) = image.shape[:2] - - # Calculate the center of the image - center = (w // 2, h // 2) - - # Get the rotation matrix - rotation_matrix = cv2.getRotationMatrix2D(center, angle, 1.0) - - # Compute the new bounding dimensions - cos = abs(rotation_matrix[0, 0]) - sin = abs(rotation_matrix[0, 1]) - new_w = int((h * sin) + (w * cos)) - new_h = int((h * cos) + (w * sin)) - - # Adjust the rotation matrix to account for translation - rotation_matrix[0, 2] += (new_w / 2) - center[0] - rotation_matrix[1, 2] += (new_h / 2) - center[1] - - # Perform the rotation - rotated_image = cv2.warpAffine(image, rotation_matrix, (new_w, new_h), borderValue=border_value) - - return rotated_image - - def get_orientation_moments(self, contour): - moments = cv2.moments(contour) - if moments["mu20"] - moments["mu02"] == 0: # Avoid division by zero - return 90 if moments["mu11"] > 0 else -90 - else: - angle = 0.5 * np.arctan2(2 * moments["mu11"], moments["mu20"] - moments["mu02"]) - return np.degrees(angle) # Convert radians to degrees - - - def get_orientation_moments_of_mask(self, mask): - mask=mask.astype('uint8') - contours, _ = cv2.findContours(mask[:,:,0], cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) - - largest_contour = max(contours, key=cv2.contourArea) if contours else None - - moments = cv2.moments(largest_contour) - if moments["mu20"] - moments["mu02"] == 0: # Avoid division by zero - return 90 if moments["mu11"] > 0 else -90 - else: - angle = 0.5 * np.arctan2(2 * moments["mu11"], moments["mu20"] - moments["mu02"]) - return np.degrees(angle) # Convert radians to degrees - - def get_contours_and_bounding_boxes(self, mask): - # Find contours in the binary mask - contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) - - largest_contour = max(contours, key=cv2.contourArea) if contours else None - - # Get the bounding rectangle for the contour - x, y, w, h = cv2.boundingRect(largest_contour) - #bounding_boxes.append((x, y, w, h)) - - return x, y, w, h - - def return_splitting_point_of_image(self, image_to_spliited): - width = np.shape(image_to_spliited)[1] - height = np.shape(image_to_spliited)[0] - common_window = int(0.03*width) - - width1 = int ( common_window) - width2 = int ( width - common_window ) - - img_sum = np.sum(image_to_spliited[:,:,0], axis=0) - sum_smoothed = gaussian_filter1d(img_sum, 1) - - peaks_real, _ = find_peaks(sum_smoothed, height=0) - peaks_real = peaks_real[(peaks_realwidth1)] - - arg_sort = np.argsort(sum_smoothed[peaks_real]) - peaks_sort_4 = 
peaks_real[arg_sort][::-1][:4] - - return np.sort(peaks_sort_4) - - def break_curved_line_into_small_pieces_and_then_merge(self, img_curved, mask_curved): - peaks_4 = self.return_splitting_point_of_image(img_curved) - if len(peaks_4)>0: - imgs_tot = [] - - for ind in range(len(peaks_4)+1): - if ind==0: - img = img_curved[:, :peaks_4[ind], :] - mask = mask_curved[:, :peaks_4[ind], :] - elif ind==len(peaks_4): - img = img_curved[:, peaks_4[ind-1]:, :] - mask = mask_curved[:, peaks_4[ind-1]:, :] - else: - img = img_curved[:, peaks_4[ind-1]:peaks_4[ind], :] - mask = mask_curved[:, peaks_4[ind-1]:peaks_4[ind], :] - - or_ma = self.get_orientation_moments_of_mask(mask) - - imgs_tot.append([img, mask, or_ma] ) - - - w_tot_des_list = [] - w_tot_des = 0 - imgs_deskewed_list = [] - for ind in range(len(imgs_tot)): - img_in = imgs_tot[ind][0] - mask_in = imgs_tot[ind][1] - ori_in = imgs_tot[ind][2] - - if abs(ori_in)<45: - img_in_des = self.rotate_image_with_padding(img_in, ori_in, border_value=(255,255,255) ) - mask_in_des = self.rotate_image_with_padding(mask_in, ori_in) - mask_in_des = mask_in_des.astype('uint8') - - #new bounding box - x_n, y_n, w_n, h_n = self.get_contours_and_bounding_boxes(mask_in_des[:,:,0]) - - mask_in_des = mask_in_des[y_n:y_n+h_n, x_n:x_n+w_n, :] - img_in_des = img_in_des[y_n:y_n+h_n, x_n:x_n+w_n, :] - - w_relative = int(32 * img_in_des.shape[1]/float(img_in_des.shape[0]) ) - if w_relative==0: - w_relative = img_in_des.shape[1] - img_in_des = resize_image(img_in_des, 32, w_relative) - - - else: - img_in_des = np.copy(img_in) - w_relative = int(32 * img_in_des.shape[1]/float(img_in_des.shape[0]) ) - if w_relative==0: - w_relative = img_in_des.shape[1] - img_in_des = resize_image(img_in_des, 32, w_relative) - - w_tot_des+=img_in_des.shape[1] - w_tot_des_list.append(img_in_des.shape[1]) - imgs_deskewed_list.append(img_in_des) - - - - - img_final_deskewed = np.zeros((32, w_tot_des, 3))+255 - - w_indexer = 0 - for ind in range(len(w_tot_des_list)): - img_final_deskewed[:,w_indexer:w_indexer+w_tot_des_list[ind],:] = imgs_deskewed_list[ind][:,:,:] - w_indexer = w_indexer+w_tot_des_list[ind] - return img_final_deskewed - else: - return img_curved - def run(self): ls_imgs = os.listdir(self.dir_in) @@ -6069,7 +5814,7 @@ class Eynollah_ocr: preds_bin = self.prediction_model.predict(imgs_bin, verbose=0) preds = (preds + preds_bin) / 2. - pred_texts = self.decode_batch_predictions(preds) + pred_texts = self.decode_batch_predictions(preds, self.num_to_char) for ib in range(imgs.shape[0]): pred_texts_ib = pred_texts[ib].replace("[UNK]", "") diff --git a/src/eynollah/utils/utils_ocr.py b/src/eynollah/utils/utils_ocr.py new file mode 100644 index 0000000..44367b6 --- /dev/null +++ b/src/eynollah/utils/utils_ocr.py @@ -0,0 +1,435 @@ +import numpy as np +import cv2 +import tensorflow as tf +from scipy.signal import find_peaks +from scipy.ndimage import gaussian_filter1d +import math +from .resize import resize_image + +def decode_batch_predictions(pred, num_to_char, max_len = 128): + # input_len is the product of the batch size and the + # number of time steps. + input_len = np.ones(pred.shape[0]) * pred.shape[1] + + # Decode CTC predictions using greedy search. + # decoded is a tuple with 2 elements. + decoded = tf.keras.backend.ctc_decode(pred, + input_length = input_len, + beam_width = 100) + # The outputs are in the first element of the tuple. + # Additionally, the first element is actually a list, + # therefore we take the first element of that list as well. 
+ #print(decoded,'decoded') + decoded = decoded[0][0][:, :max_len] + + #print(decoded, decoded.shape,'decoded') + + output = [] + for d in decoded: + # Convert the predicted indices to the corresponding chars. + d = tf.strings.reduce_join(num_to_char(d)) + d = d.numpy().decode("utf-8") + output.append(d) + return output + + +def distortion_free_resize(image, img_size): + w, h = img_size + image = tf.image.resize(image, size=(h, w), preserve_aspect_ratio=True) + + # Check tha amount of padding needed to be done. + pad_height = h - tf.shape(image)[0] + pad_width = w - tf.shape(image)[1] + + # Only necessary if you want to do same amount of padding on both sides. + if pad_height % 2 != 0: + height = pad_height // 2 + pad_height_top = height + 1 + pad_height_bottom = height + else: + pad_height_top = pad_height_bottom = pad_height // 2 + + if pad_width % 2 != 0: + width = pad_width // 2 + pad_width_left = width + 1 + pad_width_right = width + else: + pad_width_left = pad_width_right = pad_width // 2 + + image = tf.pad( + image, + paddings=[ + [pad_height_top, pad_height_bottom], + [pad_width_left, pad_width_right], + [0, 0], + ], + ) + + image = tf.transpose(image, (1, 0, 2)) + image = tf.image.flip_left_right(image) + return image + +def return_start_and_end_of_common_text_of_textline_ocr_without_common_section(textline_image): + width = np.shape(textline_image)[1] + height = np.shape(textline_image)[0] + common_window = int(0.22*width) + + width1 = int ( width/2. - common_window ) + width2 = int ( width/2. + common_window ) + + img_sum = np.sum(textline_image[:,:,0], axis=0) + sum_smoothed = gaussian_filter1d(img_sum, 3) + + peaks_real, _ = find_peaks(sum_smoothed, height=0) + + if len(peaks_real)>35: + + #peaks_real = peaks_real[(peaks_realwidth1)] + argsort = np.argsort(sum_smoothed[peaks_real])[::-1] + peaks_real_top_six = peaks_real[argsort[:6]] + midpoint = textline_image.shape[1] / 2. 
+ arg_closest = np.argmin(np.abs(peaks_real_top_six - midpoint)) + + #arg_max = np.argmax(sum_smoothed[peaks_real]) + + peaks_final = peaks_real_top_six[arg_closest]#peaks_real[arg_max] + + return peaks_final + else: + return None + +# Function to fit text inside the given area +def fit_text_single_line(draw, text, font_path, max_width, max_height): + initial_font_size = 50 + font_size = initial_font_size + while font_size > 10: # Minimum font size + font = ImageFont.truetype(font_path, font_size) + text_bbox = draw.textbbox((0, 0), text, font=font) # Get text bounding box + text_width = text_bbox[2] - text_bbox[0] + text_height = text_bbox[3] - text_bbox[1] + + if text_width <= max_width and text_height <= max_height: + return font # Return the best-fitting font + + font_size -= 2 # Reduce font size and retry + + return ImageFont.truetype(font_path, 10) # Smallest font fallback + +def return_textlines_split_if_needed(textline_image, textline_image_bin, prediction_with_both_of_rgb_and_bin=False): + + split_point = return_start_and_end_of_common_text_of_textline_ocr_without_common_section(textline_image) + if split_point: + image1 = textline_image[:, :split_point,:]# image.crop((0, 0, width2, height)) + image2 = textline_image[:, split_point:,:]#image.crop((width1, 0, width, height)) + if prediction_with_both_of_rgb_and_bin: + image1_bin = textline_image_bin[:, :split_point,:]# image.crop((0, 0, width2, height)) + image2_bin = textline_image_bin[:, split_point:,:]#image.crop((width1, 0, width, height)) + return [image1, image2], [image1_bin, image2_bin] + else: + return [image1, image2], None + else: + return None, None +def preprocess_and_resize_image_for_ocrcnn_model(img, image_height, image_width): + ratio = image_height /float(img.shape[0]) + w_ratio = int(ratio * img.shape[1]) + + if w_ratio <= image_width: + width_new = w_ratio + else: + width_new = image_width + + if width_new == 0: + width_new = img.shape[1] + + + img = resize_image(img, image_height, width_new) + img_fin = np.ones((image_height, image_width, 3))*255 + + img_fin[:,:width_new,:] = img[:,:,:] + img_fin = img_fin / 255. 
+ return img_fin + +def get_deskewed_contour_and_bb_and_image(contour, image, deskew_angle): + (h_in, w_in) = image.shape[:2] + center = (w_in // 2, h_in // 2) + + rotation_matrix = cv2.getRotationMatrix2D(center, deskew_angle, 1.0) + + cos_angle = abs(rotation_matrix[0, 0]) + sin_angle = abs(rotation_matrix[0, 1]) + new_w = int((h_in * sin_angle) + (w_in * cos_angle)) + new_h = int((h_in * cos_angle) + (w_in * sin_angle)) + + rotation_matrix[0, 2] += (new_w / 2) - center[0] + rotation_matrix[1, 2] += (new_h / 2) - center[1] + + deskewed_image = cv2.warpAffine(image, rotation_matrix, (new_w, new_h)) + + contour_points = np.array(contour, dtype=np.float32) + transformed_points = cv2.transform(np.array([contour_points]), rotation_matrix)[0] + + x, y, w, h = cv2.boundingRect(np.array(transformed_points, dtype=np.int32)) + cropped_textline = deskewed_image[y:y+h, x:x+w] + + return cropped_textline + +def rotate_image_with_padding(image, angle, border_value=(0,0,0)): + # Get image dimensions + (h, w) = image.shape[:2] + + # Calculate the center of the image + center = (w // 2, h // 2) + + # Get the rotation matrix + rotation_matrix = cv2.getRotationMatrix2D(center, angle, 1.0) + + # Compute the new bounding dimensions + cos = abs(rotation_matrix[0, 0]) + sin = abs(rotation_matrix[0, 1]) + new_w = int((h * sin) + (w * cos)) + new_h = int((h * cos) + (w * sin)) + + # Adjust the rotation matrix to account for translation + rotation_matrix[0, 2] += (new_w / 2) - center[0] + rotation_matrix[1, 2] += (new_h / 2) - center[1] + + # Perform the rotation + rotated_image = cv2.warpAffine(image, rotation_matrix, (new_w, new_h), borderValue=border_value) + + return rotated_image + +def get_orientation_moments(contour): + moments = cv2.moments(contour) + if moments["mu20"] - moments["mu02"] == 0: # Avoid division by zero + return 90 if moments["mu11"] > 0 else -90 + else: + angle = 0.5 * np.arctan2(2 * moments["mu11"], moments["mu20"] - moments["mu02"]) + return np.degrees(angle) # Convert radians to degrees + + +def get_orientation_moments_of_mask(mask): + mask=mask.astype('uint8') + contours, _ = cv2.findContours(mask[:,:,0], cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) + + largest_contour = max(contours, key=cv2.contourArea) if contours else None + + moments = cv2.moments(largest_contour) + if moments["mu20"] - moments["mu02"] == 0: # Avoid division by zero + return 90 if moments["mu11"] > 0 else -90 + else: + angle = 0.5 * np.arctan2(2 * moments["mu11"], moments["mu20"] - moments["mu02"]) + return np.degrees(angle) # Convert radians to degrees + +def get_contours_and_bounding_boxes(mask): + # Find contours in the binary mask + contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) + + largest_contour = max(contours, key=cv2.contourArea) if contours else None + + # Get the bounding rectangle for the contour + x, y, w, h = cv2.boundingRect(largest_contour) + #bounding_boxes.append((x, y, w, h)) + + return x, y, w, h + +def return_splitting_point_of_image(image_to_spliited): + width = np.shape(image_to_spliited)[1] + height = np.shape(image_to_spliited)[0] + common_window = int(0.03*width) + + width1 = int ( common_window) + width2 = int ( width - common_window ) + + img_sum = np.sum(image_to_spliited[:,:,0], axis=0) + sum_smoothed = gaussian_filter1d(img_sum, 1) + + peaks_real, _ = find_peaks(sum_smoothed, height=0) + peaks_real = peaks_real[(peaks_realwidth1)] + + arg_sort = np.argsort(sum_smoothed[peaks_real]) + peaks_sort_4 = peaks_real[arg_sort][::-1][:4] + + return 
np.sort(peaks_sort_4) + +def break_curved_line_into_small_pieces_and_then_merge(img_curved, mask_curved): + peaks_4 = return_splitting_point_of_image(img_curved) + if len(peaks_4)>0: + imgs_tot = [] + + for ind in range(len(peaks_4)+1): + if ind==0: + img = img_curved[:, :peaks_4[ind], :] + mask = mask_curved[:, :peaks_4[ind], :] + elif ind==len(peaks_4): + img = img_curved[:, peaks_4[ind-1]:, :] + mask = mask_curved[:, peaks_4[ind-1]:, :] + else: + img = img_curved[:, peaks_4[ind-1]:peaks_4[ind], :] + mask = mask_curved[:, peaks_4[ind-1]:peaks_4[ind], :] + + or_ma = get_orientation_moments_of_mask(mask) + + imgs_tot.append([img, mask, or_ma] ) + + + w_tot_des_list = [] + w_tot_des = 0 + imgs_deskewed_list = [] + for ind in range(len(imgs_tot)): + img_in = imgs_tot[ind][0] + mask_in = imgs_tot[ind][1] + ori_in = imgs_tot[ind][2] + + if abs(ori_in)<45: + img_in_des = rotate_image_with_padding(img_in, ori_in, border_value=(255,255,255) ) + mask_in_des = rotate_image_with_padding(mask_in, ori_in) + mask_in_des = mask_in_des.astype('uint8') + + #new bounding box + x_n, y_n, w_n, h_n = get_contours_and_bounding_boxes(mask_in_des[:,:,0]) + + mask_in_des = mask_in_des[y_n:y_n+h_n, x_n:x_n+w_n, :] + img_in_des = img_in_des[y_n:y_n+h_n, x_n:x_n+w_n, :] + + w_relative = int(32 * img_in_des.shape[1]/float(img_in_des.shape[0]) ) + if w_relative==0: + w_relative = img_in_des.shape[1] + img_in_des = resize_image(img_in_des, 32, w_relative) + + + else: + img_in_des = np.copy(img_in) + w_relative = int(32 * img_in_des.shape[1]/float(img_in_des.shape[0]) ) + if w_relative==0: + w_relative = img_in_des.shape[1] + img_in_des = resize_image(img_in_des, 32, w_relative) + + w_tot_des+=img_in_des.shape[1] + w_tot_des_list.append(img_in_des.shape[1]) + imgs_deskewed_list.append(img_in_des) + + + + + img_final_deskewed = np.zeros((32, w_tot_des, 3))+255 + + w_indexer = 0 + for ind in range(len(w_tot_des_list)): + img_final_deskewed[:,w_indexer:w_indexer+w_tot_des_list[ind],:] = imgs_deskewed_list[ind][:,:,:] + w_indexer = w_indexer+w_tot_des_list[ind] + return img_final_deskewed + else: + return img_curved + +def return_textline_contour_with_added_box_coordinate(textline_contour, box_ind): + textline_contour[:,0] = textline_contour[:,0] + box_ind[2] + textline_contour[:,1] = textline_contour[:,1] + box_ind[0] + return textline_contour + + +def return_rnn_cnn_ocr_of_given_textlines(image, all_found_textline_polygons, prediction_model, b_s_ocr, num_to_char, textline_light=False, curved_line=False): + max_len = 512 + padding_token = 299 + image_width = 512#max_len * 4 + image_height = 32 + ind_tot = 0 + #cv2.imwrite('./img_out.png', image_page) + ocr_all_textlines = [] + cropped_lines_region_indexer = [] + cropped_lines_meging_indexing = [] + cropped_lines = [] + indexer_text_region = 0 + + for indexing, ind_poly_first in enumerate(all_found_textline_polygons): + #ocr_textline_in_textregion = [] + for indexing2, ind_poly in enumerate(ind_poly_first): + cropped_lines_region_indexer.append(indexer_text_region) + if not (textline_light or curved_line): + ind_poly = copy.deepcopy(ind_poly) + box_ind = all_box_coord[indexing] + + ind_poly = return_textline_contour_with_added_box_coordinate(ind_poly, box_ind) + #print(ind_poly_copy) + ind_poly[ind_poly<0] = 0 + x, y, w, h = cv2.boundingRect(ind_poly) + + w_scaled = w * image_height/float(h) + + mask_poly = np.zeros(image.shape) + + img_poly_on_img = np.copy(image) + + mask_poly = cv2.fillPoly(mask_poly, pts=[ind_poly], color=(1, 1, 1)) + + + + mask_poly = mask_poly[y:y+h, 
x:x+w, :] + img_crop = img_poly_on_img[y:y+h, x:x+w, :] + + img_crop[mask_poly==0] = 255 + + if w_scaled < 640:#1.5*image_width: + img_fin = preprocess_and_resize_image_for_ocrcnn_model(img_crop, image_height, image_width) + cropped_lines.append(img_fin) + cropped_lines_meging_indexing.append(0) + else: + splited_images, splited_images_bin = return_textlines_split_if_needed(img_crop, None) + + if splited_images: + img_fin = preprocess_and_resize_image_for_ocrcnn_model(splited_images[0], image_height, image_width) + cropped_lines.append(img_fin) + cropped_lines_meging_indexing.append(1) + + img_fin = preprocess_and_resize_image_for_ocrcnn_model(splited_images[1], image_height, image_width) + + cropped_lines.append(img_fin) + cropped_lines_meging_indexing.append(-1) + + else: + img_fin = preprocess_and_resize_image_for_ocrcnn_model(img_crop, image_height, image_width) + cropped_lines.append(img_fin) + cropped_lines_meging_indexing.append(0) + + indexer_text_region+=1 + + + extracted_texts = [] + + n_iterations = math.ceil(len(cropped_lines) / b_s_ocr) + + for i in range(n_iterations): + if i==(n_iterations-1): + n_start = i*b_s_ocr + imgs = cropped_lines[n_start:] + imgs = np.array(imgs) + imgs = imgs.reshape(imgs.shape[0], image_height, image_width, 3) + + + else: + n_start = i*b_s_ocr + n_end = (i+1)*b_s_ocr + imgs = cropped_lines[n_start:n_end] + imgs = np.array(imgs).reshape(b_s_ocr, image_height, image_width, 3) + + + preds = prediction_model.predict(imgs, verbose=0) + + pred_texts = decode_batch_predictions(preds, num_to_char) + + for ib in range(imgs.shape[0]): + pred_texts_ib = pred_texts[ib].replace("[UNK]", "") + extracted_texts.append(pred_texts_ib) + + extracted_texts_merged = [extracted_texts[ind] if cropped_lines_meging_indexing[ind]==0 else extracted_texts[ind]+" "+extracted_texts[ind+1] if cropped_lines_meging_indexing[ind]==1 else None for ind in range(len(cropped_lines_meging_indexing))] + + extracted_texts_merged = [ind for ind in extracted_texts_merged if ind is not None] + unique_cropped_lines_region_indexer = np.unique(cropped_lines_region_indexer) + + ocr_all_textlines = [] + for ind in unique_cropped_lines_region_indexer: + ocr_textline_in_textregion = [] + extracted_texts_merged_un = np.array(extracted_texts_merged)[np.array(cropped_lines_region_indexer)==ind] + for it_ind, text_textline in enumerate(extracted_texts_merged_un): + ocr_textline_in_textregion.append(text_textline) + ocr_all_textlines.append(ocr_textline_in_textregion) + return ocr_all_textlines diff --git a/src/eynollah/writer.py b/src/eynollah/writer.py index 8cd1c8e..cf0551b 100644 --- a/src/eynollah/writer.py +++ b/src/eynollah/writer.py @@ -168,7 +168,7 @@ class EynollahXmlWriter(): with open(self.output_filename, 'w') as f: f.write(to_xml(pcgts)) - def build_pagexml_no_full_layout(self, found_polygons_text_region, page_coord, order_of_texts, id_of_texts, all_found_textline_polygons, all_box_coord, found_polygons_text_region_img, found_polygons_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals, cont_page, polygons_lines_to_be_written_in_xml, found_polygons_tables, ocr_all_textlines, conf_contours_textregion): + def build_pagexml_no_full_layout(self, found_polygons_text_region, page_coord, order_of_texts, id_of_texts, all_found_textline_polygons, all_box_coord, found_polygons_text_region_img, found_polygons_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals, cont_page, 
polygons_lines_to_be_written_in_xml, found_polygons_tables, ocr_all_textlines, conf_contours_textregion, skip_layout_reading_order=False): self.logger.debug('enter build_pagexml_no_full_layout') # create the file structure @@ -184,7 +184,7 @@ class EynollahXmlWriter(): for mm in range(len(found_polygons_text_region)): textregion = TextRegionType(id=counter.next_region_id, type_='paragraph', - Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_text_region[mm], page_coord), conf=conf_contours_textregion[mm]), + Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_text_region[mm], page_coord, skip_layout_reading_order), conf=conf_contours_textregion[mm]), ) #textregion.set_conf(conf_contours_textregion[mm]) page.add_TextRegion(textregion) @@ -303,18 +303,28 @@ class EynollahXmlWriter(): return pcgts - def calculate_polygon_coords(self, contour, page_coord): + def calculate_polygon_coords(self, contour, page_coord, skip_layout_reading_order=False): self.logger.debug('enter calculate_polygon_coords') coords = '' for value_bbox in contour: - if len(value_bbox) == 2: - coords += str(int((value_bbox[0] + page_coord[2]) / self.scale_x)) - coords += ',' - coords += str(int((value_bbox[1] + page_coord[0]) / self.scale_y)) + if skip_layout_reading_order: + if len(value_bbox) == 2: + coords += str(int((value_bbox[0]) / self.scale_x)) + coords += ',' + coords += str(int((value_bbox[1]) / self.scale_y)) + else: + coords += str(int((value_bbox[0][0]) / self.scale_x)) + coords += ',' + coords += str(int((value_bbox[0][1]) / self.scale_y)) else: - coords += str(int((value_bbox[0][0] + page_coord[2]) / self.scale_x)) - coords += ',' - coords += str(int((value_bbox[0][1] + page_coord[0]) / self.scale_y)) + if len(value_bbox) == 2: + coords += str(int((value_bbox[0] + page_coord[2]) / self.scale_x)) + coords += ',' + coords += str(int((value_bbox[1] + page_coord[0]) / self.scale_y)) + else: + coords += str(int((value_bbox[0][0] + page_coord[2]) / self.scale_x)) + coords += ',' + coords += str(int((value_bbox[0][1] + page_coord[0]) / self.scale_y)) coords=coords + ' ' return coords[:-1] From adcf03c7b7c91ef379404fe700175e8943439e31 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Fri, 23 May 2025 18:06:53 +0200 Subject: [PATCH 18/40] enhancing ocr --- src/eynollah/eynollah.py | 47 ++++++++++++++++++--------------- src/eynollah/utils/utils_ocr.py | 1 + 2 files changed, 27 insertions(+), 21 deletions(-) diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py index 2564150..1b50713 100644 --- a/src/eynollah/eynollah.py +++ b/src/eynollah/eynollah.py @@ -85,7 +85,12 @@ from .utils.utils_ocr import ( preprocess_and_resize_image_for_ocrcnn_model, return_textlines_split_if_needed, decode_batch_predictions, - return_rnn_cnn_ocr_of_given_textlines + return_rnn_cnn_ocr_of_given_textlines, + fit_text_single_line, + break_curved_line_into_small_pieces_and_then_merge, + get_orientation_moments, + rotate_image_with_padding, + get_contours_and_bounding_boxes ) from .utils.separate_lines import ( textline_contours_postprocessing, @@ -5421,7 +5426,7 @@ class Eynollah_ocr: cropped_lines.append(resize_image(img_crop, tr_ocr_input_height_and_width, tr_ocr_input_height_and_width) ) cropped_lines_meging_indexing.append(0) else: - splited_images, _ = self.return_textlines_split_if_needed(img_crop, None) + splited_images, _ = return_textlines_split_if_needed(img_crop, None) #print(splited_images) if splited_images: cropped_lines.append(resize_image(splited_images[0], 
tr_ocr_input_height_and_width, tr_ocr_input_height_and_width)) @@ -5474,7 +5479,7 @@ class Eynollah_ocr: w_bb = bb_ind[2] h_bb = bb_ind[3] - font = self.fit_text_single_line(draw, extracted_texts_merged[indexer_text], font_path, w_bb, int(h_bb*0.4) ) + font = fit_text_single_line(draw, extracted_texts_merged[indexer_text], font_path, w_bb, int(h_bb*0.4) ) ##draw.rectangle([x_bb, y_bb, x_bb + w_bb, y_bb + h_bb], outline="red", width=2) @@ -5607,14 +5612,14 @@ class Eynollah_ocr: #print(file_name, angle_degrees,w*h , mask_poly[:,:,0].sum(), mask_poly[:,:,0].sum() /float(w*h) , 'didi') if not self.do_not_mask_with_textline_contour: if angle_degrees > 15: - better_des_slope = self.get_orientation_moments(textline_coords) + better_des_slope = get_orientation_moments(textline_coords) - img_crop = self.rotate_image_with_padding(img_crop, better_des_slope ) - mask_poly = self.rotate_image_with_padding(mask_poly, better_des_slope ) + img_crop = rotate_image_with_padding(img_crop, better_des_slope ) + mask_poly = rotate_image_with_padding(mask_poly, better_des_slope ) mask_poly = mask_poly.astype('uint8') #new bounding box - x_n, y_n, w_n, h_n = self.get_contours_and_bounding_boxes(mask_poly[:,:,0]) + x_n, y_n, w_n, h_n = get_contours_and_bounding_boxes(mask_poly[:,:,0]) mask_poly = mask_poly[y_n:y_n+h_n, x_n:x_n+w_n, :] img_crop = img_crop[y_n:y_n+h_n, x_n:x_n+w_n, :] @@ -5622,13 +5627,13 @@ class Eynollah_ocr: img_crop[mask_poly==0] = 255 if mask_poly[:,:,0].sum() /float(w_n*h_n) < 0.50 and w_scaled > 100: - img_crop = self.break_curved_line_into_small_pieces_and_then_merge(img_crop, mask_poly) + img_crop = break_curved_line_into_small_pieces_and_then_merge(img_crop, mask_poly) #print(file_name,w_n*h_n , mask_poly[:,:,0].sum(), mask_poly[:,:,0].sum() /float(w_n*h_n) , 'ikiiiiii') else: img_crop[mask_poly==0] = 255 if mask_poly[:,:,0].sum() /float(w*h) < 0.50 and w_scaled > 100: - img_crop = self.break_curved_line_into_small_pieces_and_then_merge(img_crop, mask_poly) + img_crop = break_curved_line_into_small_pieces_and_then_merge(img_crop, mask_poly) @@ -5638,7 +5643,7 @@ class Eynollah_ocr: if not self.export_textline_images_and_text: if w_scaled < 640:#1.5*image_width: - img_fin = self.preprocess_and_resize_image_for_ocrcnn_model(img_crop, image_height, image_width) + img_fin = preprocess_and_resize_image_for_ocrcnn_model(img_crop, image_height, image_width) cropped_lines.append(img_fin) if angle_degrees > 15: cropped_lines_ver_index.append(1) @@ -5647,15 +5652,15 @@ class Eynollah_ocr: cropped_lines_meging_indexing.append(0) if self.prediction_with_both_of_rgb_and_bin: - img_fin = self.preprocess_and_resize_image_for_ocrcnn_model(img_crop_bin, image_height, image_width) + img_fin = preprocess_and_resize_image_for_ocrcnn_model(img_crop_bin, image_height, image_width) cropped_lines_bin.append(img_fin) else: if self.prediction_with_both_of_rgb_and_bin: - splited_images, splited_images_bin = self.return_textlines_split_if_needed(img_crop, img_crop_bin) + splited_images, splited_images_bin = return_textlines_split_if_needed(img_crop, img_crop_bin) else: - splited_images, splited_images_bin = self.return_textlines_split_if_needed(img_crop, None) + splited_images, splited_images_bin = return_textlines_split_if_needed(img_crop, None) if splited_images: - img_fin = self.preprocess_and_resize_image_for_ocrcnn_model(splited_images[0], image_height, image_width) + img_fin = preprocess_and_resize_image_for_ocrcnn_model(splited_images[0], image_height, image_width) cropped_lines.append(img_fin) 
cropped_lines_meging_indexing.append(1) @@ -5664,7 +5669,7 @@ class Eynollah_ocr: else: cropped_lines_ver_index.append(0) - img_fin = self.preprocess_and_resize_image_for_ocrcnn_model(splited_images[1], image_height, image_width) + img_fin = preprocess_and_resize_image_for_ocrcnn_model(splited_images[1], image_height, image_width) cropped_lines.append(img_fin) cropped_lines_meging_indexing.append(-1) @@ -5675,13 +5680,13 @@ class Eynollah_ocr: cropped_lines_ver_index.append(0) if self.prediction_with_both_of_rgb_and_bin: - img_fin = self.preprocess_and_resize_image_for_ocrcnn_model(splited_images_bin[0], image_height, image_width) + img_fin = preprocess_and_resize_image_for_ocrcnn_model(splited_images_bin[0], image_height, image_width) cropped_lines_bin.append(img_fin) - img_fin = self.preprocess_and_resize_image_for_ocrcnn_model(splited_images_bin[1], image_height, image_width) + img_fin = preprocess_and_resize_image_for_ocrcnn_model(splited_images_bin[1], image_height, image_width) cropped_lines_bin.append(img_fin) else: - img_fin = self.preprocess_and_resize_image_for_ocrcnn_model(img_crop, image_height, image_width) + img_fin = preprocess_and_resize_image_for_ocrcnn_model(img_crop, image_height, image_width) cropped_lines.append(img_fin) cropped_lines_meging_indexing.append(0) @@ -5691,7 +5696,7 @@ class Eynollah_ocr: cropped_lines_ver_index.append(0) if self.prediction_with_both_of_rgb_and_bin: - img_fin = self.preprocess_and_resize_image_for_ocrcnn_model(img_crop_bin, image_height, image_width) + img_fin = preprocess_and_resize_image_for_ocrcnn_model(img_crop_bin, image_height, image_width) cropped_lines_bin.append(img_fin) if self.export_textline_images_and_text: @@ -5814,7 +5819,7 @@ class Eynollah_ocr: preds_bin = self.prediction_model.predict(imgs_bin, verbose=0) preds = (preds + preds_bin) / 2. - pred_texts = self.decode_batch_predictions(preds, self.num_to_char) + pred_texts = decode_batch_predictions(preds, self.num_to_char) for ib in range(imgs.shape[0]): pred_texts_ib = pred_texts[ib].replace("[UNK]", "") @@ -5844,7 +5849,7 @@ class Eynollah_ocr: w_bb = bb_ind[2] h_bb = bb_ind[3] - font = self.fit_text_single_line(draw, extracted_texts_merged[indexer_text], font_path, w_bb, int(h_bb*0.4) ) + font = fit_text_single_line(draw, extracted_texts_merged[indexer_text], font_path, w_bb, int(h_bb*0.4) ) ##draw.rectangle([x_bb, y_bb, x_bb + w_bb, y_bb + h_bb], outline="red", width=2) diff --git a/src/eynollah/utils/utils_ocr.py b/src/eynollah/utils/utils_ocr.py index 44367b6..339b38a 100644 --- a/src/eynollah/utils/utils_ocr.py +++ b/src/eynollah/utils/utils_ocr.py @@ -4,6 +4,7 @@ import tensorflow as tf from scipy.signal import find_peaks from scipy.ndimage import gaussian_filter1d import math +from PIL import Image, ImageDraw, ImageFont from .resize import resize_image def decode_batch_predictions(pred, num_to_char, max_len = 128): From 27c4b0d0e09ff9d7dabe31074f225adedb3ee5d1 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Sun, 25 May 2025 01:12:58 +0200 Subject: [PATCH 19/40] Drop capitals are written separately and are not attached to their corresponding text line. The OCR use case also supports single-image input. 
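For reference, the new input handling is mutually exclusive: the OCR CLI takes either a single image (-i) or an input directory (-di), never both, and Eynollah_ocr builds its list of pages from whichever one was given. Below is a minimal sketch of that selection logic, assuming the same names as in the diff (dir_in, image_filename); the helper name collect_ocr_inputs is only for illustration, since in the patch the branching lives inline in Eynollah_ocr.run().

    import os
    from pathlib import Path

    def collect_ocr_inputs(dir_in=None, image_filename=None):
        # Exactly one of the two input modes must be set (mirrors the CLI assert).
        assert bool(image_filename) ^ bool(dir_in), \
            "Either a single image or an input directory must be provided, but not both."
        if dir_in:
            # Directory mode: every file in dir_in is treated as one page image.
            return [(Path(name).stem, os.path.join(dir_in, name)) for name in os.listdir(dir_in)]
        # Single-image mode: a one-element list with the given file.
        return [(Path(image_filename).stem, image_filename)]

Each (file_name, path) pair then feeds the existing per-page loop unchanged; only how the list is assembled differs.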
--- src/eynollah/cli.py | 11 ++++++++-- src/eynollah/eynollah.py | 46 +++++++++++++++++++++++++++++----------- src/eynollah/writer.py | 8 +++---- 3 files changed, 47 insertions(+), 18 deletions(-) diff --git a/src/eynollah/cli.py b/src/eynollah/cli.py index cd56833..0c18b2c 100644 --- a/src/eynollah/cli.py +++ b/src/eynollah/cli.py @@ -331,6 +331,12 @@ def layout(image, out, overwrite, dir_in, model, save_images, save_layout, save_ @main.command() +@click.option( + "--image", + "-i", + help="image filename", + type=click.Path(exists=True, dir_okay=False), +) @click.option( "--dir_in", "-di", @@ -415,7 +421,7 @@ def layout(image, out, overwrite, dir_in, model, save_images, save_layout, save_ help="Override log level globally to this", ) -def ocr(dir_in, dir_in_bin, out, dir_xmls, dir_out_image_text, model, tr_ocr, export_textline_images_and_text, do_not_mask_with_textline_contour, draw_texts_on_image, prediction_with_both_of_rgb_and_bin, batch_size, dataset_abbrevation, log_level): +def ocr(image, dir_in, dir_in_bin, out, dir_xmls, dir_out_image_text, model, tr_ocr, export_textline_images_and_text, do_not_mask_with_textline_contour, draw_texts_on_image, prediction_with_both_of_rgb_and_bin, batch_size, dataset_abbrevation, log_level): initLogging() if log_level: getLogger('eynollah').setLevel(getLevelName(log_level)) @@ -426,8 +432,9 @@ def ocr(dir_in, dir_in_bin, out, dir_xmls, dir_out_image_text, model, tr_ocr, ex assert not export_textline_images_and_text or not dir_out_image_text, "Exporting textline and text -etit can not be set alongside directory of images with predicted text -doit" assert not export_textline_images_and_text or not draw_texts_on_image, "Exporting textline and text -etit can not be set alongside draw text on image -dtoi" assert not export_textline_images_and_text or not prediction_with_both_of_rgb_and_bin, "Exporting textline and text -etit can not be set alongside prediction with both rgb and bin -brb" - + assert (bool(image) ^ bool(dir_in)), "Either -i (single image) or -di (directory) must be provided, but not both." 
eynollah_ocr = Eynollah_ocr( + image_filename=image, dir_xmls=dir_xmls, dir_out_image_text=dir_out_image_text, dir_in=dir_in, diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py index 1b50713..aa38274 100644 --- a/src/eynollah/eynollah.py +++ b/src/eynollah/eynollah.py @@ -5134,10 +5134,10 @@ class Eynollah: pixel_img = 4 polygons_of_drop_capitals = return_contours_of_interested_region_by_min_size(text_regions_p, pixel_img) - all_found_textline_polygons = adhere_drop_capital_region_into_corresponding_textline( - text_regions_p, polygons_of_drop_capitals, contours_only_text_parent, contours_only_text_parent_h, - all_box_coord, all_box_coord_h, all_found_textline_polygons, all_found_textline_polygons_h, - kernel=KERNEL, curved_line=self.curved_line, textline_light=self.textline_light) + ##all_found_textline_polygons = adhere_drop_capital_region_into_corresponding_textline( + ##text_regions_p, polygons_of_drop_capitals, contours_only_text_parent, contours_only_text_parent_h, + ##all_box_coord, all_box_coord_h, all_found_textline_polygons, all_found_textline_polygons_h, + ##kernel=KERNEL, curved_line=self.curved_line, textline_light=self.textline_light) if not self.reading_order_machine_based: pixel_seps = 6 @@ -5299,6 +5299,7 @@ class Eynollah_ocr: dir_models, dir_xmls=None, dir_in=None, + image_filename=None, dir_in_bin=None, dir_out=None, dir_out_image_text=None, @@ -5312,6 +5313,7 @@ class Eynollah_ocr: logger=None, ): self.dir_in = dir_in + self.image_filename = image_filename self.dir_in_bin = dir_in_bin self.dir_out = dir_out self.dir_xmls = dir_xmls @@ -5363,13 +5365,20 @@ class Eynollah_ocr: ) def run(self): - ls_imgs = os.listdir(self.dir_in) + if self.dir_in: + ls_imgs = os.listdir(self.dir_in) + else: + ls_imgs = [self.image_filename] if self.tr_ocr: tr_ocr_input_height_and_width = 384 for ind_img in ls_imgs: - file_name = Path(ind_img).stem - dir_img = os.path.join(self.dir_in, ind_img) + if self.dir_in: + file_name = Path(ind_img).stem + dir_img = os.path.join(self.dir_in, ind_img) + else: + file_name = Path(self.image_filename).stem + dir_img = self.image_filename dir_xml = os.path.join(self.dir_xmls, file_name+'.xml') out_file_ocr = os.path.join(self.dir_out, file_name+'.xml') img = cv2.imread(dir_img) @@ -5541,8 +5550,15 @@ class Eynollah_ocr: img_size=(image_width, image_height) for ind_img in ls_imgs: - file_name = Path(ind_img).stem - dir_img = os.path.join(self.dir_in, ind_img) + if self.dir_in: + file_name = Path(ind_img).stem + dir_img = os.path.join(self.dir_in, ind_img) + else: + file_name = Path(self.image_filename).stem + dir_img = self.image_filename + + #file_name = Path(ind_img).stem + #dir_img = os.path.join(self.dir_in, ind_img) dir_xml = os.path.join(self.dir_xmls, file_name+'.xml') out_file_ocr = os.path.join(self.dir_out, file_name+'.xml') img = cv2.imread(dir_img) @@ -5576,6 +5592,7 @@ class Eynollah_ocr: indexer_text_region = 0 indexer_textlines = 0 for nn in root1.iter(region_tags): + type_textregion = nn.attrib['type'] for child_textregion in nn: if child_textregion.tag.endswith("TextLine"): for child_textlines in child_textregion: @@ -5589,7 +5606,9 @@ class Eynollah_ocr: angle_radians = math.atan2(h, w) # Convert to degrees angle_degrees = math.degrees(angle_radians) - + if type_textregion=='drop-capital': + angle_degrees = 0 + if self.draw_texts_on_image: total_bb_coordinates.append([x,y,w,h]) @@ -5632,8 +5651,11 @@ class Eynollah_ocr: #print(file_name,w_n*h_n , mask_poly[:,:,0].sum(), mask_poly[:,:,0].sum() /float(w_n*h_n) , 'ikiiiiii') 
else: img_crop[mask_poly==0] = 255 - if mask_poly[:,:,0].sum() /float(w*h) < 0.50 and w_scaled > 100: - img_crop = break_curved_line_into_small_pieces_and_then_merge(img_crop, mask_poly) + if type_textregion=='drop-capital': + pass + else: + if mask_poly[:,:,0].sum() /float(w*h) < 0.50 and w_scaled > 100: + img_crop = break_curved_line_into_small_pieces_and_then_merge(img_crop, mask_poly) diff --git a/src/eynollah/writer.py b/src/eynollah/writer.py index cf0551b..f07abf6 100644 --- a/src/eynollah/writer.py +++ b/src/eynollah/writer.py @@ -283,14 +283,14 @@ class EynollahXmlWriter(): Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_marginals[mm], page_coord))) page.add_TextRegion(marginal) self.serialize_lines_in_marginal(marginal, all_found_textline_polygons_marginals, mm, page_coord, all_box_coord_marginals, slopes_marginals, counter) - + for mm in range(len(found_polygons_drop_capitals)): dropcapital = TextRegionType(id=counter.next_region_id, type_='drop-capital', Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_drop_capitals[mm], page_coord))) page.add_TextRegion(dropcapital) - ###all_box_coord_drop = None - ###slopes_drop = None - ###self.serialize_lines_in_dropcapital(dropcapital, [found_polygons_drop_capitals[mm]], mm, page_coord, all_box_coord_drop, slopes_drop, counter, ocr_all_textlines_textregion=None) + all_box_coord_drop = None + slopes_drop = None + self.serialize_lines_in_dropcapital(dropcapital, [found_polygons_drop_capitals[mm]], mm, page_coord, all_box_coord_drop, slopes_drop, counter, ocr_all_textlines_textregion=None) for mm in range(len(found_polygons_text_region_img)): page.add_ImageRegion(ImageRegionType(id=counter.next_region_id, Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_text_region_img[mm], page_coord)))) From 097520bfd275f8260eebd698bae42b0c33eafd3c Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Sun, 25 May 2025 03:33:54 +0200 Subject: [PATCH 20/40] rnn ocr for all layout textregion types --- src/eynollah/eynollah.py | 41 ++++++++++++++++++++++++++-------------- src/eynollah/writer.py | 31 ++++++++++++++++++++++-------- 2 files changed, 50 insertions(+), 22 deletions(-) diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py index aa38274..0ee3d14 100644 --- a/src/eynollah/eynollah.py +++ b/src/eynollah/eynollah.py @@ -4715,11 +4715,10 @@ class Eynollah: if self.extract_only_images: text_regions_p_1, erosion_hurts, polygons_lines_xml, polygons_of_images, image_page, page_coord, cont_page = \ self.get_regions_light_v_extract_only_images(img_res, is_image_enhanced, num_col_classifier) - ocr_all_textlines = None pcgts = self.writer.build_pagexml_no_full_layout( [], page_coord, [], [], [], [], polygons_of_images, [], [], [], [], [], - cont_page, [], [], ocr_all_textlines, []) + cont_page, [], []) if self.plotter: self.plotter.write_images_into_directory(polygons_of_images, image_page) return pcgts @@ -4772,7 +4771,7 @@ class Eynollah: cont_page, page_coord, order_text_new, id_of_texts_tot, all_found_textline_polygons, page_coord, polygons_of_images, polygons_of_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals, - cont_page, polygons_lines_xml, contours_tables, ocr_all_textlines, conf_contours_textregions, self.skip_layout_and_reading_order) + cont_page, polygons_lines_xml, contours_tables, ocr_all_textlines=ocr_all_textlines, conf_contours_textregion=conf_contours_textregions, 
skip_layout_reading_order=self.skip_layout_and_reading_order) return pcgts #print("text region early -1 in %.1fs", time.time() - t0) @@ -4822,10 +4821,9 @@ class Eynollah: if not num_col: self.logger.info("No columns detected, outputting an empty PAGE-XML") - ocr_all_textlines = None pcgts = self.writer.build_pagexml_no_full_layout( [], page_coord, [], [], [], [], [], [], [], [], [], [], - cont_page, [], [], ocr_all_textlines, []) + cont_page, [], []) return pcgts #print("text region early in %.1fs", time.time() - t0) @@ -5004,13 +5002,13 @@ class Eynollah: [], [], page_coord, [], [], [], [], [], [], polygons_of_images, contours_tables, [], polygons_of_marginals, empty_marginals, empty_marginals, [], [], [], - cont_page, polygons_lines_xml, [], [], []) + cont_page, polygons_lines_xml) else: pcgts = self.writer.build_pagexml_no_full_layout( [], page_coord, [], [], [], [], polygons_of_images, polygons_of_marginals, empty_marginals, empty_marginals, [], [], - cont_page, polygons_lines_xml, contours_tables, [], []) + cont_page, polygons_lines_xml, contours_tables) return pcgts @@ -5196,16 +5194,28 @@ class Eynollah: contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered, boxes_d, textline_mask_tot_d) self.logger.info("detection of reading order took %.1fs", time.time() - t_order) - if self.ocr: - ocr_all_textlines = [] + if self.ocr and not self.tr: + gc.collect() + if len(all_found_textline_polygons)>0: + ocr_all_textlines = return_rnn_cnn_ocr_of_given_textlines(image_page, all_found_textline_polygons, self.prediction_model, self.b_s_ocr, self.num_to_char, self.textline_light, self.curved_line) + if all_found_textline_polygons_marginals and len(all_found_textline_polygons_marginals)>0: + ocr_all_textlines_marginals = return_rnn_cnn_ocr_of_given_textlines(image_page, all_found_textline_polygons_marginals, self.prediction_model, self.b_s_ocr, self.num_to_char, self.textline_light, self.curved_line) + + if all_found_textline_polygons_h and len(all_found_textline_polygons)>0: + ocr_all_textlines_h = return_rnn_cnn_ocr_of_given_textlines(image_page, all_found_textline_polygons_h, self.prediction_model, self.b_s_ocr, self.num_to_char, self.textline_light, self.curved_line) + if polygons_of_drop_capitals and len(polygons_of_drop_capitals)>0: + ocr_all_textlines_drop = return_rnn_cnn_ocr_of_given_textlines(image_page, polygons_of_drop_capitals, self.prediction_model, self.b_s_ocr, self.num_to_char, self.textline_light, self.curved_line) else: ocr_all_textlines = None + ocr_all_textlines_marginals = None + ocr_all_textlines_h = None + ocr_all_textlines_drop = None pcgts = self.writer.build_pagexml_full_layout( contours_only_text_parent, contours_only_text_parent_h, page_coord, order_text_new, id_of_texts_tot, all_found_textline_polygons, all_found_textline_polygons_h, all_box_coord, all_box_coord_h, polygons_of_images, contours_tables, polygons_of_drop_capitals, polygons_of_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_h, slopes_marginals, - cont_page, polygons_lines_xml, ocr_all_textlines, conf_contours_textregions, conf_contours_textregions_h) + cont_page, polygons_lines_xml, ocr_all_textlines, ocr_all_textlines_h, ocr_all_textlines_marginals, ocr_all_textlines_drop, conf_contours_textregions, conf_contours_textregions_h) return pcgts contours_only_text_parent_h = None @@ -5278,18 +5288,21 @@ class Eynollah: elif self.ocr and not self.tr: gc.collect() - ocr_all_textlines = return_rnn_cnn_ocr_of_given_textlines(image_page, 
all_found_textline_polygons, self.prediction_model, self.b_s_ocr, self.num_to_char, self.textline_light, self.curved_line) - + if len(all_found_textline_polygons)>0: + ocr_all_textlines = return_rnn_cnn_ocr_of_given_textlines(image_page, all_found_textline_polygons, self.prediction_model, self.b_s_ocr, self.num_to_char, self.textline_light, self.curved_line) + if all_found_textline_polygons_marginals and len(all_found_textline_polygons_marginals)>0: + ocr_all_textlines_marginals = return_rnn_cnn_ocr_of_given_textlines(image_page, all_found_textline_polygons_marginals, self.prediction_model, self.b_s_ocr, self.num_to_char, self.textline_light, self.curved_line) else: ocr_all_textlines = None - #print(ocr_all_textlines) + ocr_all_textlines_marginals = None self.logger.info("detection of reading order took %.1fs", time.time() - t_order) + pcgts = self.writer.build_pagexml_no_full_layout( txt_con_org, page_coord, order_text_new, id_of_texts_tot, all_found_textline_polygons, all_box_coord, polygons_of_images, polygons_of_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals, - cont_page, polygons_lines_xml, contours_tables, ocr_all_textlines, conf_contours_textregions) + cont_page, polygons_lines_xml, contours_tables, ocr_all_textlines, ocr_all_textlines_marginals, conf_contours_textregions) return pcgts diff --git a/src/eynollah/writer.py b/src/eynollah/writer.py index f07abf6..085ee6f 100644 --- a/src/eynollah/writer.py +++ b/src/eynollah/writer.py @@ -56,10 +56,12 @@ class EynollahXmlWriter(): points_page_print = points_page_print + ' ' return points_page_print[:-1] - def serialize_lines_in_marginal(self, marginal_region, all_found_textline_polygons_marginals, marginal_idx, page_coord, all_box_coord_marginals, slopes_marginals, counter): + def serialize_lines_in_marginal(self, marginal_region, all_found_textline_polygons_marginals, marginal_idx, page_coord, all_box_coord_marginals, slopes_marginals, counter, ocr_all_textlines_textregion): for j in range(len(all_found_textline_polygons_marginals[marginal_idx])): coords = CoordsType() textline = TextLineType(id=counter.next_line_id, Coords=coords) + if ocr_all_textlines_textregion: + textline.set_TextEquiv( [ TextEquivType(Unicode=ocr_all_textlines_textregion[j]) ] ) marginal_region.add_TextLine(textline) marginal_region.set_orientation(-slopes_marginals[marginal_idx]) points_co = '' @@ -168,7 +170,7 @@ class EynollahXmlWriter(): with open(self.output_filename, 'w') as f: f.write(to_xml(pcgts)) - def build_pagexml_no_full_layout(self, found_polygons_text_region, page_coord, order_of_texts, id_of_texts, all_found_textline_polygons, all_box_coord, found_polygons_text_region_img, found_polygons_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals, cont_page, polygons_lines_to_be_written_in_xml, found_polygons_tables, ocr_all_textlines, conf_contours_textregion, skip_layout_reading_order=False): + def build_pagexml_no_full_layout(self, found_polygons_text_region, page_coord, order_of_texts, id_of_texts, all_found_textline_polygons, all_box_coord, found_polygons_text_region_img, found_polygons_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals, cont_page, polygons_lines_to_be_written_in_xml, found_polygons_tables, ocr_all_textlines=None, ocr_all_textlines_marginals=None, conf_contours_textregion=None, skip_layout_reading_order=False): self.logger.debug('enter build_pagexml_no_full_layout') # create the file 
structure @@ -198,7 +200,12 @@ class EynollahXmlWriter(): marginal = TextRegionType(id=counter.next_region_id, type_='marginalia', Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_marginals[mm], page_coord))) page.add_TextRegion(marginal) - self.serialize_lines_in_marginal(marginal, all_found_textline_polygons_marginals, mm, page_coord, all_box_coord_marginals, slopes_marginals, counter) + if ocr_all_textlines_marginals: + ocr_textlines = ocr_all_textlines_marginals[mm] + else: + ocr_textlines = None + + self.serialize_lines_in_marginal(marginal, all_found_textline_polygons_marginals, mm, page_coord, all_box_coord_marginals, slopes_marginals, counter, ocr_textlines) for mm in range(len(found_polygons_text_region_img)): img_region = ImageRegionType(id=counter.next_region_id, Coords=CoordsType()) @@ -242,7 +249,7 @@ class EynollahXmlWriter(): return pcgts - def build_pagexml_full_layout(self, found_polygons_text_region, found_polygons_text_region_h, page_coord, order_of_texts, id_of_texts, all_found_textline_polygons, all_found_textline_polygons_h, all_box_coord, all_box_coord_h, found_polygons_text_region_img, found_polygons_tables, found_polygons_drop_capitals, found_polygons_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_h, slopes_marginals, cont_page, polygons_lines_to_be_written_in_xml, ocr_all_textlines, conf_contours_textregion, conf_contours_textregion_h): + def build_pagexml_full_layout(self, found_polygons_text_region, found_polygons_text_region_h, page_coord, order_of_texts, id_of_texts, all_found_textline_polygons, all_found_textline_polygons_h, all_box_coord, all_box_coord_h, found_polygons_text_region_img, found_polygons_tables, found_polygons_drop_capitals, found_polygons_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_h, slopes_marginals, cont_page, polygons_lines_to_be_written_in_xml, ocr_all_textlines=None, ocr_all_textlines_h=None, ocr_all_textlines_marginals=None, ocr_all_textlines_drop=None, conf_contours_textregion=None, conf_contours_textregion_h=None): self.logger.debug('enter build_pagexml_full_layout') # create the file structure @@ -272,8 +279,8 @@ class EynollahXmlWriter(): Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_text_region_h[mm], page_coord))) page.add_TextRegion(textregion) - if ocr_all_textlines: - ocr_textlines = ocr_all_textlines[mm] + if ocr_all_textlines_h: + ocr_textlines = ocr_all_textlines_h[mm] else: ocr_textlines = None self.serialize_lines_in_region(textregion, all_found_textline_polygons_h, mm, page_coord, all_box_coord_h, slopes_h, counter, ocr_textlines) @@ -282,7 +289,11 @@ class EynollahXmlWriter(): marginal = TextRegionType(id=counter.next_region_id, type_='marginalia', Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_marginals[mm], page_coord))) page.add_TextRegion(marginal) - self.serialize_lines_in_marginal(marginal, all_found_textline_polygons_marginals, mm, page_coord, all_box_coord_marginals, slopes_marginals, counter) + if ocr_all_textlines_marginals: + ocr_textlines = ocr_all_textlines_marginals[mm] + else: + ocr_textlines = None + self.serialize_lines_in_marginal(marginal, all_found_textline_polygons_marginals, mm, page_coord, all_box_coord_marginals, slopes_marginals, counter, ocr_textlines) for mm in range(len(found_polygons_drop_capitals)): dropcapital = TextRegionType(id=counter.next_region_id, type_='drop-capital', @@ -290,7 +301,11 @@ class EynollahXmlWriter(): 
page.add_TextRegion(dropcapital) all_box_coord_drop = None slopes_drop = None - self.serialize_lines_in_dropcapital(dropcapital, [found_polygons_drop_capitals[mm]], mm, page_coord, all_box_coord_drop, slopes_drop, counter, ocr_all_textlines_textregion=None) + if ocr_all_textlines_drop: + ocr_textlines = ocr_all_textlines_drop[mm] + else: + ocr_textlines = None + self.serialize_lines_in_dropcapital(dropcapital, [found_polygons_drop_capitals[mm]], mm, page_coord, all_box_coord_drop, slopes_drop, counter, ocr_all_textlines_textregion=ocr_textlines) for mm in range(len(found_polygons_text_region_img)): page.add_ImageRegion(ImageRegionType(id=counter.next_region_id, Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_text_region_img[mm], page_coord)))) From 0f154c605a870c14556d0d0df539f19511735410 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Sun, 25 May 2025 21:44:36 +0200 Subject: [PATCH 21/40] strings alignment function is added + new changes needed for prediction with both bin and rgb inputs is implemented --- requirements.txt | 1 + src/eynollah/eynollah.py | 78 +++++++++++++++++++++++++++------ src/eynollah/utils/utils_ocr.py | 47 +++++++++++++++++--- 3 files changed, 107 insertions(+), 19 deletions(-) diff --git a/requirements.txt b/requirements.txt index aeffd47..4bc0c6a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,3 +6,4 @@ tensorflow < 2.13 numba <= 0.58.1 scikit-image loky +biopython diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py index 0ee3d14..1f79995 100644 --- a/src/eynollah/eynollah.py +++ b/src/eynollah/eynollah.py @@ -5647,6 +5647,10 @@ class Eynollah_ocr: better_des_slope = get_orientation_moments(textline_coords) img_crop = rotate_image_with_padding(img_crop, better_des_slope ) + + if self.prediction_with_both_of_rgb_and_bin: + img_crop_bin = rotate_image_with_padding(img_crop_bin, better_des_slope ) + mask_poly = rotate_image_with_padding(mask_poly, better_des_slope ) mask_poly = mask_poly.astype('uint8') @@ -5655,26 +5659,35 @@ class Eynollah_ocr: mask_poly = mask_poly[y_n:y_n+h_n, x_n:x_n+w_n, :] img_crop = img_crop[y_n:y_n+h_n, x_n:x_n+w_n, :] - + img_crop[mask_poly==0] = 255 + if self.prediction_with_both_of_rgb_and_bin: + img_crop_bin = img_crop_bin[y_n:y_n+h_n, x_n:x_n+w_n, :] + img_crop_bin[mask_poly==0] = 255 + if mask_poly[:,:,0].sum() /float(w_n*h_n) < 0.50 and w_scaled > 100: - img_crop = break_curved_line_into_small_pieces_and_then_merge(img_crop, mask_poly) - - #print(file_name,w_n*h_n , mask_poly[:,:,0].sum(), mask_poly[:,:,0].sum() /float(w_n*h_n) , 'ikiiiiii') + if self.prediction_with_both_of_rgb_and_bin: + img_crop, img_crop_bin = break_curved_line_into_small_pieces_and_then_merge(img_crop, mask_poly, img_crop_bin) + else: + img_crop, _ = break_curved_line_into_small_pieces_and_then_merge(img_crop, mask_poly) + + else: img_crop[mask_poly==0] = 255 + if self.prediction_with_both_of_rgb_and_bin: + img_crop_bin[mask_poly==0] = 255 if type_textregion=='drop-capital': pass else: if mask_poly[:,:,0].sum() /float(w*h) < 0.50 and w_scaled > 100: - img_crop = break_curved_line_into_small_pieces_and_then_merge(img_crop, mask_poly) + if self.prediction_with_both_of_rgb_and_bin: + img_crop, img_crop_bin = break_curved_line_into_small_pieces_and_then_merge(img_crop, mask_poly, img_crop_bin) + else: + img_crop, _ = break_curved_line_into_small_pieces_and_then_merge(img_crop, mask_poly) - - - if self.prediction_with_both_of_rgb_and_bin: - img_crop_bin[mask_poly==0] = 255 + if not self.export_textline_images_and_text: 
if w_scaled < 640:#1.5*image_width: @@ -5796,6 +5809,14 @@ class Eynollah_ocr: imgs_bin = cropped_lines_bin[n_start:] imgs_bin = np.array(imgs_bin) imgs_bin = imgs_bin.reshape(imgs_bin.shape[0], image_height, image_width, 3) + + if len(indices_ver)>0: + imgs_bin_ver_flipped = imgs_bin[indices_ver, : ,: ,:] + imgs_bin_ver_flipped = imgs_bin_ver_flipped[:,::-1,::-1,:] + #print(imgs_ver_flipped, 'imgs_ver_flipped') + + else: + imgs_bin_ver_flipped = None else: n_start = i*self.b_s n_end = (i+1)*self.b_s @@ -5817,22 +5838,25 @@ class Eynollah_ocr: if self.prediction_with_both_of_rgb_and_bin: imgs_bin = cropped_lines_bin[n_start:n_end] imgs_bin = np.array(imgs_bin).reshape(self.b_s, image_height, image_width, 3) + + + if len(indices_ver)>0: + imgs_bin_ver_flipped = imgs_bin[indices_ver, : ,: ,:] + imgs_bin_ver_flipped = imgs_bin_ver_flipped[:,::-1,::-1,:] + #print(imgs_ver_flipped, 'imgs_ver_flipped') + else: + imgs_bin_ver_flipped = None preds = self.prediction_model.predict(imgs, verbose=0) if len(indices_ver)>0: - #cv2.imwrite('flipped.png', (imgs_ver_flipped[0, :,:,:]*255).astype('uint8')) - #cv2.imwrite('original.png', (imgs[0, :,:,:]*255).astype('uint8')) - #sys.exit() - #print(imgs_ver_flipped.shape, 'imgs_ver_flipped.shape') preds_flipped = self.prediction_model.predict(imgs_ver_flipped, verbose=0) preds_max_fliped = np.max(preds_flipped, axis=2 ) preds_max_args_flipped = np.argmax(preds_flipped, axis=2 ) pred_max_not_unk_mask_bool_flipped = preds_max_args_flipped[:,:]!=256 masked_means_flipped = np.sum(preds_max_fliped * pred_max_not_unk_mask_bool_flipped, axis=1) / np.sum(pred_max_not_unk_mask_bool_flipped, axis=1) masked_means_flipped[np.isnan(masked_means_flipped)] = 0 - #print(masked_means_flipped, 'masked_means_flipped') preds_max = np.max(preds, axis=2 ) preds_max_args = np.argmax(preds, axis=2 ) @@ -5852,6 +5876,32 @@ class Eynollah_ocr: preds[indices_to_be_replaced,:,:] = preds_flipped[indices_where_flipped_conf_value_is_higher, :, :] if self.prediction_with_both_of_rgb_and_bin: preds_bin = self.prediction_model.predict(imgs_bin, verbose=0) + + if len(indices_ver)>0: + preds_flipped = self.prediction_model.predict(imgs_bin_ver_flipped, verbose=0) + preds_max_fliped = np.max(preds_flipped, axis=2 ) + preds_max_args_flipped = np.argmax(preds_flipped, axis=2 ) + pred_max_not_unk_mask_bool_flipped = preds_max_args_flipped[:,:]!=256 + masked_means_flipped = np.sum(preds_max_fliped * pred_max_not_unk_mask_bool_flipped, axis=1) / np.sum(pred_max_not_unk_mask_bool_flipped, axis=1) + masked_means_flipped[np.isnan(masked_means_flipped)] = 0 + + preds_max = np.max(preds, axis=2 ) + preds_max_args = np.argmax(preds, axis=2 ) + pred_max_not_unk_mask_bool = preds_max_args[:,:]!=256 + + masked_means = np.sum(preds_max * pred_max_not_unk_mask_bool, axis=1) / np.sum(pred_max_not_unk_mask_bool, axis=1) + masked_means[np.isnan(masked_means)] = 0 + + masked_means_ver = masked_means[indices_ver] + #print(masked_means_ver, 'pred_max_not_unk') + + indices_where_flipped_conf_value_is_higher = np.where(masked_means_flipped > masked_means_ver)[0] + + #print(indices_where_flipped_conf_value_is_higher, 'indices_where_flipped_conf_value_is_higher') + if len(indices_where_flipped_conf_value_is_higher)>0: + indices_to_be_replaced = indices_ver[indices_where_flipped_conf_value_is_higher] + preds_bin[indices_to_be_replaced,:,:] = preds_flipped[indices_where_flipped_conf_value_is_higher, :, :] + preds = (preds + preds_bin) / 2. 
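# --- editor's sketch, not part of the patch ---------------------------------------
# The block above re-runs textlines detected as vertical through a 180-degree-rotated
# copy, keeps whichever variant scores higher, and finally averages the RGB and
# binarized softmax outputs (preds = (preds + preds_bin) / 2.). The score used is the
# mean of the per-timestep best probability over timesteps whose argmax is not the
# blank class (index 256 in this model). As a standalone helper, that heuristic is
# roughly:
import numpy as np

def masked_mean_confidence(preds, blank_index=256):
    """Mean best-class probability per line, ignoring timesteps predicted as blank.

    preds is a CTC softmax output of shape (batch, time, vocab); lines whose every
    timestep is blank get confidence 0, mirroring the NaN-to-zero handling above.
    """
    best_prob = np.max(preds, axis=2)      # (batch, time)
    best_idx = np.argmax(preds, axis=2)    # (batch, time)
    not_blank = best_idx != blank_index    # drop blank/UNK timesteps from the average
    n_kept = np.sum(not_blank, axis=1)
    conf = np.sum(best_prob * not_blank, axis=1) / np.maximum(n_kept, 1)
    conf[n_kept == 0] = 0.0
    return conf
# -----------------------------------------------------------------------------------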
pred_texts = decode_batch_predictions(preds, self.num_to_char) diff --git a/src/eynollah/utils/utils_ocr.py b/src/eynollah/utils/utils_ocr.py index 339b38a..524e7ce 100644 --- a/src/eynollah/utils/utils_ocr.py +++ b/src/eynollah/utils/utils_ocr.py @@ -5,6 +5,7 @@ from scipy.signal import find_peaks from scipy.ndimage import gaussian_filter1d import math from PIL import Image, ImageDraw, ImageFont +from Bio import pairwise2 from .resize import resize_image def decode_batch_predictions(pred, num_to_char, max_len = 128): @@ -252,7 +253,7 @@ def return_splitting_point_of_image(image_to_spliited): return np.sort(peaks_sort_4) -def break_curved_line_into_small_pieces_and_then_merge(img_curved, mask_curved): +def break_curved_line_into_small_pieces_and_then_merge(img_curved, mask_curved, img_bin_curved=None): peaks_4 = return_splitting_point_of_image(img_curved) if len(peaks_4)>0: imgs_tot = [] @@ -260,29 +261,44 @@ def break_curved_line_into_small_pieces_and_then_merge(img_curved, mask_curved): for ind in range(len(peaks_4)+1): if ind==0: img = img_curved[:, :peaks_4[ind], :] + if img_bin_curved: + img_bin = img_curved_bin[:, :peaks_4[ind], :] mask = mask_curved[:, :peaks_4[ind], :] elif ind==len(peaks_4): img = img_curved[:, peaks_4[ind-1]:, :] + if img_bin_curved: + img_bin = img_curved_bin[:, peaks_4[ind-1]:, :] mask = mask_curved[:, peaks_4[ind-1]:, :] else: img = img_curved[:, peaks_4[ind-1]:peaks_4[ind], :] + if img_bin_curved: + img_bin = img_curved_bin[:, peaks_4[ind-1]:peaks_4[ind], :] mask = mask_curved[:, peaks_4[ind-1]:peaks_4[ind], :] or_ma = get_orientation_moments_of_mask(mask) - - imgs_tot.append([img, mask, or_ma] ) + + if img_bin_curved: + imgs_tot.append([img, mask, or_ma, img_bin] ) + else: + imgs_tot.append([img, mask, or_ma] ) w_tot_des_list = [] w_tot_des = 0 imgs_deskewed_list = [] + imgs_bin_deskewed_list = [] + for ind in range(len(imgs_tot)): img_in = imgs_tot[ind][0] mask_in = imgs_tot[ind][1] ori_in = imgs_tot[ind][2] + if img_bin_curved: + img_bin_in = imgs_tot[ind][3] if abs(ori_in)<45: img_in_des = rotate_image_with_padding(img_in, ori_in, border_value=(255,255,255) ) + if img_bin_curved: + img_bin_in_des = rotate_image_with_padding(img_bin_in, ori_in, border_value=(255,255,255) ) mask_in_des = rotate_image_with_padding(mask_in, ori_in) mask_in_des = mask_in_des.astype('uint8') @@ -291,36 +307,52 @@ def break_curved_line_into_small_pieces_and_then_merge(img_curved, mask_curved): mask_in_des = mask_in_des[y_n:y_n+h_n, x_n:x_n+w_n, :] img_in_des = img_in_des[y_n:y_n+h_n, x_n:x_n+w_n, :] + if img_bin_curved: + img_bin_in_des = img_bin_in_des[y_n:y_n+h_n, x_n:x_n+w_n, :] w_relative = int(32 * img_in_des.shape[1]/float(img_in_des.shape[0]) ) if w_relative==0: w_relative = img_in_des.shape[1] img_in_des = resize_image(img_in_des, 32, w_relative) + if img_bin_curved: + img_bin_in_des = resize_image(img_bin_in_des, 32, w_relative) else: img_in_des = np.copy(img_in) + if img_bin_curved: + img_bin_in_des = np.copy(img_bin_in) w_relative = int(32 * img_in_des.shape[1]/float(img_in_des.shape[0]) ) if w_relative==0: w_relative = img_in_des.shape[1] img_in_des = resize_image(img_in_des, 32, w_relative) + if img_bin_curved: + img_bin_in_des = resize_image(img_bin_in_des, 32, w_relative) w_tot_des+=img_in_des.shape[1] w_tot_des_list.append(img_in_des.shape[1]) imgs_deskewed_list.append(img_in_des) + if img_bin_curved: + imgs_bin_deskewed_list.append(img_bin_in_des) img_final_deskewed = np.zeros((32, w_tot_des, 3))+255 + if img_bin_curved: + img_bin_final_deskewed = np.zeros((32, 
w_tot_des, 3))+255 + else: + img_bin_final_deskewed = None w_indexer = 0 for ind in range(len(w_tot_des_list)): img_final_deskewed[:,w_indexer:w_indexer+w_tot_des_list[ind],:] = imgs_deskewed_list[ind][:,:,:] + if img_bin_curved: + img_bin_final_deskewed[:,w_indexer:w_indexer+w_tot_des_list[ind],:] = imgs_bin_deskewed_list[ind][:,:,:] w_indexer = w_indexer+w_tot_des_list[ind] - return img_final_deskewed + return img_final_deskewed, img_bin_final_deskewed else: - return img_curved + return img_curved, img_bin_curved def return_textline_contour_with_added_box_coordinate(textline_contour, box_ind): textline_contour[:,0] = textline_contour[:,0] + box_ind[2] @@ -434,3 +466,8 @@ def return_rnn_cnn_ocr_of_given_textlines(image, all_found_textline_polygons, pr ocr_textline_in_textregion.append(text_textline) ocr_all_textlines.append(ocr_textline_in_textregion) return ocr_all_textlines + +def biopython_align(str1, str2): + alignments = pairwise2.align.globalms(str1, str2, 2, -1, -2, -2) + best_alignment = alignments[0] # Get the best alignment + return best_alignment.seqA, best_alignment.seqB From b93fc112bf8c414186e64de6cc092b1839239128 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Tue, 27 May 2025 23:45:22 +0200 Subject: [PATCH 22/40] updating ocr --- src/eynollah/cli.py | 10 ++++-- src/eynollah/eynollah.py | 24 ++++++++++++-- src/eynollah/utils/utils_ocr.py | 55 +++++++++++++++++---------------- 3 files changed, 58 insertions(+), 31 deletions(-) diff --git a/src/eynollah/cli.py b/src/eynollah/cli.py index 0c18b2c..2d0d6f9 100644 --- a/src/eynollah/cli.py +++ b/src/eynollah/cli.py @@ -337,6 +337,12 @@ def layout(image, out, overwrite, dir_in, model, save_images, save_layout, save_ help="image filename", type=click.Path(exists=True, dir_okay=False), ) +@click.option( + "--overwrite", + "-O", + help="overwrite (instead of skipping) if output xml exists", + is_flag=True, +) @click.option( "--dir_in", "-di", @@ -421,7 +427,7 @@ def layout(image, out, overwrite, dir_in, model, save_images, save_layout, save_ help="Override log level globally to this", ) -def ocr(image, dir_in, dir_in_bin, out, dir_xmls, dir_out_image_text, model, tr_ocr, export_textline_images_and_text, do_not_mask_with_textline_contour, draw_texts_on_image, prediction_with_both_of_rgb_and_bin, batch_size, dataset_abbrevation, log_level): +def ocr(image, overwrite, dir_in, dir_in_bin, out, dir_xmls, dir_out_image_text, model, tr_ocr, export_textline_images_and_text, do_not_mask_with_textline_contour, draw_texts_on_image, prediction_with_both_of_rgb_and_bin, batch_size, dataset_abbrevation, log_level): initLogging() if log_level: getLogger('eynollah').setLevel(getLevelName(log_level)) @@ -449,7 +455,7 @@ def ocr(image, dir_in, dir_in_bin, out, dir_xmls, dir_out_image_text, model, tr_ batch_size=batch_size, pref_of_dataset=dataset_abbrevation, ) - eynollah_ocr.run() + eynollah_ocr.run(overwrite=overwrite) if __name__ == "__main__": main() diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py index 1f79995..efa1dde 100644 --- a/src/eynollah/eynollah.py +++ b/src/eynollah/eynollah.py @@ -5338,6 +5338,8 @@ class Eynollah_ocr: self.dir_out_image_text = dir_out_image_text self.prediction_with_both_of_rgb_and_bin = prediction_with_both_of_rgb_and_bin self.pref_of_dataset = pref_of_dataset + self.logger = logger if logger else getLogger('eynollah') + if not export_textline_images_and_text: if tr_ocr: self.processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-printed") @@ -5351,7 +5353,7 @@ class Eynollah_ocr: 
self.b_s = int(batch_size) else: - self.model_ocr_dir = dir_models + "/model_step_750000_ocr"#"/model_step_125000_ocr"#"/model_step_25000_ocr"#"/model_step_1050000_ocr"#"/model_0_ocr_cnnrnn"#"/model_23_ocr_cnnrnn" + self.model_ocr_dir = dir_models + "/model_step_1075000_ocr"#"/model_step_125000_ocr"#"/model_step_25000_ocr"#"/model_step_1050000_ocr"#"/model_0_ocr_cnnrnn"#"/model_23_ocr_cnnrnn" model_ocr = load_model(self.model_ocr_dir , compile=False) self.prediction_model = tf.keras.models.Model( @@ -5377,7 +5379,7 @@ class Eynollah_ocr: vocabulary=char_to_num.get_vocabulary(), mask_token=None, invert=True ) - def run(self): + def run(self, overwrite : bool = False): if self.dir_in: ls_imgs = os.listdir(self.dir_in) else: @@ -5394,6 +5396,14 @@ class Eynollah_ocr: dir_img = self.image_filename dir_xml = os.path.join(self.dir_xmls, file_name+'.xml') out_file_ocr = os.path.join(self.dir_out, file_name+'.xml') + + if os.path.exists(out_file_ocr): + if overwrite: + self.logger.warning("will overwrite existing output file '%s'", out_file_ocr) + else: + self.logger.warning("will skip input for existing output file '%s'", out_file_ocr) + continue + img = cv2.imread(dir_img) if self.draw_texts_on_image: @@ -5574,6 +5584,14 @@ class Eynollah_ocr: #dir_img = os.path.join(self.dir_in, ind_img) dir_xml = os.path.join(self.dir_xmls, file_name+'.xml') out_file_ocr = os.path.join(self.dir_out, file_name+'.xml') + + if os.path.exists(out_file_ocr): + if overwrite: + self.logger.warning("will overwrite existing output file '%s'", out_file_ocr) + else: + self.logger.warning("will skip input for existing output file '%s'", out_file_ocr) + continue + img = cv2.imread(dir_img) if self.prediction_with_both_of_rgb_and_bin: cropped_lines_bin = [] @@ -5704,7 +5722,7 @@ class Eynollah_ocr: cropped_lines_bin.append(img_fin) else: if self.prediction_with_both_of_rgb_and_bin: - splited_images, splited_images_bin = return_textlines_split_if_needed(img_crop, img_crop_bin) + splited_images, splited_images_bin = return_textlines_split_if_needed(img_crop, img_crop_bin, prediction_with_both_of_rgb_and_bin=self.prediction_with_both_of_rgb_and_bin) else: splited_images, splited_images_bin = return_textlines_split_if_needed(img_crop, None) if splited_images: diff --git a/src/eynollah/utils/utils_ocr.py b/src/eynollah/utils/utils_ocr.py index 524e7ce..9ef344a 100644 --- a/src/eynollah/utils/utils_ocr.py +++ b/src/eynollah/utils/utils_ocr.py @@ -74,32 +74,24 @@ def distortion_free_resize(image, img_size): def return_start_and_end_of_common_text_of_textline_ocr_without_common_section(textline_image): width = np.shape(textline_image)[1] height = np.shape(textline_image)[0] - common_window = int(0.22*width) + common_window = int(0.06*width) width1 = int ( width/2. - common_window ) width2 = int ( width/2. + common_window ) - + img_sum = np.sum(textline_image[:,:,0], axis=0) sum_smoothed = gaussian_filter1d(img_sum, 3) - + peaks_real, _ = find_peaks(sum_smoothed, height=0) - - if len(peaks_real)>35: + if len(peaks_real)>70: - #peaks_real = peaks_real[(peaks_realwidth1)] - argsort = np.argsort(sum_smoothed[peaks_real])[::-1] - peaks_real_top_six = peaks_real[argsort[:6]] - midpoint = textline_image.shape[1] / 2. 
- arg_closest = np.argmin(np.abs(peaks_real_top_six - midpoint)) + peaks_real = peaks_real[(peaks_realwidth1)] - #arg_max = np.argmax(sum_smoothed[peaks_real]) - - peaks_final = peaks_real_top_six[arg_closest]#peaks_real[arg_max] - + arg_max = np.argmax(sum_smoothed[peaks_real]) + peaks_final = peaks_real[arg_max] return peaks_final else: return None - # Function to fit text inside the given area def fit_text_single_line(draw, text, font_path, max_width, max_height): initial_font_size = 50 @@ -305,17 +297,28 @@ def break_curved_line_into_small_pieces_and_then_merge(img_curved, mask_curved, #new bounding box x_n, y_n, w_n, h_n = get_contours_and_bounding_boxes(mask_in_des[:,:,0]) - mask_in_des = mask_in_des[y_n:y_n+h_n, x_n:x_n+w_n, :] - img_in_des = img_in_des[y_n:y_n+h_n, x_n:x_n+w_n, :] - if img_bin_curved: - img_bin_in_des = img_bin_in_des[y_n:y_n+h_n, x_n:x_n+w_n, :] - - w_relative = int(32 * img_in_des.shape[1]/float(img_in_des.shape[0]) ) - if w_relative==0: - w_relative = img_in_des.shape[1] - img_in_des = resize_image(img_in_des, 32, w_relative) - if img_bin_curved: - img_bin_in_des = resize_image(img_bin_in_des, 32, w_relative) + if w_n==0 or h_n==0: + img_in_des = np.copy(img_in) + if img_bin_curved: + img_bin_in_des = np.copy(img_bin_in) + w_relative = int(32 * img_in_des.shape[1]/float(img_in_des.shape[0]) ) + if w_relative==0: + w_relative = img_in_des.shape[1] + img_in_des = resize_image(img_in_des, 32, w_relative) + if img_bin_curved: + img_bin_in_des = resize_image(img_bin_in_des, 32, w_relative) + else: + mask_in_des = mask_in_des[y_n:y_n+h_n, x_n:x_n+w_n, :] + img_in_des = img_in_des[y_n:y_n+h_n, x_n:x_n+w_n, :] + if img_bin_curved: + img_bin_in_des = img_bin_in_des[y_n:y_n+h_n, x_n:x_n+w_n, :] + + w_relative = int(32 * img_in_des.shape[1]/float(img_in_des.shape[0]) ) + if w_relative==0: + w_relative = img_in_des.shape[1] + img_in_des = resize_image(img_in_des, 32, w_relative) + if img_bin_curved: + img_bin_in_des = resize_image(img_bin_in_des, 32, w_relative) else: From 48285ce3f5f132cfe3df84f91d7957b5da8e14e8 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Wed, 28 May 2025 01:17:21 +0200 Subject: [PATCH 23/40] updating ocr --- src/eynollah/eynollah.py | 2 +- src/eynollah/utils/utils_ocr.py | 36 ++++++++++++++++----------------- 2 files changed, 19 insertions(+), 19 deletions(-) diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py index efa1dde..0a9248e 100644 --- a/src/eynollah/eynollah.py +++ b/src/eynollah/eynollah.py @@ -5353,7 +5353,7 @@ class Eynollah_ocr: self.b_s = int(batch_size) else: - self.model_ocr_dir = dir_models + "/model_step_1075000_ocr"#"/model_step_125000_ocr"#"/model_step_25000_ocr"#"/model_step_1050000_ocr"#"/model_0_ocr_cnnrnn"#"/model_23_ocr_cnnrnn" + self.model_ocr_dir = dir_models + "/model_step_1150000_ocr"#"/model_step_125000_ocr"#"/model_step_25000_ocr"#"/model_step_1050000_ocr"#"/model_0_ocr_cnnrnn"#"/model_23_ocr_cnnrnn" model_ocr = load_model(self.model_ocr_dir , compile=False) self.prediction_model = tf.keras.models.Model( diff --git a/src/eynollah/utils/utils_ocr.py b/src/eynollah/utils/utils_ocr.py index 9ef344a..aa1efa6 100644 --- a/src/eynollah/utils/utils_ocr.py +++ b/src/eynollah/utils/utils_ocr.py @@ -253,23 +253,23 @@ def break_curved_line_into_small_pieces_and_then_merge(img_curved, mask_curved, for ind in range(len(peaks_4)+1): if ind==0: img = img_curved[:, :peaks_4[ind], :] - if img_bin_curved: - img_bin = img_curved_bin[:, :peaks_4[ind], :] + if img_bin_curved is not None: + img_bin = img_bin_curved[:, 
:peaks_4[ind], :] mask = mask_curved[:, :peaks_4[ind], :] elif ind==len(peaks_4): img = img_curved[:, peaks_4[ind-1]:, :] - if img_bin_curved: - img_bin = img_curved_bin[:, peaks_4[ind-1]:, :] + if img_bin_curved is not None: + img_bin = img_bin_curved[:, peaks_4[ind-1]:, :] mask = mask_curved[:, peaks_4[ind-1]:, :] else: img = img_curved[:, peaks_4[ind-1]:peaks_4[ind], :] - if img_bin_curved: - img_bin = img_curved_bin[:, peaks_4[ind-1]:peaks_4[ind], :] + if img_bin_curved is not None: + img_bin = img_bin_curved[:, peaks_4[ind-1]:peaks_4[ind], :] mask = mask_curved[:, peaks_4[ind-1]:peaks_4[ind], :] or_ma = get_orientation_moments_of_mask(mask) - if img_bin_curved: + if img_bin_curved is not None: imgs_tot.append([img, mask, or_ma, img_bin] ) else: imgs_tot.append([img, mask, or_ma] ) @@ -284,12 +284,12 @@ def break_curved_line_into_small_pieces_and_then_merge(img_curved, mask_curved, img_in = imgs_tot[ind][0] mask_in = imgs_tot[ind][1] ori_in = imgs_tot[ind][2] - if img_bin_curved: + if img_bin_curved is not None: img_bin_in = imgs_tot[ind][3] if abs(ori_in)<45: img_in_des = rotate_image_with_padding(img_in, ori_in, border_value=(255,255,255) ) - if img_bin_curved: + if img_bin_curved is not None: img_bin_in_des = rotate_image_with_padding(img_bin_in, ori_in, border_value=(255,255,255) ) mask_in_des = rotate_image_with_padding(mask_in, ori_in) mask_in_des = mask_in_des.astype('uint8') @@ -299,50 +299,50 @@ def break_curved_line_into_small_pieces_and_then_merge(img_curved, mask_curved, if w_n==0 or h_n==0: img_in_des = np.copy(img_in) - if img_bin_curved: + if img_bin_curved is not None: img_bin_in_des = np.copy(img_bin_in) w_relative = int(32 * img_in_des.shape[1]/float(img_in_des.shape[0]) ) if w_relative==0: w_relative = img_in_des.shape[1] img_in_des = resize_image(img_in_des, 32, w_relative) - if img_bin_curved: + if img_bin_curved is not None: img_bin_in_des = resize_image(img_bin_in_des, 32, w_relative) else: mask_in_des = mask_in_des[y_n:y_n+h_n, x_n:x_n+w_n, :] img_in_des = img_in_des[y_n:y_n+h_n, x_n:x_n+w_n, :] - if img_bin_curved: + if img_bin_curved is not None: img_bin_in_des = img_bin_in_des[y_n:y_n+h_n, x_n:x_n+w_n, :] w_relative = int(32 * img_in_des.shape[1]/float(img_in_des.shape[0]) ) if w_relative==0: w_relative = img_in_des.shape[1] img_in_des = resize_image(img_in_des, 32, w_relative) - if img_bin_curved: + if img_bin_curved is not None: img_bin_in_des = resize_image(img_bin_in_des, 32, w_relative) else: img_in_des = np.copy(img_in) - if img_bin_curved: + if img_bin_curved is not None: img_bin_in_des = np.copy(img_bin_in) w_relative = int(32 * img_in_des.shape[1]/float(img_in_des.shape[0]) ) if w_relative==0: w_relative = img_in_des.shape[1] img_in_des = resize_image(img_in_des, 32, w_relative) - if img_bin_curved: + if img_bin_curved is not None: img_bin_in_des = resize_image(img_bin_in_des, 32, w_relative) w_tot_des+=img_in_des.shape[1] w_tot_des_list.append(img_in_des.shape[1]) imgs_deskewed_list.append(img_in_des) - if img_bin_curved: + if img_bin_curved is not None: imgs_bin_deskewed_list.append(img_bin_in_des) img_final_deskewed = np.zeros((32, w_tot_des, 3))+255 - if img_bin_curved: + if img_bin_curved is not None: img_bin_final_deskewed = np.zeros((32, w_tot_des, 3))+255 else: img_bin_final_deskewed = None @@ -350,7 +350,7 @@ def break_curved_line_into_small_pieces_and_then_merge(img_curved, mask_curved, w_indexer = 0 for ind in range(len(w_tot_des_list)): img_final_deskewed[:,w_indexer:w_indexer+w_tot_des_list[ind],:] = imgs_deskewed_list[ind][:,:,:] - if 
img_bin_curved: + if img_bin_curved is not None: img_bin_final_deskewed[:,w_indexer:w_indexer+w_tot_des_list[ind],:] = imgs_bin_deskewed_list[ind][:,:,:] w_indexer = w_indexer+w_tot_des_list[ind] return img_final_deskewed, img_bin_final_deskewed From 928a548b70197c22a26721073fde208f6b4f81b5 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Sat, 31 May 2025 01:09:14 +0200 Subject: [PATCH 24/40] Parametrize OCR for handling curved lines --- src/eynollah/eynollah.py | 10 +++++----- src/eynollah/utils/utils_ocr.py | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py index 0a9248e..6c00329 100644 --- a/src/eynollah/eynollah.py +++ b/src/eynollah/eynollah.py @@ -5353,7 +5353,7 @@ class Eynollah_ocr: self.b_s = int(batch_size) else: - self.model_ocr_dir = dir_models + "/model_step_1150000_ocr"#"/model_step_125000_ocr"#"/model_step_25000_ocr"#"/model_step_1050000_ocr"#"/model_0_ocr_cnnrnn"#"/model_23_ocr_cnnrnn" + self.model_ocr_dir = dir_models + "/model_step_1225000_ocr"#"/model_step_125000_ocr"#"/model_step_25000_ocr"#"/model_step_1050000_ocr"#"/model_0_ocr_cnnrnn"#"/model_23_ocr_cnnrnn" model_ocr = load_model(self.model_ocr_dir , compile=False) self.prediction_model = tf.keras.models.Model( @@ -5642,7 +5642,7 @@ class Eynollah_ocr: if self.draw_texts_on_image: total_bb_coordinates.append([x,y,w,h]) - + w_scaled = w * image_height/float(h) img_poly_on_img = np.copy(img) @@ -5684,7 +5684,7 @@ class Eynollah_ocr: img_crop_bin = img_crop_bin[y_n:y_n+h_n, x_n:x_n+w_n, :] img_crop_bin[mask_poly==0] = 255 - if mask_poly[:,:,0].sum() /float(w_n*h_n) < 0.50 and w_scaled > 100: + if mask_poly[:,:,0].sum() /float(w_n*h_n) < 0.50 and w_scaled > 90: if self.prediction_with_both_of_rgb_and_bin: img_crop, img_crop_bin = break_curved_line_into_small_pieces_and_then_merge(img_crop, mask_poly, img_crop_bin) else: @@ -5698,7 +5698,7 @@ class Eynollah_ocr: if type_textregion=='drop-capital': pass else: - if mask_poly[:,:,0].sum() /float(w*h) < 0.50 and w_scaled > 100: + if mask_poly[:,:,0].sum() /float(w*h) < 0.50 and w_scaled > 90: if self.prediction_with_both_of_rgb_and_bin: img_crop, img_crop_bin = break_curved_line_into_small_pieces_and_then_merge(img_crop, mask_poly, img_crop_bin) else: @@ -5708,7 +5708,7 @@ class Eynollah_ocr: if not self.export_textline_images_and_text: - if w_scaled < 640:#1.5*image_width: + if w_scaled < 530:#640:#1.5*image_width: img_fin = preprocess_and_resize_image_for_ocrcnn_model(img_crop, image_height, image_width) cropped_lines.append(img_fin) if angle_degrees > 15: diff --git a/src/eynollah/utils/utils_ocr.py b/src/eynollah/utils/utils_ocr.py index aa1efa6..81a8ae1 100644 --- a/src/eynollah/utils/utils_ocr.py +++ b/src/eynollah/utils/utils_ocr.py @@ -241,7 +241,7 @@ def return_splitting_point_of_image(image_to_spliited): peaks_real = peaks_real[(peaks_realwidth1)] arg_sort = np.argsort(sum_smoothed[peaks_real]) - peaks_sort_4 = peaks_real[arg_sort][::-1][:4] + peaks_sort_4 = peaks_real[arg_sort][::-1][:3] return np.sort(peaks_sort_4) From cc36694dfdab852e27780187f15da1155423bd02 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Sun, 1 Jun 2025 15:53:04 +0200 Subject: [PATCH 25/40] image enhancer is integrated --- src/eynollah/cli.py | 69 +++ src/eynollah/eynollah.py | 234 +--------- src/eynollah/image_enhancer.py | 756 +++++++++++++++++++++++++++++++++ 3 files changed, 830 insertions(+), 229 deletions(-) create mode 100644 src/eynollah/image_enhancer.py diff --git a/src/eynollah/cli.py b/src/eynollah/cli.py 
index 2d0d6f9..840bc4b 100644 --- a/src/eynollah/cli.py +++ b/src/eynollah/cli.py @@ -3,6 +3,7 @@ import click from ocrd_utils import initLogging, getLevelName, getLogger from eynollah.eynollah import Eynollah, Eynollah_ocr from eynollah.sbb_binarize import SbbBinarizer +from eynollah.image_enhancer import Enhancer @click.group() def main(): @@ -70,6 +71,74 @@ def binarization(patches, model_dir, input_image, output_image, dir_in, dir_out) +@main.command() +@click.option( + "--image", + "-i", + help="image filename", + type=click.Path(exists=True, dir_okay=False), +) + +@click.option( + "--out", + "-o", + help="directory to write output xml data", + type=click.Path(exists=True, file_okay=False), + required=True, +) +@click.option( + "--overwrite", + "-O", + help="overwrite (instead of skipping) if output xml exists", + is_flag=True, +) +@click.option( + "--dir_in", + "-di", + help="directory of images", + type=click.Path(exists=True, file_okay=False), +) +@click.option( + "--model", + "-m", + help="directory of models", + type=click.Path(exists=True, file_okay=False), + required=True, +) + +@click.option( + "--num_col_upper", + "-ncu", + help="lower limit of columns in document image", +) +@click.option( + "--num_col_lower", + "-ncl", + help="upper limit of columns in document image", +) +@click.option( + "--log_level", + "-l", + type=click.Choice(['OFF', 'DEBUG', 'INFO', 'WARN', 'ERROR']), + help="Override log level globally to this", +) + +def enhancement(image, out, overwrite, dir_in, model, num_col_upper, num_col_lower, log_level): + initLogging() + if log_level: + getLogger('enhancement').setLevel(getLevelName(log_level)) + assert image or dir_in, "Either a single image -i or a dir_in -di is required" + enhancer_object = Enhancer( + model, + logger=getLogger('enhancement'), + dir_out=out, + num_col_upper=num_col_upper, + num_col_lower=num_col_lower, + ) + if dir_in: + enhancer_object.run(dir_in=dir_in, overwrite=overwrite) + else: + enhancer_object.run(image_filename=image, overwrite=overwrite) @main.command() @click.option( diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py index 6c00329..cf540d3 100644 --- a/src/eynollah/eynollah.py +++ b/src/eynollah/eynollah.py @@ -3612,25 +3612,12 @@ class Eynollah: inference_bs = 3 - cv2.imwrite('textregions.png', text_regions_p*50) - cv2.imwrite('sep.png', (text_regions_p[:,:]==6)*255) - ver_kernel = np.ones((5, 1), dtype=np.uint8) hor_kernel = np.ones((1, 5), dtype=np.uint8) - - #separators = (text_regions_p[:,:]==6)*1 - #text_regions_p[text_regions_p[:,:]==6] = 0 - #separators = separators.astype('uint8') - - #separators = cv2.erode(separators , hor_kernel, iterations=1) - #text_regions_p[separators[:,:]==1] = 6 - - #cv2.imwrite('sep_new.png', (text_regions_p[:,:]==6)*255) - min_cont_size_to_be_dilated = 10 - if len(contours_only_text_parent)>min_cont_size_to_be_dilated: + if len(contours_only_text_parent)>min_cont_size_to_be_dilated and self.light_version: cx_conts, cy_conts, x_min_conts, x_max_conts, y_min_conts, y_max_conts, _ = find_new_features_of_contours(contours_only_text_parent) args_cont_located = np.array(range(len(contours_only_text_parent))) @@ -3672,7 +3659,6 @@ class Eynollah: text_regions_p_textregions_dilated = cv2.dilate(text_regions_p_textregions_dilated , ver_kernel, iterations=5) text_regions_p_textregions_dilated[text_regions_p[:,:]>1] = 0 - cv2.imwrite('text_regions_p_textregions_dilated.png', text_regions_p_textregions_dilated*255) contours_only_dilated, hir_on_text_dilated = 
return_contours_of_image(text_regions_p_textregions_dilated) contours_only_dilated = return_parent_contours(contours_only_dilated, hir_on_text_dilated) @@ -3723,21 +3709,20 @@ class Eynollah: img_header_and_sep[int(y_max_main[j]):int(y_max_main[j])+12, int(x_min_main[j]):int(x_max_main[j])] = 1 co_text_all_org = contours_only_text_parent + contours_only_text_parent_h - if len(contours_only_text_parent)>min_cont_size_to_be_dilated: + if len(contours_only_text_parent)>min_cont_size_to_be_dilated and self.light_version: co_text_all = contours_only_dilated + contours_only_text_parent_h else: co_text_all = contours_only_text_parent + contours_only_text_parent_h else: co_text_all_org = contours_only_text_parent - if len(contours_only_text_parent)>min_cont_size_to_be_dilated: + if len(contours_only_text_parent)>min_cont_size_to_be_dilated and self.light_version: co_text_all = contours_only_dilated else: co_text_all = contours_only_text_parent if not len(co_text_all): return [], [] - print(len(co_text_all), "co_text_all") - print(len(co_text_all_org), "co_text_all_org") + labels_con = np.zeros((int(y_len /6.), int(x_len/6.), len(co_text_all)), dtype=bool) co_text_all = [(i/6).astype(int) for i in co_text_all] for i in range(len(co_text_all)): @@ -3805,7 +3790,7 @@ class Eynollah: ordered = [i[0] for i in ordered] - if len(contours_only_text_parent)>min_cont_size_to_be_dilated: + if len(contours_only_text_parent)>min_cont_size_to_be_dilated and self.light_version: org_contours_indexes = [] for ind in range(len(ordered)): region_with_curr_order = ordered[ind] @@ -3823,215 +3808,6 @@ class Eynollah: else: region_ids = ['region_%04d' % i for i in range(len(co_text_all_org))] return ordered, region_ids - - - ####def return_start_and_end_of_common_text_of_textline_ocr(self, textline_image, ind_tot): - ####width = np.shape(textline_image)[1] - ####height = np.shape(textline_image)[0] - ####common_window = int(0.2*width) - - ####width1 = int ( width/2. - common_window ) - ####width2 = int ( width/2. + common_window ) - - ####img_sum = np.sum(textline_image[:,:,0], axis=0) - ####sum_smoothed = gaussian_filter1d(img_sum, 3) - - ####peaks_real, _ = find_peaks(sum_smoothed, height=0) - ####if len(peaks_real)>70: - - ####peaks_real = peaks_real[(peaks_realwidth1)] - - ####arg_sort = np.argsort(sum_smoothed[peaks_real]) - ####arg_sort4 =arg_sort[::-1][:4] - ####peaks_sort_4 = peaks_real[arg_sort][::-1][:4] - ####argsort_sorted = np.argsort(peaks_sort_4) - - ####first_4_sorted = peaks_sort_4[argsort_sorted] - ####y_4_sorted = sum_smoothed[peaks_real][arg_sort4[argsort_sorted]] - #####print(first_4_sorted,'first_4_sorted') - - ####arg_sortnew = np.argsort(y_4_sorted) - ####peaks_final =np.sort( first_4_sorted[arg_sortnew][2:] ) - - #####plt.figure(ind_tot) - #####plt.imshow(textline_image) - #####plt.plot([peaks_final[0], peaks_final[0]], [0, height-1]) - #####plt.plot([peaks_final[1], peaks_final[1]], [0, height-1]) - #####plt.savefig('./'+str(ind_tot)+'.png') - - ####return peaks_final[0], peaks_final[1] - ####else: - ####pass - - ##def return_start_and_end_of_common_text_of_textline_ocr_without_common_section(self, textline_image, ind_tot): - ##width = np.shape(textline_image)[1] - ##height = np.shape(textline_image)[0] - ##common_window = int(0.06*width) - - ##width1 = int ( width/2. - common_window ) - ##width2 = int ( width/2. 
+ common_window ) - - ##img_sum = np.sum(textline_image[:,:,0], axis=0) - ##sum_smoothed = gaussian_filter1d(img_sum, 3) - - ##peaks_real, _ = find_peaks(sum_smoothed, height=0) - ##if len(peaks_real)>70: - ###print(len(peaks_real), 'len(peaks_real)') - - ##peaks_real = peaks_real[(peaks_realwidth1)] - - ##arg_max = np.argmax(sum_smoothed[peaks_real]) - ##peaks_final = peaks_real[arg_max] - - ###plt.figure(ind_tot) - ###plt.imshow(textline_image) - ###plt.plot([peaks_final, peaks_final], [0, height-1]) - ####plt.plot([peaks_final[1], peaks_final[1]], [0, height-1]) - ###plt.savefig('./'+str(ind_tot)+'.png') - - ##return peaks_final - ##else: - ##return None - - ###def return_start_and_end_of_common_text_of_textline_ocr_new_splitted( - ###self, peaks_real, sum_smoothed, start_split, end_split): - - ###peaks_real = peaks_real[(peaks_realstart_split)] - - ###arg_sort = np.argsort(sum_smoothed[peaks_real]) - ###arg_sort4 =arg_sort[::-1][:4] - ###peaks_sort_4 = peaks_real[arg_sort][::-1][:4] - ###argsort_sorted = np.argsort(peaks_sort_4) - - ###first_4_sorted = peaks_sort_4[argsort_sorted] - ###y_4_sorted = sum_smoothed[peaks_real][arg_sort4[argsort_sorted]] - ####print(first_4_sorted,'first_4_sorted') - - ###arg_sortnew = np.argsort(y_4_sorted) - ###peaks_final =np.sort( first_4_sorted[arg_sortnew][3:] ) - ###return peaks_final[0] - - ###def return_start_and_end_of_common_text_of_textline_ocr_new(self, textline_image, ind_tot): - ###width = np.shape(textline_image)[1] - ###height = np.shape(textline_image)[0] - ###common_window = int(0.15*width) - - ###width1 = int ( width/2. - common_window ) - ###width2 = int ( width/2. + common_window ) - ###mid = int(width/2.) - - ###img_sum = np.sum(textline_image[:,:,0], axis=0) - ###sum_smoothed = gaussian_filter1d(img_sum, 3) - - ###peaks_real, _ = find_peaks(sum_smoothed, height=0) - ###if len(peaks_real)>70: - ###peak_start = self.return_start_and_end_of_common_text_of_textline_ocr_new_splitted( - ###peaks_real, sum_smoothed, width1, mid+2) - ###peak_end = self.return_start_and_end_of_common_text_of_textline_ocr_new_splitted( - ###peaks_real, sum_smoothed, mid-2, width2) - - ####plt.figure(ind_tot) - ####plt.imshow(textline_image) - ####plt.plot([peak_start, peak_start], [0, height-1]) - ####plt.plot([peak_end, peak_end], [0, height-1]) - ####plt.savefig('./'+str(ind_tot)+'.png') - - ###return peak_start, peak_end - ###else: - ###pass - - ##def return_ocr_of_textline_without_common_section( - ##self, textline_image, model_ocr, processor, device, width_textline, h2w_ratio,ind_tot): - - ##if h2w_ratio > 0.05: - ##pixel_values = processor(textline_image, return_tensors="pt").pixel_values - ##generated_ids = model_ocr.generate(pixel_values.to(device)) - ##generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0] - ##else: - ###width = np.shape(textline_image)[1] - ###height = np.shape(textline_image)[0] - ###common_window = int(0.3*width) - ###width1 = int ( width/2. - common_window ) - ###width2 = int ( width/2. 
+ common_window ) - - ##split_point = self.return_start_and_end_of_common_text_of_textline_ocr_without_common_section( - ##textline_image, ind_tot) - ##if split_point: - ##image1 = textline_image[:, :split_point,:]# image.crop((0, 0, width2, height)) - ##image2 = textline_image[:, split_point:,:]#image.crop((width1, 0, width, height)) - - ###pixel_values1 = processor(image1, return_tensors="pt").pixel_values - ###pixel_values2 = processor(image2, return_tensors="pt").pixel_values - - ##pixel_values_merged = processor([image1,image2], return_tensors="pt").pixel_values - ##generated_ids_merged = model_ocr.generate(pixel_values_merged.to(device)) - ##generated_text_merged = processor.batch_decode(generated_ids_merged, skip_special_tokens=True) - - ###print(generated_text_merged,'generated_text_merged') - - ###generated_ids1 = model_ocr.generate(pixel_values1.to(device)) - ###generated_ids2 = model_ocr.generate(pixel_values2.to(device)) - - ###generated_text1 = processor.batch_decode(generated_ids1, skip_special_tokens=True)[0] - ###generated_text2 = processor.batch_decode(generated_ids2, skip_special_tokens=True)[0] - - ###generated_text = generated_text1 + ' ' + generated_text2 - ##generated_text = generated_text_merged[0] + ' ' + generated_text_merged[1] - - ###print(generated_text1,'generated_text1') - ###print(generated_text2, 'generated_text2') - ###print('########################################') - ##else: - ##pixel_values = processor(textline_image, return_tensors="pt").pixel_values - ##generated_ids = model_ocr.generate(pixel_values.to(device)) - ##generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0] - - ###print(generated_text,'generated_text') - ###print('########################################') - ##return generated_text - - ###def return_ocr_of_textline( - ###self, textline_image, model_ocr, processor, device, width_textline, h2w_ratio,ind_tot): - - ###if h2w_ratio > 0.05: - ###pixel_values = processor(textline_image, return_tensors="pt").pixel_values - ###generated_ids = model_ocr.generate(pixel_values.to(device)) - ###generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0] - ###else: - ####width = np.shape(textline_image)[1] - ####height = np.shape(textline_image)[0] - ####common_window = int(0.3*width) - ####width1 = int ( width/2. - common_window ) - ####width2 = int ( width/2. 
+ common_window ) - - ###try: - ###width1, width2 = self.return_start_and_end_of_common_text_of_textline_ocr_new(textline_image, ind_tot) - - ###image1 = textline_image[:, :width2,:]# image.crop((0, 0, width2, height)) - ###image2 = textline_image[:, width1:,:]#image.crop((width1, 0, width, height)) - - ###pixel_values1 = processor(image1, return_tensors="pt").pixel_values - ###pixel_values2 = processor(image2, return_tensors="pt").pixel_values - - ###generated_ids1 = model_ocr.generate(pixel_values1.to(device)) - ###generated_ids2 = model_ocr.generate(pixel_values2.to(device)) - - ###generated_text1 = processor.batch_decode(generated_ids1, skip_special_tokens=True)[0] - ###generated_text2 = processor.batch_decode(generated_ids2, skip_special_tokens=True)[0] - ####print(generated_text1,'generated_text1') - ####print(generated_text2, 'generated_text2') - ####print('########################################') - - ###match = sq(None, generated_text1, generated_text2).find_longest_match( - ###0, len(generated_text1), 0, len(generated_text2)) - ###generated_text = generated_text1 + generated_text2[match.b+match.size:] - ###except: - ###pixel_values = processor(textline_image, return_tensors="pt").pixel_values - ###generated_ids = model_ocr.generate(pixel_values.to(device)) - ###generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0] - - ###return generated_text - def return_list_of_contours_with_desired_order(self, ls_cons, sorted_indexes): return [ls_cons[sorted_indexes[index]] for index in range(len(sorted_indexes))] diff --git a/src/eynollah/image_enhancer.py b/src/eynollah/image_enhancer.py new file mode 100644 index 0000000..71445f7 --- /dev/null +++ b/src/eynollah/image_enhancer.py @@ -0,0 +1,756 @@ +""" +Image enhancer. The output can be written as same scale of input or in new predicted scale. 
+""" + +from logging import Logger +from difflib import SequenceMatcher as sq +from PIL import Image, ImageDraw, ImageFont +import math +import os +import sys +import time +from typing import Optional +import atexit +import warnings +from functools import partial +from pathlib import Path +from multiprocessing import cpu_count +import gc +import copy +from loky import ProcessPoolExecutor +import xml.etree.ElementTree as ET +import cv2 +import numpy as np +from ocrd import OcrdPage +from ocrd_utils import getLogger, tf_disable_interactive_logs +import statistics +from tensorflow.keras.models import load_model +from .utils.resize import resize_image +from .utils import ( + crop_image_inside_box +) + +DPI_THRESHOLD = 298 +KERNEL = np.ones((5, 5), np.uint8) + + +class Enhancer: + def __init__( + self, + dir_models : str, + dir_out : Optional[str] = None, + num_col_upper : Optional[int] = None, + num_col_lower : Optional[int] = None, + logger : Optional[Logger] = None, + ): + self.dir_out = dir_out + self.input_binary = False + self.light_version = False + if num_col_upper: + self.num_col_upper = int(num_col_upper) + else: + self.num_col_upper = num_col_upper + if num_col_lower: + self.num_col_lower = int(num_col_lower) + else: + self.num_col_lower = num_col_lower + + self.logger = logger if logger else getLogger('enhancement') + # for parallelization of CPU-intensive tasks: + self.executor = ProcessPoolExecutor(max_workers=cpu_count(), timeout=1200) + atexit.register(self.executor.shutdown) + self.dir_models = dir_models + self.model_dir_of_enhancement = dir_models + "/eynollah-enhancement_20210425" + self.model_dir_of_col_classifier = dir_models + "/eynollah-column-classifier_20210425" + self.model_page_dir = dir_models + "/eynollah-page-extraction_20210425" + + try: + for device in tf.config.list_physical_devices('GPU'): + tf.config.experimental.set_memory_growth(device, True) + except: + self.logger.warning("no GPU device available") + + self.model_page = self.our_load_model(self.model_page_dir) + self.model_classifier = self.our_load_model(self.model_dir_of_col_classifier) + self.model_enhancement = self.our_load_model(self.model_dir_of_enhancement) + + def cache_images(self, image_filename=None, image_pil=None, dpi=None): + ret = {} + t_c0 = time.time() + if image_filename: + ret['img'] = cv2.imread(image_filename) + if self.light_version: + self.dpi = 100 + else: + self.dpi = 0#check_dpi(image_filename) + else: + ret['img'] = pil2cv(image_pil) + if self.light_version: + self.dpi = 100 + else: + self.dpi = 0#check_dpi(image_pil) + ret['img_grayscale'] = cv2.cvtColor(ret['img'], cv2.COLOR_BGR2GRAY) + for prefix in ('', '_grayscale'): + ret[f'img{prefix}_uint8'] = ret[f'img{prefix}'].astype(np.uint8) + self._imgs = ret + if dpi is not None: + self.dpi = dpi + + def reset_file_name_dir(self, image_filename): + t_c = time.time() + self.cache_images(image_filename=image_filename) + self.output_filename = os.path.join(self.dir_out, Path(image_filename).stem +'.png') + + def imread(self, grayscale=False, uint8=True): + key = 'img' + if grayscale: + key += '_grayscale' + if uint8: + key += '_uint8' + return self._imgs[key].copy() + + def isNaN(self, num): + return num != num + + @staticmethod + def our_load_model(model_file): + if model_file.endswith('.h5') and Path(model_file[:-3]).exists(): + # prefer SavedModel over HDF5 format if it exists + model_file = model_file[:-3] + try: + model = load_model(model_file, compile=False) + except: + model = load_model(model_file, compile=False, 
custom_objects={ + "PatchEncoder": PatchEncoder, "Patches": Patches}) + return model + + def predict_enhancement(self, img): + self.logger.debug("enter predict_enhancement") + + img_height_model = self.model_enhancement.layers[-1].output_shape[1] + img_width_model = self.model_enhancement.layers[-1].output_shape[2] + if img.shape[0] < img_height_model: + img = cv2.resize(img, (img.shape[1], img_width_model), interpolation=cv2.INTER_NEAREST) + if img.shape[1] < img_width_model: + img = cv2.resize(img, (img_height_model, img.shape[0]), interpolation=cv2.INTER_NEAREST) + margin = int(0.1 * img_width_model) + width_mid = img_width_model - 2 * margin + height_mid = img_height_model - 2 * margin + img = img / 255. + img_h = img.shape[0] + img_w = img.shape[1] + + prediction_true = np.zeros((img_h, img_w, 3)) + nxf = img_w / float(width_mid) + nyf = img_h / float(height_mid) + nxf = int(nxf) + 1 if nxf > int(nxf) else int(nxf) + nyf = int(nyf) + 1 if nyf > int(nyf) else int(nyf) + + for i in range(nxf): + for j in range(nyf): + if i == 0: + index_x_d = i * width_mid + index_x_u = index_x_d + img_width_model + else: + index_x_d = i * width_mid + index_x_u = index_x_d + img_width_model + if j == 0: + index_y_d = j * height_mid + index_y_u = index_y_d + img_height_model + else: + index_y_d = j * height_mid + index_y_u = index_y_d + img_height_model + + if index_x_u > img_w: + index_x_u = img_w + index_x_d = img_w - img_width_model + if index_y_u > img_h: + index_y_u = img_h + index_y_d = img_h - img_height_model + + img_patch = img[np.newaxis, index_y_d:index_y_u, index_x_d:index_x_u, :] + label_p_pred = self.model_enhancement.predict(img_patch, verbose=0) + seg = label_p_pred[0, :, :, :] * 255 + + if i == 0 and j == 0: + prediction_true[index_y_d + 0:index_y_u - margin, + index_x_d + 0:index_x_u - margin] = \ + seg[0:-margin or None, + 0:-margin or None] + elif i == nxf - 1 and j == nyf - 1: + prediction_true[index_y_d + margin:index_y_u - 0, + index_x_d + margin:index_x_u - 0] = \ + seg[margin:, + margin:] + elif i == 0 and j == nyf - 1: + prediction_true[index_y_d + margin:index_y_u - 0, + index_x_d + 0:index_x_u - margin] = \ + seg[margin:, + 0:-margin or None] + elif i == nxf - 1 and j == 0: + prediction_true[index_y_d + 0:index_y_u - margin, + index_x_d + margin:index_x_u - 0] = \ + seg[0:-margin or None, + margin:] + elif i == 0 and j != 0 and j != nyf - 1: + prediction_true[index_y_d + margin:index_y_u - margin, + index_x_d + 0:index_x_u - margin] = \ + seg[margin:-margin or None, + 0:-margin or None] + elif i == nxf - 1 and j != 0 and j != nyf - 1: + prediction_true[index_y_d + margin:index_y_u - margin, + index_x_d + margin:index_x_u - 0] = \ + seg[margin:-margin or None, + margin:] + elif i != 0 and i != nxf - 1 and j == 0: + prediction_true[index_y_d + 0:index_y_u - margin, + index_x_d + margin:index_x_u - margin] = \ + seg[0:-margin or None, + margin:-margin or None] + elif i != 0 and i != nxf - 1 and j == nyf - 1: + prediction_true[index_y_d + margin:index_y_u - 0, + index_x_d + margin:index_x_u - margin] = \ + seg[margin:, + margin:-margin or None] + else: + prediction_true[index_y_d + margin:index_y_u - margin, + index_x_d + margin:index_x_u - margin] = \ + seg[margin:-margin or None, + margin:-margin or None] + + prediction_true = prediction_true.astype(int) + return prediction_true + + def calculate_width_height_by_columns(self, img, num_col, width_early, label_p_pred): + self.logger.debug("enter calculate_width_height_by_columns") + if num_col == 1 and width_early < 1100: + 
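# note: the if/elif chain starting on the previous line maps the detected column count
# to a preferred working width (1 -> 2000 px, 2 -> 2400, 3 -> 3000, 4 -> 4000,
# 5 -> 5000, 6 -> 6500) and keeps the original width when it already lies inside the
# acceptable band for that count; the height is scaled proportionally, and the resize
# is skipped when the classifier confidence is below 0.9 and the target width would
# shrink the image, or when the resized height would reach 8000 px or more.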
img_w_new = 2000 + elif num_col == 1 and width_early >= 2500: + img_w_new = 2000 + elif num_col == 1 and width_early >= 1100 and width_early < 2500: + img_w_new = width_early + elif num_col == 2 and width_early < 2000: + img_w_new = 2400 + elif num_col == 2 and width_early >= 3500: + img_w_new = 2400 + elif num_col == 2 and width_early >= 2000 and width_early < 3500: + img_w_new = width_early + elif num_col == 3 and width_early < 2000: + img_w_new = 3000 + elif num_col == 3 and width_early >= 4000: + img_w_new = 3000 + elif num_col == 3 and width_early >= 2000 and width_early < 4000: + img_w_new = width_early + elif num_col == 4 and width_early < 2500: + img_w_new = 4000 + elif num_col == 4 and width_early >= 5000: + img_w_new = 4000 + elif num_col == 4 and width_early >= 2500 and width_early < 5000: + img_w_new = width_early + elif num_col == 5 and width_early < 3700: + img_w_new = 5000 + elif num_col == 5 and width_early >= 7000: + img_w_new = 5000 + elif num_col == 5 and width_early >= 3700 and width_early < 7000: + img_w_new = width_early + elif num_col == 6 and width_early < 4500: + img_w_new = 6500 # 5400 + else: + img_w_new = width_early + img_h_new = img_w_new * img.shape[0] // img.shape[1] + + if label_p_pred[0][int(num_col - 1)] < 0.9 and img_w_new < width_early: + img_new = np.copy(img) + num_column_is_classified = False + #elif label_p_pred[0][int(num_col - 1)] < 0.8 and img_h_new >= 8000: + elif img_h_new >= 8000: + img_new = np.copy(img) + num_column_is_classified = False + else: + img_new = resize_image(img, img_h_new, img_w_new) + num_column_is_classified = True + + return img_new, num_column_is_classified + + def early_page_for_num_of_column_classification(self,img_bin): + self.logger.debug("enter early_page_for_num_of_column_classification") + if self.input_binary: + img = np.copy(img_bin).astype(np.uint8) + else: + img = self.imread() + img = cv2.GaussianBlur(img, (5, 5), 0) + img_page_prediction = self.do_prediction(False, img, self.model_page) + + imgray = cv2.cvtColor(img_page_prediction, cv2.COLOR_BGR2GRAY) + _, thresh = cv2.threshold(imgray, 0, 255, 0) + thresh = cv2.dilate(thresh, KERNEL, iterations=3) + contours, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) + if len(contours)>0: + cnt_size = np.array([cv2.contourArea(contours[j]) + for j in range(len(contours))]) + cnt = contours[np.argmax(cnt_size)] + box = cv2.boundingRect(cnt) + else: + box = [0, 0, img.shape[1], img.shape[0]] + cropped_page, page_coord = crop_image_inside_box(box, img) + + self.logger.debug("exit early_page_for_num_of_column_classification") + return cropped_page, page_coord + + def calculate_width_height_by_columns_1_2(self, img, num_col, width_early, label_p_pred): + self.logger.debug("enter calculate_width_height_by_columns") + if num_col == 1: + img_w_new = 1000 + else: + img_w_new = 1300 + img_h_new = img_w_new * img.shape[0] // img.shape[1] + + if label_p_pred[0][int(num_col - 1)] < 0.9 and img_w_new < width_early: + img_new = np.copy(img) + num_column_is_classified = False + #elif label_p_pred[0][int(num_col - 1)] < 0.8 and img_h_new >= 8000: + elif img_h_new >= 8000: + img_new = np.copy(img) + num_column_is_classified = False + else: + img_new = resize_image(img, img_h_new, img_w_new) + num_column_is_classified = True + + return img_new, num_column_is_classified + + def resize_and_enhance_image_with_column_classifier(self, light_version): + self.logger.debug("enter resize_and_enhance_image_with_column_classifier") + dpi = 0#self.dpi + self.logger.info("Detected 
%s DPI", dpi) + if self.input_binary: + img = self.imread() + prediction_bin = self.do_prediction(True, img, self.model_bin, n_batch_inference=5) + prediction_bin = 255 * (prediction_bin[:,:,0]==0) + prediction_bin = np.repeat(prediction_bin[:, :, np.newaxis], 3, axis=2).astype(np.uint8) + img= np.copy(prediction_bin) + img_bin = prediction_bin + else: + img = self.imread() + self.h_org, self.w_org = img.shape[:2] + img_bin = None + + width_early = img.shape[1] + t1 = time.time() + _, page_coord = self.early_page_for_num_of_column_classification(img_bin) + + self.image_page_org_size = img[page_coord[0] : page_coord[1], page_coord[2] : page_coord[3], :] + self.page_coord = page_coord + + if self.num_col_upper and not self.num_col_lower: + num_col = self.num_col_upper + label_p_pred = [np.ones(6)] + elif self.num_col_lower and not self.num_col_upper: + num_col = self.num_col_lower + label_p_pred = [np.ones(6)] + elif not self.num_col_upper and not self.num_col_lower: + if self.input_binary: + img_in = np.copy(img) + img_in = img_in / 255.0 + img_in = cv2.resize(img_in, (448, 448), interpolation=cv2.INTER_NEAREST) + img_in = img_in.reshape(1, 448, 448, 3) + else: + img_1ch = self.imread(grayscale=True) + width_early = img_1ch.shape[1] + img_1ch = img_1ch[page_coord[0] : page_coord[1], page_coord[2] : page_coord[3]] + + img_1ch = img_1ch / 255.0 + img_1ch = cv2.resize(img_1ch, (448, 448), interpolation=cv2.INTER_NEAREST) + img_in = np.zeros((1, img_1ch.shape[0], img_1ch.shape[1], 3)) + img_in[0, :, :, 0] = img_1ch[:, :] + img_in[0, :, :, 1] = img_1ch[:, :] + img_in[0, :, :, 2] = img_1ch[:, :] + + label_p_pred = self.model_classifier.predict(img_in, verbose=0) + num_col = np.argmax(label_p_pred[0]) + 1 + elif (self.num_col_upper and self.num_col_lower) and (self.num_col_upper!=self.num_col_lower): + if self.input_binary: + img_in = np.copy(img) + img_in = img_in / 255.0 + img_in = cv2.resize(img_in, (448, 448), interpolation=cv2.INTER_NEAREST) + img_in = img_in.reshape(1, 448, 448, 3) + else: + img_1ch = self.imread(grayscale=True) + width_early = img_1ch.shape[1] + img_1ch = img_1ch[page_coord[0] : page_coord[1], page_coord[2] : page_coord[3]] + + img_1ch = img_1ch / 255.0 + img_1ch = cv2.resize(img_1ch, (448, 448), interpolation=cv2.INTER_NEAREST) + img_in = np.zeros((1, img_1ch.shape[0], img_1ch.shape[1], 3)) + img_in[0, :, :, 0] = img_1ch[:, :] + img_in[0, :, :, 1] = img_1ch[:, :] + img_in[0, :, :, 2] = img_1ch[:, :] + + label_p_pred = self.model_classifier.predict(img_in, verbose=0) + num_col = np.argmax(label_p_pred[0]) + 1 + + if num_col > self.num_col_upper: + num_col = self.num_col_upper + label_p_pred = [np.ones(6)] + if num_col < self.num_col_lower: + num_col = self.num_col_lower + label_p_pred = [np.ones(6)] + else: + num_col = self.num_col_upper + label_p_pred = [np.ones(6)] + + self.logger.info("Found %d columns (%s)", num_col, np.around(label_p_pred, decimals=5)) + + if dpi < DPI_THRESHOLD: + if light_version and num_col in (1,2): + img_new, num_column_is_classified = self.calculate_width_height_by_columns_1_2( + img, num_col, width_early, label_p_pred) + else: + img_new, num_column_is_classified = self.calculate_width_height_by_columns( + img, num_col, width_early, label_p_pred) + if light_version: + image_res = np.copy(img_new) + else: + image_res = self.predict_enhancement(img_new) + is_image_enhanced = True + + else: + num_column_is_classified = True + image_res = np.copy(img) + is_image_enhanced = False + + self.logger.debug("exit 
resize_and_enhance_image_with_column_classifier") + return is_image_enhanced, img, image_res, num_col, num_column_is_classified, img_bin + def do_prediction( + self, patches, img, model, + n_batch_inference=1, marginal_of_patch_percent=0.1, + thresholding_for_some_classes_in_light_version=False, + thresholding_for_artificial_class_in_light_version=False, thresholding_for_fl_light_version=False, threshold_art_class_textline=0.1): + + self.logger.debug("enter do_prediction") + img_height_model = model.layers[-1].output_shape[1] + img_width_model = model.layers[-1].output_shape[2] + + if not patches: + img_h_page = img.shape[0] + img_w_page = img.shape[1] + img = img / float(255.0) + img = resize_image(img, img_height_model, img_width_model) + + label_p_pred = model.predict(img[np.newaxis], verbose=0) + seg = np.argmax(label_p_pred, axis=3)[0] + + if thresholding_for_artificial_class_in_light_version: + seg_art = label_p_pred[0,:,:,2] + + seg_art[seg_art0] =1 + + skeleton_art = skeletonize(seg_art) + skeleton_art = skeleton_art*1 + + seg[skeleton_art==1]=2 + + if thresholding_for_fl_light_version: + seg_header = label_p_pred[0,:,:,2] + + seg_header[seg_header<0.2] = 0 + seg_header[seg_header>0] =1 + + seg[seg_header==1]=2 + + seg_color = np.repeat(seg[:, :, np.newaxis], 3, axis=2) + prediction_true = resize_image(seg_color, img_h_page, img_w_page).astype(np.uint8) + return prediction_true + + if img.shape[0] < img_height_model: + img = resize_image(img, img_height_model, img.shape[1]) + if img.shape[1] < img_width_model: + img = resize_image(img, img.shape[0], img_width_model) + + self.logger.debug("Patch size: %sx%s", img_height_model, img_width_model) + margin = int(marginal_of_patch_percent * img_height_model) + width_mid = img_width_model - 2 * margin + height_mid = img_height_model - 2 * margin + img = img / 255. 
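The do_prediction method being patched here covers the page with model-sized windows whose margins overlap, so the unreliable borders of each per-window prediction can be dropped when the results are stitched back together. A minimal, self-contained sketch of that window arithmetic follows; the patch size and margin are assumptions for the example, not the model's real input shape.

import numpy as np

def tile_coordinates(img_h, img_w, patch_h, patch_w, margin):
    # windows advance by the patch size minus twice the margin, so neighbours overlap
    stride_y = patch_h - 2 * margin
    stride_x = patch_w - 2 * margin
    n_x = int(np.ceil(img_w / stride_x))
    n_y = int(np.ceil(img_h / stride_y))
    for i in range(n_x):
        for j in range(n_y):
            x0 = min(i * stride_x, img_w - patch_w)   # clamp the last column of windows
            y0 = min(j * stride_y, img_h - patch_h)   # clamp the last row of windows
            yield y0, y0 + patch_h, x0, x0 + patch_w

# example: a 1000 x 1400 page, 448 x 448 patches, 10 % margin
windows = list(tile_coordinates(1000, 1400, 448, 448, int(0.1 * 448)))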
+ #img = img.astype(np.float16) + img_h = img.shape[0] + img_w = img.shape[1] + prediction_true = np.zeros((img_h, img_w, 3)) + mask_true = np.zeros((img_h, img_w)) + nxf = img_w / float(width_mid) + nyf = img_h / float(height_mid) + nxf = int(nxf) + 1 if nxf > int(nxf) else int(nxf) + nyf = int(nyf) + 1 if nyf > int(nyf) else int(nyf) + + list_i_s = [] + list_j_s = [] + list_x_u = [] + list_x_d = [] + list_y_u = [] + list_y_d = [] + + batch_indexer = 0 + img_patch = np.zeros((n_batch_inference, img_height_model, img_width_model, 3)) + for i in range(nxf): + for j in range(nyf): + if i == 0: + index_x_d = i * width_mid + index_x_u = index_x_d + img_width_model + else: + index_x_d = i * width_mid + index_x_u = index_x_d + img_width_model + if j == 0: + index_y_d = j * height_mid + index_y_u = index_y_d + img_height_model + else: + index_y_d = j * height_mid + index_y_u = index_y_d + img_height_model + if index_x_u > img_w: + index_x_u = img_w + index_x_d = img_w - img_width_model + if index_y_u > img_h: + index_y_u = img_h + index_y_d = img_h - img_height_model + + list_i_s.append(i) + list_j_s.append(j) + list_x_u.append(index_x_u) + list_x_d.append(index_x_d) + list_y_d.append(index_y_d) + list_y_u.append(index_y_u) + + img_patch[batch_indexer,:,:,:] = img[index_y_d:index_y_u, index_x_d:index_x_u, :] + batch_indexer += 1 + + if (batch_indexer == n_batch_inference or + # last batch + i == nxf - 1 and j == nyf - 1): + self.logger.debug("predicting patches on %s", str(img_patch.shape)) + label_p_pred = model.predict(img_patch, verbose=0) + seg = np.argmax(label_p_pred, axis=3) + + if thresholding_for_some_classes_in_light_version: + seg_not_base = label_p_pred[:,:,:,4] + seg_not_base[seg_not_base>0.03] =1 + seg_not_base[seg_not_base<1] =0 + + seg_line = label_p_pred[:,:,:,3] + seg_line[seg_line>0.1] =1 + seg_line[seg_line<1] =0 + + seg_background = label_p_pred[:,:,:,0] + seg_background[seg_background>0.25] =1 + seg_background[seg_background<1] =0 + + seg[seg_not_base==1]=4 + seg[seg_background==1]=0 + seg[(seg_line==1) & (seg==0)]=3 + if thresholding_for_artificial_class_in_light_version: + seg_art = label_p_pred[:,:,:,2] + + seg_art[seg_art0] =1 + + ##seg[seg_art==1]=2 + + indexer_inside_batch = 0 + for i_batch, j_batch in zip(list_i_s, list_j_s): + seg_in = seg[indexer_inside_batch] + + if thresholding_for_artificial_class_in_light_version: + seg_in_art = seg_art[indexer_inside_batch] + + index_y_u_in = list_y_u[indexer_inside_batch] + index_y_d_in = list_y_d[indexer_inside_batch] + + index_x_u_in = list_x_u[indexer_inside_batch] + index_x_d_in = list_x_d[indexer_inside_batch] + + if i_batch == 0 and j_batch == 0: + prediction_true[index_y_d_in + 0:index_y_u_in - margin, + index_x_d_in + 0:index_x_u_in - margin] = \ + seg_in[0:-margin or None, + 0:-margin or None, + np.newaxis] + if thresholding_for_artificial_class_in_light_version: + prediction_true[index_y_d_in + 0:index_y_u_in - margin, + index_x_d_in + 0:index_x_u_in - margin, 1] = \ + seg_in_art[0:-margin or None, + 0:-margin or None] + + elif i_batch == nxf - 1 and j_batch == nyf - 1: + prediction_true[index_y_d_in + margin:index_y_u_in - 0, + index_x_d_in + margin:index_x_u_in - 0] = \ + seg_in[margin:, + margin:, + np.newaxis] + if thresholding_for_artificial_class_in_light_version: + prediction_true[index_y_d_in + margin:index_y_u_in - 0, + index_x_d_in + margin:index_x_u_in - 0, 1] = \ + seg_in_art[margin:, + margin:] + + elif i_batch == 0 and j_batch == nyf - 1: + prediction_true[index_y_d_in + margin:index_y_u_in - 0, + 
index_x_d_in + 0:index_x_u_in - margin] = \ + seg_in[margin:, + 0:-margin or None, + np.newaxis] + if thresholding_for_artificial_class_in_light_version: + prediction_true[index_y_d_in + margin:index_y_u_in - 0, + index_x_d_in + 0:index_x_u_in - margin, 1] = \ + seg_in_art[margin:, + 0:-margin or None] + + elif i_batch == nxf - 1 and j_batch == 0: + prediction_true[index_y_d_in + 0:index_y_u_in - margin, + index_x_d_in + margin:index_x_u_in - 0] = \ + seg_in[0:-margin or None, + margin:, + np.newaxis] + if thresholding_for_artificial_class_in_light_version: + prediction_true[index_y_d_in + 0:index_y_u_in - margin, + index_x_d_in + margin:index_x_u_in - 0, 1] = \ + seg_in_art[0:-margin or None, + margin:] + + elif i_batch == 0 and j_batch != 0 and j_batch != nyf - 1: + prediction_true[index_y_d_in + margin:index_y_u_in - margin, + index_x_d_in + 0:index_x_u_in - margin] = \ + seg_in[margin:-margin or None, + 0:-margin or None, + np.newaxis] + if thresholding_for_artificial_class_in_light_version: + prediction_true[index_y_d_in + margin:index_y_u_in - margin, + index_x_d_in + 0:index_x_u_in - margin, 1] = \ + seg_in_art[margin:-margin or None, + 0:-margin or None] + + elif i_batch == nxf - 1 and j_batch != 0 and j_batch != nyf - 1: + prediction_true[index_y_d_in + margin:index_y_u_in - margin, + index_x_d_in + margin:index_x_u_in - 0] = \ + seg_in[margin:-margin or None, + margin:, + np.newaxis] + if thresholding_for_artificial_class_in_light_version: + prediction_true[index_y_d_in + margin:index_y_u_in - margin, + index_x_d_in + margin:index_x_u_in - 0, 1] = \ + seg_in_art[margin:-margin or None, + margin:] + + elif i_batch != 0 and i_batch != nxf - 1 and j_batch == 0: + prediction_true[index_y_d_in + 0:index_y_u_in - margin, + index_x_d_in + margin:index_x_u_in - margin] = \ + seg_in[0:-margin or None, + margin:-margin or None, + np.newaxis] + if thresholding_for_artificial_class_in_light_version: + prediction_true[index_y_d_in + 0:index_y_u_in - margin, + index_x_d_in + margin:index_x_u_in - margin, 1] = \ + seg_in_art[0:-margin or None, + margin:-margin or None] + + elif i_batch != 0 and i_batch != nxf - 1 and j_batch == nyf - 1: + prediction_true[index_y_d_in + margin:index_y_u_in - 0, + index_x_d_in + margin:index_x_u_in - margin] = \ + seg_in[margin:, + margin:-margin or None, + np.newaxis] + if thresholding_for_artificial_class_in_light_version: + prediction_true[index_y_d_in + margin:index_y_u_in - 0, + index_x_d_in + margin:index_x_u_in - margin, 1] = \ + seg_in_art[margin:, + margin:-margin or None] + + else: + prediction_true[index_y_d_in + margin:index_y_u_in - margin, + index_x_d_in + margin:index_x_u_in - margin] = \ + seg_in[margin:-margin or None, + margin:-margin or None, + np.newaxis] + if thresholding_for_artificial_class_in_light_version: + prediction_true[index_y_d_in + margin:index_y_u_in - margin, + index_x_d_in + margin:index_x_u_in - margin, 1] = \ + seg_in_art[margin:-margin or None, + margin:-margin or None] + indexer_inside_batch += 1 + + + list_i_s = [] + list_j_s = [] + list_x_u = [] + list_x_d = [] + list_y_u = [] + list_y_d = [] + + batch_indexer = 0 + img_patch[:] = 0 + + prediction_true = prediction_true.astype(np.uint8) + + if thresholding_for_artificial_class_in_light_version: + kernel_min = np.ones((3, 3), np.uint8) + prediction_true[:,:,0][prediction_true[:,:,0]==2] = 0 + + skeleton_art = skeletonize(prediction_true[:,:,1]) + skeleton_art = skeleton_art*1 + + skeleton_art = skeleton_art.astype('uint8') + + skeleton_art = cv2.dilate(skeleton_art, 
kernel_min, iterations=1) + + prediction_true[:,:,0][skeleton_art==1]=2 + #del model + gc.collect() + return prediction_true + + def run_enhancement(self, light_version): + t_in = time.time() + self.logger.info("Resizing and enhancing image...") + is_image_enhanced, img_org, img_res, num_col_classifier, num_column_is_classified, img_bin = \ + self.resize_and_enhance_image_with_column_classifier(light_version) + + self.logger.info("Image was %senhanced.", '' if is_image_enhanced else 'not ') + return img_res, is_image_enhanced, num_col_classifier, num_column_is_classified + + + def run_single(self): + t0 = time.time() + img_res, is_image_enhanced, num_col_classifier, num_column_is_classified = self.run_enhancement(light_version=False) + + return img_res + + + def run(self, image_filename : Optional[str] = None, dir_in : Optional[str] = None, overwrite : bool = False): + """ + Get image and scales, then extract the page of scanned image + """ + self.logger.debug("enter run") + t0_tot = time.time() + + if dir_in: + self.ls_imgs = os.listdir(dir_in) + elif image_filename: + self.ls_imgs = [image_filename] + else: + raise ValueError("run requires either a single image filename or a directory") + + for img_filename in self.ls_imgs: + self.logger.info(img_filename) + t0 = time.time() + + self.reset_file_name_dir(os.path.join(dir_in or "", img_filename)) + #print("text region early -11 in %.1fs", time.time() - t0) + + if os.path.exists(self.output_filename): + if overwrite: + self.logger.warning("will overwrite existing output file '%s'", self.output_filename) + else: + self.logger.warning("will skip input for existing output file '%s'", self.output_filename) + continue + + image_enhanced = self.run_single() + img_enhanced_org_scale = resize_image(image_enhanced, self.h_org, self.w_org) + + cv2.imwrite(self.output_filename, img_enhanced_org_scale) + From d14bd162caa82030a9dee28ec2f063215bd64dce Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Sun, 1 Jun 2025 22:10:13 +0200 Subject: [PATCH 26/40] saving enhanced image in org or scaled resolution --- src/eynollah/cli.py | 9 ++++++++- src/eynollah/eynollah.py | 5 ++--- src/eynollah/image_enhancer.py | 7 +++++-- 3 files changed, 15 insertions(+), 6 deletions(-) diff --git a/src/eynollah/cli.py b/src/eynollah/cli.py index 840bc4b..9398c47 100644 --- a/src/eynollah/cli.py +++ b/src/eynollah/cli.py @@ -116,6 +116,12 @@ def binarization(patches, model_dir, input_image, output_image, dir_in, dir_out) "-ncl", help="upper limit of columns in document image", ) +@click.option( + "--save_org_scale/--no_save_org_scale", + "-sos/-nosos", + is_flag=True, + help="if this parameter set to true, this tool will save the enhanced image in org scale.", +) @click.option( "--log_level", "-l", @@ -123,7 +129,7 @@ def binarization(patches, model_dir, input_image, output_image, dir_in, dir_out) help="Override log level globally to this", ) -def enhancement(image, out, overwrite, dir_in, model, num_col_upper, num_col_lower, log_level): +def enhancement(image, out, overwrite, dir_in, model, num_col_upper, num_col_lower, save_org_scale, log_level): initLogging() if log_level: getLogger('enhancement').setLevel(getLevelName(log_level)) @@ -134,6 +140,7 @@ def enhancement(image, out, overwrite, dir_in, model, num_col_upper, num_col_low dir_out=out, num_col_upper=num_col_upper, num_col_lower=num_col_lower, + save_org_scale=save_org_scale, ) if dir_in: enhancer_object.run(dir_in=dir_in, overwrite=overwrite) diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py index 
cf540d3..9c834e2 100644 --- a/src/eynollah/eynollah.py +++ b/src/eynollah/eynollah.py @@ -5434,10 +5434,9 @@ class Eynollah_ocr: img_crop = img_poly_on_img[y:y+h, x:x+w, :] - #print(file_name, angle_degrees,w*h , mask_poly[:,:,0].sum(), mask_poly[:,:,0].sum() /float(w*h) , 'didi') if not self.do_not_mask_with_textline_contour: - if angle_degrees > 15: + if angle_degrees > 3: better_des_slope = get_orientation_moments(textline_coords) img_crop = rotate_image_with_padding(img_crop, better_des_slope ) @@ -5484,7 +5483,7 @@ class Eynollah_ocr: if not self.export_textline_images_and_text: - if w_scaled < 530:#640:#1.5*image_width: + if w_scaled < 640:#1.5*image_width: img_fin = preprocess_and_resize_image_for_ocrcnn_model(img_crop, image_height, image_width) cropped_lines.append(img_fin) if angle_degrees > 15: diff --git a/src/eynollah/image_enhancer.py b/src/eynollah/image_enhancer.py index 71445f7..c89f532 100644 --- a/src/eynollah/image_enhancer.py +++ b/src/eynollah/image_enhancer.py @@ -41,11 +41,13 @@ class Enhancer: dir_out : Optional[str] = None, num_col_upper : Optional[int] = None, num_col_lower : Optional[int] = None, + save_org_scale : bool = False, logger : Optional[Logger] = None, ): self.dir_out = dir_out self.input_binary = False self.light_version = False + self.save_org_scale = save_org_scale if num_col_upper: self.num_col_upper = int(num_col_upper) else: @@ -750,7 +752,8 @@ class Enhancer: continue image_enhanced = self.run_single() - img_enhanced_org_scale = resize_image(image_enhanced, self.h_org, self.w_org) + if self.save_org_scale: + image_enhanced = resize_image(image_enhanced, self.h_org, self.w_org) - cv2.imwrite(self.output_filename, img_enhanced_org_scale) + cv2.imwrite(self.output_filename, image_enhanced) From 7996afac69f6f7b8508fb24bb66ca3d5cd577c1d Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Sun, 1 Jun 2025 22:44:50 +0200 Subject: [PATCH 27/40] image enhancer updated --- src/eynollah/image_enhancer.py | 40 +++++++--------------------------- 1 file changed, 8 insertions(+), 32 deletions(-) diff --git a/src/eynollah/image_enhancer.py b/src/eynollah/image_enhancer.py index c89f532..983712d 100644 --- a/src/eynollah/image_enhancer.py +++ b/src/eynollah/image_enhancer.py @@ -225,47 +225,23 @@ class Enhancer: def calculate_width_height_by_columns(self, img, num_col, width_early, label_p_pred): self.logger.debug("enter calculate_width_height_by_columns") - if num_col == 1 and width_early < 1100: + if num_col == 1: img_w_new = 2000 - elif num_col == 1 and width_early >= 2500: - img_w_new = 2000 - elif num_col == 1 and width_early >= 1100 and width_early < 2500: - img_w_new = width_early - elif num_col == 2 and width_early < 2000: + elif num_col == 2: img_w_new = 2400 - elif num_col == 2 and width_early >= 3500: - img_w_new = 2400 - elif num_col == 2 and width_early >= 2000 and width_early < 3500: - img_w_new = width_early - elif num_col == 3 and width_early < 2000: + elif num_col == 3: img_w_new = 3000 - elif num_col == 3 and width_early >= 4000: - img_w_new = 3000 - elif num_col == 3 and width_early >= 2000 and width_early < 4000: - img_w_new = width_early - elif num_col == 4 and width_early < 2500: + elif num_col == 4: img_w_new = 4000 - elif num_col == 4 and width_early >= 5000: - img_w_new = 4000 - elif num_col == 4 and width_early >= 2500 and width_early < 5000: - img_w_new = width_early - elif num_col == 5 and width_early < 3700: + elif num_col == 5: img_w_new = 5000 - elif num_col == 5 and width_early >= 7000: - img_w_new = 5000 - elif num_col == 5 and 
width_early >= 3700 and width_early < 7000: - img_w_new = width_early - elif num_col == 6 and width_early < 4500: - img_w_new = 6500 # 5400 + elif num_col == 6: + img_w_new = 6500 else: img_w_new = width_early img_h_new = img_w_new * img.shape[0] // img.shape[1] - if label_p_pred[0][int(num_col - 1)] < 0.9 and img_w_new < width_early: - img_new = np.copy(img) - num_column_is_classified = False - #elif label_p_pred[0][int(num_col - 1)] < 0.8 and img_h_new >= 8000: - elif img_h_new >= 8000: + if img_h_new >= 8000: img_new = np.copy(img) num_column_is_classified = False else: From 065f1f9a9368def46ac0e4df4888bd29c168dea1 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Mon, 2 Jun 2025 18:21:33 +0200 Subject: [PATCH 28/40] Fix: Resolved OCR bug when text region type is undefined --- src/eynollah/eynollah.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py index 9c834e2..fc60f2e 100644 --- a/src/eynollah/eynollah.py +++ b/src/eynollah/eynollah.py @@ -5399,7 +5399,10 @@ class Eynollah_ocr: indexer_text_region = 0 indexer_textlines = 0 for nn in root1.iter(region_tags): - type_textregion = nn.attrib['type'] + try: + type_textregion = nn.attrib['type'] + except: + type_textregion = 'paragraph' for child_textregion in nn: if child_textregion.tag.endswith("TextLine"): for child_textlines in child_textregion: @@ -5467,6 +5470,7 @@ class Eynollah_ocr: else: + better_des_slope = 0 img_crop[mask_poly==0] = 255 if self.prediction_with_both_of_rgb_and_bin: img_crop_bin[mask_poly==0] = 255 @@ -5486,7 +5490,7 @@ class Eynollah_ocr: if w_scaled < 640:#1.5*image_width: img_fin = preprocess_and_resize_image_for_ocrcnn_model(img_crop, image_height, image_width) cropped_lines.append(img_fin) - if angle_degrees > 15: + if abs(better_des_slope) > 45: cropped_lines_ver_index.append(1) else: cropped_lines_ver_index.append(0) @@ -5505,7 +5509,7 @@ class Eynollah_ocr: cropped_lines.append(img_fin) cropped_lines_meging_indexing.append(1) - if angle_degrees > 15: + if abs(better_des_slope) > 45: cropped_lines_ver_index.append(1) else: cropped_lines_ver_index.append(0) @@ -5515,7 +5519,7 @@ class Eynollah_ocr: cropped_lines.append(img_fin) cropped_lines_meging_indexing.append(-1) - if angle_degrees > 15: + if abs(better_des_slope) > 45: cropped_lines_ver_index.append(1) else: cropped_lines_ver_index.append(0) @@ -5531,7 +5535,7 @@ class Eynollah_ocr: cropped_lines.append(img_fin) cropped_lines_meging_indexing.append(0) - if angle_degrees > 15: + if abs(better_des_slope) > 45: cropped_lines_ver_index.append(1) else: cropped_lines_ver_index.append(0) From 59ea493803e5bf9f8038e8411777d137d91e27b9 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Thu, 3 Jul 2025 11:50:47 +0200 Subject: [PATCH 29/40] decorated with confidence value for cnnrnn ocr model --- src/eynollah/eynollah.py | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py index fc60f2e..3b9d898 100644 --- a/src/eynollah/eynollah.py +++ b/src/eynollah/eynollah.py @@ -5129,7 +5129,7 @@ class Eynollah_ocr: self.b_s = int(batch_size) else: - self.model_ocr_dir = dir_models + "/model_step_1225000_ocr"#"/model_step_125000_ocr"#"/model_step_25000_ocr"#"/model_step_1050000_ocr"#"/model_0_ocr_cnnrnn"#"/model_23_ocr_cnnrnn" + self.model_ocr_dir = dir_models + "/model_step_900000_ocr"#"/model_step_25000_ocr"#"/model_step_1050000_ocr"#"/model_0_ocr_cnnrnn"#"/model_23_ocr_cnnrnn" model_ocr = 
load_model(self.model_ocr_dir , compile=False) self.prediction_model = tf.keras.models.Model( @@ -5487,7 +5487,7 @@ class Eynollah_ocr: if not self.export_textline_images_and_text: - if w_scaled < 640:#1.5*image_width: + if w_scaled < 750:#1.5*image_width: img_fin = preprocess_and_resize_image_for_ocrcnn_model(img_crop, image_height, image_width) cropped_lines.append(img_fin) if abs(better_des_slope) > 45: @@ -5580,6 +5580,7 @@ class Eynollah_ocr: if not self.export_textline_images_and_text: extracted_texts = [] + extracted_conf_value = [] n_iterations = math.ceil(len(cropped_lines) / self.b_s) @@ -5700,12 +5701,19 @@ class Eynollah_ocr: preds_bin[indices_to_be_replaced,:,:] = preds_flipped[indices_where_flipped_conf_value_is_higher, :, :] preds = (preds + preds_bin) / 2. + pred_texts = decode_batch_predictions(preds, self.num_to_char) + + preds_max = np.max(preds, axis=2 ) + preds_max_args = np.argmax(preds, axis=2 ) + pred_max_not_unk_mask_bool = preds_max_args[:,:]!=256 + masked_means = np.sum(preds_max * pred_max_not_unk_mask_bool, axis=1) / np.sum(pred_max_not_unk_mask_bool, axis=1) for ib in range(imgs.shape[0]): pred_texts_ib = pred_texts[ib].replace("[UNK]", "") extracted_texts.append(pred_texts_ib) + extracted_conf_value.append(masked_means[ib]) del cropped_lines if self.prediction_with_both_of_rgb_and_bin: @@ -5713,7 +5721,10 @@ class Eynollah_ocr: gc.collect() extracted_texts_merged = [extracted_texts[ind] if cropped_lines_meging_indexing[ind]==0 else extracted_texts[ind]+" "+extracted_texts[ind+1] if cropped_lines_meging_indexing[ind]==1 else None for ind in range(len(cropped_lines_meging_indexing))] + + extracted_conf_value_merged = [extracted_conf_value[ind] if cropped_lines_meging_indexing[ind]==0 else (extracted_conf_value[ind]+extracted_conf_value[ind+1])/2. 
if cropped_lines_meging_indexing[ind]==1 else None for ind in range(len(cropped_lines_meging_indexing))] + extracted_conf_value_merged = [extracted_conf_value_merged[ind_cfm] for ind_cfm in range(len(extracted_texts_merged)) if extracted_texts_merged[ind_cfm] is not None] extracted_texts_merged = [ind for ind in extracted_texts_merged if ind is not None] unique_cropped_lines_region_indexer = np.unique(cropped_lines_region_indexer) @@ -5791,6 +5802,7 @@ class Eynollah_ocr: if not is_textline_text: text_subelement = ET.SubElement(child_textregion, 'TextEquiv') + text_subelement.set('conf', f"{extracted_conf_value_merged[indexer]:.2f}") unicode_textline = ET.SubElement(text_subelement, 'Unicode') unicode_textline.text = extracted_texts_merged[indexer] else: @@ -5798,6 +5810,7 @@ class Eynollah_ocr: if childtest3.tag.endswith("TextEquiv"): for child_uc in childtest3: if child_uc.tag.endswith("Unicode"): + childtest3.set('conf', f"{extracted_conf_value_merged[indexer]:.2f}") child_uc.text = extracted_texts_merged[indexer] indexer = indexer + 1 From e54ebaa23e89d0381157415e33d6324c3dd8aecd Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Thu, 3 Jul 2025 15:24:52 +0200 Subject: [PATCH 30/40] ocr: make sure that image height or width is not zero --- src/eynollah/eynollah.py | 4 ---- src/eynollah/utils/utils_ocr.py | 34 +++++++++++++++++++-------------- 2 files changed, 20 insertions(+), 18 deletions(-) diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py index 3b9d898..1260a96 100644 --- a/src/eynollah/eynollah.py +++ b/src/eynollah/eynollah.py @@ -5435,7 +5435,6 @@ class Eynollah_ocr: mask_poly = mask_poly[y:y+h, x:x+w, :] img_crop = img_poly_on_img[y:y+h, x:x+w, :] - #print(file_name, angle_degrees,w*h , mask_poly[:,:,0].sum(), mask_poly[:,:,0].sum() /float(w*h) , 'didi') if not self.do_not_mask_with_textline_contour: @@ -5482,9 +5481,6 @@ class Eynollah_ocr: img_crop, img_crop_bin = break_curved_line_into_small_pieces_and_then_merge(img_crop, mask_poly, img_crop_bin) else: img_crop, _ = break_curved_line_into_small_pieces_and_then_merge(img_crop, mask_poly) - - - if not self.export_textline_images_and_text: if w_scaled < 750:#1.5*image_width: diff --git a/src/eynollah/utils/utils_ocr.py b/src/eynollah/utils/utils_ocr.py index 81a8ae1..1e9162a 100644 --- a/src/eynollah/utils/utils_ocr.py +++ b/src/eynollah/utils/utils_ocr.py @@ -124,23 +124,26 @@ def return_textlines_split_if_needed(textline_image, textline_image_bin, predict else: return None, None def preprocess_and_resize_image_for_ocrcnn_model(img, image_height, image_width): - ratio = image_height /float(img.shape[0]) - w_ratio = int(ratio * img.shape[1]) - - if w_ratio <= image_width: - width_new = w_ratio + if img.shape[0]==0 or img.shape[1]==0: + img_fin = np.ones((image_height, image_width, 3)) else: - width_new = image_width + ratio = image_height /float(img.shape[0]) + w_ratio = int(ratio * img.shape[1]) - if width_new == 0: - width_new = img.shape[1] + if w_ratio <= image_width: + width_new = w_ratio + else: + width_new = image_width + + if width_new == 0: + width_new = img.shape[1] + - - img = resize_image(img, image_height, width_new) - img_fin = np.ones((image_height, image_width, 3))*255 + img = resize_image(img, image_height, width_new) + img_fin = np.ones((image_height, image_width, 3))*255 - img_fin[:,:width_new,:] = img[:,:,:] - img_fin = img_fin / 255. + img_fin[:,:width_new,:] = img[:,:,:] + img_fin = img_fin / 255. 
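The preprocessing helper modified here keeps the aspect ratio by resizing each text line crop to the target height and padding the remaining width with white, and the patch adds a guard so that zero-sized crops no longer crash it. A rough stand-alone sketch of the same idea; the 32 x 512 target mirrors the values used elsewhere in this series, while the 3-channel input is an assumption.

import numpy as np
import cv2

def resize_and_pad(img, target_h=32, target_w=512):
    if img.shape[0] == 0 or img.shape[1] == 0:
        # degenerate crop: return a blank canvas instead of raising
        return np.ones((target_h, target_w, 3), dtype=np.float32)
    new_w = min(target_w, max(1, int(target_h * img.shape[1] / img.shape[0])))
    resized = cv2.resize(img, (new_w, target_h))          # dsize is (width, height)
    canvas = np.full((target_h, target_w, 3), 255, dtype=np.float32)
    canvas[:, :new_w, :] = resized
    return canvas / 255.0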
return img_fin def get_deskewed_contour_and_bb_and_image(contour, image, deskew_angle): @@ -188,7 +191,10 @@ def rotate_image_with_padding(image, angle, border_value=(0,0,0)): rotation_matrix[1, 2] += (new_h / 2) - center[1] # Perform the rotation - rotated_image = cv2.warpAffine(image, rotation_matrix, (new_w, new_h), borderValue=border_value) + try: + rotated_image = cv2.warpAffine(image, rotation_matrix, (new_w, new_h), borderValue=border_value) + except: + rotated_image = np.copy(image) return rotated_image From e0f4a007e45255fc870f0ca12ad5c2870ea00ef1 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Wed, 16 Jul 2025 14:00:12 +0200 Subject: [PATCH 31/40] ocr model renamed - image text font for ocr result is now using Charis-7.000 font (downloaded from here https://software.sil.org/charis/download/) --- src/eynollah/eynollah.py | 148 +++++++++++++++++++++------------------ 1 file changed, 78 insertions(+), 70 deletions(-) diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py index 1260a96..bf11dec 100644 --- a/src/eynollah/eynollah.py +++ b/src/eynollah/eynollah.py @@ -318,7 +318,7 @@ class Eynollah: if self.ocr and self.tr: self.model_ocr_dir = dir_models + "/trocr_model_ens_of_3_checkpoints_201124" elif self.ocr and not self.tr: - self.model_ocr_dir = dir_models + "/model_step_750000_ocr"#"/model_step_125000_ocr"#"/model_step_25000_ocr"#"/model_step_1050000_ocr"#"/model_0_ocr_cnnrnn"#"/model_23_ocr_cnnrnn" + self.model_ocr_dir = dir_models + "/model_eynollah_ocr_cnnrnn_20250716" if self.tables: if self.light_version: self.model_table_dir = dir_models + "/modelens_table_0t4_201124" @@ -5129,7 +5129,7 @@ class Eynollah_ocr: self.b_s = int(batch_size) else: - self.model_ocr_dir = dir_models + "/model_step_900000_ocr"#"/model_step_25000_ocr"#"/model_step_1050000_ocr"#"/model_0_ocr_cnnrnn"#"/model_23_ocr_cnnrnn" + self.model_ocr_dir = dir_models + "/model_eynollah_ocr_cnnrnn_20250716" model_ocr = load_model(self.model_ocr_dir , compile=False) self.prediction_model = tf.keras.models.Model( @@ -5276,7 +5276,7 @@ class Eynollah_ocr: if self.draw_texts_on_image: - font_path = "NotoSans-Regular.ttf" # Make sure this file exists! + font_path = "Charis-7.000/Charis-Regular.ttf" # Make sure this file exists! 
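When the recognised text is drawn onto the preview image, each string has to fit the width of its text line's bounding box. A plausible way to do that with Pillow is sketched below with assumed sizes; the project's own fitting helper may differ, and the Charis font file referenced in this patch must be present on disk.

from PIL import Image, ImageDraw, ImageFont

def fit_font(draw, text, font_path, max_width, start_size=40, min_size=8):
    size = start_size
    while size > min_size:
        font = ImageFont.truetype(font_path, size)
        left, top, right, bottom = draw.textbbox((0, 0), text, font=font)
        if right - left <= max_width:
            return font
        size -= 2                      # shrink until the rendered width fits the box
    return ImageFont.truetype(font_path, min_size)

canvas = Image.new("RGB", (800, 100), "white")
draw = ImageDraw.Draw(canvas)
font = fit_font(draw, "recognised line", "Charis-7.000/Charis-Regular.ttf", 600)
draw.text((10, 10), "recognised line", fill="black", font=font)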
font = ImageFont.truetype(font_path, 40) for indexer_text, bb_ind in enumerate(total_bb_coordinates): @@ -5340,8 +5340,8 @@ class Eynollah_ocr: tree1.write(out_file_ocr,xml_declaration=True,method='xml',encoding="utf8",default_namespace=None) #print("Job done in %.1fs", time.time() - t0) else: - max_len = 512 - padding_token = 299 + max_len = 512#280#512 + padding_token = 299#1500#299 image_width = 512#max_len * 4 image_height = 32 @@ -5435,52 +5435,57 @@ class Eynollah_ocr: mask_poly = mask_poly[y:y+h, x:x+w, :] img_crop = img_poly_on_img[y:y+h, x:x+w, :] - - #print(file_name, angle_degrees,w*h , mask_poly[:,:,0].sum(), mask_poly[:,:,0].sum() /float(w*h) , 'didi') - if not self.do_not_mask_with_textline_contour: - if angle_degrees > 3: - better_des_slope = get_orientation_moments(textline_coords) - - img_crop = rotate_image_with_padding(img_crop, better_des_slope ) - - if self.prediction_with_both_of_rgb_and_bin: - img_crop_bin = rotate_image_with_padding(img_crop_bin, better_des_slope ) - - mask_poly = rotate_image_with_padding(mask_poly, better_des_slope ) - mask_poly = mask_poly.astype('uint8') - - #new bounding box - x_n, y_n, w_n, h_n = get_contours_and_bounding_boxes(mask_poly[:,:,0]) - - mask_poly = mask_poly[y_n:y_n+h_n, x_n:x_n+w_n, :] - img_crop = img_crop[y_n:y_n+h_n, x_n:x_n+w_n, :] - + + if self.export_textline_images_and_text: + if not self.do_not_mask_with_textline_contour: img_crop[mask_poly==0] = 255 - - if self.prediction_with_both_of_rgb_and_bin: - img_crop_bin = img_crop_bin[y_n:y_n+h_n, x_n:x_n+w_n, :] - img_crop_bin[mask_poly==0] = 255 - - if mask_poly[:,:,0].sum() /float(w_n*h_n) < 0.50 and w_scaled > 90: + + else: + #print(file_name, angle_degrees,w*h , mask_poly[:,:,0].sum(), mask_poly[:,:,0].sum() /float(w*h) , 'didi') + if not self.do_not_mask_with_textline_contour: + if angle_degrees > 3: + better_des_slope = get_orientation_moments(textline_coords) + + img_crop = rotate_image_with_padding(img_crop, better_des_slope ) + if self.prediction_with_both_of_rgb_and_bin: - img_crop, img_crop_bin = break_curved_line_into_small_pieces_and_then_merge(img_crop, mask_poly, img_crop_bin) - else: - img_crop, _ = break_curved_line_into_small_pieces_and_then_merge(img_crop, mask_poly) - + img_crop_bin = rotate_image_with_padding(img_crop_bin, better_des_slope ) + + mask_poly = rotate_image_with_padding(mask_poly, better_des_slope ) + mask_poly = mask_poly.astype('uint8') - else: - better_des_slope = 0 - img_crop[mask_poly==0] = 255 - if self.prediction_with_both_of_rgb_and_bin: - img_crop_bin[mask_poly==0] = 255 - if type_textregion=='drop-capital': - pass - else: - if mask_poly[:,:,0].sum() /float(w*h) < 0.50 and w_scaled > 90: + #new bounding box + x_n, y_n, w_n, h_n = get_contours_and_bounding_boxes(mask_poly[:,:,0]) + + mask_poly = mask_poly[y_n:y_n+h_n, x_n:x_n+w_n, :] + img_crop = img_crop[y_n:y_n+h_n, x_n:x_n+w_n, :] + + img_crop[mask_poly==0] = 255 + + if self.prediction_with_both_of_rgb_and_bin: + img_crop_bin = img_crop_bin[y_n:y_n+h_n, x_n:x_n+w_n, :] + img_crop_bin[mask_poly==0] = 255 + + if mask_poly[:,:,0].sum() /float(w_n*h_n) < 0.50 and w_scaled > 90: if self.prediction_with_both_of_rgb_and_bin: img_crop, img_crop_bin = break_curved_line_into_small_pieces_and_then_merge(img_crop, mask_poly, img_crop_bin) else: img_crop, _ = break_curved_line_into_small_pieces_and_then_merge(img_crop, mask_poly) + + + else: + better_des_slope = 0 + img_crop[mask_poly==0] = 255 + if self.prediction_with_both_of_rgb_and_bin: + img_crop_bin[mask_poly==0] = 255 + if 
type_textregion=='drop-capital': + pass + else: + if mask_poly[:,:,0].sum() /float(w*h) < 0.50 and w_scaled > 90: + if self.prediction_with_both_of_rgb_and_bin: + img_crop, img_crop_bin = break_curved_line_into_small_pieces_and_then_merge(img_crop, mask_poly, img_crop_bin) + else: + img_crop, _ = break_curved_line_into_small_pieces_and_then_merge(img_crop, mask_poly) if not self.export_textline_images_and_text: if w_scaled < 750:#1.5*image_width: @@ -5541,35 +5546,38 @@ class Eynollah_ocr: cropped_lines_bin.append(img_fin) if self.export_textline_images_and_text: - if child_textlines.tag.endswith("TextEquiv"): - for cheild_text in child_textlines: - if cheild_text.tag.endswith("Unicode"): - textline_text = cheild_text.text - if textline_text: - if self.do_not_mask_with_textline_contour: - if self.pref_of_dataset: - with open(os.path.join(self.dir_out, file_name+'_line_'+str(indexer_textlines)+'_'+self.pref_of_dataset+'.txt'), 'w') as text_file: - text_file.write(textline_text) + if img_crop.shape[0]==0 or img_crop.shape[1]==0: + pass + else: + if child_textlines.tag.endswith("TextEquiv"): + for cheild_text in child_textlines: + if cheild_text.tag.endswith("Unicode"): + textline_text = cheild_text.text + if textline_text: + if self.do_not_mask_with_textline_contour: + if self.pref_of_dataset: + with open(os.path.join(self.dir_out, file_name+'_line_'+str(indexer_textlines)+'_'+self.pref_of_dataset+'.txt'), 'w') as text_file: + text_file.write(textline_text) - cv2.imwrite(os.path.join(self.dir_out, file_name+'_line_'+str(indexer_textlines)+'_'+self.pref_of_dataset+'.png'), img_crop ) + cv2.imwrite(os.path.join(self.dir_out, file_name+'_line_'+str(indexer_textlines)+'_'+self.pref_of_dataset+'.png'), img_crop ) + else: + with open(os.path.join(self.dir_out, file_name+'_line_'+str(indexer_textlines)+'.txt'), 'w') as text_file: + text_file.write(textline_text) + + cv2.imwrite(os.path.join(self.dir_out, file_name+'_line_'+str(indexer_textlines)+'.png'), img_crop ) else: - with open(os.path.join(self.dir_out, file_name+'_line_'+str(indexer_textlines)+'.txt'), 'w') as text_file: - text_file.write(textline_text) + if self.pref_of_dataset: + with open(os.path.join(self.dir_out, file_name+'_line_'+str(indexer_textlines)+'_'+self.pref_of_dataset+'_masked.txt'), 'w') as text_file: + text_file.write(textline_text) - cv2.imwrite(os.path.join(self.dir_out, file_name+'_line_'+str(indexer_textlines)+'.png'), img_crop ) - else: - if self.pref_of_dataset: - with open(os.path.join(self.dir_out, file_name+'_line_'+str(indexer_textlines)+'_'+self.pref_of_dataset+'_masked.txt'), 'w') as text_file: - text_file.write(textline_text) + cv2.imwrite(os.path.join(self.dir_out, file_name+'_line_'+str(indexer_textlines)+'_'+self.pref_of_dataset+'_masked.png'), img_crop ) + else: + with open(os.path.join(self.dir_out, file_name+'_line_'+str(indexer_textlines)+'_masked.txt'), 'w') as text_file: + text_file.write(textline_text) - cv2.imwrite(os.path.join(self.dir_out, file_name+'_line_'+str(indexer_textlines)+'_'+self.pref_of_dataset+'_masked.png'), img_crop ) - else: - with open(os.path.join(self.dir_out, file_name+'_line_'+str(indexer_textlines)+'_masked.txt'), 'w') as text_file: - text_file.write(textline_text) - - cv2.imwrite(os.path.join(self.dir_out, file_name+'_line_'+str(indexer_textlines)+'_masked.png'), img_crop ) - - indexer_textlines+=1 + cv2.imwrite(os.path.join(self.dir_out, file_name+'_line_'+str(indexer_textlines)+'_masked.png'), img_crop ) + + indexer_textlines+=1 if not self.export_textline_images_and_text: 
indexer_text_region = indexer_text_region +1 @@ -5727,7 +5735,7 @@ class Eynollah_ocr: if self.draw_texts_on_image: - font_path = "NotoSans-Regular.ttf" # Make sure this file exists! + font_path = "Charis-7.000/Charis-Regular.ttf" # Make sure this file exists! font = ImageFont.truetype(font_path, 40) for indexer_text, bb_ind in enumerate(total_bb_coordinates): From 920705c3b1a70ee5f18f6731b92b5a775b6a2fa0 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Mon, 21 Jul 2025 10:54:20 +0200 Subject: [PATCH 32/40] update model names --- src/eynollah/eynollah.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py index bf11dec..12acff7 100644 --- a/src/eynollah/eynollah.py +++ b/src/eynollah/eynollah.py @@ -5129,7 +5129,7 @@ class Eynollah_ocr: self.b_s = int(batch_size) else: - self.model_ocr_dir = dir_models + "/model_eynollah_ocr_cnnrnn_20250716" + self.model_ocr_dir = dir_models + "/model_eynollah_ocr_cnnrnn_20250716"#"/model_ens_ocrcnn_new6"#"/model_ens_ocrcnn_new2"# model_ocr = load_model(self.model_ocr_dir , compile=False) self.prediction_model = tf.keras.models.Model( @@ -5143,7 +5143,6 @@ class Eynollah_ocr: with open(os.path.join(self.model_ocr_dir, "characters_org.txt"),"r") as config_file: characters = json.load(config_file) - AUTOTUNE = tf.data.AUTOTUNE @@ -5154,6 +5153,7 @@ class Eynollah_ocr: self.num_to_char = StringLookup( vocabulary=char_to_num.get_vocabulary(), mask_token=None, invert=True ) + self.end_character = len(characters) + 2 def run(self, overwrite : bool = False): if self.dir_in: @@ -5340,8 +5340,8 @@ class Eynollah_ocr: tree1.write(out_file_ocr,xml_declaration=True,method='xml',encoding="utf8",default_namespace=None) #print("Job done in %.1fs", time.time() - t0) else: - max_len = 512#280#512 - padding_token = 299#1500#299 + ###max_len = 280#512#280#512 + ###padding_token = 1500#299#1500#299 image_width = 512#max_len * 4 image_height = 32 @@ -5656,13 +5656,13 @@ class Eynollah_ocr: preds_flipped = self.prediction_model.predict(imgs_ver_flipped, verbose=0) preds_max_fliped = np.max(preds_flipped, axis=2 ) preds_max_args_flipped = np.argmax(preds_flipped, axis=2 ) - pred_max_not_unk_mask_bool_flipped = preds_max_args_flipped[:,:]!=256 + pred_max_not_unk_mask_bool_flipped = preds_max_args_flipped[:,:]!=self.end_character masked_means_flipped = np.sum(preds_max_fliped * pred_max_not_unk_mask_bool_flipped, axis=1) / np.sum(pred_max_not_unk_mask_bool_flipped, axis=1) masked_means_flipped[np.isnan(masked_means_flipped)] = 0 preds_max = np.max(preds, axis=2 ) preds_max_args = np.argmax(preds, axis=2 ) - pred_max_not_unk_mask_bool = preds_max_args[:,:]!=256 + pred_max_not_unk_mask_bool = preds_max_args[:,:]!=self.end_character masked_means = np.sum(preds_max * pred_max_not_unk_mask_bool, axis=1) / np.sum(pred_max_not_unk_mask_bool, axis=1) masked_means[np.isnan(masked_means)] = 0 @@ -5683,13 +5683,13 @@ class Eynollah_ocr: preds_flipped = self.prediction_model.predict(imgs_bin_ver_flipped, verbose=0) preds_max_fliped = np.max(preds_flipped, axis=2 ) preds_max_args_flipped = np.argmax(preds_flipped, axis=2 ) - pred_max_not_unk_mask_bool_flipped = preds_max_args_flipped[:,:]!=256 + pred_max_not_unk_mask_bool_flipped = preds_max_args_flipped[:,:]!=self.end_character masked_means_flipped = np.sum(preds_max_fliped * pred_max_not_unk_mask_bool_flipped, axis=1) / np.sum(pred_max_not_unk_mask_bool_flipped, axis=1) masked_means_flipped[np.isnan(masked_means_flipped)] = 0 preds_max = np.max(preds, 
axis=2 ) preds_max_args = np.argmax(preds, axis=2 ) - pred_max_not_unk_mask_bool = preds_max_args[:,:]!=256 + pred_max_not_unk_mask_bool = preds_max_args[:,:]!=self.end_character masked_means = np.sum(preds_max * pred_max_not_unk_mask_bool, axis=1) / np.sum(pred_max_not_unk_mask_bool, axis=1) masked_means[np.isnan(masked_means)] = 0 @@ -5711,7 +5711,7 @@ class Eynollah_ocr: preds_max = np.max(preds, axis=2 ) preds_max_args = np.argmax(preds, axis=2 ) - pred_max_not_unk_mask_bool = preds_max_args[:,:]!=256 + pred_max_not_unk_mask_bool = preds_max_args[:,:]!=self.end_character masked_means = np.sum(preds_max * pred_max_not_unk_mask_bool, axis=1) / np.sum(pred_max_not_unk_mask_bool, axis=1) for ib in range(imgs.shape[0]): From d968a306e4f55ee9be01baf8c88c4abd47cd0ef5 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Mon, 21 Jul 2025 14:50:05 +0200 Subject: [PATCH 33/40] should merged text for the whole page be written in xml? --- src/eynollah/eynollah.py | 31 ++++++++++++++++++++++++++++--- 1 file changed, 28 insertions(+), 3 deletions(-) diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py index 12acff7..bdb8f1a 100644 --- a/src/eynollah/eynollah.py +++ b/src/eynollah/eynollah.py @@ -5129,7 +5129,7 @@ class Eynollah_ocr: self.b_s = int(batch_size) else: - self.model_ocr_dir = dir_models + "/model_eynollah_ocr_cnnrnn_20250716"#"/model_ens_ocrcnn_new6"#"/model_ens_ocrcnn_new2"# + self.model_ocr_dir = dir_models + "/model_ens_ocrcnn_new6"#"/model_eynollah_ocr_cnnrnn_20250716"#"/model_ens_ocrcnn_new6"#"/model_ens_ocrcnn_new2"# model_ocr = load_model(self.model_ocr_dir , compile=False) self.prediction_model = tf.keras.models.Model( @@ -5141,7 +5141,7 @@ class Eynollah_ocr: self.b_s = int(batch_size) - with open(os.path.join(self.model_ocr_dir, "characters_org.txt"),"r") as config_file: + with open(os.path.join(self.model_ocr_dir, "characters_20250707_all_lang.txt"),"r") as config_file: characters = json.load(config_file) AUTOTUNE = tf.data.AUTOTUNE @@ -5780,9 +5780,24 @@ class Eynollah_ocr: text_by_textregion.append(" ".join(extracted_texts_merged_un)) #print(text_by_textregion, 'text_by_textregiontext_by_textregiontext_by_textregiontext_by_textregiontext_by_textregion') + + ###index_tot_regions = [] + ###tot_region_ref = [] + + ###for jj in root1.iter(link+'RegionRefIndexed'): + ###index_tot_regions.append(jj.attrib['index']) + ###tot_region_ref.append(jj.attrib['regionRef']) + + ###id_to_order = {tid: ro for tid, ro in zip(tot_region_ref, index_tot_regions)} + + id_textregions = [] + textregions_by_existing_ids = [] indexer = 0 indexer_textregion = 0 for nn in root1.iter(region_tags): + id_textregion = nn.attrib['id'] + id_textregions.append(id_textregion) + textregions_by_existing_ids.append(text_by_textregion[indexer_textregion]) is_textregion_text = False for childtest in nn: @@ -5829,7 +5844,17 @@ class Eynollah_ocr: else: unicode_textregion.text = text_by_textregion[indexer_textregion] indexer_textregion = indexer_textregion + 1 - + + ###sample_order = [(id_to_order[tid], text) for tid, text in zip(id_textregions, textregions_by_existing_ids) if tid in id_to_order] + + ##ordered_texts_sample = [text for _, text in sorted(sample_order)] + ##tot_page_text = ' '.join(ordered_texts_sample) + + ##for page_element in root1.iter(link+'Page'): + ##text_page = ET.SubElement(page_element, 'TextEquiv') + ##unicode_textpage = ET.SubElement(text_page, 'Unicode') + ##unicode_textpage.text = tot_page_text + ET.register_namespace("",name_space) 
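The OCR result is written back into the PAGE-XML by attaching a TextEquiv element, now carrying a conf attribute, with a Unicode child to each text line and region. A small self-contained sketch of that XML shape with ElementTree, leaving out the namespace handling the surrounding code performs:

import xml.etree.ElementTree as ET

def add_text_equiv(textline_elem, text, conf):
    text_equiv = ET.SubElement(textline_elem, 'TextEquiv')
    text_equiv.set('conf', f"{conf:.2f}")
    unicode_el = ET.SubElement(text_equiv, 'Unicode')
    unicode_el.text = text
    return text_equiv

line = ET.Element('TextLine', {'id': 'l1'})
add_text_equiv(line, "example text", 0.87)
print(ET.tostring(line, encoding='unicode'))
# <TextLine id="l1"><TextEquiv conf="0.87"><Unicode>example text</Unicode></TextEquiv></TextLine>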
tree1.write(out_file_ocr,xml_declaration=True,method='xml',encoding="utf8",default_namespace=None) #print("Job done in %.1fs", time.time() - t0) From 0803881f3675a38558145fc81e40f9a9802f59fb Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Fri, 25 Jul 2025 13:18:38 +0200 Subject: [PATCH 34/40] threshold for textline ocr + new ocr model --- src/eynollah/cli.py | 8 ++- src/eynollah/eynollah.py | 117 +++++++++++++++++++++++---------------- 2 files changed, 76 insertions(+), 49 deletions(-) diff --git a/src/eynollah/cli.py b/src/eynollah/cli.py index 9398c47..a313860 100644 --- a/src/eynollah/cli.py +++ b/src/eynollah/cli.py @@ -496,6 +496,11 @@ def layout(image, out, overwrite, dir_in, model, save_images, save_layout, save_ "-ds_pref", help="in the case of extracting textline and text from a xml GT file user can add an abbrevation of dataset name to generated dataset", ) +@click.option( + "--min_conf_value_of_textline_text", + "-min_conf", + help="minimum OCR confidence value. Text lines with a confidence value lower than this threshold will not be included in the output XML file.", +) @click.option( "--log_level", "-l", @@ -503,7 +508,7 @@ def layout(image, out, overwrite, dir_in, model, save_images, save_layout, save_ help="Override log level globally to this", ) -def ocr(image, overwrite, dir_in, dir_in_bin, out, dir_xmls, dir_out_image_text, model, tr_ocr, export_textline_images_and_text, do_not_mask_with_textline_contour, draw_texts_on_image, prediction_with_both_of_rgb_and_bin, batch_size, dataset_abbrevation, log_level): +def ocr(image, overwrite, dir_in, dir_in_bin, out, dir_xmls, dir_out_image_text, model, tr_ocr, export_textline_images_and_text, do_not_mask_with_textline_contour, draw_texts_on_image, prediction_with_both_of_rgb_and_bin, batch_size, dataset_abbrevation, min_conf_value_of_textline_text, log_level): initLogging() if log_level: getLogger('eynollah').setLevel(getLevelName(log_level)) @@ -530,6 +535,7 @@ def ocr(image, overwrite, dir_in, dir_in_bin, out, dir_xmls, dir_out_image_text, prediction_with_both_of_rgb_and_bin=prediction_with_both_of_rgb_and_bin, batch_size=batch_size, pref_of_dataset=dataset_abbrevation, + min_conf_value_of_textline_text=min_conf_value_of_textline_text, ) eynollah_ocr.run(overwrite=overwrite) diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py index bdb8f1a..aa1b2e1 100644 --- a/src/eynollah/eynollah.py +++ b/src/eynollah/eynollah.py @@ -318,7 +318,7 @@ class Eynollah: if self.ocr and self.tr: self.model_ocr_dir = dir_models + "/trocr_model_ens_of_3_checkpoints_201124" elif self.ocr and not self.tr: - self.model_ocr_dir = dir_models + "/model_eynollah_ocr_cnnrnn_20250716" + self.model_ocr_dir = dir_models + "/model_eynollah_ocr_cnnrnn_20250725" if self.tables: if self.light_version: self.model_table_dir = dir_models + "/modelens_table_0t4_201124" @@ -4974,13 +4974,23 @@ class Eynollah: gc.collect() if len(all_found_textline_polygons)>0: ocr_all_textlines = return_rnn_cnn_ocr_of_given_textlines(image_page, all_found_textline_polygons, self.prediction_model, self.b_s_ocr, self.num_to_char, self.textline_light, self.curved_line) + else: + ocr_all_textlines = None + if all_found_textline_polygons_marginals and len(all_found_textline_polygons_marginals)>0: ocr_all_textlines_marginals = return_rnn_cnn_ocr_of_given_textlines(image_page, all_found_textline_polygons_marginals, self.prediction_model, self.b_s_ocr, self.num_to_char, self.textline_light, self.curved_line) + else: + ocr_all_textlines_marginals = None if 
all_found_textline_polygons_h and len(all_found_textline_polygons)>0: ocr_all_textlines_h = return_rnn_cnn_ocr_of_given_textlines(image_page, all_found_textline_polygons_h, self.prediction_model, self.b_s_ocr, self.num_to_char, self.textline_light, self.curved_line) + else: + ocr_all_textlines_h = None + if polygons_of_drop_capitals and len(polygons_of_drop_capitals)>0: ocr_all_textlines_drop = return_rnn_cnn_ocr_of_given_textlines(image_page, polygons_of_drop_capitals, self.prediction_model, self.b_s_ocr, self.num_to_char, self.textline_light, self.curved_line) + else: + ocr_all_textlines_drop = None else: ocr_all_textlines = None ocr_all_textlines_marginals = None @@ -5098,7 +5108,8 @@ class Eynollah_ocr: do_not_mask_with_textline_contour=False, draw_texts_on_image=False, prediction_with_both_of_rgb_and_bin=False, - pref_of_dataset = None, + pref_of_dataset=None, + min_conf_value_of_textline_text : Optional[float]=None, logger=None, ): self.dir_in = dir_in @@ -5117,6 +5128,10 @@ class Eynollah_ocr: self.logger = logger if logger else getLogger('eynollah') if not export_textline_images_and_text: + if min_conf_value_of_textline_text: + self.min_conf_value_of_textline_text = float(min_conf_value_of_textline_text) + else: + self.min_conf_value_of_textline_text = 0.3 if tr_ocr: self.processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-printed") self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") @@ -5129,7 +5144,7 @@ class Eynollah_ocr: self.b_s = int(batch_size) else: - self.model_ocr_dir = dir_models + "/model_ens_ocrcnn_new6"#"/model_eynollah_ocr_cnnrnn_20250716"#"/model_ens_ocrcnn_new6"#"/model_ens_ocrcnn_new2"# + self.model_ocr_dir = dir_models + "/model_eynollah_ocr_cnnrnn_20250725"#"/model_step_1020000_ocr"#"/model_ens_ocrcnn_new10"#"/model_step_255000_ocr"#"/model_ens_ocrcnn_new9"#"/model_step_900000_ocr"#"/model_eynollah_ocr_cnnrnn_20250716"#"/model_ens_ocrcnn_new6"#"/model_ens_ocrcnn_new2"# model_ocr = load_model(self.model_ocr_dir , compile=False) self.prediction_model = tf.keras.models.Model( @@ -5139,9 +5154,8 @@ class Eynollah_ocr: self.b_s = 8 else: self.b_s = int(batch_size) - - with open(os.path.join(self.model_ocr_dir, "characters_20250707_all_lang.txt"),"r") as config_file: + with open(os.path.join(self.model_ocr_dir, "characters_org.txt"),"r") as config_file: characters = json.load(config_file) AUTOTUNE = tf.data.AUTOTUNE @@ -5442,50 +5456,54 @@ class Eynollah_ocr: else: #print(file_name, angle_degrees,w*h , mask_poly[:,:,0].sum(), mask_poly[:,:,0].sum() /float(w*h) , 'didi') - if not self.do_not_mask_with_textline_contour: - if angle_degrees > 3: - better_des_slope = get_orientation_moments(textline_coords) + + if angle_degrees > 3: + better_des_slope = get_orientation_moments(textline_coords) + + img_crop = rotate_image_with_padding(img_crop, better_des_slope ) + + if self.prediction_with_both_of_rgb_and_bin: + img_crop_bin = rotate_image_with_padding(img_crop_bin, better_des_slope ) - img_crop = rotate_image_with_padding(img_crop, better_des_slope ) + mask_poly = rotate_image_with_padding(mask_poly, better_des_slope ) + mask_poly = mask_poly.astype('uint8') + + #new bounding box + x_n, y_n, w_n, h_n = get_contours_and_bounding_boxes(mask_poly[:,:,0]) + + mask_poly = mask_poly[y_n:y_n+h_n, x_n:x_n+w_n, :] + img_crop = img_crop[y_n:y_n+h_n, x_n:x_n+w_n, :] - if self.prediction_with_both_of_rgb_and_bin: - img_crop_bin = rotate_image_with_padding(img_crop_bin, better_des_slope ) - - mask_poly = rotate_image_with_padding(mask_poly, 
better_des_slope ) - mask_poly = mask_poly.astype('uint8') - - #new bounding box - x_n, y_n, w_n, h_n = get_contours_and_bounding_boxes(mask_poly[:,:,0]) - - mask_poly = mask_poly[y_n:y_n+h_n, x_n:x_n+w_n, :] - img_crop = img_crop[y_n:y_n+h_n, x_n:x_n+w_n, :] - + if not self.do_not_mask_with_textline_contour: img_crop[mask_poly==0] = 255 - - if self.prediction_with_both_of_rgb_and_bin: - img_crop_bin = img_crop_bin[y_n:y_n+h_n, x_n:x_n+w_n, :] + + if self.prediction_with_both_of_rgb_and_bin: + img_crop_bin = img_crop_bin[y_n:y_n+h_n, x_n:x_n+w_n, :] + if not self.do_not_mask_with_textline_contour: img_crop_bin[mask_poly==0] = 255 + + if mask_poly[:,:,0].sum() /float(w_n*h_n) < 0.50 and w_scaled > 90: + if self.prediction_with_both_of_rgb_and_bin: + img_crop, img_crop_bin = break_curved_line_into_small_pieces_and_then_merge(img_crop, mask_poly, img_crop_bin) + else: + img_crop, _ = break_curved_line_into_small_pieces_and_then_merge(img_crop, mask_poly) + - if mask_poly[:,:,0].sum() /float(w_n*h_n) < 0.50 and w_scaled > 90: + else: + better_des_slope = 0 + if not self.do_not_mask_with_textline_contour: + img_crop[mask_poly==0] = 255 + if self.prediction_with_both_of_rgb_and_bin: + if not self.do_not_mask_with_textline_contour: + img_crop_bin[mask_poly==0] = 255 + if type_textregion=='drop-capital': + pass + else: + if mask_poly[:,:,0].sum() /float(w*h) < 0.50 and w_scaled > 90: if self.prediction_with_both_of_rgb_and_bin: img_crop, img_crop_bin = break_curved_line_into_small_pieces_and_then_merge(img_crop, mask_poly, img_crop_bin) else: img_crop, _ = break_curved_line_into_small_pieces_and_then_merge(img_crop, mask_poly) - - - else: - better_des_slope = 0 - img_crop[mask_poly==0] = 255 - if self.prediction_with_both_of_rgb_and_bin: - img_crop_bin[mask_poly==0] = 255 - if type_textregion=='drop-capital': - pass - else: - if mask_poly[:,:,0].sum() /float(w*h) < 0.50 and w_scaled > 90: - if self.prediction_with_both_of_rgb_and_bin: - img_crop, img_crop_bin = break_curved_line_into_small_pieces_and_then_merge(img_crop, mask_poly, img_crop_bin) - else: - img_crop, _ = break_curved_line_into_small_pieces_and_then_merge(img_crop, mask_poly) if not self.export_textline_images_and_text: if w_scaled < 750:#1.5*image_width: @@ -5716,9 +5734,12 @@ class Eynollah_ocr: for ib in range(imgs.shape[0]): pred_texts_ib = pred_texts[ib].replace("[UNK]", "") - extracted_texts.append(pred_texts_ib) - extracted_conf_value.append(masked_means[ib]) - + if masked_means[ib] >= self.min_conf_value_of_textline_text: + extracted_texts.append(pred_texts_ib) + extracted_conf_value.append(masked_means[ib]) + else: + extracted_texts.append("") + extracted_conf_value.append(0) del cropped_lines if self.prediction_with_both_of_rgb_and_bin: del cropped_lines_bin @@ -5790,14 +5811,14 @@ class Eynollah_ocr: ###id_to_order = {tid: ro for tid, ro in zip(tot_region_ref, index_tot_regions)} - id_textregions = [] - textregions_by_existing_ids = [] + #id_textregions = [] + #textregions_by_existing_ids = [] indexer = 0 indexer_textregion = 0 for nn in root1.iter(region_tags): - id_textregion = nn.attrib['id'] - id_textregions.append(id_textregion) - textregions_by_existing_ids.append(text_by_textregion[indexer_textregion]) + #id_textregion = nn.attrib['id'] + #id_textregions.append(id_textregion) + #textregions_by_existing_ids.append(text_by_textregion[indexer_textregion]) is_textregion_text = False for childtest in nn: From a0c19c57bea82af2db65421d46cdd8c740b65455 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Tue, 5 Aug 2025 
14:22:22 +0200 Subject: [PATCH 35/40] use the latest ocr model with balanced fraktur-antiqua training dataset --- src/eynollah/cli.py | 4 ++-- src/eynollah/eynollah.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/eynollah/cli.py b/src/eynollah/cli.py index a313860..5135534 100644 --- a/src/eynollah/cli.py +++ b/src/eynollah/cli.py @@ -325,12 +325,12 @@ def enhancement(image, out, overwrite, dir_in, model, num_col_upper, num_col_low @click.option( "--threshold_art_class_layout", "-tharl", - help="threshold of artifical class in the case of layout detection", + help="threshold of artifical class in the case of layout detection. The default value is 0.1", ) @click.option( "--threshold_art_class_textline", "-thart", - help="threshold of artifical class in the case of textline detection", + help="threshold of artifical class in the case of textline detection. The default value is 0.1", ) @click.option( "--skip_layout_and_reading_order", diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py index aa1b2e1..9e5ba51 100644 --- a/src/eynollah/eynollah.py +++ b/src/eynollah/eynollah.py @@ -318,7 +318,7 @@ class Eynollah: if self.ocr and self.tr: self.model_ocr_dir = dir_models + "/trocr_model_ens_of_3_checkpoints_201124" elif self.ocr and not self.tr: - self.model_ocr_dir = dir_models + "/model_eynollah_ocr_cnnrnn_20250725" + self.model_ocr_dir = dir_models + "/model_eynollah_ocr_cnnrnn_20250805" if self.tables: if self.light_version: self.model_table_dir = dir_models + "/modelens_table_0t4_201124" @@ -5144,7 +5144,7 @@ class Eynollah_ocr: self.b_s = int(batch_size) else: - self.model_ocr_dir = dir_models + "/model_eynollah_ocr_cnnrnn_20250725"#"/model_step_1020000_ocr"#"/model_ens_ocrcnn_new10"#"/model_step_255000_ocr"#"/model_ens_ocrcnn_new9"#"/model_step_900000_ocr"#"/model_eynollah_ocr_cnnrnn_20250716"#"/model_ens_ocrcnn_new6"#"/model_ens_ocrcnn_new2"# + self.model_ocr_dir = dir_models + "/model_eynollah_ocr_cnnrnn_20250805" model_ocr = load_model(self.model_ocr_dir , compile=False) self.prediction_model = tf.keras.models.Model( From 5db3e9fa64d39c128bd9bee27c9d0fb73b3459d2 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Fri, 8 Aug 2025 11:32:02 +0200 Subject: [PATCH 36/40] deskewing with faster multiprocessing --- src/eynollah/eynollah.py | 9 +-- src/eynollah/utils/separate_lines.py | 103 +++++++++++++++++++++++++++ 2 files changed, 108 insertions(+), 4 deletions(-) diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py index 9e5ba51..5299d3e 100644 --- a/src/eynollah/eynollah.py +++ b/src/eynollah/eynollah.py @@ -96,6 +96,7 @@ from .utils.separate_lines import ( textline_contours_postprocessing, separate_lines_new2, return_deskew_slop, + return_deskew_slop_old_mp, do_work_of_slopes_new, do_work_of_slopes_new_curved, do_work_of_slopes_new_light, @@ -1936,8 +1937,8 @@ class Eynollah: y_diff_mean = find_contours_mean_y_diff(textline_con_fil) sigma_des = max(1, int(y_diff_mean * (4.0 / 40.0))) crop_img[crop_img > 0] = 1 - slope_corresponding_textregion = return_deskew_slop(crop_img, sigma_des, - map=self.executor.map, logger=self.logger, plotter=self.plotter) + slope_corresponding_textregion = return_deskew_slop_old_mp(crop_img, sigma_des, + logger=self.logger, plotter=self.plotter) except Exception as why: self.logger.error(why) slope_corresponding_textregion = MAX_SLOPE @@ -3203,8 +3204,8 @@ class Eynollah: def run_deskew(self, textline_mask_tot_ea): #print(textline_mask_tot_ea.shape, 'textline_mask_tot_ea deskew') - slope_deskew = 
return_deskew_slop(cv2.erode(textline_mask_tot_ea, KERNEL, iterations=2), 2, 30, True, - map=self.executor.map, logger=self.logger, plotter=self.plotter) + slope_deskew = return_deskew_slop_old_mp(cv2.erode(textline_mask_tot_ea, KERNEL, iterations=2), 2, 30, True, + logger=self.logger, plotter=self.plotter) slope_first = 0 if self.plotter: diff --git a/src/eynollah/utils/separate_lines.py b/src/eynollah/utils/separate_lines.py index 6289d4d..ead5cfb 100644 --- a/src/eynollah/utils/separate_lines.py +++ b/src/eynollah/utils/separate_lines.py @@ -5,6 +5,8 @@ import numpy as np import cv2 from scipy.signal import find_peaks from scipy.ndimage import gaussian_filter1d +from multiprocessing import Process, Queue, cpu_count +from multiprocessing import Pool from .rotate import rotate_image from .resize import resize_image from .contour import ( @@ -1526,6 +1528,107 @@ def get_smallest_skew(img, sigma_des, angles, logger=None, plotter=None, map=map angle = 0 return angle + +def return_deskew_slop_old_mp(img_patch_org, sigma_des,n_tot_angles=100, + main_page=False, logger=None, plotter=None): + if main_page and plotter: + plotter.save_plot_of_textline_density(img_patch_org) + + img_int=np.zeros((img_patch_org.shape[0],img_patch_org.shape[1])) + img_int[:,:]=img_patch_org[:,:]#img_patch_org[:,:,0] + + max_shape=np.max(img_int.shape) + img_resized=np.zeros((int( max_shape*(1.1) ) , int( max_shape*(1.1) ) )) + + onset_x=int((img_resized.shape[1]-img_int.shape[1])/2.) + onset_y=int((img_resized.shape[0]-img_int.shape[0])/2.) + + img_resized[ onset_y:onset_y+img_int.shape[0] , onset_x:onset_x+img_int.shape[1] ]=img_int[:,:] + + if main_page and img_patch_org.shape[1] > img_patch_org.shape[0]: + angles = np.array([-45, 0, 45, 90,]) + angle = get_smallest_skew_omp(img_resized, sigma_des, angles, plotter=plotter) + + angles = np.linspace(angle - 22.5, angle + 22.5, n_tot_angles) + angle = get_smallest_skew_omp(img_resized, sigma_des, angles, plotter=plotter) + elif main_page: + angles = np.linspace(-12, 12, n_tot_angles)#np.array([0 , 45 , 90 , -45]) + angle = get_smallest_skew_omp(img_resized, sigma_des, angles, plotter=plotter) + + early_slope_edge=11 + if abs(angle) > early_slope_edge: + if angle < 0: + angles = np.linspace(-90, -12, n_tot_angles) + else: + angles = np.linspace(90, 12, n_tot_angles) + angle = get_smallest_skew_omp(img_resized, sigma_des, angles, plotter=plotter) + else: + angles = np.linspace(-25, 25, int(0.5 * n_tot_angles) + 10) + angle = get_smallest_skew_omp(img_resized, sigma_des, angles, plotter=plotter) + + early_slope_edge=22 + if abs(angle) > early_slope_edge: + if angle < 0: + angles = np.linspace(-90, -25, int(0.5 * n_tot_angles) + 10) + else: + angles = np.linspace(90, 25, int(0.5 * n_tot_angles) + 10) + angle = get_smallest_skew_omp(img_resized, sigma_des, angles, plotter=plotter) + + return angle + +def do_image_rotation_omp(queue_of_all_params,angles_per_process, img_resized, sigma_des): + vars_per_each_subprocess = [] + angles_per_each_subprocess = [] + for mv in range(len(angles_per_process)): + img_rot=rotate_image(img_resized,angles_per_process[mv]) + img_rot[img_rot!=0]=1 + try: + var_spectrum=find_num_col_deskew(img_rot,sigma_des,20.3 ) + except: + var_spectrum=0 + vars_per_each_subprocess.append(var_spectrum) + angles_per_each_subprocess.append(angles_per_process[mv]) + + queue_of_all_params.put([vars_per_each_subprocess, angles_per_each_subprocess]) + +def get_smallest_skew_omp(img_resized, sigma_des, angles, plotter=None): + num_cores = cpu_count() + + 
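    # The skew-angle search is fanned out over all CPU cores: np.linspace splits
    # the candidate angles into num_cores contiguous slices, one worker Process
    # per slice rotates the image at each of its angles and puts its (variances,
    # angles) lists on the shared Queue, and the parent joins the workers and
    # keeps the angle whose find_num_col_deskew variance score is largest.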
queue_of_all_params = Queue() + processes = [] + nh = np.linspace(0, len(angles), num_cores + 1) + + for i in range(num_cores): + angles_per_process = angles[int(nh[i]) : int(nh[i + 1])] + processes.append(Process(target=do_image_rotation_omp, args=(queue_of_all_params, angles_per_process, img_resized, sigma_des))) + + for i in range(num_cores): + processes[i].start() + + var_res=[] + all_angles = [] + for i in range(num_cores): + list_all_par = queue_of_all_params.get(True) + vars_for_subprocess = list_all_par[0] + angles_sub_process = list_all_par[1] + for j in range(len(vars_for_subprocess)): + var_res.append(vars_for_subprocess[j]) + all_angles.append(angles_sub_process[j]) + + for i in range(num_cores): + processes[i].join() + + if plotter: + plotter.save_plot_of_rotation_angle(all_angles, var_res) + + + try: + var_res=np.array(var_res) + ang_int=all_angles[np.argmax(var_res)]#angels_sorted[arg_final]#angels[arg_sort_early[arg_sort[arg_final]]]#angels[arg_fin] + except: + ang_int=0 + return ang_int + def do_work_of_slopes_new( box_text, contour, contour_par, index_r_con, textline_mask_tot_ea, image_page_rotated, slope_deskew, From 20614d1678fa7c586299680f017e5b7d8c12521c Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Tue, 12 Aug 2025 12:50:15 +0200 Subject: [PATCH 37/40] avoiding float in range --- src/eynollah/utils/__init__.py | 41 +++++++++++++++++----------------- 1 file changed, 20 insertions(+), 21 deletions(-) diff --git a/src/eynollah/utils/__init__.py b/src/eynollah/utils/__init__.py index 7fa4a7b..ca86047 100644 --- a/src/eynollah/utils/__init__.py +++ b/src/eynollah/utils/__init__.py @@ -1801,8 +1801,8 @@ def return_boxes_of_images_by_order_of_reading_new( #print(y_type_2_up,x_starting_up,x_ending_up,'didid') nodes_in = [] for ij in range(len(x_starting_up)): - nodes_in = nodes_in + list(range(x_starting_up[ij], - x_ending_up[ij])) + nodes_in = nodes_in + list(range(int(x_starting_up[ij]), + int(x_ending_up[ij]))) nodes_in = np.unique(nodes_in) #print(nodes_in,'nodes_in') @@ -1825,8 +1825,8 @@ def return_boxes_of_images_by_order_of_reading_new( elif len(y_diff_main_separator_up)==0: nodes_in = [] for ij in range(len(x_starting_up)): - nodes_in = nodes_in + list(range(x_starting_up[ij], - x_ending_up[ij])) + nodes_in = nodes_in + list(range(int(x_starting_up[ij]), + int(x_ending_up[ij]))) nodes_in = np.unique(nodes_in) #print(nodes_in,'nodes_in2') #print(np.array(range(len(peaks_neg_tot)-1)),'np.array(range(len(peaks_neg_tot)-1))') @@ -1866,8 +1866,8 @@ def return_boxes_of_images_by_order_of_reading_new( columns_covered_by_mothers = [] for dj in range(len(x_start_without_mother)): columns_covered_by_mothers = columns_covered_by_mothers + \ - list(range(x_start_without_mother[dj], - x_end_without_mother[dj])) + list(range(int(x_start_without_mother[dj]), + int(x_end_without_mother[dj]))) columns_covered_by_mothers = list(set(columns_covered_by_mothers)) all_columns=np.arange(len(peaks_neg_tot)-1) @@ -1909,8 +1909,8 @@ def return_boxes_of_images_by_order_of_reading_new( columns_covered_by_mothers = [] for dj in range(len(x_start_without_mother)): columns_covered_by_mothers = columns_covered_by_mothers + \ - list(range(x_start_without_mother[dj], - x_end_without_mother[dj])) + list(range(int(x_start_without_mother[dj]), + int(x_end_without_mother[dj]))) columns_covered_by_mothers = list(set(columns_covered_by_mothers)) all_columns=np.arange(len(peaks_neg_tot)-1) @@ -1926,8 +1926,8 @@ def return_boxes_of_images_by_order_of_reading_new( 
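# The int() casts introduced throughout this file exist because these column
# bounds are taken from NumPy arrays and can arrive as floats, while Python's
# range() only accepts integers (a float argument raises a TypeError).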
columns_covered_by_with_child_no_mothers = [] for dj in range(len(x_end_with_child_without_mother)): columns_covered_by_with_child_no_mothers = columns_covered_by_with_child_no_mothers + \ - list(range(x_start_with_child_without_mother[dj], - x_end_with_child_without_mother[dj])) + list(range(int(x_start_with_child_without_mother[dj]), + int(x_end_with_child_without_mother[dj]))) columns_covered_by_with_child_no_mothers = list(set(columns_covered_by_with_child_no_mothers)) all_columns = np.arange(len(peaks_neg_tot)-1) @@ -1970,8 +1970,8 @@ def return_boxes_of_images_by_order_of_reading_new( columns_covered_by_mothers = [] for dj in range(len(x_starting_all_between_nm_wc)): columns_covered_by_mothers = columns_covered_by_mothers + \ - list(range(x_starting_all_between_nm_wc[dj], - x_ending_all_between_nm_wc[dj])) + list(range(int(x_starting_all_between_nm_wc[dj]), + int(x_ending_all_between_nm_wc[dj]))) columns_covered_by_mothers = list(set(columns_covered_by_mothers)) all_columns=np.arange(i_s_nc, x_end_biggest_column) @@ -1979,8 +1979,8 @@ def return_boxes_of_images_by_order_of_reading_new( should_longest_line_be_extended=0 if (len(x_diff_all_between_nm_wc) > 0 and - set(list(range(x_starting_all_between_nm_wc[biggest], - x_ending_all_between_nm_wc[biggest])) + + set(list(range(int(x_starting_all_between_nm_wc[biggest]), + int(x_ending_all_between_nm_wc[biggest]))) + list(columns_not_covered)) != set(all_columns)): should_longest_line_be_extended=1 index_lines_so_close_to_top_separator = \ @@ -2012,7 +2012,7 @@ def return_boxes_of_images_by_order_of_reading_new( x_ending_all_between_nm_wc = np.append(x_ending_all_between_nm_wc, np.array(columns_not_covered) + 1) ind_args_between=np.arange(len(x_ending_all_between_nm_wc)) - for column in range(i_s_nc, x_end_biggest_column): + for column in range(int(i_s_nc), int(x_end_biggest_column)): ind_args_in_col=ind_args_between[x_starting_all_between_nm_wc==column] #print('babali2') #print(ind_args_in_col,'ind_args_in_col') @@ -2064,7 +2064,7 @@ def return_boxes_of_images_by_order_of_reading_new( x_end_itself=x_end_copy.pop(il) #print(y_copy,'y_copy2') - for column in range(x_start_itself, x_end_itself+1): + for column in range(int(x_start_itself), int(x_end_itself)+1): #print(column,'cols') y_in_cols=[] for yic in range(len(y_copy)): @@ -2095,11 +2095,11 @@ def return_boxes_of_images_by_order_of_reading_new( all_columns = np.arange(len(peaks_neg_tot)-1) columns_covered_by_lines_covered_more_than_2col = [] for dj in range(len(x_starting)): - if set(list(range(x_starting[dj],x_ending[dj]))) == set(all_columns): + if set(list(range(int(x_starting[dj]),int(x_ending[dj]) ))) == set(all_columns): pass else: columns_covered_by_lines_covered_more_than_2col = columns_covered_by_lines_covered_more_than_2col + \ - list(range(x_starting[dj],x_ending[dj])) + list(range(int(x_starting[dj]),int(x_ending[dj]) )) columns_covered_by_lines_covered_more_than_2col = list(set(columns_covered_by_lines_covered_more_than_2col)) columns_not_covered = list(set(all_columns) - set(columns_covered_by_lines_covered_more_than_2col)) @@ -2124,7 +2124,7 @@ def return_boxes_of_images_by_order_of_reading_new( x_ending = np.append(x_ending, np.array(columns_not_covered) + 1) ind_args=np.array(range(len(y_type_2))) - #ind_args=np.array(ind_args) + for column in range(len(peaks_neg_tot)-1): #print(column,'column') ind_args_in_col=ind_args[x_starting==column] @@ -2155,8 +2155,7 @@ def return_boxes_of_images_by_order_of_reading_new( x_start_itself=x_start_copy.pop(il) 
x_end_itself=x_end_copy.pop(il) - #print(y_copy,'y_copy2') - for column in range(x_start_itself, x_end_itself+1): + for column in range(int(x_start_itself), int(x_end_itself)+1): #print(column,'cols') y_in_cols=[] for yic in range(len(y_copy)): From 8dc2fab9faf70c4ed92ab07f5a5b3d763a14d994 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Mon, 18 Aug 2025 02:31:13 +0200 Subject: [PATCH 38/40] reading order on given layout --- src/eynollah/cli.py | 48 +- src/eynollah/mb_ro_on_layout.py | 1134 +++++++++++++++++++++++++++++++ 2 files changed, 1158 insertions(+), 24 deletions(-) create mode 100644 src/eynollah/mb_ro_on_layout.py diff --git a/src/eynollah/cli.py b/src/eynollah/cli.py index 5135534..67fd57e 100644 --- a/src/eynollah/cli.py +++ b/src/eynollah/cli.py @@ -4,6 +4,7 @@ from ocrd_utils import initLogging, getLevelName, getLogger from eynollah.eynollah import Eynollah, Eynollah_ocr from eynollah.sbb_binarize import SbbBinarizer from eynollah.image_enhancer import Enhancer +from eynollah.mb_ro_on_layout import machine_based_reading_order_on_layout @click.group() def main(): @@ -13,38 +14,37 @@ def main(): @click.option( "--dir_xml", "-dx", - help="directory of GT page-xml files", + help="directory of page-xml files", type=click.Path(exists=True, file_okay=False), ) @click.option( - "--dir_out_modal_image", - "-domi", - help="directory where ground truth images would be written", + "--xml_file", + "-xml", + help="xml filename", + type=click.Path(exists=True, dir_okay=False), +) +@click.option( + "--dir_out", + "-do", + help="directory for output images", type=click.Path(exists=True, file_okay=False), ) @click.option( - "--dir_out_classes", - "-docl", - help="directory where ground truth classes would be written", + "--model", + "-m", + help="directory of models", type=click.Path(exists=True, file_okay=False), + required=True, ) -@click.option( - "--input_height", - "-ih", - help="input height", -) -@click.option( - "--input_width", - "-iw", - help="input width", -) -@click.option( - "--min_area_size", - "-min", - help="min area size of regions considered for reading order training.", -) -def machine_based_reading_order(dir_xml, dir_out_modal_image, dir_out_classes, input_height, input_width, min_area_size): - xml_files_ind = os.listdir(dir_xml) + +def machine_based_reading_order(dir_xml, xml_file, dir_out, model): + raedingorder_object = machine_based_reading_order_on_layout(model, dir_out=dir_out, logger=getLogger('enhancement')) + + if dir_xml: + raedingorder_object.run(dir_in=dir_xml) + else: + raedingorder_object.run(xml_filename=xml_file) + @main.command() @click.option('--patches/--no-patches', default=True, help='by enabling this parameter you let the model to see the image in patches.') diff --git a/src/eynollah/mb_ro_on_layout.py b/src/eynollah/mb_ro_on_layout.py new file mode 100644 index 0000000..7625a90 --- /dev/null +++ b/src/eynollah/mb_ro_on_layout.py @@ -0,0 +1,1134 @@ +""" +Image enhancer. The output can be written as same scale of input or in new predicted scale. 
+""" + +from logging import Logger +from difflib import SequenceMatcher as sq +from PIL import Image, ImageDraw, ImageFont +import math +import os +import sys +import time +from typing import Optional +import atexit +import warnings +from functools import partial +from pathlib import Path +from multiprocessing import cpu_count +import gc +import copy +from loky import ProcessPoolExecutor +import xml.etree.ElementTree as ET +import cv2 +import numpy as np +from ocrd import OcrdPage +from ocrd_utils import getLogger, tf_disable_interactive_logs +import statistics +from tensorflow.keras.models import load_model +from .utils.resize import resize_image +from .utils import ( + crop_image_inside_box +) + +from .utils.contour import ( + filter_contours_area_of_image, + filter_contours_area_of_image_tables, + find_contours_mean_y_diff, + find_new_features_of_contours, + find_features_of_contours, + get_text_region_boxes_by_given_contours, + get_textregion_contours_in_org_image, + get_textregion_contours_in_org_image_light, + return_contours_of_image, + return_contours_of_interested_region, + return_contours_of_interested_region_by_min_size, + return_contours_of_interested_textline, + return_parent_contours, +) + +DPI_THRESHOLD = 298 +KERNEL = np.ones((5, 5), np.uint8) + + +class machine_based_reading_order_on_layout: + def __init__( + self, + dir_models : str, + dir_out : Optional[str] = None, + logger : Optional[Logger] = None, + ): + self.dir_out = dir_out + + self.logger = logger if logger else getLogger('mbro on layout') + # for parallelization of CPU-intensive tasks: + self.executor = ProcessPoolExecutor(max_workers=cpu_count(), timeout=1200) + atexit.register(self.executor.shutdown) + self.dir_models = dir_models + self.model_reading_order_dir = dir_models + "/model_step_5100000_mb_ro"#"/model_ens_reading_order_machine_based" + + try: + for device in tf.config.list_physical_devices('GPU'): + tf.config.experimental.set_memory_growth(device, True) + except: + self.logger.warning("no GPU device available") + + self.model_reading_order = self.our_load_model(self.model_reading_order_dir) + self.light_version = True + + + def cache_images(self, image_filename=None, image_pil=None, dpi=None): + ret = {} + t_c0 = time.time() + if image_filename: + ret['img'] = cv2.imread(image_filename) + if self.light_version: + self.dpi = 100 + else: + self.dpi = 0#check_dpi(image_filename) + else: + ret['img'] = pil2cv(image_pil) + if self.light_version: + self.dpi = 100 + else: + self.dpi = 0#check_dpi(image_pil) + ret['img_grayscale'] = cv2.cvtColor(ret['img'], cv2.COLOR_BGR2GRAY) + for prefix in ('', '_grayscale'): + ret[f'img{prefix}_uint8'] = ret[f'img{prefix}'].astype(np.uint8) + self._imgs = ret + if dpi is not None: + self.dpi = dpi + + def reset_file_name_dir(self, image_filename): + t_c = time.time() + self.cache_images(image_filename=image_filename) + self.output_filename = os.path.join(self.dir_out, Path(image_filename).stem +'.png') + + def imread(self, grayscale=False, uint8=True): + key = 'img' + if grayscale: + key += '_grayscale' + if uint8: + key += '_uint8' + return self._imgs[key].copy() + + def isNaN(self, num): + return num != num + + @staticmethod + def our_load_model(model_file): + if model_file.endswith('.h5') and Path(model_file[:-3]).exists(): + # prefer SavedModel over HDF5 format if it exists + model_file = model_file[:-3] + try: + model = load_model(model_file, compile=False) + except: + model = load_model(model_file, compile=False, custom_objects={ + "PatchEncoder": PatchEncoder, 
"Patches": Patches}) + return model + + def predict_enhancement(self, img): + self.logger.debug("enter predict_enhancement") + + img_height_model = self.model_enhancement.layers[-1].output_shape[1] + img_width_model = self.model_enhancement.layers[-1].output_shape[2] + if img.shape[0] < img_height_model: + img = cv2.resize(img, (img.shape[1], img_width_model), interpolation=cv2.INTER_NEAREST) + if img.shape[1] < img_width_model: + img = cv2.resize(img, (img_height_model, img.shape[0]), interpolation=cv2.INTER_NEAREST) + margin = int(0.1 * img_width_model) + width_mid = img_width_model - 2 * margin + height_mid = img_height_model - 2 * margin + img = img / 255. + img_h = img.shape[0] + img_w = img.shape[1] + + prediction_true = np.zeros((img_h, img_w, 3)) + nxf = img_w / float(width_mid) + nyf = img_h / float(height_mid) + nxf = int(nxf) + 1 if nxf > int(nxf) else int(nxf) + nyf = int(nyf) + 1 if nyf > int(nyf) else int(nyf) + + for i in range(nxf): + for j in range(nyf): + if i == 0: + index_x_d = i * width_mid + index_x_u = index_x_d + img_width_model + else: + index_x_d = i * width_mid + index_x_u = index_x_d + img_width_model + if j == 0: + index_y_d = j * height_mid + index_y_u = index_y_d + img_height_model + else: + index_y_d = j * height_mid + index_y_u = index_y_d + img_height_model + + if index_x_u > img_w: + index_x_u = img_w + index_x_d = img_w - img_width_model + if index_y_u > img_h: + index_y_u = img_h + index_y_d = img_h - img_height_model + + img_patch = img[np.newaxis, index_y_d:index_y_u, index_x_d:index_x_u, :] + label_p_pred = self.model_enhancement.predict(img_patch, verbose=0) + seg = label_p_pred[0, :, :, :] * 255 + + if i == 0 and j == 0: + prediction_true[index_y_d + 0:index_y_u - margin, + index_x_d + 0:index_x_u - margin] = \ + seg[0:-margin or None, + 0:-margin or None] + elif i == nxf - 1 and j == nyf - 1: + prediction_true[index_y_d + margin:index_y_u - 0, + index_x_d + margin:index_x_u - 0] = \ + seg[margin:, + margin:] + elif i == 0 and j == nyf - 1: + prediction_true[index_y_d + margin:index_y_u - 0, + index_x_d + 0:index_x_u - margin] = \ + seg[margin:, + 0:-margin or None] + elif i == nxf - 1 and j == 0: + prediction_true[index_y_d + 0:index_y_u - margin, + index_x_d + margin:index_x_u - 0] = \ + seg[0:-margin or None, + margin:] + elif i == 0 and j != 0 and j != nyf - 1: + prediction_true[index_y_d + margin:index_y_u - margin, + index_x_d + 0:index_x_u - margin] = \ + seg[margin:-margin or None, + 0:-margin or None] + elif i == nxf - 1 and j != 0 and j != nyf - 1: + prediction_true[index_y_d + margin:index_y_u - margin, + index_x_d + margin:index_x_u - 0] = \ + seg[margin:-margin or None, + margin:] + elif i != 0 and i != nxf - 1 and j == 0: + prediction_true[index_y_d + 0:index_y_u - margin, + index_x_d + margin:index_x_u - margin] = \ + seg[0:-margin or None, + margin:-margin or None] + elif i != 0 and i != nxf - 1 and j == nyf - 1: + prediction_true[index_y_d + margin:index_y_u - 0, + index_x_d + margin:index_x_u - margin] = \ + seg[margin:, + margin:-margin or None] + else: + prediction_true[index_y_d + margin:index_y_u - margin, + index_x_d + margin:index_x_u - margin] = \ + seg[margin:-margin or None, + margin:-margin or None] + + prediction_true = prediction_true.astype(int) + return prediction_true + + def calculate_width_height_by_columns(self, img, num_col, width_early, label_p_pred): + self.logger.debug("enter calculate_width_height_by_columns") + if num_col == 1: + img_w_new = 2000 + elif num_col == 2: + img_w_new = 2400 + elif num_col == 3: 
+ img_w_new = 3000 + elif num_col == 4: + img_w_new = 4000 + elif num_col == 5: + img_w_new = 5000 + elif num_col == 6: + img_w_new = 6500 + else: + img_w_new = width_early + img_h_new = img_w_new * img.shape[0] // img.shape[1] + + if img_h_new >= 8000: + img_new = np.copy(img) + num_column_is_classified = False + else: + img_new = resize_image(img, img_h_new, img_w_new) + num_column_is_classified = True + + return img_new, num_column_is_classified + + def early_page_for_num_of_column_classification(self,img_bin): + self.logger.debug("enter early_page_for_num_of_column_classification") + if self.input_binary: + img = np.copy(img_bin).astype(np.uint8) + else: + img = self.imread() + img = cv2.GaussianBlur(img, (5, 5), 0) + img_page_prediction = self.do_prediction(False, img, self.model_page) + + imgray = cv2.cvtColor(img_page_prediction, cv2.COLOR_BGR2GRAY) + _, thresh = cv2.threshold(imgray, 0, 255, 0) + thresh = cv2.dilate(thresh, KERNEL, iterations=3) + contours, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) + if len(contours)>0: + cnt_size = np.array([cv2.contourArea(contours[j]) + for j in range(len(contours))]) + cnt = contours[np.argmax(cnt_size)] + box = cv2.boundingRect(cnt) + else: + box = [0, 0, img.shape[1], img.shape[0]] + cropped_page, page_coord = crop_image_inside_box(box, img) + + self.logger.debug("exit early_page_for_num_of_column_classification") + return cropped_page, page_coord + + def calculate_width_height_by_columns_1_2(self, img, num_col, width_early, label_p_pred): + self.logger.debug("enter calculate_width_height_by_columns") + if num_col == 1: + img_w_new = 1000 + else: + img_w_new = 1300 + img_h_new = img_w_new * img.shape[0] // img.shape[1] + + if label_p_pred[0][int(num_col - 1)] < 0.9 and img_w_new < width_early: + img_new = np.copy(img) + num_column_is_classified = False + #elif label_p_pred[0][int(num_col - 1)] < 0.8 and img_h_new >= 8000: + elif img_h_new >= 8000: + img_new = np.copy(img) + num_column_is_classified = False + else: + img_new = resize_image(img, img_h_new, img_w_new) + num_column_is_classified = True + + return img_new, num_column_is_classified + + def resize_and_enhance_image_with_column_classifier(self, light_version): + self.logger.debug("enter resize_and_enhance_image_with_column_classifier") + dpi = 0#self.dpi + self.logger.info("Detected %s DPI", dpi) + if self.input_binary: + img = self.imread() + prediction_bin = self.do_prediction(True, img, self.model_bin, n_batch_inference=5) + prediction_bin = 255 * (prediction_bin[:,:,0]==0) + prediction_bin = np.repeat(prediction_bin[:, :, np.newaxis], 3, axis=2).astype(np.uint8) + img= np.copy(prediction_bin) + img_bin = prediction_bin + else: + img = self.imread() + self.h_org, self.w_org = img.shape[:2] + img_bin = None + + width_early = img.shape[1] + t1 = time.time() + _, page_coord = self.early_page_for_num_of_column_classification(img_bin) + + self.image_page_org_size = img[page_coord[0] : page_coord[1], page_coord[2] : page_coord[3], :] + self.page_coord = page_coord + + if self.num_col_upper and not self.num_col_lower: + num_col = self.num_col_upper + label_p_pred = [np.ones(6)] + elif self.num_col_lower and not self.num_col_upper: + num_col = self.num_col_lower + label_p_pred = [np.ones(6)] + elif not self.num_col_upper and not self.num_col_lower: + if self.input_binary: + img_in = np.copy(img) + img_in = img_in / 255.0 + img_in = cv2.resize(img_in, (448, 448), interpolation=cv2.INTER_NEAREST) + img_in = img_in.reshape(1, 448, 448, 3) + else: + img_1ch = 
self.imread(grayscale=True) + width_early = img_1ch.shape[1] + img_1ch = img_1ch[page_coord[0] : page_coord[1], page_coord[2] : page_coord[3]] + + img_1ch = img_1ch / 255.0 + img_1ch = cv2.resize(img_1ch, (448, 448), interpolation=cv2.INTER_NEAREST) + img_in = np.zeros((1, img_1ch.shape[0], img_1ch.shape[1], 3)) + img_in[0, :, :, 0] = img_1ch[:, :] + img_in[0, :, :, 1] = img_1ch[:, :] + img_in[0, :, :, 2] = img_1ch[:, :] + + label_p_pred = self.model_classifier.predict(img_in, verbose=0) + num_col = np.argmax(label_p_pred[0]) + 1 + elif (self.num_col_upper and self.num_col_lower) and (self.num_col_upper!=self.num_col_lower): + if self.input_binary: + img_in = np.copy(img) + img_in = img_in / 255.0 + img_in = cv2.resize(img_in, (448, 448), interpolation=cv2.INTER_NEAREST) + img_in = img_in.reshape(1, 448, 448, 3) + else: + img_1ch = self.imread(grayscale=True) + width_early = img_1ch.shape[1] + img_1ch = img_1ch[page_coord[0] : page_coord[1], page_coord[2] : page_coord[3]] + + img_1ch = img_1ch / 255.0 + img_1ch = cv2.resize(img_1ch, (448, 448), interpolation=cv2.INTER_NEAREST) + img_in = np.zeros((1, img_1ch.shape[0], img_1ch.shape[1], 3)) + img_in[0, :, :, 0] = img_1ch[:, :] + img_in[0, :, :, 1] = img_1ch[:, :] + img_in[0, :, :, 2] = img_1ch[:, :] + + label_p_pred = self.model_classifier.predict(img_in, verbose=0) + num_col = np.argmax(label_p_pred[0]) + 1 + + if num_col > self.num_col_upper: + num_col = self.num_col_upper + label_p_pred = [np.ones(6)] + if num_col < self.num_col_lower: + num_col = self.num_col_lower + label_p_pred = [np.ones(6)] + else: + num_col = self.num_col_upper + label_p_pred = [np.ones(6)] + + self.logger.info("Found %d columns (%s)", num_col, np.around(label_p_pred, decimals=5)) + + if dpi < DPI_THRESHOLD: + if light_version and num_col in (1,2): + img_new, num_column_is_classified = self.calculate_width_height_by_columns_1_2( + img, num_col, width_early, label_p_pred) + else: + img_new, num_column_is_classified = self.calculate_width_height_by_columns( + img, num_col, width_early, label_p_pred) + if light_version: + image_res = np.copy(img_new) + else: + image_res = self.predict_enhancement(img_new) + is_image_enhanced = True + + else: + num_column_is_classified = True + image_res = np.copy(img) + is_image_enhanced = False + + self.logger.debug("exit resize_and_enhance_image_with_column_classifier") + return is_image_enhanced, img, image_res, num_col, num_column_is_classified, img_bin + def read_xml(self, xml_file): + file_name = Path(xml_file).stem + tree1 = ET.parse(xml_file, parser = ET.XMLParser(encoding='utf-8')) + root1=tree1.getroot() + alltags=[elem.tag for elem in root1.iter()] + link=alltags[0].split('}')[0]+'}' + + index_tot_regions = [] + tot_region_ref = [] + + for jj in root1.iter(link+'Page'): + y_len=int(jj.attrib['imageHeight']) + x_len=int(jj.attrib['imageWidth']) + + for jj in root1.iter(link+'RegionRefIndexed'): + index_tot_regions.append(jj.attrib['index']) + tot_region_ref.append(jj.attrib['regionRef']) + + if (link+'PrintSpace' in alltags) or (link+'Border' in alltags): + co_printspace = [] + if link+'PrintSpace' in alltags: + region_tags_printspace = np.unique([x for x in alltags if x.endswith('PrintSpace')]) + elif link+'Border' in alltags: + region_tags_printspace = np.unique([x for x in alltags if x.endswith('Border')]) + + for tag in region_tags_printspace: + if link+'PrintSpace' in alltags: + tag_endings_printspace = ['}PrintSpace','}printspace'] + elif link+'Border' in alltags: + tag_endings_printspace = ['}Border','}border'] + + if 
tag.endswith(tag_endings_printspace[0]) or tag.endswith(tag_endings_printspace[1]): + for nn in root1.iter(tag): + c_t_in = [] + sumi = 0 + for vv in nn.iter(): + # check the format of coords + if vv.tag == link + 'Coords': + coords = bool(vv.attrib) + if coords: + p_h = vv.attrib['points'].split(' ') + c_t_in.append( + np.array([[int(x.split(',')[0]), int(x.split(',')[1])] for x in p_h])) + break + else: + pass + + if vv.tag == link + 'Point': + c_t_in.append([int(float(vv.attrib['x'])), int(float(vv.attrib['y']))]) + sumi += 1 + elif vv.tag != link + 'Point' and sumi >= 1: + break + co_printspace.append(np.array(c_t_in)) + img_printspace = np.zeros( (y_len,x_len,3) ) + img_printspace=cv2.fillPoly(img_printspace, pts =co_printspace, color=(1,1,1)) + img_printspace = img_printspace.astype(np.uint8) + + imgray = cv2.cvtColor(img_printspace, cv2.COLOR_BGR2GRAY) + _, thresh = cv2.threshold(imgray, 0, 255, 0) + contours, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) + cnt_size = np.array([cv2.contourArea(contours[j]) for j in range(len(contours))]) + cnt = contours[np.argmax(cnt_size)] + x, y, w, h = cv2.boundingRect(cnt) + + bb_coord_printspace = [x, y, w, h] + + else: + bb_coord_printspace = None + + + region_tags=np.unique([x for x in alltags if x.endswith('Region')]) + co_text_paragraph=[] + co_text_drop=[] + co_text_heading=[] + co_text_header=[] + co_text_marginalia=[] + co_text_catch=[] + co_text_page_number=[] + co_text_signature_mark=[] + co_sep=[] + co_img=[] + co_table=[] + co_graphic=[] + co_graphic_text_annotation=[] + co_graphic_decoration=[] + co_noise=[] + + co_text_paragraph_text=[] + co_text_drop_text=[] + co_text_heading_text=[] + co_text_header_text=[] + co_text_marginalia_text=[] + co_text_catch_text=[] + co_text_page_number_text=[] + co_text_signature_mark_text=[] + co_sep_text=[] + co_img_text=[] + co_table_text=[] + co_graphic_text=[] + co_graphic_text_annotation_text=[] + co_graphic_decoration_text=[] + co_noise_text=[] + + id_paragraph = [] + id_header = [] + id_heading = [] + id_marginalia = [] + + for tag in region_tags: + if tag.endswith('}TextRegion') or tag.endswith('}Textregion'): + for nn in root1.iter(tag): + for child2 in nn: + tag2 = child2.tag + if tag2.endswith('}TextEquiv') or tag2.endswith('}TextEquiv'): + for childtext2 in child2: + if childtext2.tag.endswith('}Unicode') or childtext2.tag.endswith('}Unicode'): + if "type" in nn.attrib and nn.attrib['type']=='drop-capital': + co_text_drop_text.append(childtext2.text) + elif "type" in nn.attrib and nn.attrib['type']=='heading': + co_text_heading_text.append(childtext2.text) + elif "type" in nn.attrib and nn.attrib['type']=='signature-mark': + co_text_signature_mark_text.append(childtext2.text) + elif "type" in nn.attrib and nn.attrib['type']=='header': + co_text_header_text.append(childtext2.text) + ###elif "type" in nn.attrib and nn.attrib['type']=='catch-word': + ###co_text_catch_text.append(childtext2.text) + ###elif "type" in nn.attrib and nn.attrib['type']=='page-number': + ###co_text_page_number_text.append(childtext2.text) + elif "type" in nn.attrib and nn.attrib['type']=='marginalia': + co_text_marginalia_text.append(childtext2.text) + else: + co_text_paragraph_text.append(childtext2.text) + c_t_in_drop=[] + c_t_in_paragraph=[] + c_t_in_heading=[] + c_t_in_header=[] + c_t_in_page_number=[] + c_t_in_signature_mark=[] + c_t_in_catch=[] + c_t_in_marginalia=[] + + + sumi=0 + for vv in nn.iter(): + # check the format of coords + if vv.tag==link+'Coords': + + 
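                        # Coordinates appear in two PAGE encodings: either a single Coords
                        # element whose 'points' attribute packs all vertices into one string,
                        # or a run of individual Point elements (x/y attributes) that is
                        # collected until the first non-Point tag is reached.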
coords=bool(vv.attrib) + if coords: + #print('birda1') + p_h=vv.attrib['points'].split(' ') + + + + if "type" in nn.attrib and nn.attrib['type']=='drop-capital': + + c_t_in_drop.append( np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ] for x in p_h] ) ) + + elif "type" in nn.attrib and nn.attrib['type']=='heading': + ##id_heading.append(nn.attrib['id']) + c_t_in_heading.append( np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ] for x in p_h] ) ) + + + elif "type" in nn.attrib and nn.attrib['type']=='signature-mark': + + c_t_in_signature_mark.append( np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ] for x in p_h] ) ) + #print(c_t_in_paragraph) + elif "type" in nn.attrib and nn.attrib['type']=='header': + #id_header.append(nn.attrib['id']) + c_t_in_header.append( np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ] for x in p_h] ) ) + + + ###elif "type" in nn.attrib and nn.attrib['type']=='catch-word': + ###c_t_in_catch.append( np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ] for x in p_h] ) ) + + + ###elif "type" in nn.attrib and nn.attrib['type']=='page-number': + + ###c_t_in_page_number.append( np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ] for x in p_h] ) ) + + elif "type" in nn.attrib and nn.attrib['type']=='marginalia': + #id_marginalia.append(nn.attrib['id']) + + c_t_in_marginalia.append( np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ] for x in p_h] ) ) + else: + #id_paragraph.append(nn.attrib['id']) + + c_t_in_paragraph.append( np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ] for x in p_h] ) ) + + break + else: + pass + + + if vv.tag==link+'Point': + if "type" in nn.attrib and nn.attrib['type']=='drop-capital': + + c_t_in_drop.append([ int(float(vv.attrib['x'])) , int(float(vv.attrib['y'])) ]) + sumi+=1 + + elif "type" in nn.attrib and nn.attrib['type']=='heading': + #id_heading.append(nn.attrib['id']) + c_t_in_heading.append([ int(float(vv.attrib['x'])) , int(float(vv.attrib['y'])) ]) + sumi+=1 + + + elif "type" in nn.attrib and nn.attrib['type']=='signature-mark': + + c_t_in_signature_mark.append([ int(float(vv.attrib['x'])) , int(float(vv.attrib['y'])) ]) + sumi+=1 + elif "type" in nn.attrib and nn.attrib['type']=='header': + #id_header.append(nn.attrib['id']) + c_t_in_header.append([ int(float(vv.attrib['x'])) , int(float(vv.attrib['y'])) ]) + sumi+=1 + + + ###elif "type" in nn.attrib and nn.attrib['type']=='catch-word': + ###c_t_in_catch.append([ int(float(vv.attrib['x'])) , int(float(vv.attrib['y'])) ]) + ###sumi+=1 + + ###elif "type" in nn.attrib and nn.attrib['type']=='page-number': + + ###c_t_in_page_number.append([ int(float(vv.attrib['x'])) , int(float(vv.attrib['y'])) ]) + ###sumi+=1 + + elif "type" in nn.attrib and nn.attrib['type']=='marginalia': + #id_marginalia.append(nn.attrib['id']) + + c_t_in_marginalia.append([ int(float(vv.attrib['x'])) , int(float(vv.attrib['y'])) ]) + sumi+=1 + + else: + #id_paragraph.append(nn.attrib['id']) + c_t_in_paragraph.append([ int(float(vv.attrib['x'])) , int(float(vv.attrib['y'])) ]) + sumi+=1 + + elif vv.tag!=link+'Point' and sumi>=1: + break + + if len(c_t_in_drop)>0: + co_text_drop.append(np.array(c_t_in_drop)) + if len(c_t_in_paragraph)>0: + co_text_paragraph.append(np.array(c_t_in_paragraph)) + id_paragraph.append(nn.attrib['id']) + if len(c_t_in_heading)>0: + co_text_heading.append(np.array(c_t_in_heading)) + id_heading.append(nn.attrib['id']) + + if len(c_t_in_header)>0: + co_text_header.append(np.array(c_t_in_header)) + 
id_header.append(nn.attrib['id']) + if len(c_t_in_page_number)>0: + co_text_page_number.append(np.array(c_t_in_page_number)) + if len(c_t_in_catch)>0: + co_text_catch.append(np.array(c_t_in_catch)) + + if len(c_t_in_signature_mark)>0: + co_text_signature_mark.append(np.array(c_t_in_signature_mark)) + + if len(c_t_in_marginalia)>0: + co_text_marginalia.append(np.array(c_t_in_marginalia)) + id_marginalia.append(nn.attrib['id']) + + + elif tag.endswith('}GraphicRegion') or tag.endswith('}graphicregion'): + for nn in root1.iter(tag): + c_t_in=[] + c_t_in_text_annotation=[] + c_t_in_decoration=[] + sumi=0 + for vv in nn.iter(): + # check the format of coords + if vv.tag==link+'Coords': + coords=bool(vv.attrib) + if coords: + p_h=vv.attrib['points'].split(' ') + + if "type" in nn.attrib and nn.attrib['type']=='handwritten-annotation': + c_t_in_text_annotation.append( np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ] for x in p_h] ) ) + + elif "type" in nn.attrib and nn.attrib['type']=='decoration': + c_t_in_decoration.append( np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ] for x in p_h] ) ) + + else: + c_t_in.append( np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ] for x in p_h] ) ) + + + break + else: + pass + + + if vv.tag==link+'Point': + if "type" in nn.attrib and nn.attrib['type']=='handwritten-annotation': + c_t_in_text_annotation.append([ int(float(vv.attrib['x'])) , int(float(vv.attrib['y'])) ]) + sumi+=1 + + elif "type" in nn.attrib and nn.attrib['type']=='decoration': + c_t_in_decoration.append([ int(float(vv.attrib['x'])) , int(float(vv.attrib['y'])) ]) + sumi+=1 + + else: + c_t_in.append([ int(float(vv.attrib['x'])) , int(float(vv.attrib['y'])) ]) + sumi+=1 + + if len(c_t_in_text_annotation)>0: + co_graphic_text_annotation.append(np.array(c_t_in_text_annotation)) + if len(c_t_in_decoration)>0: + co_graphic_decoration.append(np.array(c_t_in_decoration)) + if len(c_t_in)>0: + co_graphic.append(np.array(c_t_in)) + + + + elif tag.endswith('}ImageRegion') or tag.endswith('}imageregion'): + for nn in root1.iter(tag): + c_t_in=[] + sumi=0 + for vv in nn.iter(): + # check the format of coords + if vv.tag==link+'Coords': + coords=bool(vv.attrib) + if coords: + p_h=vv.attrib['points'].split(' ') + c_t_in.append( np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ] for x in p_h] ) ) + break + else: + pass + + + if vv.tag==link+'Point': + c_t_in.append([ int(float(vv.attrib['x'])) , int(float(vv.attrib['y'])) ]) + sumi+=1 + elif vv.tag!=link+'Point' and sumi>=1: + break + co_img.append(np.array(c_t_in)) + co_img_text.append(' ') + + + elif tag.endswith('}SeparatorRegion') or tag.endswith('}separatorregion'): + for nn in root1.iter(tag): + c_t_in=[] + sumi=0 + for vv in nn.iter(): + # check the format of coords + if vv.tag==link+'Coords': + coords=bool(vv.attrib) + if coords: + p_h=vv.attrib['points'].split(' ') + c_t_in.append( np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ] for x in p_h] ) ) + break + else: + pass + + + if vv.tag==link+'Point': + c_t_in.append([ int(float(vv.attrib['x'])) , int(float(vv.attrib['y'])) ]) + sumi+=1 + elif vv.tag!=link+'Point' and sumi>=1: + break + co_sep.append(np.array(c_t_in)) + + + + elif tag.endswith('}TableRegion') or tag.endswith('}tableregion'): + for nn in root1.iter(tag): + c_t_in=[] + sumi=0 + for vv in nn.iter(): + # check the format of coords + if vv.tag==link+'Coords': + coords=bool(vv.attrib) + if coords: + p_h=vv.attrib['points'].split(' ') + c_t_in.append( np.array( [ [ int(x.split(',')[0]) , 
int(x.split(',')[1]) ] for x in p_h] ) ) + break + else: + pass + + + if vv.tag==link+'Point': + c_t_in.append([ int(float(vv.attrib['x'])) , int(float(vv.attrib['y'])) ]) + sumi+=1 + + elif vv.tag!=link+'Point' and sumi>=1: + break + co_table.append(np.array(c_t_in)) + co_table_text.append(' ') + + elif tag.endswith('}NoiseRegion') or tag.endswith('}noiseregion'): + for nn in root1.iter(tag): + c_t_in=[] + sumi=0 + for vv in nn.iter(): + # check the format of coords + if vv.tag==link+'Coords': + coords=bool(vv.attrib) + if coords: + p_h=vv.attrib['points'].split(' ') + c_t_in.append( np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ] for x in p_h] ) ) + break + else: + pass + + + if vv.tag==link+'Point': + c_t_in.append([ int(float(vv.attrib['x'])) , int(float(vv.attrib['y'])) ]) + sumi+=1 + + elif vv.tag!=link+'Point' and sumi>=1: + break + co_noise.append(np.array(c_t_in)) + co_noise_text.append(' ') + + img = np.zeros( (y_len,x_len,3) ) + img_poly=cv2.fillPoly(img, pts =co_text_paragraph, color=(1,1,1)) + + img_poly=cv2.fillPoly(img, pts =co_text_heading, color=(2,2,2)) + img_poly=cv2.fillPoly(img, pts =co_text_header, color=(2,2,2)) + img_poly=cv2.fillPoly(img, pts =co_text_marginalia, color=(3,3,3)) + img_poly=cv2.fillPoly(img, pts =co_img, color=(4,4,4)) + img_poly=cv2.fillPoly(img, pts =co_sep, color=(5,5,5)) + + return tree1, root1, bb_coord_printspace, file_name, id_paragraph, id_header+id_heading, co_text_paragraph, co_text_header+co_text_heading,\ + tot_region_ref,x_len, y_len,index_tot_regions, img_poly + + def return_indexes_of_contours_loctaed_inside_another_list_of_contours(self, contours, contours_loc, cx_main_loc, cy_main_loc, indexes_loc): + indexes_of_located_cont = [] + center_x_coordinates_of_located = [] + center_y_coordinates_of_located = [] + #M_main_tot = [cv2.moments(contours_loc[j]) + #for j in range(len(contours_loc))] + #cx_main_loc = [(M_main_tot[j]["m10"] / (M_main_tot[j]["m00"] + 1e-32)) for j in range(len(M_main_tot))] + #cy_main_loc = [(M_main_tot[j]["m01"] / (M_main_tot[j]["m00"] + 1e-32)) for j in range(len(M_main_tot))] + + for ij in range(len(contours)): + results = [cv2.pointPolygonTest(contours[ij], (cx_main_loc[ind], cy_main_loc[ind]), False) + for ind in range(len(cy_main_loc)) ] + results = np.array(results) + indexes_in = np.where((results == 0) | (results == 1)) + indexes = indexes_loc[indexes_in]# [(results == 0) | (results == 1)]#np.where((results == 0) | (results == 1)) + + indexes_of_located_cont.append(indexes) + center_x_coordinates_of_located.append(np.array(cx_main_loc)[indexes_in] ) + center_y_coordinates_of_located.append(np.array(cy_main_loc)[indexes_in] ) + + return indexes_of_located_cont, center_x_coordinates_of_located, center_y_coordinates_of_located + + def do_order_of_regions_with_model(self, contours_only_text_parent, contours_only_text_parent_h, text_regions_p): + height1 =672#448 + width1 = 448#224 + + height2 =672#448 + width2= 448#224 + + height3 =672#448 + width3 = 448#224 + + inference_bs = 3 + + ver_kernel = np.ones((5, 1), dtype=np.uint8) + hor_kernel = np.ones((1, 5), dtype=np.uint8) + + + min_cont_size_to_be_dilated = 10 + if len(contours_only_text_parent)>min_cont_size_to_be_dilated and self.light_version: + cx_conts, cy_conts, x_min_conts, x_max_conts, y_min_conts, y_max_conts, _ = find_new_features_of_contours(contours_only_text_parent) + args_cont_located = np.array(range(len(contours_only_text_parent))) + + diff_y_conts = np.abs(y_max_conts[:]-y_min_conts) + diff_x_conts = 
np.abs(x_max_conts[:]-x_min_conts) + + mean_x = statistics.mean(diff_x_conts) + median_x = statistics.median(diff_x_conts) + + + diff_x_ratio= diff_x_conts/mean_x + + args_cont_located_excluded = args_cont_located[diff_x_ratio>=1.3] + args_cont_located_included = args_cont_located[diff_x_ratio<1.3] + + contours_only_text_parent_excluded = [contours_only_text_parent[ind] for ind in range(len(contours_only_text_parent)) if diff_x_ratio[ind]>=1.3]#contours_only_text_parent[diff_x_ratio>=1.3] + contours_only_text_parent_included = [contours_only_text_parent[ind] for ind in range(len(contours_only_text_parent)) if diff_x_ratio[ind]<1.3]#contours_only_text_parent[diff_x_ratio<1.3] + + + cx_conts_excluded = [cx_conts[ind] for ind in range(len(cx_conts)) if diff_x_ratio[ind]>=1.3]#cx_conts[diff_x_ratio>=1.3] + cx_conts_included = [cx_conts[ind] for ind in range(len(cx_conts)) if diff_x_ratio[ind]<1.3]#cx_conts[diff_x_ratio<1.3] + + cy_conts_excluded = [cy_conts[ind] for ind in range(len(cy_conts)) if diff_x_ratio[ind]>=1.3]#cy_conts[diff_x_ratio>=1.3] + cy_conts_included = [cy_conts[ind] for ind in range(len(cy_conts)) if diff_x_ratio[ind]<1.3]#cy_conts[diff_x_ratio<1.3] + + #print(diff_x_ratio, 'ratio') + text_regions_p = text_regions_p.astype('uint8') + + if len(contours_only_text_parent_excluded)>0: + textregion_par = np.zeros((text_regions_p.shape[0], text_regions_p.shape[1])).astype('uint8') + textregion_par = cv2.fillPoly(textregion_par, pts=contours_only_text_parent_included, color=(1,1)) + else: + textregion_par = (text_regions_p[:,:]==1)*1 + textregion_par = textregion_par.astype('uint8') + + text_regions_p_textregions_dilated = cv2.erode(textregion_par , hor_kernel, iterations=2) + text_regions_p_textregions_dilated = cv2.dilate(text_regions_p_textregions_dilated , ver_kernel, iterations=4) + text_regions_p_textregions_dilated = cv2.erode(text_regions_p_textregions_dilated , hor_kernel, iterations=1) + text_regions_p_textregions_dilated = cv2.dilate(text_regions_p_textregions_dilated , ver_kernel, iterations=5) + text_regions_p_textregions_dilated[text_regions_p[:,:]>1] = 0 + + + contours_only_dilated, hir_on_text_dilated = return_contours_of_image(text_regions_p_textregions_dilated) + contours_only_dilated = return_parent_contours(contours_only_dilated, hir_on_text_dilated) + + indexes_of_located_cont, center_x_coordinates_of_located, center_y_coordinates_of_located = self.return_indexes_of_contours_loctaed_inside_another_list_of_contours(contours_only_dilated, contours_only_text_parent_included, cx_conts_included, cy_conts_included, args_cont_located_included) + + + if len(args_cont_located_excluded)>0: + for ind in args_cont_located_excluded: + indexes_of_located_cont.append(np.array([ind])) + contours_only_dilated.append(contours_only_text_parent[ind]) + center_y_coordinates_of_located.append(0) + + array_list = [np.array([elem]) if isinstance(elem, int) else elem for elem in indexes_of_located_cont] + flattened_array = np.concatenate([arr.ravel() for arr in array_list]) + #print(len( np.unique(flattened_array)), 'indexes_of_located_cont uniques') + + missing_textregions = list( set(np.array(range(len(contours_only_text_parent))) ) - set(np.unique(flattened_array)) ) + #print(missing_textregions, 'missing_textregions') + + for ind in missing_textregions: + indexes_of_located_cont.append(np.array([ind])) + contours_only_dilated.append(contours_only_text_parent[ind]) + center_y_coordinates_of_located.append(0) + + + if contours_only_text_parent_h: + for vi in 
range(len(contours_only_text_parent_h)): + indexes_of_located_cont.append(int(vi+len(contours_only_text_parent))) + + array_list = [np.array([elem]) if isinstance(elem, int) else elem for elem in indexes_of_located_cont] + flattened_array = np.concatenate([arr.ravel() for arr in array_list]) + + y_len = text_regions_p.shape[0] + x_len = text_regions_p.shape[1] + + img_poly = np.zeros((y_len,x_len), dtype='uint8') + img_poly[text_regions_p[:,:]==1] = 1 + img_poly[text_regions_p[:,:]==2] = 2 + img_poly[text_regions_p[:,:]==3] = 4 + img_poly[text_regions_p[:,:]==6] = 5 + + img_header_and_sep = np.zeros((y_len,x_len), dtype='uint8') + if contours_only_text_parent_h: + _, cy_main, x_min_main, x_max_main, y_min_main, y_max_main, _ = find_new_features_of_contours( + contours_only_text_parent_h) + for j in range(len(cy_main)): + img_header_and_sep[int(y_max_main[j]):int(y_max_main[j])+12, + int(x_min_main[j]):int(x_max_main[j])] = 1 + co_text_all_org = contours_only_text_parent + contours_only_text_parent_h + if len(contours_only_text_parent)>min_cont_size_to_be_dilated and self.light_version: + co_text_all = contours_only_dilated + contours_only_text_parent_h + else: + co_text_all = contours_only_text_parent + contours_only_text_parent_h + else: + co_text_all_org = contours_only_text_parent + if len(contours_only_text_parent)>min_cont_size_to_be_dilated and self.light_version: + co_text_all = contours_only_dilated + else: + co_text_all = contours_only_text_parent + + if not len(co_text_all): + return [], [] + + labels_con = np.zeros((int(y_len /6.), int(x_len/6.), len(co_text_all)), dtype=bool) + + co_text_all = [(i/6).astype(int) for i in co_text_all] + for i in range(len(co_text_all)): + img = labels_con[:,:,i].astype(np.uint8) + + #img = cv2.resize(img, (int(img.shape[1]/6), int(img.shape[0]/6)), interpolation=cv2.INTER_NEAREST) + + cv2.fillPoly(img, pts=[co_text_all[i]], color=(1,)) + labels_con[:,:,i] = img + + + labels_con = resize_image(labels_con.astype(np.uint8), height1, width1).astype(bool) + img_header_and_sep = resize_image(img_header_and_sep, height1, width1) + img_poly = resize_image(img_poly, height3, width3) + + + + input_1 = np.zeros((inference_bs, height1, width1, 3)) + ordered = [list(range(len(co_text_all)))] + index_update = 0 + #print(labels_con.shape[2],"number of regions for reading order") + while index_update>=0: + ij_list = ordered.pop(index_update) + i = ij_list.pop(0) + + ante_list = [] + post_list = [] + tot_counter = 0 + batch = [] + for j in ij_list: + img1 = labels_con[:,:,i].astype(float) + img2 = labels_con[:,:,j].astype(float) + img1[img_poly==5] = 2 + img2[img_poly==5] = 2 + img1[img_header_and_sep==1] = 3 + img2[img_header_and_sep==1] = 3 + + input_1[len(batch), :, :, 0] = img1 / 3. + input_1[len(batch), :, :, 2] = img2 / 3. + input_1[len(batch), :, :, 1] = img_poly / 5. 
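                    # Each candidate pair (i, j) becomes one 3-channel input for the binary
                    # reading-order model: channel 0 holds region i's mask and channel 2
                    # region j's mask (pixels where img_poly==5 re-labelled 2, pixels under
                    # img_header_and_sep re-labelled 3), channel 1 holds the full layout map;
                    # all three are scaled into [0, 1]. Pairs are scored in batches of
                    # inference_bs, and a prediction >= 0.5 places j after the pivot i
                    # (post_list), otherwise before it (ante_list); the outer loop repeats
                    # with new pivots until every group is a singleton.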
+ + tot_counter += 1 + batch.append(j) + if tot_counter % inference_bs == 0 or tot_counter == len(ij_list): + y_pr = self.model_reading_order.predict(input_1 , verbose=0) + for jb, j in enumerate(batch): + if y_pr[jb][0]>=0.5: + post_list.append(j) + else: + ante_list.append(j) + batch = [] + + if len(ante_list): + ordered.insert(index_update, ante_list) + index_update += 1 + ordered.insert(index_update, [i]) + if len(post_list): + ordered.insert(index_update + 1, post_list) + + index_update = -1 + for index_next, ij_list in enumerate(ordered): + if len(ij_list) > 1: + index_update = index_next + break + + ordered = [i[0] for i in ordered] + + ##id_all_text = np.array(id_all_text)[index_sort] + + + if len(contours_only_text_parent)>min_cont_size_to_be_dilated and self.light_version: + org_contours_indexes = [] + for ind in range(len(ordered)): + region_with_curr_order = ordered[ind] + if region_with_curr_order < len(contours_only_dilated): + if np.isscalar(indexes_of_located_cont[region_with_curr_order]): + org_contours_indexes = org_contours_indexes + [indexes_of_located_cont[region_with_curr_order]] + else: + arg_sort_located_cont = np.argsort(center_y_coordinates_of_located[region_with_curr_order]) + org_contours_indexes = org_contours_indexes + list(np.array(indexes_of_located_cont[region_with_curr_order])[arg_sort_located_cont]) ##org_contours_indexes + list ( + else: + org_contours_indexes = org_contours_indexes + [indexes_of_located_cont[region_with_curr_order]] + + region_ids = ['region_%04d' % i for i in range(len(co_text_all_org))] + return org_contours_indexes, region_ids + else: + region_ids = ['region_%04d' % i for i in range(len(co_text_all_org))] + return ordered, region_ids + + + + + def run(self, xml_filename : Optional[str] = None, dir_in : Optional[str] = None, overwrite : bool = False): + """ + Get image and scales, then extract the page of scanned image + """ + self.logger.debug("enter run") + t0_tot = time.time() + + if dir_in: + self.ls_xmls = os.listdir(dir_in) + elif xml_filename: + self.ls_xmls = [xml_filename] + else: + raise ValueError("run requires either a single image filename or a directory") + + for xml_filename in self.ls_xmls: + self.logger.info(xml_filename) + t0 = time.time() + + if dir_in: + xml_file = os.path.join(dir_in, xml_filename) + else: + xml_file = xml_filename + + tree_xml, root_xml, bb_coord_printspace, file_name, id_paragraph, id_header, co_text_paragraph, co_text_header, tot_region_ref, x_len, y_len, index_tot_regions, img_poly = self.read_xml(xml_file) + + id_all_text = id_paragraph + id_header + + order_text_new, id_of_texts_tot = self.do_order_of_regions_with_model(co_text_paragraph, co_text_header, img_poly[:,:,0]) + + id_all_text = np.array(id_all_text)[order_text_new] + + alltags=[elem.tag for elem in root_xml.iter()] + + + + link=alltags[0].split('}')[0]+'}' + name_space = alltags[0].split('}')[0] + name_space = name_space.split('{')[1] + + page_element = root_xml.find(link+'Page') + + + old_ro = root_xml.find(".//{*}ReadingOrder") + + if old_ro is not None: + page_element.remove(old_ro) + + #print(old_ro, 'old_ro') + ro_subelement = ET.Element('ReadingOrder') + + ro_subelement2 = ET.SubElement(ro_subelement, 'OrderedGroup') + ro_subelement2.set('id', "ro357564684568544579089") + + for index, id_text in enumerate(id_all_text): + new_element_2 = ET.SubElement(ro_subelement2, 'RegionRefIndexed') + new_element_2.set('regionRef', id_all_text[index]) + new_element_2.set('index', str(index)) + + if (link+'PrintSpace' in alltags) or 
(link+'Border' in alltags): + page_element.insert(1, ro_subelement) + else: + page_element.insert(0, ro_subelement) + + alltags=[elem.tag for elem in root_xml.iter()] + + ET.register_namespace("",name_space) + tree_xml.write(os.path.join(self.dir_out, file_name+'.xml'),xml_declaration=True,method='xml',encoding="utf8",default_namespace=None) + + #sys.exit() + From 7dd281267df33c89ec26945559fb2e10bd67f9c1 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Tue, 26 Aug 2025 22:38:03 +0200 Subject: [PATCH 39/40] Marginals are divided into left and right, and written from top to bottom. --- src/eynollah/eynollah.py | 138 ++++++++++++++++++++++++-------- src/eynollah/mb_ro_on_layout.py | 18 +++-- src/eynollah/utils/utils_ocr.py | 88 ++++++++++---------- src/eynollah/utils/xml.py | 10 ++- src/eynollah/writer.py | 58 ++++++++++---- 5 files changed, 215 insertions(+), 97 deletions(-) diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py index 5299d3e..30e180d 100644 --- a/src/eynollah/eynollah.py +++ b/src/eynollah/eynollah.py @@ -289,7 +289,7 @@ class Eynollah: self.model_region_dir_p_ens = dir_models + "/eynollah-main-regions-ensembled_20210425" self.model_region_dir_p_ens_light = dir_models + "/eynollah-main-regions_20220314" self.model_region_dir_p_ens_light_only_images_extraction = dir_models + "/eynollah-main-regions_20231127_672_org_ens_11_13_16_17_18" - self.model_reading_order_dir = dir_models + "/model_step_4800000_mb_ro"#"/model_ens_reading_order_machine_based" + self.model_reading_order_dir = dir_models + "/model_eynollah_reading_order_20250824"#"/model_mb_ro_aug_ens_11"#"/model_step_3200000_mb_ro"#"/model_ens_reading_order_machine_based"#"/model_mb_ro_aug_ens_8"#"/model_ens_reading_order_machine_based" #"/modelens_12sp_elay_0_3_4__3_6_n" #"/modelens_earlylayout_12spaltige_2_3_5_6_7_8" #"/modelens_early12_sp_2_3_5_6_7_8_9_10_12_14_15_16_18" @@ -725,6 +725,7 @@ class Eynollah: label_p_pred = self.model_classifier.predict(img_in, verbose=0) num_col = np.argmax(label_p_pred[0]) + 1 + elif (self.num_col_upper and self.num_col_lower) and (self.num_col_upper!=self.num_col_lower): if self.input_binary: img_in = np.copy(img) @@ -3090,6 +3091,26 @@ class Eynollah: num_col = num_col + 1 if not num_column_is_classified: num_col_classifier = num_col + 1 + if self.num_col_upper and self.num_col_lower: + if self.num_col_upper == self.num_col_lower: + num_col_classifier = self.num_col_upper + else: + if num_col_classifier < self.num_col_lower: + num_col_classifier = self.num_col_lower + if num_col_classifier > self.num_col_upper: + num_col_classifier = self.num_col_upper + + elif self.num_col_lower and not self.num_col_upper: + if num_col_classifier < self.num_col_lower: + num_col_classifier = self.num_col_lower + + elif self.num_col_upper and not self.num_col_lower: + if num_col_classifier > self.num_col_upper: + num_col_classifier = self.num_col_upper + + else: + pass + except Exception as why: self.logger.error(why) num_col = None @@ -3223,7 +3244,6 @@ class Eynollah: text_regions_p_1[mask_lines[:, :] == 1] = 3 text_regions_p = text_regions_p_1[:, :] text_regions_p = np.array(text_regions_p) - if num_col_classifier in (1, 2): try: regions_without_separators = (text_regions_p[:, :] == 1) * 1 @@ -4447,6 +4467,43 @@ class Eynollah: return (slopes_rem, all_found_textline_polygons_rem, boxes_text_rem, txt_con_org_rem, contours_only_text_parent_rem, index_by_text_par_con_rem_sort) + + def separate_marginals_to_left_and_right_and_order_from_top_to_down(self, polygons_of_marginals, 
all_found_textline_polygons_marginals, all_box_coord_marginals, slopes_marginals, mid_point_of_page_width): + cx_marg, cy_marg, _, _, _, _, _ = find_new_features_of_contours( + polygons_of_marginals) + + cx_marg = np.array(cx_marg) + cy_marg = np.array(cy_marg) + + poly_marg_left = list( np.array(polygons_of_marginals)[cx_marg < mid_point_of_page_width] ) + poly_marg_right = list( np.array(polygons_of_marginals)[cx_marg >= mid_point_of_page_width] ) + + all_found_textline_polygons_marginals_left = list( np.array(all_found_textline_polygons_marginals)[cx_marg < mid_point_of_page_width] ) + all_found_textline_polygons_marginals_right = list( np.array(all_found_textline_polygons_marginals)[cx_marg >= mid_point_of_page_width] ) + + all_box_coord_marginals_left = list( np.array(all_box_coord_marginals)[cx_marg < mid_point_of_page_width] ) + all_box_coord_marginals_right = list( np.array(all_box_coord_marginals)[cx_marg >= mid_point_of_page_width] ) + + slopes_marg_left = list( np.array(slopes_marginals)[cx_marg < mid_point_of_page_width] ) + slopes_marg_right = list( np.array(slopes_marginals)[cx_marg >= mid_point_of_page_width] ) + + cy_marg_left = cy_marg[cx_marg < mid_point_of_page_width] + cy_marg_right = cy_marg[cx_marg >= mid_point_of_page_width] + + ordered_left_marginals = [poly for _, poly in sorted(zip(cy_marg_left, poly_marg_left), key=lambda x: x[0])] + ordered_right_marginals = [poly for _, poly in sorted(zip(cy_marg_right, poly_marg_right), key=lambda x: x[0])] + + ordered_left_marginals_textline = [poly for _, poly in sorted(zip(cy_marg_left, all_found_textline_polygons_marginals_left), key=lambda x: x[0])] + ordered_right_marginals_textline = [poly for _, poly in sorted(zip(cy_marg_right, all_found_textline_polygons_marginals_right), key=lambda x: x[0])] + + ordered_left_marginals_bbox = [poly for _, poly in sorted(zip(cy_marg_left, all_box_coord_marginals_left), key=lambda x: x[0])] + ordered_right_marginals_bbox = [poly for _, poly in sorted(zip(cy_marg_right, all_box_coord_marginals_right), key=lambda x: x[0])] + + ordered_left_slopes_marginals = [poly for _, poly in sorted(zip(cy_marg_left, slopes_marg_left), key=lambda x: x[0])] + ordered_right_slopes_marginals = [poly for _, poly in sorted(zip(cy_marg_right, slopes_marg_right), key=lambda x: x[0])] + + return ordered_left_marginals, ordered_right_marginals, ordered_left_marginals_textline, ordered_right_marginals_textline, ordered_left_marginals_bbox, ordered_right_marginals_bbox, ordered_left_slopes_marginals, ordered_right_slopes_marginals + def run(self, image_filename : Optional[str] = None, dir_in : Optional[str] = None, overwrite : bool = False): """ @@ -4489,12 +4546,13 @@ class Eynollah: t0 = time.time() img_res, is_image_enhanced, num_col_classifier, num_column_is_classified = self.run_enhancement(self.light_version) self.logger.info("Enhancing took %.1fs ", time.time() - t0) + if self.extract_only_images: text_regions_p_1, erosion_hurts, polygons_lines_xml, polygons_of_images, image_page, page_coord, cont_page = \ self.get_regions_light_v_extract_only_images(img_res, is_image_enhanced, num_col_classifier) pcgts = self.writer.build_pagexml_no_full_layout( [], page_coord, [], [], [], [], - polygons_of_images, [], [], [], [], [], + polygons_of_images, [], [], [], [], [], [], [], [], [], cont_page, [], []) if self.plotter: self.plotter.write_images_into_directory(polygons_of_images, image_page) @@ -4508,7 +4566,6 @@ class Eynollah: page_coord, image_page, textline_mask_tot_ea, img_bin_light, cont_page = \ 
self.run_graphics_and_columns_without_layout(textline_mask_tot_ea, img_bin_light) - ##all_found_textline_polygons =self.scale_contours_new(textline_mask_tot_ea) cnt_clean_rot_raw, hir_on_cnt_clean_rot = return_contours_of_image(textline_mask_tot_ea) @@ -4530,10 +4587,14 @@ class Eynollah: id_of_texts_tot =['region_0001'] polygons_of_images = [] - slopes_marginals = [] - polygons_of_marginals = [] - all_found_textline_polygons_marginals = [] - all_box_coord_marginals = [] + slopes_marginals_left = [] + slopes_marginals_right = [] + polygons_of_marginals_left = [] + polygons_of_marginals_right = [] + all_found_textline_polygons_marginals_left = [] + all_found_textline_polygons_marginals_right = [] + all_box_coord_marginals_left = [] + all_box_coord_marginals_right = [] polygons_lines_xml = [] contours_tables = [] conf_contours_textregions =[0] @@ -4546,8 +4607,8 @@ class Eynollah: pcgts = self.writer.build_pagexml_no_full_layout( cont_page, page_coord, order_text_new, id_of_texts_tot, - all_found_textline_polygons, page_coord, polygons_of_images, polygons_of_marginals, - all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals, + all_found_textline_polygons, page_coord, polygons_of_images, polygons_of_marginals_left, polygons_of_marginals_right, + all_found_textline_polygons_marginals_left, all_found_textline_polygons_marginals_right, all_box_coord_marginals_left, all_box_coord_marginals_right, slopes, slopes_marginals_left, slopes_marginals_right, cont_page, polygons_lines_xml, contours_tables, ocr_all_textlines=ocr_all_textlines, conf_contours_textregion=conf_contours_textregions, skip_layout_reading_order=self.skip_layout_and_reading_order) return pcgts @@ -4595,11 +4656,10 @@ class Eynollah: #self.logger.info('cont_page %s', cont_page) #plt.imshow(table_prediction) #plt.show() - if not num_col: self.logger.info("No columns detected, outputting an empty PAGE-XML") pcgts = self.writer.build_pagexml_no_full_layout( - [], page_coord, [], [], [], [], [], [], [], [], [], [], + [], page_coord, [], [], [], [], [], [], [], [], [], [], [], [], [], [], cont_page, [], []) return pcgts @@ -4771,6 +4831,7 @@ class Eynollah: contours_only_text_parent_d_ordered = [] contours_only_text_parent_d = [] #contours_only_text_parent = [] + if not len(contours_only_text_parent): # stop early empty_marginals = [[]] * len(polygons_of_marginals) @@ -4778,13 +4839,13 @@ class Eynollah: pcgts = self.writer.build_pagexml_full_layout( [], [], page_coord, [], [], [], [], [], [], polygons_of_images, contours_tables, [], - polygons_of_marginals, empty_marginals, empty_marginals, [], [], [], + polygons_of_marginals, polygons_of_marginals, empty_marginals, empty_marginals, empty_marginals, empty_marginals, [], [], [], [], cont_page, polygons_lines_xml) else: pcgts = self.writer.build_pagexml_no_full_layout( [], page_coord, [], [], [], [], polygons_of_images, - polygons_of_marginals, empty_marginals, empty_marginals, [], [], + polygons_of_marginals, polygons_of_marginals, empty_marginals, empty_marginals, empty_marginals, empty_marginals, [], [], [], cont_page, polygons_lines_xml, contours_tables) return pcgts @@ -4877,8 +4938,11 @@ class Eynollah: num_col_classifier, scale_param, slope_deskew) all_found_textline_polygons_marginals = small_textlines_to_parent_adherence2( all_found_textline_polygons_marginals, textline_mask_tot_ea, num_col_classifier) - - #print("text region early 6 in %.1fs", time.time() - t0) + + mid_point_of_page_width = text_regions_p.shape[1] / 2. 
+ polygons_of_marginals_left, polygons_of_marginals_right, all_found_textline_polygons_marginals_left, all_found_textline_polygons_marginals_right, all_box_coord_marginals_left, all_box_coord_marginals_right, slopes_marginals_left, slopes_marginals_right = self.separate_marginals_to_left_and_right_and_order_from_top_to_down(polygons_of_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes_marginals, mid_point_of_page_width) + + #print(len(polygons_of_marginals), len(ordered_left_marginals), len(ordered_right_marginals), 'marginals ordred') if self.full_layout: if np.abs(slope_deskew) >= SLOPE_THRESHOLD: contours_only_text_parent_d_ordered = self.return_list_of_contours_with_desired_order( @@ -4961,7 +5025,6 @@ class Eynollah: tror = time.time() order_text_new, id_of_texts_tot = self.do_order_of_regions_with_model( contours_only_text_parent, contours_only_text_parent_h, text_regions_p) - print('time spend for mb ro', time.time()-tror) else: if np.abs(slope_deskew) < SLOPE_THRESHOLD: order_text_new, id_of_texts_tot = self.do_order_of_regions( @@ -4978,10 +5041,15 @@ class Eynollah: else: ocr_all_textlines = None - if all_found_textline_polygons_marginals and len(all_found_textline_polygons_marginals)>0: - ocr_all_textlines_marginals = return_rnn_cnn_ocr_of_given_textlines(image_page, all_found_textline_polygons_marginals, self.prediction_model, self.b_s_ocr, self.num_to_char, self.textline_light, self.curved_line) + if all_found_textline_polygons_marginals_left and len(all_found_textline_polygons_marginals_left)>0: + ocr_all_textlines_marginals_left = return_rnn_cnn_ocr_of_given_textlines(image_page, all_found_textline_polygons_marginals_left, self.prediction_model, self.b_s_ocr, self.num_to_char, self.textline_light, self.curved_line) else: - ocr_all_textlines_marginals = None + ocr_all_textlines_marginals_left = None + + if all_found_textline_polygons_marginals_right and len(all_found_textline_polygons_marginals_right)>0: + ocr_all_textlines_marginals_right = return_rnn_cnn_ocr_of_given_textlines(image_page, all_found_textline_polygons_marginals_right, self.prediction_model, self.b_s_ocr, self.num_to_char, self.textline_light, self.curved_line) + else: + ocr_all_textlines_marginals_right = None if all_found_textline_polygons_h and len(all_found_textline_polygons)>0: ocr_all_textlines_h = return_rnn_cnn_ocr_of_given_textlines(image_page, all_found_textline_polygons_h, self.prediction_model, self.b_s_ocr, self.num_to_char, self.textline_light, self.curved_line) @@ -4994,15 +5062,16 @@ class Eynollah: ocr_all_textlines_drop = None else: ocr_all_textlines = None - ocr_all_textlines_marginals = None + ocr_all_textlines_marginals_left = None + ocr_all_textlines_marginals_right = None ocr_all_textlines_h = None ocr_all_textlines_drop = None pcgts = self.writer.build_pagexml_full_layout( contours_only_text_parent, contours_only_text_parent_h, page_coord, order_text_new, id_of_texts_tot, all_found_textline_polygons, all_found_textline_polygons_h, all_box_coord, all_box_coord_h, - polygons_of_images, contours_tables, polygons_of_drop_capitals, polygons_of_marginals, - all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_h, slopes_marginals, - cont_page, polygons_lines_xml, ocr_all_textlines, ocr_all_textlines_h, ocr_all_textlines_marginals, ocr_all_textlines_drop, conf_contours_textregions, conf_contours_textregions_h) + polygons_of_images, contours_tables, polygons_of_drop_capitals, polygons_of_marginals_left, polygons_of_marginals_right, + 
all_found_textline_polygons_marginals_left, all_found_textline_polygons_marginals_right, all_box_coord_marginals_left, all_box_coord_marginals_right, slopes, slopes_h, slopes_marginals_left, slopes_marginals_right, + cont_page, polygons_lines_xml, ocr_all_textlines, ocr_all_textlines_h, ocr_all_textlines_marginals_left, ocr_all_textlines_marginals_right, ocr_all_textlines_drop, conf_contours_textregions, conf_contours_textregions_h) return pcgts contours_only_text_parent_h = None @@ -5077,19 +5146,24 @@ class Eynollah: gc.collect() if len(all_found_textline_polygons)>0: ocr_all_textlines = return_rnn_cnn_ocr_of_given_textlines(image_page, all_found_textline_polygons, self.prediction_model, self.b_s_ocr, self.num_to_char, self.textline_light, self.curved_line) - if all_found_textline_polygons_marginals and len(all_found_textline_polygons_marginals)>0: - ocr_all_textlines_marginals = return_rnn_cnn_ocr_of_given_textlines(image_page, all_found_textline_polygons_marginals, self.prediction_model, self.b_s_ocr, self.num_to_char, self.textline_light, self.curved_line) + + if all_found_textline_polygons_marginals_left and len(all_found_textline_polygons_marginals_left)>0: + ocr_all_textlines_marginals_left = return_rnn_cnn_ocr_of_given_textlines(image_page, all_found_textline_polygons_marginals_left, self.prediction_model, self.b_s_ocr, self.num_to_char, self.textline_light, self.curved_line) + + if all_found_textline_polygons_marginals_right and len(all_found_textline_polygons_marginals_right)>0: + ocr_all_textlines_marginals_right = return_rnn_cnn_ocr_of_given_textlines(image_page, all_found_textline_polygons_marginals_right, self.prediction_model, self.b_s_ocr, self.num_to_char, self.textline_light, self.curved_line) else: ocr_all_textlines = None - ocr_all_textlines_marginals = None + ocr_all_textlines_marginals_left = None + ocr_all_textlines_marginals_right = None self.logger.info("detection of reading order took %.1fs", time.time() - t_order) pcgts = self.writer.build_pagexml_no_full_layout( txt_con_org, page_coord, order_text_new, id_of_texts_tot, - all_found_textline_polygons, all_box_coord, polygons_of_images, polygons_of_marginals, - all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals, - cont_page, polygons_lines_xml, contours_tables, ocr_all_textlines, ocr_all_textlines_marginals, conf_contours_textregions) + all_found_textline_polygons, all_box_coord, polygons_of_images, polygons_of_marginals_left, polygons_of_marginals_right, + all_found_textline_polygons_marginals_left, all_found_textline_polygons_marginals_right, all_box_coord_marginals_left, all_box_coord_marginals_right, slopes, slopes_marginals_left, slopes_marginals_right, + cont_page, polygons_lines_xml, contours_tables, ocr_all_textlines, ocr_all_textlines_marginals_left, ocr_all_textlines_marginals_right, conf_contours_textregions) return pcgts @@ -5145,7 +5219,7 @@ class Eynollah_ocr: self.b_s = int(batch_size) else: - self.model_ocr_dir = dir_models + "/model_eynollah_ocr_cnnrnn_20250805" + self.model_ocr_dir = dir_models + "/model_step_45000_ocr"#"/model_eynollah_ocr_cnnrnn_20250805"# model_ocr = load_model(self.model_ocr_dir , compile=False) self.prediction_model = tf.keras.models.Model( @@ -5156,7 +5230,7 @@ class Eynollah_ocr: else: self.b_s = int(batch_size) - with open(os.path.join(self.model_ocr_dir, "characters_org.txt"),"r") as config_file: + with open(os.path.join(self.model_ocr_dir, "characters_20250707_all_lang.txt"),"r") as config_file: characters = json.load(config_file) 
AUTOTUNE = tf.data.AUTOTUNE diff --git a/src/eynollah/mb_ro_on_layout.py b/src/eynollah/mb_ro_on_layout.py index 7625a90..c03d831 100644 --- a/src/eynollah/mb_ro_on_layout.py +++ b/src/eynollah/mb_ro_on_layout.py @@ -64,7 +64,7 @@ class machine_based_reading_order_on_layout: self.executor = ProcessPoolExecutor(max_workers=cpu_count(), timeout=1200) atexit.register(self.executor.shutdown) self.dir_models = dir_models - self.model_reading_order_dir = dir_models + "/model_step_5100000_mb_ro"#"/model_ens_reading_order_machine_based" + self.model_reading_order_dir = dir_models + "/model_eynollah_reading_order_20250824"#"/model_ens_reading_order_machine_based" try: for device in tf.config.list_physical_devices('GPU'): @@ -942,10 +942,18 @@ class machine_based_reading_order_on_layout: x_len = text_regions_p.shape[1] img_poly = np.zeros((y_len,x_len), dtype='uint8') - img_poly[text_regions_p[:,:]==1] = 1 - img_poly[text_regions_p[:,:]==2] = 2 - img_poly[text_regions_p[:,:]==3] = 4 - img_poly[text_regions_p[:,:]==6] = 5 + ###img_poly[text_regions_p[:,:]==1] = 1 + ###img_poly[text_regions_p[:,:]==2] = 2 + ###img_poly[text_regions_p[:,:]==3] = 4 + ###img_poly[text_regions_p[:,:]==6] = 5 + + ##img_poly[text_regions_p[:,:]==1] = 1 + ##img_poly[text_regions_p[:,:]==2] = 2 + ##img_poly[text_regions_p[:,:]==3] = 3 + ##img_poly[text_regions_p[:,:]==4] = 4 + ##img_poly[text_regions_p[:,:]==5] = 5 + + img_poly = np.copy(text_regions_p) img_header_and_sep = np.zeros((y_len,x_len), dtype='uint8') if contours_only_text_parent_h: diff --git a/src/eynollah/utils/utils_ocr.py b/src/eynollah/utils/utils_ocr.py index 1e9162a..d974650 100644 --- a/src/eynollah/utils/utils_ocr.py +++ b/src/eynollah/utils/utils_ocr.py @@ -384,57 +384,63 @@ def return_rnn_cnn_ocr_of_given_textlines(image, all_found_textline_polygons, pr for indexing, ind_poly_first in enumerate(all_found_textline_polygons): #ocr_textline_in_textregion = [] - for indexing2, ind_poly in enumerate(ind_poly_first): + if len(ind_poly_first)==0: cropped_lines_region_indexer.append(indexer_text_region) - if not (textline_light or curved_line): - ind_poly = copy.deepcopy(ind_poly) - box_ind = all_box_coord[indexing] + cropped_lines_meging_indexing.append(0) + img_fin = np.ones((image_height, image_width, 3))*1 + cropped_lines.append(img_fin) - ind_poly = return_textline_contour_with_added_box_coordinate(ind_poly, box_ind) - #print(ind_poly_copy) - ind_poly[ind_poly<0] = 0 - x, y, w, h = cv2.boundingRect(ind_poly) - - w_scaled = w * image_height/float(h) + else: + for indexing2, ind_poly in enumerate(ind_poly_first): + cropped_lines_region_indexer.append(indexer_text_region) + if not (textline_light or curved_line): + ind_poly = copy.deepcopy(ind_poly) + box_ind = all_box_coord[indexing] - mask_poly = np.zeros(image.shape) - - img_poly_on_img = np.copy(image) - - mask_poly = cv2.fillPoly(mask_poly, pts=[ind_poly], color=(1, 1, 1)) - - - - mask_poly = mask_poly[y:y+h, x:x+w, :] - img_crop = img_poly_on_img[y:y+h, x:x+w, :] - - img_crop[mask_poly==0] = 255 - - if w_scaled < 640:#1.5*image_width: - img_fin = preprocess_and_resize_image_for_ocrcnn_model(img_crop, image_height, image_width) - cropped_lines.append(img_fin) - cropped_lines_meging_indexing.append(0) - else: - splited_images, splited_images_bin = return_textlines_split_if_needed(img_crop, None) + ind_poly = return_textline_contour_with_added_box_coordinate(ind_poly, box_ind) + #print(ind_poly_copy) + ind_poly[ind_poly<0] = 0 + x, y, w, h = cv2.boundingRect(ind_poly) - if splited_images: - img_fin = 
preprocess_and_resize_image_for_ocrcnn_model(splited_images[0], image_height, image_width) - cropped_lines.append(img_fin) - cropped_lines_meging_indexing.append(1) - - img_fin = preprocess_and_resize_image_for_ocrcnn_model(splited_images[1], image_height, image_width) - - cropped_lines.append(img_fin) - cropped_lines_meging_indexing.append(-1) - - else: + w_scaled = w * image_height/float(h) + + mask_poly = np.zeros(image.shape) + + img_poly_on_img = np.copy(image) + + mask_poly = cv2.fillPoly(mask_poly, pts=[ind_poly], color=(1, 1, 1)) + + + + mask_poly = mask_poly[y:y+h, x:x+w, :] + img_crop = img_poly_on_img[y:y+h, x:x+w, :] + + img_crop[mask_poly==0] = 255 + + if w_scaled < 640:#1.5*image_width: img_fin = preprocess_and_resize_image_for_ocrcnn_model(img_crop, image_height, image_width) cropped_lines.append(img_fin) cropped_lines_meging_indexing.append(0) + else: + splited_images, splited_images_bin = return_textlines_split_if_needed(img_crop, None) + + if splited_images: + img_fin = preprocess_and_resize_image_for_ocrcnn_model(splited_images[0], image_height, image_width) + cropped_lines.append(img_fin) + cropped_lines_meging_indexing.append(1) + + img_fin = preprocess_and_resize_image_for_ocrcnn_model(splited_images[1], image_height, image_width) + + cropped_lines.append(img_fin) + cropped_lines_meging_indexing.append(-1) + + else: + img_fin = preprocess_and_resize_image_for_ocrcnn_model(img_crop, image_height, image_width) + cropped_lines.append(img_fin) + cropped_lines_meging_indexing.append(0) indexer_text_region+=1 - extracted_texts = [] n_iterations = math.ceil(len(cropped_lines) / b_s_ocr) diff --git a/src/eynollah/utils/xml.py b/src/eynollah/utils/xml.py index bd95702..13420df 100644 --- a/src/eynollah/utils/xml.py +++ b/src/eynollah/utils/xml.py @@ -46,16 +46,22 @@ def create_page_xml(imageFilename, height, width): )) return pcgts -def xml_reading_order(page, order_of_texts, id_of_marginalia): +def xml_reading_order(page, order_of_texts, id_of_marginalia_left, id_of_marginalia_right): region_order = ReadingOrderType() og = OrderedGroupType(id="ro357564684568544579089") page.set_ReadingOrder(region_order) region_order.set_OrderedGroup(og) region_counter = EynollahIdCounter() + + for id_marginal in id_of_marginalia_left: + og.add_RegionRefIndexed(RegionRefIndexedType(index=str(region_counter.get('region')), regionRef=id_marginal)) + region_counter.inc('region') + for idx_textregion, _ in enumerate(order_of_texts): og.add_RegionRefIndexed(RegionRefIndexedType(index=str(region_counter.get('region')), regionRef=region_counter.region_id(order_of_texts[idx_textregion] + 1))) region_counter.inc('region') - for id_marginal in id_of_marginalia: + + for id_marginal in id_of_marginalia_right: og.add_RegionRefIndexed(RegionRefIndexedType(index=str(region_counter.get('region')), regionRef=id_marginal)) region_counter.inc('region') diff --git a/src/eynollah/writer.py b/src/eynollah/writer.py index 085ee6f..2f9caf3 100644 --- a/src/eynollah/writer.py +++ b/src/eynollah/writer.py @@ -170,7 +170,7 @@ class EynollahXmlWriter(): with open(self.output_filename, 'w') as f: f.write(to_xml(pcgts)) - def build_pagexml_no_full_layout(self, found_polygons_text_region, page_coord, order_of_texts, id_of_texts, all_found_textline_polygons, all_box_coord, found_polygons_text_region_img, found_polygons_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals, cont_page, polygons_lines_to_be_written_in_xml, found_polygons_tables, ocr_all_textlines=None, 
ocr_all_textlines_marginals=None, conf_contours_textregion=None, skip_layout_reading_order=False): + def build_pagexml_no_full_layout(self, found_polygons_text_region, page_coord, order_of_texts, id_of_texts, all_found_textline_polygons, all_box_coord, found_polygons_text_region_img, found_polygons_marginals_left, found_polygons_marginals_right, all_found_textline_polygons_marginals_left, all_found_textline_polygons_marginals_right, all_box_coord_marginals_left, all_box_coord_marginals_right, slopes, slopes_marginals_left, slopes_marginals_right, cont_page, polygons_lines_to_be_written_in_xml, found_polygons_tables, ocr_all_textlines=None, ocr_all_textlines_marginals_left=None, ocr_all_textlines_marginals_right=None, conf_contours_textregion=None, skip_layout_reading_order=False): self.logger.debug('enter build_pagexml_no_full_layout') # create the file structure @@ -181,8 +181,9 @@ class EynollahXmlWriter(): counter = EynollahIdCounter() if len(found_polygons_text_region) > 0: _counter_marginals = EynollahIdCounter(region_idx=len(order_of_texts)) - id_of_marginalia = [_counter_marginals.next_region_id for _ in found_polygons_marginals] - xml_reading_order(page, order_of_texts, id_of_marginalia) + id_of_marginalia_left = [_counter_marginals.next_region_id for _ in found_polygons_marginals_left] + id_of_marginalia_right = [_counter_marginals.next_region_id for _ in found_polygons_marginals_right] + xml_reading_order(page, order_of_texts, id_of_marginalia_left, id_of_marginalia_right) for mm in range(len(found_polygons_text_region)): textregion = TextRegionType(id=counter.next_region_id, type_='paragraph', @@ -195,17 +196,29 @@ class EynollahXmlWriter(): else: ocr_textlines = None self.serialize_lines_in_region(textregion, all_found_textline_polygons, mm, page_coord, all_box_coord, slopes, counter, ocr_textlines) - - for mm in range(len(found_polygons_marginals)): + + for mm in range(len(found_polygons_marginals_left)): marginal = TextRegionType(id=counter.next_region_id, type_='marginalia', - Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_marginals[mm], page_coord))) + Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_marginals_left[mm], page_coord))) page.add_TextRegion(marginal) - if ocr_all_textlines_marginals: - ocr_textlines = ocr_all_textlines_marginals[mm] + if ocr_all_textlines_marginals_left: + ocr_textlines = ocr_all_textlines_marginals_left[mm] else: ocr_textlines = None - self.serialize_lines_in_marginal(marginal, all_found_textline_polygons_marginals, mm, page_coord, all_box_coord_marginals, slopes_marginals, counter, ocr_textlines) + #print(ocr_textlines, mm, len(all_found_textline_polygons_marginals_left[mm]) ) + self.serialize_lines_in_marginal(marginal, all_found_textline_polygons_marginals_left, mm, page_coord, all_box_coord_marginals_left, slopes_marginals_left, counter, ocr_textlines) + + for mm in range(len(found_polygons_marginals_right)): + marginal = TextRegionType(id=counter.next_region_id, type_='marginalia', + Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_marginals_right[mm], page_coord))) + page.add_TextRegion(marginal) + if ocr_all_textlines_marginals_right: + ocr_textlines = ocr_all_textlines_marginals_right[mm] + else: + ocr_textlines = None + + self.serialize_lines_in_marginal(marginal, all_found_textline_polygons_marginals_right, mm, page_coord, all_box_coord_marginals_right, slopes_marginals_right, counter, ocr_textlines) for mm in range(len(found_polygons_text_region_img)): img_region = 
ImageRegionType(id=counter.next_region_id, Coords=CoordsType()) @@ -249,7 +262,7 @@ class EynollahXmlWriter(): return pcgts - def build_pagexml_full_layout(self, found_polygons_text_region, found_polygons_text_region_h, page_coord, order_of_texts, id_of_texts, all_found_textline_polygons, all_found_textline_polygons_h, all_box_coord, all_box_coord_h, found_polygons_text_region_img, found_polygons_tables, found_polygons_drop_capitals, found_polygons_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_h, slopes_marginals, cont_page, polygons_lines_to_be_written_in_xml, ocr_all_textlines=None, ocr_all_textlines_h=None, ocr_all_textlines_marginals=None, ocr_all_textlines_drop=None, conf_contours_textregion=None, conf_contours_textregion_h=None): + def build_pagexml_full_layout(self, found_polygons_text_region, found_polygons_text_region_h, page_coord, order_of_texts, id_of_texts, all_found_textline_polygons, all_found_textline_polygons_h, all_box_coord, all_box_coord_h, found_polygons_text_region_img, found_polygons_tables, found_polygons_drop_capitals, found_polygons_marginals_left,found_polygons_marginals_right, all_found_textline_polygons_marginals_left, all_found_textline_polygons_marginals_right, all_box_coord_marginals_left, all_box_coord_marginals_right, slopes, slopes_h, slopes_marginals_left, slopes_marginals_right, cont_page, polygons_lines_to_be_written_in_xml, ocr_all_textlines=None, ocr_all_textlines_h=None, ocr_all_textlines_marginals_left=None, ocr_all_textlines_marginals_right=None, ocr_all_textlines_drop=None, conf_contours_textregion=None, conf_contours_textregion_h=None): self.logger.debug('enter build_pagexml_full_layout') # create the file structure @@ -259,8 +272,9 @@ class EynollahXmlWriter(): counter = EynollahIdCounter() _counter_marginals = EynollahIdCounter(region_idx=len(order_of_texts)) - id_of_marginalia = [_counter_marginals.next_region_id for _ in found_polygons_marginals] - xml_reading_order(page, order_of_texts, id_of_marginalia) + id_of_marginalia_left = [_counter_marginals.next_region_id for _ in found_polygons_marginals_left] + id_of_marginalia_right = [_counter_marginals.next_region_id for _ in found_polygons_marginals_right] + xml_reading_order(page, order_of_texts, id_of_marginalia_left, id_of_marginalia_right) for mm in range(len(found_polygons_text_region)): textregion = TextRegionType(id=counter.next_region_id, type_='paragraph', @@ -285,15 +299,25 @@ class EynollahXmlWriter(): ocr_textlines = None self.serialize_lines_in_region(textregion, all_found_textline_polygons_h, mm, page_coord, all_box_coord_h, slopes_h, counter, ocr_textlines) - for mm in range(len(found_polygons_marginals)): + for mm in range(len(found_polygons_marginals_left)): marginal = TextRegionType(id=counter.next_region_id, type_='marginalia', - Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_marginals[mm], page_coord))) + Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_marginals_left[mm], page_coord))) page.add_TextRegion(marginal) - if ocr_all_textlines_marginals: - ocr_textlines = ocr_all_textlines_marginals[mm] + if ocr_all_textlines_marginals_left: + ocr_textlines = ocr_all_textlines_marginals_left[mm] else: ocr_textlines = None - self.serialize_lines_in_marginal(marginal, all_found_textline_polygons_marginals, mm, page_coord, all_box_coord_marginals, slopes_marginals, counter, ocr_textlines) + self.serialize_lines_in_marginal(marginal, all_found_textline_polygons_marginals_left, mm, page_coord, 
all_box_coord_marginals_left, slopes_marginals_left, counter, ocr_textlines) + + for mm in range(len(found_polygons_marginals_right)): + marginal = TextRegionType(id=counter.next_region_id, type_='marginalia', + Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_marginals_right[mm], page_coord))) + page.add_TextRegion(marginal) + if ocr_all_textlines_marginals_right: + ocr_textlines = ocr_all_textlines_marginals_right[mm] + else: + ocr_textlines = None + self.serialize_lines_in_marginal(marginal, all_found_textline_polygons_marginals_right, mm, page_coord, all_box_coord_marginals_right, slopes_marginals_right, counter, ocr_textlines) for mm in range(len(found_polygons_drop_capitals)): dropcapital = TextRegionType(id=counter.next_region_id, type_='drop-capital', From fdcae8dd6e35c15e7e627be9bcd0a9a940b8b316 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Thu, 28 Aug 2025 11:30:59 +0200 Subject: [PATCH 40/40] eynollah ocr: support using either a specific model name or a models directory (default model) --- src/eynollah/cli.py | 18 +++++++++--------- src/eynollah/eynollah.py | 28 +++++++++++++++++----------- 2 files changed, 26 insertions(+), 20 deletions(-) diff --git a/src/eynollah/cli.py b/src/eynollah/cli.py index 67fd57e..9dc326d 100644 --- a/src/eynollah/cli.py +++ b/src/eynollah/cli.py @@ -456,6 +456,11 @@ def layout(image, out, overwrite, dir_in, model, save_images, save_layout, save_ help="directory of models", type=click.Path(exists=True, file_okay=False), ) +@click.option( + "--model_name", + help="Specific model file path to use for OCR", + type=click.Path(exists=True, file_okay=False), +) @click.option( "--tr_ocr", "-trocr/-notrocr", @@ -474,12 +479,6 @@ def layout(image, out, overwrite, dir_in, model, save_images, save_layout, save_ is_flag=True, help="if this parameter set to true, cropped textline images will not be masked with textline contour.", ) -@click.option( - "--draw_texts_on_image", - "-dtoi/-ndtoi", - is_flag=True, - help="if this parameter set to true, the predicted texts will be displayed on an image.", -) @click.option( "--prediction_with_both_of_rgb_and_bin", "-brb/-nbrb", @@ -508,16 +507,17 @@ def layout(image, out, overwrite, dir_in, model, save_images, save_layout, save_ help="Override log level globally to this", ) -def ocr(image, overwrite, dir_in, dir_in_bin, out, dir_xmls, dir_out_image_text, model, tr_ocr, export_textline_images_and_text, do_not_mask_with_textline_contour, draw_texts_on_image, prediction_with_both_of_rgb_and_bin, batch_size, dataset_abbrevation, min_conf_value_of_textline_text, log_level): +def ocr(image, overwrite, dir_in, dir_in_bin, out, dir_xmls, dir_out_image_text, model, model_name, tr_ocr, export_textline_images_and_text, do_not_mask_with_textline_contour, prediction_with_both_of_rgb_and_bin, batch_size, dataset_abbrevation, min_conf_value_of_textline_text, log_level): initLogging() if log_level: getLogger('eynollah').setLevel(getLevelName(log_level)) + + assert not model or not model_name, "model directory -m can not be set alongside specific model name --model_name" assert not export_textline_images_and_text or not tr_ocr, "Exporting textline and text -etit can not be set alongside transformer ocr -tr_ocr" assert not export_textline_images_and_text or not model, "Exporting textline and text -etit can not be set alongside model -m" assert not export_textline_images_and_text or not batch_size, "Exporting textline and text -etit can not be set alongside batch size -bs" assert not 
export_textline_images_and_text or not dir_in_bin, "Exporting textline and text -etit can not be set alongside directory of bin images -dib" assert not export_textline_images_and_text or not dir_out_image_text, "Exporting textline and text -etit can not be set alongside directory of images with predicted text -doit" - assert not export_textline_images_and_text or not draw_texts_on_image, "Exporting textline and text -etit can not be set alongside draw text on image -dtoi" assert not export_textline_images_and_text or not prediction_with_both_of_rgb_and_bin, "Exporting textline and text -etit can not be set alongside prediction with both rgb and bin -brb" assert (bool(image) ^ bool(dir_in)), "Either -i (single image) or -di (directory) must be provided, but not both." eynollah_ocr = Eynollah_ocr( @@ -528,10 +528,10 @@ def ocr(image, overwrite, dir_in, dir_in_bin, out, dir_xmls, dir_out_image_text, dir_in_bin=dir_in_bin, dir_out=out, dir_models=model, + model_name=model_name, tr_ocr=tr_ocr, export_textline_images_and_text=export_textline_images_and_text, do_not_mask_with_textline_contour=do_not_mask_with_textline_contour, - draw_texts_on_image=draw_texts_on_image, prediction_with_both_of_rgb_and_bin=prediction_with_both_of_rgb_and_bin, batch_size=batch_size, pref_of_dataset=dataset_abbrevation, diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py index 30e180d..ec2900f 100644 --- a/src/eynollah/eynollah.py +++ b/src/eynollah/eynollah.py @@ -5171,6 +5171,7 @@ class Eynollah_ocr: def __init__( self, dir_models, + model_name=None, dir_xmls=None, dir_in=None, image_filename=None, @@ -5181,7 +5182,6 @@ class Eynollah_ocr: batch_size=None, export_textline_images_and_text=False, do_not_mask_with_textline_contour=False, - draw_texts_on_image=False, prediction_with_both_of_rgb_and_bin=False, pref_of_dataset=None, min_conf_value_of_textline_text : Optional[float]=None, @@ -5193,10 +5193,10 @@ class Eynollah_ocr: self.dir_out = dir_out self.dir_xmls = dir_xmls self.dir_models = dir_models + self.model_name = model_name self.tr_ocr = tr_ocr self.export_textline_images_and_text = export_textline_images_and_text self.do_not_mask_with_textline_contour = do_not_mask_with_textline_contour - self.draw_texts_on_image = draw_texts_on_image self.dir_out_image_text = dir_out_image_text self.prediction_with_both_of_rgb_and_bin = prediction_with_both_of_rgb_and_bin self.pref_of_dataset = pref_of_dataset @@ -5210,7 +5210,10 @@ class Eynollah_ocr: if tr_ocr: self.processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-printed") self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") - self.model_ocr_dir = dir_models + "/trocr_model_ens_of_3_checkpoints_201124" + if self.model_name: + self.model_ocr_dir = self.model_name + else: + self.model_ocr_dir = dir_models + "/trocr_model_ens_of_3_checkpoints_201124" self.model_ocr = VisionEncoderDecoderModel.from_pretrained(self.model_ocr_dir) self.model_ocr.to(self.device) if not batch_size: @@ -5219,7 +5222,10 @@ class Eynollah_ocr: self.b_s = int(batch_size) else: - self.model_ocr_dir = dir_models + "/model_step_45000_ocr"#"/model_eynollah_ocr_cnnrnn_20250805"# + if self.model_name: + self.model_ocr_dir = self.model_name + else: + self.model_ocr_dir = dir_models + "/model_eynollah_ocr_cnnrnn_20250805" model_ocr = load_model(self.model_ocr_dir , compile=False) self.prediction_model = tf.keras.models.Model( @@ -5230,7 +5236,7 @@ class Eynollah_ocr: else: self.b_s = int(batch_size) - with open(os.path.join(self.model_ocr_dir, 
"characters_20250707_all_lang.txt"),"r") as config_file: + with open(os.path.join(self.model_ocr_dir, "characters_org.txt"),"r") as config_file: characters = json.load(config_file) AUTOTUNE = tf.data.AUTOTUNE @@ -5271,7 +5277,7 @@ class Eynollah_ocr: img = cv2.imread(dir_img) - if self.draw_texts_on_image: + if self.dir_out_image_text: out_image_with_text = os.path.join(self.dir_out_image_text, file_name+'.png') image_text = Image.new("RGB", (img.shape[1], img.shape[0]), "white") draw = ImageDraw.Draw(image_text) @@ -5306,7 +5312,7 @@ class Eynollah_ocr: textline_coords = np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ] for x in p_h] ) x,y,w,h = cv2.boundingRect(textline_coords) - if self.draw_texts_on_image: + if self.dir_out_image_text: total_bb_coordinates.append([x,y,w,h]) h2w_ratio = h/float(w) @@ -5363,7 +5369,7 @@ class Eynollah_ocr: unique_cropped_lines_region_indexer = np.unique(cropped_lines_region_indexer) - if self.draw_texts_on_image: + if self.dir_out_image_text: font_path = "Charis-7.000/Charis-Regular.ttf" # Make sure this file exists! font = ImageFont.truetype(font_path, 40) @@ -5463,7 +5469,7 @@ class Eynollah_ocr: dir_img_bin = os.path.join(self.dir_in_bin, file_name+'.png') img_bin = cv2.imread(dir_img_bin) - if self.draw_texts_on_image: + if self.dir_out_image_text: out_image_with_text = os.path.join(self.dir_out_image_text, file_name+'.png') image_text = Image.new("RGB", (img.shape[1], img.shape[0]), "white") draw = ImageDraw.Draw(image_text) @@ -5508,7 +5514,7 @@ class Eynollah_ocr: if type_textregion=='drop-capital': angle_degrees = 0 - if self.draw_texts_on_image: + if self.dir_out_image_text: total_bb_coordinates.append([x,y,w,h]) w_scaled = w * image_height/float(h) @@ -5829,7 +5835,7 @@ class Eynollah_ocr: unique_cropped_lines_region_indexer = np.unique(cropped_lines_region_indexer) - if self.draw_texts_on_image: + if self.dir_out_image_text: font_path = "Charis-7.000/Charis-Regular.ttf" # Make sure this file exists! font = ImageFont.truetype(font_path, 40)