commit 21ec4fb is picked + rnn ocr at the same time with segmentation + enhancement of mb reading order

2025-11-17 09:54:18 +01:00 · 2025-05-23 15:55:03 +02:00 · 2025-05-23 15:55:03 +02:00 · d4f6e10251
commit d4f6e10251
parent a0647eff93
4 changed files with 729 additions and 526 deletions
--- a/src/eynollah/cli.py
+++ b/src/eynollah/cli.py
@ -225,6 +225,17 @@ def binarization(patches, model_dir, input_image, output_image, dir_in, dir_out)
    is_flag=True,
    help="if this parameter set to true, this tool will try to do ocr",
 )
+@click.option(
+    "--transformer_ocr",
+    "-tr/-notr",
+    is_flag=True,
+    help="if this parameter set to true, this tool will apply transformer ocr",
+)
+@click.option(
+    "--batch_size_ocr",
+    "-bs_ocr",
+    help="number of inference batch size of ocr model. Default b_s for trocr and cnn_rnn models are 2 and 8 respectively",
+)
@click.option(
    "--num_col_upper",
    "-ncu",
@ -258,7 +269,7 @@ def binarization(patches, model_dir, input_image, output_image, dir_in, dir_out)
    help="Override log level globally to this",
 )

-def layout(image, out, overwrite, dir_in, model, save_images, save_layout, save_deskewed, save_all, extract_only_images, save_page, enable_plotting, allow_enhancement, curved_line, textline_light, full_layout, tables, right2left, input_binary, allow_scaling, headers_off, light_version, reading_order_machine_based, do_ocr, num_col_upper, num_col_lower, threshold_art_class_textline, threshold_art_class_layout, skip_layout_and_reading_order, ignore_page_extraction, log_level):
+def layout(image, out, overwrite, dir_in, model, save_images, save_layout, save_deskewed, save_all, extract_only_images, save_page, enable_plotting, allow_enhancement, curved_line, textline_light, full_layout, tables, right2left, input_binary, allow_scaling, headers_off, light_version, reading_order_machine_based, do_ocr, transformer_ocr, batch_size_ocr, num_col_upper, num_col_lower, threshold_art_class_textline, threshold_art_class_layout, skip_layout_and_reading_order, ignore_page_extraction, log_level):
    initLogging()
    if log_level:
        getLogger('eynollah').setLevel(getLevelName(log_level))
@ -305,6 +316,8 @@ def layout(image, out, overwrite, dir_in, model, save_images, save_layout, save_
        ignore_page_extraction=ignore_page_extraction,
        reading_order_machine_based=reading_order_machine_based,
        do_ocr=do_ocr,
+        transformer_ocr=transformer_ocr,
+        batch_size_ocr=batch_size_ocr,
        num_col_upper=num_col_upper,
        num_col_lower=num_col_lower,
        skip_layout_and_reading_order=skip_layout_and_reading_order,
--- a/src/eynollah/eynollah.py
+++ b/src/eynollah/eynollah.py
@ -80,6 +80,13 @@ from .utils.rotate import (
    rotation_not_90_func_full_layout,
    rotation_image_new
 )
+from .utils.utils_ocr import (
+    return_textline_contour_with_added_box_coordinate,
+    preprocess_and_resize_image_for_ocrcnn_model,
+    return_textlines_split_if_needed,
+    decode_batch_predictions,
+    return_rnn_cnn_ocr_of_given_textlines
+)
 from .utils.separate_lines import (
    textline_contours_postprocessing,
    separate_lines_new2,
@ -199,6 +206,8 @@ class Eynollah:
        ignore_page_extraction : bool = False,
        reading_order_machine_based : bool = False,
        do_ocr : bool = False,
+        transformer_ocr: bool = False,
+        batch_size_ocr: Optional[int] = None,
        num_col_upper : Optional[int] = None,
        num_col_lower : Optional[int] = None,
        threshold_art_class_layout: Optional[float] = None,
@ -232,6 +241,7 @@ class Eynollah:
        self.ignore_page_extraction = ignore_page_extraction
        self.skip_layout_and_reading_order = skip_layout_and_reading_order
        self.ocr = do_ocr
+        self.tr = transformer_ocr
        if num_col_upper:
            self.num_col_upper = int(num_col_upper)
        else:
@ -273,7 +283,7 @@ class Eynollah:
        self.model_region_dir_p_ens = dir_models + "/eynollah-main-regions-ensembled_20210425"
        self.model_region_dir_p_ens_light = dir_models + "/eynollah-main-regions_20220314"
        self.model_region_dir_p_ens_light_only_images_extraction = dir_models + "/eynollah-main-regions_20231127_672_org_ens_11_13_16_17_18"
-        self.model_reading_order_dir = dir_models + "/model_step_2500000_mb_ro"#"/model_ens_reading_order_machine_based"
+        self.model_reading_order_dir = dir_models + "/model_step_4800000_mb_ro"#"/model_ens_reading_order_machine_based"
        #"/modelens_12sp_elay_0_3_4__3_6_n"
        #"/modelens_earlylayout_12spaltige_2_3_5_6_7_8"
        #"/modelens_early12_sp_2_3_5_6_7_8_9_10_12_14_15_16_18"
@ -300,8 +310,10 @@ class Eynollah:
        else:
            #"/eynollah-textline_20210425"
            self.model_textline_dir = dir_models + "/modelens_textline_0_1__2_4_16092024"
-        if self.ocr:
+        if self.ocr and self.tr:
            self.model_ocr_dir = dir_models + "/trocr_model_ens_of_3_checkpoints_201124"
+        elif self.ocr and not self.tr:
+            self.model_ocr_dir = dir_models + "/model_step_750000_ocr"#"/model_step_125000_ocr"#"/model_step_25000_ocr"#"/model_step_1050000_ocr"#"/model_0_ocr_cnnrnn"#"/model_23_ocr_cnnrnn"
        if self.tables:
            if self.light_version:
                self.model_table_dir = dir_models + "/modelens_table_0t4_201124"
@ -341,11 +353,37 @@ class Eynollah:
            self.model_region_fl = self.our_load_model(self.model_region_dir_fully)
            if self.reading_order_machine_based:
                self.model_reading_order = self.our_load_model(self.model_reading_order_dir)
-            if self.ocr:
+            if self.ocr and self.tr:
                self.model_ocr = VisionEncoderDecoderModel.from_pretrained(self.model_ocr_dir)
                self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
                #("microsoft/trocr-base-printed")#("microsoft/trocr-base-handwritten")
                self.processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-handwritten")
+            elif self.ocr and not self.tr:
+                model_ocr = load_model(self.model_ocr_dir , compile=False)
+                
+                self.prediction_model = tf.keras.models.Model(
+                                model_ocr.get_layer(name = "image").input, 
+                                model_ocr.get_layer(name = "dense2").output)
+                if not batch_size_ocr:
+                    self.b_s_ocr = 8
+                else:
+                    self.b_s_ocr = int(batch_size_ocr)
+
+                    
+                with open(os.path.join(self.model_ocr_dir, "characters_org.txt"),"r") as config_file:
+                    characters = json.load(config_file)
+
+                    
+                AUTOTUNE = tf.data.AUTOTUNE
+
+                # Mapping characters to integers.
+                char_to_num = StringLookup(vocabulary=list(characters), mask_token=None)
+
+                # Mapping integers back to original characters.
+                self.num_to_char = StringLookup(
+                    vocabulary=char_to_num.get_vocabulary(), mask_token=None, invert=True
+                )
+                
            if self.tables:
                self.model_table = self.our_load_model(self.model_table_dir)

@ -1325,11 +1363,11 @@ class Eynollah:
                        seg_art[seg_art>0] =1

                        seg_line = label_p_pred[:,:,:,3]
-                        seg_line[seg_line>0.3] =1#seg_line[seg_line>0.5] =1#seg_line[seg_line>0.1] =1
+                        seg_line[seg_line>0.4] =1#seg_line[seg_line>0.5] =1#seg_line[seg_line>0.1] =1
                        seg_line[seg_line<1] =0

                        ##seg[seg_art==1]=4
-                        seg[(seg_line==1) & (seg==0)]=3
+                        #seg[(seg_line==1) & (seg==0)]=3
                    if thresholding_for_artificial_class_in_light_version:
                        seg_art = label_p_pred[:,:,:,2]

@ -2060,7 +2098,7 @@ class Eynollah:
            ###img_bin = np.copy(prediction_bin)
        ###else:
            ###img_bin = np.copy(img_resized)
-        if self.ocr and not self.input_binary:
+        if (self.ocr and self.tr) and not self.input_binary:
            prediction_bin = self.do_prediction(True, img_resized, self.model_bin, n_batch_inference=5)
            prediction_bin = 255 * (prediction_bin[:,:,0] == 0)
            prediction_bin = np.repeat(prediction_bin[:, :, np.newaxis], 3, axis=2)
@ -3485,8 +3523,10 @@ class Eynollah:
        # 6 is the separators lable in old full layout model
        # 4 is the drop capital class in old full layout model
        # in the new full layout drop capital is 3 and separators are 5
-
-        text_regions_p[:,:][regions_fully[:,:,0]==5]=6
+        
+        # the separators in full layout will not be written on layout
+        if not self.reading_order_machine_based:
+            text_regions_p[:,:][regions_fully[:,:,0]==5]=6
        ###regions_fully[:, :, 0][regions_fully_only_drop[:, :, 0] == 3] = 4

        #text_regions_p[:,:][regions_fully[:,:,0]==6]=6
@ -3555,11 +3595,37 @@ class Eynollah:
        return model

    def do_order_of_regions_with_model(self, contours_only_text_parent, contours_only_text_parent_h, text_regions_p):
-        #cv2.imwrite('textregions.png', text_regions_p*50)
+        
+        height1 =672#448
+        width1 = 448#224
+
+        height2 =672#448
+        width2= 448#224
+
+        height3 =672#448
+        width3 = 448#224
+        
+        inference_bs = 3
+        
+        cv2.imwrite('textregions.png', text_regions_p*50)
+        cv2.imwrite('sep.png', (text_regions_p[:,:]==6)*255)
+        
+        ver_kernel = np.ones((5, 1), dtype=np.uint8)
+        hor_kernel = np.ones((1, 5), dtype=np.uint8)
+        
+        
+        
+        #separators = (text_regions_p[:,:]==6)*1
+        #text_regions_p[text_regions_p[:,:]==6] = 0
+        #separators = separators.astype('uint8')
+        
+        #separators = cv2.erode(separators , hor_kernel, iterations=1)
+        #text_regions_p[separators[:,:]==1] = 6
+        
+        #cv2.imwrite('sep_new.png', (text_regions_p[:,:]==6)*255)
+        
        min_cont_size_to_be_dilated = 10
        if len(contours_only_text_parent)>min_cont_size_to_be_dilated:
-            ver_kernel = np.ones((5, 1), dtype=np.uint8)
-            
            cx_conts, cy_conts, x_min_conts, x_max_conts, y_min_conts, y_max_conts, _ = find_new_features_of_contours(contours_only_text_parent)
            args_cont_located = np.array(range(len(contours_only_text_parent)))
            
@ -3595,12 +3661,13 @@ class Eynollah:
                textregion_par = (text_regions_p[:,:]==1)*1
                textregion_par = textregion_par.astype('uint8')
                
-            
-            text_regions_p_textregions_dilated = cv2.dilate(textregion_par , ver_kernel, iterations=8)
+            text_regions_p_textregions_dilated = cv2.erode(textregion_par , hor_kernel, iterations=2)
+            text_regions_p_textregions_dilated = cv2.dilate(text_regions_p_textregions_dilated , ver_kernel, iterations=4)
+            text_regions_p_textregions_dilated = cv2.erode(text_regions_p_textregions_dilated , hor_kernel, iterations=1)
+            text_regions_p_textregions_dilated = cv2.dilate(text_regions_p_textregions_dilated , ver_kernel, iterations=5)
            text_regions_p_textregions_dilated[text_regions_p[:,:]>1] = 0
            
-            #cv2.imwrite('textregions_dilated.png', text_regions_p_textregions_dilated*255)
-
+            cv2.imwrite('text_regions_p_textregions_dilated.png', text_regions_p_textregions_dilated*255)
            
            contours_only_dilated, hir_on_text_dilated = return_contours_of_image(text_regions_p_textregions_dilated)
            contours_only_dilated = return_parent_contours(contours_only_dilated, hir_on_text_dilated)
@ -3664,7 +3731,8 @@ class Eynollah:

        if not len(co_text_all):
            return [], []
-
+        print(len(co_text_all), "co_text_all")
+        print(len(co_text_all_org), "co_text_all_org")
        labels_con = np.zeros((int(y_len /6.), int(x_len/6.), len(co_text_all)), dtype=bool)
        co_text_all = [(i/6).astype(int) for i in co_text_all]
        for i in range(len(co_text_all)):
@ -3675,21 +3743,13 @@ class Eynollah:
            cv2.fillPoly(img, pts=[co_text_all[i]], color=(1,))
            labels_con[:,:,i] = img

-        height1 =672#448
-        width1 = 448#224
-
-        height2 =672#448
-        width2= 448#224
-
-        height3 =672#448
-        width3 = 448#224

        labels_con = resize_image(labels_con.astype(np.uint8), height1, width1).astype(bool)
        img_header_and_sep = resize_image(img_header_and_sep, height1, width1)
        img_poly = resize_image(img_poly, height3, width3)
        

-        inference_bs = 3
+        
        input_1 = np.zeros((inference_bs, height1, width1, 3))
        ordered = [list(range(len(co_text_all)))]
        index_update = 0
@ -3760,217 +3820,213 @@ class Eynollah:
            return ordered, region_ids
            

-    def return_start_and_end_of_common_text_of_textline_ocr(self, textline_image, ind_tot):
-        width = np.shape(textline_image)[1]
-        height = np.shape(textline_image)[0]
-        common_window = int(0.2*width)
+    ####def return_start_and_end_of_common_text_of_textline_ocr(self, textline_image, ind_tot):
+        ####width = np.shape(textline_image)[1]
+        ####height = np.shape(textline_image)[0]
+        ####common_window = int(0.2*width)

-        width1 = int ( width/2. - common_window )
-        width2 = int ( width/2. + common_window )
+        ####width1 = int ( width/2. - common_window )
+        ####width2 = int ( width/2. + common_window )

-        img_sum = np.sum(textline_image[:,:,0], axis=0)
-        sum_smoothed = gaussian_filter1d(img_sum, 3)
+        ####img_sum = np.sum(textline_image[:,:,0], axis=0)
+        ####sum_smoothed = gaussian_filter1d(img_sum, 3)

-        peaks_real, _ = find_peaks(sum_smoothed, height=0)
-        if len(peaks_real)>70:
+        ####peaks_real, _ = find_peaks(sum_smoothed, height=0)
+        ####if len(peaks_real)>70:

-            peaks_real = peaks_real[(peaks_real<width2) & (peaks_real>width1)]
+            ####peaks_real = peaks_real[(peaks_real<width2) & (peaks_real>width1)]

-            arg_sort = np.argsort(sum_smoothed[peaks_real])
-            arg_sort4 =arg_sort[::-1][:4]
-            peaks_sort_4 = peaks_real[arg_sort][::-1][:4]
-            argsort_sorted = np.argsort(peaks_sort_4)
+            ####arg_sort = np.argsort(sum_smoothed[peaks_real])
+            ####arg_sort4 =arg_sort[::-1][:4]
+            ####peaks_sort_4 = peaks_real[arg_sort][::-1][:4]
+            ####argsort_sorted = np.argsort(peaks_sort_4)

-            first_4_sorted = peaks_sort_4[argsort_sorted]
-            y_4_sorted = sum_smoothed[peaks_real][arg_sort4[argsort_sorted]]
-            #print(first_4_sorted,'first_4_sorted')
+            ####first_4_sorted = peaks_sort_4[argsort_sorted]
+            ####y_4_sorted = sum_smoothed[peaks_real][arg_sort4[argsort_sorted]]
+            #####print(first_4_sorted,'first_4_sorted')

-            arg_sortnew = np.argsort(y_4_sorted)
-            peaks_final =np.sort( first_4_sorted[arg_sortnew][2:] )
+            ####arg_sortnew = np.argsort(y_4_sorted)
+            ####peaks_final =np.sort( first_4_sorted[arg_sortnew][2:] )

-            #plt.figure(ind_tot)
-            #plt.imshow(textline_image)
-            #plt.plot([peaks_final[0], peaks_final[0]], [0, height-1])
-            #plt.plot([peaks_final[1], peaks_final[1]], [0, height-1])
-            #plt.savefig('./'+str(ind_tot)+'.png')
+            #####plt.figure(ind_tot)
+            #####plt.imshow(textline_image)
+            #####plt.plot([peaks_final[0], peaks_final[0]], [0, height-1])
+            #####plt.plot([peaks_final[1], peaks_final[1]], [0, height-1])
+            #####plt.savefig('./'+str(ind_tot)+'.png')

-            return peaks_final[0], peaks_final[1]
-        else:
-            pass
+            ####return peaks_final[0], peaks_final[1]
+        ####else:
+            ####pass

-    def return_start_and_end_of_common_text_of_textline_ocr_without_common_section(self, textline_image, ind_tot):
-        width = np.shape(textline_image)[1]
-        height = np.shape(textline_image)[0]
-        common_window = int(0.06*width)
+    ##def return_start_and_end_of_common_text_of_textline_ocr_without_common_section(self, textline_image, ind_tot):
+        ##width = np.shape(textline_image)[1]
+        ##height = np.shape(textline_image)[0]
+        ##common_window = int(0.06*width)

-        width1 = int ( width/2. - common_window )
-        width2 = int ( width/2. + common_window )
+        ##width1 = int ( width/2. - common_window )
+        ##width2 = int ( width/2. + common_window )

-        img_sum = np.sum(textline_image[:,:,0], axis=0)
-        sum_smoothed = gaussian_filter1d(img_sum, 3)
+        ##img_sum = np.sum(textline_image[:,:,0], axis=0)
+        ##sum_smoothed = gaussian_filter1d(img_sum, 3)

-        peaks_real, _ = find_peaks(sum_smoothed, height=0)
-        if len(peaks_real)>70:
-            #print(len(peaks_real), 'len(peaks_real)')
+        ##peaks_real, _ = find_peaks(sum_smoothed, height=0)
+        ##if len(peaks_real)>70:
+            ###print(len(peaks_real), 'len(peaks_real)')

-            peaks_real = peaks_real[(peaks_real<width2) & (peaks_real>width1)]
+            ##peaks_real = peaks_real[(peaks_real<width2) & (peaks_real>width1)]

-            arg_max = np.argmax(sum_smoothed[peaks_real])
-            peaks_final = peaks_real[arg_max]
+            ##arg_max = np.argmax(sum_smoothed[peaks_real])
+            ##peaks_final = peaks_real[arg_max]

-            #plt.figure(ind_tot)
-            #plt.imshow(textline_image)
-            #plt.plot([peaks_final, peaks_final], [0, height-1])
-            ##plt.plot([peaks_final[1], peaks_final[1]], [0, height-1])
-            #plt.savefig('./'+str(ind_tot)+'.png')
+            ###plt.figure(ind_tot)
+            ###plt.imshow(textline_image)
+            ###plt.plot([peaks_final, peaks_final], [0, height-1])
+            ####plt.plot([peaks_final[1], peaks_final[1]], [0, height-1])
+            ###plt.savefig('./'+str(ind_tot)+'.png')

-            return peaks_final
-        else:
-            return None
+            ##return peaks_final
+        ##else:
+            ##return None

-    def return_start_and_end_of_common_text_of_textline_ocr_new_splitted(
-            self, peaks_real, sum_smoothed, start_split, end_split):
+    ###def return_start_and_end_of_common_text_of_textline_ocr_new_splitted(
+            ###self, peaks_real, sum_smoothed, start_split, end_split):

-        peaks_real = peaks_real[(peaks_real<end_split) & (peaks_real>start_split)]
+        ###peaks_real = peaks_real[(peaks_real<end_split) & (peaks_real>start_split)]

-        arg_sort = np.argsort(sum_smoothed[peaks_real])
-        arg_sort4 =arg_sort[::-1][:4]
-        peaks_sort_4 = peaks_real[arg_sort][::-1][:4]
-        argsort_sorted = np.argsort(peaks_sort_4)
+        ###arg_sort = np.argsort(sum_smoothed[peaks_real])
+        ###arg_sort4 =arg_sort[::-1][:4]
+        ###peaks_sort_4 = peaks_real[arg_sort][::-1][:4]
+        ###argsort_sorted = np.argsort(peaks_sort_4)

-        first_4_sorted = peaks_sort_4[argsort_sorted]
-        y_4_sorted = sum_smoothed[peaks_real][arg_sort4[argsort_sorted]]
-        #print(first_4_sorted,'first_4_sorted')
+        ###first_4_sorted = peaks_sort_4[argsort_sorted]
+        ###y_4_sorted = sum_smoothed[peaks_real][arg_sort4[argsort_sorted]]
+        ####print(first_4_sorted,'first_4_sorted')

-        arg_sortnew = np.argsort(y_4_sorted)
-        peaks_final =np.sort( first_4_sorted[arg_sortnew][3:] )
-        return peaks_final[0]
+        ###arg_sortnew = np.argsort(y_4_sorted)
+        ###peaks_final =np.sort( first_4_sorted[arg_sortnew][3:] )
+        ###return peaks_final[0]

-    def return_start_and_end_of_common_text_of_textline_ocr_new(self, textline_image, ind_tot):
-        width = np.shape(textline_image)[1]
-        height = np.shape(textline_image)[0]
-        common_window = int(0.15*width)
+    ###def return_start_and_end_of_common_text_of_textline_ocr_new(self, textline_image, ind_tot):
+        ###width = np.shape(textline_image)[1]
+        ###height = np.shape(textline_image)[0]
+        ###common_window = int(0.15*width)

-        width1 = int ( width/2. - common_window )
-        width2 = int ( width/2. + common_window )
-        mid = int(width/2.)
+        ###width1 = int ( width/2. - common_window )
+        ###width2 = int ( width/2. + common_window )
+        ###mid = int(width/2.)

-        img_sum = np.sum(textline_image[:,:,0], axis=0)
-        sum_smoothed = gaussian_filter1d(img_sum, 3)
+        ###img_sum = np.sum(textline_image[:,:,0], axis=0)
+        ###sum_smoothed = gaussian_filter1d(img_sum, 3)

-        peaks_real, _ = find_peaks(sum_smoothed, height=0)
-        if len(peaks_real)>70:
-            peak_start = self.return_start_and_end_of_common_text_of_textline_ocr_new_splitted(
-                peaks_real, sum_smoothed, width1, mid+2)
-            peak_end = self.return_start_and_end_of_common_text_of_textline_ocr_new_splitted(
-                peaks_real, sum_smoothed, mid-2, width2)
+        ###peaks_real, _ = find_peaks(sum_smoothed, height=0)
+        ###if len(peaks_real)>70:
+            ###peak_start = self.return_start_and_end_of_common_text_of_textline_ocr_new_splitted(
+                ###peaks_real, sum_smoothed, width1, mid+2)
+            ###peak_end = self.return_start_and_end_of_common_text_of_textline_ocr_new_splitted(
+                ###peaks_real, sum_smoothed, mid-2, width2)

-            #plt.figure(ind_tot)
-            #plt.imshow(textline_image)
-            #plt.plot([peak_start, peak_start], [0, height-1])
-            #plt.plot([peak_end, peak_end], [0, height-1])
-            #plt.savefig('./'+str(ind_tot)+'.png')
+            ####plt.figure(ind_tot)
+            ####plt.imshow(textline_image)
+            ####plt.plot([peak_start, peak_start], [0, height-1])
+            ####plt.plot([peak_end, peak_end], [0, height-1])
+            ####plt.savefig('./'+str(ind_tot)+'.png')

-            return peak_start, peak_end
-        else:
-            pass
+            ###return peak_start, peak_end
+        ###else:
+            ###pass

-    def return_ocr_of_textline_without_common_section(
-            self, textline_image, model_ocr, processor, device, width_textline, h2w_ratio,ind_tot):
+    ##def return_ocr_of_textline_without_common_section(
+            ##self, textline_image, model_ocr, processor, device, width_textline, h2w_ratio,ind_tot):

-        if h2w_ratio > 0.05:
-            pixel_values = processor(textline_image, return_tensors="pt").pixel_values
-            generated_ids = model_ocr.generate(pixel_values.to(device))
-            generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
-        else:
-            #width = np.shape(textline_image)[1]
-            #height = np.shape(textline_image)[0]
-            #common_window = int(0.3*width)
-            #width1 = int ( width/2. - common_window )
-            #width2 = int ( width/2. + common_window )
+        ##if h2w_ratio > 0.05:
+            ##pixel_values = processor(textline_image, return_tensors="pt").pixel_values
+            ##generated_ids = model_ocr.generate(pixel_values.to(device))
+            ##generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
+        ##else:
+            ###width = np.shape(textline_image)[1]
+            ###height = np.shape(textline_image)[0]
+            ###common_window = int(0.3*width)
+            ###width1 = int ( width/2. - common_window )
+            ###width2 = int ( width/2. + common_window )

-            split_point = self.return_start_and_end_of_common_text_of_textline_ocr_without_common_section(
-                textline_image, ind_tot)
-            if split_point:
-                image1 = textline_image[:, :split_point,:]# image.crop((0, 0, width2, height))
-                image2 = textline_image[:, split_point:,:]#image.crop((width1, 0, width, height))
+            ##split_point = self.return_start_and_end_of_common_text_of_textline_ocr_without_common_section(
+                ##textline_image, ind_tot)
+            ##if split_point:
+                ##image1 = textline_image[:, :split_point,:]# image.crop((0, 0, width2, height))
+                ##image2 = textline_image[:, split_point:,:]#image.crop((width1, 0, width, height))

-                #pixel_values1 = processor(image1, return_tensors="pt").pixel_values
-                #pixel_values2 = processor(image2, return_tensors="pt").pixel_values
+                ###pixel_values1 = processor(image1, return_tensors="pt").pixel_values
+                ###pixel_values2 = processor(image2, return_tensors="pt").pixel_values

-                pixel_values_merged = processor([image1,image2], return_tensors="pt").pixel_values
-                generated_ids_merged = model_ocr.generate(pixel_values_merged.to(device))
-                generated_text_merged = processor.batch_decode(generated_ids_merged, skip_special_tokens=True)
+                ##pixel_values_merged = processor([image1,image2], return_tensors="pt").pixel_values
+                ##generated_ids_merged = model_ocr.generate(pixel_values_merged.to(device))
+                ##generated_text_merged = processor.batch_decode(generated_ids_merged, skip_special_tokens=True)

-                #print(generated_text_merged,'generated_text_merged')
+                ###print(generated_text_merged,'generated_text_merged')

-                #generated_ids1 = model_ocr.generate(pixel_values1.to(device))
-                #generated_ids2 = model_ocr.generate(pixel_values2.to(device))
+                ###generated_ids1 = model_ocr.generate(pixel_values1.to(device))
+                ###generated_ids2 = model_ocr.generate(pixel_values2.to(device))

-                #generated_text1 = processor.batch_decode(generated_ids1, skip_special_tokens=True)[0]
-                #generated_text2 = processor.batch_decode(generated_ids2, skip_special_tokens=True)[0]
+                ###generated_text1 = processor.batch_decode(generated_ids1, skip_special_tokens=True)[0]
+                ###generated_text2 = processor.batch_decode(generated_ids2, skip_special_tokens=True)[0]

-                #generated_text = generated_text1 + ' ' + generated_text2
-                generated_text = generated_text_merged[0] + ' ' + generated_text_merged[1]
+                ###generated_text = generated_text1 + ' ' + generated_text2
+                ##generated_text = generated_text_merged[0] + ' ' + generated_text_merged[1]

-                #print(generated_text1,'generated_text1')
-                #print(generated_text2, 'generated_text2')
-                #print('########################################')
-            else:
-                pixel_values = processor(textline_image, return_tensors="pt").pixel_values
-                generated_ids = model_ocr.generate(pixel_values.to(device))
-                generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
+                ###print(generated_text1,'generated_text1')
+                ###print(generated_text2, 'generated_text2')
+                ###print('########################################')
+            ##else:
+                ##pixel_values = processor(textline_image, return_tensors="pt").pixel_values
+                ##generated_ids = model_ocr.generate(pixel_values.to(device))
+                ##generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]

-        #print(generated_text,'generated_text')
-        #print('########################################')
-        return generated_text
+        ###print(generated_text,'generated_text')
+        ###print('########################################')
+        ##return generated_text

-    def return_ocr_of_textline(
-            self, textline_image, model_ocr, processor, device, width_textline, h2w_ratio,ind_tot):
+    ###def return_ocr_of_textline(
+            ###self, textline_image, model_ocr, processor, device, width_textline, h2w_ratio,ind_tot):

-        if h2w_ratio > 0.05:
-            pixel_values = processor(textline_image, return_tensors="pt").pixel_values
-            generated_ids = model_ocr.generate(pixel_values.to(device))
-            generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
-        else:
-            #width = np.shape(textline_image)[1]
-            #height = np.shape(textline_image)[0]
-            #common_window = int(0.3*width)
-            #width1 = int ( width/2. - common_window )
-            #width2 = int ( width/2. + common_window )
+        ###if h2w_ratio > 0.05:
+            ###pixel_values = processor(textline_image, return_tensors="pt").pixel_values
+            ###generated_ids = model_ocr.generate(pixel_values.to(device))
+            ###generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
+        ###else:
+            ####width = np.shape(textline_image)[1]
+            ####height = np.shape(textline_image)[0]
+            ####common_window = int(0.3*width)
+            ####width1 = int ( width/2. - common_window )
+            ####width2 = int ( width/2. + common_window )

-            try:
-                width1, width2 = self.return_start_and_end_of_common_text_of_textline_ocr_new(textline_image, ind_tot)
+            ###try:
+                ###width1, width2 = self.return_start_and_end_of_common_text_of_textline_ocr_new(textline_image, ind_tot)

-                image1 = textline_image[:, :width2,:]# image.crop((0, 0, width2, height))
-                image2 = textline_image[:, width1:,:]#image.crop((width1, 0, width, height))
+                ###image1 = textline_image[:, :width2,:]# image.crop((0, 0, width2, height))
+                ###image2 = textline_image[:, width1:,:]#image.crop((width1, 0, width, height))

-                pixel_values1 = processor(image1, return_tensors="pt").pixel_values
-                pixel_values2 = processor(image2, return_tensors="pt").pixel_values
+                ###pixel_values1 = processor(image1, return_tensors="pt").pixel_values
+                ###pixel_values2 = processor(image2, return_tensors="pt").pixel_values

-                generated_ids1 = model_ocr.generate(pixel_values1.to(device))
-                generated_ids2 = model_ocr.generate(pixel_values2.to(device))
+                ###generated_ids1 = model_ocr.generate(pixel_values1.to(device))
+                ###generated_ids2 = model_ocr.generate(pixel_values2.to(device))

-                generated_text1 = processor.batch_decode(generated_ids1, skip_special_tokens=True)[0]
-                generated_text2 = processor.batch_decode(generated_ids2, skip_special_tokens=True)[0]
-                #print(generated_text1,'generated_text1')
-                #print(generated_text2, 'generated_text2')
-                #print('########################################')
+                ###generated_text1 = processor.batch_decode(generated_ids1, skip_special_tokens=True)[0]
+                ###generated_text2 = processor.batch_decode(generated_ids2, skip_special_tokens=True)[0]
+                ####print(generated_text1,'generated_text1')
+                ####print(generated_text2, 'generated_text2')
+                ####print('########################################')

-                match = sq(None, generated_text1, generated_text2).find_longest_match(
-                    0, len(generated_text1), 0, len(generated_text2))
-                generated_text = generated_text1 + generated_text2[match.b+match.size:]
-            except:
-                pixel_values = processor(textline_image, return_tensors="pt").pixel_values
-                generated_ids = model_ocr.generate(pixel_values.to(device))
-                generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
+                ###match = sq(None, generated_text1, generated_text2).find_longest_match(
+                    ###0, len(generated_text1), 0, len(generated_text2))
+                ###generated_text = generated_text1 + generated_text2[match.b+match.size:]
+            ###except:
+                ###pixel_values = processor(textline_image, return_tensors="pt").pixel_values
+                ###generated_ids = model_ocr.generate(pixel_values.to(device))
+                ###generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]

-        return generated_text
+        ###return generated_text

-    def return_textline_contour_with_added_box_coordinate(self, textline_contour,  box_ind):
-        textline_contour[:,0] = textline_contour[:,0] + box_ind[2]
-        textline_contour[:,1] = textline_contour[:,1] + box_ind[0]
-        return textline_contour

    def return_list_of_contours_with_desired_order(self, ls_cons, sorted_indexes):
        return [ls_cons[sorted_indexes[index]] for index in range(len(sorted_indexes))]
@ -4625,6 +4681,7 @@ class Eynollah:
            raise ValueError("run requires either a single image filename or a directory")

        for img_filename in self.ls_imgs:
+            print(img_filename, 'img_filename')
            self.logger.info(img_filename)
            t0 = time.time()

@ -4698,13 +4755,19 @@ class Eynollah:
            all_box_coord_marginals = []
            polygons_lines_xml = []
            contours_tables = []
-            ocr_all_textlines = None
            conf_contours_textregions =[0]
+            
+            if self.ocr and not self.tr:
+                gc.collect()
+                ocr_all_textlines = return_rnn_cnn_ocr_of_given_textlines(image_page, all_found_textline_polygons, self.prediction_model, self.b_s_ocr, self.num_to_char, textline_light=True)
+            else:
+                ocr_all_textlines = None
+            
            pcgts = self.writer.build_pagexml_no_full_layout(
                cont_page, page_coord, order_text_new, id_of_texts_tot,
                all_found_textline_polygons, page_coord, polygons_of_images, polygons_of_marginals,
                all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals,
-                cont_page, polygons_lines_xml, contours_tables, ocr_all_textlines, conf_contours_textregions)
+                cont_page, polygons_lines_xml, contours_tables, ocr_all_textlines, conf_contours_textregions, self.skip_layout_and_reading_order)
            return pcgts

        #print("text region early -1 in %.1fs", time.time() - t0)
@ -5118,7 +5181,7 @@ class Eynollah:
                tror = time.time()
                order_text_new, id_of_texts_tot = self.do_order_of_regions_with_model(
                    contours_only_text_parent, contours_only_text_parent_h, text_regions_p)
-                print('time spend for mb ro', time.time()-tror)
+                print('time spend for mb ro',  time.time()-tror)
            else:
                if np.abs(slope_deskew) < SLOPE_THRESHOLD:
                    order_text_new, id_of_texts_tot = self.do_order_of_regions(
@ -5160,7 +5223,7 @@ class Eynollah:
                order_text_new, id_of_texts_tot = self.do_order_of_regions(
                    contours_only_text_parent_d_ordered, contours_only_text_parent_h, boxes_d, textline_mask_tot_d)

-        if self.ocr:
+        if self.ocr and self.tr:
            device = cuda.get_current_device()
            device.reset()
            gc.collect()
@ -5207,6 +5270,11 @@ class Eynollah:
                    ocr_textline_in_textregion.append(text_ocr)
                    ind_tot = ind_tot +1
                ocr_all_textlines.append(ocr_textline_in_textregion)
+                
+        elif self.ocr and not self.tr:
+            gc.collect()
+            ocr_all_textlines = return_rnn_cnn_ocr_of_given_textlines(image_page, all_found_textline_polygons, self.prediction_model, self.b_s_ocr, self.num_to_char, self.textline_light, self.curved_line)
+

        else:
            ocr_all_textlines = None
@ -5289,329 +5357,6 @@ class Eynollah_ocr:
                    vocabulary=char_to_num.get_vocabulary(), mask_token=None, invert=True
                )

-        
-    def decode_batch_predictions(self, pred, max_len = 128):
-        # input_len is the product of the batch size and the
-        # number of time steps.
-        input_len = np.ones(pred.shape[0]) * pred.shape[1]
-        
-        # Decode CTC predictions using greedy search.
-        # decoded is a tuple with 2 elements.
-        decoded = tf.keras.backend.ctc_decode(pred, 
-                        input_length = input_len, 
-                                    beam_width = 100)
-        # The outputs are in the first element of the tuple.
-        # Additionally, the first element is actually a list,
-        # therefore we take the first element of that list as well.
-        #print(decoded,'decoded')
-        decoded = decoded[0][0][:, :max_len]
-        
-        #print(decoded, decoded.shape,'decoded')
-
-        output = []
-        for d in decoded:
-            # Convert the predicted indices to the corresponding chars.
-            d = tf.strings.reduce_join(self.num_to_char(d))
-            d = d.numpy().decode("utf-8")
-            output.append(d)
-        return output
-        
-        
-    def distortion_free_resize(self, image, img_size):
-        w, h = img_size
-        image = tf.image.resize(image, size=(h, w), preserve_aspect_ratio=True)
-
-        # Check tha amount of padding needed to be done.
-        pad_height = h - tf.shape(image)[0]
-        pad_width = w - tf.shape(image)[1]
-
-        # Only necessary if you want to do same amount of padding on both sides.
-        if pad_height % 2 != 0:
-            height = pad_height // 2
-            pad_height_top = height + 1
-            pad_height_bottom = height
-        else:
-            pad_height_top = pad_height_bottom = pad_height // 2
-
-        if pad_width % 2 != 0:
-            width = pad_width // 2
-            pad_width_left = width + 1
-            pad_width_right = width
-        else:
-            pad_width_left = pad_width_right = pad_width // 2
-
-        image = tf.pad(
-            image,
-            paddings=[
-                [pad_height_top, pad_height_bottom],
-                [pad_width_left, pad_width_right],
-                [0, 0],
-            ],
-        )
-
-        image = tf.transpose(image, (1, 0, 2))
-        image = tf.image.flip_left_right(image)
-        return image
-    
-    def return_start_and_end_of_common_text_of_textline_ocr_without_common_section(self, textline_image):
-        width = np.shape(textline_image)[1]
-        height = np.shape(textline_image)[0]
-        common_window = int(0.22*width)
-
-        width1 = int ( width/2. - common_window )
-        width2 = int ( width/2. + common_window )
-        
-        img_sum = np.sum(textline_image[:,:,0], axis=0)
-        sum_smoothed = gaussian_filter1d(img_sum, 3)
-        
-        peaks_real, _ = find_peaks(sum_smoothed, height=0)
-        
-        if len(peaks_real)>35:
-
-            #peaks_real = peaks_real[(peaks_real<width2) & (peaks_real>width1)]
-            argsort = np.argsort(sum_smoothed[peaks_real])[::-1]
-            peaks_real_top_six = peaks_real[argsort[:6]]
-            midpoint = textline_image.shape[1] / 2.
-            arg_closest = np.argmin(np.abs(peaks_real_top_six - midpoint))
-
-            #arg_max = np.argmax(sum_smoothed[peaks_real])
-
-            peaks_final = peaks_real_top_six[arg_closest]#peaks_real[arg_max]
-            
-            return peaks_final
-        else:
-            return None
-        
-    # Function to fit text inside the given area
-    def fit_text_single_line(self, draw, text, font_path, max_width, max_height):
-        initial_font_size = 50
-        font_size = initial_font_size
-        while font_size > 10:  # Minimum font size
-            font = ImageFont.truetype(font_path, font_size)
-            text_bbox = draw.textbbox((0, 0), text, font=font)  # Get text bounding box
-            text_width = text_bbox[2] - text_bbox[0]
-            text_height = text_bbox[3] - text_bbox[1]
-
-            if text_width <= max_width and text_height <= max_height:
-                return font  # Return the best-fitting font
-
-            font_size -= 2  # Reduce font size and retry
-
-        return ImageFont.truetype(font_path, 10)  # Smallest font fallback
-    
-    def return_textlines_split_if_needed(self, textline_image, textline_image_bin):
-
-        split_point = self.return_start_and_end_of_common_text_of_textline_ocr_without_common_section(textline_image)
-        if split_point:
-            image1 = textline_image[:, :split_point,:]# image.crop((0, 0, width2, height))
-            image2 = textline_image[:, split_point:,:]#image.crop((width1, 0, width, height))
-            if self.prediction_with_both_of_rgb_and_bin:
-                image1_bin = textline_image_bin[:, :split_point,:]# image.crop((0, 0, width2, height))
-                image2_bin = textline_image_bin[:, split_point:,:]#image.crop((width1, 0, width, height))
-                return [image1, image2], [image1_bin, image2_bin]
-            else:
-                return [image1, image2], None
-        else:
-            return None, None
-    def preprocess_and_resize_image_for_ocrcnn_model(self, img, image_height, image_width):
-        ratio = image_height /float(img.shape[0])
-        w_ratio = int(ratio * img.shape[1])
-        
-        if w_ratio <= image_width:
-            width_new = w_ratio
-        else:
-            width_new = image_width
-            
-        if width_new == 0:
-            width_new = img.shape[1]
-            
-        ##if width_new+32 >= image_width:
-            ##width_new = width_new - 32
-            
-        ###patch_zero = np.zeros((32, 32, 3))#+255
-        ###patch_zero[9:19,8:18,:] = 0
-            
-        
-        img = resize_image(img, image_height, width_new)
-        img_fin = np.ones((image_height, image_width, 3))*255
-        ###img_fin[:,:32,:] = patch_zero[:,:,:]
-        ###img_fin[:,32:32+width_new,:] = img[:,:,:]
-        img_fin[:,:width_new,:] = img[:,:,:]
-        img_fin = img_fin / 255.
-        return img_fin
-    
-    def get_deskewed_contour_and_bb_and_image(self, contour, image, deskew_angle):
-        (h_in, w_in) = image.shape[:2]
-        center = (w_in // 2, h_in // 2)
-        
-        rotation_matrix = cv2.getRotationMatrix2D(center, deskew_angle, 1.0)
-        
-        cos_angle = abs(rotation_matrix[0, 0])
-        sin_angle = abs(rotation_matrix[0, 1])
-        new_w = int((h_in * sin_angle) + (w_in * cos_angle))
-        new_h = int((h_in * cos_angle) + (w_in * sin_angle))
-        
-        rotation_matrix[0, 2] += (new_w / 2) - center[0]
-        rotation_matrix[1, 2] += (new_h / 2) - center[1]
-        
-        deskewed_image = cv2.warpAffine(image, rotation_matrix, (new_w, new_h))
-        
-        contour_points = np.array(contour, dtype=np.float32)
-        transformed_points = cv2.transform(np.array([contour_points]), rotation_matrix)[0]
-        
-        x, y, w, h = cv2.boundingRect(np.array(transformed_points, dtype=np.int32))
-        cropped_textline = deskewed_image[y:y+h, x:x+w]
-        
-        return cropped_textline
-    
-    def rotate_image_with_padding(self, image, angle, border_value=(0,0,0)):
-        # Get image dimensions
-        (h, w) = image.shape[:2]
-        
-        # Calculate the center of the image
-        center = (w // 2, h // 2)
-        
-        # Get the rotation matrix
-        rotation_matrix = cv2.getRotationMatrix2D(center, angle, 1.0)
-        
-        # Compute the new bounding dimensions
-        cos = abs(rotation_matrix[0, 0])
-        sin = abs(rotation_matrix[0, 1])
-        new_w = int((h * sin) + (w * cos))
-        new_h = int((h * cos) + (w * sin))
-        
-        # Adjust the rotation matrix to account for translation
-        rotation_matrix[0, 2] += (new_w / 2) - center[0]
-        rotation_matrix[1, 2] += (new_h / 2) - center[1]
-        
-        # Perform the rotation
-        rotated_image = cv2.warpAffine(image, rotation_matrix, (new_w, new_h), borderValue=border_value)
-        
-        return rotated_image
-    
-    def get_orientation_moments(self, contour):
-        moments = cv2.moments(contour)
-        if moments["mu20"] - moments["mu02"] == 0:  # Avoid division by zero
-            return 90 if moments["mu11"] > 0 else -90
-        else:
-            angle = 0.5 * np.arctan2(2 * moments["mu11"], moments["mu20"] - moments["mu02"])
-            return np.degrees(angle)  # Convert radians to degrees
-        
-        
-    def get_orientation_moments_of_mask(self, mask):
-        mask=mask.astype('uint8')
-        contours, _ = cv2.findContours(mask[:,:,0], cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
-        
-        largest_contour = max(contours, key=cv2.contourArea) if contours else None
-        
-        moments = cv2.moments(largest_contour)
-        if moments["mu20"] - moments["mu02"] == 0:  # Avoid division by zero
-            return 90 if moments["mu11"] > 0 else -90
-        else:
-            angle = 0.5 * np.arctan2(2 * moments["mu11"], moments["mu20"] - moments["mu02"])
-            return np.degrees(angle)  # Convert radians to degrees
-    
-    def get_contours_and_bounding_boxes(self, mask):
-        # Find contours in the binary mask
-        contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
-        
-        largest_contour = max(contours, key=cv2.contourArea) if contours else None
-
-        # Get the bounding rectangle for the contour
-        x, y, w, h = cv2.boundingRect(largest_contour)
-        #bounding_boxes.append((x, y, w, h))
-        
-        return x, y, w, h
-    
-    def return_splitting_point_of_image(self, image_to_spliited):
-        width = np.shape(image_to_spliited)[1]
-        height = np.shape(image_to_spliited)[0]
-        common_window = int(0.03*width)
-
-        width1 = int ( common_window)
-        width2 = int ( width - common_window )
-
-        img_sum = np.sum(image_to_spliited[:,:,0], axis=0)
-        sum_smoothed = gaussian_filter1d(img_sum, 1)
-
-        peaks_real, _ = find_peaks(sum_smoothed, height=0)
-        peaks_real = peaks_real[(peaks_real<width2) & (peaks_real>width1)]
-        
-        arg_sort = np.argsort(sum_smoothed[peaks_real])
-        peaks_sort_4 = peaks_real[arg_sort][::-1][:4]
-        
-        return np.sort(peaks_sort_4)
-        
-    def break_curved_line_into_small_pieces_and_then_merge(self, img_curved, mask_curved):
-        peaks_4 = self.return_splitting_point_of_image(img_curved)
-        if len(peaks_4)>0:
-            imgs_tot = []
-            
-            for ind in range(len(peaks_4)+1):
-                if ind==0:
-                    img = img_curved[:, :peaks_4[ind], :]
-                    mask = mask_curved[:, :peaks_4[ind], :]
-                elif ind==len(peaks_4):
-                    img = img_curved[:, peaks_4[ind-1]:, :]
-                    mask = mask_curved[:, peaks_4[ind-1]:, :]
-                else:
-                    img = img_curved[:, peaks_4[ind-1]:peaks_4[ind], :]
-                    mask = mask_curved[:, peaks_4[ind-1]:peaks_4[ind], :]
-                    
-                or_ma = self.get_orientation_moments_of_mask(mask)
-            
-                imgs_tot.append([img, mask, or_ma] )
-            
-            
-            w_tot_des_list = []
-            w_tot_des = 0
-            imgs_deskewed_list = []
-            for ind in range(len(imgs_tot)):
-                img_in = imgs_tot[ind][0]
-                mask_in = imgs_tot[ind][1]
-                ori_in = imgs_tot[ind][2]
-                
-                if abs(ori_in)<45:
-                    img_in_des = self.rotate_image_with_padding(img_in, ori_in, border_value=(255,255,255) )
-                    mask_in_des = self.rotate_image_with_padding(mask_in, ori_in)
-                    mask_in_des = mask_in_des.astype('uint8')
-                    
-                    #new bounding box
-                    x_n, y_n, w_n, h_n = self.get_contours_and_bounding_boxes(mask_in_des[:,:,0])
-                    
-                    mask_in_des = mask_in_des[y_n:y_n+h_n, x_n:x_n+w_n, :]
-                    img_in_des = img_in_des[y_n:y_n+h_n, x_n:x_n+w_n, :]
-                    
-                    w_relative = int(32 * img_in_des.shape[1]/float(img_in_des.shape[0]) )
-                    if w_relative==0:
-                        w_relative = img_in_des.shape[1]
-                    img_in_des = resize_image(img_in_des, 32, w_relative)
-                    
-
-                else:
-                    img_in_des = np.copy(img_in)
-                    w_relative = int(32 * img_in_des.shape[1]/float(img_in_des.shape[0]) )
-                    if w_relative==0:
-                        w_relative = img_in_des.shape[1]
-                    img_in_des = resize_image(img_in_des, 32, w_relative)
-                    
-                w_tot_des+=img_in_des.shape[1]
-                w_tot_des_list.append(img_in_des.shape[1])
-                imgs_deskewed_list.append(img_in_des)
-                
-                
-                
-
-            img_final_deskewed = np.zeros((32, w_tot_des, 3))+255
-            
-            w_indexer = 0
-            for ind in range(len(w_tot_des_list)):
-                img_final_deskewed[:,w_indexer:w_indexer+w_tot_des_list[ind],:] = imgs_deskewed_list[ind][:,:,:]
-                w_indexer = w_indexer+w_tot_des_list[ind]
-            return img_final_deskewed
-        else:
-            return img_curved
-
    def run(self):
        ls_imgs = os.listdir(self.dir_in)
        
@ -6069,7 +5814,7 @@ class Eynollah_ocr:
                            preds_bin = self.prediction_model.predict(imgs_bin, verbose=0)
                            preds = (preds + preds_bin) / 2.

-                        pred_texts = self.decode_batch_predictions(preds)
+                        pred_texts = self.decode_batch_predictions(preds, self.num_to_char)

                        for ib in range(imgs.shape[0]):
                            pred_texts_ib = pred_texts[ib].replace("[UNK]", "")
--- a/src/eynollah/utils/utils_ocr.py
+++ b/src/eynollah/utils/utils_ocr.py
@ -0,0 +1,435 @@
+import numpy as np
+import cv2
+import tensorflow as tf
+from scipy.signal import find_peaks
+from scipy.ndimage import gaussian_filter1d
+import math
+from .resize import resize_image
+
+def decode_batch_predictions(pred, num_to_char, max_len = 128):
+    # input_len is the product of the batch size and the
+    # number of time steps.
+    input_len = np.ones(pred.shape[0]) * pred.shape[1]
+    
+    # Decode CTC predictions using greedy search.
+    # decoded is a tuple with 2 elements.
+    decoded = tf.keras.backend.ctc_decode(pred, 
+                    input_length = input_len, 
+                                beam_width = 100)
+    # The outputs are in the first element of the tuple.
+    # Additionally, the first element is actually a list,
+    # therefore we take the first element of that list as well.
+    #print(decoded,'decoded')
+    decoded = decoded[0][0][:, :max_len]
+    
+    #print(decoded, decoded.shape,'decoded')
+
+    output = []
+    for d in decoded:
+        # Convert the predicted indices to the corresponding chars.
+        d = tf.strings.reduce_join(num_to_char(d))
+        d = d.numpy().decode("utf-8")
+        output.append(d)
+    return output
+    
+    
+def distortion_free_resize(image, img_size):
+    w, h = img_size
+    image = tf.image.resize(image, size=(h, w), preserve_aspect_ratio=True)
+
+    # Check tha amount of padding needed to be done.
+    pad_height = h - tf.shape(image)[0]
+    pad_width = w - tf.shape(image)[1]
+
+    # Only necessary if you want to do same amount of padding on both sides.
+    if pad_height % 2 != 0:
+        height = pad_height // 2
+        pad_height_top = height + 1
+        pad_height_bottom = height
+    else:
+        pad_height_top = pad_height_bottom = pad_height // 2
+
+    if pad_width % 2 != 0:
+        width = pad_width // 2
+        pad_width_left = width + 1
+        pad_width_right = width
+    else:
+        pad_width_left = pad_width_right = pad_width // 2
+
+    image = tf.pad(
+        image,
+        paddings=[
+            [pad_height_top, pad_height_bottom],
+            [pad_width_left, pad_width_right],
+            [0, 0],
+        ],
+    )
+
+    image = tf.transpose(image, (1, 0, 2))
+    image = tf.image.flip_left_right(image)
+    return image
+
+def return_start_and_end_of_common_text_of_textline_ocr_without_common_section(textline_image):
+    width = np.shape(textline_image)[1]
+    height = np.shape(textline_image)[0]
+    common_window = int(0.22*width)
+
+    width1 = int ( width/2. - common_window )
+    width2 = int ( width/2. + common_window )
+    
+    img_sum = np.sum(textline_image[:,:,0], axis=0)
+    sum_smoothed = gaussian_filter1d(img_sum, 3)
+    
+    peaks_real, _ = find_peaks(sum_smoothed, height=0)
+    
+    if len(peaks_real)>35:
+
+        #peaks_real = peaks_real[(peaks_real<width2) & (peaks_real>width1)]
+        argsort = np.argsort(sum_smoothed[peaks_real])[::-1]
+        peaks_real_top_six = peaks_real[argsort[:6]]
+        midpoint = textline_image.shape[1] / 2.
+        arg_closest = np.argmin(np.abs(peaks_real_top_six - midpoint))
+
+        #arg_max = np.argmax(sum_smoothed[peaks_real])
+
+        peaks_final = peaks_real_top_six[arg_closest]#peaks_real[arg_max]
+        
+        return peaks_final
+    else:
+        return None
+    
+# Function to fit text inside the given area
+def fit_text_single_line(draw, text, font_path, max_width, max_height):
+    initial_font_size = 50
+    font_size = initial_font_size
+    while font_size > 10:  # Minimum font size
+        font = ImageFont.truetype(font_path, font_size)
+        text_bbox = draw.textbbox((0, 0), text, font=font)  # Get text bounding box
+        text_width = text_bbox[2] - text_bbox[0]
+        text_height = text_bbox[3] - text_bbox[1]
+
+        if text_width <= max_width and text_height <= max_height:
+            return font  # Return the best-fitting font
+
+        font_size -= 2  # Reduce font size and retry
+
+    return ImageFont.truetype(font_path, 10)  # Smallest font fallback
+
+def return_textlines_split_if_needed(textline_image, textline_image_bin, prediction_with_both_of_rgb_and_bin=False):
+
+    split_point = return_start_and_end_of_common_text_of_textline_ocr_without_common_section(textline_image)
+    if split_point:
+        image1 = textline_image[:, :split_point,:]# image.crop((0, 0, width2, height))
+        image2 = textline_image[:, split_point:,:]#image.crop((width1, 0, width, height))
+        if prediction_with_both_of_rgb_and_bin:
+            image1_bin = textline_image_bin[:, :split_point,:]# image.crop((0, 0, width2, height))
+            image2_bin = textline_image_bin[:, split_point:,:]#image.crop((width1, 0, width, height))
+            return [image1, image2], [image1_bin, image2_bin]
+        else:
+            return [image1, image2], None
+    else:
+        return None, None
+def preprocess_and_resize_image_for_ocrcnn_model(img, image_height, image_width):
+    ratio = image_height /float(img.shape[0])
+    w_ratio = int(ratio * img.shape[1])
+    
+    if w_ratio <= image_width:
+        width_new = w_ratio
+    else:
+        width_new = image_width
+        
+    if width_new == 0:
+        width_new = img.shape[1]
+        
+    
+    img = resize_image(img, image_height, width_new)
+    img_fin = np.ones((image_height, image_width, 3))*255
+
+    img_fin[:,:width_new,:] = img[:,:,:]
+    img_fin = img_fin / 255.
+    return img_fin
+
+def get_deskewed_contour_and_bb_and_image(contour, image, deskew_angle):
+    (h_in, w_in) = image.shape[:2]
+    center = (w_in // 2, h_in // 2)
+    
+    rotation_matrix = cv2.getRotationMatrix2D(center, deskew_angle, 1.0)
+    
+    cos_angle = abs(rotation_matrix[0, 0])
+    sin_angle = abs(rotation_matrix[0, 1])
+    new_w = int((h_in * sin_angle) + (w_in * cos_angle))
+    new_h = int((h_in * cos_angle) + (w_in * sin_angle))
+    
+    rotation_matrix[0, 2] += (new_w / 2) - center[0]
+    rotation_matrix[1, 2] += (new_h / 2) - center[1]
+    
+    deskewed_image = cv2.warpAffine(image, rotation_matrix, (new_w, new_h))
+    
+    contour_points = np.array(contour, dtype=np.float32)
+    transformed_points = cv2.transform(np.array([contour_points]), rotation_matrix)[0]
+    
+    x, y, w, h = cv2.boundingRect(np.array(transformed_points, dtype=np.int32))
+    cropped_textline = deskewed_image[y:y+h, x:x+w]
+    
+    return cropped_textline
+
+def rotate_image_with_padding(image, angle, border_value=(0,0,0)):
+    # Get image dimensions
+    (h, w) = image.shape[:2]
+    
+    # Calculate the center of the image
+    center = (w // 2, h // 2)
+    
+    # Get the rotation matrix
+    rotation_matrix = cv2.getRotationMatrix2D(center, angle, 1.0)
+    
+    # Compute the new bounding dimensions
+    cos = abs(rotation_matrix[0, 0])
+    sin = abs(rotation_matrix[0, 1])
+    new_w = int((h * sin) + (w * cos))
+    new_h = int((h * cos) + (w * sin))
+    
+    # Adjust the rotation matrix to account for translation
+    rotation_matrix[0, 2] += (new_w / 2) - center[0]
+    rotation_matrix[1, 2] += (new_h / 2) - center[1]
+    
+    # Perform the rotation
+    rotated_image = cv2.warpAffine(image, rotation_matrix, (new_w, new_h), borderValue=border_value)
+    
+    return rotated_image
+
+def get_orientation_moments(contour):
+    moments = cv2.moments(contour)
+    if moments["mu20"] - moments["mu02"] == 0:  # Avoid division by zero
+        return 90 if moments["mu11"] > 0 else -90
+    else:
+        angle = 0.5 * np.arctan2(2 * moments["mu11"], moments["mu20"] - moments["mu02"])
+        return np.degrees(angle)  # Convert radians to degrees
+    
+    
+def get_orientation_moments_of_mask(mask):
+    mask=mask.astype('uint8')
+    contours, _ = cv2.findContours(mask[:,:,0], cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
+    
+    largest_contour = max(contours, key=cv2.contourArea) if contours else None
+    
+    moments = cv2.moments(largest_contour)
+    if moments["mu20"] - moments["mu02"] == 0:  # Avoid division by zero
+        return 90 if moments["mu11"] > 0 else -90
+    else:
+        angle = 0.5 * np.arctan2(2 * moments["mu11"], moments["mu20"] - moments["mu02"])
+        return np.degrees(angle)  # Convert radians to degrees
+
+def get_contours_and_bounding_boxes(mask):
+    # Find contours in the binary mask
+    contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
+    
+    largest_contour = max(contours, key=cv2.contourArea) if contours else None
+
+    # Get the bounding rectangle for the contour
+    x, y, w, h = cv2.boundingRect(largest_contour)
+    #bounding_boxes.append((x, y, w, h))
+    
+    return x, y, w, h
+
+def return_splitting_point_of_image(image_to_spliited):
+    width = np.shape(image_to_spliited)[1]
+    height = np.shape(image_to_spliited)[0]
+    common_window = int(0.03*width)
+
+    width1 = int ( common_window)
+    width2 = int ( width - common_window )
+
+    img_sum = np.sum(image_to_spliited[:,:,0], axis=0)
+    sum_smoothed = gaussian_filter1d(img_sum, 1)
+
+    peaks_real, _ = find_peaks(sum_smoothed, height=0)
+    peaks_real = peaks_real[(peaks_real<width2) & (peaks_real>width1)]
+    
+    arg_sort = np.argsort(sum_smoothed[peaks_real])
+    peaks_sort_4 = peaks_real[arg_sort][::-1][:4]
+    
+    return np.sort(peaks_sort_4)
+    
+def break_curved_line_into_small_pieces_and_then_merge(img_curved, mask_curved):
+    peaks_4 = return_splitting_point_of_image(img_curved)
+    if len(peaks_4)>0:
+        imgs_tot = []
+        
+        for ind in range(len(peaks_4)+1):
+            if ind==0:
+                img = img_curved[:, :peaks_4[ind], :]
+                mask = mask_curved[:, :peaks_4[ind], :]
+            elif ind==len(peaks_4):
+                img = img_curved[:, peaks_4[ind-1]:, :]
+                mask = mask_curved[:, peaks_4[ind-1]:, :]
+            else:
+                img = img_curved[:, peaks_4[ind-1]:peaks_4[ind], :]
+                mask = mask_curved[:, peaks_4[ind-1]:peaks_4[ind], :]
+                
+            or_ma = get_orientation_moments_of_mask(mask)
+        
+            imgs_tot.append([img, mask, or_ma] )
+        
+        
+        w_tot_des_list = []
+        w_tot_des = 0
+        imgs_deskewed_list = []
+        for ind in range(len(imgs_tot)):
+            img_in = imgs_tot[ind][0]
+            mask_in = imgs_tot[ind][1]
+            ori_in = imgs_tot[ind][2]
+            
+            if abs(ori_in)<45:
+                img_in_des = rotate_image_with_padding(img_in, ori_in, border_value=(255,255,255) )
+                mask_in_des = rotate_image_with_padding(mask_in, ori_in)
+                mask_in_des = mask_in_des.astype('uint8')
+                
+                #new bounding box
+                x_n, y_n, w_n, h_n = get_contours_and_bounding_boxes(mask_in_des[:,:,0])
+                
+                mask_in_des = mask_in_des[y_n:y_n+h_n, x_n:x_n+w_n, :]
+                img_in_des = img_in_des[y_n:y_n+h_n, x_n:x_n+w_n, :]
+                
+                w_relative = int(32 * img_in_des.shape[1]/float(img_in_des.shape[0]) )
+                if w_relative==0:
+                    w_relative = img_in_des.shape[1]
+                img_in_des = resize_image(img_in_des, 32, w_relative)
+                
+
+            else:
+                img_in_des = np.copy(img_in)
+                w_relative = int(32 * img_in_des.shape[1]/float(img_in_des.shape[0]) )
+                if w_relative==0:
+                    w_relative = img_in_des.shape[1]
+                img_in_des = resize_image(img_in_des, 32, w_relative)
+                
+            w_tot_des+=img_in_des.shape[1]
+            w_tot_des_list.append(img_in_des.shape[1])
+            imgs_deskewed_list.append(img_in_des)
+            
+            
+            
+
+        img_final_deskewed = np.zeros((32, w_tot_des, 3))+255
+        
+        w_indexer = 0
+        for ind in range(len(w_tot_des_list)):
+            img_final_deskewed[:,w_indexer:w_indexer+w_tot_des_list[ind],:] = imgs_deskewed_list[ind][:,:,:]
+            w_indexer = w_indexer+w_tot_des_list[ind]
+        return img_final_deskewed
+    else:
+        return img_curved
+    
+def return_textline_contour_with_added_box_coordinate(textline_contour,  box_ind):
+    textline_contour[:,0] = textline_contour[:,0] + box_ind[2]
+    textline_contour[:,1] = textline_contour[:,1] + box_ind[0]
+    return textline_contour
+
+
+def return_rnn_cnn_ocr_of_given_textlines(image, all_found_textline_polygons, prediction_model, b_s_ocr, num_to_char, textline_light=False, curved_line=False):
+    max_len = 512
+    padding_token = 299
+    image_width = 512#max_len * 4
+    image_height = 32
+    ind_tot = 0
+    #cv2.imwrite('./img_out.png', image_page)
+    ocr_all_textlines = []
+    cropped_lines_region_indexer = []
+    cropped_lines_meging_indexing = []
+    cropped_lines = []
+    indexer_text_region = 0
+    
+    for indexing, ind_poly_first in enumerate(all_found_textline_polygons):
+        #ocr_textline_in_textregion = []
+        for indexing2, ind_poly in enumerate(ind_poly_first):
+            cropped_lines_region_indexer.append(indexer_text_region)
+            if not (textline_light or curved_line):
+                ind_poly = copy.deepcopy(ind_poly)
+                box_ind = all_box_coord[indexing]
+
+                ind_poly = return_textline_contour_with_added_box_coordinate(ind_poly, box_ind)
+                #print(ind_poly_copy)
+                ind_poly[ind_poly<0] = 0
+            x, y, w, h = cv2.boundingRect(ind_poly)
+            
+            w_scaled = w *  image_height/float(h)
+
+            mask_poly = np.zeros(image.shape)
+
+            img_poly_on_img = np.copy(image)
+            
+            mask_poly = cv2.fillPoly(mask_poly, pts=[ind_poly], color=(1, 1, 1))
+
+
+            
+            mask_poly = mask_poly[y:y+h, x:x+w, :]
+            img_crop = img_poly_on_img[y:y+h, x:x+w, :]
+            
+            img_crop[mask_poly==0] = 255
+            
+            if w_scaled < 640:#1.5*image_width:
+                img_fin = preprocess_and_resize_image_for_ocrcnn_model(img_crop, image_height, image_width)
+                cropped_lines.append(img_fin)
+                cropped_lines_meging_indexing.append(0)
+            else:
+                splited_images, splited_images_bin = return_textlines_split_if_needed(img_crop, None)
+                
+                if splited_images:
+                    img_fin = preprocess_and_resize_image_for_ocrcnn_model(splited_images[0], image_height, image_width)
+                    cropped_lines.append(img_fin)
+                    cropped_lines_meging_indexing.append(1)
+                    
+                    img_fin = preprocess_and_resize_image_for_ocrcnn_model(splited_images[1], image_height, image_width)
+                    
+                    cropped_lines.append(img_fin)
+                    cropped_lines_meging_indexing.append(-1)
+                    
+                else:
+                    img_fin = preprocess_and_resize_image_for_ocrcnn_model(img_crop, image_height, image_width)
+                    cropped_lines.append(img_fin)
+                    cropped_lines_meging_indexing.append(0)
+            
+        indexer_text_region+=1
+        
+        
+    extracted_texts = []
+
+    n_iterations  = math.ceil(len(cropped_lines) / b_s_ocr) 
+
+    for i in range(n_iterations):
+        if i==(n_iterations-1):
+            n_start = i*b_s_ocr
+            imgs = cropped_lines[n_start:]
+            imgs = np.array(imgs)
+            imgs = imgs.reshape(imgs.shape[0], image_height, image_width, 3)
+            
+            
+        else:
+            n_start = i*b_s_ocr
+            n_end = (i+1)*b_s_ocr
+            imgs = cropped_lines[n_start:n_end]
+            imgs = np.array(imgs).reshape(b_s_ocr, image_height, image_width, 3)
+            
+
+        preds = prediction_model.predict(imgs, verbose=0)
+        
+        pred_texts = decode_batch_predictions(preds, num_to_char)
+
+        for ib in range(imgs.shape[0]):
+            pred_texts_ib = pred_texts[ib].replace("[UNK]", "")
+            extracted_texts.append(pred_texts_ib)
+            
+    extracted_texts_merged = [extracted_texts[ind]  if cropped_lines_meging_indexing[ind]==0 else extracted_texts[ind]+" "+extracted_texts[ind+1] if cropped_lines_meging_indexing[ind]==1 else None for ind in range(len(cropped_lines_meging_indexing))]
+
+    extracted_texts_merged = [ind for ind in extracted_texts_merged if ind is not None]
+    unique_cropped_lines_region_indexer = np.unique(cropped_lines_region_indexer)
+    
+    ocr_all_textlines = []
+    for ind in unique_cropped_lines_region_indexer:
+        ocr_textline_in_textregion = []
+        extracted_texts_merged_un = np.array(extracted_texts_merged)[np.array(cropped_lines_region_indexer)==ind]
+        for  it_ind, text_textline in enumerate(extracted_texts_merged_un):
+            ocr_textline_in_textregion.append(text_textline)
+        ocr_all_textlines.append(ocr_textline_in_textregion)
+    return ocr_all_textlines
--- a/src/eynollah/writer.py
+++ b/src/eynollah/writer.py
@ -168,7 +168,7 @@ class EynollahXmlWriter():
        with open(self.output_filename, 'w') as f:
            f.write(to_xml(pcgts))

-    def build_pagexml_no_full_layout(self, found_polygons_text_region, page_coord, order_of_texts, id_of_texts, all_found_textline_polygons, all_box_coord, found_polygons_text_region_img, found_polygons_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals, cont_page, polygons_lines_to_be_written_in_xml, found_polygons_tables, ocr_all_textlines, conf_contours_textregion):
+    def build_pagexml_no_full_layout(self, found_polygons_text_region, page_coord, order_of_texts, id_of_texts, all_found_textline_polygons, all_box_coord, found_polygons_text_region_img, found_polygons_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals, cont_page, polygons_lines_to_be_written_in_xml, found_polygons_tables, ocr_all_textlines, conf_contours_textregion, skip_layout_reading_order=False):
        self.logger.debug('enter build_pagexml_no_full_layout')

        # create the file structure
@ -184,7 +184,7 @@ class EynollahXmlWriter():

        for mm in range(len(found_polygons_text_region)):
            textregion = TextRegionType(id=counter.next_region_id, type_='paragraph',
-                    Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_text_region[mm], page_coord), conf=conf_contours_textregion[mm]),
+                    Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_text_region[mm], page_coord, skip_layout_reading_order), conf=conf_contours_textregion[mm]),
                    )
            #textregion.set_conf(conf_contours_textregion[mm])
            page.add_TextRegion(textregion)
@ -303,18 +303,28 @@ class EynollahXmlWriter():

        return pcgts

-    def calculate_polygon_coords(self, contour, page_coord):
+    def calculate_polygon_coords(self, contour, page_coord, skip_layout_reading_order=False):
        self.logger.debug('enter calculate_polygon_coords')
        coords = ''
        for value_bbox in contour:
-            if len(value_bbox) == 2:
-                coords += str(int((value_bbox[0] + page_coord[2]) / self.scale_x))
-                coords += ','
-                coords += str(int((value_bbox[1] + page_coord[0]) / self.scale_y))
+            if skip_layout_reading_order:
+                if len(value_bbox) == 2:
+                    coords += str(int((value_bbox[0]) / self.scale_x))
+                    coords += ','
+                    coords += str(int((value_bbox[1]) / self.scale_y))
+                else:
+                    coords += str(int((value_bbox[0][0]) / self.scale_x))
+                    coords += ','
+                    coords += str(int((value_bbox[0][1]) / self.scale_y))
            else:
-                coords += str(int((value_bbox[0][0] + page_coord[2]) / self.scale_x))
-                coords += ','
-                coords += str(int((value_bbox[0][1] + page_coord[0]) / self.scale_y))
+                if len(value_bbox) == 2:
+                    coords += str(int((value_bbox[0] + page_coord[2]) / self.scale_x))
+                    coords += ','
+                    coords += str(int((value_bbox[1] + page_coord[0]) / self.scale_y))
+                else:
+                    coords += str(int((value_bbox[0][0] + page_coord[2]) / self.scale_x))
+                    coords += ','
+                    coords += str(int((value_bbox[0][1] + page_coord[0]) / self.scale_y))
            coords=coords + ' '
        return coords[:-1]